├── .editorconfig ├── .github └── workflows │ ├── gen-deploy-site.yml │ ├── generate-adr-index.yml │ ├── generate-api-docs.yml │ ├── labeler.yml │ └── stale.yml ├── .gitignore ├── .vscode ├── extensions.json └── settings.json ├── ADR ├── 0000-adr-template.md ├── 0001-pipeline-service-phase-1.md ├── 0002-feature-flags.md ├── 0003-interacting-with-internal-services.md ├── 0004-component-image-location.md ├── 0006-log-conventions.md ├── 0007-change-management.md ├── 0008-environment-provisioning.md ├── 0009-pipeline-service-via-operator.md ├── 0010-namespace-metadata.md ├── 0011-roles-and-permissions.md ├── 0012-namespace-name-format.md ├── 0013-integration-service-api-contracts.md ├── 0014-let-pipelines-proceed.md ├── 0015-integration-service-two-phase-architecture.md ├── 0016-integration-service-promotion-logic.md ├── 0017-use-our-pipelines.md ├── 0018-apps-continuous-perf-testing.md ├── 0019-customize-url-github.md ├── 0020-source-retention.md ├── 0021-partner-tasks.md ├── 0022-secret-mgmt-for-user-workloads.md ├── 0023-git-references-to-furnish-integration-test-scenarios.md ├── 0024-release-attribution.md ├── 0025-appstudio-pipeline-serviceaccount.md ├── 0026-specifying-ocp-targets-for-fbc.md ├── 0027-availability-probe-framework.md ├── 0027-container-images.md ├── 0028-handling-snapshotenvironmentbinding-errors.md ├── 0029-component-dependencies.md ├── 0030-tekton-results-naming-convention.md ├── 0031-sprayproxy.md ├── 0032-decoupling-deployment.md ├── 0033-enable-native-opentelemetry-tracing.md ├── 0034-project-controller-for-multiversion.md ├── 0035-apps-continuous-chaos-testing.md ├── 0035-provisioning-ephemeral-openshift-clusters.md ├── 0036-trusted-artifacts.md ├── 0037-integration-service-promotes-to-GCL-immediately.md ├── 0038-integration-service-composite-removal.md ├── 0041-send-cloud-events.md ├── 0044-spdx-support.md ├── 0046-common-task-runner-image.md └── assets │ └── 0018-apps-continuous-perf-testing.svg ├── CODEOWNERS ├── README.md ├── _config.yml ├── architecture ├── build-service.md ├── enterprise-contract.md ├── gitops-service.md ├── hybrid-application-console.md ├── hybrid-application-service.md ├── image-controller.md ├── index.md ├── integration-service.md ├── internal-services.md ├── jvm-build-service.md ├── multi-platform-controller.md ├── pipeline-service.md ├── release-service.md ├── service-provider-integration.md └── workspace-and-terminal-service.md ├── diagrams ├── ADR-0003 │ └── interacting-with-internal-services.jpg ├── ADR-0008 │ ├── README.md │ ├── binding-controller.jpg │ ├── deprovision-loop.jpg │ ├── dt-dtc-lifecycle.jpg │ ├── flow-byoc-manual-creation.jpg │ ├── flow-cluster-manual-creation.jpg │ ├── flow-sandbox-manual-creation.jpg │ └── provision-loop.jpg ├── ADR-0015 │ ├── component-phase.jpg │ └── composite-phase.jpg ├── ADR-0016 │ └── promotion-logic.jpg ├── ADR-0023 │ ├── git-references-ITS.jpg │ ├── tekton-bundle-ITS.jpg │ └── tekton-pipeline-definition-git-resolver.jpg ├── ADR-0024 │ └── flowchart.jpg ├── ADR-0035 │ ├── chaos-resilience.png │ └── chaos-sla.png ├── build-service │ └── build-service-diagram.svg ├── hybrid-application-service │ ├── cdq-detection.jpg │ ├── has-application-component-create.jpg │ └── has-create-application-seqeuence.png ├── index.md ├── integration-service │ └── integration-service-data-flow.jpg ├── internal-services │ └── internal-services-controller-overview.jpg ├── konflux-workspace-layout.drawio.svg ├── konflux.drawio.svg ├── personal-data.drawio.svg ├── pipeline-service │ └── architecture.jpg ├── release-service 
│ └── konflux-release-service-data-flow.jpg └── secret-mgmt.excalidraw.svg ├── ref ├── config.yaml └── index.md └── tools └── security-tools.MD /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: https://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | [*.md] 7 | indent_style = space 8 | indent_size = 4 9 | end_of_line = lf 10 | charset = utf-8 11 | trim_trailing_whitespace = true 12 | insert_final_newline = true 13 | -------------------------------------------------------------------------------- /.github/workflows/gen-deploy-site.yml: -------------------------------------------------------------------------------- 1 | # Sample workflow for building and deploying a Jekyll site to GitHub Pages 2 | name: Deploy Jekyll with GitHub Pages dependencies preinstalled 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | branches: ["main"] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 13 | permissions: 14 | contents: read 15 | pages: write 16 | id-token: write 17 | 18 | # Allow one concurrent deployment 19 | concurrency: 20 | group: "pages" 21 | cancel-in-progress: true 22 | 23 | jobs: 24 | generate-api-docs: 25 | uses: ./.github/workflows/generate-api-docs.yml 26 | secrets: inherit 27 | 28 | generate-adr-index: 29 | uses: ./.github/workflows/generate-adr-index.yml 30 | secrets: inherit 31 | 32 | # Build job 33 | build: 34 | needs: 35 | - generate-api-docs 36 | - generate-adr-index 37 | runs-on: ubuntu-latest 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v4 41 | - name: Download generated API docs 42 | uses: actions/download-artifact@v4 43 | with: 44 | name: api-docs 45 | path: ref/ 46 | - name: Download generated ADR index 47 | uses: actions/download-artifact@v4 48 | with: 49 | name: adr-index 50 | path: ADR/ 51 | - name: Setup Pages 52 | uses: actions/configure-pages@v5 53 | - name: Build with Jekyll 54 | uses: actions/jekyll-build-pages@v1 55 | with: 56 | source: ./ 57 | destination: ./_site 58 | - name: Upload artifact 59 | uses: actions/upload-pages-artifact@v3 60 | 61 | # Deployment job 62 | deploy: 63 | environment: 64 | name: github-pages 65 | url: ${{ steps.deployment.outputs.page_url }} 66 | runs-on: ubuntu-latest 67 | needs: build 68 | steps: 69 | - name: Deploy to GitHub Pages 70 | id: deployment 71 | uses: actions/deploy-pages@v4 72 | -------------------------------------------------------------------------------- /.github/workflows/generate-adr-index.yml: -------------------------------------------------------------------------------- 1 | name: generate-adr-index 2 | 3 | on: 4 | workflow_dispatch: 5 | workflow_call: 6 | 7 | jobs: 8 | generate-adr-index: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout docs repo 12 | uses: actions/checkout@v4 13 | 14 | - name: Generate ADR header 15 | run: echo "# Architecture Decision Records (ADRs)" > index.md 16 | working-directory: ./ADR 17 | 18 | - name: Generate ADR list 19 | run: for adr in *.md; do title=$(head -1 $adr); echo "* [${title:2}](./$adr)" >> index.md; done 20 | working-directory: ./ADR 21 | 22 | - name: Delete self-reference to index.md 23 | run: sed -i '/index.md/d' index.md 24 | working-directory: ./ADR 25 | 26 | - name: Upload generated index 27 | uses: actions/upload-artifact@v4 28 | with: 29 | name: adr-index 30 | path: ADR/ 31 | 
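For reference, the `Generate ADR list` step above takes the first line of each ADR (its first-line `#` heading), strips the leading two characters with `${title:2}`, and appends a link entry, so the generated `ADR/index.md` ends up looking roughly like this (entries shown for ADR files present in this repository):

```
# Architecture Decision Records (ADRs)
* [0. Record architecture decisions](./0000-adr-template.md)
* [1. Pipeline Service Phase 1](./0001-pipeline-service-phase-1.md)
* [2. Feature Flags](./0002-feature-flags.md)
```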
-------------------------------------------------------------------------------- /.github/workflows/generate-api-docs.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: generate-api-docs 4 | 5 | # Controls when the workflow will run 6 | on: 7 | workflow_dispatch: 8 | workflow_call: 9 | 10 | 11 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 12 | jobs: 13 | # This workflow contains a single job called "build" 14 | generate-docs: 15 | # The type of runner that the job will run on 16 | runs-on: ubuntu-latest 17 | # Steps represent a sequence of tasks that will be executed as part of the job 18 | steps: 19 | - name: Checkout docs repo 20 | uses: actions/checkout@v4 21 | 22 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 23 | - name: Checkout Application and Environment API 24 | uses: actions/checkout@v4 25 | with: 26 | path: crd-temp/application-api 27 | repository: konflux-ci/application-api 28 | 29 | - name: Checkout Integration Service API 30 | uses: actions/checkout@v4 31 | with: 32 | path: crd-temp/integration-service 33 | repository: konflux-ci/integration-service 34 | 35 | - name: Checkout Release Service API 36 | uses: actions/checkout@v4 37 | with: 38 | path: crd-temp/release-service 39 | repository: konflux-ci/release-service 40 | 41 | - name: Checkout Enterprise Contract API 42 | uses: actions/checkout@v4 43 | with: 44 | path: crd-temp/enterprise-contract-controller 45 | repository: enterprise-contract/enterprise-contract-controller 46 | 47 | - name: Checkout Internal Services API 48 | uses: actions/checkout@v4 49 | with: 50 | path: crd-temp/internal-services 51 | repository: konflux-ci/internal-services 52 | 53 | - name: Checkout Image Controller API 54 | uses: actions/checkout@v4 55 | with: 56 | path: crd-temp/image-controller 57 | repository: konflux-ci/image-controller 58 | 59 | - name: Install crd-ref-docs 60 | uses: supplypike/setup-bin@v1 61 | with: 62 | uri: 'https://github.com/elastic/crd-ref-docs/releases/download/v0.0.8/crd-ref-docs' 63 | name: 'crd-ref-docs' 64 | version: '0.0.8' 65 | 66 | - name: Generate application and environment API docs 67 | run: crd-ref-docs --log-level=ERROR --config=ref/config.yaml --output-path=ref/application-environment-api.md --renderer=markdown --source-path=crd-temp/application-api/api/v1alpha1 68 | 69 | - name: Generate Image Controller API docs 70 | run: crd-ref-docs --log-level=ERROR --config=ref/config.yaml --output-path=ref/image-controller.md --renderer=markdown --source-path=crd-temp/image-controller/api/v1alpha1/ 71 | 72 | - name: Generate Integration Service API docs 73 | run: crd-ref-docs --log-level=ERROR --config=ref/config.yaml --output-path=ref/integration-service.md --renderer=markdown --source-path=crd-temp/integration-service/api/v1alpha1/ 74 | 75 | - name: Generate Release Service API docs 76 | run: crd-ref-docs --log-level=ERROR --config=ref/config.yaml --output-path=ref/release-service.md --renderer=markdown --source-path=crd-temp/release-service/api/v1alpha1/ 77 | 78 | - name: Generate Enterprise Contract API docs 79 | run: crd-ref-docs --log-level=ERROR --config=ref/config.yaml --output-path=ref/enterprise-contract.md --renderer=markdown --source-path=crd-temp/enterprise-contract-controller/api/v1alpha1/ 80 | 81 | - name: Generate Internal Services API docs 82 | run: crd-ref-docs --log-level=ERROR --config=ref/config.yaml 
--output-path=ref/internal-services.md --renderer=markdown --source-path=crd-temp/internal-services/api/v1alpha1/ 83 | 84 | - name: Upload generated docs 85 | uses: actions/upload-artifact@v4 86 | with: 87 | name: api-docs 88 | path: ref/ 89 | -------------------------------------------------------------------------------- /.github/workflows/labeler.yml: -------------------------------------------------------------------------------- 1 | name: labeler 2 | 3 | on: [pull_request] 4 | 5 | permissions: 6 | pull-requests: write 7 | 8 | jobs: 9 | labeler: 10 | runs-on: ubuntu-latest 11 | name: Label the PR size 12 | steps: 13 | - uses: codelytv/pr-size-labeler@v1 14 | with: 15 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 16 | xs_label: 'size/XS' 17 | xs_max_size: '5' 18 | s_label: 'size/S' 19 | s_max_size: '15' 20 | m_label: 'size/M' 21 | m_max_size: '50' 22 | l_label: 'size/L' 23 | l_max_size: '200' 24 | xl_label: 'size/XL' 25 | fail_if_xl: 'false' 26 | github_api_url: 'https://api.github.com' 27 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: 'Close stale issues and PRs' 2 | 3 | on: 4 | schedule: 5 | - cron: '30 1 * * *' 6 | workflow_dispatch: {} 7 | 8 | permissions: 9 | issues: write 10 | pull-requests: write 11 | 12 | jobs: 13 | stale: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/stale@v9 17 | with: 18 | stale-issue-message: >- 19 | This issue is stale because it has been open 90 days with no 20 | activity. Remove the `state/stale` label or comment, or this 21 | will be closed in 30 days. 22 | days-before-stale: 90 23 | days-before-close: 30 24 | stale-issue-label: 'state/stale' 25 | stale-pr-label: 'state/stale' 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.export 2 | .DS_Store 3 | .idea/ -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "evilz.vscode-reveal", 4 | "hediet.vscode-drawio" 5 | ] 6 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "revealjs.exportHTMLPath": "../.export" 3 | } -------------------------------------------------------------------------------- /ADR/0000-adr-template.md: -------------------------------------------------------------------------------- 1 | # 0. Record architecture decisions 2 | 3 | Date: 2022-06-17 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | We need to record the architectural decisions made on this project. 12 | 13 | ## Decision 14 | 15 | We will use Architecture Decision Records, as described by Michael Nygard in this article: http://thinkrelevance.com/blog/2011/11/15/documenting-architecture-decisions 16 | 17 | ## Consequences 18 | 19 | See Michael Nygard's article, linked above. 20 | -------------------------------------------------------------------------------- /ADR/0001-pipeline-service-phase-1.md: -------------------------------------------------------------------------------- 1 | # 1. 
Pipeline Service Phase 1 2 | 3 | Created: 2022-10-13 4 | Last Updated: 2023-09-29 5 | 6 | ## Status 7 | 8 | Obsolete 9 | 10 | Superceded by [ADR-0009](./0009-pipeline-service-via-operator.md) 11 | 12 | ## Context 13 | 14 | App Studio initially ran on a single cluster and provisioned [Tekton](https://tekton.dev) controllers. 15 | With the migration to [kcp](https://github.com/kcp-dev/kcp), controllers need to either a) be made "kcp aware", or b) run on all workload clusters, targeting the same kcp `APIExport`. 16 | App Studio could build this on their own, however other services and teams beyond App Studio need the ability to run Tekton pipelines. 17 | 18 | Tekton code utilizes libraries that are not simple to refactor and make "kcp aware." 19 | Furthermore, Tekton is an upstream project with a wide, active community. 20 | Adding kcp-aware changes would require upstream acceptance, or require us to fork Tekton and apply our "kcp aware" patches. 21 | 22 | ## Decision 23 | 24 | Tekton APIs and services will be provided through a separate, independent service - Pipeline Service. 25 | App Studio and HACBS will be "customer 0" for Pipeline Service. 26 | Future managed services which rely on Tekton APIs can bind to the Pipeline Service and start running pipelines right away. 27 | 28 | Pipeline Service will deploy and manage Tekton controllers directly on workload clusters. 29 | kcp syncers will be used to generate APIExports from the workload cluster. 30 | We will utilize the OpenShift Pipelines Operator to deploy Tekton controllers to the furthest extent possible, whose configuration will be controlled via ArgoCD. 31 | Otherwise, other Tekton controllers will be deployed with direct manifests. 32 | 33 | Arch Diagram: https://miro.com/app/board/uXjVOVEW0IM=/ 34 | 35 | ## Consequences 36 | 37 | - Other services use an APIBinding to execute `PipelineRuns` (and access Tekton APIs) in kcp. 38 | - `TaskRun` objects cannot be synced to KCP. 39 | App Studio and HACBS components may only interact with `PipelineRun` objects directly. 40 | - Workload clusters for Pipeline Service need to be directly managed by the Pipeline Service team. 41 | We cannot rely on general "compute as a service" from kcp Control Plane Service (CPS). 42 | - Pipelines as Code (PaC) needs a separate ingress and service configured on KCP, which forwards traffic to PaC on the workload cluster. 43 | - `Ingress` support on kcp comes from an add-on capability - the [kcp Global Load Balancer Controller](https://github.com/kcp-dev/kcp-glbc). 44 | - `PipelineRun` objects created by PaC are not visible on kcp. 45 | - We are limited to one workload cluster - the gateway cannot load balance traffic across clusters. 46 | - Tekton Results can only be accessed on workload clusters. It would require additional changes/patches to make it accessible from kcp. 47 | -------------------------------------------------------------------------------- /ADR/0002-feature-flags.md: -------------------------------------------------------------------------------- 1 | # 2. Feature Flags 2 | 3 | Date: 2022-06-01 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | We know we need some way for processes to recognize that they’re working in a HACBS context or in an 12 | App Studio context. We’ve been referring to this need loosely throughout the first half of 2022 as 13 | “feature flags”. 
14 | 15 | Some examples: 16 | 17 | - [hac] needs to know whether or not to render different screens 18 | for HACBS views and to direct the user to those views instead of App Studio views. 19 | - [build-service] needs to know whether or not to install normal App Studio 20 | webhooks in the git repo of the user in order to trigger builds, or whether it should execute the 21 | setup logic necessary for HACBS [customized pipelines] 22 | (submitting a PR to the repo providing the default pipeline as source). 23 | - [build-service] needs to know whether or not to promote a built image pullspec 24 | directly to the [Component] CR after it is built, 25 | or whether to wait and let the [integration-service] test things first. 26 | - Build Service needs to know whether or not to create an [ApplicationSnapshot] after the 27 | [Component] CR is updated, or whether it should defer to the [integration-service] to create new 28 | [ApplicationSnapshots]. 29 | 30 | We have been thinking about this flag as a HACBS-wide flag. We had assumed that the *workspace 31 | itself* would be HACBS enabled, or not. Perhaps the workspace would have an explicit type that 32 | would let us know we are looking at or operating in a HACBS workspace, and not an App Studio 33 | workspace. 34 | 35 | **Problem**: workspaces don’t have a type useful for anything beyond initialization and they’re not 36 | going to have one. Features and APIs should be composable in a single workspace. A user might use 37 | a single workspace for *lots* of different activities - beyond just App Studio or HACBS. A workspace 38 | type is too restrictive and single-purpose. 39 | 40 | We had also been considering that the “flag” could be inferred from the organizational hierarchy of 41 | the workspace - where, if the workspace was owned by an org that was owned by a tenant that was in 42 | some pre-configured list of HACBS-enabled tenants, then this workspace should be considered 43 | HACBS-enabled, workspace-wide. 44 | 45 | **Problem**: we likely need to support the co-existence of HACBS-enabled workspaces and non-HACBS App 46 | Studio workspaces in the same tenant. Tenants are big enterprises, with lots of teams, and those 47 | teams have different adoption patterns. Some will want to be on App Studio, while others will want 48 | to be on HACBS. Although we don’t have real customer input on this, it is reasonable to expect that 49 | a single customer team may want to work on some projects in the HACBS feature set, and others in an 50 | App Studio feature set. Much more realistically, imagine the path for “turning on HACBS” at the 51 | tenant level. If you flip the switch at the tenant level, do all workspaces for all teams in the 52 | tenant suddenly change behavior? A tenant-wide setting is too coarse and disruptive to tenant teams 53 | that would appreciate independence. 54 | 55 | ## Decision 56 | 57 | Use “api discovery” to control the enablement of *individual* features in *individual* workspaces. 58 | 59 | [KCP] provides the [APIBinding] resource as a way of letting the user declare that a particular API 60 | (read: CRD) should be made available in a single workspace. The user installs something in their 61 | workspace by creating an [APIBinding]. Our processes (controllers and [hac]) should query for the 62 | availability of a particular API they care about, and let their behavior be influenced by the 63 | existence or non-existence of that API. 
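As a minimal sketch of what such a check can look like (the API group shown for the HACBS APIs is an assumption, used only for illustration), a process with a workspace-scoped kubeconfig only needs to ask the API server which resources it serves:

```bash
# Hypothetical feature check: is the IntegrationTestScenario API served in this workspace?
if kubectl api-resources --api-group=appstudio.redhat.com -o name \
    | grep -q '^integrationtestscenarios\.'; then
  echo "integration-service features are enabled in this workspace"
else
  echo "integration-service features are not enabled in this workspace"
fi
```

Controllers would perform the equivalent lookup through the Kubernetes discovery client rather than shelling out.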
64 | 65 | **Example**: if the [IntegrationTestScenario] API is present in KCP for a workspace, then a process 66 | can know that the [integration-service] features of HACBS are enabled in the workspace. 67 | 68 | - When onboarding a new [Component], [build-service] should consult the discovery API for 69 | [IntegrationTestScenario], and if it exists it should not install App Studio webhooks but should 70 | instead submit a PR prompting the HACBS onboarding process. 71 | - When a build completes, [build-service] should consult the discovery API for 72 | [IntegrationTestScenario], and if it exists it should not promote the built image pullspec to the 73 | [Component] CR. [integration-service] will handle that flow instead. 74 | 75 | **Example**: if the [ReleasePlan] API is present in the workspace, then a process can know that the 76 | [release-service] features of HACBS are enabled in the workspace. 77 | 78 | - After testing a new [ApplicationSnapshot], the [integration-service] should consult the existence 79 | of the [ReleasePlan] via the discovery API before it checks for the existence of any [ReleasePlan] 80 | resources. If the API is present then the [integration-service] should proceed as normal. If the 81 | API is *not present*, then the [integration-service] should silently ignore its codepath to 82 | inspect [ReleasePlans] and trigger automated [Releases]. 83 | - If the API is *not present,* that means the user is in a configuration where they have installed 84 | the [integration-service] but they have *not installed the [release service]*. We don’t have 85 | a concrete reason to support this configuration today - but explicitly checking for the API 86 | before checking for [ReleasePlan] makes a cleaner interface between the two services. They’re 87 | now composable, rather than being part of a monolithic “HACBS feature set”. 88 | 89 | **Example**: In [hac], we’re implementing HACBS screens as part of the [hac-dev] plugin. 90 | 91 | - When generating a list of workspaces, [hac] could describe those workspaces as HACBS-enabled or 92 | not if one or more HACBS APIs are available via kubernetes API discovery in kcp. Those APIs will 93 | be present if [APIBinding] objects are present in the workspace and have been handled by KCP. 94 | - When viewing an App Studio workspace, the [hac-dev] plugin should present the user with the 95 | corresponding HACBS view if one or more HACBS APIs are present in the workspace, which again will 96 | be present if corresponding [APIBinding] objects have been created in the workspace and handled by 97 | fulfilled by KCP. 98 | 99 | ## Open Questions 100 | 101 | - How should [hac] decide whether or not to render the topology view for pipelines? It is reasonable 102 | to check for the existence of an API from the build side of the house, but we don’t have an API 103 | today that could signal this. It’s just PipelineRuns. 104 | - Use the [IntegrationTestScenario] API today, which is not a perfect fit, but will let us move 105 | forwards. 106 | 107 | ## Consequences 108 | 109 | - We should experience a cleaner API - composable services, more aligned with a larger App Cloud API 110 | being developed by multiple teams. 111 | - We may find ourselves forced into creating a CRD (and corresponding [APIBinding]) just so that we 112 | can influence the behavior of another service, just so we can give it a feature flag to check. 
113 | - Services that change their behavior based on the existence or non-existence of APIs that they do 114 | not own need to take special care if they manage some off-cluster state. 115 | - For example, [build-service] manages git webhooks when users onboard with a [Component] CR. 116 | However, the details of that webhook may change depending on whether or not the 117 | [IntegrationTestScenario] API is present or not. If the [IntegrationTestScenario] is installed 118 | or uninstalled, [has] should properly handle transitioning off-cluster state to align to the 119 | currently available APIs in the workspace; it should reconcile the webhooks with the intended 120 | state in the workspace which includes both Component CRs as well as the existence of 121 | [IntegrationTestScenario] APIs (CRDs). 122 | 123 | ## References 124 | 125 | Originally drafted in a [google document](https://docs.google.com/document/d/1KcXWZ8VGUg_iR0RjdGuDYedP8ZW63XCgF26KZUNgpeQ/edit) 126 | 127 | [hac]: ../architecture/hybrid-application-console.md 128 | [hac-dev]: https://github.com/openshift/hac-dev 129 | [has]: ../architecture/application-service.md 130 | [build-service]: ../architecture/build-service.md 131 | [integration-service]: ../architecture/integration-service.md 132 | [customized pipelines]: https://issues.redhat.com/browse/HACBS-9 133 | [KCP]: ../ref/kcp.md 134 | [APIBinding]: ../ref/kcp.md#apibinding 135 | [Component]: ../ref/application-environment-api.md#component 136 | [ApplicationSnapshot]: ../ref/application-environment-api.md#applicationsnapshot 137 | [ApplicationSnapshots]: ref/application-environment-api.md#applicationsnapshot 138 | [ReleasePlan]: ../ref/release-service.md#releaseplan 139 | [ReleasePlans]: ../ref/release-service.md#releaseplan 140 | [IntegrationTestScenario]: ../ref/integration-service.md#integrationtestscenario 141 | -------------------------------------------------------------------------------- /ADR/0003-interacting-with-internal-services.md: -------------------------------------------------------------------------------- 1 | # 3. Interacting with Internal Services 2 | 3 | Date: 2022-10-20 4 | 5 | ## Status 6 | 7 | --- 8 | 9 | Accepted 10 | 11 | ## Context 12 | 13 | --- 14 | 15 | Many organizations, including Red Hat, possess numerous internal services that help productize their software. 16 | In many cases, these internal services will continue to play a role in the release workflows used in Konflux. 17 | 18 | We originally thought that we should expose access to an organization's internal services by encouraging the use of "[bastion](https://en.wikipedia.org/wiki/Bastion_host)" interfaces that are publicly addressable but which also have some degree of internal network access. On review, we see now that internal network ingress like this opens up unwanted 19 | attack vectors towards an organization's internal networks. 20 | 21 | **Problem**: Konflux Release pipelines need to **initiate** processes with an organization's internal services which 22 | are **not publicly addressable** in a secure fashion and be able to obtain process status and completion results. 23 | 24 | ## Decision 25 | 26 | --- 27 | 28 | Use a "controller" running in a private cluster that can watch and reconcile **Request** custom resources in 29 | one or more workspaces. This will be referred to as the **Internal Service Controller**. 30 | 31 | **Request** is used here as a general type meaning that real use cases might involve custom resources 32 | such as **ServiceABCRequest**. 
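To make the contract concrete, a hypothetical **Request** instance might look like the sketch below (the group, version, and field names are illustrative only, not a published schema). The release pipeline creates the resource in the managed workspace and then watches its `status` for the result written back by the internal service controller:

```yaml
apiVersion: internal.example.redhat.com/v1alpha1   # hypothetical API group/version
kind: ServiceABCRequest
metadata:
  generateName: sign-request-
  namespace: managed-workspace                     # illustrative workspace namespace
spec:
  # parameters the internal service needs for its unit of work
  artifact: quay.io/example/app@sha256:<digest>
status:
  conditions:
    - type: Succeeded
      status: "True"
  # result handed back to the pipeline that created the request
  results:
    signatureRef: quay.io/example/app:signature
```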
33 | 34 | This strategy will make use of [KCP]'s VirtualWorkspace model allowing an internal service controller to watch a group of 35 | workspace via a single _KUBECONFIG_. This internal service controller is expected to trigger a specific job that encapsulates the Internal Service's unit of work 36 | that HACBS wants to initiate. It is expected that the internal service controller should update the **status** of the **Request** CR to denote the progress of the 37 | triggered unit of work. 38 | 39 | The internal service controller should also be able to update the **Request** CR to provide a **result** back to the process that 40 | originally created the custom resource. 41 | 42 | **Example:** 43 | 44 | During the course of an attempt to release content, artifacts may need to be signed. The service that 45 | performs the signing process is an internal service within an organization with no publicly addressable API. 46 | The [release-service] may execute a release pipeline that has a step that wants to access that signing service's 47 | API and obtain a signature to be used in downstream steps in the release pipeline. 48 | 49 | Using the pattern here, the user organization (called MyOrg) would create a signing controller hosted in a cluster inside their network, but which uses a KUBECONFIG pointed at the kcp VirtualWorkspace for a `MyOrgSigningRequest`. They would construct a release pipeline which creates those `MyOrgSigningRequest` CRs in their managed workspace, and which watches for `status` updates on those request CRs to determine when the internally hosted signing process has completed. 50 | 51 | ## Architecture Overview 52 | 53 | --- 54 | 55 | ![Interacting with Internal Services](../diagrams/ADR-0003/interacting-with-internal-services.jpg) 56 | 57 | ## Open Questions 58 | 59 | --- 60 | 61 | * How can internal services controllers control who they accept requests from? 62 | 63 | ## Consequences 64 | 65 | --- 66 | 67 | * Managed workspaces will require the **Request** custom resource definitions installed. 68 | * Then custom release pipelines can create CRs for that CRD to make a request to the Internal Service. 69 | * Skill gap. Not all engineers are experienced with writing controllers. Nonetheless, this pattern will enable 70 | developers to gain the experience. 71 | 72 | ## Proof of Concept 73 | 74 | --- 75 | 76 | A proof of concept for the **Internal Services Controller** can be found [here](https://github.com/scoheb/internal-services-controller-poc) 77 | 78 | ## References 79 | 80 | --- 81 | 82 | [KCP]: ../ref/kcp.md 83 | [release-service]: ../architecture/release-service.md 84 | 85 | -------------------------------------------------------------------------------- /ADR/0004-component-image-location.md: -------------------------------------------------------------------------------- 1 | # 4. Out-of-the-box image repository for StoneSoup users 2 | 3 | Date: Oct 29, 2022 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Approvers 10 | 11 | * Alexey Kazakov 12 | * Gorkem Ercan 13 | 14 | ## Reviewers 15 | 16 | * Alexey Kazakov 17 | * Gorkem Ercan 18 | * Parag Dave 19 | * Ralph Bean 20 | * Andrew McNamara 21 | 22 | 23 | ## Context 24 | 25 | **Problem** 26 | StoneSoup does not have a internal registry where images could be pushed to as an intermediate step before being deployed as a container. 27 | As an application centric experience, StoneSoup should not make it mandatory for its users to specify where the image ( built from source code ) needs 28 | to be pushed to. 
29 | 30 | 31 | **Goals**: 32 | * Provide an out-of-the-box location for users’ images to be pushed after being built from source. 33 | 34 | 35 | **Non-Goals**: 36 | * Define the user experience for users bringing existing images from other image registry services. 37 | * Provide users an option to choose what the out-of-the-box location for images would be. 38 | * Define the user experience for users who are willing to configure additional credentials for pushing to an image registry of their choice. 39 | 40 | **Design Goals** 41 | * Use Quay.io as the out-of-the-box image registry in order to avoid getting into the business of maintaining an internal registry. 42 | * Align user permissions in an StoneSoup workspace with those in Quay.io 43 | * Maintain the right levels of isolation between images being expected of a multi-tenant system. 44 | * Security: 45 | * Leverage short-lived Quay.io API tokens for repo/org management on Quay.io 46 | * Leverage robot accounts for pushing images from CI/Build pipelines. 47 | 48 | 49 | 50 | ## Decision 51 | 52 | ### What 53 | 54 | * Per workspace called "david", setup a new org “quay.io/unique-org-david/…” 55 | * Per component, setup a new new repo “quay.io/unique-org-david/unique-component-repo” 56 | * Use User’s Quay.io API token to manage the org/repo. Short-term, we'll use a pre-configured Quay.io API token associated with StoneSoup to create the org/repo till we 57 | figure out how to determinstically map a user in StoneSoup to a user in Quay.io. 58 | * Generate a robot account token scoped to the relevant repository and persist it in the user's workspace for the image build and push process to consume. 59 | 60 | 61 | ### How - Design 62 | 63 | #### Quay.io API token Configuration 64 | 65 | 1. Setup a Quay.io organization to host the OAuth app. 66 | 2. Create an OAuth Application in the Quay.io organization. 67 | 3. Geneate a token for the OAuth Application. This token would act as the 'service account' using which Quay.io resources would be created. Important to note, the token acts on behalf of the user who is requesting it - but uses the explicit scopes specified at the time of token generation. 68 | 4. Allowlist user 'shbose' to be create organizations using non-user-tokens using the Quay.io API. 69 | 70 | | Syntax | Description | 71 | | ----------- | ----------- | 72 | | Quay.io organization | quay.io/redhat-user-workloads | 73 | | OAuth Application name | Created, name redacted | 74 | | Account used to generate token | `shbose` , `mkovarik` | 75 | | Scope | Administer organizations, Adminster repositories, Create Repositories | 76 | 77 | image 78 | 79 | 80 | #### Organization and Image Repository creation 81 | 82 | When a user creates a Component, a StoneSoup service would need to generate the image repository for consumption by the 83 | build, test and deployment services. 84 | 85 | * For each user, create a new Quay.io org “quay.io/unique-org-david” 86 | * For each `Component` 'foo', create a new image repo “quay.io/unique-org-david/appname/componentname” 87 | * Configure the robot account “redhat-” in “quay.io/unique-org-david” to be able to push to “quay.io/unique-org-david/appname-foo” 88 | * Configure a `Secret` in the user's namespace with the robot account token. 89 | * Annotate the `Component` with the name of the image repository and the name of the `Secret` containing the robot account token. 
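The exact shape of that `Secret` is an implementation detail of the image controller; a plausible sketch, assuming the robot account token is materialized as a standard docker-config push secret (an assumption, not something this ADR mandates), is:

```yaml
apiVersion: v1
kind: Secret
metadata:
  name: billing              # named after the Component, matching the examples below
  namespace: user-tenant     # the user's workspace namespace (illustrative)
type: kubernetes.io/dockerconfigjson
data:
  # base64-encoded docker config holding the scoped robot account
  # credentials for the repository under quay.io/unique-org-david
  .dockerconfigjson: <base64-encoded robot account credentials>
```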
90 | 91 | The following deviations from this design would be implemented: 92 | 93 | Until the capability to progammatically create organizations in Quay.io is activated: 94 | * Images repositories would be created in quay.io/redhat-user-workloads 95 | * Isolation would continue to be maintained the same way - every image repository would have a scoped robot account that would be rotatable using Quay.io API for the same https://docs.quay.io/api/swagger/#!/robot/regenerateUserRobotToken. 96 | 97 | Until the capability to determine the associated user/tenant/Space a Component is implemented, 98 | * The quay.io repository created with use the format quay.io/org-name/namespace-name/application-name/component-name. Example, https://quay.io/repository/redhat-user-workloads/shbose/applicationname/automation-repo 99 | 100 | #### Lifecycle of the Quay.io resources 101 | 102 | * Token generation: 103 | * Robot account token: At the moment, the controller responsible for generating the Quay.io resources would be responsible for rotating the tokens https://docs.quay.io/api/swagger/#!/robot/regenerateUserRobotToken 104 | * Quay.io API token: No programmatic way to regenerate this token is known at this point of time. This would be a manual activity to begin with. 105 | 106 | * Upon deletion of an `Application`/`Component` from StoneSoup, 107 | * The controller/finalizer would delete the the relevant Quay.io resources namely, the image repository and the robot account. 108 | * The controller/finalizer would delete the linked `Secret` from the user's namespace. Most likely, this should be a mere `ownerReference`-based garbage collection. 109 | 110 | * Upon removal of a user from Stonesoup, 111 | * The empty Quay.io organization associated with the user or the user's Space may not be deleted instantly, but would be scheduled for a delayed cleanup. 112 | * PR-based tags are to be deleted on a regular basis. Image tags associated with `main` may remain un-pruned for now. 113 | 114 | 115 | ### How - Implementation 116 | 117 | The implementation of the above design will be improved overtime with the possible introduction of new CRDs/APIs. At the moment, no new API is being planned till the need for it arises. 118 | 119 | 120 | To request the Image controller to setup an image repository, annotate the `Component` with `image.redhat.com/generate: 'true'`. 121 | 122 | 123 | ``` 124 | apiVersion: StoneSoup.redhat.com/v1alpha1 125 | kind: Component 126 | metadata: 127 | annotations: 128 | image.redhat.com/generate: 'true' 129 | name: billing 130 | namespace: image-controller-system 131 | spec: 132 | application: city-transit 133 | componentName: billing 134 | ``` 135 | 136 | The `Image controller` creates the necessary resources on Quay.io and writes out the details of the same into the `Component` resource as an annotation, namely: 137 | 138 | * The image repository URL. 139 | * The name of the Kubernets `Secret` in which the robot account token was written out to. 
140 | 141 | ``` 142 | { 143 | "image":"quay.io/redhat-user-workloads/image-controller-system/city-transit/billing", 144 | "secret":"billing", 145 | } 146 | ``` 147 | 148 | ``` 149 | apiVersion: StoneSoup.redhat.com/v1alpha1 150 | kind: Component 151 | metadata: 152 | annotations: 153 | image.redhat.com/generate: 'false' 154 | image.redhat.com/image: >- 155 | {"image":"quay.io/redhat-user-workloads/image-controller-system/city-transit/billing","secret":"billing" 156 | } 157 | name: billing 158 | namespace: image-controller-system 159 | resourceVersion: '86424' 160 | uid: 0e0f30b6-d77e-406f-bfdf-5802db1447a4 161 | spec: 162 | application: city-transit 163 | componentName: billing 164 | ``` 165 | 166 | 167 | 168 | 169 | ## Open Questions 170 | 171 | - What would be a progammatic way to regenerate the main Quay.io API token ? 172 | - Since the long-term goal is to have the Quay.io organizations owned by the user, how do we build a frictionless experience to map the user's 173 | account with the user's Quay.io account ? 174 | - Considering the above is figured out, we would need to add existing users as members of the relevant organizations. This is a backend job that 175 | would need to be designed and executed at an appropriate time. 176 | 177 | 178 | 179 | ## Consequences 180 | 181 | - We will be able to deprecate the use of https://quay.io/repository/redhat-appstudio/user-workload for storing users' container images. 182 | - Users will not be forced to put in an image repository location when they use StoneSoup to import and deploy source code. 183 | - The image repository used could conditionally be made available to users outside of StoneSoup. 184 | - Given that scoped Quay.io robot account tokens would be available in user's workspaces for pushing/pulling images, the principle of minimum privilege 185 | would be met. 186 | 187 | ## References 188 | 189 | * Originally drafted in an internal [document](https://docs.google.com/document/d/1KcXWZ8VGUg_iR0RjdGuDYedP8ZW63XCgF26KZUNgpeQ/edit) 190 | * Implementation: https://github.com/redhat-appstudio/image-controller 191 | -------------------------------------------------------------------------------- /ADR/0007-change-management.md: -------------------------------------------------------------------------------- 1 | # 7. Change Management Process 2 | 3 | Date: 2022-11-22 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | * Relates to [ADR 17. Use our own pipelines](0017-use-our-pipelines.html) 10 | * Relates to [ADR 20. Source Retention](0020-source-retention.html) 11 | 12 | ## Approvers 13 | 14 | * Ann Marie Fred 15 | * Gorkem Ercan 16 | * Ralph Bean 17 | 18 | ## Reviewers 19 | 20 | ## Context 21 | 22 | Red Hat's ESS requirement SEC-CHG-REQ-1 (Change Management) states that "All applications/systems/platforms/services must follow Change Management process and procedures, as applicable / appropriate." Change management is important in order to ensure no unauthorized changes are made to systems or applications, in order to help prevent the purposeful or accidental introduction of security vulnerabilities and an increase in threat attack surface. 23 | 24 | Because Stone Soup is using a continuous delivery model, we need to ensure that we use a lightweight change management process when it's appropriate, and follow a more intensive change management process when needed. The normal code review process is in effect already. The full change request process will be used once our offering goes into a 24x7 support mode. 
25 | 26 | ## Decision 27 | 28 | Incremental code changes that are fully tested by automated tests, which will not cause an outage or a significant change to functionality, will follow the normal code review process in their respective code repositories. 29 | 30 | To deploy an infrastructure change or a new version of software to staging and production, a developer will make the required change to the infra-deployments repo or the App Interface repo. Then the change must follow our normal code review process. 31 | 32 | ### Normal code review process 33 | 34 | A developer will make their code changes and write automated tests in a Git feature branch. The developer will raise a Github Pull Request (PR) when they need other team members to review their work. This review could be for work in progress or when the developer feels the work is completed. If the developer wants a code review but feels that their work is not ready to be deployed yet, they will add the "do-not-merge/work-in-progress" label to the PR. We also recommend adding "WIP" to the pull request title. 35 | 36 | At least one developer who is not the PR author must review and approve the code change. The reviewers will provide comments in Github for suggested changes. The reviewer has a responsibility to verify that the code follows our established best practices and "definition of done". From a change management perspective, the reviewer ensures that: 37 | * We continue to meet our security and privacy requirements. 38 | * Code is fully exercised by passing unit tests. 39 | * Proper error and audit logging is in place. 40 | * There are no obvious implementation holes or incorrect logic. 41 | * Code is free of excessive "TODO" items. 42 | * Build/deployment/configuration changes are automated where possible, and they have proper operational documentation. 43 | 44 | We also require that all of our repositories be instrumented with CI test automation that runs when a PR is raised, and after each new commit. The CI checks will install the software and run a suite of automated tests. The CI checks will also run security scans. All CI checks must pass before performing a Merge to the main branch, as this updates the staging environment and makes the code available for deployment into production. 45 | 46 | When the review is completed and CI checks have passed, the approver should Approve the PR in GitHub and the PR author will then Merge the code. 47 | 48 | For changes to the infra-deployments repo, the PR author may add the "lgtm" (looks good to me) label to the PR rather than clicking on the Merge button. This will trigger the Prow/Tide deployment automation to create a batch of approved changes and test and deploy them together, usually within an hour. Batching changes avoids merge race conditions. 49 | 50 | In the rare case that we must deploy a change that doesn't meet these requirements, we will document the reason for the exception in the PR itself, and Github will keep a record of who approved the change. 51 | 52 | See the Engineering Standards and Expectations document for further details. 53 | 54 | ### When is a formal change request required? 55 | There are a few cases where we need to use Red Hat's formal Change Enablement process: 56 | * If your work can or will result in a production service becoming unavailable or degraded during service hours. 57 | * If the functionality of the service is changing. 58 | * If you are releasing new software, or a new major version of existing software. 
59 | * If you are updating firmware or applying patches to existing infrastructure. 60 | 61 | This is not meant to be a complete list. Most activities that impact production environments require a change request to be filed. 62 | 63 | A good rule of thumb: consider whether external stakeholders (customers, other service owners, our business owners) would expect advance notice of the change or planned outage. If so, this is the process to notify them. 64 | 65 | ### Change Sensitivity or EOQ Sensitivity 66 | Change Sensitivity is a period of time where specific applications or services need to remain stable. Sometimes this could be due to major public events (such as Red Hat Summit), and other times it's related to financial close and reporting, such as End of Quarter Sensitivity (EOQ Sensitivity). 67 | 68 | During these periods, if a change will impact production infrastructure, sales, financial close, analysis, and financial reporting, the change will need to be approved by additional reviewers as described in the Change Enablement docs. 69 | 70 | These dates are tracked in the Developer Tools Pipeline Temperature document and our weekly Program Calls. 71 | 72 | To avoid duplication of evolving documents, refer to the internal document on [Change Enablement](https://source.redhat.com/departments/it/itx/service_management_automation_platforms/change_enablement) for details about the process for Normal Changes (Low Risk, Medium Risk, High Risk), Standard Changes, Latent Changes, and Accelerated Changes. Also see the [Change Management FAQ](https://source.redhat.com/departments/it/itx/service_management_automation_platforms/change_enablement/change_enablement_wiki/change_management_faq). 73 | 74 | ## Consequences 75 | 76 | Our normal code review process will ensure that all changes are properly tested, reviewed and recorded. 77 | 78 | When it's needed, the formal change request process will add an additional paperwork burden and delay the code release to production. Conversely, failure to invoke the formal process when it's necessary could lead to poor outcomes including outages during peak usage times, developers called in to fix outages on company holidays, failure to meet Service Level Agreements, demo failures, angry customers, or lost revenue. 79 | -------------------------------------------------------------------------------- /ADR/0009-pipeline-service-via-operator.md: -------------------------------------------------------------------------------- 1 | # 9. Pipeline Service via Operator 2 | 3 | Created: 2023-09-29 4 | Last Updated: 2023-09-29 5 | 6 | ## Status 7 | 8 | Proposed 9 | 10 | ## Context 11 | 12 | kcp is no longer being used as a control plane for RHTAP. This means that 13 | "Pipeline Service" cannot be deployed as an independent service. For our 14 | initial MVP, all Tekton APIs need to be deployed onto a standard OpenShift 15 | cluster (specifically OpenShift Dedicated). 16 | 17 | ## Decision 18 | 19 | All Tekton APIs will be provided using the stock OpenShift Pipelines operator (OSP). 20 | In the spirit of developing in a "Service First" manner, RHTAP will deploy 21 | a candidate "nightly" release of the operator. The service will be defined in 22 | the [pipeline-service](https://github.com/openshift-pipelines/pipeline-service) 23 | repository, which is then imported into 24 | [infra-deployments](https://github.com/redhat-appstudio/infra-deployments) as 25 | an ArgoCD application. 
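Schematically, the import into infra-deployments amounts to an Argo CD `Application` along the lines of the sketch below (the repository path, target namespace, and sync policy are illustrative, not the actual layout of that repository):

```yaml
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: pipeline-service
  namespace: openshift-gitops
spec:
  project: default
  source:
    repoURL: https://github.com/redhat-appstudio/infra-deployments.git
    targetRevision: main
    path: components/pipeline-service   # illustrative path
  destination:
    server: https://kubernetes.default.svc
    namespace: openshift-pipelines      # illustrative target namespace
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
```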
26 | 27 | Not all metrics required for operating the service are exposed natively by the 28 | controllers. The `pipeline-metrics-exporter` controller is to be used as a test 29 | bed to expose new metrics, with the goal of upstreaming those metrics as they 30 | mature and prove their value. 31 | 32 | Configurations that are specific to RHTAP must be made available through 33 | the OSP CRDs. The following changes are specific to RHTAP: 34 | 35 | - Disable Tekton Triggers. Pipelines as Code will be the favored mechanism for 36 | event-based triggering of pipelines for now. This decision can be revisited 37 | in the future based on need. 38 | - Disable the pruner that comes with the operator. Tekton Results will be used 39 | to prune `PipelineRun` and `TaskRun` data off cluster, thereby ensuring data 40 | is archived before removal. 41 | - Direct Pipelines as Code to use a URL pattern that displays the `PipelineRun` 42 | or `TaskRun` info from the Hybrid Application Console (HAC). This ensures 43 | end users do not need access to the underlying compute cluster(s). 44 | - The Pipelines as Code application name must match the GitHub Application name, so that users understand which GitHubApplication is responsible for triggering the pipelines. 45 | - The GitHub Application secret value, deployed using an ExternalSecret. 46 | - Any configuration related to performance. 47 | 48 | Furthermore, as the service will be accessed through CodeReadyToolchain (CRT), the 49 | following changes are also specific to RHTAP: 50 | - Deploying a proxy (known as `SprayProxy`) on the CRT host cluster that redirects 51 | incoming PaC requests to the member clusters. More on SprayProxy [here](0031-sprayproxy.md). 52 | - Providing a plugin to the CRT Proxy so Tekton Results requests are redirected 53 | to the appropriate member cluster. 54 | 55 | ## Consequences 56 | 57 | - Tekton Triggers should be disabled in RHTAP using the appropriate operator 58 | configuration. 59 | - The Tekton Pruner needs to be disabled in RHTAP using the appropriate 60 | operator configuration. This is done under the assumption that Results will 61 | be responsible for pruning resources. Eventually the operator should automate 62 | this setting if `Results` is deployed and configured to prune resources. 63 | - Pipelines as Code should use an appropriate URL to HAC when interacting with 64 | SCM services, such as the GitHub 65 | [Checks API](https://docs.github.com/en/rest/guides/getting-started-with-the-checks-api?apiVersion=2022-11-28). 66 | - Changes to Pipeline components need to be baked into the operator and built 67 | rapidly. 68 | - Hot fixes need to be provided on a "roll forward" basis. OLM Operators do not 69 | support "rollback" mechanisms today. Mean time to revert an offending change, 70 | rebuild, and deploy needs to be measured in hours. 71 | - The version of the deployed operator needs to be configurable via ArgoCD. 72 | This is should be doable by using `kustomize` to patch the `CatalogSource` and `ImageContentSourcePolicy`. 73 | -------------------------------------------------------------------------------- /ADR/0010-namespace-metadata.md: -------------------------------------------------------------------------------- 1 | # 10. 
Namespace Metadata 2 | 3 | Date: 2022-12-09 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Approvers 10 | 11 | * Ann Marie Fred 12 | * Gorkem Ercan 13 | 14 | ## Reviewers 15 | 16 | * Matous Jobanek 17 | * Ralph Bean 18 | * Alexey Kazakov 19 | 20 | ## Context 21 | 22 | We need metadata on our namespaces to make Konflux easier to operate and maintain. Standardizing namespace metadata will make it easier for us to search our logs and metrics across clusters. It will also allow us to enable logging for outgoing network traffic, one of our security requirements. 23 | 24 | ## Namespace labels 25 | 26 | We will apply the following labels to Konflux namespaces, to make them easier to identify programmatically. One namespace can have multiple types/labels: 27 | 28 | - `appstudio.redhat.com/namespacetype: "controller"` for namespaces containing controllers developed for Konflux. For example, we would annotate the `gitops-service-argocd` namespace but not the `openshift-gitops` namespace. 29 | - `appstudio.redhat.com/namespacetype: "user-workspace-data"` for User workspaces where Applications, Components, and so on are stored 30 | - `appstudio.redhat.com/namespacetype: "user-deployments"` for the namespaces where GitOps deploys applications for users 31 | - `appstudio.redhat.com/namespacetype: "user-builds"` for the namespaces where the Pipeline Service manages users' PipelineRun resources 32 | 33 | The following labels are used for billing and telemetry. Values can be left blank if they are not defined yet: 34 | 35 | - `appstudio.redhat.com/workspace_name: "test_workspace"` a name for the workspace (unique identifier) 36 | - `appstudio.redhat.com/external_organization: 11868048` the Red Hat orgId of the user account that created the workspace 37 | - `appstudio.redhat.com/product_variant: "Stonesoup"` identifier for the type of product (allows tracking multiple variants in the same metric) 38 | - `appstudio.redhat.com/sku_account_support: "Standard"` Standard, Premium, or Self-Support. Must match the value from the SKU. 39 | - `appstudio.redhat.com/sku_env_usage: "Production"` Development/Test, Production, or Disaster Recovery. Must match the value from the SKU 40 | - `appstudio.redhat.com/billing_model: "marketplace"` must be set to marketplace to indicate this service instance is billed through marketplace. This allows you to mark some instances as billed via marketplace (and some as not billed through marketplace) 41 | - `appstudio.redhat.com/billing_marketplace: "aws"` which marketplace is used for billing 42 | - `appstudio.redhat.com/billing_marketplace_account: 123456789012` the customer account identifier (numeric AWS identifier). Necessary because a customer can have more than one AWS account. 43 | 44 | The following labels are used by required operators: 45 | 46 | - `argocd.argoproj.io/managed-by: gitops-service-argocd` is added by the GitOps Service, and is reconciled by the OpenShift GitOps operator. This label enables a (namespace-scoped) Argo CD instance in the `gitops-service-argo` Namespace to deploy to any Namespace with this label. 47 | 48 | ## Namespace annotations 49 | 50 | We will apply the following annotation to namespaces installed and maintained by Konflux on the clusters that Red Hat manages. 
This will enable OVN network logging to log outgoing network traffic: 51 | 52 | metadata: 53 | annotations: 54 | k8s.ovn.org/acl-logging: '{"deny": "info", "allow": "info"}' 55 | 56 | ## Consequences 57 | 58 | We might have to migrate `appstudio.redhat.com` to another product name in the future, but it's the best option we have right now. 59 | -------------------------------------------------------------------------------- /ADR/0012-namespace-name-format.md: -------------------------------------------------------------------------------- 1 | # 12. Namespace Name Format 2 | 3 | Date: 2023-01-23 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | The OSD-based control plane provisions one namespace in the target member cluster for every workspace (internally represented by a Space CR) which is created for a Konflux user. All the namespace names provisioned in this way should have a fixed suffix because of two reasons: 12 | 1. Visual separation of the namespaces provisioned for Konflux workspaces. 13 | 2. Limiting the risk of conflicting with the names used for other namespaces that are present in the cluster - either by default for every OCP/OSD/ROSA cluster or created via other SRE pipelines. 14 | 15 | ## Decision 16 | 17 | Every namespace provisioned for an Konflux top-level workspace will have a name with the fixed suffix `-tenant`. The complete format will be `-tenant`. 18 | Every namespace provisioned for an Konflux environment sub-workspace (created from a `SpaceRequest` CR using `appstudio-env` tier) will have a name with the fixed suffix `-env`. The complete format will be `-env`. 19 | 20 | ## Consequences 21 | 22 | Any changes in the format of the namespace names cause the deletion of all existing namespaces (provisioned for Konflux workspaces), followed by the creation of the namespaces which will use the new format. In other words, all data in the old namespaces will be deleted. 23 | -------------------------------------------------------------------------------- /ADR/0014-let-pipelines-proceed.md: -------------------------------------------------------------------------------- 1 | # 14. Let Pipelines Proceed 2 | 3 | * Date Decided: 2022-05-?? 4 | * Date Documented: 2023-01-31 5 | 6 | ## Status 7 | 8 | Accepted 9 | 10 | Relates to: 11 | * [ADR 13. Konflux Test Stream - API contracts](0013-integration-service-api-contracts.html) 12 | * [ADR 30. Tekton Results Naming Convention](0030-tekton-results-naming-convention.html) 13 | * [ADR 32. Decoupling Deployment](0032-decoupling-deployment.html) 14 | 15 | ## Context 16 | 17 | The user's build pipeline includes scanning and linting tasks that operate on the source code and 18 | the built image (SAST, antivirus, clair, etc..). The purpose of these tasks is to find problems in 19 | the user's source code or dependencies and alert the user so they can take action ([STONE-459]). 20 | 21 | One frustration we've heard from users in previous systems is that they don't want to be blocked in 22 | their development or testing by complications in the build system, by compliance concerns. We want 23 | to fix that by offering a system that permits as much progress in the lifecycle of the user's 24 | application (build, test, and pre-production deployment) but which also protects production from 25 | non-compliant builds via mechanisms in the [enterprise contract]. 26 | 27 | A problem we face: in Tekton, a failing TaskRun causes the whole PipelineRun to fail. 
If the purpose 28 | of our linting and scanning tasks is to find problems - which usually looks like failure with 29 | a non-zero exit code - how do they do their job without constantly breaking the user's build, 30 | stopping them from running integration tests, and stopping them from deploying candidate builds to 31 | their lower [Environments]? 32 | 33 | ## Decision 34 | 35 | All scanning and linting TaskRuns should *succeed* even if they find problems in the content they 36 | are evaluating. 37 | 38 | Use the `TEST_OUTPUT` result convention from [ADR-0030] to expose those results and render them 39 | for users ([STONE-459]). 40 | 41 | ## Consequences 42 | 43 | * Users should find that even if their scanners find problems, they can still build, test, and 44 | deploy to lower [Environments]. 45 | * Without special treatment in [STONE-459], users may be misled or confused if their tasks appear to 46 | succeed but really are reporting errors under the hood. 47 | 48 | ## Footnotes 49 | 50 | * We originally made this decision verbally in May of 2022, and have been operating with it as an 51 | unwritten principle. Documenting it here for posterity, visibility. 52 | 53 | [STONE-459]: https://issues.redhat.com/browse/STONE-459 54 | [Environments]: ../ref/application-environment-api.html#environment 55 | [ADR-0030]: 0030-tekton-results-naming-convention.html 56 | [enterprise contract]: ../architecture/enterprise-contract.html 57 | -------------------------------------------------------------------------------- /ADR/0015-integration-service-two-phase-architecture.md: -------------------------------------------------------------------------------- 1 | # 15. The Two-phase Architecture of the Integration Service 2 | 3 | Date Documented: 2023-02-09 4 | Date Accepted: 2023-02-14 5 | 6 | ## Status 7 | 8 | Superseded by [ADR 36. Integration service promotes components to GCL immediately after builds complete](0036-integration-service-promotes-to-GCL-immediately.html) 9 | 10 | ## Context 11 | 12 | The Integration Service is in charge of running integration test pipelines by executing 13 | the Tekton pipeline for each user-defined IntegrationTestScenario. 14 | The main payloads that are being tested are Snapshots which contain references to all 15 | Components that belong to the Application, along with their images. 16 | 17 | One problem faced by the integration service is caused by the fact that testing an 18 | application with Tekton pipelines takes time. During the course of testing an application, 19 | multiple component builds can happen in quick succession, leading to a potential race 20 | condition between different Snapshots that are created for each of those builds. 21 | This would primarily manifest itself by two Snapshots getting promoted in quick 22 | succession but neither of them having the latest images contained within them. 23 | 24 | ## Decision 25 | 26 | In order to protect against the race conditions, the integration service will leverage a 27 | two phase approach to testing. The phases will consist of a Component phase that will 28 | always be executed and the optional Composite phase which would come into play only when 29 | race conditions between component builds are detected. 30 | 31 | ### Component phase 32 | 33 | When a single component image is built, the Integration Service tests the application 34 | by creating a Snapshot. 
All Components with their images from the Global Candidate List 35 | are included within the Snapshot and then the Component that was newly built is 36 | updated/overwritten to complete the Snapshot creation. 37 | 38 | After all test pipelines for the Snapshot finish successfully, the Integration service 39 | updates the Global Candidate List with the newly built Component image and checks if it 40 | can promote the Component Snapshot. If the Global Candidate List for other Components 41 | doesn't match the rest of the Component Snapshot contents, its status is marked as 42 | invalid and the testing goes into the Composite phase. 43 | Otherwise, the Component Snapshot is promoted according to user preferences. 44 | 45 | ![](../diagrams/ADR-0015/component-phase.jpg) 46 | 47 | ### Composite phase 48 | 49 | The Composite phase is used when the Global Candidate List changes while testing a 50 | Snapshot in the Component phase. 51 | 52 | The Composite phase exists to resolve a race condition when teams merge multiple PRs 53 | to multiple components of the same application at nearly the same time. 54 | When multiple components are built at the same time, the Integration Service tests 55 | the application by creating a composite Snapshot using multiple Components updated 56 | to use the newly built images. 57 | 58 | If all testing pipelines pass successfully, the Composite Snapshot is promoted 59 | according to user preferences. 60 | 61 | ![](../diagrams/ADR-0015/composite-phase.jpg) 62 | 63 | ## Consequences 64 | 65 | To illustrate the consequences of implementing the above approach, 66 | we can outline two scenarios, one with a happy path where only a single component 67 | is built at a time, and one with a race condition where two components are built 68 | in quick succession. 69 | 70 | ### Happy path with a single component build 71 | 72 | In the happy path without race conditions, one PR merges to one component. 73 | 1. The Component phase runs, and records results on the **Component** Snapshot. 74 | 2. The Global Candidate List is updated. 75 | 3. The Integration Service detects that the Snapshot is up-to-date by checking the 76 | Global Candidate List and skips the second phase of testing, marking the Snapshot as 77 | complete. 78 | 4. The **Component** Snapshot is promoted according to the user preferences. 79 | 80 | ### Path with a race condition 81 | 82 | In the path with race conditions, two PRs merge to two components at the same time. 83 | 84 | 1. Two instances of the Component phase start up in parallel, testing two different 85 | Snapshots - each one including one of the two merged PRs. 86 | 2. The first Snapshot finishes testing and detects that the Global Candidate List 87 | has not changed in the meantime and finalizes the Snapshot in question, 88 | updating the Global Candidate List with the newly built component image. 89 | 3. The second Snapshot finishes testing. It is presented with an updated 90 | Global Candidate List that includes both changes, which have not been tested together yet. 91 | 4. A **Composite** Snapshot is created that includes both changes and the 92 | Integration service starts testing it in the Composite phase. 93 | 5. Composite phase testing runs in full 94 | 6. The **Composite** Snapshot is promoted according to the user preferences. 
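To make the outcome of the race condition concrete, the composite Snapshot created in step 4 references the newly built images of both components so that the combination is tested together. The sketch below is illustrative only: the names are made up, the field names follow our understanding of the Snapshot API and may differ in detail, and any labels the Integration Service uses internally to mark a Snapshot as composite are omitted.

```yaml
apiVersion: appstudio.redhat.com/v1alpha1
kind: Snapshot
metadata:
  name: my-app-composite-xyz        # illustrative name
  namespace: user-tenant
spec:
  application: my-app
  components:
    # Both entries carry the images built from the two PRs that merged at
    # nearly the same time, so neither change can overwrite the other.
    - name: component-a
      containerImage: "quay.io/example/component-a@sha256:<new-digest-a>"
    - name: component-b
      containerImage: "quay.io/example/component-b@sha256:<new-digest-b>"
```

If the Composite phase passes, this combined content is what gets promoted, which is how the two racing Component Snapshots are prevented from overwriting each other on the Global Candidate List.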
95 | 96 | ## Footnotes 97 | For future consideration, if two components rely on each other to the point that 98 | breaking changes from one component create issues in another component during testing, 99 | a new feature to support batching of components together is being investigated. 100 | This would allow for the Integration service to hold off on testing a Component build 101 | until the dependent build is also completed. 102 | 103 | We originally made this decision verbally and with diagrams back in May of 2022, 104 | and have been operating with it as the operating design since then. 105 | However, we realized (through conversations like 106 | [this slack conversation](https://redhat-internal.slack.com/archives/C02CTEB3MMF/p1669961732100869)) 107 | that it is not obvious without documentation. 108 | We are documenting it here as an ADR for posterity, visibility. 109 | 110 | -------------------------------------------------------------------------------- /ADR/0016-integration-service-promotion-logic.md: -------------------------------------------------------------------------------- 1 | # 16. Promotion logic in the Integration Service 2 | 3 | * Date Documented: 2023-02-08 4 | * Date Accepted: 2023-02-27 5 | 6 | ## Status 7 | 8 | Superseded by [ADR 32. Decoupling Deployment](0032-decoupling-deployment.html) 9 | Superseded by [ADR 36. Integration service promotes components to GCL immediately after builds complete](0036-integration-service-promotes-to-GCL-immediately.html) 10 | 11 | ## Context 12 | 13 | Before the merge of HACBS & AppStudio, the Konflux build-service created the 14 | ApplicationSnapshot and promoted the content to the user's lowest environment as soon as 15 | the build was completed and once the environment was built. 16 | The integration-service fulfilled the same role for HACBS in addition to running the 17 | IntegrationTestScenario tests first before promoting. 18 | 19 | Here, we are documenting the decision to consolidate and move the logic at one place to avoid duplicacy 20 | on the actions. 21 | 22 | Note: This functionality has now been completely dropped from the build-service side after the merger. 23 | 24 | ### What is promotion logic ? 25 | 26 | DevOps workflows often automate deployments of applications across different environments to ensure 27 | that the workloads are properly tested before being further promoted to an environment with a higher 28 | service level agreement. 29 | The promotion path can be represented with a directed acyclic graph from the environment with the 30 | lowest SLA to the one with the highest, for example development -> staging -> production. 31 | In Konflux, this promotion logic would be represented by a set of components (container images) defined by 32 | an immutable Snapshot being deployed to the relevant environment. 33 | 34 | Once the Snapshot is tested and verified successfully, its contents will then be deployed to the user's defined 35 | lowest environments. 36 | 37 | ## Decision 38 | 39 | Consolidate the promotion logic for both HACBS & AppStudio and move it to the integration-service after 40 | the merger of Konflux. 41 | 42 | ![](../diagrams/ADR-0016/promotion-logic.jpg) 43 | 44 | Upon successful promotion of a Snapshot, the integration service will have: 45 | 46 | * Updated the Global Candidate List (GCL) with the Components' ContainerImages upon the successful completion 47 | of the test pipelines for the Snapshot. 
48 | * Ensured that Releases associated with the given Snapshot and the ReleasePlan exists, creating one if necessary. 49 | * Ensured that all SnapshotEnvironmentBindings(SEB) for non-ephemeral lowest environments point to the 50 | newly constructed Snapshot. If no SEBs existed to update, the integration service will have created a new binding 51 | for each of the environments. 52 | 53 | Note: In case of no IntegrationTestScenario defined, the testing of the Snapshot will be skipped and the 54 | outcome will be considered as passed followed by the same promotion process defined in the diagram. 55 | 56 | To get the environment details, Integration service validates all the Environment CRs in the user's namespace 57 | and makes decisions on where to deploy the contents of the Snapshot. 58 | With the current functionality, integration-service can setup to deploy to more than one lowest environments. 59 | 60 | The “lowest environments” are the Environments with no explicit parentEnvironment set. 61 | 62 | The Application Service (HAS) and the Gitops Service listen to the SnapshotEnvironmentBinding CR created by the 63 | integration-service and they work together to deploy the Snapshot to the lowest defined environments(dev/stage/something else). 64 | 65 | In tandem, Integration-service creates the Release for each ReleasePlan with an auto-release label based on the 66 | testing outcomes. 67 | The Release contains the Snapshot and the ReleasePlan which will have the user-supplied definition of where to 68 | release the Snapshot of an Application. 69 | 70 | Release-service then communicates with the enterprise-contract to ensure the policy is satisfied to promote 71 | the Snapshot content to the production environment. 72 | 73 | Note: Integration service does not promote the Snapshots originating from PRs, only those originating from 74 | push (merge-to-main) events gets promoted to lowest environments and released. 75 | 76 | 77 | ## Consequences 78 | 79 | * As per this decision, Integration Service now holds the full charge to automatically promote the Snapshot of the 80 | Application to the user’s defined lowest environments only. 81 | The integration service doesn't hold the control to make promotions to the non-lowest/production environments. 82 | 83 | * Once all the tests succeed the Snapshot will always be deployed via a single code path, in a single service. 84 | This should make future design choices easier to reason about and communicate between teams. 85 | 86 | 87 | ## Footnotes 88 | 89 | The promotion logic has originally been implemented as part of HACBS-802 / HACBS-801 and is currently in action. 90 | This document is created for posterity and visibility. 91 | 92 | [parentEnvironment]: https://github.com/redhat-appstudio/application-api/blob/5f554103549049bf02c1e344a13f0711081df6a1/api/v1alpha1/environment_types.go#L36-L39 93 | [Global Candidate List]: ../architecture/integration-service.html 94 | [HACBS-802]: https://issues.redhat.com/browse/HACBS-802 95 | [HACBS-801]: https://issues.redhat.com/browse/HACBS-801 96 | -------------------------------------------------------------------------------- /ADR/0017-use-our-pipelines.md: -------------------------------------------------------------------------------- 1 | # 17. Use our own pipelines 2 | 3 | * Date 2023-02-10 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | * Relates to [ADR 7. Change Management](0007-change-management.md) 10 | * Relates to [ADR 27. 
Container Image Management Practice](0027-container-images.md) 11 | 12 | ## Context 13 | 14 | The maintainers of Konflux components need to demonstrate evidence of practices that support 15 | a secure software development lifecycle (for example scanning, manifesting, vulnerability detection, 16 | etc.) 17 | 18 | There are lots of options out there for us to use, notably github actions. However, we're building 19 | a ci/cd platform that is meant to support a secure software development lifecycle from the start. 20 | 21 | ## Decision 22 | 23 | Use our own pipelines to build and scan Konflux components. Almost all of our components already 24 | do this today. Look for evidence in the `.tekton/` directory of their git repo. 25 | 26 | However, we have stopped short of configuring an [Application] and [Components] for Konflux. 27 | We're using the pipelines directly, but not via the Konflux UI. This is something we intend to 28 | start doing, but haven't made time to do so yet. 29 | 30 | ## Consequences 31 | 32 | * When asked for evidence that teams are practicing secure development, they can point to Konflux 33 | pipelines in some cases. 34 | * If our pipelines produce incorrect or erroneous errors, we will be in a position to notice this 35 | sooner and act to fix them. 36 | * If there are gaps in the user experience, we'll also be in a position to notice this and work to 37 | improve UX (i.e. [STONE-459](https://issues.redhat.com/browse/STONE-459)). 38 | * We won't get to exercise or benefit from the [integration-service] or the Konflux UI so long as 39 | we are only using the Konflux build pipelines and not yet onboarding to use of the [Application] 40 | and [Component] APIs. 41 | * This ADR supports [STONE-434](https://issues.redhat.com/browse/STONE-434). 42 | 43 | [integration-service]: ../ref/integration-service.html 44 | [Application]: ../ref/application-environment-api.html#application 45 | [Components]: ../ref/application-environment-api.html#component 46 | -------------------------------------------------------------------------------- /ADR/0018-apps-continuous-perf-testing.md: -------------------------------------------------------------------------------- 1 | # 18. Continuous Performance Testing (CPT) of Apps in Konflux 2 | 3 | Date: 2023-03-10 4 | 5 | ## Status 6 | 7 | In consideration 8 | 9 | ## Context 10 | 11 | In general, performance testing is just another form of testing that helps application teams to ensure there are no regressions in their code and that their application behaves as expected. 12 | 13 | The IntegrationTestScenario pipelines in Konflux are not suitable for full-blown performance and scale testing (that usually takes lots of time and involves human analysis so it is not suitable for quick automated checks we need for release gating runs in Konflux), but are a good place for a quick and small-scale regression test. 14 | 15 | What makes performance testing different from functional testing is it is harder to decide if the test passed or failed as every performance test tests different aspects of the application and so it expects different performance, different metrics, different thresholds. Furthermore, even if the measured performance of the app under test did not change, there might be some significant resource usage by the application which we want to cause the test to be marked as failed. 
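For illustration, a single uploaded record from such a test might bundle the measured results together with the resource-usage metrics collected during the run. The sketch below is hypothetical (shown as YAML for readability; the actual payload is JSON, and every field name, metric and unit is defined by the team owning the test):

```yaml
# Hypothetical shape of one performance test record; all names are illustrative.
test: catalog-browse-smoke
parameters:
  concurrency: 50
  duration_seconds: 300
results:
  avg_page_load_ms: 182
monitoring:
  backend_cpu_cores_avg: 1.4
  backend_memory_mib_max: 612
  database_cpu_cores_avg: 0.8
  database_commits_total: 24210
```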
16 | 17 | The approach to make this pass/fail detection possible proposed here, is to use historical results as a benchmark – not only actual results of the performance test, but also monitoring data about the resource usage, etc. 18 | 19 | > Example: Imagine you develop a web e-shop application that uses PostgreSQL data backend. Your perf test is browsing through a goods catalog and measures latency of pages. When you want to decide on pass or fail result of this test, you need to check that metrics like below are aligned to previous results of a same test with same configuration: 20 | > 21 | > - Average page load latency. 22 | > - Backend service CPU and memory usage... 23 | > - PostgreSQL CPU and memory usage… 24 | > - Internal PostgreSQL metrics like number of commits during the test or average number of database sessions during the test… 25 | > 26 | > If any of these metrics does not align to historical results, we mark this test result as a failure. 27 | 28 | And even if some test fails, the application team should be able to review the data and change the test definition, so next time a new test result is being evaluated based on the historical results, this new result will be one of them. 29 | 30 | The algorithm that says if the current test passes or fails when compared to historical results can vary. It can be: 31 | 32 | - Using a floating average of historical data and setting a safe range around it and making sure the new result is in that range. 33 | - Some other possibilities might evolve later. Above is just an example used now by some Performance teams. 34 | 35 | Goal of this ADR is to propose a way for this kind of testing to be implemented in Konflux (feature [STONE-679](https://issues.redhat.com/browse/STONE-679)). Even if it would not deprecate full-blown performance and scale testing, having a release gate with some short and small scale performance test is desirable for many application teams. 36 | 37 | ### Glossary 38 | 39 | - SUT: Subject Under Test 40 | - Horreum: a results storage service: 41 | - CPT: Continuous Performance Testing 42 | 43 | ## Decision 44 | 45 | Let's use this architecture with a single Horreum instance per control plane cluster (as it is similar to what we do for Tekton results). 46 | Horreum instances would be managed by Red Hat and used by tenants on specific cluster. 47 | 48 | ![Architecture diagram with Horreum](assets/0018-apps-continuous-perf-testing.svg "Architecture diagram with Horreum") 49 | 50 | 1. Performance test runs in the Tekton pipeline and generates JSON with test parameters and results. 51 | 2. Pipeline gathers configured monitoring metrics and add them to the JSON. 52 | 3. Pipeline uploads the JSON with all the data to Horreum. 53 | 4. Horreum performs result analysis, looking for changes in configured metrics. 54 | 5. Pipeline gets PASS/FAIL decision from Horreum back to the pipeline, so pipeline can return proper result. 55 | 56 | Although Horreum provides rich web UI for configuring JSON parsing, change detection and data visualization, it will stay hidden to Konflux users. Konflux will expose subset of that functionality in it's own web UI and will talk to Horreum via it's API interface. 57 | 58 | We need to make a decision about one instance per cluster or one instance per tenant. 59 | 60 | ## Consequences 61 | 62 | Pros: 63 | 64 | - We can provide some best practices to customers wanting to do some basic performance regression testing. 
65 | - Horreum is end-to-end solution that already exists, is already used by multiple teams and has documentation for the whole process. 66 | - Konflux UI can display all the trends because historical data will be available in Horreum. 67 | - Red Hat team that develops Horreum is willing to help with this effort. 68 | - Integration scripts needed to run in the pipeline (to gather monitoring data, to upload results to Horreum, to get PASS/FAIL decision from Horreum...) that already exists: 69 | - Monitor performance workloads. 70 | - Process the data in Horreum. 71 | - Gather monitoring data about SUT from Prometheus and bundle it to results that are uploaded to Horreum. 72 | 73 | Cons: 74 | 75 | - Somebody has to manage the Horreum instance. Required development time on it. Service instance needs ownership. 76 | - Although Horreum uses Keycloak as well, some changes in the Horreum authentication mechanism might be required to cooperate with Konflux member cluster RBAC. 77 | - To make sure Horreum users from one tenant are not able to access data from different tenant, created . 78 | - Horreum is used by multiple teams without any capacity issues, but Horreum itself was not perf&scale tested formally, so there might be some scaling issues. 79 | - We would need to develop Konflux UI to get graphs/results from Horreum and to allow users to configure the change detection parameters for their tests. 80 | - If we need one Horreum instance per workspace, that would require further development work (i.e. operator for provisioning per worksapce, data backup/restore etc). 81 | - As the performance testing pipelines can be some heavy on resources, attribution of costs to users/workspaces might be tricky, but can be measured in a same way as we plan to measure functional tests or build pipelines. 82 | - Attribution of costs to run the Horreum, maybe tracking these can be added to scope of . 83 | - Integration scripts that need to be created: 84 | - Document / provide a step for integrating the change detection into pipeline. 85 | -------------------------------------------------------------------------------- /ADR/0019-customize-url-github.md: -------------------------------------------------------------------------------- 1 | # 19. Customize URLs Sent to GitHub 2 | 3 | Created Date: 2023-03-29 4 | Accepted Date: 2023-03-31 5 | 6 | ## Status 7 | 8 | Accepted 9 | 10 | ## Context 11 | 12 | ### Motivation 13 | 14 | When we run builds and tests on PRs (see [STONE-134](https://issues.redhat.com/browse/STONE-134)), 15 | developers need to be provided a link to the Konflux UI so they can see the details of their 16 | `PipelineRuns`. This is particularly important when a `PipelineRun` produces an error/failure. 17 | The current implementation sends links back to the OpenShift developer console on the member 18 | cluster, which should not be accessed by general public users. 19 | 20 | ### Background 21 | 22 | Users can get a direct link to their PipelineRun which looks like this today: 23 | https://console.redhat.com/beta/hac/stonesoup/workspaces/adkaplan/applications/adkaplan-demo/pipelineruns/devfile-sample-qtpn-ft8dh. 24 | Adding `/logs` to the URL lets the user drill in the logs of a particular TaskRun. At present, 25 | there is no bookmarkable URL which allows users to see the logs of a specific TaskRun. This feature 26 | may be added in the near future (see [HAC-3370](https://issues.redhat.com/browse/HAC-3307)). 
27 | 28 | The following components in Konflux send data/results back to GitHub via the Checks API: 29 | 30 | - Pipelines as Code (PaC) 31 | - Integration Service 32 | - Other services which may run Pipelines in response to a GitHub event (ex - potentially Release Service). 33 | 34 | An Konflux workspace is associated with a Kubernetes namespace on a Konflux “member cluster”, 35 | which is not directly accessible by end users. The namespace name on the member cluster currently 36 | correlates with the workspace name (a suffix is added). [ADR 10](0010-namespace-metadata.html) 37 | specifies that a namespace label should exist which links the member cluster namespace back to the 38 | Konflux workspace. 39 | 40 | Artifacts in Konflux are organized into `Applications` and `Components`. A namespace can have more 41 | than one `Application`, and an `Application` can have one or more `Components`. At present, each 42 | `Component` has an associated PaC `Repository` object, though this may be subject to change in the future. 43 | 44 | ### Requirements 45 | 46 | For a given PipelineRun in Konflux, the following information must be identifiable based on 47 | information in the Konflux member cluster: 48 | 49 | - Workspace ID 50 | - Application ID 51 | - Pipeline run ID 52 | 53 | Konflux components that report data back to GitHub must know the root URL for the Konflux UI. 54 | This URL must be configurable per Konflux deployment (ex - staging, production in 55 | [infra-deployments](https://github.com/redhat-appstudio/infra-deployments)). 56 | Workspace and application IDs should _not_ be added to PipelineRun YAML that is added to git 57 | repositories for PaC. PaC PipelineRun templates should be transportable when a user forks someone 58 | else’s git repo. 59 | 60 | ## Decision 61 | 62 | Konflux components that provide `PipelineRun` result links to GitHub or other source control 63 | management systems must provide URLs that are accessible to registered end users who have 64 | permission to view the requested `PipelineRun` resource. 65 | 66 | ## Consequences 67 | 68 | ### Dev Sandbox: Member Cluster Labels 69 | 70 | Member cluster namespaces MUST have the `appstudio.redhat.com/workspace_name` label, in accordance 71 | with [ADR 10](0010-namespace-metadata.html). 72 | 73 | ### Pipelines as Code: Customize URLs per Repository 74 | 75 | Pipelines as Code's `Repository` custom resource will be extended to let users/service providers 76 | customize the `PipelineRun` URL sent to SCM providers. This will take advantage of the upcoming 77 | parameters feature in Pipelines as Code (see [SRVKP-2940](https://issues.redhat.com/browse/SRVKP-2940)). 78 | 79 | The mechanism for how the console URL can be templated using parameters will be documented upstream 80 | and published [online](https://pipelinesascode.com). Konflux will configure Pipelines as Code to 81 | use the following parameters in the console URL templates: 82 | 83 | - `workspace` 84 | - `application` 85 | 86 | ### Build Service: Propagate Konflux Data to Repository CR 87 | 88 | The Konflux build service should be enhanced as follows: 89 | 90 | - When the `Component` CR is created/reconciled, add the following parameters to the respective 91 | `Repository`: 92 | - `workspace` - the Konflux workspace (`appstudio.redhat.com/workspace_name` label on namespace) 93 | - `application` - the Konflux application name for the `Component`. 
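Putting these pieces together, a `Repository` reconciled by the build service might look roughly like the sketch below. This assumes the upcoming Pipelines as Code parameters feature surfaces as `spec.params`; the exact field names and the console URL template syntax are determined by the upstream implementation and are shown here only for illustration.

```yaml
apiVersion: pipelinesascode.tekton.dev/v1alpha1
kind: Repository
metadata:
  name: my-component
  namespace: user-tenant            # member-cluster namespace backing the workspace
spec:
  url: "https://github.com/example-org/my-component"
  params:
    # Injected by the build service from the namespace label and the Component CR,
    # never stored in the user's .tekton/ PipelineRun YAML.
    - name: workspace
      value: my-workspace
    - name: application
      value: my-application
```

The Konflux-wide console URL template configured for Pipelines as Code would then interpolate these values into something like `https://<konflux-ui>/workspaces/{{ workspace }}/applications/{{ application }}/pipelineruns/{{ pipeline_run }}` (placeholder syntax illustrative), producing links that resolve in the Konflux UI instead of the member cluster's OpenShift console.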
94 | 95 | ### Integration Test Service: Customize URLs 96 | 97 | The Integration service should use similar mechanisms as Pipelines as Code to customize the URL 98 | sent back to the respective SCM provider. Namely: 99 | 100 | - Use the same custom console URL 101 | - Use similar templating mechanisms for getting the PipelineRun URL - example by inferring the 102 | Konflux workspace from the appropriate namespace and application via the parent Application 103 | for a respective component. 104 | 105 | ### Privacy Impact 106 | 107 | Workspace names might contain identifying information (ex - usernames). This could be a privacy 108 | concern if these identifiers are distributed publicly via URLs submitted to GitHub or other public 109 | source control repositories. This potential disclosure of identifying information should be 110 | presented to end users. 111 | 112 | This concern is mitigated if workspace names are changed to be an arbitrary string, or a hash of a 113 | known identifier (see [ADR 06](0006-log-conventions.html)). 114 | -------------------------------------------------------------------------------- /ADR/0020-source-retention.md: -------------------------------------------------------------------------------- 1 | # 20. Source Retention 2 | 3 | Date: 2023-04-04 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | Relates to [ADR 7. Change Management](0007-change-management.html) 10 | 11 | ## Context 12 | 13 | Red Hat's SSML requirements "SSML.PS.1.1.1 Securely Store All Forms of Code" requires that "The 14 | revision and its change history are preserved indefinitely and cannot be deleted, except when 15 | subject to an established and transparent policy for obliteration, such as a legal or policy 16 | requirement." 17 | 18 | We intend for the Konflux pipeline to support this requirement in 19 | [RHTAP-107](https://issues.redhat.com/browse/RHTAP-107). Since we [Use our own pipelines (ADR 20 | .17)](0017-use-our-pipelines.html), this would satisfy the control for us, if it were implemented. 21 | 22 | So long as it is not yet implemented, we need a policy (an "administrative control" rather than 23 | a "technical control") that precribes how our own source repositories must be managed. 24 | 25 | ## Decision 26 | 27 | The source history of branches used to build Konflux components (usually the `main` branch), must 28 | not be overwritten or deleted. 29 | 30 | This practice can be supported by enabling branch protection rules on a repo by repo basis that 31 | prohibit force pushing and branch deletion for protected branches. 32 | 33 | ## Consequences 34 | 35 | * So long as [RHTAP-107](https://issues.redhat.com/browse/RHTAP-107) is not implemented, we will 36 | need to abide by this administrative control, increasing the number of rules that team members 37 | need to remember. 38 | * Github branch protection rules can help reduce that cognitive load, but could be accidentally 39 | disabled if this ADR is not frequently discussed or referenced. 40 | -------------------------------------------------------------------------------- /ADR/0021-partner-tasks.md: -------------------------------------------------------------------------------- 1 | # 21. Partner Tasks in Build/Test Pipelines 2 | 3 | Date: 2023-03-06 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | * As a Red Hat Partner, I would like to offer our service's capability as a Tekton Task that would be executed in a user's build/test Pipeline on StoneSoup. 
12 | * As a StoneSoup maintainer, I would like to provide a way to ensure the above Tekton Task is validated and approved for use in the build/test Pipeline on StoneSoup. 13 | 14 | Associated business ask 15 | https://issues.redhat.com/browse/STONE-549 16 | 17 | ## Decision 18 | 19 | ### Plumbing 20 | 21 | 1. Setup a new Git high-level directory in https://github.com/redhat-appstudio/build-definitions for partners to contribute Tasks to. 22 | 2. Define a directory structure for Task submissions as manifests in the yaml format. 23 | 3. Configure a CI job that validates the Tasks upon opening of a pull request. 24 | 4. Optionally, configure a CI job that generates an OCI artifact consumable in a Tekton Pipeline. 25 | 26 | ### Day-to-day operations 27 | 28 | #### Adding/Updating a Task 29 | 30 | 1. Partner opens a PR with a new/updated Task. 31 | 2. CI tests do the due diligence on the changes proposed in the PR. Success/Failures are reported in a way that the PR author can take reasonable 32 | action to resolve the issue. 33 | 3. Upon approval from the Build/Test team, the changes are merged. 34 | 35 | #### Revoking a Task 36 | 1. Open a PR to delete the Task. 37 | 2. The Build/Test team reviews the PR and merges it. 38 | 3. The Build/Test team updates the https://github.com/redhat-appstudio/build-definitions to remove references to the Task's OCI image whenever it is reasonable to do so. 39 | 40 | #### Definition of a valid Task 41 | 42 | The due diligence is a transparent-to-the-Task-contributor CI job that's running on the repository that validates the Task before we merge it in. 43 | 44 | Please see the following as prior art: 45 | 1. See CI Results in https://github.com/tektoncd/catalog/ 46 | 2. https://github.com/k8s-operatorhub/community-operators/ 47 | 2. https://github.com/openshift-helm-charts/charts 48 | 49 | A non-exhaustive list of checks that would be run on a Task is: 50 | 51 | * Linting 52 | * Scanning 53 | * No privilege escalation / or no requirements that need unreasonable privileges. 54 | * Should be integratabtle into the build pipeline, ie, works with the inputs/outputs we have today. 55 | * Should work with reasonable defaults. 56 | * Should be skip-able if credentials/tokens are missing. 57 | * *TBD* 58 | 59 | ## Out-of-scope 60 | 61 | * Supporting validation of Tasks inside Stonesoup before submission would be out-of-scope. However, partners should be able to import a 62 | Component into StoneSoup, customize their Pipeline definition in the .tekton directory and have the changes validated in a PipelineRun execution in StoneSoup. To be able to be productive with this flow, they'd need to be able to do https://github.com/redhat-appstudio/architecture/pull/64 . 63 | 64 | ## Alternatives 65 | 66 | * ~Use the github.com/redhat-appstudio/build-definitions for Task submissions by partners : This is being considered in the short-term, either way, the day-to-day operations will not quite change~ - this has been promoted to be the primary design. 67 | * Use Tekton Hub for host Tasks : Tekton Hub is being deprecated. 68 | 69 | 70 | ## Consequences 71 | 72 | * We have a mechanism to take in Tekton Tasks from Partners with due diligence. 73 | * We should also be able to take in Tekton Tasks from Red Hat teams with the same level of validation/diligence that Red Hat Partners would have to go through. 74 | * We do build up a little tech-debt because this process needs to be merged with the official Red Hat certification process in the future. 
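For reference, since the Decision above expects submissions as plain Tekton Task manifests in YAML, a hypothetical minimal example of a partner-contributed Task is sketched below. The file path, Task name, parameters and image are all illustrative and do not prescribe the final directory layout; the skip behaviour mirrors the "skip-able if credentials/tokens are missing" check listed above.

```yaml
# e.g. partners/acme-scanner/0.1/acme-scanner.yaml   (path is illustrative)
apiVersion: tekton.dev/v1beta1
kind: Task
metadata:
  name: acme-scanner
spec:
  params:
    - name: image-url
      description: Image to scan with the partner service
    - name: acme-api-token
      description: Partner service token; the Task skips itself when this is empty
      default: ""
  steps:
    - name: scan
      image: quay.io/example/acme-scanner-cli:latest
      script: |
        #!/bin/sh
        if [ -z "$(params.acme-api-token)" ]; then
          echo "No token provided, skipping partner scan"
          exit 0
        fi
        acme-scan "$(params.image-url)"
```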
75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /ADR/0023-git-references-to-furnish-integration-test-scenarios.md: -------------------------------------------------------------------------------- 1 | # 23. Git references to furnish Integration Test Scenarios 2 | 3 | * Date Documented: 2023-04-10 4 | * Date Accepted: 2023-04-21 5 | 6 | ## Status 7 | 8 | Approved 9 | 10 | ## Context 11 | 12 | Up to now, the Integration service has only supported setting the Tekton bundle (Bundle string `json:"bundle"`) in the 13 | IntegrationTestScenario [CR] as a reference for the integration tests, in order to run the Tekton PipelineRuns. 14 | 15 | Below is the existing format to supply the Integration test references: 16 | 17 | ![](../diagrams/ADR-0023/tekton-bundle-ITS.jpg) 18 | 19 | Building and maintaining Tekton bundles is cumbersome for users when providing custom tests. 20 | 21 | Resolving tasks from a git repository is more natural for developers and allows for faster iteration. Hence, users 22 | should be able to reference their Tekton pipeline definitions that are hosted in git repositories ([RHTAP-402]). 23 | 24 | To support the functionality of using git references, Tekton resolvers need to be enabled. As part of [PLNSRVCE-1030], 25 | pipeline service adds support for the [Tekton-resolvers] including git resolvers which should in turn give us the ability 26 | to expand the support for resolvers in the IntegrationTestScenarios. 27 | 28 | ## Decision 29 | 30 | Integration service now migrates to the new version of the IntegrationTestScenario CRD having the expanded functionality 31 | to allow using different types of Tekton resolvers ex: bundles-resolver, cluster-resolver, hub-resolver along with 32 | the git-resolver. This gives the extensibility to permit the use of different resolvers in the future. 33 | 34 | The new format of adding the Integration Test Scenarios would look like: 35 | 36 | 37 | ![](../diagrams/ADR-0023/git-references-ITS.jpg) 38 | 39 | Users git repository path to their test code can now provide the information or the location of their IntegrationTestScenarios 40 | through Git references like **Github URL**, **Git Options** (**Revision** & **Path in repository**). 41 | 42 | The information provided by the users in the above form will then be consumed by the integration service with the help of IntegrationTestScenario CR and will be used to run the integration tests against the Application Snapshot in question. 43 | 44 | Example of the tekton pipeline definition with git resolver: 45 | 46 | ![](../diagrams/ADR-0023/tekton-pipeline-definition-git-resolver.jpg) 47 | 48 | The existing IntegrationTestScenarios will be migrated to the new API version with tekton resolvers, by using the 49 | standard kubernetes conversion webhooks ([STONEINTG-386]). 50 | 51 | ## Open Questions 52 | 53 | Addressing the impact of malicious git [commits] to help protect (or warn) users against them is not being addressed 54 | as part of this feature and still open for further discussion. 55 | 56 | Can the validation be added as security improvement from Tekton side and later adopt by Integration service ? 57 | 58 | ## Consequences 59 | 60 | * Better user experience through Tekton pipeline definitions that are hosted in git repositories which are easier to 61 | maintain as opposed to building their own Tekton bundle images every time they adjust their test. 
62 | It also removes the need for our users to have to understand what a Tekton bundle is and learn how to build it, 63 | if they don't already know. 64 | 65 | * Resolvers are currently in Technology Preview for the OpenShift Pipelines 1.9. Technology Preview features are not 66 | supported with Red Hat production service level agreements (SLAs) and might not be functionally complete. 67 | Red Hat does not recommend using them in production. These features only provide early access to upcoming product features, 68 | enabling customers to test functionality and provide feedback during the development process. 69 | 70 | 71 | ## Footnotes 72 | 73 | * The risk mentioned in the consequences has been [accepted] by the PM(s). 74 | * There will be no direct support for the Tekton bundles. But the IntegrationTestScenrios are expandable to make 75 | use of bundle resolvers, if needed. 76 | 77 | 78 | [CR]: https://redhat-appstudio.github.io/architecture/ref/integration-service.html#integrationtestscenariospec 79 | [RHTAP-402]: https://issues.redhat.com/browse/RHTAP-402 80 | [PLNSRVCE-1030]: https://issues.redhat.com/browse/PLNSRVCE-1030 81 | [Tekton-resolvers]: https://tekton.dev/vault/pipelines-main/resolution/ 82 | [STONEINTG-386]: https://issues.redhat.com/browse/STONEINTG-386 83 | [commits]: https://github.com/tektoncd/pipeline/issues/6315 84 | [accepted]: https://issues.redhat.com/browse/RHTAP-625?focusedId=21972490&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-21972490 -------------------------------------------------------------------------------- /ADR/0024-release-attribution.md: -------------------------------------------------------------------------------- 1 | # 24. Release Objects Attribution Tracking and Propagation 2 | 3 | Date: 2023-04-25 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | It is imperative to know, given a change that has been delivered to a production environment, which individual it can be attributed to, in order to have accountability for that change. 12 | This information needs to be propagated through the release process and needs to be flexible enough to specify attribution 13 | either on a per-release basis or as a standing attribution. 14 | 15 | ## Decision 16 | 17 | An admission webhook will be used to capture and track the user information provided in the Kubernetes request. This user 18 | will be validated against Red Hat's SSO. 19 | 20 | ### Architecture Overview 21 | 22 | #### Terminology 23 | 24 | * `Release`: A CRD that describes what should be released and using which plan. 25 | * `ReleasePlan`: A CRD that describes where an application should be released. 26 | * `ReleasePlanAdmission`: A CRD that describes which pipeline to execute along with specific pipeline parameters. 27 | 28 | #### Architecture 29 | 30 | ![](../diagrams/ADR-0024/flowchart.jpg) 31 | 32 | ##### Capturing attribution data 33 | 34 | The end goal of the ADR is to permit the release-service's `Pipeline`'s `Tasks` to have access to the name of the 35 | individual to which a change in production can be attributed. There are two kinds of attribution involved: 36 | 37 | ###### Attribution for a particular Release 38 | 39 | It will be obtained by capturing the user that created the object. This information is available for requests intercepted 40 | by admission webhooks. The `Release` CR will be updated with the label `release.rhtap.openshift.io/author` using the 41 | username that triggered the requests as its value. 
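As an illustration, after the webhook admits a manually created `Release`, the stored object would carry the author label roughly as follows. The spec fields are abbreviated and the names are made up; the exact username format depends on what the authentication layer reports in the request's userInfo.

```yaml
apiVersion: appstudio.redhat.com/v1alpha1
kind: Release
metadata:
  name: manual-release-abc            # illustrative
  labels:
    # Added by the admission webhook from the username on the create request.
    release.rhtap.openshift.io/author: jdoe
spec:
  snapshot: my-app-snapshot-xyz
  releasePlan: my-app-release-plan
```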
42 | 43 | As automated `Releases` would also obtain this label, it's important to add a new label to indicate their nature. Automated 44 | `Releases` are exclusively created by the Integration service, which will have to add the label 45 | `release.rhtap.openshift.io/automated` and set it to true so the author label is not set in the webhook. 46 | 47 | ###### Standing Attribution for an Application 48 | 49 | It will be obtained by capturing the user that creates or updates the `ReleasePlan` CR. This will be done exclusively when 50 | the resource has the label `release.rhtap.openshift.io/standing-attribution` set to `true`. 51 | 52 | Since we expect ReleasePlans to be updated by users who may not intend to provide a `Standing Attribution` 53 | (i.e. Change `ReleasePlanAdmission`), it is important to keep track of author information when the annotation is set. 54 | ReleasePlans will make use of the same label described above (`release.rhtap.openshift.io/author`) to keep track of the 55 | user information. 56 | 57 | ##### Changes in the Release reconciliation 58 | 59 | The inclusion of attribution labels expands the validation phase. When a `Release` is reconciled, it must follow this 60 | process: 61 | * Verify that the `release.rhtap.openshift.io/author` label is present. 62 | * If not present, consult the `ReleasePlan` to look if it is present there. 63 | * If the author information is not present in either of them, fail with `ValidationError`. 64 | * If found, copy the author label to the Release and add the `status.attribution.author`. The field 65 | `status.attribution.standingAttribution` should be also set to `true` in the case the author is coming from the 66 | `ReleasePlan`. 67 | * Verify if the author is a "real" `User` as opposed to a `ServiceAccount`. Given an author value, the system will query 68 | the SSO service to determine that the user is known for the SSO service. This rules out **kube:admin** and any other 69 | service account. It will also help weed out the cases where the user has left the organization. 70 | * Update `status.attribution.verified` and set it to `true` if verification passed. Otherwise, fail with a `ValidationError`. 71 | 72 | At this point, the `status.attribution` will be populated and the author is validated to be a real user. The Release 73 | Service operator makes the `Release` along with the attribution info available to the Release Pipelines. 74 | 75 | This validation process will not be performed when `status.attribution.verified` is set to true to reduce the number 76 | of requests to the SSO service. 77 | 78 | ##### Removing attribution data 79 | 80 | A `Standing Attribution` for an Application can be deleted by a user when the `ReleasePlan` CR has the label 81 | `release.rhtap.openshift.io/standing-attribution` set to `false` or removed. 82 | 83 | ## Consequences 84 | 85 | * The SSO service interaction must be adequately efficient to ensure the Release process is not delayed. 86 | * The system needs to handle the situation whereby a `User` specified in the `ReleasePlan` is not longer a valid user. 87 | -------------------------------------------------------------------------------- /ADR/0025-appstudio-pipeline-serviceaccount.md: -------------------------------------------------------------------------------- 1 | # 25. appstudio-pipeline Service Account 2 | 3 | Date: 2023-05-30 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | A default service account must be provided to allow Konflux components to run pipelines. 
12 | While OpenShift Pipelines has the option to automatically create a `pipeline` ServiceAccount on any namespace, the permissions granted to the account are overly broad and the solution was rejected after a security review. 13 | Therefore Konflux must manage this default service account. 14 | 15 | ## Decision 16 | 17 | Konflux will provide a service account named `appstudio-pipeline`. 18 | 19 | ### Ownership 20 | 21 | The Pipeline Service component owns the `appstudio-pipeline-scc` ClusterRole. 22 | 23 | The CodeReadyToolchain is in charge of: 24 | * creating the `appstudio-pipeline` ServiceAccount on all tenant namespaces, 25 | * creating the `appstudio-pipeline-runner` ClusterRole, 26 | * granting the `appstudio-pipeline-runner` and `appstudio-pipeline-scc` ClusterRoles to the `appstudio-pipeline` ServiceAccount. 27 | 28 | ### ClusterRoles 29 | 30 | #### appstudio-pipeline-runner 31 | 32 | The resource is defined [here](https://github.com/codeready-toolchain/member-operator/blob/master/config/appstudio-pipelines-runner/base/appstudio_pipelines_runner_role.yaml). 33 | 34 | #### appstudio-pipeline-scc 35 | 36 | The resource is defined [here](https://github.com/openshift-pipelines/pipeline-service/blob/main/operator/gitops/argocd/pipeline-service/openshift-pipelines/appstudio-pipelines-scc.yaml). 37 | 38 | ## Consequences 39 | 40 | * Tekton Pipelines users using the `pipeline` service account must migrate to the new `appstudio-pipeline` ServiceAccount. 41 | -------------------------------------------------------------------------------- /ADR/0026-specifying-ocp-targets-for-fbc.md: -------------------------------------------------------------------------------- 1 | # 26. Specifying OCP targets for File-based Catalogs 2 | 3 | * Date 2023-06-08 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | One of the supported component types within Konflux are [File-based Catalogs (FBC)]. 12 | These catalogs can either be used in isolation with a version of `opm` packaged in the container 13 | itself or in conjunction with other catalog configurations via a service such as 14 | [IIB Image Builder]. Red Hat OpenShift Container Platform (OCP) is one example of a platform that 15 | leverages FBCs for defining the operator graphs. In order to enable operator support to vary on a 16 | version-by-version basis, Red Hat maintains one catalog per OpenShift version. 17 | 18 | In order to support being able to target FBC components to specific versions of Red Hat OpenShift, 19 | Konflux needs to be able to keep track of the specific targeted version. In addition to the concerns 20 | around releasing FBC components to OpenShift, the version of `opm` used by each version of OpenShift 21 | may differ, so the Konflux integration process will need to ensure that tests are run using an appropriate 22 | binary version. 23 | 24 | ## Decision 25 | 26 | All FBC components intending to be released to OCP will be built using a OCP-specific parent image containing 27 | the target version number as a tag. This will result in a `FROM` instruction like 28 | 29 | ``` 30 | # The base image is expected to contain 31 | # /bin/opm (with a serve subcommand) and /bin/grpc_health_probe 32 | FROM registry.redhat.io/openshift4/ose-operator-registry:v4.12 33 | ``` 34 | 35 | While the annotation "org.opencontainers.image.base.name" is populated by buildah, any additional image build 36 | processes for the FBC components will also need to include this annotation indicating the pullspec of the base 37 | image. 
The annotation will enable all components to use `skopeo` to inspect the artifact to retrieve the pullspec: 38 | 39 | ```bash 40 | $ skopeo inspect --raw docker://quay.io/hacbs-release-tests/managed-release-team-tenant/sample-fbc-application/sample-fbc-component@sha256:da4bf45ba45b72aa306dc2889572e92bbac43da08de0a0146e2421f506c5517e | jq 41 | { 42 | "schemaVersion": 2, 43 | "mediaType": "application/vnd.oci.image.manifest.v1+json", 44 | "config": { 45 | "mediaType": "application/vnd.oci.image.config.v1+json", 46 | "digest": "sha256:c83abcfb3af92d9b8ccea573fce6560a90919e77a8024c8269969b7799a2385c", 47 | "size": 21327 48 | }, 49 | "layers": [ 50 | [...] 51 | ], 52 | "annotations": { 53 | "org.opencontainers.image.base.digest": "sha256:e5a07eff6865b2761889ee275d9fc940237c90d05d63b00f60350841ecf42df2", 54 | "org.opencontainers.image.base.name": "registry.redhat.io/openshift4/ose-operator-registry:v4.12" 55 | } 56 | } 57 | ``` 58 | 59 | The target Red Hat OpenShift version will then be able to be pulled from the image tag on the 60 | "org.opencontainers.image.base.name" annotation. If a task within the Konflux pipeline needs to access 61 | an appropriate `opm` binary for performing validation, it can determine the base image and use the binary from 62 | that container if it is trusted (for example, if it is an image from the 63 | `registry.redhat.io/openshift4/ose-operator-registry` repository), or fail if the base image isn't trusted. 64 | 65 | 66 | ## Consequences 67 | 68 | * Konflux services should be able to avoid directly using the `opm` version packaged in FBC components 69 | to prevent the execution of untrusted binaries by a process in the trusted control plane. 70 | * No additional kubernetes objects need to be created to track the target OCP versions 71 | * There is a desire to use [FBC templates] within Konflux in the future. The current decision can be 72 | re-evaluated if and when that functionality is introduced. 73 | 74 | [FBC templates]: https://olm.operatorframework.io/docs/reference/catalog-templates/ 75 | [File-based Catalogs (FBC)]: https://olm.operatorframework.io/docs/reference/file-based-catalogs/ 76 | [IIB Image Builder]: https://github.com/release-engineering/iib 77 | -------------------------------------------------------------------------------- /ADR/0027-availability-probe-framework.md: -------------------------------------------------------------------------------- 1 | # 27. Availability Probe Framework 2 | 3 | * Date 2023-07-06 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | As an Konflux developer building functionality for the platform, I want to be able to 12 | easily visualize and comprehend the stability and availability of deployed systems in 13 | order to inform and influence future work towards improving the overall system 14 | reliability. 15 | 16 | Such indication should tell us the overall uptime of Konflux with respect to services 17 | under the control of Konflux developers. 18 | 19 | Konflux is defined to be available at a given moment if all of its components are 20 | reporting to be available at that given moment, and unavailable otherwise. 21 | 22 | A component is defined to be available at a given moment if all of its availability 23 | probes are reporting to be available at that given moment, and unavailable otherwise. 24 | 25 | A convention is required for providing the availability of a probe. 26 | 27 | Once this is in place, those indicators can be aggregated in order to report the overall 28 | availability of Konflux. 
29 | 30 | ## Decision 31 | 32 | Probes' availability will be provided as a Prometheus metric. The metric will contain 33 | metric labels to allow differentiating between the different probes. When exported out 34 | of the cluster of origin, additional labels will be attached to the metric to mark the 35 | cluster in which the metric was generated. 36 | 37 | ### Details 38 | 39 | The availability Prometheus metric will be computed for each component based on the exit 40 | status of the latest execution of the CronJobs evaluating the component's availability. 41 | Component owners will provide the implementation for each component's CronJobs. By 42 | adhering to a specific standard 43 | ([see naming convention below](#Probes-Naming-Convention)), 44 | results will be aggregated into a standardized Prometheus metric to report on 45 | availability (i.e. component owners will not be required to provide the translation 46 | mechanism). 47 | 48 | It is up for each team to define what it means for its component(s) to be available. 49 | Fundamentally, a component should be reported as available as long as it's capable of 50 | providing the service it aims to provide, and unavailable otherwise. 51 | 52 | Each team will define CronJobs that will test the availability of their components. 53 | The Job started by each CronJob will terminate successfully if the test completes 54 | successfully, and will terminate with an error in case the test fails. 55 | 56 | Kube-state-metrics is a Prometheus exporter generating metrics based on the Kubernetes 57 | API server. It generates 58 | [Jobs](https://github.com/kubernetes/kube-state-metrics/blob/main/docs/job-metrics.md) 59 | and 60 | [CronJob](https://github.com/kubernetes/kube-state-metrics/blob/main/docs/cronjob-metrics.md) 61 | metrics that will be processed using Prometheus recording rules in order to 62 | generate Konflux's availability Prometheus metric. 63 | 64 | A single set of rules will be defined globally which will apply for all CronJobs. 65 | 66 | The resulting Prometheus metric will identify each probe based on its CronJob name and 67 | the namespace in which the CronJob was defined. 68 | 69 | #### Probes Requirements 70 | 71 | A Job running for a probe is required to: 72 | 73 | * Evaluate the availability of the component being probed. 74 | * Exit with status `Failed` if the component was evaluated to be unavailable or with 75 | status `Complete` if it was evaluated to be available (referring to the status field 76 | of the Kubernetes Job resource). 77 | * Be able to exhaust its `backoffLimit` by the time it's due to run again (e.g. if the 78 | cronjob is defined to run every 10 minutes and can take up to 2 minutes to execute 79 | it cannot have a `backoffLimit` larger than 4). 80 | * Clean up all resources generated during its run. 81 | 82 | #### Probes Naming Convention 83 | 84 | To allow generating the Prometheus metric only for relevant CronJobs, probe CronJob 85 | names should have a standardized format: 86 | 87 | * `appstudio-probe-` 88 | 89 | `appstudio-probe-` being a literal to be used in order to capture only the relevant 90 | CronJobs, and `` is a variable to be translated to a label in the resulting 91 | Prometheus metric that will correlate the value to the individual probe or check. 
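A minimal sketch of a probe CronJob that follows this convention is shown below. The probe name, component, image and schedule are illustrative, and the namespace anticipates the per-component convention described next.

```yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: appstudio-probe-api-health          # appstudio-probe-<probe-name>
  namespace: appstudio-probe-build-service  # appstudio-probe-<component>, see below
spec:
  schedule: "*/10 * * * *"
  jobTemplate:
    spec:
      # With a 10-minute schedule and a probe that takes up to ~2 minutes,
      # keep backoffLimit small enough that retries are exhausted before the
      # next run, per the requirements above.
      backoffLimit: 3
      template:
        spec:
          restartPolicy: Never
          containers:
            - name: probe
              image: quay.io/example/build-service-probe:latest
              command: ["/probe"]   # exits non-zero when the component is unavailable
```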
92 | 93 | To allow aggregating the Prometheus availability metric per component, the namespaces 94 | in which the CronJobs will be created should have a standardized format: 95 | 96 | * `appstudio-probe-` 97 | 98 | `` is the name of the component under which probes will be aggregated. 99 | 100 | The nature and size of each service and component will dictate the number of probes it 101 | should have. E.g. some services may have multiple components, while some others may have 102 | just one. Some components may require multiple probes while others may require just one. 103 | 104 | > **_NOTE:_** The probe-name part of the CronJob name should be unique in the context of 105 | the CronJob's **namespace**. 106 | 107 | #### Probes Design Considerations 108 | 109 | Considerations for defining probes' CronJobs: 110 | 111 | * Where should it run in order to provide reliable evaluation? 112 | * Which namespace? 113 | * Which clusters? 114 | * What sort of permissions does it require to have? 115 | * Would that be a good-enough representation of what it aims to evaluate? 116 | * What sort of APIs does it need to access? 117 | * How often should it run? 118 | * Does it affect performance in a reasonable manner? 119 | * How resources are to be cleaned up? 120 | * Upon startup? completion? failure? 121 | * Using another CronJob? Finalizers? 122 | 123 | ## Consequences 124 | 125 | * The different teams for all Konflux services will define the CronJobs required for 126 | testing their components' availability, and will name them according to the naming 127 | convention. 128 | * A single set of Prometheus recording rules will be defined for transforming the 129 | CronJob results into availability Prometheus metric time series. 130 | * Existing Prometheus alerting rules should be examined and adjusted so that they do not 131 | generate unnecessary alerts caused by CronJobs failures. 132 | -------------------------------------------------------------------------------- /ADR/0027-container-images.md: -------------------------------------------------------------------------------- 1 | # 27. Container Image Management Practice 2 | 3 | Date: 2023-06-30 4 | 5 | ## Status 6 | 7 | Proposed 8 | 9 | * Relates to [ADR 17. Use our own pipelines](0017-use-our-pipelines.md) 10 | 11 | ## Context 12 | 13 | The purpose of this document is to establish container image management practices for Konflux container images that are deployed in the staging and production environments. The goal is to ensure that Konflux is continuously maintaining secure operations that are in accordance with the ESS SEC-PATCH-REQ-2 (OS Patching) requirements. 14 | 15 | ### Scope 16 | * The scope of this process is limited to the images found in our [quay.io/organization/redhat-appstudio](https://quay.io/organization/redhat-appstudio) repository. 17 | * Images from dependencies that fall outside of this Konflux process should follow the [ESS Security Patching at Application/OS Level (requirements 27 and 28)](https://drive.google.com/file/d/1P6-q2HJxA3yZhykaI29gF2IV4avzxtjM/view). It is up to the component teams to ensure they are adhering to these requirements. 18 | * Images that are not intended for the staging and/or production environments are out of scope. 
19 | 20 | 21 | ## Decision 22 | 23 | ### Role 24 | 25 | **Component Team**: Develops and maintains components that are built as images and deployed as part of Konflux 26 | 27 | 28 | ### Responsibilities 29 | 30 | #### Automated Build and Scanning 31 | ###### Onboard to Pipelines As Code (PaC) 32 | 33 | Component Teams are responsible for ensuring their container images are continuously built and scanned for vulnerabilities by following the 34 | [Extending the Service](https://redhat-appstudio.github.io/infra-deployments/docs/deployment/extending-the-service.html) process to onboard their component to the PaC service. 35 | 36 | *** 37 | #### Container Images
38 | 39 | ###### Trigger Builds 40 | Under the PaC service, images are rebuilt when there are updates to the component’s git repository but additional configuration is needed in the Dockerfile to ensure the underlying base (UBI) images are updated with the latest packages (see the [HAS example](https://github.com/redhat-appstudio/application-service/blob/main/Dockerfile#L24])) or at the very least, the latest [security updates](https://developers.redhat.com/articles/2021/11/11/best-practices-building-images-pass-red-hat-container-certification#best_practice__5__include_the_latest_security_updates_in_your_image). This will minimize the gap between patching and should meet our CVE timelines as long as the repository is active. 41 | 42 | Component teams are encouraged to install [renovatebot](https://github.com/renovatebot/renovate) to keep their dependencies up to date. 43 | 44 | ###### Scheduled Builds 45 | 46 | Since image updates are based on how active our repos are, there is the risk that over time, as code stabilizes and/or enters maintenance mode, the triggers for rebuilds will be less frequent which will cause the images to degrade. To avoid this, component teams should also ensure there are scheduled, weekly builds or builds driven by renovatebot or dependabot in place. 47 | 48 | 49 | ###### New Components 50 | Newly onboarded components are required to use a fully supported and patched major version release for their base images per ESS SEC-PATCH-REQ-2 requirement #3. Installing [renovatebot](https://github.com/renovatebot/renovate) can 51 | help achieve this requirement. 52 | 53 | *** 54 | 55 | #### Vulnerability Alerts 56 | 57 | It is recommended that component teams set up notifications to receive vulnerability alerts that are at least **_medium_** severity. This can be done in a couple of ways: 58 | 59 | * Set up an [alert in quay.io](https://docs.quay.io/guides/notifications.html) which supports email and Slack integration 60 | * Use the following github action to report vulnerabilities under the action tab. 61 | You can copy this [workflow](https://github.com/openshift-pipelines/pipeline-service/blob/main/.github/workflows/periodic-scanner-quay.yaml) and this [script](https://github.com/openshift-pipelines/pipeline-service/blob/main/ci/images/vulnerability-scan/scan.sh) onto your repo and set the variables 62 | 63 | *** 64 | 65 | #### Remediation 66 | 67 | While our automation process will ensure that component teams are keeping their images updated, security scanners are not perfect. Vulnerabilities can be reported through other channels, in which case, component teams must assess the severity of these findings and remediate according to the Infosec Remediation Guidelines 68 | 69 | *** 70 | 71 | #### End of Life Base Images (EOL) 72 | 73 | Component teams should be aware of the lifecycle policy for their base images by referring to the RedHat [Product Lifecycle page](https://access.redhat.com/product-life-cycles/update_policies). Any base image version that is within 3 months of retiring must be updated to the latest patched major release. This should be supported by the [deprecated-base-image](https://github.com/redhat-appstudio/build-definitions/blob/main/task/deprecated-image-check/0.2/deprecated-image-check.yaml#L11-L12) check in the PAC pipeline. 74 | 75 | *** 76 | #### Exception Process 77 | 78 | The Red Hat Product Security Exception Process must be followed in the event that images cannot be patched or updated within the remediation timelines. 
Some example scenarios: 79 | 80 | * A fix that poses a risk to our service is not being provided by the vendor within the remediation timeline 81 | * A deployed container image containing an EOL base image 82 | * An image that cannot be scanned due to an unsupported manifest 83 | 84 | 85 | ## Consequences 86 | 87 | In summary, component teams should have the following in place in order to meet ESS requirements: 88 | 89 | * Onboard and integrate with PaC 90 | * Ensure their container images are built regularly relying on both PR triggers and scheduled builds 91 | * Ensure the base image layer is also updated with every image rebuild 92 | * Set up vulnerability alerts 93 | * Understand and follow the remediation timelines 94 | * Understand and follow the exception process 95 | * Images deployed from dependencies must not have vulnerabilities 96 | 97 | See also [RHTAP-828](https://issues.redhat.com/browse/RHTAP-828). 98 | 99 | -------------------------------------------------------------------------------- /ADR/0028-handling-snapshotenvironmentbinding-errors.md: -------------------------------------------------------------------------------- 1 | # 28. Handling SnapshotEnvironmentBinding Errors 2 | Date: 2023-08-31 3 | 4 | ## Status 5 | Superceded by [ADR 32. Decoupling Deployment](0032-decoupling-deployment.html) 6 | 7 | ## Context 8 | It is currently not possible to determine whether a SnapshotEnvironmentBinding (SEB) is stuck in an unrecoverable state. This is a major problem when deciding if an ephemeral SEB needs to be cleaned up by the integration service's SnapshotEnvironmentBinding controller. An inability to clean up errored SEBs can overload the cluster. 9 | 10 | ## Decision 11 | The integration service has a reconciler function that cleans up errored SnapshotEnvironmentBindings with an ErrorOccured condition. This condition is set to 'true' by default and is set to 'false' when the environment becomes available. The integration service will consider that all SEBs with a 'true' ErrorOccured condition and a LastUpdateTime of more than five minutes ago are unrecoverable and can be cleaned up. 12 | 13 | ## Consequence 14 | - SnapshotEnvironmentBindings will be cleaned up after five minutes if they are not ready. Environments that take a long time to provision may be cleaned up erroneously. To accomodate this, the timeout threshold can be adjusted via a pull request to the integration service. 15 | 16 | ## Footnotes 17 | - This change has been implemented as a stopgap measure to avoid permanently stuck integration tests while a generic solution for detecting provisioning errors is designed. That work will be tracked in [RHTAP-1530](https://issues.redhat.com/browse/RHTAP-1530). 18 | -------------------------------------------------------------------------------- /ADR/0031-sprayproxy.md: -------------------------------------------------------------------------------- 1 | # 31. Sprayproxy 2 | 3 | Created: 2023-10-24 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | Konflux has multiple member (backend) clusters. Each member cluster is running a Pipelines-As-Code (PaC) service, accepting webhook requests. A GitHub App can only specify a single destination for webhook requests. We need to forward those requests to multiple clusters. 12 | 13 | ## Decision 14 | 15 | Deploy a service (`Sprayproxy`) on the Konflux host (frontend) clusters. The service route is configured in the GitHub App as a `Webhook URL`, so all webhook requests are directed to it. 
The service has a list of backends configured. The service does not distinguish between the type of requests the way PaC does (pull-request/push/comment etc), it treats them all equally. For each incoming request, a new outgoing request is constructed with the original payload and destination of each of the member clusters. 16 | 17 | The service performs the following checks on incoming requests: 18 | 19 | - Validating the request is from GitHub using a shared secret. The process is officially documented [here](https://docs.github.com/en/webhooks/using-webhooks/validating-webhook-deliveries). 20 | - [Payloads are capped at 25 MB](https://docs.github.com/en/webhooks/webhook-events-and-payloads#payload-cap), but that's configurable. 21 | 22 | The QE team requires the ability to dynamically add/remove backends. That functionality is disabled by default as it represents a security risk (bad actor could access private content if they get the forwarded content or cause denial of service). To enable that functionality, the service should be started with a special flag. The actual registration/unregistration happens through additionally exposed backends endpoint using the respective GET/POST/DELETE HTTP requests. The authentication happens through kube-rbac-proxy. 23 | 24 | The service exports metrics visible only on the dashboards on the host clusters where the service is deployed. The authentication happens through kube-rbac-proxy. 25 | 26 | ## Consequences 27 | 28 | - Each Konflux customer is onboarded to one cluster which means a pipeline on that particular cluster will be triggered as a result of the request. By "blindly" forwarding the request to all clusters, requests are also sent to clusters where they won't have effect and are discarded. 29 | - Sprayproxy performs the same type of request verification as does PaC. The reason for doing that is we do not forward invalid requests to multiple clusters, but it also means extra workload. Operationally both services use the same shared secret, so when rotating the secret it only has to be done in a single place. 30 | -------------------------------------------------------------------------------- /ADR/0033-enable-native-opentelemetry-tracing.md: -------------------------------------------------------------------------------- 1 | # 33. Enable Native OpenTelemetry Tracing 2 | 3 | Date started: 2024-02-27 4 | 5 | Date accepted: 2024-04-02 6 | 7 | ## Status 8 | 9 | Accepted 10 | 11 | ## Context 12 | 13 | Konflux is a tool under active development and, therefore, unforeseen issues may arise. A recent (at the time of writing this ADR) example is the [long running](https://github.com/redhat-appstudio/build-definitions/pull/856/checks?check_run_id=22307468968) [e2e-test](https://github.com/redhat-appstudio/build-definitions/blob/main/.tekton/tasks/e2e-test.yaml) in Konflux’s build definitions. Fixing and debugging such issues is not a trivial thing for Konflux’s developers. Additional data, metrics, telemetry and tracing are essential in enabling Konflux developers and SREs to come up with fixes. 14 | 15 | Tracing, in particular, enables a straightforward model for dealing with complex, distributed systems. It gives unique insight into a system’s execution, grouping functions together. These grouping functions can be critical for finding fields that correlate to some problem, and provide powerful insights to reduce the range of possible causes. 
16 | 17 | OpenTelemetry is the industry standard to instrument, generate, collect, and export telemetry data (metrics, logs, and traces) to help you analyze software performance and behavior. Our goal is to collect [traces](https://opentelemetry.io/docs/concepts/signals/traces/) from the Konflux activity of building applications. 18 | 19 | Instrumenting Konflux with [OpenTelemetry (OTel)](https://opentelemetry.io/docs/) tracing will provide SREs and developers invaluable insight for incident response, debugging and monitoring. Our goal is to get traces for the Konflux activity in order to achieve an easier mental model to use for debugging. 20 | 21 | ## Decision 22 | 23 | We are going to enable as much native tracing in Konflux as we can by quickly enabling any pre-existing tracing capabilities in the system. Any other type of tracing (e.g. zero code instrumentation or code based instrumentation) is out of scope for this ADR. 24 | 25 | Native tracing will be enabled for the core Tekton controller as no upstream changes are required in order to do so, as Tekton already natively supports [OpenTelemetry Distributed Tracing for Tasks and Pipelines](https://github.com/tektoncd/community/blob/main/teps/0124-distributed-tracing-for-tasks-and-pipelines.md). We just need to work to enable this native tracing (e.g. set environment variables). 26 | 27 | There are a few ways to enable native tracing in Konflux. Openshift and Tekton natively have Jaeger tracing which can be collected by a compatible application, such as an actual Jaeger instance, an OpenTelemetry collector or even something like Grafana Tempo. 28 | 29 | We recommend using an OpenTelemetry Collector as the way to collect Konflux native tracing as it has the least installation and setup overhead while also providing flexibility to forward traces to any tracing frontend. 30 | 31 | Other Tekton pieces that Konflux leverages such as [pipeline as code](https://pipelinesascode.com/), [chains](https://tekton.dev/docs/chains/) and [results](https://tekton.dev/docs/results/) will have to be instrumented separately and will require upstream changes, so they are out of scope for this ADR. 32 | 33 | Also, other Konflux services such as the [build service](https://github.com/redhat-appstudio/architecture/blob/main/architecture/build-service.md), [application service](https://github.com/redhat-appstudio/architecture/blob/main/architecture/hybrid-application-service.md) and [integration service](https://github.com/redhat-appstudio/architecture/blob/main/architecture/integration-service.md) will also require either automatic instrumentation or code based instrumentation and therefore are also out of scope for this ADR. 34 | 35 | Any other type of instrumentation that isn't native will be addressed in a future ADR. 36 | 37 | ## Consequences 38 | 39 | Additional applications will have to be installed in the same OpenShift cluster that the Konflux instance runs or OpenTelemetry collector(s) will have to be available in order to collect traces. Also, some configuration changes are required on the tekton-pipelines namespace in the OpenShift cluster that the Konflux instance runs. 
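For illustration, a minimal OpenTelemetry Collector configuration along these lines could accept the Jaeger-format spans emitted by Tekton as well as OTLP, and forward them to whatever tracing backend is available. This is a sketch only: it assumes a collector distribution that includes the Jaeger receiver, and the backend endpoint is a placeholder.

```yaml
receivers:
  jaeger:
    protocols:
      thrift_http:
        endpoint: 0.0.0.0:14268   # where Tekton's Jaeger exporter can send spans
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
exporters:
  otlphttp:
    endpoint: http://tracing-backend.example.svc:4318   # placeholder backend (Jaeger, Tempo, etc.)
service:
  pipelines:
    traces:
      receivers: [jaeger, otlp]
      exporters: [otlphttp]
```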
40 | 41 | Enabling Konflux native tracing is not without risks: 42 | - There is a span or trace flooding risk from within the OpenShift cluster 43 | - There is a secret leakage risk (although this is not an exclusive risk for traces, logs are also liable to this) 44 | 45 | However, we assess that the benefits far outweigh the risks and therefore, by instrumenting Konflux with [OpenTelemetry (OTel)](https://opentelemetry.io/docs/) tracing, we will provide Konflux SREs and developers invaluable insight for incident response, debugging and monitoring, ultimately achieving an improved mental model for debugging and incident response. 46 | -------------------------------------------------------------------------------- /ADR/0035-apps-continuous-chaos-testing.md: -------------------------------------------------------------------------------- 1 | # 35. Continuous Chaos Testing of Apps in AppStudio 2 | 3 | Date: 2024-06-04 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | The chaos engineering strategy enables users to discover potential causes of service degradation. It helps users understand their app behavior under unpredictable conditions, identify areas to harden, and utilize performance data points to size and tune their application to handle failures, thereby minimizing downtime. 12 | 13 | There are two approaches to chaos testing in the CI/CD pipeline. 14 | 15 | ### Resilience based Chaos scenario 16 | 17 | These Chaos scenarios are expected to cause application failure. Example scenarios include simulating memory pressure, storage errors, killing random or dependent resources. The objective of these chaos test cases in the CI/CD pipeline is to assess whether the application is capable of mitigating and maintaining reliability. 18 | 19 | ![Architecture diagram of Resilience based Chaos test scenario](../diagrams/ADR-0035/chaos-resilience.png "Architecture diagram of Resilience based Chaos test scenario") 20 | 21 | ### SLA based Chaos scenario 22 | 23 | Test the resiliency of a application under turbulent conditions by running tests that are designed to disrupt while monitoring the application adaptability and performance: 24 | Establish and define your steady state and metrics - understand the behavior and performance under stable conditions and define the metrics that will be used to evaluate the application’s behavior. Then decide on acceptable outcomes before injecting chaos. 25 | Analyze the statuses and metrics of all components during the chaos test runs. 26 | Improve the areas that are not resilient and performant by comparing the key metrics and Service Level Objectives (SLOs) to the stable conditions before the chaos. For example: evaluating the API server latency or application uptime to see if the key performance indicators and service level indicators are still within acceptable limits. 27 | 28 | ![Architecture diagram of SLA based Chaos test scenario](../diagrams/ADR-0035/chaos-sla.png "Architecture diagram of SLA based Chaos test scenario") 29 | 30 | 31 | ### Glossary 32 | 33 | - krkn: Chaos testing framework: 34 | 35 | ## Decision 36 | 37 | * Users can leverage Krkn, a chaos testing framework, to execute chaos tests within the IntegrationTestScenarios. 38 | 39 | * Ephemeral clusters ([provisioning-ephemeral-openshift-clusters](https://github.com/konflux-ci/architecture/pull/172)) will be used for executing the tests, to provide a more isolated and production-like testing environment compared to ephemeral namespaces. 
40 | 41 | * Users can gather Prometheus metrics for analysis during chaos testing. 42 | 43 | * Optionally, users can gather metrics exposed by the application, if [monitoring for user-defined projects](https://docs.openshift.com/container-platform/4.15/observability/monitoring/enabling-monitoring-for-user-defined-projects.html#accessing-metrics-from-outside-cluster_enabling-monitoring-for-user-defined-projects) feature is enabled in the cluster. 44 | 45 | ## Consequences 46 | 47 | * The user/service account will require elevated privileges (cluster-admin) within the ephemeral environment to execute CRUD operations (configure RBAC, Prometheus instances) 48 | 49 | * The Tekton Tasks within the chaos pipeline will need to ascertain the different permission levels (cluster-admin vs. namespace admin vs. monitoring access) and set up the environment depending on the Chaos Test scenario, before starting the Tekton Tasks related to the Chaos testcase. -------------------------------------------------------------------------------- /ADR/0036-trusted-artifacts.md: -------------------------------------------------------------------------------- 1 | # 36. Trusted Artifacts 2 | 3 | Date: 2024-06-11 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | One of the properties of Konflux is that users should be allowed to include their own Tekton Tasks 12 | in a build Pipeline, e.g. to execute unit tests, without jeopardizing the integrity of the build 13 | process. This is distinct from other build systems where a rigid process prevents users from 14 | applying such customizations. To support this, Konflux build Pipelines use Trusted Artifacts to 15 | securely share files between Tasks. Enterprise Contract is then responsible for verifying that 16 | Trusted Artifacts were properly used in the parts of the build Pipeline that affect the build 17 | outcome, typically the `git-clone`, `prefetch-dependencies`, and `buildah` Tasks. 18 | 19 | Trusted Artifacts is inspired by the upcoming work being done by the Tekton Community, 20 | [TEP-0139](https://github.com/tektoncd/community/blob/main/teps/0139-trusted-artifacts.md). The 21 | Konflux version is meant to be a stop-gap until that feature is implemented and ready to be used. 22 | When the time comes, the Konflux implementation should align with what is provided by the Tekton 23 | Community, requiring a revision of this ADR and likely a new ADR. 24 | 25 | In brief, the processes of *creating* a Trusted Artifact wraps files into an archive. Then, the 26 | location of the archive and its checksum digests are recorded as a Task result. The process of 27 | *consuming* a Trusted Artifact extracts such an archive, while verifying its checksum digest, into a 28 | volume only accessible to the Task, e.g. `emptyDir`. The name and the checksum digest of the archive 29 | is provided via Task parameters. This ensures the artifacts produced by one Task are not tampered 30 | with when they are consumed by other Tasks. 31 | 32 | Furthermore, Konflux takes the approach of sharing such artifacts between Tasks via an OCI registry, 33 | e.g. quay.io, instead of using Tekton Workspaces backed by Persistent Volume Claims. This has 34 | several advantages that were previously discussed 35 | [here](https://github.com/konflux-ci/build-definitions/pull/913#issue-2215784386). 36 | 37 | ## Decision 38 | 39 | Sharing files between Tasks is done via Trusted Artifacts backed by OCI storage. 
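A minimal sketch of what that wiring can look like inside a Pipeline definition is shown below. The task and parameter names follow the conventions listed under Consequences, while the `ociStorage` parameter and the OCI repository are assumptions for illustration only.

```yaml
# Fragment of a Pipeline spec, illustrative only.
tasks:
  - name: clone-repository
    taskRef:
      name: git-clone-oci-ta             # Trusted Artifacts variant of the clone Task
    params:
      - name: ociStorage                 # assumed parameter: where the archive is pushed
        value: quay.io/example-org/example-artifacts
  - name: build-container
    runAfter:
      - clone-repository
    params:
      - name: SOURCE_ARTIFACT
        # An OCI reference plus checksum digest recorded as a result by the previous
        # Task; the consuming Task verifies the digest before extracting the archive.
        value: $(tasks.clone-repository.results.SOURCE_ARTIFACT)
```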
40 | 41 | ## Consequences 42 | 43 | * To facilitate the transition, a set of new Tasks have been added to support Trusted Artifacts. 44 | These are variants of existing Tasks. They follow the naming convention of using the suffix 45 | `-oci-ta`, e.g. `git-clone-oci-ta`. 46 | * New Tasks that implement new functionality, e.g. new code scanner, and share files with other 47 | Tasks do not not need to follow the naming convention. 48 | * Any Task that *uses* Trusted Artifacts must do so via parameters named with the suffix 49 | `_ARTIFACT`, e.g. `SOURCE_ARTIFACT`. 50 | * Any Task that *creates* Trusted Artifacts must do so via results named with the suffix 51 | `_ARTIFACT`, e.g. `SOURCE_ARTIFACT`. 52 | * Any Task that uses or creates Trusted Artifacts must NOT accept a general-purpose workspace. Files 53 | must always be shared as a Trusted Artifact. Workspaces can, of course, still be used for other 54 | purposes, such as mounting Secrets. 55 | -------------------------------------------------------------------------------- /ADR/0037-integration-service-promotes-to-GCL-immediately.md: -------------------------------------------------------------------------------- 1 | # 37. Integration service promotes components to GCL immediately after builds complete 2 | 3 | * Date: 2024-06-21 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | Supersedes: 10 | 11 | - [ADR 15. The Two-phase Architecture of the Integration Service](0015-integration-service-two-phase-architecture.html) 12 | - [ADR 16. Integration Service Promotion Logic](0016-integration-service-promotion-logic.html) 13 | 14 | Superseded by: 15 | 16 | - [ADR 38. Integration service removes composite snapshots and logic around them](0038-integration-service-composite-removal.html) 17 | 18 | ## Context 19 | 20 | In the initial implementation of the Integration Service, when a single component image is built, the 21 | Integration Service tests the application by creating a Snapshot. 22 | All Components with their images from the Global Candidate List are included within the Snapshot and then the Component 23 | that was newly built is updated/overwritten to complete the Snapshot creation. 24 | The Global Candidate List for the newly built component would only be updated once all required integration tests 25 | for the created Snapshot have passed successfully. 26 | See more about this in [ADR 16. Integration Service Promotion Logic](0016-integration-service-promotion-logic.html) and 27 | [ADR 15. The Two-phase Architecture of the Integration Service](0015-integration-service-two-phase-architecture.html). 28 | 29 | This logic created issues for the end users, especially in cases where older components would start failing 30 | Enterprise Contract checks as new rules/policies get introduced. This led to Global Candidate List deadlocks where 31 | it was impossible for a user to promote a new build of their component if more than one of their other components were 32 | failing integration tests. 33 | 34 | ## Decision 35 | 36 | Instead of holding off on promoting individual component builds to the Global Candidate List until they pass 37 | all required integration tests, the Integration service will promote the Snapshots that were created 38 | for those builds to the GCL immediately after they are created. 39 | 40 | Note: Integration service still does not promote the Snapshots originating from PRs, only those originating from 41 | push (merge-to-main) events gets promoted to the Global Candidate List. 
42 | 43 | Note: Integration service will still create Releases for each ReleasePlan that has an auto-release label, but only for 44 | Snapshots that have passed all the required integration tests. 45 | 46 | ## Consequences 47 | 48 | * Users can assume that their Global Candidate List is (in most cases) in sync 49 | with the head of the branch for each of their components 50 | * Users can unblock most (if not all) deadlock-type issues by simply submitting new builds of 51 | the components that are causing issues 52 | * Related builds from monorepos or PR groups would not be blocked from being promoted after merging 53 | * Since the race condition from the two-phase architecture has been eliminated on account of the Global Candidate List 54 | being updated immediately, Integration service will stop creating composite Snapshots 55 | 56 | ## Footnotes 57 | 58 | The new promotion logic will be implemented as part of the STONEINTG-83 epic. 59 | This document is created for posterity and visibility. 60 | 61 | [Global Candidate List]: ../architecture/integration-service.html 62 | -------------------------------------------------------------------------------- /ADR/0038-integration-service-composite-removal.md: -------------------------------------------------------------------------------- 1 | # 38. Integration service removes composite snapshots and logic around them 2 | 3 | * Date started: 2024-07-10 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | The main goal of composite snapshots was to prevent a race condition when teams merged multiple PRs to multiple components 12 | of the same application at nearly the same time. This concept was confusing for users, and we managed to simplify it 13 | by promoting to the GCL immediately and using override snapshots. Users also ran into a GCL deadlock issue. In short, 14 | components already in the GCL can cause the tests for a new component to fail if they are bundled into a snapshot. 15 | If two or more components in an application run into this issue, it can create a deadlock that prevents the release 16 | of new versions of the application. 17 | 18 | 19 | ## Decision 20 | 21 | The introduction of override snapshots should prevent this race condition and the GCL deadlock with a much simpler concept to understand, 22 | replacing the composite snapshots which served the same purpose. 23 | This decision led to the removal of all logic regarding composite snapshots from the integration-service codebase, since override snapshots 24 | solve the same problems. 25 | An override snapshot is created manually by users; it is a special kind of Snapshot which, if it passes the integration tests, 26 | updates the GCL for all the components contained within it. 27 | 28 | ## Consequences 29 | 30 | * Removal of code connected to composite snapshots 31 | 32 | ## Footnotes 33 | 34 | The new promotion logic will be implemented as part of the STONEINTG-83 epic. 35 | This document is created for posterity and visibility. 36 | 37 | -------------------------------------------------------------------------------- /ADR/0041-send-cloud-events.md: -------------------------------------------------------------------------------- 1 | # 41. Konflux should send cloud events for all system events 2 | 3 | Date: 2024-09-24 4 | 5 | ## Status 6 | 7 | Proposed 8 | 9 | ## Context 10 | 11 | Konflux made the architectural decision not to use cloud events internally. However, that does not mean that 12 | Konflux should not emit cloud events.
13 | 14 | Emitting cloud events would allow Konflux users to easily track what is happening in the system. In addition, 15 | they can use these cloud events to create their own application-specific infrastructure to support their build 16 | and release process. 17 | 18 | To support this, all Konflux components should be required to emit cloud events for significant events. These 19 | events should be documented fully and made available to users. 20 | 21 | Cloud event generation could be optional and that option could default to off. But users should be able to 22 | turn it on so that Konflux will generate cloud events that they can then act on. 23 | 24 | Note again that this ADR does not propose that Konflux generate cloud events for consumption by Konflux 25 | itself. Rather, it proposes that Konflux generate cloud events to support additional application-specific build and 26 | release functionality outside of Konflux. 27 | 28 | ### Use Cases 29 | 30 | * Teams want to kick off external QE/CI testing based on some criteria using cloud events. These tests 31 | potentially run for days, making them unsuitable for embedding directly in the pipeline. 32 | * Teams want to generate their own metrics for internal or external usage using cloud events. 33 | * Teams want to integrate with other tools that use eventing. 34 | * Teams want to move Jira states based on the generation of some artifacts. 35 | * Teams want to publish to all Satellite capsules in the network when release contents become available. 36 | * Teams want to be able to add infrastructure around their build and release processes without having to 37 | modify existing stable pipelines. 38 | * Teams want to collect data for audit or send email alerts when certain artifacts are built or released. 39 | * Teams want to be able to control their costs by moving non-build and non-release processes out of the cluster. 40 | 41 | ## Decision 42 | 43 | All Konflux components shall generate cloud events for significant events. 44 | 45 | ## Consequences 46 | 47 | Application teams can more easily create application-specific build and release infrastructure around Konflux. 48 | -------------------------------------------------------------------------------- /ADR/0046-common-task-runner-image.md: -------------------------------------------------------------------------------- 1 | # 46. Build a common Task Runner image 2 | 3 | Date: 2024-11-15 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | Tekton Tasks often depend on specific CLI tools. 12 | The tools come from a restricted set of container images 13 | (see `allowed_step_image_registry_prefixes` in the [policy data][rule-data]). 14 | 15 | ### The current mess of Task containers 16 | 17 | If no image containing a required CLI tool already exists, the current approach 18 | is to build a new image specifically for that one tool, or to add the tool to 19 | one or more of the existing images, if Task authors find that more convenient.
20 | Examples: 21 | 22 | * [yq-container] 23 | * [oras-container] 24 | (also includes `yq` and a copy of one script from [build-trusted-artifacts]) 25 | * [git-clone container][git-clone] 26 | (for the `git-init` tool, also includes `find`) 27 | * [buildah-container] 28 | (also includes a wild variety of tools such as 29 | `dockerfile-json`, `rsync`, `kubectl`, `jq`, `subscription-manager` and others) 30 | 31 | Then, we have some use-case-oriented containers which are somewhat intertwined 32 | with the tool-oriented containers (or at least share software, sometimes installed 33 | using different approaches). 34 | 35 | * [build-trusted-artifacts] 36 | (a set of Trusted Artifacts scripts, also includes `oras` and `jq`) 37 | * [source-container-build] 38 | (script for building source containers, also includes `skopeo` and `jq`) 39 | 40 | And last, some Tasks use the [appstudio-utils] image, which contains a variety 41 | of tools installed straight from GitHub releases. Many of which are also available 42 | in the tool-oriented containers (and installed via more legitimate means). 43 | 44 | The current situation increases confusion, maintenance burden (both for the container 45 | maintainers and for [build-definitions] maintainers) and, in case of `appstudio-utils`, 46 | breaks good secure supply chain practices. 47 | 48 | ### Splitting Tasks into steps 49 | 50 | The set of CLI tools you need may already be containerized, but in two or more separate 51 | containers. In that case, rather than adding the tools you need to one of the containers, 52 | the better solution could be to take advantage of Tekton Tasks' `steps` feature (each 53 | step can use a different container image). 54 | 55 | *Could* be, but isn't. In practice, what this achieves is: 56 | 57 | * Increased complexity of the Task code, since it typically requires splitting the 58 | code in unnatural ways and sharing some data between Task steps. Inexperienced 59 | Tekton users may not even think of this approach or know how to achieve it. 60 | * Increased compute resource requirements for the Task. The total resource requirements 61 | for a Task are not the *maximum* of its steps' resource requirements, they are 62 | the *sum* (see [Compute Resources in Tekton][compute-resources-in-tekton]). 63 | * Reduced size limit of the results that the task can return (unless the Tekton 64 | installation enables [Results from sidecar logs][results-from-sidecar-logs]). 65 | 66 | ### Konflux users and custom Tasks 67 | 68 | The Enterprise Contract team has developed the Trusted Artifacts concept to enable 69 | Konflux users to add custom Tasks to the pipelines without compromising the 70 | trustworthiness of the build. 71 | 72 | But Konflux users face the same difficulties described above (made worse by the 73 | fact that they don't tend to have much Tekton experience). The initial hurdle of 74 | finding/building the right container image for what they want to do may be too high. 75 | 76 | ## Decision 77 | 78 | Build and maintain a common "Task Runner" image. 79 | 80 | The image must: 81 | 82 | * Include all the tools commonly needed by Konflux build tasks. 83 | * Build and release via Konflux, hermetically if possible. 84 | * Document the list of installed tools and their versions, similar to how GitHub 85 | documents the [software installed in their runner images][github-runner-software]. 86 | * The list of tools is a public interface, both Konflux devs and Konflux users 87 | can depend on it. 88 | * Use proper semver versioning. 
The deletion of a tool, or a change in the major 89 | version of a tool, is a breaking change and must result in a major version change 90 | for the Task Runner image. 91 | 92 | Gradually deprecate all the current tool-oriented container images and replace 93 | their usage with the common Task Runner image. 94 | 95 | The Task Runner image does not replace the more specialized use-case-oriented images, 96 | but they can use it as a base image if desirable. 97 | 98 | To include a tool in the Task Runner image, it should meet these requirements: 99 | 100 | * Be an actual standalone tool (e.g. not a haphazard collection of Bash/Python scripts) 101 | * Follow a versioning scheme (ideally semver) 102 | * Have release notes or a changelog 103 | * And naturally, convince the Task Runner maintainers of its suitability for inclusion 104 | 105 | ## Consequences 106 | 107 | The maintenance of container images needed for Tasks becomes more consolidated. 108 | The total number of rebuilds needed due to CVEs stays the same but is not scattered 109 | across tool-container repos anymore. 110 | 111 | Tasks get easier to write because all the tools you need are available in the same 112 | image. For both Konflux devs and Konflux users. 113 | 114 | Tasks have lower resource requirements because there's less of a need to split 115 | them into steps. 116 | 117 | The Task Runner image is larger than any of the individual images used by the Tasks 118 | at present. But it's much smaller than all the individual images combined. And 119 | because Tasks don't pull the image if it's already cached on the compute node, 120 | this is a win (there's a smaller set of images to cache, less pulling to do). 121 | 122 | By reducing the reliance on a Tekton-specific feature (steps), most Tasks become 123 | nothing more than a bash script wrapped in some YAML. It enables a saner approach 124 | to authoring Tasks. Write a bash script that works on your machine, wrap it in 125 | a bunch of YAML, verify that it works, ship it. Exceptions can still exist where 126 | necessary/justified. For example, the Trusted Artifacts variants of Tasks would 127 | still use separate steps to create/use the artifacts. 
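To make that last point concrete, a Task built on top of such an image could be as small as the following sketch; the Task Runner image reference is hypothetical, since this ADR only defines the requirements the image must meet.

```yaml
apiVersion: tekton.dev/v1
kind: Task
metadata:
  name: example-lint
spec:
  steps:
    - name: lint
      # Hypothetical Task Runner image, semver tagged as required above.
      image: quay.io/konflux-ci/task-runner:1.0.0
      script: |
        #!/usr/bin/env bash
        set -euo pipefail
        # All commonly needed CLI tools come from the same image,
        # so no additional steps or images are required.
        yq --version
        jq --version
```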
128 | 129 | 130 | [rule-data]: https://github.com/release-engineering/rhtap-ec-policy/blob/main/data/rule_data.yml 131 | [git-clone]: https://github.com/konflux-ci/git-clone/tree/main/Dockerfile 132 | [yq-container]: https://github.com/konflux-ci/yq-container/tree/main/Containerfile 133 | [oras-container]: https://github.com/konflux-ci/oras-container/tree/main/Containerfile 134 | [buildah-container]: https://github.com/konflux-ci/buildah-container/tree/main/Containerfile.task 135 | [build-trusted-artifacts]: https://github.com/konflux-ci/build-trusted-artifacts/tree/main/Containerfile 136 | [source-container-build]: https://github.com/konflux-ci/build-tasks-dockerfiles/blob/main/source-container-build/Dockerfile 137 | [appstudio-utils]: https://github.com/konflux-ci/build-definitions/blob/main/appstudio-utils/Dockerfile 138 | [build-definitions]: https://github.com/konflux-ci/build-definitions 139 | [results-from-sidecar-logs]: https://tekton.dev/docs/pipelines/tasks/#larger-results-using-sidecar-logs 140 | [compute-resources-in-tekton]: https://tekton.dev/docs/pipelines/compute-resources/ 141 | [github-runner-software]: https://github.com/actions/runner-images/blob/main/images/ubuntu/Ubuntu2404-Readme.md 142 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | # This is a comment. 2 | # Each line is a file pattern followed by one or more owners. 3 | 4 | # These owners will be the default owners for everything in 5 | # the repo. Unless a later match takes precedence, 6 | # @konflux-ci/book-publishers will be requested for 7 | # review when someone opens a pull request. 8 | * @konflux-ci/book-publishers 9 | 10 | /architecture/build-service.md @konflux-ci/build-maintainers 11 | /diagrams/build-service @konflux-ci/build-maintainers 12 | /architecture/image-controller.md @konflux-ci/build-maintainers 13 | 14 | /architecture/enterprise-contract.md @konflux-ci/ec 15 | 16 | /architecture/hybrid-application-console.md @konflux-ci/konflux-ui 17 | 18 | /architecture/multi-platform-controller.md @konflux-ci/infrastructure 19 | 20 | /architecture/internal-services.md @konflux-ci/release-service-maintainers 21 | /architecture/release-service.md @konflux-ci/release-service-maintainers 22 | /diagrams/internal-services @konflux-ci/release-service-maintainers 23 | /diagrams/release-service @konflux-ci/release-service-maintainers 24 | 25 | /architecture/integration-service.md @konflux-ci/integration-service-maintainers 26 | /diagrams/integration-service @konflux-ci/integration-service-maintainers 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Architecture of Konflux 2 | 3 | This repository contains the technical and architecture documents for Konflux. 4 | User documentation is out of scope. 5 | 6 | ## Guide to Sections 7 | 8 | ### The Technical Overview Document 9 | 10 | [/architecture](./architecture/index.md) folder hosts the technical overview document. This document represents the latest state of agreed technical and architectural decisions. See [contributing](#contributing) on how to propose changes. 11 | 12 | [/ref](./ref/index.md) folder hosts the API references for all the related services. These API references are generated during publish flow. 
13 | 14 | ### Architecture Diagrams 15 | 16 | [/diagrams](./diagrams/) folder stores the diagrams used on the overview document. These diagrams are done using [draw.io](https://draw.io) and stored in _.svg_ format. 17 | 18 | ### Architecture Decision Records - ADRs 19 | [/ADR](./ADR/) folder contains the ADRs that are executed as part of the process to update these documents as explained in [contributing](#contributing) section. 20 | 21 | ## Contributing 22 | 23 | All changes to the documents and diagrams require a peer-reviewed pull request. 24 | 25 | For significant changes that include changes to technical details or architecture the pull request should have 26 | 1. Changes to the overview document and diagrams where applicable. 27 | 2. An ADR record is added to the `/ADR` folder. 28 | 3. At least 2 approvals to be merged 29 | 30 | The changes that are corrections and clarifications and that do not reflect a significant change pull request should have 31 | 1. Changes to the overview document and diagrams where applicable. 32 | 2. Should have a `skip-adr-check` label 33 | 3. At least 1 approval 34 | 35 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | remote_theme: vaibhavvikas/jekyll-theme-minimalistic 2 | title: Architecture Of Konflux 3 | description: Technical documents about the Managed Developer Platform 4 | color-scheme: auto 5 | navigation: 6 | - name: Technical Overview Document 7 | link: /architecture/architecture/index.html 8 | - name: Architecture Decision Records 9 | link: /architecture/ADR/index.html 10 | - name: API References 11 | link: /architecture/ref/index.html 12 | sublist: 13 | - name: Application and Environment API 14 | link: /architecture/ref/application-environment-api.html 15 | - name: Image Controller 16 | link: /architecture/ref/image-controller.html 17 | - name: Integration Service 18 | link: /architecture/ref/integration-service.html 19 | - name: Enterprise Contract 20 | link: /architecture/ref/enterprise-contract.html 21 | - name: Release Service 22 | link: /architecture/ref/release-service.html 23 | - name: Internal Services 24 | link: /architecture/ref/internal-services.html 25 | -------------------------------------------------------------------------------- /architecture/build-service.md: -------------------------------------------------------------------------------- 1 | # Build Service 2 | 3 | 4 | ## Overview 5 | 6 | The Build Service is composed of controllers that create and configure build pipelines. The main input for the Build Service is a Component CR managed by the Konflux UI or created manually via `kubectl`. 7 | 8 | ![](../diagrams/build-service/build-service-diagram.svg) 9 | 10 | ### Dependencies 11 | 12 | The Build Service is dependent on the following services: 13 | - [Pipeline Service](./pipeline-service.md) 14 | - Pipeline execution, Pipeline logging 15 | - [Image Controller](./image-controller.md) 16 | - Generation of a container image repository and robot account for Component CR which is used by PipelineRun 17 | 18 | ## Controllers 19 | 20 | The Build Service contains these controllers: 21 | - Component Build Controller 22 | - Monitors Component CRs and creates PipelineRun definitions which will be used by [Pipelines As Code (PaC)](https://pipelinesascode.com) provided by Pipeline Service. 23 | - PaC PipelineRun Pruner Controller 24 | - Deletes PipelineRuns for Components that were deleted. 
25 | - Component dependency update (nudging) controller 26 | - Monitors push PipelineRuns and based on set relationships runs renovate which updates 27 | SHA for image from PipelineRun in user's repository. 28 | 29 | ### Component Build Controller 30 | 31 | Component Build Controller is managed by Component CR changes (creation or update). 32 | It's using Component CR annotations and configuration of the PipelineRuns. 33 | 34 | #### Modes 35 | The prerequisite is to have installed GitHub App which is used by the Build Service in the user's repository, or have gitlab/github secret created for usage via webhook 36 | ([creating GitLab secrets](https://konflux.pages.redhat.com/docs/users/building/creating-secrets.html#gitlab-source-secret)). 37 | 38 | Component Build Controller is working in multiple ways based on a request annotation `build.appstudio.openshift.io/request`: 39 | - PaC provision, annotation value `configure-pac` (default when request annotation isn't set) 40 | - Sets up webhook if GitHub App isn't used. 41 | - Integrates with Pipelines As Code, creates PipelineRun definitions in the user code repository. 42 | - PaC provision without MR creation, annotation value `configure-pac-no-mr` 43 | - Sets up webhook if GitHub App isn't used. 44 | - Integrates with Pipelines As Code, doesn't create PipelineRun definitions in the user code repository. 45 | - PaC unprovision, annotation value `unconfigure-pac` 46 | - Removes webhook created during provisioning if GitHub App wasn't used. 47 | - Creates PR removing PipelineRuns definitions from the user code repository. 48 | - Trigger PaC build, annotation value `trigger-pac-build` 49 | - Re-runs push pipeline runs (used for re-running failed push pipelines). 50 | 51 | All those requests first wait for `.spec.containerImage` to be set, either manually or 52 | by image-controller via 53 | [ImageRepository CR](https://github.com/konflux-ci/architecture/blob/main/architecture/image-controller.md#to-create-an-image-repository-for-a-component-apply-this-yaml-code). 54 | 55 | Controller will also create component specific service account `build-pipeline-$COMPONENT_NAME` 56 | used for build pipelines. 57 | 58 | PaC provision: 59 | 1. Sets up webhook in the respository if GitHub App isn't used. 60 | 1. Creates or reuses Repository CR (Component CR is set as the owner). 61 | 1. Creates merge request in the user code repository with PipelineRun definitions. 62 | 1. Sets `build.appstudio.openshift.io/status` annotation with either error, or state `enabled` and merge request link. 63 | 1. Sets finalizer `pac.component.appstudio.openshift.io/finalizer`. 64 | 1. Removes `build.appstudio.openshift.io/request` annotation. 65 | 66 | PaC provision without MR creation: 67 | 1. Sets up webhook in the repository if GitHub App isn't used. 68 | 1. Creates or reuses Repository CR (Component CR is set as the owner). 69 | 1. Doesn't create merge request in the user code repository with PipelineRun definitions, that is up to user. 70 | 1. Sets `build.appstudio.openshift.io/status` annotation with either error, or state `enabled`. 71 | 1. Sets finalizer `pac.component.appstudio.openshift.io/finalizer`. 72 | 1. Removes `build.appstudio.openshift.io/request` annotation. 73 | 74 | PaC unprovision: 75 | 1. Removes finalizer `pac.component.appstudio.openshift.io/finalizer`. 76 | 1. Removes webhook from repository if GitHub App isn't used and the repository isn't used in another component. 77 | 1. 
Creates merge request in the user code repository removing PipelineRun definitions. 78 | 1. Sets `build.appstudio.openshift.io/status` annotation with either error, or state `disabled` and merge request link. 79 | 1. Removes `build.appstudio.openshift.io/request` annotation. 80 | 81 | Trigger PaC build: 82 | 1. Triggers push pipeline via PaC incoming webhook, requires pipeline run name to be the same as it was named during provisioning `$COMPONENT_NAME-on-push`. 83 | 1. Sets `build.appstudio.openshift.io/status` annotation when error occures. 84 | 1. Removes `build.appstudio.openshift.io/request` annotation. 85 | 86 | #### PipelineRun selection 87 | Available and default pipelines are in the config map present on the cluster in controller's namespace 88 | [build pipelines config](https://github.com/redhat-appstudio/infra-deployments/blob/main/components/build-service/base/build-pipeline-config/build-pipeline-config.yaml). 89 | 90 | Build pipeline is selected based on `build.appstudio.openshift.io/pipeline` annotation, 91 | when annotation is missing, annotation with default pipeline (based on config map) will be added. 92 | 93 | Annotation value is json in string eg. `'{"name":"docker-build","bundle":"latest"}`. 94 | Name is the name of the pipeline, and the bundle is either `latest` which will use the tag from config map 95 | or specific tag for the bundle (used mostly for testing). 96 | 97 | When specified pipeline doesn't exist in config map, it will result with error. 98 | 99 | #### PipelineRun parameters 100 | There are a few parameters that are set in PipelineRun created by the Build Service: 101 | - git-url - set to `'{{source_url}}'` (evaluated by PaC to git url) 102 | - revision - set to `'{{revision}}'` (evaluated by PaC to git commit SHA) 103 | - output-image - taken from Component CR's `spec.containerImage`, 104 | for push pipeline appended tag `:{{revision}}` 105 | and for pull pipeline appended tag `:on-pr-{{revision}}` 106 | - image-expires-after - set only for pull pipelines, value hadcoded in the code or from ENV variable `IMAGE_TAG_ON_PR_EXPIRATION` 107 | - dockerfile - path to Dockerfile, taken from Component CR's `spec.source.git.dockerfileUrl`, 108 | default is `Dockerfile` 109 | - path-context - path to subdirectory with context, when used taken from Component CR's `spec.source.git.context` 110 | 111 | Additionally in [build pipelines config](https://github.com/redhat-appstudio/infra-deployments/blob/main/components/build-service/base/build-pipeline-config/build-pipeline-config.yaml) 112 | pipelines may have specified `additional-params` which will be added with default values from pipeline itself. 113 | 114 | ### PaC PipelineRun Pruner Controller 115 | The purpose of the PaC PipelineRun Pruner Controller is to remove the PipelineRun CRs created for Component CR which is being deleted. 116 | 117 | It will remove all PipelineRuns based on `appstudio.openshift.io/component` label in PipelineRun. 118 | 119 | ### Component dependency update controller (nudging) 120 | Monitors push PipelineRuns and based on defined relationships runs renovate, 121 | which updates SHA for the image produced by PipelineRun in user's repository. 122 | 123 | Relationships can be set in a Component CR via `spec.build-nudges-ref` (list of components to be nudged) 124 | 125 | 1. When PipelineRun is for a component which has set `spec.build-nudges-ref`, it will add finalizer to it 126 | `build.appstudio.openshift.io/build-nudge-finalizer`. 127 | 1. 
It will wait for PipelineRun to successfully finish. 128 | 1. When PipelineRun successfully finishes, it will run renovate on user's repositories 129 | (for components specified in `spec.build-nudges-ref`), 130 | updating files with SHA of the image which was built by PipelineRun. 131 | 1. Renovate will create merge request in user's repository if it finds matches. 132 | 1. Removes `build.appstudio.openshift.io/build-nudge-finalizer` finalizer from PipelineRun. 133 | 134 | Default files which will be nudged are: `.*Dockerfile.*, .*.yaml, .*Containerfile.*`. 135 | 136 | Users can modify list via: 137 | - `build.appstudio.openshift.io/build-nudge-files` annotation in push PipelineRun definition. 138 | - [custom nudging config map](https://konflux.pages.redhat.com/docs/users/building/component-nudges.html#customizing-nudging-prs) with `fileMatch` (takes precedence over annotation). 139 | -------------------------------------------------------------------------------- /architecture/enterprise-contract.md: -------------------------------------------------------------------------------- 1 | 2 | Enterprise Contract 3 | =================== 4 | 5 | Overview 6 | -------- 7 | 8 | The Enterprise Contract's purpose is to ensure container images produced by 9 | Konflux meet certain clearly defined requirements before they are considered 10 | releasable. Should a container image not meet the requirements the Enterprise 11 | Contract will produce a list of the reasons why so they can be addressed as 12 | required to produce a releasable build. 13 | 14 | Enterprise Contract requirements fall broadly into two categories, "built-in 15 | requirements"[^1] and "rule-based requirements". 16 | 17 | ### Built-in requirements 18 | 19 | The built-in requirements are as follows: 20 | 21 | - The container image is signed with a known and trusted key 22 | - The image has an attestation, also signed with a known and trusted key 23 | 24 | ### Rule-based requirements 25 | 26 | The rule-based requirements are based on the content of the pipeline run 27 | attestation and are defined using [Rego](https://tekton.dev/docs/chains/), the 28 | [Open Policy Agent](https://tekton.dev/docs/chains/) query language. 29 | 30 | Some examples of rule-based requirements are: 31 | 32 | - Tasks used in the pipeline run were defined in known and trusted task bundles 33 | - A defined set of tests were run during the pipeline build with passing results 34 | 35 | The technique of checking for a specific task result from a specific known and 36 | trusted task definition is a useful way to create a robust policy rule. The 37 | rego language is flexible and expressive so it's easy to create arbitrary 38 | policy rules based on anything exposed in pipeline run attestation. 39 | 40 | 41 | Components 42 | ---------- 43 | 44 | ### EC CLI 45 | 46 | The ec-cli is a command line utility written in Go. 
Its primary purpose is to 47 | perform the EC policy validation, which it does as follows: 48 | 49 | - For each image included in the release[^2] 50 | - Confirm the image is signed and verify the signature 51 | - Confirm the image has a signed and verifiable attestation 52 | - For each "policy source group"[^3] defined in the ECP CRD config: 53 | - Download all defined policy (rego) sources 54 | - Download all defined data sources 55 | - Run [Conftest](https://www.conftest.dev/) against the image's attestation using those policies and data 56 | - Output results in JSON format showing details about failures, warnings or violations produced 57 | 58 | The ec-cli also supports other related functions. For more information on 59 | ec-cli refer to the 60 | [documentation](https://enterprise-contract.github.io/ec-cli/main/reference.html) 61 | and the [code](https://github.com/enterprise-contract/ec-cli). 62 | 63 | ### EC Task Definition 64 | 65 | The EC Task Definition defines how the ec-cli command should be run in a 66 | Tekton task. It handles the task inputs and outputs and calls the ec-cli as 67 | needed to perform the EC validation. 68 | 69 | The task is defined 70 | [here](https://github.com/enterprise-contract/ec-cli/blob/main/task/0.1/verify-enterprise-contract.yaml). 71 | 72 | ### EC Policy CRD 73 | 74 | The ECP CRD defines a Kubernetes CR which is used to hold the configuration 75 | needed for running a specific instance of the Enterprise Contract. This 76 | includes the public key required to verify signatures, the list of policy 77 | and data sources, and any other required configuration. 78 | 79 | You can view the source code for the ECP CRD 80 | [here](https://github.com/enterprise-contract/enterprise-contract-controller) and 81 | see its documentation [here](https://enterprise-contract.github.io/ecc/main/). 82 | See also the related 83 | [API Reference](https://redhat-appstudio.github.io/architecture/ref/enterprise-contract.html) 84 | 85 | ### EC Policies 86 | 87 | The reference set of policy rules for Konflux is defined 88 | [here](https://github.com/enterprise-contract/ec-policies/) and documented 89 | [here](https://enterprise-contract.github.io/ec-policies/). It includes rules for a 90 | range of different policies that are considered useful for Konflux. 91 | 92 | There are Conftest bundles containing the latest version of these policies 93 | available in [quay.io 94 | here](https://quay.io/repository/enterprise-contract/ec-release-policy?tab=tags). 95 | 96 | 97 | Related Components 98 | ------------------ 99 | 100 | ### Tekton Chains 101 | 102 | Tekton Chains is a dependency for EC since EC works by examining attestations 103 | created by Tekton Chains when Konflux build pipelines are running. 104 | 105 | For more information on Tekton Chains refer to the 106 | [documentation](https://tekton.dev/docs/chains/), and the GitOps configuration 107 | [here](https://github.com/openshift-pipelines/pipeline-service/tree/main/operator/gitops/argocd/tekton-chains). 108 | 109 | ### The Release Pipeline 110 | 111 | The Konflux Release Pipeline contains an instance of the EC Task which is used 112 | to gate the release. If the EC task fails the release should be blocked. This 113 | functionality is handled by the Release Pipeline. 114 | 115 | For more information, see [Konflux Release Service 116 | Bundles](https://github.com/redhat-appstudio/release-service-bundles). 
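Tying these pieces together, an EnterpriseContractPolicy resource roughly along the following lines holds the verification key and the policy/data sources that the EC task consumes. Field names and values here are illustrative placeholders based on the description above; the ECP API reference linked earlier is the authoritative schema.

```yaml
apiVersion: appstudio.redhat.com/v1alpha1
kind: EnterpriseContractPolicy
metadata:
  name: example-policy
spec:
  description: Example policy used to gate releases
  # Placeholder reference to the public key used to verify image and attestation signatures.
  publicKey: k8s://openshift-pipelines/public-key
  sources:
    - name: release-policies
      policy:
        - oci::quay.io/enterprise-contract/ec-release-policy:latest
      data:
        - git::https://github.com/release-engineering/rhtap-ec-policy//data
```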
117 | 118 | ### EC and Renovate Bot 119 | 120 | To verify that tasks used in the build pipeline are from known and trusted 121 | Tekton bundles, EC requires a list of those bundles. 122 | 123 | Konflux users can leverage the [Renovate 124 | Bot](https://github.com/renovatebot/renovate#readme) service to keep such 125 | Tekton bundle lists up to date. The service can be configured to run 126 | periodically, and provide pull requests with updated references. 127 | Alternatively, users can run their own instance of Renovate either as a 128 | service or on-demand. 129 | 130 | 131 | Additional Resources 132 | -------------------- 133 | 134 | - [Konflux Documentation](https://redhat-appstudio.github.io/docs.appstudio.io) 135 | - [Enterprise Contract Documentation](https://enterprise-contract.github.io/) 136 | - [Architecture of Konflux](https://redhat-appstudio.github.io/architecture/) 137 | 138 | 139 | 140 | [^1]: Not sure about the terminology here. Do we have a better term for 141 | requirements enforced by ec-cli that are not defined by rego rules? 142 | 143 | [^2]: The list of images in a release is defined in a Snapshot CRD. The input 144 | to EC is a JSON formatted representation of this image list, but a 145 | single image is also supported. 146 | 147 | [^3]: Not sure about this terminology either. Conceptually the "source group" 148 | consists of one or more policy sources and zero or more data sources. 149 | 150 | 151 | 174 | -------------------------------------------------------------------------------- /architecture/gitops-service.md: -------------------------------------------------------------------------------- 1 | # GitOps Service 2 | 3 | 4 | -------------------------------------------------------------------------------- /architecture/hybrid-application-console.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /architecture/hybrid-application-service.md: -------------------------------------------------------------------------------- 1 | # Hybrid Application Service (HAS) 2 | 3 | Hybrid Application Service (HAS) is a component within Konflux that provides Kubernetes webhooks for Application and Component resources 4 | 5 | ## Webhooks 6 | - Validation webhook for Application CRs that prevents resources from being created with invalid names or display names. 7 | - Validation webhook for Component CRs that ensures valid application name, component name, and source are specified 8 | - Defaulting webhook for Component CRs that configures the OwnerReference for the Component to be that of its parent Application. 9 | - Webhook to manage the BuildNudgesRef relationship between nudging components: setting and removing nudging components from the status of nudged components 10 | 11 | 12 | ## Links 13 | 14 | - Repository: https://github.com/redhat-appstudio/application-service 15 | - Webhook definitions: https://github.com/redhat-appstudio/application-service/tree/main/webhooks 16 | 17 | -------------------------------------------------------------------------------- /architecture/image-controller.md: -------------------------------------------------------------------------------- 1 | # Image Controller 2 | 3 | # Overview 4 | Image controller sets up and manages container image repositories in configured quay.io organization. 
5 | It works for either a general-purpose image repository or a [Component](https://konflux-ci.dev/architecture/ref/application-environment-api.html#component)-specific image repository. 6 | 7 | The image controller can perform multiple actions with the use of ImageRepository custom resources. 8 | 9 | - **Setup image repository**: create an image repository and robot accounts 10 | which are specific to that repository for image push and pull. 11 | Kubernetes Secret objects are also created with the push and pull robot account tokens, 12 | in order to make them available to the build PipelineRun via a service account. 13 | 14 | - **Modify repository visibility**: switch an image repository's visibility between public and private. 15 | 16 | - **Rotate credentials for the repository**: rotate repository credentials and update the relevant secrets. 17 | 18 | - **Verify and fix secrets linked to ServiceAccount**: verify and fix the linking of secrets to the ServiceAccount. 19 | 20 | - **Cleanup**: When a Component CR is requested to be deleted, the image controller will remove the 21 | component's image repository (the Component owns the ImageRepository) and robot account from the remote registry 22 | (it is possible to skip repository removal). 23 | The Kubernetes Secret will eventually be removed along with the Component CR due to the 24 | ownership established between them (the ImageRepository owns the Secret). 25 | 26 | # Dependencies 27 | 28 | The image controller does not depend on other Konflux services, only on a remote image registry. 29 | Konflux services are able to use the resources prepared by the image controller, 30 | e.g. a ServiceAccount with linked Secrets is available to every build PipelineRun of a component 31 | for pushing images. 32 | 33 | ## Controllers 34 | 35 | The Image Controller contains these controllers: 36 | - Image controller 37 | - Monitors ImageRepository CRs, creates the image repository and robot accounts, and 38 | links secrets to service accounts. 39 | - Application controller 40 | - Monitors Application CRs, creates the application-specific service account `$APPLICATION_NAME-pull`, 41 | and links to it (in both the `secrets` and `imagePullSecrets` sections) all pull secrets from 42 | all Components in the Application. 43 | 44 | # Interface 45 | 46 | ## ImageRepository CR 47 | 48 | The ImageRepository CR is the interface for interacting with the image controller to create and manage image repositories in a registry. 49 | 50 | ### To create a general purpose image repository, apply this YAML code: 51 | ```yaml 52 | apiVersion: appstudio.redhat.com/v1alpha1 53 | kind: ImageRepository 54 | metadata: 55 | name: imagerepository-for-component-sample 56 | namespace: test-ns 57 | ``` 58 | As a result, a public image repository `quay.io/my-org/test-ns/imagerepository-for-component-sample` 59 | will be created, based on `$DEFAULT_REGISTRY_ORG/$USER_NAMESPACE/$IMAGE_REPOSITORY_NAME`. 60 | - DEFAULT_REGISTRY_ORG - is taken from the quay secret in the cluster 61 | - USER_NAMESPACE - is taken from the ImageRepository `.metadata.namespace` 62 | - IMAGE_REPOSITORY_NAME - is taken from the ImageRepository `.metadata.name` 63 | 64 | Two robot accounts and corresponding Kubernetes Secrets for push and pull are created.
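To illustrate, assuming the manifest above is saved as `imagerepository.yaml` and the image controller is installed in the cluster, the resulting repository URL and state can be read back from the CR's `.status` (described in more detail below):

```
$ kubectl apply -f imagerepository.yaml
$ kubectl get imagerepository imagerepository-for-component-sample -n test-ns \
    -o jsonpath='{.status.image.url}{"\n"}{.status.state}{"\n"}'
```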
65 | 66 | ### To create an image repository for a Component, apply this YAML code: 67 | ```yaml 68 | apiVersion: appstudio.redhat.com/v1alpha1 69 | kind: ImageRepository 70 | metadata: 71 | name: imagerepository-for-component-sample 72 | namespace: test-ns 73 | annotations: 74 | image-controller.appstudio.redhat.com/update-component-image: 'true' 75 | labels: 76 | appstudio.redhat.com/component: my-component 77 | appstudio.redhat.com/application: my-app 78 | ``` 79 | As a result, a public image repository `quay.io/my-org/test-ns/my-component` will be created, 80 | based on `$DEFAULT_REGISTRY_ORG/$USER_NAMESPACE/$COMPONENT_NAME`. 81 | - DEFAULT_REGISTRY_ORG - is taken from the quay secret in the cluster 82 | - USER_NAMESPACE - is taken from the ImageRepository `.metadata.namespace` 83 | - COMPONENT_NAME - is taken from the Component `.metadata.name` 84 | 85 | Two robot accounts and corresponding Kubernetes Secrets for push and pull are created. 86 | 87 | It will also link the push secret to the component-specific service account `build-pipeline-$COMPONENT_NAME` 88 | used for build pipelines (`secrets` section). 89 | 90 | It will likewise link the pull secret to the application-specific service account `$APPLICATION_NAME-pull` 91 | (to both the `secrets` and `imagePullSecrets` sections). 92 | 93 | The `image-controller.appstudio.redhat.com/update-component-image` annotation is required when using an 94 | ImageRepository with a Component, as it will set the Component's `spec.containerImage`, allowing the 95 | Build service controller to continue. 96 | 97 | ### User defined repository name 98 | One may request a custom image repository name by setting the `spec.image.name` field upon 99 | ImageRepository object creation, but it will always be prefixed with 100 | `$DEFAULT_REGISTRY_ORG/$USER_NAMESPACE`. 101 | 102 | e.g. when `spec.image.name` is set to `my-repository`, the final repository URL will be 103 | `$DEFAULT_REGISTRY_ORG/$USER_NAMESPACE/my-repository`. 104 | 105 | Note that it's not possible to change the image repository name after creation. 106 | Any changes to the field will be reverted by the operator. 107 | 108 | ### Setting quay.io notifications 109 | Notifications can be set with: 110 | ```yaml 111 | spec: 112 | notifications: 113 | - config: 114 | url: https://bombino.api.redhat.com/v1/sbom/quay/push 115 | event: repo_push 116 | method: webhook 117 | title: SBOM-event-to-Bombino 118 | ``` 119 | 120 | ### Changing repository visibility 121 | By default, a public image repository is created. 122 | To change the image repository visibility, set `.spec.image.visibility` to `public` or `private`. 123 | 124 | ### Credentials rotation for repository 125 | To regenerate the push and pull tokens, set `.spec.credentials.regenerate-token` to `true`; this will also re-create the secrets. 126 | 127 | After token rotation, the `spec.credentials.regenerate-token` section will be deleted and 128 | `status.credentials.generationTimestamp` updated. 129 | 130 | ### Verify and fix secrets linked to ServiceAccount 131 | - It will link the secret to the service account if the link is missing. 132 | - It will remove duplicate links of the secret in the service account. 133 | - It will remove the secret from `imagePullSecrets` in the service account. 134 | 135 | To perform the verification and fix, set `.spec.credentials.verify-linking` to `true`. 136 | 137 | After verification, the `spec.credentials.verify-linking` section will be deleted.
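For example, both of the operations above can be requested with a merge patch; the resource name and namespace below are the sample values used earlier:

```
# Rotate the push and pull tokens (re-creates the Secrets)
$ kubectl patch imagerepository imagerepository-for-component-sample -n test-ns \
    --type merge -p '{"spec":{"credentials":{"regenerate-token":true}}}'

# Verify and fix the Secret-to-ServiceAccount links
$ kubectl patch imagerepository imagerepository-for-component-sample -n test-ns \
    --type merge -p '{"spec":{"credentials":{"verify-linking":true}}}'
```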
138 | 139 | ### Skip repository deletion 140 | By default, if the ImageRepository resource is deleted, the repository it created in registry 141 | will get deleted as well. 142 | 143 | In order to skip the removal of the repository, set `true` to `image-controller.appstudio.redhat.com/skip-repository-deletion` annotation. 144 | 145 | ### Status explanation 146 | ImageRepository CR has `.status` which includes all final information about an image repository: 147 | 148 | ```yaml 149 | status: 150 | credentials: 151 | generationTimestamp: '2025-03-21T14:28:59Z' 152 | pull-robot-account: test_pull 153 | pull-secret: imagerepository-for-test-image-pull 154 | push-robot-account: test_push 155 | push-secret: imagerepository-for-test-image-push 156 | image: 157 | url: quay.io/redhat-user-workloads/test-tenant/test 158 | visibility: public 159 | notifications: 160 | - title: SBOM-event-to-Bombino 161 | uuid: aaaaa-...... 162 | state: ready 163 | ``` 164 | - `.status.credentials` includes info about credentials. 165 | - `generationTimestamp` timestamp from when credentials were updated. 166 | - `pull-robot-account` robot account name in configured registry organization with read permissions to the repository 167 | - `pull-secret` Secret of `dockerconfigjson` type that contains image repository pull robot account token with read permissions. 168 | - `push-robot-account`robot account name in configured registry organization with write permissions to the repository 169 | - `push-secret` Secret of `dockerconfigjson` type that contains image repository push robot account token with write permissions. 170 | - `.status.image` includes the full repository URL and current visibility. 171 | - `.status.notification` shows info about notifications. 172 | - `.status.state` shows whether image controller responded last operation request successfully or not. 173 | 174 | ## Legacy interaction via Component annotations 175 | 176 | Image controller uses annotations to interact with external services. 177 | 178 | - `image.redhat.com/generate`: An external service is able to request an image repository for an application component by setting this annotation on the corresponding Component CR. For initial request, the value should include field `visibility` to indicate the visibility of the created image repository in the remote registry, and it can be set again subsequently to change the visibility on demand. Note that, this annotation will be removed once requested operation finishes. 179 | 180 | Here is an example that requests a private image repository: 181 | 182 | ```yaml 183 | image.redhat.com/generate: '{"visibility": "private"}' 184 | ``` 185 | 186 | - `image.redhat.com/image`: image controller provides information of prepared resources to external services via this annotation, which includes the image repository URL, the visibility of that repository, and a secret name pointing to the created Kubernetes Secret. 187 | 188 | If something went wrong during the requested operation, this annotation will include a field `Message` with a corresponding error message. 189 | 190 | Here is an example that shows a public image repository is ready on Quay.io and a Secret named `secret-name` contains a robot account token and is available for image push. 
191 | 192 | ```yaml 193 | image.redhat.com/image: "{\"image\":\"quay.io/redhat-user-workloads/image-controller-system/city-transit/billing\",\"visibility\":\"public\",\"secret\":\"secret-name\"}" 194 | ``` 195 | -------------------------------------------------------------------------------- /architecture/internal-services.md: -------------------------------------------------------------------------------- 1 | # Internal Services 2 | 3 | ## Overview 4 | 5 | The Internal Services system consists of a kubernetes controller (running on an internal, private cluster) that is capable of watching and reconciling custom resources on a remote, public cluster. 6 | These custom resources describe which pipelines and parameters to use to execute internal jobs on an internal, private cluster. 7 | The results and outcome of the pipeline are added as an update to the custom resources. The remote, public cluster watches these resources to determine the outcome. 8 | 9 | ## Goals 10 | 11 | * The Internal Services system attempts to enable execution of internal jobs via a polling mechanism. 12 | * The Internal Services system provides execution results back to the requesting cluster. 13 | 14 | ## System Context 15 | 16 | The diagram below shows the interaction of the internal services controller and the [Release Service](./release-service.md) and shows the flow of custom resources 17 | 18 | ![](../diagrams/internal-services/internal-services-controller-overview.jpg) 19 | 20 | ## Terminology 21 | 22 | * **InternalRequest** - The custom resource that describes the internal service to trigger the internal job on. 23 | * **Remote Cluster** - A **public**, Konflux cluster residing outside a private network. 24 | * **Internal, Private Cluster** - A cluster that is not externally addressable but which has access to a private network. 25 | 26 | ## Resources 27 | Below is the list of CRs that the Internal Service is responsible for interacting with: 28 | 29 | ### CREATE 30 | 31 | | Custom Resource | When? | Why? | 32 | |-----------------|-------------------------------------------------------------------------------------------------------------------|-----------------------------------------------| 33 | | PipelineRun | Once a InternalRequest has been reconciled and the Pipeline to run has been discovered and is ready for execution | To perform the steps in the Internal Pipeline | 34 | 35 | ### READ 36 | 37 | | Custom Resource | When? | Why? | 38 | |------------------------|-------------------------------------------------------------|-------------------------------------| 39 | | InternalServicesConfig | During controller startup and during each reconcile attempt | To obtain configuration information | 40 | 41 | ### UPDATE 42 | 43 | | Custom Resource | When? | Why? | 44 | |------------------|--------------------------------------------------------------------|-----------------------------------------------------------------------------------| 45 | | InternalRequest | During the lifecycle of an attempt to complete an InternalRequest | To provide status for the execution of the Pipeline to the remote, public cluster | 46 | 47 | ### WATCH 48 | 49 | | Custom Resource | When? | Why? 
| 50 | |------------------|----------------------------------|------------------------------------------------------------------------------------------| 51 | | InternalRequest | Always | To provide an API to process an internal request | 52 | | PipelineRun | Once the PipelineRun is created | To relay the Internal PipelineRun status to the remote InternalRequest for viewing | 53 | 54 | ### Samples 55 | 56 | ```yaml 57 | apiVersion: appstudio.redhat.com/v1alpha1 58 | kind: InternalServicesConfig 59 | metadata: 60 | name: config 61 | namespace: internal-services 62 | spec: 63 | allowList: 64 | - managed-team-1-tenant 65 | - managed-team-2-tenant 66 | debug: false 67 | volumeClaim: 68 | name: pipeline 69 | size: 1Gi 70 | ``` 71 | 72 | ```yaml 73 | apiVersion: appstudio.redhat.com/v1alpha1 74 | kind: InternalRequest 75 | metadata: 76 | name: example 77 | namespace: default 78 | spec: 79 | request: "internal-system-abc-pipeline" 80 | params: 81 | foo: quay.io/redhat-isv/operator-pipelines-images:released 82 | bar: sha256:dd517544390c4c9636a555081205115a2fd3e83c326e64026f17f391e24bd2e5 83 | ``` 84 | 85 | ## Security Risk Mitigations 86 | 87 | Enabling remote, public clusters the ability to run internal jobs carrying certain security risks: 88 | 89 | * A user or a pipeline may attempt to run an internal job that it is not permitted to. 90 | * A user or a pipeline may attempt to run an arbitrary job 91 | 92 | The following list describes measures in places to mitigate those risks. 93 | 94 | * The creation of an `InternalRequest` custom resource requires permission on the Remote Cluster. 95 | * The Internal Services controller instance is configured to watch a specific cluster. 96 | * This cluster is provided to the controller as an argument to a secret that was added by the admin team. 97 | * The secret contains a KUBECONFIG file. 98 | * Only `Pipelines` that are defined and exist within the controller's namespace can be executed on the internal, private cluster. 99 | * The Internal Services controller only watches and acts on remote namespaces that are specifically allowed in the `Config` custom resource. 100 | 101 | ## Detailed Workflow 102 | 103 | > The following bullet points are numbered to align with the diagram displayed in the [System Context](#system-context) section above. 104 | 105 | 1. A `InternalRequest` CR is created by a pipeline run by a service account on the remote, public cluster as part of a pipeline. 106 | * The `spec.request` should reference the pipeline to execute. 107 | * The `spec.params` should reference the parameters to pass to the pipeline. 108 | 2. The `InternalRequest` CR is noticed by the Internal Services controller and attempts to reconcile it. 109 | 3. The Internal Services controller verifies whether the remote namespace is allowed in the `InternalServicesConfig` CR. 110 | * If it is not allowed, the Internal Services controller updates the `InternalRequest` CR with an invalid `status` and a rejection message and stops reconciling. 111 | * If it is allowed, a `PipelineRun` is created based on the `Pipeline` name found in the `spec.request` and parameter values found in `spec.params` from the `InternalRequest` CR. 112 | 4. The remote `InternalRequest` CR is updated by the Internal Services controller to mark as in-progress. 113 | 5. The internal job encapsulated by the `Pipeline` completes. 114 | 6. The `PipelineRun` is watched by the Internal Services controller. 115 | 7. 
Upon completion, the Internal Services controller updates the `status` section of the remote `InternalRequest` CR. 116 | * The calling pipeline on the remote, public cluster sees the update to the `status` of the `InternalRequest` CR and continues its execution. 117 | * By default, `PipelineRun` CRs are deleted once completed. 118 | * They are only preserved if an admin has set the `InternalServicesConfig` CR's `spec.debug` to `true`. 119 | -------------------------------------------------------------------------------- /architecture/jvm-build-service.md: -------------------------------------------------------------------------------- 1 | # Java Virtual Machine Build Service 2 | 3 | ## Overview 4 | 5 | The Java Virtual Machine Build Service (JBS, or JVM Build Service) is a controller that will rebuild Java and other JVM-language-based dependencies from source. 6 | 7 | The end user documentation is located [here](https://redhat-appstudio.github.io/docs.appstudio.io/Documentation/main/how-to-guides/Secure-your-supply-chain/proc_java_dependencies/) and contains background information. 8 | 9 | The Java ecosystem uses a binary distribution model, where binary jar files are downloaded from central repositories (such as Maven Central). This means that the only way to ensure an application is completely built from source is to rebuild all its component libraries from source in a trusted environment. Unfortunately, due to the nature of the Java ecosystem, this is not an easy process and would normally require a large amount of manual toil. 10 | 11 | Although the Java binary distribution model is very convenient, it does mean that you will inevitably use untrusted dependencies with unknown provenance maintained by external communities. In general, you don't know who has uploaded these artifacts, the environment that was used to build them, or how that build environment might be compromised. Building from source in a secure environment means that you can be completely sure as to where the code you are running has come from. 12 | 13 | JBS automates this process as much as possible, making it much less time-consuming to create builds that are built from source in a controlled environment. 14 | 15 | ### Dependencies 16 | 17 | JBS depends on Tekton, but is otherwise largely independent of the rest of Konflux. Given the right configuration it can be deployed outside of Konflux, which is mainly used for testing and development. 18 | 19 | ## Architecture 20 | 21 | ### Flow Overview 22 | 23 | The JVM Build service can be enabled on a specific namespace by creating a [`JBSConfig`](#JBSConfig) object with `enableRebuilds: true` specified. This will trigger the controller to create a deployment of the local Cache in the namespace. This cache has a number of purposes, but the primary one is to cache artifacts from Maven Central to speed up Java builds. 24 | 25 | Once it is enabled and the cache has started, the JVM Build Service will watch for `PipelineRun` objects with the `JAVA_COMMUNITY_DEPENDENCIES` results. This result will be present on end user builds that have dependencies from untrusted upstream sources. 26 | 27 | When one of these `PipelineRun` objects is observed, JBS will extract the dependency list and attempt to rebuild all the listed JVM artifacts from source. 28 | 29 | The first stage of this process is to create an [`ArtifactBuild`](#ArtifactBuild) object. This object represents a Maven `group:artifact:version` (GAV) coordinate of the library that needs to be built from source.
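A minimal sketch of such an object; the `gav` field name follows the upstream CRD definitions linked in the Resource Details section below, and the artifact coordinate and metadata names are only examples:

```yaml
apiVersion: jvmbuildservice.io/v1alpha1
kind: ArtifactBuild
metadata:
  name: netty-common-4.1.100.final   # any unique name within the namespace
  namespace: my-tenant
spec:
  # The Maven group:artifact:version coordinate to rebuild from source
  gav: io.netty:netty-common:4.1.100.Final
```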
30 | 31 | Once the `ArtifactBuild` object has been created, JBS will attempt to build it. The first step is to find the relevant source code. 32 | 33 | Once the source code location has been discovered, a [`DependencyBuild`](#DependencyBuild) object is created. There are generally fewer `DependencyBuild` objects than `ArtifactBuild` objects, as multiple artifacts can come from the same build. The controller will then try to build the artifact: first it runs a build discovery pipeline that attempts to determine possible ways of building the artifact. Once discovery is complete, it uses this information to attempt to build the artifact in a trial-and-error manner. 34 | 35 | The dependency discovery pipeline can check both configured shared repositories and the main repository 36 | (for example a Quay.io repository) for pre-existing builds. If a prior build is found, the pipeline will shortcut 37 | the build and refer to the found build artifacts instead. 38 | 39 | If a build is successful, the results are stored in a container image and the state is marked as complete; otherwise it is marked as failed and manual effort is required to fix the build. 40 | 41 | 42 | ### Components 43 | 44 | JBS consists of the following components: 45 | 46 | **Controller** 47 | 48 | The controller is a Kubernetes controller written in Go that orchestrates all aspects of the rebuild process. It kicks off all build-related pipelines and manages the state of the relevant Kubernetes objects. 49 | 50 | **Cache** 51 | 52 | The cache is a Quarkus Java application that caches artifacts from upstream repositories such as Maven Central. It also performs the following additional functions: 53 | 54 | - Handles container-image-based dependencies that have been rebuilt 55 | - Injects tracking metadata into class files to detect contaminated builds 56 | - Looks up source and build information from the build recipe database 57 | 58 | All dependency rebuilds are configured to only get their artifacts from the cache. 59 | 60 | **Build Request Processor** 61 | 62 | This is a multipurpose Quarkus CLI-based app that performs quite a few different functions depending on the parameters it is invoked with. It is packaged up into an image, and then run in different parts of various pipelines by the operator. 63 | 64 | In essence, any custom logic that is needed in a pipeline goes in here, and the operator invokes it with the correct arguments. This is a single container image that can perform a whole host of different functions, based on the parameters that it is invoked with. Its functions include: 65 | 66 | - Analysing a build and looking up its build information in the recipe database 67 | - Checking for pre-existing builds 68 | - Preprocessing a build to fix common problems, such as removing problematic plugins 69 | - Verifying that the results of a build match what is expected upstream 70 | - Checking if a build has been contaminated by upstream class files 71 | - Deploying the build to an image repository 72 | 73 | **Recipe Database** 74 | 75 | This is a Git repository that contains information on where to find sources and how to build various projects. It is located at https://github.com/redhat-appstudio/jvm-build-data. 76 | 77 | For full information on the format of this repository, please see the documentation located inside the repository itself.
78 | 79 | **Builder Images** 80 | 81 | The builder images are maintained at the [builder images repository](https://github.com/redhat-appstudio/jvm-build-service-builder-images/). 82 | 83 | **CLI** 84 | 85 | This is a Quarkus application provided for end users to interact with the system. 86 | 87 | ## Resource Details 88 | 89 | The JVM Build service provides the following CRDs. All CRDs are located in the [JVM Build Service repo](https://github.com/redhat-appstudio/jvm-build-service/tree/main/deploy/crds/base). They are generated from goang objects that reside [here](https://github.com/redhat-appstudio/jvm-build-service/tree/main/pkg/apis/jvmbuildservice/v1alpha1). 90 | 91 | ### `ArtifactBuild` 92 | 93 | This represents a request to rebuild an artifact. Creating this object will kick off the JVM Build Service rebuild process. We should have one of these objects for every upstream GAV we want to rebuild. 94 | 95 | These have the following states. 96 | 97 | **ArtifactBuildNew** 98 | 99 | Object has just been created. 100 | 101 | **ArtifactBuildDiscovering** 102 | 103 | The JVM Build Service is running a discovery pipeline to determine where the source code for this artifact is located. 104 | 105 | **ArtifactBuildMissing** 106 | 107 | We failed to find the source code location. The source code information must be added to the recipe database and the build retried. 108 | 109 | **ArtifactBuildFailed** 110 | 111 | The build failed. 112 | 113 | **ArtifactBuildComplete** 114 | 115 | The build was a success. 116 | 117 | ### `DependencyBuild` 118 | 119 | This represents a repository + tag combination we want to build. These are created automatically by the JVM Build Service Operator after it has looked up how to build. 120 | 121 | Once these have been created the operator first runs a 'discovery' pipeline, that attempts to figure out how to build the repo, which both examines the code base, and also pulls in build information from the Build Recipe Repository. The result of 122 | this is a list of build recipes that the operator will attempt one after the other. This object has the following states: 123 | 124 | **DependencyBuildStateNew** 125 | 126 | The object has just been created. 127 | 128 | **DependencyBuildStateAnalyzeBuild** 129 | 130 | The operator is running a pipeline to attempt to discover how to build the repository. 131 | 132 | **DependencyBuildStateBuilding** 133 | 134 | A build pipeline is running. 135 | 136 | **DependencyBuildStateComplete** 137 | 138 | The build was a success. 139 | 140 | **DependencyBuildStateFailed** 141 | 142 | All attempts to build this repository have failed. 143 | 144 | **DependencyBuildStateContaminated** 145 | 146 | This state means that the build was a success, but community artifacts were shaded into the output of the build. The operator 147 | will attempt to fix this automatically, by creating new `ArtifactBuild` objects for everything that was shaded into the output. 148 | Once these have completed the build is automatically retried. A good example of this is the Netty build, which gets contaminated 149 | by JCTools. If these artifact builds fail then the `DependencyBuild` will stay in this state indefinitely. 150 | 151 | ### `RebuiltArtifact` 152 | 153 | This represents a GAV that has been built and deployed to the image repo. It is mainly for internal bookkeeping purposes. 154 | 155 | ### `JBSConfig` 156 | 157 | This object is used to configure all aspects of the JVM Build Service. 
The creation of this object is what triggers the creation of the Cache deployment for the namespace, 158 | and is required for any rebuilds to happen in a given namespace. 159 | 160 | A minimal `JBSConfig` for rebuilding artifacts would be as follows: 161 | 162 | ```yaml 163 | apiVersion: jvmbuildservice.io/v1alpha1 164 | kind: JBSConfig 165 | metadata: 166 | name: jvm-build-config 167 | spec: 168 | enableRebuilds: "true" 169 | ``` 170 | 171 | In order to avoid multiple rebuilds of the same artifact, a user may configure a shared registry explicitly within the 172 | `JBSConfig` custom resource or by using the provided CLI. A shared registry is one that may be shared by many users. 173 | 174 | For example: 175 | 176 | ```yaml 177 | apiVersion: jvmbuildservice.io/v1alpha1 178 | kind: JBSConfig 179 | metadata: 180 | name: jvm-build-config 181 | spec: 182 | enableRebuilds: "true" 183 | registry: 184 | owner: an-owner 185 | sharedRegistries: 186 | - host: quay.io 187 | insecure: true 188 | owner: my-team-owner 189 | repository: test-jvm-namespace/jvm-build-service-artifacts 190 | ``` 191 | 192 | This assumes that another user has configured their registry to deploy builds to `my-team-owner`. For example: 193 | 194 | ```yaml 195 | spec: 196 | registry: 197 | owner: my-team-owner 198 | ``` 199 | 200 | ### `SystemConfig` 201 | 202 | This is a singleton object that configures the JVM Build System. The main configuration it provides is the builder images to use. 203 | 204 | -------------------------------------------------------------------------------- /architecture/pipeline-service.md: -------------------------------------------------------------------------------- 1 | # Pipeline Service 2 | 3 | Pipeline Service provides Tekton APIs and services to RHTAP. 4 | In the initial phase of RHTAP, Pipeline Service will be provided by a stock 5 | installation of the OpenShift Pipelines operator. 6 | This deployed version will be the a candidate build of the OpenShift Pipelines 7 | operator from a Red Hat build system. 8 | 9 | ![Pipelines operator deployment](../diagrams/pipeline-service.drawio.svg) 10 | 11 | ## APIs and Services 12 | 13 | Pipeline Service provides the following: 14 | 15 | - Tekton APIs directly through its custom resource definitions. 16 | - Container image signing and provenance attestations through Tekton Chains. 17 | - Archiving of `PipelineRuns`, `TaskRuns`, and associated logs through Tekton 18 | Results. 19 | 20 | Pipeline Service also exposes the following ingress points: 21 | 22 | - Pipelines as Code controller: this is a `Route` that receives webhook events 23 | from source code repositories. 24 | - Tekton Results API: this is an `Ingress` that serves Tekton Results data 25 | over a RESTful API. Clients authenticate with the same `Bearer` token used to 26 | authenticate Kubernetes requests. 27 | 28 | ## Deployment Configuration 29 | 30 | The deployment of the OpenShift Pipelines operator will have the following 31 | notable configurations: 32 | 33 | - Tekton Triggers will be disabled entirely. 34 | - The pruner (provided by the Pipelines operator) will be disabled in favor of 35 | pruning via Tekton Results. 36 | - Pipelines as Code will link the results of pipeline tasks to an appropriate 37 | [Hybrid Application Console (HAC)](./hybrid-application-console.md) URL. 
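As a rough illustration of the Tekton Results ingress described above, a result listing can be fetched with the same Bearer token used against the Kubernetes API. The host and namespace are placeholders, and the path follows the upstream Tekton Results REST API:

```
# Any token accepted by the cluster's Kubernetes API works here
$ TOKEN="$(oc whoami -t)"
$ curl -sS -H "Authorization: Bearer ${TOKEN}" \
    "https://tekton-results.example.com/apis/results.tekton.dev/v1alpha2/parents/my-tenant/results"
```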
38 | 39 | ## Architecture 40 | 41 | ### Diagram 42 | 43 | Legend: 44 | * Blue: managed by Pipeline Service 45 | * Yellow: not managed by Pipeline Service 46 | 47 | ![Architecture diagram](../diagrams/pipeline-service/architecture.jpg) 48 | 49 | ### Tekton Pipelines 50 | 51 | #### appstudio-pipeline Service Account 52 | 53 | The service should offer users a service account for running pipelines. 54 | However, the automatic generation of a 'pipeline' service account within namespaces has been disabled in the component because it was found that the permissions granted to that account were overly broad. 55 | 56 | The Pipeline Service component creates the `appstudio-pipelines-scc` ClusterRole, but does not bind this role to any service account. 57 | 58 | The [CodeReadyToolchain](https://github.com/codeready-toolchain) platform (CRT) creates the `appstudio-pipelines-runner` ClusterRole on each tenant/member cluster. It also creates the `appstudio-pipeline` ServiceAccount on every tenant namespace as well as the role bindings for the `appstudio-pipeline` service account within the namespace. 59 | 60 | ### Tekton Chains 61 | 62 | #### Signing Secret 63 | 64 | The signing secret is unique to each cluster, and is a long lived secret. 65 | Rotating the secret is extremely disruptive, as it invalidates any artifact that was built using that secret. 66 | 67 | Moving to keyless signing would solve the issue and would be the long-term solution. 68 | 69 | The public-key is stored in `openshift-pipelines` namespace as a Secret named `public-key`. The secret is readable by all authenticated users to allow them to verify signed artifacts. 70 | 71 | ### Tekton Results 72 | 73 | #### Storage 74 | 75 | AWS RDS and S3 are used to handle the storage needs of Tekton Results. 76 | 77 | ### Pipeline as Code 78 | 79 | #### Secret management 80 | 81 | The secrets for the GitHub Application are stored in Vault, and synchronized as an ExternalSecret. The refresh rate for the synchronization is aggressive so that rotating the secrets do not generate too long of an outage. 82 | 83 | ## Repository 84 | 85 | The official repository for the Pipeline Service can be found at https://github.com/openshift-pipelines/pipeline-service. This repository contains the source code, configuration files, and documentation needed to deploy and consume the service. 86 | -------------------------------------------------------------------------------- /architecture/release-service.md: -------------------------------------------------------------------------------- 1 | # Release Service 2 | 3 | ## Overview 4 | 5 | The **Release service** is composed of a Release Operator that can create and orchestrate release pipelines defined 6 | in Release Strategies to deliver content. 7 | 8 | The main API that is exposed is called the **Release** custom resource. This custom resource is used by the Release 9 | Service as input to request a release of content for an **Application**. 10 | 11 | Additional custom resources are used to compose the system. **ReleasePlan** and **ReleasePlanAdmission** define the 12 | relationship between **Development** Workspaces and **Managed** Workspaces. 13 | 14 | **Release Strategies** are referenced in **ReleasePlanAdmissions** and are used to define which pipeline should be 15 | executed to deliver content. 16 | 17 | In addition, the Release service ensures that no violations in the [Enterprise Contract] exist prior to releasing content. 
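A minimal sketch of a manually created Release; the names are placeholders, and the two `spec` fields are the ones referenced in the Detailed Workflow section below:

```yaml
apiVersion: appstudio.redhat.com/v1alpha1
kind: Release
metadata:
  generateName: my-app-release-
  namespace: dev-team-tenant        # the Development Workspace
spec:
  snapshot: my-app-snapshot-abc123  # Snapshot to release
  releasePlan: my-app-to-prod       # ReleasePlan pointing at the Managed Workspace
```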
18 | 19 | ## System Context 20 | 21 | The diagram below shows the interaction of the release service and other services. 22 | 23 | ![](../diagrams/integration-service/integration-service-data-flow.jpg) 24 | 25 | ## Application Context 26 | 27 | The diagram below shows the flow of custom resources and the orchestration of pipelines by the release service. 28 | 29 | ![](../diagrams/release-service/konflux-release-service-data-flow.jpg) 30 | 31 | ## Terminology 32 | 33 | **Snapshot** - The custom resource that contains the list of all Components of an Application with their Component Image digests. Once created, the list of Components with their images is immutable. The Integration service updates the status of the resource to reflect the testing outcome. 34 | **Release Pipeline** - An instance of a Pipeline that is orchestrated by the Release Service. 35 | **Bundle** - A tekton bundle (image) that contains Tekton resources. 36 | **Development Workspace** - A workspace shared by 1 or more team members when code is built and tested. 37 | **Managed Workspace** - A controlled workspace that is managed by an SRE or Release Engineering team. This is where Release Pipelines execute. 38 | 39 | ## Resources 40 | Below is the list of CRs that the Release service is responsible for interacting with: 41 | 42 | ### CREATE 43 | 44 | | Custom Resource | When? | Why? | 45 | |-----------------------------|-------------------------------------------------------------|-----------------------------------------------------------------------------------------------| 46 | | SnapshotEnvironmentBinding | When deploying the Snapshot to an Environment | To create a SnapshotEnvironmentBinding for the referenced Environment when one does not exist | 47 | | PipelineRun | Once a Release has been composed and is ready for execution | To perform the steps in the Release Pipeline | 48 | 49 | ### READ 50 | 51 | | Custom Resources | When? | Why? | 52 | |-------------------------|--------------------------------------------------------|------------------------------------------------------------------| 53 | | Application & Component | When validating ReleasePlans and ReleasePlanAdmissions | To ensure data consistency | 54 | | ReleasePlan | When starting to process a Release | To determine the target workspace to run the Release Pipeline in | 55 | | ReleasePlanAdmission | When starting to process a Release | To determine how to compose the Release Pipeline | 56 | | Snapshot | Creating and triggering a Release Pipeline | To determine which components to release | 57 | 58 | ### UPDATE 59 | 60 | | Custom Resource | When? | Why? | 61 | |----------------------------|----------------------------------------------------------|------------------------------------------------------------------| 62 | | SnapshotEnvironmentBinding | When deploying the Snapshot to an Environment | To update existing SnapshotEnvironmentBinding for a new Snapshot | 63 | | Release | During the lifecycle of an attempt to release a Snapshot | To provide status for the execution of the Release Pipeline | 64 | 65 | ### WATCH 66 | 67 | | Custom Resource | When? | Why? 
| 68 | |----------------------------|------------------------------------------------------------|--------------------------------------------------------------------| 69 | | Release | Always | To provide an API to trigger a release | 70 | | PipelineRun | Once the PipelineRun is created | To relay the Release PipelineRun status to the Release for viewing | 71 | | SnapshotEnvironmentBinding | After the SnapshotEnvironmentBinding is created or updated | To relay the GitOps deployment status to the Release for viewing | 72 | 73 | #### Annotations/Labels 74 | 75 | Following the [annotation guidelines](https://docs.google.com/document/d/1gyXM3pkKFFfxHZnopBi_53vREFWhwA0pFUuIhopDuEo/edit#), the Release Service sets the below annotations on PipelineRuns. 76 | ``` 77 | "pipelines.appstudio.openshift.io/type": "release", 78 | "appstudio.openshift.io/application": "", 79 | "release.appstudio.openshift.io/name": "", 80 | "release.appstudio.openshift.io/namespace": "" 81 | ``` 82 | The Release service will copy the annotations and labels from the Release CR and append those to the Release PipelineRuns for traceability across the system per [Labels and Annotations for StoneSoup pipelines](https://docs.google.com/document/d/1fJq4LDakLfcAPvOOoxxZNWJ_cuQ1ew9jfBjWa-fEGLE/edit#) and [StoneSoup builds and tests PRs](https://docs.google.com/document/d/113XTplEWRM63aIzk7WwgLruUBu2O7xVy-Zd_U6yjYr0/edit#). 83 | 84 | ## Detailed Workflow 85 | 86 | 1. A Release CR is created automatically by the Integration Service or manually by a user. 87 | * The `spec.snapshot` should reference the Snapshot to release. 88 | * The `spec.releasePlan` should reference the ReleasePlan to use. 89 | 2. Find the matching ReleasePlanAdmission in the `spec.target` namespace specified in the ReleasePlan. 90 | 3. Extract the `spec.pipelineRef`, `spec.serviceAccount`, and `spec.policy` from the ReleasePlanAdmission. 91 | 4. Create a Release PipelineRun using the above info. 92 | 5. Watch the Release PipelineRun and update the Release `status` with progress and outcome. 93 | 6. If the ReleasePlanAdmission specified a `spec.environment`, then do the following: 94 | * Copy the Snapshot to the managed workspace. 95 | * Verify that the Application and Components have been copied to the managed workspace. 96 | * Create or update the SnapshotEnvironmentBinding in the managed workspace. 97 | * Watch the SnapshotEnvironmentBinding to relay deployment status to the Release CR `status`. 98 | 99 | ### Dependencies 100 | 101 | The [Release Service](./release-service.md) is dependent on the following services: 102 | - [Pipeline Service](./pipeline-service.md) 103 | - Pipeline execution, Pipeline logging 104 | - [Integration Service](./integration-service.md) 105 | - Facilitates automated testing of content produced by the build pipelines 106 | - [GitOps Service](./gitops-service.md) 107 | - Provides the facility to create 108 | - Snapshots defining sets of Builds to release 109 | - Environments to deploy the Application to 110 | - SnapshotEnvironmentBindings to have the Snapshot of the Application deployed to a specific Environment 111 | - [Enterprise Contract Service](./enterprise-contract.md) 112 | - Provides facilities to validate whether content has passed the Enterprise Contract.
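To tie the workflow above together, a hedged sketch of a ReleasePlan/ReleasePlanAdmission pair; only the fields named in the workflow steps are shown, the remaining names and the resolver-style pipeline reference are assumptions, and the exact schema should be checked against the Release Service API reference:

```yaml
apiVersion: appstudio.redhat.com/v1alpha1
kind: ReleasePlan
metadata:
  name: my-app-to-prod
  namespace: dev-team-tenant          # Development Workspace
spec:
  application: my-app                 # assumed field linking the plan to an Application
  target: managed-team-tenant         # namespace searched for a matching ReleasePlanAdmission
---
apiVersion: appstudio.redhat.com/v1alpha1
kind: ReleasePlanAdmission
metadata:
  name: my-app-from-dev
  namespace: managed-team-tenant      # Managed Workspace where the Release Pipeline runs
spec:
  # Fields used by workflow steps 3 and 6 above; matching/selection fields are omitted here.
  pipelineRef:                        # assumed resolver-style reference to the Release Pipeline
    resolver: git
    params:
      - name: url
        value: https://github.com/example-org/release-pipelines.git
      - name: revision
        value: main
      - name: pathInRepo
        value: pipelines/release.yaml
  serviceAccount: release-service-account
  policy: my-enterprise-contract-policy
  environment: production             # optional; triggers the SnapshotEnvironmentBinding flow
```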
113 | 114 | ## References 115 | 116 | [Enterprise Contract]: ./enterprise-contract.md 117 | [Integration Service]: ./integration-service.md 118 | [GitOps Service]: ./gitops-service.md 119 | [Pipeline Service]: ./pipeline-service.md 120 | -------------------------------------------------------------------------------- /architecture/service-provider-integration.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/architecture/service-provider-integration.md -------------------------------------------------------------------------------- /architecture/workspace-and-terminal-service.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/architecture/workspace-and-terminal-service.md -------------------------------------------------------------------------------- /diagrams/ADR-0003/interacting-with-internal-services.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0003/interacting-with-internal-services.jpg -------------------------------------------------------------------------------- /diagrams/ADR-0008/README.md: -------------------------------------------------------------------------------- 1 | # ADR-0008 Images 2 | 3 | Images in this directory are exported from the [miro board](https://miro.com/app/board/uXjVP77ztI4=/) where they were originally drafted. -------------------------------------------------------------------------------- /diagrams/ADR-0008/binding-controller.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0008/binding-controller.jpg -------------------------------------------------------------------------------- /diagrams/ADR-0008/deprovision-loop.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0008/deprovision-loop.jpg -------------------------------------------------------------------------------- /diagrams/ADR-0008/dt-dtc-lifecycle.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0008/dt-dtc-lifecycle.jpg -------------------------------------------------------------------------------- /diagrams/ADR-0008/flow-byoc-manual-creation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0008/flow-byoc-manual-creation.jpg -------------------------------------------------------------------------------- /diagrams/ADR-0008/flow-cluster-manual-creation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0008/flow-cluster-manual-creation.jpg -------------------------------------------------------------------------------- 
/diagrams/ADR-0008/flow-sandbox-manual-creation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0008/flow-sandbox-manual-creation.jpg -------------------------------------------------------------------------------- /diagrams/ADR-0008/provision-loop.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0008/provision-loop.jpg -------------------------------------------------------------------------------- /diagrams/ADR-0015/component-phase.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0015/component-phase.jpg -------------------------------------------------------------------------------- /diagrams/ADR-0015/composite-phase.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0015/composite-phase.jpg -------------------------------------------------------------------------------- /diagrams/ADR-0016/promotion-logic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0016/promotion-logic.jpg -------------------------------------------------------------------------------- /diagrams/ADR-0023/git-references-ITS.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0023/git-references-ITS.jpg -------------------------------------------------------------------------------- /diagrams/ADR-0023/tekton-bundle-ITS.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0023/tekton-bundle-ITS.jpg -------------------------------------------------------------------------------- /diagrams/ADR-0023/tekton-pipeline-definition-git-resolver.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0023/tekton-pipeline-definition-git-resolver.jpg -------------------------------------------------------------------------------- /diagrams/ADR-0024/flowchart.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0024/flowchart.jpg -------------------------------------------------------------------------------- /diagrams/ADR-0035/chaos-resilience.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0035/chaos-resilience.png -------------------------------------------------------------------------------- /diagrams/ADR-0035/chaos-sla.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/ADR-0035/chaos-sla.png -------------------------------------------------------------------------------- /diagrams/hybrid-application-service/cdq-detection.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/hybrid-application-service/cdq-detection.jpg -------------------------------------------------------------------------------- /diagrams/hybrid-application-service/has-application-component-create.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/hybrid-application-service/has-application-component-create.jpg -------------------------------------------------------------------------------- /diagrams/hybrid-application-service/has-create-application-seqeuence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/hybrid-application-service/has-create-application-seqeuence.png -------------------------------------------------------------------------------- /diagrams/index.md: -------------------------------------------------------------------------------- 1 | # Diagrams 2 | 3 | ## System Context 4 | 5 | The diagram below shows the interaction of the Konflux with other systems and environments. 6 | 7 | ## Application Context 8 | 9 | ![](../diagrams/konflux.drawio.svg) 10 | 11 | ## Workspace Layout 12 | 13 | ![](../diagrams/konflux-workspace-layout.drawio.svg) 14 | 15 | ## Personal Data 16 | 17 | ![](../diagrams/personal-data.drawio.svg) 18 | -------------------------------------------------------------------------------- /diagrams/integration-service/integration-service-data-flow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/integration-service/integration-service-data-flow.jpg -------------------------------------------------------------------------------- /diagrams/internal-services/internal-services-controller-overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/internal-services/internal-services-controller-overview.jpg -------------------------------------------------------------------------------- /diagrams/pipeline-service/architecture.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/pipeline-service/architecture.jpg -------------------------------------------------------------------------------- /diagrams/release-service/konflux-release-service-data-flow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/konflux-ci/architecture/409fa82f315f41fb163f6cdae2d302ea5894bb35/diagrams/release-service/konflux-release-service-data-flow.jpg 
-------------------------------------------------------------------------------- /ref/config.yaml: -------------------------------------------------------------------------------- 1 | processor: 2 | ignoreTypes: 3 | - "(Elasticsearch|Kibana|ApmServer)List$" 4 | - "(Elasticsearch|Kibana|ApmServer)Health$" 5 | - "(Elasticsearch|Kibana|ApmServer|Reconciler)Status$" 6 | - "ElasticsearchSettings$" 7 | - "Associa(ted|tor|tionStatus|tionConf)$" 8 | ignoreFields: 9 | - "status$" 10 | - "TypeMeta$" 11 | 12 | render: 13 | kubernetesVersion: 1.22 14 | -------------------------------------------------------------------------------- /ref/index.md: -------------------------------------------------------------------------------- 1 | # API Reference 2 | 3 | ## Konflux 4 | 5 | - [Application and Environment API](application-environment-api.md): Hybrid Application Service (HAS) provides an abstract way to define applications within the cloud. Also includes shared APIs for defining and managing environments. 6 | - [Service Provider](service-provider.md): Responsible for providing a service-provider-neutral way of obtaining authentication tokens so that tools accessing the service provider do not have to deal with the intricacies of getting the access tokens from the different service providers. 7 | - [GitOps Service](gitops.md): Responsible for synchronizing source K8s resources in a Git repository (as the single of truth), with target OpenShift/K8s cluster(s). 8 | - [Pipeline Service](pipeline-service.md): Responsible for executing Tekton `PipelineRuns` and providing access to Tekton services. 9 | - [JVM Build Service API](jvm-build-service.md): Responsible for rebuilding Java components of an application from the source. 10 | - [Integration Service API](integration-service.md): Responsible for initiating functional integration tests for an [Application] when one of its [Components] gets a new build. 11 | - [Release Service API](release-service.md): Responsible for carrying out [Releases] initiated either by the user or automatically by [integration-service](integration-service.md). 12 | - [Enterprise Contract API](enterprise-contract.md): Used by administrators to express policy. Interpreted by [Release Service](release-service.md) as a policy enforcement point. 13 | 14 | ## Control Plane 15 | 16 | [Application]: application-environment-api.md#application 17 | [Components]: application-environment-api.md#component 18 | [Releases]: release-service.md#release 19 | -------------------------------------------------------------------------------- /tools/security-tools.MD: -------------------------------------------------------------------------------- 1 | # Security Guild recommended tools 2 | 3 | This document lists the available tools that we researched for performing linting, Static Code Analysis, and vulnerability scanning on different platforms and languages. The goal is to introduce one of more of these tools into Konflux for customers to use. 4 | 5 | From our discussions in the Secure Engineering Guild, our current position is that we would encourage teams to use the tools highlighted in this list for consistency. But if a team finds a different tool that's a better fit-for-purpose, that is also fine. The ProdSec org also supports some tools, and some tools will be integrated into our product over time (taking the responsibility of implementation off of our teams). Where we have a recommendation, it makes sense to follow it if it fits your team/product's needs. 
6 | Per our discussions with ProdSec, we should not use tools that could potentially "phone home," or any that could otherwise expose information about embargoed vulnerabilities, unless they have previously approved our particular use of the tool. 7 | 8 | ### Linting Tools 9 | 10 | #### Golang 11 | **golangci-lint** - https://github.com/golangci/golangci-lint 12 | 13 | ``` 14 | Usage: 15 | $ cd my/golang/project 16 | $ golangci-lint run 17 | ``` 18 | 19 | #### Docker 20 | **hadolint** - https://github.com/hadolint/hadolint 21 | _Note: In order to lint Bash code inside a Dockerfile, it uses ShellCheck internally._ 22 | 23 | ``` 24 | Usage: 25 | $ hadolint Dockerfile 26 | (or) 27 | $ docker run --rm -i ghcr.io/hadolint/hadolint < Dockerfile 28 | ``` 29 | 30 | ### Vulnerability Scanners 31 | 32 | **clair** - https://github.com/quay/clair 33 | - quay.io uses Clair internally and the project is officially under them. 34 | - Check these [docs](https://quay.github.io/clair/howto/deployment.html) to understand the deployment models Clair currently uses. 35 | - For teams using quay.io as their container image registry, we enjoy the benefit of these scans via their website. You can check the results under the vulnerabilities tab of an image. 36 | 37 | _Note: [clair-in-ci](https://quay.io/repository/konflux-ci/clair-in-ci) is a feature which includes security scanning via Clair. It is enabled by default for any Pipelines created in Konflux. A Tekton Task is available that can be used to run Clair in your own Pipelines [here](https://github.com/redhat-appstudio/build-definitions/tree/main/task/clair-scan/)._ 38 | 39 | ### SAST Tools 40 | 41 | **_gosec_** - https://github.com/securego/gosec 42 | 43 | **find-sec-bugs** - https://github.com/find-sec-bugs/find-sec-bugs 44 | 45 | **snyk** - https://github.com/snyk/cli 46 | 47 | _Note: Konflux uses Snyk to perform static analysis. A Tekton Task is available that can be used to run Snyk in your own Pipelines [here](https://github.com/redhat-appstudio/build-definitions/blob/main/task/sast-snyk-check)._ 48 | 49 | **checkov** - https://github.com/bridgecrewio/checkov 50 | - Checkov uses a common command line interface to manage and analyze infrastructure as code (IaC) scan results across platforms such as Terraform, CloudFormation, Kubernetes, Helm, ARM Templates and the Serverless framework. ([source](https://www.checkov.io/)) 51 | - As mentioned above, Checkov covers most cloud platforms / tools, including Kubernetes / OpenShift. It enforces a set of best practices to be followed for every platform. 52 | - It also integrates well with [kustomize](https://www.checkov.io/7.Scan%20Examples/Kustomize.html) - we could simply scan a kustomize directory, and it would check everything within that. 53 | 54 | 55 | [kube-score](https://github.com/zegl/kube-score), [kubesec](https://github.com/controlplaneio/kubesec), [kubeconform](https://github.com/yannh/kubeconform), [kubelinter](https://github.com/stackrox/kube-linter) were some other tools that were explored. Teams are welcome to experiment with these or other tools if none of the above-mentioned tools meet your requirements. But as mentioned earlier, beware of any security implications of using a tool. Checking with the ProdSec team on the approval status is a good first step when considering a new tool. 56 | --------------------------------------------------------------------------------