├── .cspell-config.json
├── .github
│   └── workflows
│       ├── pr-link-check.yml
│       └── scheduled-link-check.yml
├── .gitignore
├── .lycheeignore
├── .markdownlint-cli2.yaml
├── CONTRIBUTING.md
├── DCO
├── LICENSE
├── Makefile
├── OWNERS
├── OWNERS_ALIASES
├── README.md
├── SECURITY_CONTACTS
├── design
│   ├── OWNERS
│   ├── _template.md
│   ├── api-design-principles.md
│   ├── bare-metal-style-guide.md
│   ├── baremetal-operator
│   │   ├── annotation-for-power-cycling-and-deleting-failed-nodes.md
│   │   ├── bios-config.md
│   │   ├── bmc-address.md
│   │   ├── bmc-events.md
│   │   ├── bmh-v1beta1.md
│   │   ├── bmh_live_iso.md
│   │   ├── bmh_non-bootable_iso.md
│   │   ├── bmo-ci-decoupling.md
│   │   ├── bmo-part-of-capm3.md
│   │   ├── bulk-set-bios-config.md
│   │   ├── deploy-steps.md
│   │   ├── detached-annotation.md
│   │   ├── disable-power-off.md
│   │   ├── explicit-boot-mode.md
│   │   ├── external-introspection.md
│   │   ├── firmware-interface.md
│   │   ├── hardware-status.md
│   │   ├── hardwaredata_crd.md
│   │   ├── host-config-drive.md
│   │   ├── host-live-updates.md
│   │   ├── how-ironic-works.md
│   │   ├── image-builder-integration.md
│   │   ├── implicit-boot-mode.md
│   │   ├── inspection-api.md
│   │   ├── inspector-deprecation.md
│   │   ├── kubebuilder-migration.md
│   │   ├── limit-hosts-provisioning.md
│   │   ├── managing-provisioning-dependencies.md
│   │   ├── raid-api.md
│   │   ├── raid-disk-controller.md
│   │   ├── reboot-interface.md
│   │   ├── remove-host.md
│   │   ├── secure-boot.md
│   │   ├── sub-states.md
│   │   ├── uefi-http-boot.md
│   │   ├── unmanaged-state.md
│   │   └── user-defined-root-device-hints.md
│   ├── cluster-api-provider-metal3
│   │   ├── allow_disabling_node_disk_cleaning.md
│   │   ├── capm3-remediation-controller-improvement-proposal.md
│   │   ├── capm3-remediation-controller-proposal.md
│   │   ├── multi-tenancy_contract.md
│   │   └── node_reuse.md
│   ├── community
│   │   ├── book-proposal.md
│   │   ├── cncf-sandbox-application.adoc
│   │   └── foundation-proposal.md
│   ├── component-relationships.png
│   ├── fd-support-kcp.md
│   ├── hardware-classification-controller
│   │   ├── expected-hardware-configuration-validation.md
│   │   ├── hwcc_sequence_diagram.png
│   │   ├── support-for-error-count-parameter-hwcc.md
│   │   ├── support-for-new-parameters-hwcc-DiskAndNIC.md
│   │   └── support-for-new-parameters-hwcc.md
│   ├── helm-charts
│   │   └── single-pod-helm-chart.md
│   ├── image-ownership.md
│   ├── images
│   │   ├── ironic_authentication.drawio
│   │   ├── ironic_authentication_mTLS.png
│   │   └── node_reuse_flow.svg
│   ├── ip-address-manager
│   │   └── ip-address-management-for-networkdata.md
│   ├── ironic-debuggability-improvement.md
│   ├── ironic-standalone-operator.md
│   ├── ironic_authentication.md
│   ├── metadata-handling.md
│   ├── nodes-machines-and-hosts.md
│   ├── physical-network-api-prototype.md
│   ├── reproducible-metal3-dev-env.md
│   ├── sync-labels-bmh-to-node.md
│   └── use-ironic.md
├── docs
│   ├── Dockerfile
│   ├── README.md
│   ├── prerequisites.md
│   ├── presentations
│   │   ├── README.md
│   │   └── metal3-overview
│   │       ├── metal3-components.png
│   │       ├── metal3-integration-capi.png
│   │       ├── metal3-overview.html
│   │       ├── slide-example-1.png
│   │       ├── slide-example-2.png
│   │       └── slide-example-3.png
│   └── user-guide
│       ├── README.md
│       ├── book.toml
│       ├── src
│       │   ├── SUMMARY.md
│       │   ├── baremetal
│       │   │   └── guide.md
│       │   ├── bmo
│       │   │   ├── advanced_instance_customization.md
│       │   │   ├── automated_cleaning.md
│       │   │   ├── automatic_secure_boot.md
│       │   │   ├── detached_annotation.md
│       │   │   ├── external_inspection.md
│       │   │   ├── externally_provisioned.md
│       │   │   ├── features.md
│       │   │   ├── firmware_settings.md
│       │   │   ├── firmware_updates.md
│       │   │   ├── images
│       │   │   │   ├── ipa-provisioning.plantuml
│       │   │   │   ├── ipa-provisioning.png
│       │   │   │   ├── provisioning-states.dot
│       │   │   │   └── provisioning-states.png
│       │   │   ├── inspect_annotation.md
│       │   │   ├── install_baremetal_operator.md
│       │   │   ├── instance_customization.md
│       │   │   ├── introduction.md
│       │   │   ├── ironic_bmo_configmap.env
│       │   │   ├── live-iso.md
│       │   │   ├── live_updates_servicing.md
│       │   │   ├── provisioning.md
│       │   │   ├── raid.md
│       │   │   ├── reboot_annotation.md
│       │   │   ├── root_device_hints.md
│       │   │   ├── state_machine.md
│       │   │   ├── status_annotation.md
│       │   │   └── supported_hardware.md
│       │   ├── capm3
│       │   │   ├── automated_cleaning.md
│       │   │   ├── clusterclass.md
│       │   │   ├── features.md
│       │   │   ├── images
│       │   │   │   ├── object-ref.plantuml
│       │   │   │   └── object-ref.svg
│       │   │   ├── installation_guide.md
│       │   │   ├── introduction.md
│       │   │   ├── label_sync.md
│       │   │   ├── node_reuse.md
│       │   │   ├── pivoting.md
│       │   │   └── remediaton.md
│       │   ├── developer_environment
│       │   │   └── tryit.md
│       │   ├── images
│       │   │   ├── capi-machines.pantuml
│       │   │   ├── capi-machines.png
│       │   │   ├── metal3-capi-objects.plantuml
│       │   │   ├── metal3-capi-objects.png
│       │   │   ├── metal3-color.svg
│       │   │   ├── metal3-stack.plantuml
│       │   │   └── metal3-stack.png
│       │   ├── introduction.md
│       │   ├── ipam
│       │   │   ├── introduction.md
│       │   │   └── ipam_installation.md
│       │   ├── ironic
│       │   │   ├── introduction.md
│       │   │   ├── ironic-container-images.md
│       │   │   ├── ironic-python-agent.md
│       │   │   ├── ironic_installation.md
│       │   │   └── ironic_variables.md
│       │   ├── irso
│       │   │   ├── database.md
│       │   │   ├── install-basics.md
│       │   │   └── introduction.md
│       │   ├── project-overview.md
│       │   ├── quick-start.md
│       │   ├── reference.md
│       │   ├── security_policy.md
│       │   ├── troubleshooting.md
│       │   └── version_support.md
│       └── theme
│           └── favicon.svg
├── hack
│   ├── markdownlint.sh
│   ├── shellcheck.sh
│   ├── spellcheck.sh
│   └── tools
│       ├── go.mod
│       ├── go.sum
│       └── releasetags
│           └── releasetags.go
├── images
│   ├── architectural_diagrams
│   │   ├── cluster_api.svg
│   │   ├── components.svg
│   │   ├── ipam.svg
│   │   └── object_representation.svg
│   ├── high-level-arch.png
│   ├── metal3-.svg
│   ├── metal3-banner.pdf
│   ├── metal3-black.png
│   ├── metal3-dev-env-transparent.png
│   ├── metal3-dev-env.png
│   ├── metal3-dev-env.svg
│   ├── metal3-website-sticker.png
│   ├── metal3-website-sticker.svg
│   ├── metal3-white.png
│   ├── metal3-white.svg
│   ├── metal3.png
│   ├── metal3.svg
│   ├── metal3_facet-black-text.png
│   ├── metal3_facet-blacktext.svg
│   ├── metal3_facet-whitetext.png
│   └── metal3_facet-whitetext.svg
├── netlify.toml
├── processes
│   ├── managing-reviewers.md
│   ├── releasing.md
│   ├── reviewer-permissions-migration.md
│   ├── roadmap.md
│   └── triage.md
└── security
    └── self-assessment.md
/.github/workflows/pr-link-check.yml:
--------------------------------------------------------------------------------
1 | name: PR Check Links
2 |
3 | on:
4 | pull_request:
5 | types: [opened, edited, reopened, synchronize, ready_for_review]
6 |
7 | permissions:
8 | contents: read
9 |
10 | jobs:
11 | check-pr-links:
12 | uses: metal3-io/project-infra/.github/workflows/pr-link-check.yml@main
13 | with:
14 | upstream: https://github.com/metal3-io/metal3-docs.git
15 |
--------------------------------------------------------------------------------
/.github/workflows/scheduled-link-check.yml:
--------------------------------------------------------------------------------
1 | name: Scheduled Link Check
2 |
3 | on:
4 | workflow_dispatch:
5 | schedule:
6 | - cron: "0 0 1 * *"
7 | repository_dispatch:
8 | # run manually
9 | types: [check-links]
10 |
11 | permissions:
12 | contents: read
13 | issues: write
14 |
15 | jobs:
16 | check-links:
17 | uses: metal3-io/project-infra/.github/workflows/scheduled-link-check.yml@main
18 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | design/*.orig.*
2 | design/*.toc.*
3 |
4 | # Ignore all binaries
5 | # from being committed.
6 | hack/tools/bin
7 | # Ignore mdbook boilerplate
8 | # from being committed.
9 | docs/user-guide/book
10 |
11 | # Ignore revealjs plugin
12 | # and dist directories
13 | # from being committed
14 | docs/presentations/dist
15 | docs/presentations/plugin
16 |
17 | # Development containers (https://containers.dev/)
18 | .devcontainer
19 |
--------------------------------------------------------------------------------
/.lycheeignore:
--------------------------------------------------------------------------------
1 | # URLs under https://github.com/issues give a 404 error if the user is not logged in.
2 | https://github.com/issues
3 |
4 | # Pages with rigorous bot detection
5 | https://www.dell.com/
6 |
--------------------------------------------------------------------------------
/.markdownlint-cli2.yaml:
--------------------------------------------------------------------------------
1 | # Reference: https://github.com/DavidAnson/markdownlint-cli2#markdownlint-cli2yaml
2 |
3 | config:
4 | ul-indent:
5 | # Kramdown previously required an indent of 3, though this CLI recommends 2 or 4
6 | indent: 3
7 | line-length: false
8 |
9 | # Don't autofix anything, we're linting here
10 | fix: false
11 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | Metal3 projects are [Apache 2.0 licensed](LICENSE) and accept contributions via
4 | GitHub pull requests.
5 |
6 | ## Certificate of Origin
7 |
8 | By contributing to this project you agree to the Developer Certificate of
9 | Origin (DCO). This document was created by the Linux Kernel community and is a
10 | simple statement that you, as a contributor, have the legal right to make the
11 | contribution. See the [DCO](DCO) file for details.
12 |
13 | ## Contributing a new design document
14 |
15 | Please follow the design document template in design/_template.md.
16 |
--------------------------------------------------------------------------------
/DCO:
--------------------------------------------------------------------------------
1 | Developer Certificate of Origin
2 | Version 1.1
3 |
4 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
5 | 1 Letterman Drive
6 | Suite D4700
7 | San Francisco, CA, 94129
8 |
9 | Everyone is permitted to copy and distribute verbatim copies of this
10 | license document, but changing it is not allowed.
11 |
12 |
13 | Developer's Certificate of Origin 1.1
14 |
15 | By making a contribution to this project, I certify that:
16 |
17 | (a) The contribution was created in whole or in part by me and I
18 | have the right to submit it under the open source license
19 | indicated in the file; or
20 |
21 | (b) The contribution is based upon previous work that, to the best
22 | of my knowledge, is covered under an appropriate open source
23 | license and I have the right under that license to submit that
24 | work with modifications, whether created in whole or in part
25 | by me, under the same open source license (unless I am
26 | permitted to submit under a different license), as indicated
27 | in the file; or
28 |
29 | (c) The contribution was provided directly to me by some other
30 | person who certified (a), (b) or (c) and I have not modified
31 | it.
32 |
33 | (d) I understand and agree that this project and the contribution
34 | are public and that a record of the contribution (including all
35 | personal information I submit with it, including my sign-off) is
36 | maintained indefinitely and may be redistributed consistent with
37 | this project or the open source license(s) involved.
38 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | MDBOOK_VERSION ?= 0.4.37
2 | MDBOOK_BIN_VERSION ?= v$(MDBOOK_VERSION)
3 | SOURCE_PATH := docs/user-guide
4 | CONTAINER_RUNTIME ?= sudo docker
5 | IMAGE_NAME := quay.io/metal3-io/mdbook
6 | IMAGE_TAG ?= latest
7 | HOST_PORT ?= 3000
8 | MDBOOK_RELEASE_URL := https://github.com/rust-lang/mdBook/releases/download/$(MDBOOK_BIN_VERSION)/mdbook-$(MDBOOK_BIN_VERSION)-x86_64-unknown-linux-gnu.tar.gz
9 | TOOLS_DIR := hack/tools
10 | TOOLS_BIN_DIR := $(abspath $(TOOLS_DIR)/bin)
11 | MDBOOK_BIN := $(TOOLS_BIN_DIR)/mdbook
12 |
13 | export PATH := $(PATH):$(TOOLS_BIN_DIR)
14 |
15 | ## ------------------------------------
16 | ## Resolve placeholders as tags
17 | ## ------------------------------------
18 | RELEASETAGS := $(TOOLS_BIN_DIR)/mdbook-releasetags
19 | $(RELEASETAGS): $(TOOLS_DIR)/go.mod
20 | cd $(TOOLS_DIR); go build -tags=tools -o $(TOOLS_BIN_DIR)/mdbook-releasetags ./releasetags
21 |
22 | .PHONY: releasetags
23 | releasetags: $(RELEASETAGS)
24 |
25 | ## ------------------------------------
26 | ## Documentation tooling for Netlify
27 | ## ------------------------------------
28 |
29 | # This binary is used by Netlify, because the
30 | # Netlify build image doesn't support docker/podman.
31 |
32 | $(MDBOOK_BIN): # Download the binary
33 | curl -L $(MDBOOK_RELEASE_URL) | tar xvz -C $(TOOLS_BIN_DIR)
34 |
35 | .PHONY: netlify-build
36 | netlify-build: $(RELEASETAGS) $(MDBOOK_BIN)
37 | $(MDBOOK_BIN) build $(SOURCE_PATH)
38 |
39 |
40 | ## ------------------------------------
41 | ## Documentation tooling for local dev
42 | ## ------------------------------------
43 |
44 | .PHONY: docker-build
45 | docker-build: # Build the mdbook container image
46 | $(CONTAINER_RUNTIME) build --build-arg MDBOOK_VERSION=$(MDBOOK_VERSION) \
47 | --tag $(IMAGE_NAME):$(IMAGE_TAG) -f docs/Dockerfile .
48 |
49 | .PHONY: build
50 | build: # Build the user guide
51 | $(CONTAINER_RUNTIME) run \
52 | --rm -it --name metal3 \
53 | -v "$$(pwd):/workdir" \
54 | $(IMAGE_NAME):$(IMAGE_TAG) \
55 | mdbook build $(SOURCE_PATH)
56 |
57 | .PHONY: serve
58 | serve: # Serve the user-guide on localhost:3000 (by default)
59 | $(CONTAINER_RUNTIME) run \
60 | --rm -it --init --name metal3 \
61 | -v "$$(pwd):/workdir" \
62 | -p $(HOST_PORT):3000 \
63 | $(IMAGE_NAME):$(IMAGE_TAG) \
64 | mdbook serve --open $(SOURCE_PATH) -p 3000 -n 0.0.0.0
65 |
66 | .PHONY: clean
67 | clean: # Clean mdbook generated content
68 | $(CONTAINER_RUNTIME) run \
69 | --rm -it --name metal3 \
70 | -v "$$(pwd):/workdir" \
71 | $(IMAGE_NAME):$(IMAGE_TAG) \
72 | mdbook clean $(SOURCE_PATH)
73 |
74 | ## ------------------------------------
75 | ## Linting and testing
76 | ## ------------------------------------
77 |
78 | .PHONY: lint
79 | lint: markdownlint spellcheck shellcheck # Run all linting tools
80 |
81 | .PHONY: markdownlint
82 | markdownlint: # Run markdownlint
83 | ./hack/markdownlint.sh
84 |
85 | .PHONY: spellcheck
86 | spellcheck: # Run spellcheck
87 | ./hack/spellcheck.sh
88 |
89 | .PHONY: shellcheck
90 | shellcheck: # Run shellcheck
91 | ./hack/shellcheck.sh
92 |
--------------------------------------------------------------------------------
/OWNERS:
--------------------------------------------------------------------------------
1 | # See the OWNERS docs at https://go.k8s.io/owners
2 |
3 | approvers:
4 | - metal3-docs-maintainers
5 |
6 | reviewers:
7 | - metal3-docs-maintainers
8 | - metal3-docs-reviewers
9 |
10 | emeritus_approvers:
11 | - andfasano
12 | - fmuyassarov
13 | - furkatgofurov7
14 | - hardys
15 | - maelk
16 | - russellb
17 |
18 | emeritus_reviewers:
19 | - mboukhalfa
20 |
--------------------------------------------------------------------------------
/OWNERS_ALIASES:
--------------------------------------------------------------------------------
1 | # See the OWNERS docs: https://git.k8s.io/community/contributors/guide/owners.md
2 |
3 | aliases:
4 | # root
5 | metal3-docs-maintainers:
6 | - dtantsur
7 | - kashifest
8 | - lentzi90
9 | - zaneb
10 |
11 | metal3-docs-reviewers:
12 | - adilGhaffarDev
13 | - elfosardo
14 | - hroyrh
15 | - Rozzii
16 | - smoshiur1237
17 | - s3rj1k
18 | - tuminoid
19 | - zhouhao3
20 |
21 | # design
22 | metal3-docs-design-maintainers:
23 | - dtantsur
24 | - kashifest
25 | - Rozzii
26 | - zaneb
27 |
28 | metal3-docs-design-reviewers:
29 | - elfosardo
30 | - lentzi90
31 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | # Metal³
6 |
7 | [](https://clomonitor.io/projects/cncf/metal3-io)
8 |
9 | ## What is Metal³
10 |
11 |
12 |
13 | The Metal³ project (pronounced: Metal Kubed) exists to provide components that
14 | allow you to do bare metal host management for Kubernetes. Metal³ works as a
15 | Kubernetes application, meaning it runs on Kubernetes and is managed through
16 | Kubernetes interfaces.
17 |
18 | ## Useful links
19 |
20 | * [Quick start](https://book.metal3.io/developer_environment/tryit)
21 | * [Demos](https://www.youtube.com/watch?v=VFbIHc3NbJo&list=PL2h5ikWC8viKmhbXHo1epPelGdCkVlF16&ab_channel=Metal3)
22 | * [Blog posts](https://metal3.io/blog/index.html)
23 |
24 | ## Documentation
25 |
26 | Please see our [user-guide](https://book.metal3.io/) to familiarize yourself with Metal³ and its features. We are still in the process of writing
27 | the user-guide, so not all topics may be covered yet.
28 |
29 | ## Community
30 |
31 | Metal³ is constantly growing and we would be happy to collaborate with you.
32 | If you are interested in Metal³ or would like to reach out to its community,
33 | come talk to us!
34 |
35 | * We are available on Kubernetes [slack](http://slack.k8s.io/) in the
36 | [#cluster-api-baremetal](https://kubernetes.slack.com/messages/CHD49TLE7)
37 | channel
38 | * Join the [Metal3-dev](https://groups.google.com/forum/#!forum/metal3-dev)
39 | Google group for edit access to the
40 | [community meeting notes](https://docs.google.com/document/d/1IkEIh-ffWY3DaNX3aFcAxGbttdEY_symo7WAGmzkWhU/edit)
41 | * Subscribe to the [Metal³ Development Mailing List](https://groups.google.com/forum/#!forum/metal3-dev)
42 | for project-related announcements, discussions and questions.
43 | * Come and meet us in our weekly community meetings, held every
44 | Wednesday at 14:00 UTC on [Zoom](https://zoom.us/j/97255696401?pwd=ZlJMckNFLzdxMDNZN2xvTW5oa2lCZz09)
45 | * If you missed the previous community meeting, you can still find the notes
46 | [here](https://docs.google.com/document/d/1IkEIh-ffWY3DaNX3aFcAxGbttdEY_symo7WAGmzkWhU/edit)
47 | and recordings [here](https://www.youtube.com/playlist?list=PL2h5ikWC8viJY4SNeOpCKTyERToTbJJJA)
48 |
49 | ## Code of Conduct
50 |
51 | See our [Code of Conduct](https://github.com/metal3-io/community/blob/main/CODE_OF_CONDUCT.md)
52 |
--------------------------------------------------------------------------------
/SECURITY_CONTACTS:
--------------------------------------------------------------------------------
1 | # Reporting a security vulnerability
2 |
3 | Please do not:
4 | - disclose any security issue publicly, e.g. in pull requests or comments.
5 | - disclose any security issue directly to any owner of the repository or
6 | to any other contributor.
7 |
8 | In this repository, security reports are handled according to the
9 | Metal3-io project's security policy. For more information about the security
10 | policy, consult the user guide [here](https://book.metal3.io/security_policy.html).
11 |
12 |
--------------------------------------------------------------------------------
/design/OWNERS:
--------------------------------------------------------------------------------
1 | # See the OWNERS docs at https://go.k8s.io/owners
2 |
3 | approvers:
4 | - metal3-docs-design-maintainers
5 |
6 | reviewers:
7 | - metal3-docs-design-maintainers
8 | - metal3-docs-design-reviewers
9 |
10 | emeritus_approvers:
11 | - andfasano
12 | - fmuyassarov
13 | - furkatgofurov7
14 | - hardys
15 | - maelk
16 | - russellb
17 |
18 | options:
19 | no_parent_owners: true
20 |
--------------------------------------------------------------------------------
/design/api-design-principles.md:
--------------------------------------------------------------------------------
1 |
7 |
8 | # api-design-principles
9 |
10 | ## Status
11 |
12 | implemented
13 |
14 | ## Summary
15 |
16 | This document describes the design principles being used to create the
17 | metal3 API.
18 |
19 | ## Motivation
20 |
21 | As our contributor community grows, having these principles written
22 | down ensures we design new features in a consistent way.
23 |
24 | ### Goals
25 |
26 | 1. Describe the general principles for adding to or changing the API.
27 |
28 | ### Non-Goals
29 |
30 | 1. Prescribe specific API choices for future features.
31 |
32 | ## Proposal
33 |
34 | ### Grow Slowly
35 |
36 | Given the backwards-compatibility constraints for APIs in general, we
37 | want to take care when adding new features. When in doubt about a
38 | design, wait to implement it. Waiting gives us time to find more use
39 | cases and implementation details that may make it easier to choose the
40 | right path.
41 |
42 | ### Be Explicit
43 |
44 | All fields must have well-defined types. No fields may use
45 | `interface{}` types.
46 |
47 | We have two primary reasons for requiring explicitly naming and typing
48 | every API parameter.
49 |
50 | 1. Metal3 is meant to be an abstraction on top of a provisioning
51 | system. If we do not describe the API completely, the abstraction
52 | breaks and the user must understand another API in order to use
53 | ours. This exposes the underlying API in a way that makes it more
54 | difficult to change the metal3 API, while simultaneously making
55 | metal3 harder to use for our users.
56 | 2. Future versions of kubernetes will improve support for OpenAPI
57 | validation, and will require good validation by default as a
58 | security measure. Passing unstructured data through the API and
59 | storing it exposes clusters to security issues if an API changes
60 | and new fields are added. See
61 | for details.
62 |
63 | ### Don't Assume Ironic
64 |
65 | Ironic is an implementation detail for the current version of
66 | metal3. Although it has allowed us to move quickly, we do not want to
67 | assume that we will continue to use it indefinitely or exclusively. We
68 | should therefore not expose Ironic API-isms such as names or workflow
69 | assumptions through the metal3 API.
70 |
71 | ### Don't Assume Machine API
72 |
73 | Don't make assumptions about what the BaremetalHost will be used
74 | for. The Machine API is not the only consumer and running a Kubernetes
75 | node is not the only thing users may want to do with the Host.
76 |
77 | ### Not Every Feature Needs to Go into the baremetal-operator
78 |
79 | Metal3 is designed to take advantage of the microservice nature of
80 | kubernetes. New features may require changing the BareMetalHost
81 | schema, but just as often it will be possible to add a new feature
82 | using a new API.
83 |
84 | Provisioning hosts can be complicated. The BareMetalHost API is
85 | designed so the `baremetal-operator` can eliminate all of the
86 | complexity beyond basic host management and provisioning
87 | operations. Other APIs and controllers drive the decisions about how
88 | to configure a host, which image to use, etc. For example, the
89 | `cluster-api-provider-metal3` is separated from the
90 | `baremetal-operator` so that we can plug Metal3 into the cluster API
91 | ecosystem, but also so different hosts can be configured in different
92 | ways using the Machine API and so the BareMetalHost API can be used by
93 | tools other than the cluster API provider.
94 |
95 | ### Make Features Optional
96 |
97 | We want to avoid making the Metal3 features so tightly coupled that in
98 | order to use any of them a user has to deploy or enable all of
99 | them. Where possible, features should be optional or pluggable so that
100 | users can replace one of our implementations with one of their own, or
101 | avoid using a feature or API entirely. This encourages adoption, by
102 | allowing users to start with a simple configuration and add features
103 | over time as they need them. It also makes it easier to integrate
104 | Metal3 with downstream products, which may already have some or all of
105 | the same features.
106 |
107 | ### Follow Kubernetes API Patterns
108 |
109 | We want Metal3 APIs to be easy for new users to adopt. One way to
110 | achieve that is to use patterns that are already common elsewhere in
111 | other kubernetes APIs. New APIs should act like other parts of the
112 | system. When designing a new Metal3 API, look at other APIs for
113 | guidance, where parallels are available.
114 |
--------------------------------------------------------------------------------
/design/bare-metal-style-guide.md:
--------------------------------------------------------------------------------
1 | # bare-metal-style-guide
2 |
3 | A trivial - but unfortunately common - challenge with Metal³ and
4 | related projects is choosing the correct spelling of "bare metal" in
5 | any given context. Is it "bare metal", or "baremetal", or
6 | "bare-metal", or "Baremetal", or "Baremetal"? The answer is ... it
7 | depends!
8 |
9 | The goal of this document is to resolve this question and make it easy
10 | for anyone to follow the agreed convention.
11 |
12 | ## 1. In prose, as a noun, it is "bare metal"
13 |
14 | Examples:
15 |
16 | "Some workloads only ever run on bare metal"
17 | "We need to ensure consistency between bare metal and other platforms"
18 |
19 | ## 2. In prose, as an adjective, it is "bare-metal"
20 |
21 | Examples:
22 |
23 | "We are implementing bare-metal host management for Kubernetes"
24 | "We manage bare-metal hosts"
25 |
26 | ## 3. For names, it is "Bare Metal"
27 |
28 | Examples:
29 |
30 | 1. "The Bare Metal Operator"
31 | 1. "The Bare Metal Actuator"
32 |
33 | ## 4. For lower-cased technical names, it is "baremetal"
34 |
35 | Examples:
36 |
37 | 1. "The Bare Metal Operator is in the baremetal-operator repo"
38 | 1. "The Bare Metal Actuator is in the cluster-api-provider-baremetal repo"
39 | 1. "The 'baremetal' driver implements support for bare metal servers"
40 |
41 | ## 5. For camel-cased identifiers in code, it is "BareMetal"
42 |
43 | Examples:
44 |
45 | 1. "The BareMetalHost resource"
46 |
--------------------------------------------------------------------------------
/design/baremetal-operator/bmc-address.md:
--------------------------------------------------------------------------------
1 |
7 |
8 | # bmc-address
9 |
10 | ## Status
11 |
12 | implemented
13 |
14 | ## Summary
15 |
16 | This document explains the way users will provide the network location
17 | of the bare metal management controller (BMC) on a host.
18 |
19 | ## Motivation
20 |
21 | We need to document how we are going to specify the location of a BMC,
22 | including how to tell its type, IP address, port number, and
23 | potentially the path.
24 |
25 | ### Goals
26 |
27 | 1. To agree on an address specification system, including assumptions
28 | that can be made when parts of an address are left out.
29 |
30 | ### Non-Goals
31 |
32 | 1. To list the scheme to use for every type of controller.
33 | 2. To specify the user interface for entering address information.
34 |
35 | ## Proposal
36 |
37 | ### Implementation Details/Notes/Constraints
38 |
39 | For each BMC, we need to know the type, IP, port, and optionally the
40 | path to use to communicate with the controller.
41 |
42 | We can collect all of this information using a single "address" field,
43 | if we allow that field to contain partial or full URLs.
44 |
45 | For each type of controller, we can often assume we know the protocol
46 | used to communicate with it (HTTP, HTTPS, etc.). Therefore the scheme
47 | in a URL provided by the user is redundant, and we can use that
48 | portion of the URL to specify the controller type. For example:
49 |
50 | ipmi://192.168.111.1
51 |
52 | In cases where we cannot assume the correct communication protocol, we
53 | will need to combine the type and protocol. For example:
54 |
55 | redfish+https://IP/redfish/v1/Systems/42
56 |
57 | Initially, we will only support IPMI controllers, so we do not need
58 | users to specify the type or protocol. If the field only contains a
59 | network address, we can assume that the controller uses IPMI on the
60 | standard port, 623. Therefore this would be equivalent to the previous
61 | example:
62 |
63 | 192.168.111.1
64 |
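As an illustration only (host and secret names are made up; `credentialsName` is the existing BareMetalHost field for BMC credentials), a host referencing its BMC through the single `address` field might look like this:

```yaml
apiVersion: metal3.io/v1alpha1
kind: BareMetalHost
metadata:
  name: example-host
spec:
  bmc:
    # The URL scheme selects the controller type; a bare IP address
    # implies IPMI on the standard port 623.
    address: ipmi://192.168.111.1
    credentialsName: example-host-bmc-secret
  online: true
```
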
65 | ### Risks and Mitigations
66 |
67 | One risk in this approach is that we would need to tell users how to
68 | build the URLs, and this might be confusing.
69 |
70 | ## Design Details
71 |
72 | ### Work Items
73 |
74 | - The `IP` field of the `BMCDetails` data structure needs to be
75 | renamed `Address`.
76 | - A function to handle URL parsing and validation needs to be
77 | implemented, including understanding when the parsed URL needs to be
78 | interpreted as including default values.
79 |
80 | ### Dependencies
81 |
82 | N/A
83 |
84 | ### Test Plan
85 |
86 | We will have unit tests for the URL parsing logic.
87 |
88 | ### Upgrade / Downgrade Strategy
89 |
90 | N/A
91 |
92 | ### Version Skew Strategy
93 |
94 | N/A
95 |
96 | ## Alternatives
97 |
98 | The primary alternative is to expand the data structure to have
99 | separate fields for each value that would go into the URL. This
100 | complicates the UI, even for the simplest cases, since we either have
101 | to show all of the fields all of the time or include logic in the UI
102 | to show specific fields based on the "type" selector.
103 |
104 | ## References
105 |
106 | - [PR in baremetal-operator repo to change field name](https://github.com/metal3-io/baremetal-operator/pull/44)
107 |
--------------------------------------------------------------------------------
/design/baremetal-operator/bmo-ci-decoupling.md:
--------------------------------------------------------------------------------
1 | # BMO CI Decoupling
2 |
3 | The necessity of release and development branches has lately been acknowledged in the metal3-io/baremetal-operator
4 | repository, and the community has decided to introduce branching as
5 | well as update the release policy to maintain the branches. The issue was
6 | raised in and
7 | an update was made to the BMO release document
8 | as a result.
9 |
10 | As a next step, the following proposal is made for the branching:
11 |
12 | - A BMO e2e test (completely independent of CAPM3) has to be designed and put in
13 | place. So far BMO PRs are tested with CAPM3, CAPI and IPAM integration tests,
14 | but with the introduction of BMO release branches and proper releases from
15 | those branches, BMO should not rely on CAPM3 releases and should be decoupled
16 | from CAPM3 integration tests when testing a BMO PR. We propose a completely
17 | independent BMO e2e test framework which will test BMO features with Ironic.
18 | For this, a few action points have been identified:
19 |
20 | - Simplify BMO deployment through kustomize and get rid of deploy.sh script
21 | in the process.
22 |
23 | - Introduce BMO e2e test framework similar to what we have currently in CAPM3.
24 |
25 | - Write all the e2e feature tests necessary to test BMO code with ironic
26 | deployment which would be necessary to test integration of a PR landing in BMO.
27 |
28 | - Nightly jobs would be the place where we would test CAPM3 main branch
29 | integration with BMO main branch. This would help us identify if we need to
30 | introduce any change in CAPM3/BMO regarding the changes landing in BMO/CAPM3
31 | repo. Action points:
32 |
33 | - Configure JJBs for different combinations of CAPM3 and BMO releases for
34 | nightly fullstack jobs. We already test CAPM3 main branch-BMO main branch
35 | combination in periodic jobs.
36 |
37 | - Configure metal3-io/project-infra and metal3-io/metal3-dev-env to
38 | accommodate the release branches in the full stack jobs.
39 |
40 | - Meanwhile we can branch out BMO to release branch and continue development in
41 | main branch. Until BMO e2e tests are in place, we can continue testing BMO
42 | release branch and main branch in the same way we do it currently in dev-envs.
43 | Instead of using tags which is the way we test currently, we can use branch
44 | names for specific branches of CAPM3. For example, CAPM3 release-1.5 branch
45 | will be tested with BMO release-0.4 branch and CAPM3 main branch will be
46 | tested with BMO main branch. Releasing and branch maintenance is described in
47 | BMO [releasing document](https://github.com/metal3-io/baremetal-operator/blob/main/docs/releasing.md)
48 |
49 | - The release process for BMO needs proper documentation or uplift
50 | instructions, with detailed guidelines for the changes needed in BMO consumers (for
51 | example CAPM3) to uplift BMO in the go module.
52 |
53 | - Once BMO e2e tests are in place, metal3-dev-env test defaults should also
54 | change to test CAPM3 main branch with BMO latest release. This is because we
55 | can no longer guarantee whether CAPM3 main branch would work with BMO main
56 | branch as there might be breaking changes in BMO and this could potentially
57 | block dev-env PRs. Running CAPM3 main against stable BMO release should be
58 | enough for metal3-dev-env tests.
59 |
--------------------------------------------------------------------------------
/design/baremetal-operator/detached-annotation.md:
--------------------------------------------------------------------------------
1 |
7 |
8 | # Add support for Detached annotation
9 |
10 | ## Status
11 |
12 | implemented
13 |
14 | ## Summary
15 |
16 | Provide a way to prevent management of BaremetalHost resources
17 | after provisioning is completed, to facilitate the pivot of
18 | BaremetalHost resources in a multi-tier deployment.
19 |
20 | ## Motivation
21 |
22 | In a multi-tier deployment where one cluster deploys another, the "parent"
23 | cluster will contain BMH resources for initial provisioning,
24 | but the "child" cluster will later contain BMH resources that reference the
25 | same physical hosts.
26 |
27 | In this scenario it's necessary to prevent management operations from the
28 | parent cluster, such as asserting power state; otherwise BMH actions on the child
29 | cluster, such as the [reboot annotation](reboot-interface.md), may fail due
30 | to unwanted BMC interactions from the parent cluster.
31 |
32 | There is an existing
33 | [pause annotation](https://github.com/metal3-io/baremetal-operator/blob/master/docs/api.md#pausing-reconciliation)
34 | which pauses reconciliation of the BMH resources, but this does not remove
35 | the underlying Ironic host, so power management actions may be performed
36 | even when the BMH is marked as `paused`.
37 |
38 | ### Goals
39 |
40 | * Add an API to disable management of a BMH on the parent cluster, including
41 | all power management actions
42 | * Make it possible to delete a BMH resource in this "detached" state without
43 | triggering deprovisioning
44 | * Ensure it is possible to restart management of BMH resources (in the case
45 | where they are not deleted from the parent cluster)
46 | * Avoid changing behavior of the existing `paused` annotation since that
47 | behavior is necessary as part of the CAPI pivot process.
48 |
49 | ### Non-Goals
50 |
51 | * Any coordination between the tiers of BMH resources; that has to be handled externally
52 |
53 | ## Proposal
54 |
55 | ### Expected workflow
56 |
57 | * User creates BMH resource(s) in parent cluster
58 | * Provisioning of BMH triggered, which results in a running child cluster
59 | * Parent cluster BMH resources annotated as detached
60 | * Child cluster BMH resources created, with BMC credentials, but marked
61 | externallyProvisioned: true
62 |
63 | At this point, the physical hosts are owned by the child cluster BMH, but the
64 | inventory still exists in the parent cluster.
65 |
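For illustration, the child-cluster BMH created in the last step of the workflow above might look like the following sketch (names, BMC address and secret are hypothetical; `externallyProvisioned` is the existing BareMetalHost field mentioned in the workflow):

```yaml
apiVersion: metal3.io/v1alpha1
kind: BareMetalHost
metadata:
  name: worker-0
spec:
  bmc:
    address: redfish://192.168.111.10/redfish/v1/Systems/1
    credentialsName: worker-0-bmc-secret
  # The host was already provisioned via the parent cluster,
  # so the child cluster only adopts it.
  externallyProvisioned: true
  online: true
```
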
66 | In the event that all of the child cluster hosts require reprovisioning, it
67 | would be necessary to remove the detached annotation on the parent BMH resources,
68 | so that management of those resources can resume, e.g. for reprovisioning.
69 |
70 | ### API changes
71 |
72 | Add support for a new annotation, where the key is `baremetalhost.metal3.io/detached`
73 |
74 | ```yaml
75 | baremetalhost.metal3.io/detached: ""
76 | ```
77 |
78 | The value is ignored, similar to the `paused` annotation and could optionally
79 | include context from the system/user which applies the annotation.
80 |
81 | This annotation will only be consumed while the BMH is in either `Provisioned`,
82 | `ExternallyProvisioned` or `Ready`/`Available` state, in all other cases it is ignored.
83 |
84 | This annotation will be evaluated early in the `Reconcile()` loop, but after the
85 | `paused`, `status` and `hardwaredetails` annotations are evaluated.
86 |
87 | When the `detached` annotation is set, we will check the `status.provisioning.ID`
88 | and if necessary delete the corresponding host from Ironic (without triggering
89 | deprovisioning).
90 |
91 | When the `detached` annotation is removed, we will re-create the host in Ironic
92 | via the existing `ensureRegistered` state machine logic.
93 |
94 | If a BMH resource is deleted while the `detached` annotation is set, we will
95 | move directly to the `Deleting` state, without performing any `Deprovisioning`.
96 |
97 | ## Alternatives
98 |
99 | It would be possible to modify the behavior of the
100 | [pause annotation](https://github.com/metal3-io/baremetal-operator/blob/main/docs/api.md#pausing-reconciliation)
101 | such that Ironic hosts are removed while paused, however
102 | this means that we cannot reflect any error via the status
103 | or increment the errorCount for the retry backoff.
104 |
105 | We could add an API that sets the Ironic
106 | [maintenance mode flag](https://docs.openstack.org/api-ref/baremetal/?expanded=set-maintenance-flag-detail#set-maintenance-flag)
107 | but this means hosts could potentially permanently be in this state
108 | and there are concerns about corner-cases such as adoption when an
109 | ephemeral Ironic is used and a reschedule occurs.
110 |
111 | ## References
112 |
--------------------------------------------------------------------------------
/design/baremetal-operator/disable-power-off.md:
--------------------------------------------------------------------------------
1 |
7 |
8 |
9 |
10 | # Support hardware that cannot be powered off
11 |
12 | ## Status
13 |
14 | implementable
15 |
16 | ## Summary
17 |
18 | This design document proposes a new BareMetalHost API field that makes sure
19 | that the underlying hardware is never powered off.
20 |
21 | ## Motivation
22 |
23 | Power off is a fundamental operation that is used in many places in Ironic and
24 | is exposed in the BareMetalHost API via the `online` field. However, there are
25 | cases where the hardware must never end up in the powered off state except for
26 | a brief moment during reboots. The motivating case here is the [NC-SI][ncsi]
27 | technology, which allows the BMC to share one of the "normal" physical network
28 | interfaces rather than having a separate one just for it. In at least some
29 | implementations of this technology, network access to the BMC is not possible
30 | when the hardware is powered off.
31 |
32 | See [the Ironic specification][ironic-ncsi-spec] for a more detailed breakdown
33 | of the use cases and an explanation of challenges related to powering off and
34 | rebooting machines in Ironic.
35 |
36 | [ncsi]: https://en.wikipedia.org/wiki/NC-SI
37 | [ironic-ncsi-spec]: https://specs.openstack.org/openstack/ironic-specs/specs/approved/nc-si.html
38 |
39 | ### Goals
40 |
41 | - A user can configure a BareMetalHost so that implicit power off actions never
42 | happen and explicit actions are rejected.
43 |
44 | ### Non-Goals
45 |
46 | - Changing the default behavior.
47 |
48 | ## Proposal
49 |
50 | ## Design Details
51 |
52 | Add a new field `DisablePowerOff` (boolean, default `false`) to the
53 | BareMetalHost `spec` object. This field will directly correspond to the
54 | Ironic's `disable_power_off` Node field.
55 |
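A minimal sketch of how the new field might appear on a host, assuming the Go field `DisablePowerOff` serializes as `disablePowerOff` (the host name is made up):

```yaml
apiVersion: metal3.io/v1alpha1
kind: BareMetalHost
metadata:
  name: ncsi-host
spec:
  # Never power this host off; reboots go through the Ironic reboot API instead.
  disablePowerOff: true
  # Setting online: false would be rejected by the webhook while
  # disablePowerOff is true.
  online: true
```
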
56 | ### Implementation Details/Notes/Constraints
57 |
58 | Setting the `online` field to `false` will not be possible if
59 | `disable_power_off` is `true`. The webhook will reject such a change, and the
60 | controller code will ignore the `online` field in this case (e.g. if the
61 | webhook is disabled).
62 |
63 | Rebooting via the reboot annotation will be implemented via the Ironic reboot
64 | API instead of a power off followed by power on. We'll mark the action as
65 | successful once the Ironic call has been issued.
66 |
67 | The `PoweringOffBeforeDelete` state will be skipped for hosts with
68 | `DisablePowerOff` set to `true`.
69 |
70 | Check the [Ironic specification][ironic-ncsi-spec] for more implementation
71 | details.
72 |
73 | ### Risks and Mitigations
74 |
75 | The code paths without power off will be less tested than the normal path and
76 | may not behave correctly in the presence of BMC bugs (e.g. we won't be able to
77 | detect that a reboot had no effect). We will mark this feature as advanced and
78 | recommend that operators don't use it unless they understand all implications.
79 |
80 | ### Work Items
81 |
82 | - Add a new field to the API.
83 | - Update the webhook.
84 |
85 | ### Dependencies
86 |
87 | This proposal depends on the [Ironic feature][ironic-ncsi-spec] tracked in
88 | [bug 2077432](https://bugs.launchpad.net/ironic/+bug/2077432).
89 |
90 | ### Test Plan
91 |
92 | While we're planning on sushy-tools support for this feature, it won't be
93 | trivial to test it as part of the normal end-to-end tests, so we'll rely on
94 | unit tests and manual testing.
95 |
96 | ### Upgrade / Downgrade Strategy
97 |
98 | None
99 |
100 | ### Version Skew Strategy
101 |
102 | If the version of Ironic is not recent enough to set the `disable_power_off` field,
103 | the host will fail reconciling, and the error message will be set in the status
104 | until Ironic is upgraded or the `DisablePowerOff` field is unset.
105 |
106 | ## Drawbacks
107 |
108 | This is a rather exotic feature for a very specific hardware setup.
109 | Unfortunately, this setup seems to be gaining popularity in the *edge* world,
110 | so we cannot simply ignore it.
111 |
112 | ## Alternatives
113 |
114 | Users who currently need this feature with Metal3 are using
115 | [fakefish](https://github.com/openshift-metal3/fakefish) to prevent power off
116 | actions from working. This approach is very fragile and makes certain Ironic
117 | features broken. We cannot recommend it to the general user base.
118 |
119 | ## References
120 |
121 | [Ironic specification: Hardware that cannot be powered off][ironic-ncsi-spec]
122 |
--------------------------------------------------------------------------------
/design/baremetal-operator/explicit-boot-mode.md:
--------------------------------------------------------------------------------
1 |
7 |
8 | # explicit-boot-mode
9 |
10 | ## Status
11 |
12 | implemented
13 |
14 | ## Summary
15 |
16 | This design adds a field for the user to set the boot mode for a host
17 | explicitly.
18 |
19 | ## Motivation
20 |
21 | As was pointed out late in the [implicit-boot-mode
22 | review](https://github.com/metal3-io/metal3-docs/pull/78), we cannot
23 | always assume that old BMC protocols and old boot modes are
24 | automatically combined. We still want to provide a reasonable default
25 | behavior that encourages and assumes UEFI, but does not prevent the
26 | use of legacy boot mode.
27 |
28 | ### Goals
29 |
30 | - Describe an API change to allow the user to override the default
31 | boot mode of `UEFI`.
32 |
33 | ### Non-Goals
34 |
35 | - Change the implicit boot mode selection design.
36 |
37 | ## Proposal
38 |
39 | Add a new optional API input field, `spec.bootMode`, with possible
40 | values `UEFI` or `legacy`. If no value is provided, the default
41 | `UEFI` will be used.
42 |
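For example, a host overriding the default could set the proposed field like this (sketch only; the host name is made up):

```yaml
apiVersion: metal3.io/v1alpha1
kind: BareMetalHost
metadata:
  name: legacy-host
spec:
  # Override the implicit default of UEFI for hardware that needs legacy boot.
  bootMode: legacy
  online: true
```
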
43 | ### User Stories
44 |
45 | #### Story 1
46 |
47 | As a user, I want to override the boot mode selected by metal3 because
48 | I have hardware that does not match the assumptions made by the
49 | implicit boot mode selection.
50 |
51 | ## Design Details
52 |
53 | Add a new optional string field `BootMode` to `BareMetalHostSpec`,
54 | with allowed values `"UEFI"` or `"legacy"`.
55 |
56 | Update so
57 | that when `Spec.BootMode` has a valid value it overrides the default.
58 |
59 | ### Implementation Details/Notes/Constraints
60 |
61 | The existing PR #469 needs to be rebased, and the work for this design
62 | can be folded into that.
63 |
64 | *Implementation moved to
65 | .*
66 |
67 | ### Risks and Mitigations
68 |
69 | Adding a new field provides a way for the user to specify the wrong
70 | value. However, the boot mode is not something we can always assume we
71 | can figure out. Making the field optional and trying to select the
72 | right value automatically should at least give users a chance of not
73 | having to know what to do but also allow them to correct our guess if
74 | it is wrong.
75 |
76 | ### Work Items
77 |
78 | - Rebase #469
79 | - Extend it with the new field
80 | - Ensure we are saving the boot mode as part of the other provisioning
81 | settings in the host controller
82 |
83 | ### Dependencies
84 |
85 | N/A
86 |
87 | ### Test Plan
88 |
89 | Manual testing, for now.
90 |
91 | ### Upgrade / Downgrade Strategy
92 |
93 | The new field is optional so it is forward compatible.
94 |
95 | Older versions of the software did not set a boot mode at all so
96 | losing the setting during a downgrade may result in an unavoidable
97 | change of behavior. That is mitigated by the fact that most old
98 | systems were using DHCP, which is configured to deliver the right
99 | information to the host based on the client request.
100 |
101 | ### Version Skew Strategy
102 |
103 | N/A
104 |
105 | ## Drawbacks
106 |
107 | We originally wanted to avoid exposing this field as it is one more
108 | thing the user has to understand to use the API.
109 |
110 | ## Alternatives
111 |
112 | Sticking with only the implicit boot mode implementation would leave
113 | some users unable to use metal3.
114 |
115 | ## References
116 |
117 | - PR #469 has the implicit boot mode implementation
118 | - [implicit-boot-mode](implicit-boot-mode.md) has the original design
119 | - PR #602 has the implementation for this design
120 |
--------------------------------------------------------------------------------
/design/baremetal-operator/external-introspection.md:
--------------------------------------------------------------------------------
1 | # Support external introspection
2 |
3 | ## Status
4 |
5 | implemented
6 |
7 | ## Summary
8 |
9 | A declarative API is proposed to disable inspection of a BareMetalHost
10 | and optionally allow external sources of inspection data to update the
11 | hardware status data.
12 |
13 | ## Motivation
14 |
15 | Related to the work to enable booting of a
16 | [LiveImage](https://github.com/metal3-io/metal3-docs/pull/150),
17 | there is the requirement to optionally disable inspection on initial
18 | registration of a BareMetalHost (so that the live image can boot
19 | more quickly, ref user stories below).
20 |
21 | ### Goals
22 |
23 | - A declarative API to disable inspection on BMH registration
24 | - Provide an interface to update hardware status data at an arbitrary time
25 | after BMH creation.
26 |
27 | ### Non-Goals
28 |
29 | - There are no plans for any mechanism to trigger or consume data from any out
30 | of band inspection process, other than providing hardware data via an annotation.
31 |
32 | ## Proposal
33 |
34 | ### Disable inspection proposal
35 |
36 | To align with the [inspection API proposal](https://github.com/metal3-io/metal3-docs/blob/main/design/baremetal-operator/inspection-api.md),
37 | the `inspect.metal3.io` annotation will be reused, with the addition of a value.
38 |
39 | The optional `inspect.metal3.io: disabled` annotation will be used to describe
40 | the situation where we wish to disable the default inspection behavior.
41 |
42 | When the BMO finds this annotation, it will skip performing inspection
43 | during the
44 | [Inspecting state](https://github.com/metal3-io/baremetal-operator/blob/main/docs/BaremetalHost_ProvisioningState.png)
45 |
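As a sketch, disabling inspection for a host simply means adding the annotation proposed above (the host name is hypothetical):

```yaml
apiVersion: metal3.io/v1alpha1
kind: BareMetalHost
metadata:
  name: appliance-host
  annotations:
    # Skip the default inspection during the Inspecting state.
    inspect.metal3.io: disabled
```
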
46 | ### Hardware status update proposal
47 |
48 | In the current implementation, when `baremetalhost.metal3.io/status` is
49 | provided, it can set any status field, and thus is only evaluated on the
50 | very first reconcile (primarily to support externally provisioned hosts,
51 | where we collect the inspection data prior to creating the BMH resources).
52 |
53 | In the case where metal3 is booting a live-image that contains code that
54 | can collect hardware details, it's desirable to have a way to update the
55 | hardware status after the image is booted.
56 |
57 | To enable this safely, we can add a new `inspect.metal3.io/hardwaredetails`
58 | annotation, which will allow updating the status/hardware field:
59 |
60 | - At any time when inspect.metal3.io=disabled
61 | - When there is no existing HardwareDetails data in the Status
62 |
63 | In the latter case, it may be a potentially safer/more constrained interface
64 | than the current `baremetalhost.metal3.io/status` API.
65 |
66 | Given that the primary use-case for this is live-boot images (where no disk
67 | image is written to disk), and that profile matching is no longer the preferred
68 | interface for specifying root-disk hints, if `inspect.metal3.io/hardwaredetails`
69 | is updated and the BMH is in the `Ready` state, we will not attempt to match
70 | profiles based on this data.
71 |
72 | In the event that both `baremetalhost.metal3.io/status` and
73 | `inspect.metal3.io/hardwaredetails` are specified on BMH creation,
74 | `inspect.metal3.io/hardwaredetails` will take precedence and overwrite any
75 | hardware data specified via `baremetalhost.metal3.io/status`.
76 |
77 | ### User stories
78 |
79 | #### Fast-boot appliance live-image
80 |
81 | Where the LiveImage booted is an appliance that must be running as quickly as
82 | possible, it may be desirable to skip the time taken for inspection
83 | (and also cleaning, which is discussed in an [existing proposal](https://github.com/metal3-io/metal3-docs/pull/151)).
84 |
85 | #### Live-image installer does inspection
86 |
87 | [Installer ISO images](https://docs.fedoraproject.org/en-US/fedora-coreos/bare-metal/#_installing_from_live_iso)
88 | may be booted which can include their own inspection tooling.
89 |
90 | In this case, it is desirable to avoid the extra reboot and have the live-iso
91 | collect the required data (and update the BMH via the status annotation).
92 |
93 | ## Alternatives
94 |
95 | The main alternative is to provide a status annotation at the point of
96 | creating the BMH, which might be enough for the fast-boot appliance use-case,
97 | but isn't ideal for the case where there is data collected by the live-iso
98 | which can be used to subsequently update the hardware status.
99 |
100 | We could also enable evaluation of the existing annotation at any arbitrary time
101 | but this is potentially unsafe, given that the BMO stores data in some other
102 | status fields.
103 |
104 | ## References
105 |
106 | - Inspection API [proposal](https://github.com/metal3-io/metal3-docs/blob/main/design/baremetal-operator/inspection-api.md)
107 | - Live Image [proposal](https://github.com/metal3-io/metal3-docs/pull/150)
108 | - Live Image [implementation](https://github.com/metal3-io/baremetal-operator/pull/754)
109 |
--------------------------------------------------------------------------------
/design/baremetal-operator/host-config-drive.md:
--------------------------------------------------------------------------------
1 |
7 |
8 | # host-config-drive
9 |
10 |
11 |
12 | ## Status
13 |
14 | [Implemented](https://github.com/metal3-io/baremetal-operator/pull/70)
15 |
16 | ## Summary
17 |
18 | Provisioning hosts requires two separate images. The first is the
19 | primary target image for the host, and contains the operating system
20 | and other software that the host will run. These are generally
21 | reusable across many hosts. Customization data can also be provided
22 | via a second "config drive" image, which contains configuration settings
23 | that are typically interpreted by a firstboot agent (cloud-init, ignition)
24 | in the primary target image.
25 |
26 | Customization data can be provided in several formats, but most commonly
27 | a "user data" blob is provided, with a format that depends on the specific
28 | firstboot agent. This data can be built into an ISO image, which is handled
29 | by Ironic via writing an ISO to a separate partition with a predictable disk
30 | label, accessible to the primary target image when the host boots.
31 |
32 | Given the use of Ironic, firstboot agents must be configured to look for data
33 | in the OpenStack config drive format using the path
34 | `/openstack/latest/user_data`.
35 |
36 | User data contents are stored in a Secret within the
37 | kubernetes database because they can contain sensitive
38 | information.
39 |
40 | The baremetal operator can receive the Secret, extract the `userData`
41 | value, and pass the contents to Ironic as part of preparing the host
42 | for provisioning.
43 |
44 | ## Motivation
45 |
46 | ### Goals
47 |
48 | - Avoid having the baremetal operator tightly coupled to provisioning
49 | hosts to become nodes in the cluster.
50 | - Avoid leaking secrets when passing the config drive to the baremetal
51 | operator.
52 |
53 | ### Non-Goals
54 |
55 | N/A
56 |
57 | ## Proposal
58 |
59 | ### Implementation Details/Notes/Constraints
60 |
61 | User data settings come from the contents of a secret, which is referenced
62 | via the BaremetalHost userData spec field. The format of this data may
63 | differ depending on the firstboot tool in the primary OS image, so
64 | assumptions regarding the specific tool should be avoided in the BMO.
65 |
66 | Corresponding changes will be required in the Cluster/Machine API layer
67 | to ensure the required secret for the given host role is provided via
68 | the BMH userData field.
69 |
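A minimal sketch of the wiring described above (secret name, content and image URLs are illustrative; the `userData` reference follows the shape of the current BareMetalHost API):

```yaml
apiVersion: v1
kind: Secret
metadata:
  name: worker-user-data
type: Opaque
stringData:
  # The secret carries the user data blob; the key name shown here is illustrative.
  userData: |
    #cloud-config
    ssh_authorized_keys:
      - ssh-rsa AAAA... example-key
---
apiVersion: metal3.io/v1alpha1
kind: BareMetalHost
metadata:
  name: worker-0
spec:
  image:
    url: http://example.com/images/host-image.qcow2
    checksum: http://example.com/images/host-image.qcow2.md5sum
  userData:
    name: worker-user-data
    namespace: default
```
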
70 | ### Risks and Mitigations
71 |
72 | Passing the user data to Ironic as a JSON string instead of an encoded
73 | ISO requires a recent version of Ironic (since the development cycle for Stein),
74 | so an interim solution may be required until this is available in the metal3 images.
75 |
76 | ## Design Details
77 |
78 | ### Work Items
79 |
80 | - Add a `UserDataSecretRef` of type `SecretRef` to the
81 | `BareMetalHostSpec` structure to hold the location of the Secret
82 | containing the user data.
83 | - We may want to define a new type to hold all of the provisioning
84 | instructions, rather than adding individual fields to the host spec
85 | directly.
86 | - Update the cluster-api provider to find and pass the worker user data Secret to
87 | the baremetal operator through the new field in the
88 | `BareMetalHostSpec`.
89 | - Update the baremetal operator to retrieve the user data Secret
90 | content and pass it to Ironic, when it is present.
91 |
92 | ### Dependencies
93 |
94 | This will require work in both the actuator/provider and operator repositories.
95 |
96 | We will need to use a version of Ironic from the Stein release series,
97 | which includes the user data support in the API.
98 |
99 | ### Test Plan
100 |
101 | Manual or automated integration tests for ensuring the config drive
102 | content is applied to the server during provisioning.
103 |
104 | ### Upgrade / Downgrade Strategy
105 |
106 | N/A
107 |
108 | ### Version Skew Strategy
109 |
110 | N/A
111 |
112 | ## Drawbacks
113 |
114 | N/A
115 |
116 | ## Alternatives
117 |
118 | N/A
119 |
120 | ## References
121 |
122 | - [CoreOS setting for the config drive user data path](https://github.com/coreos/ignition/blob/master/internal/providers/openstack/openstack.go#L42)
123 | - [golang config drive builder in gophercloud/utils](https://github.com/gophercloud/utils/blob/master/openstack/baremetal/v1/nodes/configdrive.go)
124 |
--------------------------------------------------------------------------------
/design/baremetal-operator/inspection-api.md:
--------------------------------------------------------------------------------
1 | # inspection API
2 |
3 | ## Status
4 |
5 | implemented
6 |
7 | ## Summary
8 |
9 | A declarative API is proposed to request the baremetal operator to
10 | inspect a `Ready` BareMetalHost.
11 |
12 | ## Motivation
13 |
14 | We would like to have an interface to allow a user to re-gather hardware
15 | inventory of a `Ready` BareMetalHost when a hardware replacement is made.
16 | When a user of the underlying infrastructure makes some changes to the actual
17 | server (e.g. replace or add NIC, disk, etc.), the latest hardware inventory
18 | including those changes needs to be re-collected and updated on the spec of the
19 | corresponding BareMetalHost object without having to delete it.
20 |
21 | Implementation of this proposal is based on using annotation (similar to
22 | [Reboot API](https://github.com/metal3-io/metal3-docs/blob/main/design/baremetal-operator/reboot-interface.md))
23 | to request inspection of a `Ready` BareMetalHost.
24 | Once the annotation is set on BareMetalHost, the baremetal operator will
25 | request hardware inspection of the host from Ironic.
26 |
27 | ### Goals
28 |
29 | - A declarative API to perform inspection
30 | - Use this API for future Metal³ remediation controller
31 |
32 | ### Non-Goals
33 |
34 | - Automated mechanism to trigger inspection
35 |
36 | ## Proposal
37 |
38 | We follow the Reboot API implementation and expect to implement a similar
39 | annotation-based interface for a user to trigger inspection.
40 |
41 | Setting the `inspect.metal3.io` annotation on a BareMetalHost object
42 | triggers the controller to query Ironic for inspection of the host. The annotation
43 | has set-and-forget semantics: the controller removes it once
44 | inspection is completed.
45 | While the host is being inspected, the BareMetalHost will stay in
46 | `Inspecting` state until the process is completed.
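
For example, a re-inspection request could be made like this (host name and
namespace are illustrative; the annotation value is not significant in this
proposal):

```bash
kubectl annotate baremetalhost node-0 -n metal3 inspect.metal3.io=""
```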
47 |
48 | Re-inspection can be requested only when the BareMetalHost is in the `Ready`
49 | state. If a re-inspection request is made while the BareMetalHost is in any
50 | state other than `Ready`, the request will be ignored. This is important in order
51 | not to reboot a BareMetalHost (e.g. when `spec.provisioning.state == provisioned`)
52 | and to avoid unintended inconsistent states.
53 |
54 | |BMH state|Externally provisioned|API action|Annotation|
55 | |---|---|---|---|
56 | |Ready|No|move to Inspecting state|delete|
57 | |Inspecting|No|nothing|delete|
58 | |Provisioning|No|nothing|keep it until BMH is in Ready state|
59 | |Provisioned|No|nothing|keep it until BMH is in Ready state|
60 | |Provisioned|Yes|nothing|keep it until BMH is in Inspecting state|
61 |
62 | After completing inspection, previous inspection data should be updated
63 | both in Ironic and on the spec of the BareMetalHost object. Both
64 | `status.operationHistory.inspect.start` and
65 | `status.operationHistory.inspect.end` timestamps should be updated accordingly.
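
For example, the outcome can be checked on the host (name and namespace are
illustrative):

```bash
kubectl get baremetalhost node-0 -n metal3 \
  -o jsonpath='{.status.operationHistory.inspect}'
```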
66 |
67 | ### User stories
68 |
69 | #### Story 1
70 |
71 | - As a cluster admin, I would like to have a simple way of triggering
72 | inspection for my server after I replace NIC, disk, etc.
73 |
74 | ## Future Enhancements
75 |
76 | The re-inspection API and reboot API interfaces can be modified into a
77 | more formal interface for a future Metal³ remediation controller.
78 |
79 | ## Alternatives
80 |
81 | One alternative approach to keeping hardware details updated is to run the Ironic
82 | Python Agent (IPA) as a pod on the node, constantly updating the
83 | hardware details of the host.
84 |
85 | ## References
86 |
87 | - Reboot API [proposal](https://github.com/metal3-io/metal3-docs/blob/main/design/baremetal-operator/reboot-interface.md)
88 | - Reboot API [implementation](https://github.com/metal3-io/baremetal-operator/pull/424)
89 |
--------------------------------------------------------------------------------
/design/baremetal-operator/remove-host.md:
--------------------------------------------------------------------------------
1 | # Remove a Host from a Cluster
2 |
3 | At some point you will need to remove a host from a cluster. You may be
4 | removing failed hardware, downsizing a healthy cluster, or have some other
5 | reason.
6 |
7 | Since removal involves a BareMetalHost, Machine, and MachineSet, it can be
8 | non-obvious how best to accomplish host removal. This document provides
9 | guidance on how to do so.
10 |
11 | ## Steps
12 |
13 | These steps are both safe and compatible with automation that scales
14 | MachineSets to match the number of BareMetalHosts.
15 |
16 | ### Annotate the Machine
17 |
18 | Find the Machine that corresponds to the BareMetalHost that you want to remove.
19 | Add the annotation `cluster.k8s.io/delete-machine` with any value that is not
20 | an empty string.
21 |
22 | This ensures that when you later scale down the MachineSet, this Machine is the
23 | one that will be removed.
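
For example (the Machine name and namespace are illustrative):

```bash
kubectl annotate machine my-worker-0 -n metal3 cluster.k8s.io/delete-machine="true"
```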
24 |
25 | ### Delete the BareMetalHost
26 |
27 | Delete the BareMetalHost resource. This may take some time.
28 |
29 | ### Scale down MachineSet
30 |
31 | Find the corresponding MachineSet and scale it down to the correct level. This
32 | will cause the host's Machine to be deleted.
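
For example, assuming the MachineSet exposes the standard scale subresource
(name, namespace and replica count are illustrative):

```bash
kubectl scale machineset my-workers -n metal3 --replicas=2
```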
33 |
34 | ## Other Approaches
35 |
36 | ### Delete the Machine First
37 |
38 | If you delete the Machine first, that will cause the BareMetalHost to
39 | be deprovisioned. You would still need to issue a subsequent delete of the
40 | BareMetalHost. That opens the possibility that for some period of time, the
41 | BareMetalHost could be fully deprovisioned and show as "available";
42 | another Machine without a host could claim it before it gets deleted.
43 |
44 | Additionally, by deleting the Machine before scaling down the MachineSet, the
45 | MachineSet will try to replace it with a new Machine resource. That new
46 | resource could match a BareMetalHost if one is available and cause it to start
47 | provisioning. For this reason, it is better to not directly delete a Machine.
48 |
49 | ### Scale down the MachineSet
50 |
51 | You could annotate the Machine and then directly scale down the MachineSet
52 | without first deleting the BareMetalHost. This will cause the Machine to be
53 | deleted, but then the same downsides apply as described above; the
54 | BareMetalHost could be in an "available" state for some period of time.
55 |
--------------------------------------------------------------------------------
/design/baremetal-operator/secure-boot.md:
--------------------------------------------------------------------------------
1 |
7 |
8 | # Secure boot support
9 |
10 | ## Status
11 |
12 | implemented
13 |
14 | ## Summary
15 |
16 | This design proposes exposing an ability to turn UEFI secure boot on and off
17 | during provisioning and deprovisioning.
18 |
19 | ## Motivation
20 |
21 | Security-conscious deployments would like to make sure secure boot is enabled
22 | for their instances, so that the hardware refuses to boot kernel-level code
23 | that has not been signed with a known key.
24 |
25 | ### Goals
26 |
27 | - API addition to enable secure boot before booting the instance (and disable
28 | it on deprovisioning)
29 |
30 | ### Non-Goals
31 |
32 | - Support for custom secure boot keys.
33 | - Secure boot during deployment/cleaning/inspection.
34 |
35 | ## Proposal
36 |
37 | ## Design Details
38 |
39 | Add a new value for the `BootMode` enumeration: `UEFISecureBoot`. If set on a host,
40 | the following changes are made to the corresponding Ironic node object:
41 |
42 | - `boot_mode:uefi,secure_boot:true` is added to `properties.capabilities`.
43 | - `secure_boot` with a value of `true` is added to
44 | `instance_info.capabilities`.
45 |
46 | Add a `SupportsSecureBoot` call to `AccessDetails`, returning `true` for
47 | `redfish://`, `redfish-virtualmedia://`, `idrac-virtualmedia://`, `ilo4://`,
48 | `ilo5://` and `irmc://`.
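
For illustration, a host opting into secure boot would set the new value in its
spec (addresses and names below are examples only):

```yaml
apiVersion: metal3.io/v1alpha1
kind: BareMetalHost
metadata:
  name: node-1
spec:
  online: true
  bootMode: UEFISecureBoot
  bmc:
    address: redfish-virtualmedia://192.168.111.1:8000/redfish/v1/Systems/1
    credentialsName: node-1-bmc-secret
```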
49 |
50 | ### Implementation Details/Notes/Constraints
51 |
52 | - Strictly speaking, it's enough to add the `secure_boot` capability only to
53 |   `instance_info`; `properties` is only updated for consistency.
54 | - Secure boot can be used with live ISO but only when virtual media is used to
55 | deliver it (secure boot is incompatible with network booting in practice).
56 |
57 | ### Risks and Mitigations
58 |
59 | None, secure boot is off by default.
60 |
61 | ### Work Items
62 |
63 | - Update `AccessDetails` with a new call.
64 | - Define a new value for `BootMode`.
65 |
66 | ### Dependencies
67 |
68 | - [Ironic support for Redfish secure boot
69 |   management](https://review.opendev.org/c/openstack/ironic/+/771493) is under
70 |   review upstream.
71 |
72 | ### Test Plan
73 |
74 | Unfortunately, at this point it's only possible to test this feature on real
75 | hardware.
76 |
77 | ### Upgrade / Downgrade Strategy
78 |
79 | None
80 |
81 | ### Version Skew Strategy
82 |
83 | None
84 |
85 | ## Drawbacks
86 |
87 | None
88 |
89 | ## Alternatives
90 |
91 | Require users to configure secure boot manually. This approach has two large
92 | disadvantages:
93 |
94 | - It's not always trivial to do.
95 | - It breaks network booting.
96 |
97 | ## References
98 |
99 | - [Ironic secure boot
100 | documentation](https://docs.openstack.org/ironic/latest/admin/security.html#uefi-secure-boot-mode)
101 |
--------------------------------------------------------------------------------
/design/cluster-api-provider-metal3/capm3-remediation-controller-improvement-proposal.md:
--------------------------------------------------------------------------------
1 |
7 |
8 | # capm3-remediation-controller-improvement
9 |
10 | ## Status
11 |
12 | implemented
13 |
14 | ## Summary
15 |
16 | We would like to add Node deletion to the existing remediation strategy.
17 |
18 | ## Motivation
19 |
20 | The original reboot remediation controller proposal \[1\] is missing details on how
21 | remediation should actually be done. During implementation there was
22 | some discussion about whether the Node should be deleted or not \[2\]. The decision was
23 | to keep it simple and skip Node deletion.
24 | Skipping Node deletion has a big drawback though: workloads on unresponsive
25 | Nodes won't be rescheduled quickly, because they are assumed to be still
26 | running. Deleting the Node signals that the workloads are not running
27 | anymore, which results in quickly rescheduled workloads with less downtime.
28 |
29 | ### Goals
30 |
31 | * Quick rescheduling of workloads on failed Nodes by deleting the Node,
32 |   which signals that the workloads are not running anymore.
33 |
34 | ### Non-Goals
35 |
36 | * Change the remediation API
37 |
38 | ## Proposal
39 |
40 | The remediation controller's reconcile method will be modified to not only
41 | reboot the Machine, but also delete the Node.
42 |
43 | ### User Stories
44 |
45 | #### Story 1
46 |
47 | As a user, I expect minimal downtime of my workloads in case of Node issues.
48 |
49 | ## Design Details
50 |
51 | Unfortunately, adding Node deletion to the controller's reconcile method is a
52 | bigger change in the implementation than it sounds, because the old
53 | one-step fencing process (trigger reboot by setting the appropriate annotation)
54 | becomes a multi-step process, and after each step we need to wait for
55 | success before executing the next one:
56 |
57 | * power Machine off
58 | * backup Node labels and annotations
59 | * delete the Node
60 | * power Machine on
61 | * restore labels and annotations on recreated Node
62 |
63 | ### Implementation Details/Notes/Constraints
64 |
65 | None.
66 |
67 | ### Risks and Mitigations
68 |
69 | None.
70 |
71 | ### Work Items
72 |
73 | None, already implemented.
74 |
75 | ### Dependencies
76 |
77 | Nothing new.
78 |
79 | ### Test Plan
80 |
81 | Unit and e2e tests are already updated.
82 |
83 | ### Upgrade / Downgrade Strategy
84 |
85 | Node deletion might fail on existing target cluster because of missing RBAC
86 | roles for it. In this case Node deletion will be skipped and the Machine
87 | will just be power cycled \[3\].
88 |
89 | ### Version Skew Strategy
90 |
91 | None.
92 |
93 | ## Drawbacks
94 |
95 | None.
96 |
97 | ## Alternatives
98 |
99 | There was a discussion about whether this should be a new remediation strategy.
100 | The consensus was that remediation without Node deletion is incomplete,
101 | and that it should be added to the existing reboot strategy \[4\].
102 |
103 | ## References
104 |
105 | * [1] [Original Proposal](https://github.com/metal3-io/metal3-docs/blob/main/design/cluster-api-provider-metal3/capm3-remediation-controller-proposal.md)
106 | * [2] [Node deletion discussion](https://github.com/metal3-io/metal3-docs/pull/118#issuecomment-655326761)
107 | * [3] [RBAC issue on upgrade](https://github.com/metal3-io/cluster-api-provider-metal3/pull/367#discussion_r852388737)
108 | * [4] [Add to existing strategy discussion](https://github.com/metal3-io/cluster-api-provider-metal3/pull/367#issuecomment-978936471)
109 | * [Issue](https://github.com/metal3-io/cluster-api-provider-metal3/issues/392)
110 | * [Initial PR](https://github.com/metal3-io/cluster-api-provider-metal3/pull/367)
111 | * [New PR because of CI issues](https://github.com/metal3-io/cluster-api-provider-metal3/pull/668)
112 |
--------------------------------------------------------------------------------
/design/component-relationships.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/design/component-relationships.png
--------------------------------------------------------------------------------
/design/hardware-classification-controller/hwcc_sequence_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/design/hardware-classification-controller/hwcc_sequence_diagram.png
--------------------------------------------------------------------------------
/design/helm-charts/single-pod-helm-chart.md:
--------------------------------------------------------------------------------
1 |
7 |
8 | # Single-pod Helm chart
9 |
10 | ## Status
11 |
12 | provisional
13 |
14 | ## Summary
15 |
16 | Provide a Helm chart to deploy Metal3 and Ironic as a single
17 | pod in a Kubernetes cluster.
18 |
19 | ## Motivation
20 |
21 | ### Goals
22 |
23 | The goal is to support a popular way to deploy Kubernetes applications
24 | to simplify creation of development environments on top of arbitrary
25 | Kubernetes clusters.
26 |
27 | Another goal is to prepare to set a standard for production-grade deployment
28 | of Metal3 and its components.
29 |
30 | ### Non-Goals
31 |
32 | Providing an end-to-end bootstrap sequence for Metal3 and Ironic is not
33 | a goal of this design.
34 |
35 | ## Proposal
36 |
37 | ### User Stories
38 |
39 | #### Story 1
40 |
41 | As a user of Metal3, I want to install it in my existing Kubernetes
42 | cluster using Helm.
43 |
44 | ### Implementation Details/Notes/Constraints
45 |
46 | The initial implementation includes a Helm chart that creates a single-pod
47 | deployment with containers for the Metal3 and Ironic components
48 | in a Kubernetes cluster.
49 |
50 | The charts shall be added as a separate repository in the metal3-io space.
51 | The proposed name for the repository is ``metal3-helm-chart``.
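
A minimal usage sketch, assuming the repository is created as proposed and the
chart lives at the repository root (release name, path and namespace are
illustrative):

```bash
git clone https://github.com/metal3-io/metal3-helm-chart
helm install metal3 ./metal3-helm-chart --namespace metal3 --create-namespace
```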
52 |
53 | ### Risks and Mitigations
54 |
55 | None.
56 |
57 | ## Design Details
58 |
59 | Helm charts will require a separate repository (metal3-helm-charts)
60 | to be created.
61 |
62 | In the future, a CI environment that builds and tests the charts
63 | will have to be created.
64 |
65 | Potentially, the Helm charts will require changes in the way the
66 | Docker images for components of Ironic and Metal3 are built. The
67 | changes will include additional parameters that will be exposed
68 | through the charts metadata (values.yaml files).
69 |
70 | ### Work Items
71 |
72 | 1. Create a Helm chart for Ironic and its components:
73 |
74 | - `ironic`
75 | - `ironic-dnsmasq`
76 | - `ironic-httpd`
77 | - `mariadb`
78 | - `baremetal-operator`
79 |
80 | 2. Create a CI for building the Helm chart and smoke verification.
81 | 3. Create a CI for testing Helm chart deployment and functional
82 | testing.
83 |
84 | ### Dependencies
85 |
86 | - The charts require the ``helm`` binary to build and deploy.
87 | - The supported version of Helm is embedded in the charts.
88 |
89 | The following repository is used as a bootstrap for adding
90 | the chart to the ``metal3-io/`` GitHub organization:
91 |
92 | `https://github.com/Mirantis/metal3-helm-chart`
93 |
94 | ### Test Plan
95 |
96 | Testing strategy for Helm charts includes static code tests and
97 | integration tests. Integration tests include verification of
98 | deployment, update/upgrade and functional verification of the
99 | resulting installation for both scenarios.
100 |
101 | ### Upgrade / Downgrade Strategy
102 |
103 | None.
104 |
105 | ### Version Skew Strategy
106 |
107 | None.
108 |
109 | ## Drawbacks
110 |
111 | Helm charts do not immediately improve development environment
112 | creation experience.
113 |
114 | ## Alternatives
115 |
116 | Currently, the deployment functionality is already implemented as
117 | ``metal3-dev-env`` scripts. Another alternative is to use the plain
118 | Kubernetes manifests from ``baremetal-operator/deploy`` for
119 | deployment on K8s.
120 |
121 | ## References
122 |
123 | None.
124 |
--------------------------------------------------------------------------------
/design/image-ownership.md:
--------------------------------------------------------------------------------
1 |
7 |
8 | # image-ownership
9 |
10 | ## Status
11 |
12 | implementable
13 |
14 | ## Summary
15 |
16 | In order for Metal3 to provision hosts and bring them into the
17 | cluster, it needs to manage two images: the target image being written
18 | to the host's disk, and the provisioning image used to do that. The
19 | provisioning image is an implementation detail of Metal3 and its
20 | use of Ironic, and so will be managed as part of Metal3. The image
21 | written to disk is part of the cluster, and so will need to be at
22 | least minimally configurable.
23 |
24 | ## Motivation
25 |
26 | ### Goals
27 |
28 | - Identify the "owner" for specifying the version of the provisioning
29 | image
30 | - Identify the "owner" for specifying the version of the target image
31 |
32 | ### Non-Goals
33 |
34 | - Specifying where the images are hosted during production runs
35 | - Specifying how images are upgraded
36 |
37 | ## Proposal
38 |
39 | ### Implementation Details/Notes/Constraints
40 |
41 | The version of the IPA image used to provision images (the
42 | "provisioning image") is tied to the version of Ironic used by the
43 | baremetal operator. The user has no reason to change that image, so we
44 | do not need to make it configurable. We can either build the name and
45 | version into the source code for the operator, or the operator can use
46 | a versionless name/URL when passing the data to Ironic and we can have
47 | our build system install the image into the container using that same
48 | name. The latter should make updating Ironic simpler over time, but
49 | may require extra work in the short term that we would not prioritize
50 | highly.
51 |
52 | The version of the image being written to the host (the "target
53 | image") will change with each update of OpenShift, and may ultimately
54 | need to be something that is decoupled to ensure that Metal3 can be
55 | used with stock Kubernetes clusters in addition to OpenShift
56 | clusters. Therefore it at least needs to be something the installer
57 | can specify, and should not be hard-coded into any components. In the
58 | interest of making the baremetal operator generic, we will have the
59 | baremetal actuator assign the image to be provisioned to each host as
60 | part of allocating a host to a cluster. Long term, the actuator can
61 | derive the image name from a configuration setting or from the
62 | provider spec in the MachineSet/Machine. In the near term, the
63 | actuator can use a hard-coded value.
64 |
65 | ### Risks and Mitigations
66 |
67 | Allowing customization of the target image may result in users
68 | choosing images that are not suitable for hosting a
69 | Kubernetes/OpenShift cluster.
70 |
71 | Not allowing customization of the provisioning image will mean that
72 | users will need to upgrade their baremetal operator component in order
73 | to make use of updated versions of the provisioning tool.
74 |
75 | ### Work Items
76 |
77 | - Add an image URL field to the BareMetalHost CRD
78 | - Figure out how the actuator is going to know the URL (where is the
79 | image being served?)
80 | - Update the actuator to pass that URL by updating the host object at
81 | the same time that it sets the machine reference on the host
82 | - Update the baremetal operator to use the URL in the host object
83 | instead of the value currently hard-coded in the controller
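
As a sketch of the host-facing change (field names are illustrative at the time
of this proposal), the actuator would set something like:

```yaml
apiVersion: metal3.io/v1alpha1
kind: BareMetalHost
metadata:
  name: worker-0
spec:
  image:
    url: http://images.example.com/target-image.qcow2
    checksum: http://images.example.com/target-image.qcow2.md5sum
```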
84 |
85 | ### Dependencies
86 |
87 | We need to work out where the target image is going to come from in a
88 | production system so we can understand how to build a valid URL in the
89 | actuator.
90 |
91 | ### Test Plan
92 |
93 | We will have updated unit tests for the operator and whatever
94 | end-to-end tests verify image provisioning works will need to
95 | configure the system with the right URL.
96 |
97 | ### Upgrade / Downgrade Strategy
98 |
99 | The provisioning image URL is managed by the operator, so it will be
100 | upgraded or downgraded as the operator itself changes.
101 |
102 | The target image relies on the version of the OS being used to start
103 | the cluster, and will need to be set by the installer.
104 |
105 | ### Version Skew Strategy
106 |
107 | The versions of the two images are not directly related so there
108 | should not be an issue with skew.
109 |
110 | The version of the provisioning image is tied to the version of the
111 | baremetal operator, so we need to package them together or otherwise
112 | ensure that the operator can fetch the image it needs.
113 |
114 | ## Drawbacks
115 |
116 | The baremetal operator would be a bit simpler if it owned both images,
117 | but it would be less reusable.
118 |
119 | ## Alternatives
120 |
121 | We could make the provisioning image configurable, but the only
122 | benefit of doing that would be to allow updates to that image
123 | independently of the other components, and if we allow that we may have
124 | untested configurations running in the field.
125 |
126 | ## References
127 |
128 | N/A
129 |
--------------------------------------------------------------------------------
/design/images/ironic_authentication.drawio:
--------------------------------------------------------------------------------
1 | 5Vxdc6M2FP01funM7ggJAX5MnI9mmv3opjO7+9RRjGzTYORiObH76yuMMCDJMWbBOCSTB3MFwpx7uLo6uvIAjebr25gsZp+YT8MBBP56gK4GEFo2hIPkH/ib1OJaw9QwjQNfnpQbHoL/qDQCaV0FPl2WTuSMhTxYlI1jFkV0zEs2EsfspXzahIXluy7IlGqGhzEJdev3wOez1OphkNt/p8F0lt3ZArJlTrKTpWE5Iz57KZjQ9QCNYsZ4+mm+HtEwAS/DJb3uZk/r7ovFNOJVLsCfP7CbH0+fbf5t+re3IjFcfvswdNNunkm4kk8svy3fZBDEbBX5NOkFDNDlyyzg9GFBxknri3C6sM34PBRHlvg4CcJwxEIWb69F1PIxdYV9yWP2RAstQ8dFxBEt+nPIR3umMafrgkk+1y1lc8rjjThFtiIbp5dIkuGMPS+5yyxP2mZFdw2lkUiaTHd950iKDxLMY4DF7QLrE+pNxiZgnbFHHyfNAGtlQEpgLagD62ADsMhtC1hLw/VCYDajr6BrHY0uABjQiQldAKyry1FyBYt4wT7Z/jWEulNGHboGOg8NqLfGZtR/0DE8N9D1CHL56UuPEEdeGXHbFLWhAXGnLcQrDIY08i+StEIcjUOyXAbjv2ZBVEaZrgP+o/D5ZxLhP2J5dLWWAX97sMkOIv8mCHc9ROJxfuyaxEGhj+Qw72R7lPWy1ytLtorH9LWwmuUtnMRTyl+LBRIl6pcyJ93NBTeahojMFtOQ8OC5nG+ZXCvv8JUF4uFyFlnlsV8b0tNHl1cV8yOlIxuX6YiQ0lGKjNbRlmm7x65PPu8o8kUsogrranImZx7YRgISc4XhmblI0BoUr09O5FXkpgWs8yLnUOEUrElOjA+wvGVyDutExv10MtI2Z+rPYts+2koG5qz7WSRdwwzMpp6HCQjOioCWQkDo1SQgVAgIVSa3TMAsN3i3DERvNAQ6ymTGtmsy0LE6ZqBjYODNswDkJmTsabUYQNExyGwzKnj3SAnXaCrSY15mYDn9lkN7MVeXJhIG0yihsWAPFfbLJNkOxiS8kA3zwPeT2xjnAWW5YS8Tj5in2ntGt2ICb79CrebFAdM4lfpjLCYyQbTSp6y9cYdjqQks1t0BsO4O9UVqzB2wStAOw2Cx3AdSwStkuUgF3kmwTmBTJ7MEAgCxURG7AeKv3cksVDM9rL8Lprlse9jrQll72APgEQDMQgJ2W8denbl1jj18N7zHzrlhr2uVfeW9A84Ne/vdYI/U3Afog+1psa+w4NQT7LGa6HSOvT41+DwNorXmgDcs1rtlzLEp1z+pWA/1XP/LgkZivj9+EubR/V2P4d+F/s7gR1XyS5MgY1goOUZBqavW7HXCQeUlI9pB6SULA2eivKBhWTDRFjQqr4wo5RWaiNiy8oIqpNNH1VI08EJCdfx39BfSWHHS2vuo516Uz37TgOqN3LFTmzIPGBbsTeJTa1kAMgmEHdMUq1PjzmnadMFZEyAp+SQcuh9N0pkOU6awNQ9ThdXgU8OE8PnBpOeA/Q562tTH8D6fNOhlNyt4IGT9xV99BxDU34DT4q+n4X3G3/bODX990P/+cKtPPXvjAagOlnbXEUgfA/rtAUetVO3aA95xpWuHijDgcVUYYrL9IO/DYj5jUxaR8Dq35ipDUWNorVZtWFUtyGa156IWKJHVAwpdatdRnlgt8OrWqv1qBe9hIuYcd0+kXVVn43lpV7aiXdWuGuqajVlZQJF8B4lSHjLyc+4ZW0i6/EM538jtcmTFWRVh1XqVv1Urf7Ua5HIBcX2uWlW5mp14kKuVSfhrW72aVMLfesDBb3P0s5WldLuuVq7u3XLUBZi24w18A6PfcRneCch4ZoNfU2R0lcHPOfGWlmGVWpz3Ehmz1cH3GxqVjpxTp2JVqpPeS2iszsa+xkZFx3JUeWQPGwU9yKZw2iI5Ybn/C2O1cEYCkZM77bFZquuq5B+rRxpHlFPxXcEoXC0TKUulf280MguVUd8dd6WRDXWN7J4JePrrAtspT+Gdrtfnh7owdPf1Qh8CqhZJqr/I4QHgIs1fouUCA2C3XY+qhBnXsDBiLBBrby9OFqmLeMcsEkNqbcgP1aVOAEGAmHxweeW2vwdE3YpuEOZNxRZqHtSgC3RpInMBuIu2YDJ9FOiJO7DbnTsmt/7zBv4ZTf6NwQW6elzcz60Pxg2EPS5WUCu0oHe6xVqjA0y1R712gLpB5IQrhUYHmBYK++wArbKs6zfAuD+2x/gjpQS6vRdAHOa/5ZfO4/JfRETX/wM=ldFRC4IwEADgX7PHYDqCnjNTiILwQeglprt0MD2ZC8Vfn+HMhi/1MLh9u922G2FB1UeaN+UZBSji00JLQdiB+L43DguJHMAitfqUAlon0SAqIxsXc6xryI1jXGvs3LSGF+DAA5VYZSQ5V7DSVApTTrrb0sVjkEU5n+xRu1LxOdlCW3KB3RexkLBAI5opqvoA1Lszc19Otyw3+8udpTS+qiqIskFvpmLHf7Z8nqChNr+WHoPlauPE+T0WvgA=
--------------------------------------------------------------------------------
/design/images/ironic_authentication_mTLS.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/design/images/ironic_authentication_mTLS.png
--------------------------------------------------------------------------------
/design/ironic-debuggability-improvement.md:
--------------------------------------------------------------------------------
1 |
7 |
8 | # ironic-debuggability-improvement
9 |
10 | ## Status
11 |
12 | provisional
13 |
14 | ## Summary
15 |
16 | Ironic and Ironic-inspector ramdisk logs are currently hidden inside their
17 | respective containers and never shown to the end user by default.
18 | This document describes proposed changes in Ironic and Ironic-inspector
19 | logs handling to make debugging easier for the end user.
20 |
21 | ## Motivation
22 |
23 | We need to make Ironic logs accessible to the end user.
24 |
25 | ### Goals
26 |
27 | 1. Make it easier to understand what happened when a deployment fails.
28 | 2. Avoid old logs piling up and taking all available space.
29 |
30 | ### Non-Goals
31 |
32 | 1. Modify Ironic logging events or log level.
33 |
34 | ## Proposal
35 |
36 | The ironic-inspector-image includes a new script,
37 | ironic-inspection-log-watch.sh, that can act as an entry point for a container
38 | to dump host inspection logs. These logs are emitted by the ironic-inspector
39 | service.
40 |
41 | The script should watch for Ironic host inspection log files to appear in
42 | `/shared/log/ironic-inspector/ramdisk`, decompress them, print their
43 | contents with each line prefixed by the base file name, and then remove the
44 | file.
45 |
46 | The ironic-image includes a new script, ironic-provisioning-log-watch.sh,
47 | that can act as an entry point for a container to dump host provisioning logs.
48 | These logs are emitted by the ironic-conductor service.
49 |
50 | The script should watch for Ironic host provisioning log files to appear in
51 | `/shared/log/ironic/deploy`, decompress them, print their contents with each
52 | line prefixed by the base file name, and then remove the file.
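
A rough sketch of what such a watch loop could look like (the paths follow this
proposal, but the archive format and polling interval are assumptions, not the
final implementation):

```bash
#!/usr/bin/env bash
# Illustrative only: poll for ramdisk log archives, print their contents with a
# file-name prefix, then remove them. The .tar.gz format is an assumption here.
LOG_DIR=${LOG_DIR:-/shared/log/ironic/deploy}
while true; do
  for f in "${LOG_DIR}"/*.tar.gz; do
    [ -e "${f}" ] || continue
    tar -xOzf "${f}" | sed "s|^|$(basename "${f}"): |"
    rm -f "${f}"
  done
  sleep 5
done
```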
53 |
54 | The logs are written all at once, which is not necessarily atomic,
55 | but close to it. Log file names start with the node UUID in the current
56 | Ironic implementation. There is a pending change on the Ironic side to
57 | add the node name to the log file name.
58 |
59 | The baremetal-operator repository contains a kustomize-based deployment for
60 | Metal3 services. That should be updated to include a container based on the
61 | ironic-inspector-image using the ironic-inspection-log-watch entry point to
62 | show the logs collected during inspection. It should also be updated to
63 | include a container based on the ironic-image using the
64 | ironic-provisioning-log-watch entry point to show the logs collected during
65 | deployment.
66 |
67 | ## Design Details
68 |
69 | ironic-provisioning-log-watch.sh will be created in the ironic-image repository.
70 |
71 | ironic-inspection-log-watch.sh will be created in the ironic-inspector-image repository.
72 |
73 | Both scripts will be added as new container entry points in the kustomize-based
74 | deployment of Metal3 services in the baremetal-operator repository.
75 |
76 | ### Implementation Details/Notes/Constraints
77 |
78 | Proposed implementation includes two stages:
79 |
80 | 1. Print log contents with UUID reference.
81 | 2. Print log contents with node name reference.
82 |
83 | The second stage is dependent on these Ironic changes:
84 |
85 | - Add the node name to the ironic-inspector ramdisk log filename
86 |
87 | - Add the node name to the ironic-conductor ramdisk log filename
88 |
89 | ### Risks and Mitigations
90 |
91 | None
92 |
93 | ### Work Items
94 |
95 | None
96 |
97 | ### Dependencies
98 |
99 | None
100 |
101 | ### Test Plan
102 |
103 | - Unit test
104 | - metal3-dev-env integration test
105 |
106 | ### Upgrade / Downgrade Strategy
107 |
108 | None
109 |
110 | ### Version Skew Strategy
111 |
112 | None
113 |
114 | ## Drawbacks
115 |
116 | None
117 |
118 | ## Alternatives
119 |
120 | None
121 |
122 | ## References
123 |
124 | None
125 |
--------------------------------------------------------------------------------
/docs/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG BUILD_IMAGE=docker.io/golang:1.21.9@sha256:7d0dcbe5807b1ad7272a598fbf9d7af15b5e2bed4fd6c4c2b5b3684df0b317dd
2 | FROM $BUILD_IMAGE as builder
3 | WORKDIR /workspace
4 |
5 | # Run this with docker build --build-arg goproxy=$(go env GOPROXY) to override the goproxy
6 | ARG goproxy=https://proxy.golang.org
7 | ENV GOPROXY=$goproxy
8 |
9 | # Copy the Go Modules manifests
10 | COPY hack/tools/go.mod go.mod
11 | COPY hack/tools/go.sum go.sum
12 |
13 | # Cache deps before building and copying source so that we don't need to re-download as much
14 | # and so that source changes don't invalidate our downloaded layer
15 | RUN go mod download
16 |
17 | # Copy the sources
18 | COPY hack/tools/releasetags/ releasetags/
19 |
20 | # Build
21 | ARG ARCH=amd64
22 | RUN CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH} \
23 | go build -tags=tools -a -ldflags '-extldflags "-static"' \
24 | -o mdbook-releasetags ./releasetags
25 |
26 | FROM rust:1.78.0-slim@sha256:517c6272b328bc51c87e099ef4adfbc7ab4558af2d757e8d423c7c3f1cbbf9d5
27 | ARG MDBOOK_VERSION="0.4.37"
28 | RUN cargo install mdbook --vers ${MDBOOK_VERSION}
29 | RUN cp /usr/local/cargo/bin/mdbook /usr/bin/mdbook
30 | COPY --from=builder /workspace/mdbook-releasetags /usr/bin/mdbook-releasetags
31 | WORKDIR /workdir
32 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # Metal³ user-guide instructions
2 |
3 | We are using [Mdbook](https://github.com/rust-lang/mdBook) to build the
4 | Metal³ user-guide. Below you will find step-by-step instructions on how
5 | to test your changes.
6 |
7 | ## User-guide structure
8 |
9 | Below is the concatenated file structure for the Metal³ user-guide.
10 |
11 | ```shell
12 | ├── book.toml
13 | ├── README.md
14 | ├── src
15 | │ ├── bmo
16 | │ │ └── OWNERS
17 | │ ├── capm3
18 | │ │ └── OWNERS
19 | │ ├── images
20 | │ │ └── metal3-color.svg
21 | │ ├── introduction.md
22 | │ ├── ipam
23 | │ │ └── OWNERS
24 | │ ├── ironic
25 | │ │ └── OWNERS
26 | │ └── SUMMARY.md
27 | └── theme
28 | └── favicon.svg
29 | ```
30 |
31 | ### src
32 |
33 | Apart from the actual content/files, the `src` folder stores SUMMARY.md, which
34 | is consumed by Mdbook and defines the content structure.
35 |
36 | ### book.toml
37 |
38 | All parameters and configuration of the user-guide are handled via book.toml.
39 | These include output parameters, redirects, and metadata such as title,
40 | description, authors, language, etc. More information on that can be found
41 | [here](https://rust-lang.github.io/mdBook/format/config.html).
42 |
43 | ### SUMMARY.md
44 |
45 | This is the table of contents of the user-guide and defines the exact structure.
46 | Based on the order of documents given in SUMMARY.md, mdbook will
47 | fetch those documents and parse them.
48 |
49 | ## Preview your changes locally
50 |
51 | All the commands below are executed within mdbook container.
52 |
53 | 1. Build the user-guide.
54 |
55 | ```bash
56 | make build
57 | ```
58 |
59 | 1. Preview the user-guide you built before pushing your changes. This will open the
60 | user-guide in your browser at `http://localhost:3000/`. Export the `HOST_PORT`
61 | environment variable with the desired port number to serve the user-guide on another
62 | port. You can keep running `make serve` and continue making doc changes. Mdbook
63 | will detect your changes, render them and refresh your browser page automatically.
64 |
65 | ```bash
66 | make serve
67 | ```
68 |
69 | 1. Clean Mdbook auto-generated content from docs/user-guide/book path once you
70 | have finished local preview.
71 |
72 | ```bash
73 | make clean
74 | ```
75 |
76 | ## Markers
77 |
78 | There is a preprocessor written in Golang that resolves
79 | `{{#releasetag owner:"metal3-io" repo:"cluster-api-provider-metal3"}}` type
80 | markers as the latest version, e.g. `:v1.6.1`. It takes the repository name and owner
81 | as arguments. This preprocessor resolves the markers during the Netlify build. Note:
82 | the preprocessor will return an error if the SUMMARY.md file has drafts (aka links
83 | without a path) in the structure.
84 |
--------------------------------------------------------------------------------
/docs/prerequisites.md:
--------------------------------------------------------------------------------
1 | # Deployment of Metal3 on vanilla K8s cluster
2 |
3 | To deploy metal3 components on a vanilla K8s cluster, the following prerequisites
4 | have to be met:
5 |
6 | 1. **Ironic should have access to layer-2 network for provisioning.**
7 | 2. **Firewall is configured properly**
8 | 3. **Webserver container containing node images is running and reachable**
9 | 4. **Ironic-bmo-configmap is populated correctly**
10 |
11 | We elaborate these points in detail here:
12 |
13 | 1. Ironic should have access to a layer-2 network for provisioning. It
14 | should be running on **host** networking. On top of that, the
15 | network should be configured so that nodes can reach the networking
16 | service for DHCP and PXE boot. It is also required to provide ironic
17 | with the MAC address(es) of each node that ironic is provisioning
18 | so that it can determine which host the introspection data is
19 | coming from.
20 |
21 | 2. Firewall should be configured to allow the required traffic to pass
22 | through. The following traffic should be allowed at least:
23 | - ARP
24 | - DHCP
25 | - VRRP
26 | - ICMP
27 | - HTTP towards internal and external webserver
28 | - Ports for the above-mentioned services and for `Ironic-IPA`.
29 | The default ironic ports are as follows:
30 |
31 | - 6180 --> for httpd webserver
32 | - 5050 --> for ironic-inspector
33 | - 6385 --> for ironic-endpoint
34 | - 9999 --> for ironic-ipa
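
A hedged example of opening these ports with `firewalld` (the zone and exact
rule set depend on your environment):

```bash
firewall-cmd --permanent --add-port=6180/tcp --add-port=5050/tcp \
  --add-port=6385/tcp --add-port=9999/tcp
firewall-cmd --reload
```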
35 |
36 | 3. The webserver container holding the node-images volume should be
37 | running and reachable. It is called the `httpd-infra` container in
38 | metal3-dev-env; it runs on the ironic image and contains the node
39 | images (OS images). It also caches a few other packages which are
40 | required for the second webserver, `ironic-httpd`, which runs inside
41 | the cluster in the `Baremetal Operator` deployment. The following tree
42 | structure shows an example of the volume mounted in the external
43 | webserver container with the required node images and other cached
44 | images:
45 |
46 | ```ini
47 | /shared/
48 | ├── html
49 | │ ├── dualboot.ipxe
50 | │ ├── images
51 | │ │ ├── bionic-server-cloudimg-amd64.img
52 | │ │ ├── bionic-server-cloudimg-amd64.img.md5sum
53 | │ │ ├── ironic-python-agent-1862f800-59e2c9cab7e95
54 | │ │ │ ├── ironic-python-agent.initramfs
55 | │ │ │ ├── ironic-python-agent.kernel
56 | │ │ │ ├── ironic-python-agent.tar
57 | │ │ │ └── ironic-python-agent.tar.headers
58 | │ │ ├── ironic-python-agent.initramfs -> ironic-python-agent-1862f800-59e2c9cab7e95/ironic-python-agent.initramfs
59 | │ │ ├── ironic-python-agent.kernel -> ironic-python-agent-1862f800-59e2c9cab7e95/ironic-python-agent.kernel
60 | │ │ └── ironic-python-agent.tar.headers -> ironic-python-agent-1862f800-59e2c9cab7e95/ironic-python-agent.tar.headers
61 | │ ├── inspector.ipxe
62 | │ └── uefi_esp.img
63 | └── tmp
64 | ```
65 |
66 | 4. The environment variables defined in `ironic-bmo-configmap`
67 | required for the `Baremetal Operator` deployment need to be defined
68 | prior to deploying the provider components in the management cluster:
69 |
70 | ```sh
71 | PROVISIONING_IP=$CLUSTER_PROVISIONING_IP
72 | PROVISIONING_INTERFACE=$CLUSTER_PROVISIONING_INTERFACE
73 | PROVISIONING_CIDR=$PROVISIONING_CIDR
74 | DHCP_RANGE=$CLUSTER_DHCP_RANGE
75 | DEPLOY_KERNEL_URL=http://$CLUSTER_URL_HOST:6180/images/ironic-python-agent.kernel
76 | DEPLOY_RAMDISK_URL=http://$CLUSTER_URL_HOST:6180/images/ironic-python-agent.initramfs
77 | IRONIC_ENDPOINT=http://$CLUSTER_URL_HOST:6385/v1/
78 | IRONIC_INSPECTOR_ENDPOINT=http://$CLUSTER_URL_HOST:5050/v1/
79 | CACHEURL=http://$PROVISIONING_URL_HOST/images
80 | ```
81 |
82 | This is an example representation of the environment variables which are
83 | expected in the `Baremetal Operator` deployment. The example also shows the
84 | environment variables which are used in `metal3-dev-env` to populate the
85 | configmap. They can be replaced by any variables in a vanilla K8s cluster. It is
86 | only important that the configmap variables are populated correctly so that
87 | the ironic environment is reachable. In case ironic is to be deployed locally,
88 | these configmap env variables are populated through `ironic_ci.env`, which
89 | resides in the `baremetal-operator/deploy/` folder.
90 |
--------------------------------------------------------------------------------
/docs/presentations/README.md:
--------------------------------------------------------------------------------
1 | # Metal3 Presentations
2 |
3 |
4 |
5 | ## Goal
6 |
7 | The motivation behind this initiative is to provide easy-to-use presentation
8 | templates for Metal3 projects, which can then be imported and built upon to
9 | create presentations for meetups, conferences or any other platforms. This can
10 | serve as a supplement to existing documentation for new users, and also help
11 | spread the project by helping presenters save time and focus on their content.
12 |
13 | ## Framework
14 |
15 | We are using the [RevealJS](https://revealjs.com/) framework to create the
16 | presentations. To contribute a presentation, please create a directory at the
17 | `metal3-docs/docs/presentations` path, with the files associated with your
18 | presentation, for example:
19 |
20 | ```bash
21 | ls metal3-docs/docs/presentations/test-presentation
22 | test-image1.png test-image2-capi.png test-presentation.html
23 | ```
24 |
25 | To test your presentation with the revealjs framework, there are two simple
26 | options:
27 |
28 | 1. Copy the `dist` and `plugin` directories from revealjs repository to the presentations directory.
29 | 2. Copy all the presentation files under the revealjs repository and open the presentation `.html` file inside a browser.
30 |
31 | Here is an example:
32 |
33 | ```bash
34 |
35 | ## Clone revealjs repository
36 | git clone https://github.com/hakimel/reveal.js.git
37 |
38 | ## Option 1
39 | cd ${your_presentation_directory}
40 | cp -r ${revealjs_directory}/plugin .
41 | cp -r ${revealjs_directory}/dist .
42 |
43 | ## Option 2
44 | cp ${your_presentation_directory}/* ${revealjs_directory}
45 |
46 | ```
47 |
48 | For full scale revealjs deployment refer
49 | [here](https://revealjs.com/installation/#full-setup)
50 |
51 | Now you can simply edit the presentation HTML or markdown files (when using an
52 | external markdown file) to build on top of the presentation.
53 |
54 | To export the presentation in PDF format, you can use
55 | [decktape](https://github.com/astefanutti/decktape#install), for example:
56 |
57 | ```bash
58 | decktape reveal test-presentation.html test_deck.pdf
59 | ```
60 |
61 | Exporting to .odp or .pptx formats is not supported but
62 | this [issue](https://github.com/hakimel/reveal.js/issues/1702) might help.
63 |
64 | ## Example
65 |
66 | Let's look at the `metal3-overview` presentation as an example.
67 | First, here is the list of files under the `metal3-overview` directory:
68 |
69 | ```diff
70 | tree metal3-overview/
71 |
72 | metal3-overview/
73 | ├── metal3-components.png
74 | ├── metal3-integration-capi.png
75 | ├── metal3-overview.html
76 | ├── slide-example-1.png
77 | ├── slide-example-2.png
78 | └── slide-example-3.png
79 | ```
80 |
81 | - *`metal3-overview.html`*: rendered with revealjs to create the slides; also
82 |   contains the markdown content inline
83 | - *`metal3 .png files`*: images that we created to be used in the slides
84 | - *`slide .png files`*: snapshots of a few slides in the presentation
85 |
86 | In this example we have used inline markdown for the slides' content via
87 | ``, but we can also include
88 | it externally by specifying the external file like
89 | ``. There are a variety of
90 | features available in the revealjs framework; for detailed documentation visit
91 | the [revealjs official website](https://revealjs.com/).
92 |
--------------------------------------------------------------------------------
/docs/presentations/metal3-overview/metal3-components.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/docs/presentations/metal3-overview/metal3-components.png
--------------------------------------------------------------------------------
/docs/presentations/metal3-overview/metal3-integration-capi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/docs/presentations/metal3-overview/metal3-integration-capi.png
--------------------------------------------------------------------------------
/docs/presentations/metal3-overview/slide-example-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/docs/presentations/metal3-overview/slide-example-1.png
--------------------------------------------------------------------------------
/docs/presentations/metal3-overview/slide-example-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/docs/presentations/metal3-overview/slide-example-2.png
--------------------------------------------------------------------------------
/docs/presentations/metal3-overview/slide-example-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/docs/presentations/metal3-overview/slide-example-3.png
--------------------------------------------------------------------------------
/docs/user-guide/README.md:
--------------------------------------------------------------------------------
1 | # User-guide structure
2 |
3 | We use the [mdbook](https://github.com/rust-lang/mdBook) project to build the user-guide.
4 | All the content should be located within the `docs/user-guide/src/` directory.
5 |
6 | The structure of the book is based on the `SUMMARY.md` file.
7 | `book.toml` is used as the configuration file for the mdbook.
8 | Each GitHub project has its own directory to keep related documents. For example:
9 |
10 | - `docs/user-guide/src/bmo` for Baremetal Operator related content
11 | - `docs/user-guide/src/capm3` for Cluster-API-Provider-Metal3 related content
12 | - `docs/user-guide/src/ironic` for Ironic related content
13 | - `docs/user-guide/src/ipam` for Ip-address-manager related content
14 |
15 | Similarly, we have a copy of the OWNERS file from each project, which gives reviewer and approver rights on the docs to the same maintainers as the project:
16 |
17 | - `docs/user-guide/src/bmo/OWNERS`
18 | - `docs/user-guide/src/capm3/OWNERS`
19 | - `docs/user-guide/src/ironic/OWNERS`
20 | - `docs/user-guide/src/ipam/OWNERS`
21 |
22 | ## Automatic build process
23 |
24 | Netlify is configured to build the user-guide periodically from the current state of the main branch. As such, when a documentation change is merged, it will be visible in the official user-guide by the next day at the latest.
25 |
26 | Whenever a build is triggered, Netlify will fetch the mdbook binary first and run `make build` to build the content.
27 | This generates HTML content to be published under the `docs/user-guide/book` directory.
28 | As the last step, Netlify publishes the final content from `docs/user-guide/book`.
29 | The final content is built on the fly by Netlify; as such, we don't store it on GitHub.
30 |
31 | ## What's the URL of the current user-guide
32 |
33 | [https://book.metal3.io/](https://book.metal3.io/)
34 |
35 | ## How to check book content when reviewing a GitHub patch
36 |
37 | Netlify is configured to build the book from a pull request branch, and the result will be reported on the PR as `netlify/metal3-user-guide/deploy-preview`.
38 | As such, it helps reviewers review the patch not only as markdown but also as the final user-guide.
39 | Our Netlify configuration is in the [netlify.toml](https://github.com/metal3-io/metal3-docs/blob/main/netlify.toml).
40 |
41 | ## Mdbook maintenance
42 |
43 | All the configuration of the mdbook, such as the content path, the version, and where to get the binary while building the user-guide, is defined in the [Makefile](https://github.com/metal3-io/metal3-docs/blob/main/Makefile).
44 |
45 | ```sh
46 | MDBOOK_BIN_VERSION ?= v0.4.15
47 | SOURCE_PATH := docs/user-guide
48 | CONTAINER_RUNTIME ?= sudo docker
49 | IMAGE_NAME := quay.io/metal3-io/mdbook
50 | IMAGE_TAG ?= latest
51 | HOST_PORT ?= 3000
52 | BIN_DIR := hack
53 | MDBOOK_BIN := $(BIN_DIR)/mdbook
54 | ...
55 | ```
56 |
57 | ## How to preview changes locally
58 |
59 | Before submitting a document change, you can run the same mdbook binary to preview the book.
60 |
61 | 1. Install the mdbook by following official docs [here](https://rust-lang.github.io/mdBook/)
62 |
63 | 1. You can use the serve command to preview the user-guide running at localhost:3000
64 |
65 | ```shell
66 | cd docs/user-guide/
67 | mdbook serve
68 | ```
69 |
70 | You should have the user-guide available now at `localhost:3000`.
71 | Also, the serve command watches the `src` directory for changes and rebuilds the user-guide for every change.
72 |
--------------------------------------------------------------------------------
/docs/user-guide/book.toml:
--------------------------------------------------------------------------------
1 | [book]
2 | authors = ["Metal³ community"]
3 | language = "en"
4 | multilingual = false
5 | src = "src"
6 | title = "Metal³ user-guide"
7 |
8 | [preprocessor.releasetags]
9 |
10 | [output.html]
11 | git-repository-url = "https://github.com/metal3-io/"
12 | curly-quotes = true
13 |
--------------------------------------------------------------------------------
/docs/user-guide/src/SUMMARY.md:
--------------------------------------------------------------------------------
1 | # Metal3 Project
2 |
3 | [comment]: After adding the releasetag preprocessor the build fails with output "thread 'main' panicked at '', src/utils/fs.rs:45:10" if there are links with empty paths.
4 |
5 | [Introduction](introduction.md)
6 |
7 | - [Project overview](project-overview.md)
8 | - [Quick-start](quick-start.md)
9 | - [Installing on Baremetal](baremetal/guide.md)
10 | - [Baremetal Operator](bmo/introduction.md)
11 | - [Install Baremetal Operator](bmo/install_baremetal_operator.md)
12 | - [Host State Machine](bmo/state_machine.md)
13 | - [Supported Hardware](bmo/supported_hardware.md)
14 | - [Basic Features](bmo/features.md)
15 | - [Provisioning and Deprovisioning](bmo/provisioning.md)
16 | - [Automated Cleaning](bmo/automated_cleaning.md)
17 | - [Automatic Secure Boot](bmo/automatic_secure_boot.md)
18 | - [Controlling Inspection](bmo/inspect_annotation.md)
19 | - [Firmware Settings](bmo/firmware_settings.md)
20 | - [Firmware Updates](bmo/firmware_updates.md)
21 | - [Instance Customization](bmo/instance_customization.md)
22 | - [Live Updates (Servicing)](bmo/live_updates_servicing.md)
23 | - [RAID Setup](bmo/raid.md)
24 | - [Rebooting Hosts](bmo/reboot_annotation.md)
25 | - [Specifying Root Device](bmo/root_device_hints.md)
26 | - [Advanced Features](bmo/features.md)
27 | - [Adopting Externally Provisioned Hosts](bmo/externally_provisioned.md)
28 | - [Advanced Instance Customization](bmo/advanced_instance_customization.md)
29 | - [Booting from Live ISO](bmo/live-iso.md)
30 | - [Detaching Hosts from Provisioner](bmo/detached_annotation.md)
31 | - [External Inspection](bmo/external_inspection.md)
32 | - [Reconstructing Host Status](bmo/status_annotation.md)
33 | - [Ironic in Metal3](ironic/introduction.md)
34 | - [Install Ironic](ironic/ironic_installation.md)
35 | - [Ironic python agent](ironic/ironic-python-agent.md)
36 | - [Ironic container images](ironic/ironic-container-images.md)
37 | - [Ironic Standalone Operator](irso/introduction.md)
38 | - [Install Ironic with IrSO](irso/install-basics.md)
39 | - [External Database for Ironic](irso/database.md)
40 | - [Cluster-api-provider-metal3](capm3/introduction.md)
41 | - [Install Metal³ provider](capm3/installation_guide.md)
42 | - [Features](capm3/features.md)
43 | - [Remediation](capm3/remediaton.md)
44 | - [Node Reuse](capm3/node_reuse.md)
45 | - [Pivoting](capm3/pivoting.md)
46 | - [Automated cleaning](capm3/automated_cleaning.md)
47 | - [Ip-address-manager](ipam/introduction.md)
48 | - [Install Ip-address-manager](ipam/ipam_installation.md)
49 | - [Troubleshooting FAQ](troubleshooting.md)
50 | - [Try it (for developers)](developer_environment/tryit.md)
51 | - [API Reference](reference.md)
52 | - [Version Support](version_support.md)
53 | - [Project Security Policy](security_policy.md)
54 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/advanced_instance_customization.md:
--------------------------------------------------------------------------------
1 | # Instance Customization
2 |
3 | Below we cover more advanced instance customization: more complex use-cases
4 | and/or cases where customization of the metal3 deployment may be required.
5 |
6 | For more general guidance around instance customization refer to the
7 | [instance customization](./instance_customization.md) section.
8 |
9 | ## Pre-Provisioning NetworkData
10 |
11 | *Pre-provisioning network data* describes the desired networking configuration for the
12 | deploy ramdisk running `ironic-python-agent` (IPA).
13 |
14 | Usage of this API requires an IPA ramdisk image with a tool capable of interpreting and
15 | applying the data such as *cloud-init*, *Glean* or alternative. The default community
16 | supported ramdisk does not currently contain such a tool, but it is possible to build
17 | a custom image, for example using [ironic-python-agent-builder][ipa_builder] with the
18 | [simple-init][simple_init] element enabled.
19 |
20 | Specifying pre-provisioning network data is useful in DHCP-less scenarios, where we
21 | cannot rely on DHCP to provide network configuration for the IPA ramdisk during the
22 | inspection and provisioning phases. In this situation we can use redfish virtualmedia
23 | to boot the IPA ramdisk, and the generated virtualmedia ISO will also serve as a
24 | configuration drive to provide the network configuration.
25 |
26 | The data is specified in the [OpenStack network_data.json][network_data] format
27 | as described for *Network data* in the [instance customization](./instance_customization.md) section.
28 |
29 | Usually, one pre-provisioning network data secret is created per host and should be
30 | linked to it like *Network data*. If you require the same configuration for
31 | pre-provisioning and the deployed OS, it is only necessary to specify pre-provisioning
32 | network data - the pre-provisioning secret is automatically applied to networkData if
33 | no alternative secret is specified.
34 |
35 | For example, given a local file `host-0-network.json`, you can create a secret:
36 |
37 | ```bash
38 | kubectl create secret generic host-0-preprov-networkdata --from-file=networkData=host-0-network.json
39 | ```
40 |
41 | Then you can attach it to the host during its enrollment:
42 |
43 | ```yaml
44 | apiVersion: metal3.io/v1alpha1
45 | kind: BareMetalHost
46 | metadata:
47 | name: host-0
48 | namespace: my-cluster
49 | spec:
50 | online: true
51 | bootMACAddress: 80:c1:6e:7a:e8:10
52 | bmc:
53 | address: redfish-virtualmedia://192.168.1.13
54 | credentialsName: host-0-bmc
55 | preprovisioningNetworkDataName: host-0-preprov-networkdata
56 | ```
57 |
58 | [network_data]: https://docs.openstack.org/nova/latest/user/metadata.html#openstack-format-metadata
59 | [ipa_builder]: https://docs.openstack.org/ironic-python-agent-builder/
60 | [simple_init]: https://docs.openstack.org/diskimage-builder/latest/elements/simple-init/README.html
61 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/automated_cleaning.md:
--------------------------------------------------------------------------------
1 | # Automated Cleaning
2 |
3 | One of the Ironic features exposed to the Metal3 Baremetal Operator is [node
4 | automated
5 | cleaning](https://docs.openstack.org/ironic/latest/admin/cleaning.html#automated-cleaning).
6 | When enabled, automated cleaning kicks off when a node is provisioned for the
7 | first time and on every deprovisioning.
8 |
9 | There are two automated cleaning modes available which can be configured via
10 | `automatedCleaningMode` field of a BareMetalHost `spec`:
11 |
12 | - `metadata` (the default) enables the removal of partitioning tables from all
13 | disks
14 | - `disabled` disables the cleaning process
15 |
16 | For example:
17 |
18 | ```yaml
19 | apiVersion: metal3.io/v1alpha1
20 | kind: BareMetalHost
21 | metadata:
22 | name: example-host
23 | spec:
24 | automatedCleaningMode: metadata
25 | bootMACAddress: 00:8a:b6:8e:ac:b8
26 | bmc:
27 | address: ipmi://192.168.111.1:6230
28 | credentialsName: example-node-bmc-secret
29 | online: true
30 | ```
31 |
32 | **Note:** Ironic supports full data removal, which is not currently exposed in
33 | Metal3.
34 |
35 | For a host with cleaning disabled, no cleaning will be performed during
36 | deprovisioning. This is faster but may cause conflicts on subsequent
37 | provisionings (e.g. Ceph is known not to tolerate stale data partitions).
38 |
39 | **Warning:** when disabling cleaning, consider setting [root device
40 | hints](root_device_hints.md) to specify the exact block device to install to.
41 | Otherwise, subsequent provisionings may end up with different root devices,
42 | potentially causing incorrect configuration because of duplicated [config
43 | drives](instance_customization.md).
44 |
45 | If you are using Cluster-api-provider-metal3, please see [its cleaning
46 | documentation](../capm3/automated_cleaning.md).
47 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/automatic_secure_boot.md:
--------------------------------------------------------------------------------
1 | # Automatic secure boot
2 |
3 | The automatic secure boot feature allows enabling and disabling UEFI (Unified Extensible Firmware Interface) secure boot when provisioning a host. This feature requires supported hardware and a compatible OS image. The drivers that currently support enabling UEFI secure boot are `iLO`, `iRMC` and `Redfish`.
4 |
5 | Check also:
6 |
7 | - [Ironic UEFI secure boot](https://docs.openstack.org/ironic/latest/admin/security.html#uefi-secure-boot-mode)
8 | - [Wikipedia UEFI secure boot](https://en.wikipedia.org/wiki/UEFI#SECURE-BOOT)
9 |
10 | ## Why do we need it
11 |
12 | We need automatic secure boot when provisioning a host with high security requirements. Based on checksums and signatures, secure boot protects the host from loading malicious code during the boot process, before the provisioned operating system is loaded.
13 |
14 | ## How to use it
15 |
16 | To enable automatic secure boot, first check that the hardware is supported and then specify the value `UEFISecureBoot` for `bootMode` in the BareMetalHost custom resource. Please note that secure boot is enabled before booting into the deployed instance, and disabled while the ramdisk is running and on tear down. See the example below:
17 |
18 | ```YAML
19 | apiVersion: metal3.io/v1alpha1
20 | kind: BareMetalHost
21 | metadata:
22 | name: node-1
23 | spec:
24 | online: true
25 | bootMACAddress: 00:5c:52:31:3a:9c
26 | bootMode: UEFISecureBoot
27 | ...
28 | ```
29 |
30 | This will enable UEFI secure boot before booting the instance and disable it when the host is deprovisioned. Note that the default value for `bootMode` is `UEFI`.
31 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/detached_annotation.md:
--------------------------------------------------------------------------------
1 | # Detaching Hosts from Provisioner
2 |
3 | The detached annotation provides a way to prevent management of a BareMetalHost.
4 | It works by deleting the host information from Ironic without triggering deprovisioning.
5 | The BareMetal Operator will recreate the host in Ironic again once the annotation is removed.
6 | This annotation can be used with BareMetalHosts in `Provisioned`, `ExternallyProvisioned` or `Available` states.
7 |
8 | Normally, deleting a BareMetalHost will always trigger deprovisioning.
9 | This can be problematic and unnecessary if we just want to, for example, move the BareMetalHost from one cluster to another.
10 | By applying the annotation before removing the BareMetalHost from the old cluster, we can ensure that the host is not disrupted by this (normally it would be deprovisioned).
11 | The next step is then to recreate it in the new cluster without triggering a new inspection.
12 | See the [status annotation page](./status_annotation.md) for how to do this.
13 |
14 | The detached annotation is also useful if you want to move the host under
15 | control of a different management system without fully removing it from
16 | BareMetal Operator. Particularly, detaching a host stops Ironic from trying to
17 | enforce its power state as per the `online` field.
18 |
19 | For more details, please see the [design proposal](https://github.com/metal3-io/metal3-docs/blob/main/design/baremetal-operator/detached-annotation.md).
20 |
21 | ## How to detach
22 |
23 | The annotation key is `baremetalhost.metal3.io/detached` and the value can be anything (it is ignored).
24 | Here is an example:
25 |
26 | ```yaml
27 | apiVersion: metal3.io/v1alpha1
28 | kind: BareMetalHost
29 | metadata:
30 | name: example
31 | annotations:
32 | baremetalhost.metal3.io/detached: ""
33 | spec:
34 | online: true
35 | bootMACAddress: 00:8a:b6:8e:ac:b8
36 | bootMode: legacy
37 | bmc:
38 | address: ipmi://192.168.111.1:6230
39 | credentialsName: example-bmc-secret
40 | ...
41 | ```
42 |
43 | Now wait for the `operationalStatus` field to become `detached`.
44 |
45 | ## How to attach again
46 |
47 | If you want to attach a previously detached host, remove the annotation and
48 | wait for the `operationalStatus` field to become `OK`.
49 |
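Both operations can also be done with `kubectl`. A minimal sketch, assuming a host named `example` in the current namespace:

```console
# Detach the host (the value of the annotation is ignored)
kubectl annotate bmh example baremetalhost.metal3.io/detached=""

# Attach the host again by removing the annotation
kubectl annotate bmh example baremetalhost.metal3.io/detached-
```
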
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/external_inspection.md:
--------------------------------------------------------------------------------
1 | # External inspection
2 |
3 | Similar to the [status annotation](status_annotation.md), external inspection makes it possible to skip the inspection step.
4 | The difference is that the status annotation can only be used on the very first reconcile and allows setting all the fields under `status`.
5 | In contrast, external inspection limits the changes so that only HardwareDetails can be modified, and it can be used at any time when inspection is disabled (with the `inspect.metal3.io: disabled` annotation) or when there is no existing HardwareDetails data.
6 |
7 | External inspection is controlled through an annotation on the BareMetalHost.
8 | The annotation key is `inspect.metal3.io/hardwaredetails` and the value is a JSON representation of the BareMetalHost's `status.hardware` field.
9 |
10 | Here is an example with a BMH that has inspection disabled and is using the external inspection feature to add the HardwareDetails.
11 |
12 | ```yaml
13 | apiVersion: metal3.io/v1alpha1
14 | kind: BareMetalHost
15 | metadata:
16 | name: node-0
17 | namespace: metal3
18 | annotations:
19 | inspect.metal3.io: disabled
20 | inspect.metal3.io/hardwaredetails: |
21 | {"systemVendor":{"manufacturer":"QEMU", "productName":"Standard PC (Q35 + ICH9, 2009)","serialNumber":""}, "firmware":{"bios":{"date":"","vendor":"","version":""}},"ramMebibytes":4096, "nics":[{"name":"eth0","model":"0x1af4 0x0001","mac":"00:b7:8b:bb:3d:f6", "ip":"172.22.0.64","speedGbps":0,"vlanId":0,"pxe":true}], "storage":[{"name":"/dev/sda","rotational":true,"sizeBytes":53687091200, "vendor":"QEMU", "model":"QEMU HARDDISK","serialNumber":"drive-scsi0-0-0-0", "hctl":"6:0:0:0"}],"cpu":{"arch":"x86_64", "model":"Intel Xeon E3-12xx v2 (IvyBridge)","clockMegahertz":2494.224, "flags":["foo"],"count":4},"hostname":"hwdAnnotation-0"}
22 | spec:
23 | ...
24 | ```
25 |
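If you already have a host that went through inspection, one way to obtain such a JSON document is to read it back from that host's status. A sketch, assuming a previously inspected host `node-0` in the `metal3` namespace:

```console
kubectl get bmh node-0 -n metal3 -o jsonpath='{.status.hardware}'
```
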
26 | Why is this needed?
27 |
28 | - It allows avoiding an extra reboot for live-images that include their own inspection tooling.
29 | - It provides an arguably safer alternative to the status annotation in some cases.
30 |
31 | Caveats:
32 |
33 | - If both `baremetalhost.metal3.io/status` and `inspect.metal3.io/hardwaredetails` are specified on BareMetalHost creation, `inspect.metal3.io/hardwaredetails` will take precedence and overwrite any hardware data specified via `baremetalhost.metal3.io/status`.
34 | - If the BareMetalHost is in the `Available` state the controller will not attempt to match profiles based on the annotation.
35 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/externally_provisioned.md:
--------------------------------------------------------------------------------
1 | # Adopting Externally Provisioned Hosts
2 |
3 | BareMetal Operator allows enrolling hosts that have been previously provisioned
4 | by a 3rd party without making them go through inspection, cleaning and
5 | re-provisioning. Hosts are enrolled as usual, additionally setting the
6 | `externallyProvisioned` field to `true`:
7 |
8 | ```yaml
9 | apiVersion: metal3.io/v1alpha1
10 | kind: BareMetalHost
11 | metadata:
12 | name: host-0
13 | namespace: my-cluster
14 | spec:
15 | online: true
16 | bootMACAddress: 80:c1:6e:7a:e8:10
17 | bmc:
18 | address: ipmi://192.168.1.13
19 | credentialsName: host-0-bmc
20 | externallyProvisioned: true
21 | ```
22 |
23 | Such hosts move from the `registering` provisioning state directly into
24 | `externally provisioned` as shown in the [state machine](./state_machine.md):
25 |
26 | ```yaml
27 | status:
28 | # ...
29 | operationalStatus: OK
30 | provisioning:
31 | ID: 8799e0d2-d2ca-4681-9385-e8bd69f6f441
32 | bootMode: UEFI
33 | image:
34 | url: ""
35 | state: externally provisioned
36 | ```
37 |
38 | **Note:** while it's currently not possible to get a host out of the
39 | `externally provisioned` state, it's better to future-proof your hosts by
40 | adding a real `image` field so that your externally provisioned hosts look
41 | exactly like normal ones.
42 |
43 | ## Available actions
44 |
45 | Currently, only a limited set of actions is possible on externally provisioned
46 | hosts:
47 |
48 | - Powering on and off using the `online` field (see the example below).
49 | - Rebooting using the [reboot annotation](./reboot_annotation.md).
50 | - [Live updates (servicing)](./live_updates_servicing.md).
51 | - Deletion without cleaning (the host is only powered off).
52 |
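For example, powering the host off via the `online` field could be done with `kubectl patch` (a sketch using the host name and namespace from the example above):

```console
kubectl patch bmh host-0 -n my-cluster --type merge -p '{"spec": {"online": false}}'
```
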
53 | **Warning:** changing the `externallyProvisioned` field back to `false` is
54 | currently not supported (see the [tracker
55 | bug](https://github.com/metal3-io/baremetal-operator/issues/2465)).
56 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/features.md:
--------------------------------------------------------------------------------
1 | # Baremetal Operator features
2 |
3 | ## Basic features
4 |
5 | - [Provisioning and Deprovisioning](./provisioning.md)
6 | - [Automated Cleaning](./automated_cleaning.md)
7 | - [Automatic Secure Boot](./automatic_secure_boot.md)
8 | - [Controlling Inspection](./inspect_annotation.md)
9 | - [Firmware Settings](./firmware_settings.md)
10 | - [Firmware Updates](./firmware_updates.md)
11 | - [Instance Customization](./instance_customization.md)
12 | - [RAID Setup](./raid.md)
13 | - [Rebooting Hosts](./reboot_annotation.md)
14 | - [Specifying Root Device](./root_device_hints.md)
15 |
16 | ## Advanced features
17 |
18 | - [Adopting Externally Provisioned Hosts](./externally_provisioned.md)
19 | - [Advanced Instance Customization](./advanced_instance_customization.md)
20 | - [Booting from Live ISO](./live-iso.md)
21 | - [Detaching Hosts from Provisioner](./detached_annotation.md)
22 | - [External Inspection](./external_inspection.md)
23 | - [Reconstructing Host Status](./status_annotation.md)
24 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/images/ipa-provisioning.plantuml:
--------------------------------------------------------------------------------
1 | @startuml
2 | title: Cluster API - full flow
3 | skinparam BackgroundColor transparent
4 |
5 | participant User as User
6 | participant "CAPI Provider Metal3" as CAPM3
7 | participant "BareMetal Operator" as BMO
8 | participant Ironic as Ironic
9 | participant "Physical Server" as Server
10 |
11 | group "Inspection"
12 | User -> BMO: Create BareMetalHost
13 | BMO -> Ironic: Create a Node
14 | BMO -> Ironic: Start Inspection
15 | Ironic -> Server: Boot the service ramdisk
16 | Server -> Ironic: Send back hardware details
17 | Ironic -> BMO: Success!
18 | BMO -> Ironic: Fetch hardware details
end
19 |
20 | group "Machine Provisioning"
21 | User -> CAPM3: Create Metal3Machine
22 | CAPM3 <-> BMO: Pick a suitable BareMetalHost
23 | CAPM3 -> BMO: Populate image and user data
24 | BMO -> Ironic: Start Provisioning
25 | Ironic -> Server: Clean the disks
26 | Ironic -> Server: Start Provisioning
27 | Server -> Ironic: Fetch image
28 | Ironic -> Server: Reboot into instance
29 | Ironic -> BMO: Success!
30 | BMO -> CAPM3: Success!
end
31 | @enduml
32 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/images/ipa-provisioning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/docs/user-guide/src/bmo/images/ipa-provisioning.png
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/images/provisioning-states.dot:
--------------------------------------------------------------------------------
1 | digraph BaremetalHost {
2 | Created [shape=house]
3 | Created -> Unmanaged [label="BMC.* == \"\""]
4 | Created -> Registering [label="BMC.* != \"\""]
5 |
6 | Unmanaged [shape=doublecircle]
7 | Unmanaged -> Registering [label="BMC.* != \"\""]
8 | Unmanaged -> Deleting [label="!DeletionTimestamp.IsZero()"]
9 |
10 | ExternallyProvisioned [label="Externally\nProvisioned"]
11 |
12 | Registering -> Inspecting [label="!externallyProvisioned &&\nNeedsHardwareInspection()"]
13 | Registering -> Preparing [label="!externallyProvisioned &&\ninspectionDisabled()"]
14 | Registering -> ExternallyProvisioned [label="externallyProvisioned"]
15 | Registering -> Deleting [label="!DeletionTimestamp.IsZero()"]
16 |
17 | /* NOTE(dtantsur): this is currently broken: https://github.com/metal3-io/baremetal-operator/issues/2465
18 | ExternallyProvisioned -> Inspecting [label="!externallyProvisioned &&\nNeedsHardwareInspection()"]
19 | ExternallyProvisioned -> Preparing [label="!externallyProvisioned &&\n!NeedsHardwareInspection()"]
20 | */
21 | Available -> ExternallyProvisioned [label="externallyProvisioned"]
22 |
23 | Inspecting -> Preparing [label="done"]
24 | Inspecting -> PoweringOffBeforeDelete [label="!DeletionTimestamp.IsZero()"]
25 |
26 | Preparing -> Available [label="done"]
27 | Preparing -> PoweringOffBeforeDelete [label="!DeletionTimestamp.IsZero()"]
28 |
29 | Available [shape=doublecircle]
30 | Available -> Provisioning [label="NeedsProvisioning()"]
31 | Available -> Preparing [label="saveHostProvisioningSettings()"]
32 | Available -> Preparing [label="getHostFirmwareSettings()"]
33 | Available -> PoweringOffBeforeDelete [label="!DeletionTimestamp.IsZero()"]
34 | Available -> Inspecting [label="hasInspectAnnotation()"]
35 |
36 | Deleting7 [shape=point]
37 |
38 | Provisioning -> Provisioned [label=done]
39 | Provisioning -> Deprovisioning [label="failed ||\n!DeletionTimestamp.IsZero()"]
40 |
41 | Provisioned [shape=doublecircle]
42 | Provisioned -> Deprovisioning [label="provisioningCancelled()"]
43 | Provisioned -> Deprovisioning [label="!DeletionTimestamp.IsZero()"]
44 |
45 | ExternallyProvisioned [shape=doublecircle]
46 | ExternallyProvisioned -> PoweringOffBeforeDelete [label="!DeletionTimestamp.IsZero()"]
47 |
48 | Deprovisioning -> Provisioning [label="NeedsProvisioning()"]
49 | Deprovisioning -> Available [label="!NeedsProvisioning()"]
50 |
51 | Deprovisioning -> PoweringOffBeforeDelete
52 | PoweringOffBeforeDelete -> Deleting
53 |
54 | Deleting [shape=doublecircle]
55 | }
56 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/images/provisioning-states.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/docs/user-guide/src/bmo/images/provisioning-states.png
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/inspect_annotation.md:
--------------------------------------------------------------------------------
1 | # Controlling Inspection
2 |
3 | ## Re-running inspection
4 |
5 | The inspect annotation can be used to request the BareMetal Operator to
6 | (re-)inspect an `available` BareMetalHost, for example, when the hardware
7 | changes. If an inspection request is made while the host is in any other
8 | state than `available`, the request will be ignored.
9 |
10 | To request a new inspection, simply annotate the host with `inspect.metal3.io`.
11 | Once inspection is requested, you should see the BMH in `inspecting` state
12 | until inspection is completed, and by the end of inspection the
13 | `inspect.metal3.io` annotation will be removed automatically.
14 |
15 | Here is an example:
16 |
17 | ```yaml
18 | apiVersion: metal3.io/v1alpha1
19 | kind: BareMetalHost
20 | metadata:
21 | name: example
22 | annotations:
23 | # The inspect annotation with no value
24 | inspect.metal3.io: ""
25 | spec:
26 | ...
27 | ```
28 |
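The annotation can also be added with `kubectl`. A minimal sketch, assuming a host named `example` in the current namespace:

```console
kubectl annotate bmh example inspect.metal3.io=""
```
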
29 | ## Disabling inspection
30 |
31 | If you do not need the HardwareData collected by inspection, you can disable it
32 | by setting the `inspect.metal3.io` annotation to `disabled`, for example:
33 |
34 | ```yaml
35 | apiVersion: metal3.io/v1alpha1
36 | kind: BareMetalHost
37 | metadata:
38 | name: example
39 | annotations:
40 | inspect.metal3.io: disabled
41 | spec:
42 | ...
43 | ```
44 |
45 | For advanced use cases, such as providing externally gathered inspection data,
46 | see [external inspection](./external_inspection.md).
47 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/introduction.md:
--------------------------------------------------------------------------------
1 | # Bare Metal Operator
2 |
3 | The Bare Metal Operator (BMO) is a Kubernetes controller that manages
4 | bare-metal hosts, represented in Kubernetes by `BareMetalHost` (BMH) *custom
5 | resources*.
6 |
7 | BMO is responsible for the following operations:
8 |
9 | - Inspecting the host’s hardware and reporting the details on the corresponding
10 | BareMetalHost. This includes information about CPUs, RAM, disks, NICs, and
11 | more.
12 | - Optionally preparing the host by configuring RAID, changing firmware settings
13 | or updating the system and/or BMC firmware.
14 | - Provisioning the host with a desired image.
15 | - Cleaning the host’s disk contents before and after provisioning.
16 |
17 | Under the hood, BMO uses [Ironic](../ironic/introduction) to conduct these
18 | actions.
19 |
20 | ## Enrolling BareMetalHosts
21 |
22 | To enroll a bare-metal machine as a `BareMetalHost`, you need to know at least
23 | the following properties:
24 |
25 | 1. The IP address and credentials of the BMC - the remote management controller
26 | of the host.
27 | 2. The protocol that the BMC understands. Most common are IPMI and Redfish.
28 | See [supported hardware](supported_hardware) for more details.
29 | 3. Boot technology that can be used with the host and the chosen protocol.
30 | Most hardware can use network booting, but some Redfish implementations also
31 | support virtual media (CD) boot.
32 | 4. MAC address that is used for booting. **Important:** it's a MAC address of
33 | an actual NIC of the host, not the BMC MAC address.
34 | 5. The desired boot mode: UEFI or legacy BIOS. UEFI is the default and should
35 | be used unless there are serious reasons not to.
36 |
37 | This is a minimal example of a valid BareMetalHost:
38 |
39 | ```yaml
40 | apiVersion: metal3.io/v1alpha1
41 | kind: BareMetalHost
42 | metadata:
43 | name: node-0
44 | namespace: metal3
45 | spec:
46 | bmc:
47 | address: ipmi://192.168.111.1:6230
48 | credentialsName: node-0-bmc-secret
49 | bootMACAddress: 00:5a:91:3f:9a:bd
50 | online: true
51 | ```
52 |
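The referenced `node-0-bmc-secret` is a regular Kubernetes secret that holds the BMC credentials under the `username` and `password` keys. A minimal sketch (the credential values are placeholders):

```yaml
apiVersion: v1
kind: Secret
metadata:
  name: node-0-bmc-secret
  namespace: metal3
type: Opaque
stringData:
  username: admin
  password: changeme
```
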
53 | When this resource is created, it will undergo *inspection* that will populate
54 | more fields as part of the `status`.
55 |
56 | ## Deploying BareMetalHosts
57 |
58 | To provision a bare-metal machine, you will need a few more properties:
59 |
60 | 1. The URL and checksum of the image. Images should be in QCOW2 or raw format.
61 | It is common to use various cloud images with BMO, e.g.
62 | [Ubuntu](https://cloud-images.ubuntu.com/) or
63 | [CentOS](https://cloud.centos.org/centos/). **Important:** not all images
64 | are compatible with UEFI boot - check their description.
65 | 2. Optionally, user data: a secret with a configuration or a script that is
66 | interpreted by the first-boot service embedded in your image. The most
67 | common service is
68 | [cloud-init](https://cloudinit.readthedocs.io/en/latest/index.html), some
69 | distributions use [ignition](https://coreos.github.io/ignition/).
70 | 3. Optionally, network data: a secret with the network configuration that is
71 | interpreted by the first-boot service. In some cases, the network data is
72 | embedded in the user data instead.
73 |
74 | Here is a complete example of a host that will be provisioned with a CentOS 9
75 | image:
76 |
77 | ```yaml
78 | apiVersion: metal3.io/v1alpha1
79 | kind: BareMetalHost
80 | metadata:
81 | name: node-0
82 | namespace: metal3
83 | spec:
84 | bmc:
85 | address: ipmi://192.168.111.1:6230
86 | credentialsName: node-0-bmc-secret
87 | bootMACAddress: 00:5a:91:3f:9a:bd
88 | image:
89 | checksum: http://172.22.0.1/images/CENTOS_9_NODE_IMAGE_K8S_v1.33.0.qcow2.sha256sum
90 | url: http://172.22.0.1/images/CENTOS_9_NODE_IMAGE_K8S_v1.33.0.qcow2
91 | networkData:
92 | name: test1-workers-tbwnz-networkdata
93 | namespace: metal3
94 | online: true
95 | userData:
96 | name: test1-workers-vd4gj
97 | namespace: metal3
98 | status:
99 | hardware:
100 | cpu:
101 | arch: x86_64
102 | count: 2
103 | hostname: node-0
104 | nics:
105 | - ip: 172.22.0.73
106 | mac: 00:5a:91:3f:9a:bd
107 | name: enp1s0
108 | ramMebibytes: 4096
109 | storage:
110 | - hctl: "0:0:0:0"
111 | name: /dev/sda
112 | serialNumber: drive-scsi0-0-0-0
113 | sizeBytes: 53687091200
114 | type: HDD
115 | ```
116 |
117 | ## Integration with the cluster API
118 |
119 | [CAPM3](../capm3/introduction) is the Metal3 component that is responsible for
120 | integration between Cluster API resources and BareMetalHosts. When using Metal3
121 | with CAPM3, you will enroll BareMetalHosts as described above first, then use
122 | `Metal3MachineTemplate` to describe how hosts should be deployed, i.e. which
123 | images and user data to use.
124 |
125 | This happens, for example, when the user scales a MachineDeployment so that a
126 | server needs to be added to the cluster, or during an upgrade when a host must
127 | change the image it is booting from:
128 |
129 | 
130 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/ironic_bmo_configmap.env:
--------------------------------------------------------------------------------
1 | # HTTP_PORT=6180
2 | PROVISIONING_INTERFACE=eth2
3 | DHCP_RANGE=172.22.0.10,172.22.0.100
4 | DEPLOY_KERNEL_URL=http://172.22.0.2:6180/images/ironic-python-agent.kernel
5 | DEPLOY_RAMDISK_URL=http://172.22.0.2:6180/images/ironic-python-agent.initramfs
6 | IRONIC_ENDPOINT=http://172.22.0.2:6385/v1/
7 | # IRONIC_INSPECTOR_ENDPOINT=http://172.22.0.2:5050/v1/
8 | CACHEURL=http://172.22.0.1/images
9 | IRONIC_FAST_TRACK=true
10 | IRONIC_KERNEL_PARAMS=console=ttyS0
11 | IRONIC_INSPECTOR_VLAN_INTERFACES=all
12 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/live-iso.md:
--------------------------------------------------------------------------------
1 | # Booting from Live ISO
2 |
3 | The live-iso API in Metal3 allows booting a BareMetalHost with an ISO image
4 | instead of writing an image to the local disk using the IPA deploy ramdisk.
5 |
6 | This feature has two primary use cases:
7 |
8 | - Running ephemeral load on hosts (e.g. calculations or simulations that do not
9 | store local data).
10 | - Integrating a 3rd party installer (e.g. [coreos
11 | installer](https://docs.fedoraproject.org/en-US/fedora-coreos/bare-metal/)).
12 |
13 | **Warning:** this feature is designed to work with virtual media (see
14 | [supported hardware](./supported_hardware.md)). While it's possible to boot an
15 | ISO over iPXE, the booted OS will not be able to access any data on the ISO
16 | except for the kernel and initramfs it booted from.
17 |
18 | To boot a live ISO, you need to set the image URL to the location of the ISO
19 | and set the `format` field to `live-iso`, for example:
20 |
21 | ```yaml
22 | apiVersion: metal3.io/v1alpha1
23 | kind: BareMetalHost
24 | metadata:
25 | name: live-iso-booted-node
26 | spec:
27 | bootMACAddress: 80:c1:6e:7a:e8:10
28 | bmc:
29 | address: redfish-virtualmedia://192.168.111.1:8000/redfish/v1/Systems/1
30 | credentialsName: live-iso-booted-node-secret
31 | image:
32 | url: http://1.2.3.4/image.iso
33 | format: live-iso
34 | online: true
35 | ```
36 |
37 | **Note**: `image.checksum`, `rootDeviceHints`, `networkData` and `userData`
38 | will not be used since the image is not written to disk.
39 |
40 | For more details, please see the [design proposal](https://github.com/metal3-io/metal3-docs/blob/main/design/baremetal-operator/bmh_live_iso.md).
41 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/live_updates_servicing.md:
--------------------------------------------------------------------------------
1 | # Live updates (servicing)
2 |
3 | Live updates (servicing) enables baremetal-operator to conduct certain actions
4 | on already provisioned BareMetalHosts. These actions currently include:
5 |
6 | - [configuring firmware settings](./firmware_settings.md)
7 | - [updating BIOS and/or BMC firmware](./firmware_updates.md)
8 |
9 | Live updates (servicing) is an opt-in feature. Operators may enable this
10 | feature by creating a `HostUpdatePolicy` custom resource.
11 |
12 | ## HostUpdatePolicy custom resource definition
13 |
14 | HostUpdatePolicy is the custom resource which controls applying live updates.
15 | Each part of the functionality can be controlled separately by setting the
16 | respective entry in the HostUpdatePolicy spec:
17 |
18 | - `firmwareSettings` - controls changes to firmware settings
19 | - `firmwareUpdates` - controls BIOS and BMC firmware updates
20 |
21 | ### Allowed values for firmwareSettings and firmwareUpdates fields
22 |
23 | Each of the fields can be set to one of the two values:
24 |
25 | - `onReboot` - enables performing the requested change on next reboot, or
26 | - `onPreparing` - (default setting) limits applying this type of change to
27 | Preparing state (which only applies to nodes which are being provisioned)
28 |
29 | ### Example HostUpdatePolicy definition
30 |
31 | Here is an example of a HostUpdatePolicy CRD:
32 |
33 | ```yaml
34 | apiVersion: metal3.io/v1alpha1
35 | kind: HostUpdatePolicy
36 | metadata:
37 | name: ostest-worker-0
38 | namespace: openshift-machine-api
39 | spec:
40 | firmwareSettings: onReboot
41 | firmwareUpdates: onReboot
42 | ```
43 |
44 | ## How to perform Live updates on a BareMetalHost
45 |
46 | - create a HostUpdatePolicy resource with the name matching the BMH to be
47 | updated
48 | - use the format above, ensure `firmwareSettings` and/or `firmwareUpdates` is
49 | set to `onReboot`
50 | - make changes to [HostFirmwareSettings](./firmware_settings.md) and/or [HostFirmwareComponents](./firmware_updates.md) as required
51 | - make sure the modified resources are considered valid (see `Conditions`)
52 | - if you're updating a Kubernetes node, make sure to drain it and mark as
53 | not schedulable
54 | - issue a reboot request via the [reboot annotation](./reboot_annotation.md)
55 | - wait for the `operationalStatus` to become `OK` again
56 | - if you're updating a Kubernetes node, make it schedulable again
57 |
58 | ### Example commands
59 |
60 | The commands below may be used to perform a servicing operation on a BareMetalHost:
61 |
62 | ```bash
63 | cat << EOF > hup.yaml
64 | apiVersion: metal3.io/v1alpha1
65 | kind: HostUpdatePolicy
66 | metadata:
67 | name: ostest-worker-0
68 | namespace: openshift-machine-api
69 | spec:
70 | firmwareSettings: onReboot
71 | firmwareUpdates: onReboot
72 | EOF
73 | ```
74 |
75 | ```console
76 | kubectl apply -f hup.yaml
77 |
78 | kubectl patch hostfirmwaresettings ostest-worker-0 --type merge -p \
79 | '{"spec": {"settings": {"QuietBoot": "true"}}}'
80 |
81 | kubectl patch hostfirmwarecomponents ostest-worker-0 --type merge -p \
82 | '{"spec": {"updates": [{"component": "bios",
83 | "url": "http://10.6.48.30:8080/firmimgFIT.d9"}]}}'
84 |
85 | kubectl cordon worker-0
86 |
87 | kubectl annotate bmh ostest-worker-0 reboot.metal3.io=""
88 | ```
89 |
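You can follow the progress by watching the host until its `operationalStatus` returns to `OK`, for example (the exact columns shown depend on the printer columns of the BareMetalHost CRD):

```console
kubectl get bmh ostest-worker-0 -n openshift-machine-api -w
```
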
90 | Once the operation is complete, the node can be uncordoned with the command below:
91 |
92 | ```console
93 | kubectl uncordon worker-0
94 | ```
95 |
96 | ### Resulting workflow
97 |
98 | Once changes similar to the above are made to the relevant CRDs, the following
99 | will occur:
100 |
101 | - BMO will generate [servicing steps](https://docs.openstack.org/ironic/latest/admin/servicing.html) (similar to manual cleaning steps)
102 | required to perform the requested changes
103 | - BMH will transition to `servicing` operationalStatus
104 | - BMO will make calls to Ironic which will perform the servicing operation
105 | - Ironic will reboot the BMH into the IPA image and perform requested changes
106 | - depending on the hardware, more than one reboot may be required
107 | - once servicing completes, BMO will update the operationalStatus to `OK`
108 | - in case errors are encountered, BMO will set operationalStatus to `error`,
109 | set errorMessage to the explanation of the error, and retry the operation after
110 | a short delay
111 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/reboot_annotation.md:
--------------------------------------------------------------------------------
1 | # Rebooting hosts
2 |
3 | The reboot annotation can be used for rebooting BareMetalHosts in the
4 | `provisioned` state. The annotation key takes either of the following forms:
5 |
6 | - `reboot.metal3.io`
7 | - `reboot.metal3.io/{key}`
8 |
9 | **Note:** use the `online` field to power hosts on/off instead of rebooting.
10 |
11 | ## Simple reboot
12 |
13 | In its basic form (`reboot.metal3.io`), the annotation will trigger a reboot of
14 | the BareMetalHost. The controller will remove the annotation as soon as it has
15 | restored power to the host.
16 |
17 | The annotation value should be a JSON map containing the key `mode` and a value
18 | `hard` or `soft` to indicate if a hard or soft reboot should be performed. If
19 | the value is an empty string, the default is to first try a soft reboot, and if
20 | that fails, do a hard reboot.
21 |
22 | ## Phased reboot
23 |
24 | The advanced form (`reboot.metal3.io/{key}`) includes a unique suffix
25 | (indicated with `{key}`). In this form the host will be kept in `PoweredOff`
26 | state until the annotation has been removed. This can be useful if some tasks
27 | need to be performed while the host is in a known stable state. The purpose
28 | of the `{key}` is to allow multiple clients to use the API simultaneously in a
29 | safe way. Each client chooses a key and touches only the annotations that have
30 | this key to avoid interfering with other clients.
31 |
32 | If there are multiple annotations, the controller will wait for all of them to
33 | be removed (by the clients) before powering on the host. Similarly, if both
34 | forms of annotations are used, the `reboot.metal3.io/{key}` form will take
35 | precedence. This ensures that the host stays powered off until all clients are
36 | ready (i.e. all annotations are removed).
37 |
38 | Clients using this API must respect each other and clean up after themselves.
39 | Otherwise they will step on each other's toes by, for example, leaving an
40 | annotation indefinitely or removing someone else's annotation before its owner
41 | is ready.
42 |
43 | ## Examples
44 |
45 | Immediate reboot via soft shutdown first, followed by a hard power-off if the
46 | soft shutdown fails:
47 |
48 | ```yaml
49 | apiVersion: metal3.io/v1alpha1
50 | kind: BareMetalHost
51 | metadata:
52 | name: example
53 | annotations:
54 | reboot.metal3.io: ""
55 | spec:
56 | ...
57 | ```
58 |
59 | Immediate reboot via hard power-off action:
60 |
61 | ```yaml
62 | apiVersion: metal3.io/v1alpha1
63 | kind: BareMetalHost
64 | metadata:
65 | name: example
66 | annotations:
67 | reboot.metal3.io: '{"mode": "hard"}'
68 | spec:
69 | ...
70 | ```
71 |
72 | Phased reboot, issued and managed by the client registered with the key
73 | `cli42`, via soft shutdown first, followed by a hard reboot if the soft reboot
74 | fails:
75 |
76 | ```yaml
77 | apiVersion: metal3.io/v1alpha1
78 | kind: BareMetalHost
79 | metadata:
80 | name: example
81 | annotations:
82 | reboot.metal3.io/cli42: ""
83 | spec:
84 | ...
85 | ```
86 |
87 | Phased reboot, issued and managed by the client registered with the key
88 | `cli42`, via a hard shutdown:
89 |
90 | ```yaml
91 | apiVersion: metal3.io/v1alpha1
92 | kind: BareMetalHost
93 | metadata:
94 | name: example
95 | annotations:
96 | reboot.metal3.io/cli42: '{"mode": "hard"}'
97 | spec:
98 | ...
99 | ```
100 |
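The annotations are typically managed with `kubectl`. A minimal sketch for a phased reboot, assuming a host named `example` in the current namespace:

```console
# Power the host off and keep it off
kubectl annotate bmh example reboot.metal3.io/cli42=""

# Allow the host to be powered on again by removing the annotation
kubectl annotate bmh example reboot.metal3.io/cli42-
```
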
101 | ## Implementation notes
102 |
103 | The exact behavior of `hard` and `soft` reboot depends on the Ironic
104 | configuration. Please see the [Ironic configuration
105 | reference](https://docs.openstack.org/ironic/latest/configuration/config.html)
106 | for more details on this, e.g. the `soft_power_off_timeout` variable is
107 | relevant.
108 |
109 | For more details please check the [reboot interface
110 | proposal](https://github.com/metal3-io/metal3-docs/blob/main/design/baremetal-operator/reboot-interface.md).
111 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/root_device_hints.md:
--------------------------------------------------------------------------------
1 | # Specifying Root Device
2 |
3 | Bare-metal machines often have more than one block device, and in many cases
4 | a user will want to specify which of them to use as the root device. *Root
5 | device hints* allow selecting one device or a group of devices to choose from.
6 | You can provide the hints via the `spec.rootDeviceHints` field on your
7 | `BareMetalHost`:
8 |
9 | ```yaml
10 | spec:
11 | # ...
12 | rootDeviceHints:
13 | wwn: "0x55cd2e415652abcd"
14 | ```
15 |
16 | **Hint:** root device hints in Metal3 are closely modeled on Ironic's [root
17 | device hints][ironic-hints], but there are important differences in available
18 | hints and the comparison operators they use.
19 |
20 | **Warning:** the default root device depends on the hardware profile as
21 | explained below. Currently, the `/dev/sda` path is used when no hints are
22 | specified. This value is not going to work for NVMe storage. Furthermore, Linux
23 | does not guarantee the block device names to be consistent across reboots.
24 |
25 | [ironic-hints]: https://docs.openstack.org/ironic/latest/install/advanced.html#specifying-the-disk-for-deployment-root-device-hints
26 |
27 | ## RootDeviceHints format
28 |
29 | One or more hints can be provided; the chosen device will need to match all of
30 | them. Available hints are:
31 |
32 | - ``deviceName`` -- A string containing a canonical Linux device path like
33 | `/dev/vda` or a *by-path* alias like `/dev/disk/by-path/pci-0000:04:00.0`.
34 |
35 | **Warning:** as mentioned above, block device names are not guaranteed to be
36 | consistent across reboots. If possible, choose a more reliable hint, such as
37 | `wwn` or `serialNumber`.
38 |
39 | **Hint:** only *by-path* aliases are supported, other aliases, such as
40 | *by-id* or *by-uuid*, cannot currently be used.
41 |
42 | - `hctl` -- A string containing a SCSI bus address like `0:0:0:0`.
43 |
44 | - `model` -- A string containing a vendor-specific device
45 | identifier. The hint can be a substring of the actual value.
46 |
47 | - `vendor` -- A string containing the name of the vendor or
48 | manufacturer of the device. The hint can be a substring of the
49 | actual value.
50 |
51 | - `serialNumber` -- A string containing the device serial number.
52 |
53 | - `minSizeGigabytes` -- An integer representing the minimum size of the
54 | device in Gigabytes.
55 |
56 | - `wwn` -- A string containing the unique storage identifier.
57 |
58 | - `wwnWithExtension` -- A string containing the unique storage
59 | identifier with the vendor extension appended.
60 |
61 | - `wwnVendorExtension` -- A string containing the unique vendor
62 | storage identifier.
63 |
64 | - `rotational` -- A boolean indicating whether the device must be
65 | a rotating disk (`true`) or not (`false`). Examples of non-rotational devices
66 | include SSD and NVMe storage.
67 |
68 | ## Finding the right hint value
69 |
70 | Since the root device hints are only required for provisioning, you can use the
71 | results of inspection to get an overview of available storage devices:
72 |
73 | ```bash
74 | kubectl get hardwaredata/<host-name> -n <namespace> -o jsonpath='{.spec.hardware.storage}' | jq .
75 | ```
76 |
77 | This command produces JSON output, where you can find all the necessary fields
78 | to populate the root device hints before provisioning. For example, on a
79 | virtual testing environment:
80 |
81 | ```json
82 | [
83 | {
84 | "alternateNames": [
85 | "/dev/sda",
86 | "/dev/disk/by-path/pci-0000:03:00.0-scsi-0:0:0:0"
87 | ],
88 | "hctl": "0:0:0:0",
89 | "model": "QEMU HARDDISK",
90 | "name": "/dev/disk/by-path/pci-0000:03:00.0-scsi-0:0:0:0",
91 | "rotational": true,
92 | "serialNumber": "drive-scsi0-0-0-0",
93 | "sizeBytes": 32212254720,
94 | "type": "HDD",
95 | "vendor": "QEMU"
96 | }
97 | ]
98 | ```
99 |
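Based on this output, a reliable hint for the device above would be its serial number (a sketch; substitute the values from your own inspection data):

```yaml
spec:
  # ...
  rootDeviceHints:
    serialNumber: "drive-scsi0-0-0-0"
```
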
100 | ## Interaction with hardware profiles
101 |
102 | *Hardware profiles* are a deprecated concept that was introduced to describe
103 | homogeneous types of hardware. The default hardware profile is `unknown`, which
104 | implies using `/dev/sda` as the root device.
105 |
106 | In a future version of the BareMetalHost API, the hardware profile concept will
107 | be disabled, and Metal3 will have no root device hints by default. In this
108 | case, the default logic in Ironic will apply: the smallest block device that is
109 | at least 4 GiB. If you want this logic to apply in the current version of the
110 | API, use the `empty` profile:
111 |
112 | ```yaml
113 | spec:
114 | # ...
115 | hardwareProfile: empty
116 | ```
117 |
118 | In all other cases, use explicit root device hints.
119 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/state_machine.md:
--------------------------------------------------------------------------------
1 | # Host State Machine
2 |
3 | During its lifetime, a `BareMetalHost` resource goes through a series of
4 | various states. Some of them are *stable* (the host stays in them indefinitely
5 | without user input), some are *transient* (the state will change once a certain
6 | operation completes). These fields in the `status` resource define the current
7 | state of the host:
8 |
9 | - `status.provisioning.state` -- the current phase of the provisioning process.
10 | - `status.operationHistory` -- the history of the main provisioning phases:
11 | registration, inspection, provisioning and deprovisioning.
12 | - `status.operationalStatus` -- the overall status of the host.
13 | - `status.errorType` -- the type of the current error (if any).
14 | - `status.poweredOn` -- the current power state of the host.
15 |
16 | This is what the status of a healthy provisioned host may look like:
17 |
18 | ```yaml
19 | status:
20 | # ...
21 | operationHistory:
22 | deprovision:
23 | end: null
24 | start: null
25 | inspect:
26 | end: "2024-06-17T13:09:07Z"
27 | start: "2024-06-17T13:03:54Z"
28 | provision:
29 | end: "2024-06-17T13:11:18Z"
30 | start: "2024-06-17T13:09:26Z"
31 | register:
32 | end: "2024-06-17T13:03:54Z"
33 | start: "2024-06-17T12:54:18Z"
34 | operationalStatus: OK
35 | poweredOn: true
36 | provisioning:
37 | ID: e09032ea-1b7d-4c50-bfcd-b94ff7e8d431
38 | bootMode: UEFI
39 | image:
40 | checksumType: sha256
41 | checksum: http://192.168.0.150/SHA256SUMS
42 | format: qcow2
43 | url: http://192.168.0.150/jammy-server-cloudimg-amd64.img
44 | rootDeviceHints:
45 | deviceName: /dev/sda
46 | state: provisioned
47 | # ...
48 | ```
49 |
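A quick way to check these fields is a plain `kubectl get`, which (with the default printer columns of the BareMetalHost CRD) shows the provisioning state and operational status, or a `jsonpath` query for a raw value (a sketch, assuming the host and namespace used elsewhere in this guide):

```console
kubectl get bmh -n metal3
kubectl get bmh node-0 -n metal3 -o jsonpath='{.status.provisioning.state}{"\n"}'
```
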
50 | ## OperationalStatus
51 |
52 | - `OK` -- the host is healthy and operational.
53 | - `discovered` -- the host is known to Metal3 but lacks the information
54 |   required for normal operation (usually, the BMC credentials).
55 | - `error` -- an error has occurred, see the `status.errorType` and
56 |   `status.errorMessage` fields for details.
57 | - `delayed` -- cannot proceed with provisioning because the maximum number
58 |   of hosts in the given state has been reached.
59 | - `detached` -- the host is detached, no provisioning actions are possible
60 | (see [detached annotation](./detached_annotation.md) for details).
61 |
62 | ## Provisioning state machine
63 |
64 | 
65 |
66 | ## Provisioning states
67 |
68 | ### Creating
69 |
70 | Newly created hosts get an empty provisioning state briefly before moving
71 | either to `unmanaged` or `registering`.
72 |
73 | ### Unmanaged
74 |
75 | An `unmanaged` host is missing both the BMC address and credentials
76 | secret name, and does not have any information to access the BMC
77 | for registration.
78 |
79 | The corresponding operational status is `discovered`.
80 |
81 | ### Externally Provisioned
82 |
83 | An [externally provisioned](./externally_provisioned.md) host has been deployed
84 | using another tool. Hosts reach this state when they are created with the
85 | `externallyProvisioned` field set to `true`. Hosts in this state are monitored,
86 | and only their power status is managed.
87 |
88 | ### Registering
89 |
90 | The host will stay in the `registering` state while the BMC access details are
91 | being validated.
92 |
93 | ### Inspecting
94 |
95 | After the host is registered, an IPA ramdisk will be booted on it. The agent
96 | collects information about the available hardware components and sends it back
97 | to Metal3. The host will stay in the `inspecting` state until this process is
98 | completed.
99 |
100 | ### Preparing
101 |
102 | When setting up RAID or changing firmware settings, the host will be in
103 | `preparing` state.
104 |
105 | ### Available
106 |
107 | A host in the `available` state is ready to be provisioned. It will move to the
108 | `provisioning` state once the `image` field is populated.
109 |
110 | ### Provisioning
111 |
112 | While an image is being copied to the host, and the host is configured
113 | to run the image, the host will be in the `provisioning` state.
114 |
115 | ### Provisioned
116 |
117 | After an image is copied to the host and the host is running the
118 | image, it will be in the `provisioned` state.
119 |
120 | ### Deprovisioning
121 |
122 | When the previously provisioned image is being removed from the host,
123 | it will be in the `deprovisioning` state.
124 |
125 | ### Powering off before delete
126 |
127 | When a host that is not currently `unmanaged` is marked to be deleted, it
128 | will be powered off first and will stay in the `powering off before delete`
129 | state until this is done or the retry limit is reached.
130 |
131 | ### Deleting
132 |
133 | When the host is marked to be deleted and has been successfully powered off, it
134 | will move from its current state to `deleting`, at which point the resource
135 | record is deleted.
136 |
--------------------------------------------------------------------------------
/docs/user-guide/src/bmo/status_annotation.md:
--------------------------------------------------------------------------------
1 | # Reconstructing Host Status
2 |
3 | The status annotation is useful when you need to avoid inspection of a BareMetalHost.
4 | This can happen if the status is already known, for example, when moving the BareMetalHost from one cluster to another.
5 | By setting this annotation, the BareMetal Operator will take the status of the BareMetalHost directly from the annotation.
6 |
7 | The annotation key is `baremetalhost.metal3.io/status` and the value is a JSON representation of the BareMetalHost's `status` field.
8 | One simple way of extracting the status and turning it into an annotation is using kubectl like this:
9 |
10 | ```bash
11 | # Save the status in json format to a file
12 | kubectl get bmh <name> -n metal3 -o jsonpath="{.status}" > status.json
13 | # Save the BMH and apply the status annotation to the saved BMH.
14 | kubectl -n metal3 annotate bmh <name> \
15 | baremetalhost.metal3.io/status="$(cat status.json)" \
16 | --dry-run=client -o yaml > bmh.yaml
17 | ```
18 |
19 | Note that the above example does not apply the annotation to the BareMetalHost directly, since it is usually not useful to apply it to a host that already has a status.
20 | Instead it saves the BareMetalHost *with the annotation applied* to a file `bmh.yaml`.
21 | This file can then be applied in another cluster.
22 | The status would be discarded at this point since the user is usually not allowed to set it, but the annotation is still there and would be used by the BareMetal Operator to set status again.
23 | Once this is done, the operator will remove the status annotation.
24 | In this situation you may also want to check the [detached annotation](./detached_annotation.md) for how to remove the BareMetalHost from the old cluster without going through deprovisioning.
25 |
26 | Here is an example of a BareMetalHost, first without the annotation, but with status and spec, and then the other way around.
27 | This shows how the status field is turned into the annotation value.
28 |
29 | ```yaml
30 | apiVersion: metal3.io/v1alpha1
31 | kind: BareMetalHost
32 | metadata:
33 | name: node-0
34 | namespace: metal3
35 | spec:
36 | automatedCleaningMode: metadata
37 | bmc:
38 | address: redfish+http://192.168.111.1:8000/redfish/v1/Systems/febc9f61-4b7e-411a-ada9-8c722edcee3e
39 | credentialsName: node-0-bmc-secret
40 | bootMACAddress: 00:80:1f:e6:f1:8f
41 | bootMode: legacy
42 | online: true
43 | status:
44 | errorCount: 0
45 | errorMessage: ""
46 | goodCredentials:
47 | credentials:
48 | name: node-0-bmc-secret
49 | namespace: metal3
50 | credentialsVersion: "1775"
51 | hardwareProfile: ""
52 | lastUpdated: "2022-05-31T06:33:05Z"
53 | operationHistory:
54 | deprovision:
55 | end: null
56 | start: null
57 | inspect:
58 | end: null
59 | start: "2022-05-31T06:33:05Z"
60 | provision:
61 | end: null
62 | start: null
63 | register:
64 | end: "2022-05-31T06:33:05Z"
65 | start: "2022-05-31T06:32:54Z"
66 | operationalStatus: OK
67 | poweredOn: false
68 | provisioning:
69 | ID: 8d566f5b-a28f-451b-a70f-419507c480cd
70 | bootMode: legacy
71 | image:
72 | url: ""
73 | state: inspecting
74 | triedCredentials:
75 | credentials:
76 | name: node-0-bmc-secret
77 | namespace: metal3
78 | credentialsVersion: "1775"
79 | ```
80 |
81 | ```yaml
82 | apiVersion: metal3.io/v1alpha1
83 | kind: BareMetalHost
84 | metadata:
85 | name: node-0
86 | namespace: metal3
87 | annotations:
88 | baremetalhost.metal3.io/status: |
89 | {"errorCount":0,"errorMessage":"","goodCredentials":{"credentials":{"name":"node-0-bmc-secret","namespace":"metal3"},"credentialsVersion":"1775"},"hardwareProfile":"","lastUpdated":"2022-05-31T06:33:05Z","operationHistory":{"deprovision":{"end":null,"start":null},"inspect":{"end":null,"start":"2022-05-31T06:33:05Z"},"provision":{"end":null,"start":null},"register":{"end":"2022-05-31T06:33:05Z","start":"2022-05-31T06:32:54Z"}},"operationalStatus":"OK","poweredOn":false,"provisioning":{"ID":"8d566f5b-a28f-451b-a70f-419507c480cd","bootMode":"legacy","image":{"url":""},"state":"inspecting"},"triedCredentials":{"credentials":{"name":"node-0-bmc-secret","namespace":"metal3"},"credentialsVersion":"1775"}}
90 | spec:
91 | ...
92 | ```
93 |
--------------------------------------------------------------------------------
/docs/user-guide/src/capm3/automated_cleaning.md:
--------------------------------------------------------------------------------
1 | # Automated Cleaning
2 |
3 |
4 |
5 | Before reading this page, please see [Baremetal Operator Automated Cleaning](../bmo/automated_cleaning.md) page.
6 |
7 | If you are using only Metal3 Baremetal Operator, you can skip this page and refer to Baremetal
8 | Operator automated cleaning [page](../bmo/automated_cleaning.md) instead.
9 |
10 | For deployments following the Cluster-api-provider-metal3 (CAPM3) workflow, it is recommended to configure
11 | automated cleaning via CAPM3 custom resources (CRs).
12 |
13 | There are two automated cleaning modes available which can be set via `automatedCleaningMode` field of a
14 | Metal3MachineTemplate `spec` or Metal3Machine `spec`.
15 |
16 | - `metadata` to enable the cleaning
17 | - `disabled` to disable the cleaning
18 |
19 | When enabled (`metadata`), automated cleaning kicks in during the first provisioning of a node and on every deprovisioning.
20 | There is no default value for `automatedCleaningMode` in Metal3MachineTemplate and Metal3Machine. If the user doesn't set any mode,
21 | the field in the `spec` will be omitted. Unsetting `automatedCleaningMode` in the Metal3MachineTemplate will block the synchronization
22 | of the cleaning mode between the Metal3MachineTemplate and Metal3Machines. This enables the selective operations described below.
23 |
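For example, a Metal3MachineTemplate that enables cleaning for all Metal3Machines created from it might look like the following sketch (the name, namespace and image fields are illustrative):

```yaml
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: Metal3MachineTemplate
metadata:
  name: test1-workers
  namespace: metal3
spec:
  template:
    spec:
      automatedCleaningMode: metadata
      image:
        url: http://172.22.0.1/images/ubuntu.img
        checksum: http://172.22.0.1/images/ubuntu.img.md5sum
        checksumType: md5
        format: raw
```
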
24 | ## Bulk operations
25 |
26 | The CAPM3 controller replicates the automated cleaning mode from the referenced Metal3MachineTemplate to all of its Metal3Machines.
27 | For example, a controlplane and a worker Metal3Machine both have `automatedCleaningMode` set to `disabled` because it is set to `disabled` in the template that they are both referencing.
28 |
29 | **Note**: the CAPM3 controller replicates the cleaning mode from the Metal3MachineTemplate to its Metal3Machines only if `automatedCleaningMode` is set (not empty) on the Metal3MachineTemplate resource. In other words, it synchronizes either the `disabled` or the `metadata` mode between the Metal3MachineTemplate and the Metal3Machines.
30 |
31 | ## Selective operations
32 |
33 | Normally automated cleaning mode is replicated from Metal3MachineTemplate `spec` to its referenced Metal3Machines' `spec` and from Metal3Machines `spec` to BareMetalHost `spec` (if CAPM3 is used). However, sometimes you might want to have a different automated cleaning mode for one or more Metal3Machines than the others even though they are referencing the same Metal3MachineTemplate. For example, there is one worker and one controlplane Metal3Machine created from the same Metal3MachineTemplate, and we would like the automated cleaning to be enabled (`metadata`) for the worker while disabled (`disabled`) for the controlplane.
34 |
35 | Here are the steps to achieve that:
36 |
37 | 1. Unset `automatedCleaningMode` in the Metal3MachineTemplate. Then CAPM3 controller unsets it for referenced Metal3Machines. Although it is unset in the Metal3Machine, BareMetalHosts will get their default automated cleaning mode `metadata`. As we mentioned earlier, CAPM3 controller replicates cleaning mode from Metal3MachineTemplate to Metal3Machine ONLY when it is either `metadata` or `disabled`. As such, to block synchronization between Metal3MachineTemplate and Metal3Machine, unsetting the cleaning mode in the Metal3MachineTemplate is enough.
38 | 1. Set `automatedCleaningMode` to `disabled` on the worker Metal3Machine `spec` and to `metadata` on the controlplane Metal3Machine `spec`. Since we don't have any mode set on the Metal3MachineTemplate, Metal3Machines can have different automated cleaning modes set even if they reference the same Metal3MachineTemplate. CAPM3 controller copies cleaning modes from Metal3Machines to their corresponding BareMetalHosts. As such, we end up with two nodes having different cleaning modes regardless of the fact that they reference the same Metal3MachineTemplate.
39 |
40 | 
41 |
--------------------------------------------------------------------------------
/docs/user-guide/src/capm3/clusterclass.md:
--------------------------------------------------------------------------------
1 | # Using ClusterClass with CAPM3
2 |
3 | ClusterClass is a feature of Cluster API that enables cluster operators to
4 | create multiple clusters using a single general template. You can find a detailed
5 | explanation of how to use a `ClusterClass` in the
6 | [Cluster API documentation](https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20210526-cluster-class-and-managed-topologies.md).
7 |
8 | ## Prerequisites
9 |
10 | ### ClusterClass support enabled in CAPI
11 |
12 | To use ClusterClass with CAPM3, the experimental `ClusterClass` feature has to be
13 | enabled in the CAPI deployment. You can find more info on how to enable ClusterClass
14 | support [in The Cluster API Book](https://cluster-api.sigs.k8s.io/tasks/experimental-features/cluster-class/).
15 |
16 | ## Deploying cluster using ClusterClass
17 |
18 | ### Deploying a ClusterClass
19 |
20 | To create a ClusterClass for CAPM3, a few objects have to be deployed in the
21 | management cluster:
22 |
23 | - Metal3ClusterTemplate - a template that will be used by ClusterClass
24 | controller to instantiate the cluster.
25 |
26 | - KubeadmControlPlaneTemplate - a template used to create Kubeadm Control Plane
27 | for the instantiated cluster.
28 |
29 | - Metal3MachineTemplate - templates that will be used to create Metal3Machine
30 | objects. Can be defined separately for control plane and worker nodes.
31 |
32 | - KubeadmConfigTemplate - a template for Kubeadm config.
33 |
34 | - ClusterClass - the final object that references the above objects and consolidates
35 |   them into a single cluster template definition.
36 |
37 | You can find an example of those objects
38 | [in the example file available on the CAPM3 repository](https://github.com/metal3-io/cluster-api-provider-metal3/blob/main/examples/templates/clusterclass.yaml).
39 |
40 | ### Deploying a Cluster
41 |
42 | The definitions described above can be used to deploy multiple clusters. However,
43 | some resources have to be deployed specifically for each cluster:
44 |
45 | - Metal3DataTemplate - should be created for both worker and control plane nodes
46 | in the cluster
47 |
48 | - IPPools - should be created per cluster if required.
49 |
50 | - Cluster - used to instantiate a cluster using `ClusterClass`. You can change
51 | cluster behavior by overriding variables defined in the `ClusterClass`.
52 |
53 | Example definitions of those resources
54 | [can be found in the CAPM3 repository](https://github.com/metal3-io/cluster-api-provider-metal3/blob/main/examples/templates/cluster.yaml).
55 |
56 | ## Tilt based development environment
57 |
58 | If you want to further develop or test ClusterClass support you can use Tilt
59 | environment.
60 |
61 | 1. Clone CAPM3 repository.
62 |
63 | ```shell
64 | git clone https://github.com/metal3-io/cluster-api-provider-metal3.git
65 | ```
66 |
67 | 1. Generate Tilt settings that will enable ClusterClass support in CAPI.
68 |
69 | ```shell
70 | make tilt-settings-clusterclass
71 | ```
72 |
73 | 1. Start Tilt.
74 |
75 | ```shell
76 | make tilt-up
77 | ```
78 |
79 | 1. Generate ClusterClass based example.
80 |
81 | ```shell
82 | make generate-examples-clusterclass
83 | ```
84 |
85 | 1. Deploy example `ClusterClass`, `Cluster` and all the dependencies.
86 |
87 | ```shell
88 | make deploy-examples-clusterclass
89 | ```
90 |
--------------------------------------------------------------------------------
/docs/user-guide/src/capm3/features.md:
--------------------------------------------------------------------------------
1 | # Cluster-api-provider-metal3 features
2 |
3 | - [Remediation](./remediaton.md)
4 | - [Node Reuse](./node_reuse.md)
5 | - [Pivoting](./pivoting.md)
6 | - [Automated cleaning](./automated_cleaning.md)
7 | - [Label synchronization](./label_sync.md)
8 |
--------------------------------------------------------------------------------
/docs/user-guide/src/capm3/images/object-ref.plantuml:
--------------------------------------------------------------------------------
1 | @startuml
2 | hide empty description
3 | skinparam defaultFontName Courier
4 | skinparam shadowing true
5 | [*] --> Metal3MachineTemplate
6 | Metal3MachineTemplate : apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
7 | Metal3MachineTemplate : kind: Metal3MachineTemplate
8 | Metal3MachineTemplate : metadata:
9 | Metal3MachineTemplate : name: workers-template
10 | Metal3MachineTemplate : namespace: metal3
11 | Metal3MachineTemplate : spec:
12 | Metal3MachineTemplate : // automatedCleaningMode is unset here:
13 | Metal3MachineTemplate : template:
14 | Metal3MachineTemplate : image:
15 | Metal3MachineTemplate : checksum: http://172.22.0.1/ubuntu.md5sum
16 | Metal3MachineTemplate : checksumType: md5
17 | Metal3MachineTemplate : format: raw
18 | Metal3MachineTemplate : url: http://172.22.0.1/ubuntu.img
19 |
20 | Metal3MachineTemplate --> Metal3Machine0
21 | Metal3Machine0 : apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
22 | Metal3Machine0 : kind: Metal3Machine
23 | Metal3Machine0 : metadata:
24 | Metal3Machine0 : name: node-0
25 | Metal3Machine0 : namespace: metal3
26 | Metal3Machine0 : spec:
27 | Metal3Machine0 : **automatedCleaningMode: disabled**
28 | Metal3Machine0 : image:
29 | Metal3Machine0 : checksum: http://172.22.0.1/ubuntu.img.md5sum
30 | Metal3Machine0 : checksumType: md5
31 | Metal3Machine0 : format: raw
32 | Metal3Machine0 : url: http://172.22.0.1/ubuntu.img
33 |
34 |
35 | Metal3MachineTemplate --> Metal3Machine1
36 | Metal3Machine1 : apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
37 | Metal3Machine1 : kind: Metal3Machine
38 | Metal3Machine1 : metadata:
39 | Metal3Machine1 : name: node-1
40 | Metal3Machine1 : namespace: metal3
41 | Metal3Machine1 : spec:
42 | Metal3Machine1 : **automatedCleaningMode: metadata**
43 | Metal3Machine1 : image:
44 | Metal3Machine1 : checksum: http://172.22.0.1/ubuntu.img.md5sum
45 | Metal3Machine1 : checksumType: md5
46 | Metal3Machine1 : format: raw
47 | Metal3Machine1 : url: http://172.22.0.1/ubuntu.img
48 |
49 | Metal3Machine0 --> BareMetalHost0
50 | BareMetalHost0 : apiVersion: metal3.io/v1alpha1
51 | BareMetalHost0 : kind: BareMetalHost
52 | BareMetalHost0 : metadata:
53 | BareMetalHost0 : name: node-0
54 | BareMetalHost0 : namespace: metal3
55 | BareMetalHost0 : spec:
56 | BareMetalHost0 : **automatedCleaningMode: disabled**
57 | BareMetalHost0 : bmc:
58 | BareMetalHost0 : address: ipmi://192.168.111.1:6230
59 | BareMetalHost0 : credentialsName: node-0-bmc-secret
60 | BareMetalHost0 : bootMACAddress: 00:40:4c:41:71:90
61 | BareMetalHost0 : consumerRef:
62 | BareMetalHost0 : apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
63 | BareMetalHost0 : kind: Metal3Machine
64 | BareMetalHost0 : name: node-0
65 | BareMetalHost0 : namespace: metal3
66 | BareMetalHost0 : image:
67 | BareMetalHost0 : checksum: http://172.22.0.1/ubuntu.img.md5sum
68 | BareMetalHost0 : checksumType: md5
69 | BareMetalHost0 : format: raw
70 | BareMetalHost0 : url: http://172.22.0.1/ubuntu.img
71 | BareMetalHost0 : online: true
72 |
73 |
74 | Metal3Machine1 --> BareMetalHost1
75 | BareMetalHost1 : apiVersion: metal3.io/v1alpha1
76 | BareMetalHost1 : kind: BareMetalHost
77 | BareMetalHost1 : metadata:
78 | BareMetalHost1 : name: node-1
79 | BareMetalHost1 : namespace: metal3
80 | BareMetalHost1 : spec:
81 | BareMetalHost1 : **automatedCleaningMode: metadata**
82 | BareMetalHost1 : bmc:
83 | BareMetalHost1 : address: ipmi://192.168.111.1:6230
84 | BareMetalHost1 : credentialsName: node-0-bmc-secret
85 | BareMetalHost1 : bootMACAddress: 00:40:4c:41:71:90
86 | BareMetalHost1 : consumerRef:
87 | BareMetalHost1 : apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
88 | BareMetalHost1 : kind: Metal3Machine
89 | BareMetalHost1 : name: node-1
90 | BareMetalHost1 : namespace: metal3
91 | BareMetalHost1 : image:
92 | BareMetalHost1 : checksum: http://172.22.0.1/ubuntu.img.md5sum
93 | BareMetalHost1 : checksumType: md5
94 | BareMetalHost1 : format: raw
95 | BareMetalHost1 : url: http://172.22.0.1/ubuntu.img
96 | BareMetalHost1 : online: true
97 | @enduml
--------------------------------------------------------------------------------
/docs/user-guide/src/capm3/installation_guide.md:
--------------------------------------------------------------------------------
1 | # Install Cluster-api-provider-metal3
2 |
3 | You can either use clusterctl (recommended) to install the Metal³ infrastructure
4 | provider, or kustomize for a manual installation. Both methods install the provider
5 | CRDs, its controllers and the [Ip-address-manager](https://github.com/metal3-io/ip-address-manager).
6 | Please keep in mind that Baremetal Operator and Ironic are decoupled from CAPM3
7 | and will not be installed when the provider is initialized. As such, you need to
8 | install them yourself.
9 |
10 | ## Prerequisites
11 |
12 | 1. Install `clusterctl`, refer to Cluster API [book](https://cluster-api.sigs.k8s.io/user/quick-start.html#install-clusterctl) for installation instructions.
13 | 1. Install `kustomize`, refer to official instructions [here](https://kubectl.docs.kubernetes.io/installation/kustomize/).
14 | 1. Install Ironic, refer to [this page](../ironic/ironic_installation.html).
15 | 1. Install Baremetal Operator, refer to [this page](../bmo/install_baremetal_operator.html).
16 | 1. Install the Cluster API core components, i.e. the core, bootstrap and control-plane providers. This will also install cert-manager, if it is not already installed.
17 |
18 | ```bash
19 | clusterctl init --core cluster-api{{#releasetag owner:"kubernetes-sigs" repo:"cluster-api"}} --bootstrap kubeadm{{#releasetag owner:"kubernetes-sigs" repo:"cluster-api"}} \
20 | --control-plane kubeadm{{#releasetag owner:"kubernetes-sigs" repo:"cluster-api"}} -v5
21 | ```
22 |
23 | ## With clusterctl
24 |
25 | This method is recommended. You can specify the CAPM3 version you want to install by appending a version tag, e.g. `{{#releasetag owner:"metal3-io" repo:"cluster-api-provider-metal3" }}`. If the version is not specified, the latest version available will be installed.
26 |
27 | ```bash
28 | clusterctl init --infrastructure metal3{{#releasetag owner:"metal3-io" repo:"cluster-api-provider-metal3"}}
29 | ```
30 |
31 | ## With kustomize
32 |
33 | To install a specific version, check out the `github.com/metal3-io/cluster-api-provider-metal3.git` repository at the tag of the desired version:
34 |
35 | ```bash
36 | git clone https://github.com/metal3-io/cluster-api-provider-metal3.git
37 | cd cluster-api-provider-metal3
38 | git checkout v1.1.2 -b v1.1.2
39 |
40 | ```
41 |
42 | Then, edit the controller-manager image version in `config/default/capm3/manager_image_patch.yaml`
43 |
44 | ```yaml
45 | apiVersion: apps/v1
46 | kind: Deployment
47 | metadata:
48 | name: controller-manager
49 | namespace: system
50 | spec:
51 | template:
52 | spec:
53 | containers:
54 | # Change the value of image/tag to your desired image URL or version tag
55 | - image: quay.io/metal3-io/cluster-api-provider-metal3:v1.1.2
56 | name: manager
57 | ```
58 |
59 | Apply the manifests
60 |
61 | ```bash
62 | cd cluster-api-provider-metal3
63 | kustomize build config/default | kubectl apply -f -
64 | ```
65 |
--------------------------------------------------------------------------------
/docs/user-guide/src/capm3/label_sync.md:
--------------------------------------------------------------------------------
1 | # Labels Synchronization between BareMetalHost and Kubernetes Nodes
2 |
3 | CAPM3 has a mechanism to synchronize BareMetalHost (BMH) labels with predefined
4 | prefixes to the corresponding Kubernetes Node object running on that BMH.
5 |
6 | ## How to use it?
7 |
8 | To use label synchronization, the user needs to define prefix(es) for the labels.
9 | Only labels that fall within the prefix set are synchronized. The user defines the
10 | prefixes with an annotation on the Metal3Cluster object, using the
11 | **metal3.io/metal3-label-sync-prefixes** annotation key and giving the prefixes as
12 | the annotation value. Prefixes should be separated by commas.
13 |
14 | In the following example we are defining two label prefixes for label
15 | synchronization: **test.foobar.io** and **my-prefix**.
16 |
17 | ```bash
18 | kubectl annotate metal3cluster test1 metal3.io/metal3-label-sync-prefixes=test.foobar.io,my-prefix -n=metal3 --overwrite
19 | ```
20 |
21 | **Note:** All prefixes should be compliant with [RFC 1123](https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names).
22 |
23 | After adding the annotation to the Metal3Cluster, we label the BMH objects with
24 | labels that start with the prefixes defined above:
25 |
26 | ```bash
27 | kubectl label baremetalhosts node-0 my-prefix/rack=xyz-123 -n=metal3
28 | kubectl label baremetalhosts node-0 test.foobar.io/group=abc -n=metal3
29 | ```
30 |
31 | **Note:** Prefixes should be separated from the rest of the label key by **"/"**, e.g. my-prefix/rack, test.foobar.io/xyz
32 |
33 | Now the label sync controller will apply the same labels to the corresponding Kubernetes Node.
34 |
35 | ```bash
36 | $ kubectl get nodes --show-labels
37 | NAME STATUS ROLES AGE VERSION LABELS
38 | test1-8ndsl NotReady 10m v1.31.0 my-prefix/rack=xyz-123,test.foobar.io/group=abc
39 | ```
40 |
41 | The label sync controller removes a label with one of the defined prefixes from the
42 | Node if the same label does not exist on the BMH. Similarly, if we delete from the
43 | Node a label that exists on the BMH, it will be re-added at the next reconciliation cycle.
44 |
--------------------------------------------------------------------------------
/docs/user-guide/src/capm3/node_reuse.md:
--------------------------------------------------------------------------------
1 | # Node Reuse
2 |
3 | This feature makes it possible to reuse the same BareMetalHosts (referred to as hosts below)
4 | across deprovisioning and provisioning, mainly as part of the rolling upgrade process in the cluster.
5 |
6 | ## Importance of scale-in strategy
7 |
8 | The host-reuse logic relies solely on the **scale-in** upgrade strategy utilized by
9 | Cluster API objects, namely [KubeadmControlPlane](https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20191017-kubeadm-based-control-plane.md#rolling-update-strategy) and MachineDeployment.
10 | During the upgrade of the above resources, the machines owned by the KubeadmControlPlane or MachineDeployment are
11 | removed one-by-one before new ones are created (delete-create method).
12 | That way, we can fully ensure that the intended host is reused when the upgrade kicks in (it is picked up in the subsequent provisioning of the newly created machine).
13 |
14 | **Note:** To achieve the desired *delete first and create after* behavior in the above-mentioned Cluster API objects,
15 | the user has to modify (see the sketch after this list):
16 |
17 | * the `maxSurge` field in KubeadmControlPlane, setting it to 0, with a minimum of 3 control plane machine replicas
18 | * the `maxSurge` and `maxUnavailable` fields in MachineDeployment, setting them to 0 and 1 respectively
19 |
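A minimal sketch of these settings is shown below (the object names and namespace are illustrative; verify the exact field nesting against the Cluster API version you are running):

```yaml
apiVersion: controlplane.cluster.x-k8s.io/v1beta1
kind: KubeadmControlPlane
metadata:
  name: test1
  namespace: metal3
spec:
  replicas: 3
  rolloutStrategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 0        # delete a machine first, then create its replacement
  # ... kubeadmConfigSpec, machineTemplate, version, etc.
---
apiVersion: cluster.x-k8s.io/v1beta1
kind: MachineDeployment
metadata:
  name: test1-workers
  namespace: metal3
spec:
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 0        # never create the new machine before deleting the old one
      maxUnavailable: 1  # allow one machine to be unavailable during the rollout
  # ... clusterName, selector, template, etc.
```
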
20 | On the contrary, if the scale-out strategy is utilized by the CAPI objects during the upgrade, a create-swap-delete
21 | method is usually followed, where a new machine is created first and a new host is picked up for that
22 | machine, breaking the node reuse logic right at the beginning of the upgrade process.
23 |
24 | ## Workflow
25 |
26 | The Metal3MachineTemplate (M3MT) Custom Resource is the object responsible for enabling the node reuse feature.
27 |
28 | ```yaml
29 | apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
30 | kind: Metal3MachineTemplate
31 | metadata:
32 | name: test1-controlplane
33 | namespace: metal3
34 | spec:
35 | nodeReuse: True
36 | template:
37 | spec:
38 | image:
39 | ...
40 | ```
41 |
42 | There are usually two Metal3MachineTemplate objects, one referenced by the KubeadmControlPlane for control plane nodes, and the other by a MachineDeployment for worker nodes. Before performing an upgrade, the user must set the `nodeReuse` field to **true** in the Metal3MachineTemplate object whose hosts are targeted for reuse. If left unchanged, the `nodeReuse` field defaults to **false**, and no host reuse is performed in the workflow. If you would like to know more about the internals of the controller logic, please check the original proposal for the feature [here](https://github.com/metal3-io/metal3-docs/blob/main/design/cluster-api-provider-metal3/node_reuse.md).
43 |
44 | Once the `nodeReuse` field is set to **true**, the user has to make sure that the scale-in strategy is configured as suggested above, and then proceed with updating the desired fields in the KubeadmControlPlane or MachineDeployment to start a rolling upgrade.
45 |
46 | **Note:** If you are creating a new Metal3MachineTemplate object (for control plane or workers), rather than using the existing one
47 | created during provisioning, please make sure to reference it from the corresponding Cluster API object (KubeadmControlPlane or MachineDeployment). Also keep in mind that already provisioned Metal3Machines were created from the old Metal3MachineTemplate
48 | and consume existing hosts, meaning that even though the `nodeReuse` field is set to **true** in the new Metal3MachineTemplate,
49 | it would have no effect on them. To use the new Metal3MachineTemplate in the workflow, the user has to reprovision the nodes, which
50 | should result in the new Metal3MachineTemplate being referenced from the Cluster API object and new Metal3Machines being created from it.
51 |
--------------------------------------------------------------------------------
/docs/user-guide/src/images/capi-machines.pantuml:
--------------------------------------------------------------------------------
1 | @startuml
2 | rectangle Machine_1
3 | rectangle Machine_2
4 | rectangle Machine_3
5 | rectangle AWSMachine
6 | rectangle OpenStackMachine
7 | rectangle Metal3Machine
8 | cloud EC2 [
9 | EC2 instance
10 | ]
11 | cloud os [
12 | openstack server
13 | ]
14 | node BareMetalHost
15 |
16 | Machine_1 --> AWSMachine
17 | Machine_2 --> OpenStackMachine
18 | Machine_3 --> Metal3Machine
19 |
20 | Metal3Machine --> BareMetalHost
21 | OpenStackMachine --> os
22 | AWSMachine --> EC2
23 | @enduml
24 |
--------------------------------------------------------------------------------
/docs/user-guide/src/images/capi-machines.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/docs/user-guide/src/images/capi-machines.png
--------------------------------------------------------------------------------
/docs/user-guide/src/images/metal3-capi-objects.plantuml:
--------------------------------------------------------------------------------
1 | @startuml
2 | rectangle Cluster
3 | rectangle KubeadmControlPlane
4 | rectangle MachineDeployment
5 | rectangle Metal3Cluster
6 | rectangle Metal3MachineTemplate
7 | collections Metal3Machines
8 | collections Machines
9 | collections BareMetalHosts
10 |
11 | rectangle IPPool
12 | collections IPClaims
13 | collections IPAddresses
14 |
15 | rectangle Metal3DataTemplate
16 | collections Metal3DataClaims
17 | collections Metal3Data
18 |
19 | Cluster --> KubeadmControlPlane
20 | Cluster --> MachineDeployment
21 | Cluster --> Metal3Cluster
22 | KubeadmControlPlane --> Metal3MachineTemplate
23 | KubeadmControlPlane --> Machines
24 | MachineDeployment --> Metal3MachineTemplate
25 | MachineDeployment --> Machines
26 |
27 | Metal3MachineTemplate --> Metal3DataTemplate
28 | Metal3DataTemplate --> Metal3Data
29 |
30 | Machines --> Metal3Machines
31 | Metal3MachineTemplate --> Metal3Machines
32 |
33 | Metal3Machines --> BareMetalHosts
34 | Metal3Machines --> Metal3DataClaims
35 | Metal3DataClaims --> Metal3Data
36 | Metal3Data --> IPClaims
37 |
38 | IPPool --> IPAddresses
39 | IPClaims --> IPAddresses
40 |
41 | Metal3Data --> BareMetalHosts
42 | IPAddresses --> BareMetalHosts
43 | @enduml
44 |
--------------------------------------------------------------------------------
/docs/user-guide/src/images/metal3-capi-objects.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/docs/user-guide/src/images/metal3-capi-objects.png
--------------------------------------------------------------------------------
/docs/user-guide/src/images/metal3-stack.plantuml:
--------------------------------------------------------------------------------
1 | @startuml
2 | rectangle CAPI
3 | rectangle Metal3 {
4 | rectangle CAPM3
5 | rectangle BMO
6 | rectangle IPAM
7 | }
8 | rectangle Ironic
9 |
10 | CAPI -- CAPM3
11 | CAPM3 -- BMO
12 | CAPM3 -- IPAM
13 | BMO -- Ironic
14 |
15 | @enduml
16 |
--------------------------------------------------------------------------------
/docs/user-guide/src/images/metal3-stack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/docs/user-guide/src/images/metal3-stack.png
--------------------------------------------------------------------------------
/docs/user-guide/src/introduction.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | # Metal³
8 |
9 |
10 |
11 | The Metal³ project (pronounced: "Metal Kubed") provides components for bare
12 | metal host management with Kubernetes. You can enrol your bare metal machines,
13 | provision operating system images, and then, if you like, deploy Kubernetes
14 | clusters to them. From there, operating and upgrading your Kubernetes clusters
15 | can be handled by Metal³. Moreover, Metal³ is itself a Kubernetes application,
16 | so it runs on Kubernetes, and uses Kubernetes resources and APIs as its
17 | interface.
18 |
19 | Metal³ is one of the providers for the Kubernetes sub-project [Cluster
20 | API](https://github.com/kubernetes-sigs/cluster-api). Cluster API provides
21 | infrastructure agnostic Kubernetes lifecycle management, and Metal³ brings the
22 | bare metal implementation.
23 |
24 | This is paired with one of the components from the OpenStack ecosystem,
25 | [Ironic](https://ironicbaremetal.org/) for booting and installing machines.
26 | Metal³ handles the installation of Ironic as a standalone component (there's no
27 | need to bring along the rest of OpenStack). Ironic is supported by a mature
28 | community of hardware vendors and supports a wide range of bare metal
29 | management protocols which are continuously tested on a variety of hardware.
30 | Backed by Ironic, Metal³ can provision machines, no matter the brand of
31 | hardware.
32 |
33 | In summary, you can write Kubernetes manifests representing your hardware and
34 | your desired Kubernetes cluster layout. Then Metal³ can:
35 |
36 | * Discover your hardware inventory
37 | * Configure BIOS and RAID settings on your hosts
38 | * Optionally clean a host's disks as part of provisioning
39 | * Install and boot an operating system image of your choice
40 | * Deploy Kubernetes
41 | * Upgrade Kubernetes or the operating system in your clusters with a
42 | non-disruptive rolling strategy
43 | * Automatically remediate failed nodes by rebooting them and removing them from
44 | the cluster if necessary
45 |
46 | You can even deploy Metal³ to your clusters so that they can manage other
47 | clusters using Metal³...
48 |
49 | Metal³ is [open-source](https://github.com/metal3-io) and welcomes community
50 | contributions. The community meets at the following venues:
51 |
52 | * \#cluster-api-baremetal on [Kubernetes Slack](https://slack.k8s.io/)
53 | * Metal³ development [mailing list](https://groups.google.com/g/metal3-dev)
54 | * From the mailing list, you'll also be able to find the details of a weekly
55 | Zoom community call on Wednesdays at 14:00 GMT
56 |
57 | # About this guide
58 |
59 | This user guide aims to explain the Metal³ feature set, and provide how-tos for
60 | using Metal³. It's not a tutorial (for that, see the [Getting Started
61 | Guide](developer_environment/tryit.md)). Nor is it a reference (for that, see
62 | the [API Reference
63 | Documentation](https://github.com/metal3-io/cluster-api-provider-metal3/blob/main/docs/api.md),
64 | and of course, the code itself.)
65 |
--------------------------------------------------------------------------------
/docs/user-guide/src/ipam/ipam_installation.md:
--------------------------------------------------------------------------------
1 | # Installing IPAM as Deployment
2 |
3 | This section will show how IPAM can be installed as a deployment in a cluster.
4 |
5 | ## Deploying controllers
6 |
7 | The CAPI and IPAM controllers need to be deployed first. The IPAM controller has a dependency on Cluster API *Cluster* objects: the CAPI CRDs and controllers must be deployed, and the Cluster objects should exist, for the deployment to succeed.
8 |
9 | ## Deployment
10 |
11 | The user can create the **IPPool** object independently. It will wait for its cluster to exist before reconciling. If the user wants to create **IPAddress** objects manually, they should be created before any claims; in that case it is highly recommended to either use the *preAllocations* field or pause the reconciliation.
12 |
13 | When an **IPClaim** object is created, the controller will list all existing **IPAddress** objects. It will then randomly select an address that has not been allocated yet and is not in the *preAllocations* map, and create an **IPAddress** object containing references to the **IPPool** and **IPClaim**, the address itself, the prefix from the address pool (or the default prefix), and the gateway from the address pool (or the default gateway).
14 |
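As an illustration, a minimal IPPool and IPClaim could look like the sketch below. The field names follow the IPAM v1alpha1 API, but the object names, namespace and address range are made up; consult the IPAM repository referenced at the end of this page for the authoritative schema.

```yaml
apiVersion: ipam.metal3.io/v1alpha1
kind: IPPool
metadata:
  name: provisioning-pool
  namespace: metal3
spec:
  clusterName: test1
  namePrefix: provisioning   # used to name the generated IPAddress objects
  pools:
    - start: 172.22.0.10
      end: 172.22.0.100
  prefix: 24                 # default prefix for addresses from this pool
  gateway: 172.22.0.1        # default gateway for addresses from this pool
---
apiVersion: ipam.metal3.io/v1alpha1
kind: IPClaim
metadata:
  name: node-0-provisioning
  namespace: metal3
spec:
  pool:
    name: provisioning-pool
    namespace: metal3
```
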
15 | ### Deploy IPAM
16 |
17 | This deploys the IPAM CRDs and controllers. Run the Makefile target from inside the cloned IPAM git repository:
18 |
19 | ```sh
20 | make deploy
21 | ```
22 |
23 | ### Run locally
24 |
25 | Runs IPAM controller locally
26 |
27 | ```sh
28 | kubectl scale -n capm3-system deployment.v1.apps/metal3-ipam-controller-manager \
29 | --replicas 0
30 | make run
31 | ```
32 |
33 | ### Deploy an example pool
34 |
35 | ```sh
36 | make deploy-examples
37 | ```
38 |
39 | ### Delete the example pool
40 |
41 | ```sh
42 | make delete-examples
43 | ```
44 |
45 | ## Deletion
46 |
47 | When deleting an **IPClaim** object, the controller will simply delete the associated **IPAddress** object. Once all **IPAddress** objects have been deleted, the **IPPool** object can be deleted. Before that point, the finalizer in the **IPPool** object will block the deletion.
48 |
49 | ## References
50 |
51 | 1. [IPAM](https://github.com/metal3-io/ip-address-manager/).
52 | 2. [IPAM deployment workflow](https://github.com/metal3-io/ip-address-manager/blob/main/docs/deployment_workflow.md).
53 | 3. Custom resource (CR) examples in
54 | [metal3-dev-env](https://github.com/metal3-io/metal3-dev-env), in the
55 | [templates](https://github.com/metal3-io/metal3-dev-env/tree/main/tests/roles/run_tests/templates).
56 |
--------------------------------------------------------------------------------
/docs/user-guide/src/ironic/ironic-container-images.md:
--------------------------------------------------------------------------------
1 | # Ironic Container Images
2 |
3 |
4 |
5 | The currently available ironic container images are:
6 |
7 | | Name and link to repository | Published image | Content/Purpose |
8 | | --- | --- | --- |
9 | | [ironic-image](https://github.com/metal3-io/ironic-image) | `quay.io/metal3-io/ironic` | Ironic services / BMC emulators |
10 | | [ironic-ipa-downloader](https://github.com/metal3-io/ironic-ipa-downloader) | `quay.io/metal3-io/ironic-ipa-downloader` | Download and cache the [ironic python agent][ipa] ramdisk |
11 | | [ironic-client](https://github.com/metal3-io/ironic-image/tree/main/resources/ironic-client) | `quay.io/metal3-io/ironic-client` | Ironic command-line interface (for debugging) |
12 |
13 | The main `ironic-image` currently contains entry points to run both Ironic
14 | itself and its auxiliary services: *dnsmasq* and *httpd*.
15 |
16 | [ipa]: ./ironic-python-agent.md
17 |
18 | ## How to build a container image
19 |
20 | Each repository mentioned in the list contains a Dockerfile that can be
21 | used to build the corresponding container, for example:
22 |
23 | ```bash
24 | git clone https://github.com/metal3-io/ironic-image.git
25 | cd ironic-image
26 | docker build . -f Dockerfile
27 | ```
28 |
29 | In some cases a **make** sub-command is provided to build the image using
30 | docker, usually `make docker`.
31 |
32 | ## Customizing source builds
33 |
34 | When building the ironic image, it is also possible to specify a different
35 | source for ironic, ironic-lib or the sushy library using the build arguments
36 | `IRONIC_SOURCE`, `IRONIC_LIB_SOURCE` and `SUSHY_SOURCE`. It is also possible
37 | to apply local patches to the source. See [ironic-image
38 | README](https://github.com/metal3-io/ironic-image/blob/main/README.md) for
39 | details.
40 |
41 | ## Special resources: sushy-tools and virtualbmc
42 |
43 | The Dockerfiles needed to build
44 | [sushy-tools](https://docs.openstack.org/sushy-tools/latest/) (Redfish
45 | emulator) and [VirtualBMC](https://docs.openstack.org/virtualbmc/latest/) (IPMI
46 | emulator) containers can be found in the `ironic-image` container repository,
47 | under the `resources` directory.
48 |
--------------------------------------------------------------------------------
/docs/user-guide/src/ironic/ironic-python-agent.md:
--------------------------------------------------------------------------------
1 | # Ironic Python Agent (IPA)
2 |
3 | [IPA](https://docs.openstack.org/ironic-python-agent/latest/) is a service written in python that runs within a ramdisk. It provides remote access for `Ironic` to perform various operations on the managed server. It also sends information about the server to `Ironic`.
4 |
5 | By default, we pull IPA images from the [Ironic upstream](https://tarballs.opendev.org/openstack/ironic-python-agent/dib) archive, where an image is built on every commit to the *master* git branch.
6 |
7 | However, another remote registry or a local IPA archive can be specified. [ipa-downloader](https://github.com/metal3-io/ironic-ipa-downloader) is responsible for downloading the IPA ramdisk image to a shared volume from where the nodes are able to retrieve it.
8 |
9 | ## Data flow
10 |
11 | IPA interacts with other components. The information exchanged, and the component it is sent to or received from, are described below.
12 | The communication between IPA and these components can be encrypted in transit with SSL/TLS.
13 |
14 | - Inspection: data about hardware details, such as CPU, disk, RAM and network interfaces.
15 | - Heartbeat: periodic message informing Ironic that the node is still running.
16 | - Lookup: data sent to Ironic that helps it determine the Ironic node UUID corresponding to the machine.
17 |
18 | The above data is sent/received as follows.
19 |
20 | - The inspection result is sent to Ironic.
21 | - Lookup/heartbeat data is sent to Ironic.
22 | - The user-supplied boot image that will be written to the node's disk is retrieved from the HTTP server.
23 |
24 | ## References
25 |
26 | - [IPA Documentation](https://docs.openstack.org/ironic-python-agent/latest/admin/how_it_works.html)
27 | - [IPA github repo](https://opendev.org/openstack/ironic-python-agent)
28 |
--------------------------------------------------------------------------------
/docs/user-guide/src/ironic/ironic_variables.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | The following environmental variables can be passed to configure the Ironic services:
4 |
5 | - `HTTP_PORT` - port used by httpd server (default 6180)
6 | - `PROVISIONING_IP` - provisioning interface IP address to use for ironic, dnsmasq(dhcpd) and httpd (default 172.22.0.1)
7 | - `CLUSTER_PROVISIONING_IP` - cluster provisioning interface IP address (default 172.22.0.2)
8 | - `PROVISIONING_INTERFACE` - interface to use for ironic, dnsmasq(dhcpd) and httpd (default ironicendpoint)
9 | - `CLUSTER_DHCP_RANGE` - dhcp range to use for provisioning (default 172.22.0.10-172.22.0.100)
10 | - `DEPLOY_KERNEL_URL` - the URL of the kernel to deploy ironic-python-agent
11 | - `DEPLOY_RAMDISK_URL` - the URL of the ramdisk to deploy ironic-python-agent
12 | - `IRONIC_ENDPOINT` - the endpoint of the Ironic API
13 | - `CACHEURL` - the URL of the cached images
14 | - `IRONIC_FAST_TRACK` - whether to enable fast_track provisioning or not (default true)
15 | - `IRONIC_KERNEL_PARAMS` - kernel parameters to pass to IPA (default console=ttyS0)
16 | - `IRONIC_INSPECTOR_VLAN_INTERFACES` - VLAN interfaces included in introspection: `all` - all VLANs on all interfaces, using LLDP information (default); `<interface>` - all VLANs on a particular interface, using LLDP information; `<interface>.<vlan>` - a particular VLAN interface, not using LLDP
17 | - `IRONIC_BOOT_ISO_SOURCE` - where the boot ISO image will be served from; possible values are: `local` (default), to download the image, prepare it and serve it
18 | from the conductor; `http`, to serve it directly from its HTTP URL
19 | - `IPA_DOWNLOAD_ENABLED` - enables the use of the Ironic Python Agent Downloader container to download IPA archive (default true)
20 | - `USE_LOCAL_IPA` - enables the use of a locally supplied IPA archive. This is handled by BMO and only has effect when `IPA_DOWNLOAD_ENABLED` is "false"; otherwise `IPA_DOWNLOAD_ENABLED` takes precedence. (default false)
21 | - `LOCAL_IPA_PATH` - only has effect when `USE_LOCAL_IPA` is set to "true"; points to the directory where the IPA archive is located. This variable is handled by BMO and should contain the path to the directory that contains the ironic-python-agent.tar
22 | - `GATEWAY_IP` - gateway IP address to use for ironic dnsmasq (dhcpd)
23 | - `DNS_IP` - DNS IP address to use for ironic dnsmasq (dhcpd)
24 |
25 | To know how to pass these variables, please see the sections below.
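
As an illustration only (the exact mechanism depends on how Ironic is deployed and is covered in the sections mentioned above), these variables are usually provided as container environment variables, for example through a ConfigMap consumed by the Ironic containers. The ConfigMap name, namespace and values below are hypothetical:

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: ironic-bmo-configmap          # hypothetical; match your deployment's kustomization
  namespace: baremetal-operator-system
data:
  HTTP_PORT: "6180"
  PROVISIONING_INTERFACE: "ironicendpoint"
  CLUSTER_DHCP_RANGE: "172.22.0.10-172.22.0.100"
  IRONIC_FAST_TRACK: "true"
```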
26 |
--------------------------------------------------------------------------------
/docs/user-guide/src/irso/introduction.md:
--------------------------------------------------------------------------------
1 | # Ironic Standalone Operator
2 |
3 | Ironic Standalone Operator (IrSO) is a Kubernetes controller that installs and
4 | manages Ironic in a configuration suitable for Metal3. IrSO has the following
5 | features:
6 |
7 | - Flexible networking configuration, support for Keepalived.
8 | - Using SQLite or MariaDB as the database backend.
9 | - Optional support for a DHCP service (dnsmasq).
10 | - Optional support for automatically downloading an
11 | [IPA](../ironic/ironic-python-agent.md) image.
12 |
13 | IrSO uses [ironic-image](../ironic/ironic-container-images.md) under the hood.
14 |
15 | ## Installing Ironic Standalone Operator
16 |
17 | On every source code change, a new IrSO image is built and published at
18 | `quay.io/metal3-io/ironic-standalone-operator`. To install it in your cluster,
19 | you can use the Kustomize templates provided in the source repository:
20 |
21 | ```console
22 | git clone https://github.com/metal3-io/ironic-standalone-operator
23 | cd ironic-standalone-operator
24 | git checkout -b
25 |
26 | make install deploy
27 | kubectl wait --for=condition=Available --timeout=60s \
28 | -n ironic-standalone-operator-system deployment/ironic-standalone-operator-controller-manager
29 | ```
30 |
31 | ## API resources
32 |
33 | IrSO uses Custom Resources to manage an Ironic installation, the most important being:
34 |
35 | [Ironic](https://github.com/metal3-io/ironic-standalone-operator/blob/main/config/crd/bases/ironic.metal3.io_ironics.yaml)
36 | manages Ironic itself and all of its auxiliary services.
37 |
38 | See [installing Ironic with IrSO](./install-basics.md) for information on how
39 | to use these resources.
40 |
41 | ## How is Ironic installed?
42 |
43 | By default, IrSO installs Ironic as a single pod on a **control plane** node.
44 | This is because Ironic currently requires *host networking*, and thus it's not
45 | advisable to let it co-exist with tenant workload.
46 |
47 | ### Installed components
48 |
49 | An Ironic installation always contains these three components:
50 |
51 | - `ironic` is the main API service, as well as the conductor process that
52 | handles actions on bare-metal machines.
53 | - `httpd` is the web server that serves images and configuration for iPXE and
54 | virtual media boot, as well as works as the HTTPS frontend for Ironic.
55 | - `ramdisk-logs` is a script that unpacks any ramdisk logs and outputs them
56 | for consumption via `kubectl logs` or similar tools.
57 |
58 | There is also a standard init container:
59 |
60 | - `ramdisk-downloader` downloads images of the deployment/inspection ramdisk
61 | and stores them locally for easy access.
62 |
63 | When network boot (iPXE) is enabled, another component is deployed:
64 |
65 | - `dnsmasq` serves DHCP and functions as a PXE server for bootstrapping iPXE.
66 |
67 | With Keepalived support enabled:
68 |
69 | - `keepalived` manages the IP address on the provisioning interface.
70 |
71 | ### Supported versions
72 |
73 | A major and minor version can be supplied to the `Ironic` resource to request
74 | a specific branch of ironic-image (and thus Ironic). Here are supported version
75 | values for each branch and release of the operator:
76 |
77 | | Operator version | Ironic version(s) | Default version |
78 | | ---------------- | ------------------------ | --------------- |
79 | | latest (main) | latest, 29.0, 28.0, 27.0 | latest |
80 | | 0.3.0 | latest, 29.0, 28.0, 27.0 | latest |
81 | | 0.2.0 | latest, 28.0, 27.0 | latest |
82 | | 0.1.0 | latest, 27.0 | latest |
83 |
84 | **NOTE:** the special version value `latest` always installs the latest
85 | available version of ironic-image and Ironic.
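
As a minimal sketch (assuming the `ironic.metal3.io/v1alpha1` API group and a `spec.version` field; check the Ironic CRD linked above for the authoritative schema), requesting a specific version could look like this:

```yaml
apiVersion: ironic.metal3.io/v1alpha1
kind: Ironic
metadata:
  name: ironic
  namespace: ironic-system   # hypothetical namespace
spec:
  version: "29.0"            # one of the supported values from the table above
```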
86 |
--------------------------------------------------------------------------------
/docs/user-guide/src/project-overview.md:
--------------------------------------------------------------------------------
1 | # Project overview
2 |
3 | Metal3 consists of multiple sub-projects. The most notable are [Bare Metal
4 | Operator](https://github.com/metal3-io/baremetal-operator), [Cluster API
5 | provider Metal3](https://github.com/metal3-io/cluster-api-provider-metal3) and
6 | the [IP address manager](https://github.com/metal3-io/ip-address-manager). There
7 | is no requirement to use all of them.
8 |
9 | The stack, when including Cluster API and Ironic, looks like this:
10 |
11 | 
12 |
13 | From a user perspective it may be more useful to visualize the Kubernetes
14 | resources. When using Cluster API, Metal3 works as any other infrastructure
15 | provider. The Machines get corresponding Metal3Machines, which in turn reference
16 | the BareMetalHosts.
17 |
18 | 
19 |
20 | The following diagram shows more details about the Metal3 objects. Note that it
21 | is not showing everything and is meant just as an overview.
22 |
23 | 
24 |
25 | ## How does it work?
26 |
27 | Metal3 relies on Ironic for interacting with the physical machines. Ironic in
28 | turn communicates with [Baseboard Management
29 | Controllers](https://en.wikipedia.org/wiki/Intelligent_Platform_Management_Interface#Baseboard_management_controller)
30 | (BMCs) to manage the machines. Ironic can communicate with the BMCs using
31 | protocols such as Redfish, IPMI, or iDRAC. In this way, it can power on or off
32 | the machines, change the boot device, and so on. For more information, see
33 | [Ironic in Metal3](./ironic/introduction.md).
34 |
35 | For more advanced operations, like writing an image to the disk, the [Ironic
36 | Python Agent](./ironic/ironic-python-agent.md) (IPA) is first booted on the
37 | machine. Ironic can then communicate with the IPA to perform the requested
38 | operation.
39 |
40 | The BareMetal Operator (BMO) is a Kubernetes controller that exposes parts of
41 | Ironic's capabilities through the Kubernetes API. This is essentially done
42 | through the BareMetalHost custom resource.
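
As an illustration, a minimal BareMetalHost could look like the following (the values are made up; see the Bare Metal Operator documentation for the full schema):

```yaml
apiVersion: metal3.io/v1alpha1
kind: BareMetalHost
metadata:
  name: node-0
  namespace: metal3
spec:
  online: true                            # keep the host powered on
  bootMACAddress: 00:40:4c:41:71:90       # MAC used for network booting
  bmc:
    address: ipmi://192.168.111.1:6230    # how Ironic reaches the BMC
    credentialsName: node-0-bmc-secret    # Secret holding the BMC username/password
```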
43 |
44 | The Cluster API infrastructure provider for Metal3 (CAPM3) provides the
45 | necessary functionality to make Metal3 work with [Cluster
46 | API](https://cluster-api.sigs.k8s.io/). This means that Cluster API can be used
47 | to provision bare metal hosts into workload clusters. Similar to other
48 | [infrastructure
49 | providers](https://cluster-api.sigs.k8s.io/reference/providers#infrastructure),
50 | CAPM3 adds custom resources such as Metal3Cluster and Metal3MachineTemplate in
51 | order to implement the Cluster API contract.
52 |
53 | A notable addition to the contract is the management of metadata through
54 | Metal3DataTemplates and related objects. Users can provide metadata and network
55 | data through these objects. For network data specifically, it is worth
56 | mentioning the Metal3 [IP address manager (IPAM)](./ipam/introduction.md) that
57 | can be used to assign IP addresses to the hosts.
58 |
59 | ## Requirements
60 |
61 | - Server(s) with baseboard management capabilities (i.e. Redfish, iDRAC, IPMI,
62 | etc.). For development you can use virtual machines with Sushy-tools. More
63 | information [here](./bmo/supported_hardware.md).
64 | - An Ironic instance. More information [here](./ironic/introduction.md).
65 | - A Kubernetes cluster (the management cluster) where the user stores and
66 | manages the Metal3 resources. A [kind cluster](https://kind.sigs.k8s.io/) is
67 | enough for bootstrapping or development.
68 |
--------------------------------------------------------------------------------
/docs/user-guide/src/reference.md:
--------------------------------------------------------------------------------
1 | # API reference
2 |
3 | ## Bare Metal Operator
4 |
5 | - Baremetal Operator (CRDs): [documentation](https://doc.crds.dev/github.com/metal3-io/baremetal-operator)
6 | - golang API documentation: [godoc](https://pkg.go.dev/github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1)
7 |
8 | ## Cluster API provider Metal3
9 |
10 | - Cluster API provider Metal3 (CRDs): [documentation](https://doc.crds.dev/github.com/metal3-io/cluster-api-provider-metal3)
11 | - golang API documentation: [godoc](https://pkg.go.dev/github.com/metal3-io/cluster-api-provider-metal3)
12 |
13 | ## Ip Address Manager
14 |
15 | - Ip Address Manager (CRDs): [documentation](https://doc.crds.dev/github.com/metal3-io/ip-address-manager)
16 | - golang API documentation: [godoc](https://pkg.go.dev/github.com/metal3-io/ip-address-manager/api/v1alpha1)
17 |
--------------------------------------------------------------------------------
/docs/user-guide/src/troubleshooting.md:
--------------------------------------------------------------------------------
1 | # Troubleshooting
2 |
3 | ## Verify that Ironic and Baremetal Operator are healthy
4 |
5 | There is no point continuing before you have verified that the controllers are
6 | healthy. A "standard" deployment will have Ironic and Baremetal Operator running
7 | in the `baremetal-operator-system` namespace. Check that the containers are
8 | running, not restarting or crashing:
9 |
10 | ```bash
11 | kubectl -n baremetal-operator-system get pods
12 | ```
13 |
14 | Note: If you deploy Ironic outside of Kubernetes you will need to check on it in
15 | a different way.
16 |
17 | Healthy example output:
18 |
19 | ```text
20 | NAME READY STATUS RESTARTS AGE
21 | baremetal-operator-controller-manager-85b896f688-j27g5 1/1 Running 0 5m13s
22 | ironic-6bcdcb99f8-6ldlz 3/3 Running 1 (2m2s ago) 5m15s
23 | ```
24 |
25 | (There has been one restart, but it is not constantly restarting.)
26 |
27 | Unhealthy example output:
28 |
29 | ```text
30 | NAME READY STATUS RESTARTS AGE
31 | baremetal-operator-controller-manager-85b896f688-j27g5 1/1 Running 0 3m35s
32 | ironic-6bcdcb99f8-6ldlz 1/3 Running 1 (24s ago) 3m37s
33 | ```
34 |
35 | ### Waiting for IP
36 |
37 | Make sure to check the logs also since Ironic may be stuck on "waiting for IP".
38 | For example:
39 |
40 | ```bash
41 | kubectl -n baremetal-operator-system logs ironic-6bcdcb99f8-6ldlz -c ironic
42 | ```
43 |
44 | If Ironic is waiting for IP, you need to check the network configuration.
45 | Some things to look out for:
46 |
47 | - What IP or interface is Ironic configured to use?
48 | - Is Ironic using the host network?
49 | - Is Ironic running on the expected (set of) Node(s)?
50 | - Does the Node have the expected IP assigned?
51 | - Are you using keepalived or similar to manage the IP, and is it working properly?
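
A couple of commands that may help answer these questions (the namespace matches the "standard" deployment used above; adjust it to your installation):

```bash
# Which node is the Ironic pod scheduled on?
kubectl -n baremetal-operator-system get pods -o wide
# Which addresses does that node have?
kubectl get nodes -o wide
```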
52 |
53 | ## Host is stuck in cleaning, how do I delete it?
54 |
55 | First and foremost, avoid using forced deletion, otherwise you'll have [a
56 | conflict](#mac-address-conflict-on-registration). If you don't care about disks
57 | being [cleaned](bmo/automated_cleaning.md), you can edit the BareMetalHost resource
58 | and disable cleaning:
59 |
60 | ```yaml
61 | spec:
62 | automatedCleaningMode: disabled
63 | ```
64 |
65 | Alternatively, you can wait for 3 cleaning retries to finish. After that, the
66 | host will be deleted. If you do care about cleaning, you need to figure out why
67 | it does not finish.
68 |
69 | ## MAC address conflict on registration
70 |
71 | If you force deletion of a host after registration, Baremetal Operator will not
72 | be able to delete the corresponding record from Ironic. If you try to enroll
73 | the same host again, you will see the following error:
74 |
75 | ```text
76 | Normal RegistrationError 4m36s metal3-baremetal-controller MAC address 11:22:33:44:55:66 conflicts with existing node namespace~name
77 | ```
78 |
79 | Currently, the only way to get rid of this error is to re-create Ironic's
80 | internal database. If your deployment uses SQLite (the default), it is enough
81 | to restart the pod with Ironic. If you use MariaDB, you need to restart its
82 | pod, clearing any persistent volumes.
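
Assuming the SQLite case and the "standard" deployment shown earlier (the deployment name and namespace may differ in your installation), restarting Ironic can be as simple as:

```bash
kubectl -n baremetal-operator-system rollout restart deployment ironic
```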
83 |
84 | ## Power requests are issued for deleted hosts
85 |
86 | Similarly to the previous question, a host is not deleted from Ironic in case
87 | of a forced deletion of its BareMetalHost object. If valid BMC credentials were
88 | provided, Ironic will keep checking the power state of the host and enforcing
89 | the last requested power state. The only solution is again to re-create
90 | Ironic's internal database.
91 |
--------------------------------------------------------------------------------
/docs/user-guide/theme/favicon.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/hack/markdownlint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # markdownlint-cli2 has config file(s) named .markdownlint-cli2.yaml in the repo
3 |
4 | set -eux
5 |
6 | IS_CONTAINER="${IS_CONTAINER:-false}"
7 | CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-podman}"
8 |
9 | # all md files, but ignore .github
10 | if [ "${IS_CONTAINER}" != "false" ]; then
11 | markdownlint-cli2 "**/*.md" "#.github"
12 | else
13 | "${CONTAINER_RUNTIME}" run --rm \
14 | --env IS_CONTAINER=TRUE \
15 | --volume "${PWD}:/workdir:ro,z" \
16 | --entrypoint sh \
17 | --workdir /workdir \
18 | docker.io/pipelinecomponents/markdownlint-cli2:0.12.0@sha256:a3977fba9814f10d33a1d69ae607dc808e7a6470b2ba03e84c17193c0791aac0 \
19 | /workdir/hack/markdownlint.sh "$@"
20 | fi
21 |
--------------------------------------------------------------------------------
/hack/shellcheck.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | set -eux
4 |
5 | IS_CONTAINER="${IS_CONTAINER:-false}"
6 | CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-podman}"
7 |
8 | if [ "${IS_CONTAINER}" != "false" ]; then
9 | TOP_DIR="${1:-.}"
10 | find "${TOP_DIR}" -name '*.sh' -type f -exec shellcheck -s bash {} \+
11 | else
12 | "${CONTAINER_RUNTIME}" run --rm \
13 | --env IS_CONTAINER=TRUE \
14 | --volume "${PWD}:/workdir:ro,z" \
15 | --entrypoint sh \
16 | --workdir /workdir \
17 | docker.io/koalaman/shellcheck-alpine:v0.10.0@sha256:5921d946dac740cbeec2fb1c898747b6105e585130cc7f0602eec9a10f7ddb63 \
18 | /workdir/hack/shellcheck.sh "$@"
19 | fi
20 |
--------------------------------------------------------------------------------
/hack/spellcheck.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | # Spelling errors detected in markdown files.
4 | # If the errors are names, external links, or unusual but accurate technical words,
5 | # then you should create an inline comment like:
6 | #
7 | #
8 | #
9 | # Of course, you should only include non-dictionary words that are correctly spelled!
10 | # If the error happens because of a common technical term or proper name that is likely
11 | # to appear many times, then please edit "../.cspell-config.json" and add it to the
12 | # "words" list.
13 | # shellcheck disable=SC2292
14 |
15 | set -eux
16 |
17 | IS_CONTAINER="${IS_CONTAINER:-false}"
18 | CONTAINER_RUNTIME="${CONTAINER_RUNTIME:-podman}"
19 | WORKDIR="${WORKDIR:-/workdir}"
20 |
21 | # all md files, but ignore .github and node_modules
22 | if [ "${IS_CONTAINER}" != "false" ]; then
23 | cspell-cli --show-suggestions -c .cspell-config.json -- "./**/*.md"
24 | else
25 | "${CONTAINER_RUNTIME}" run --rm \
26 | --env IS_CONTAINER=TRUE \
27 | --volume "${PWD}:${WORKDIR}:ro,z" \
28 | --entrypoint sh \
29 | --workdir "${WORKDIR}" \
30 | ghcr.io/streetsidesoftware/cspell:8.13.3@sha256:03df0e485775a43531c9c0e829227f39b3380796e92faab4166137dc5712d40a \
31 | "${WORKDIR}"/hack/spellcheck.sh "$@"
32 | fi
33 |
--------------------------------------------------------------------------------
/hack/tools/go.mod:
--------------------------------------------------------------------------------
1 | module metal3-io/metal3-docs/hack/tools
2 |
3 | go 1.21
4 |
5 | require (
6 | github.com/blang/semver v3.5.1+incompatible
7 | sigs.k8s.io/kubebuilder/docs/book/utils v0.0.0-20240216033807-8afeb403549f
8 | )
9 |
--------------------------------------------------------------------------------
/hack/tools/go.sum:
--------------------------------------------------------------------------------
1 | github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ=
2 | github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
3 | sigs.k8s.io/kubebuilder/docs/book/utils v0.0.0-20240216033807-8afeb403549f h1:0rbnCuTF/IIxfwoJR/p7zTB5+AA0RaBwB0lqjBUs/28=
4 | sigs.k8s.io/kubebuilder/docs/book/utils v0.0.0-20240216033807-8afeb403549f/go.mod h1:4CGoZGcqb7Bes5d0qgb4SIHqk+XjUfoxesbbTmpVl6s=
5 |
--------------------------------------------------------------------------------
/hack/tools/releasetags/releasetags.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "io"
6 | "log"
7 | "net/http"
8 | "os"
9 | "reflect"
10 | "strings"
11 |
12 | "github.com/blang/semver"
13 | "sigs.k8s.io/kubebuilder/docs/book/utils/plugin"
14 | )
15 |
16 | type ReleaseTag struct{}
17 |
18 | // SupportsOutput checks if the given plugin supports the given output format.
19 | func (ReleaseTag) SupportsOutput(_ string) bool { return true }
20 |
21 | // Process modifies the book in the input, which gets returned as the result of the plugin.
22 | func (l ReleaseTag) Process(input *plugin.Input) error {
23 | return plugin.EachCommand(&input.Book, "releasetag", func(chapter *plugin.BookChapter, args string) (string, error) {
24 | parsedVersions := semver.Versions{}
25 | var repo, owner string
26 | var found bool
27 |
28 | markers := reflect.StructTag(strings.TrimSpace(args))
29 |
30 | if repo, found = markers.Lookup("repo"); !found {
31 | return "", fmt.Errorf("releasetag requires tag \"repo\" to be set")
32 | }
33 |
34 | if owner, found = markers.Lookup("owner"); !found {
35 | return "", fmt.Errorf("releasetag requires tag \"owner\" to be set")
36 | }
37 |
38 | response, err := http.Get("https://proxy.golang.org/github.com/" + owner + "/" + repo + "/@v/list")
39 | if err != nil {
40 | log.Fatalln(err)
41 | }
42 |
43 | body, err := io.ReadAll(response.Body)
44 | if err != nil {
45 | log.Fatalln(err)
46 | }
47 |
48 | for _, s := range strings.Split(string(body), "\n") {
49 | if strings.Contains(s, "-") {
50 | continue
51 | }
52 | parsedVersion, err := semver.ParseTolerant(s)
53 | if err != nil {
54 | // Discard releases with tags that are not a valid semantic versions
55 | continue
56 | }
57 | parsedVersions = append(parsedVersions, parsedVersion)
58 | }
59 |
60 | var picked semver.Version
61 | for i, tag := range parsedVersions {
62 | if tag.GT(picked) {
63 | picked = parsedVersions[i]
64 | }
65 | }
66 |
67 | return fmt.Sprintf(":v%s", picked), nil
68 | })
69 | }
70 |
71 | func main() {
72 | cfg := ReleaseTag{}
73 | if err := plugin.Run(cfg, os.Stdin, os.Stdout, os.Args[1:]...); err != nil {
74 | log.Fatal(err.Error())
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/images/high-level-arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/images/high-level-arch.png
--------------------------------------------------------------------------------
/images/metal3-.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/images/metal3-banner.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/images/metal3-banner.pdf
--------------------------------------------------------------------------------
/images/metal3-black.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/images/metal3-black.png
--------------------------------------------------------------------------------
/images/metal3-dev-env-transparent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/images/metal3-dev-env-transparent.png
--------------------------------------------------------------------------------
/images/metal3-dev-env.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/images/metal3-dev-env.png
--------------------------------------------------------------------------------
/images/metal3-website-sticker.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/images/metal3-website-sticker.png
--------------------------------------------------------------------------------
/images/metal3-white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/images/metal3-white.png
--------------------------------------------------------------------------------
/images/metal3-white.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/images/metal3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/images/metal3.png
--------------------------------------------------------------------------------
/images/metal3.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/images/metal3_facet-black-text.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/images/metal3_facet-black-text.png
--------------------------------------------------------------------------------
/images/metal3_facet-whitetext.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/metal3-io/metal3-docs/e7ae8383c74127356177e76ba0f21a984129bf50/images/metal3_facet-whitetext.png
--------------------------------------------------------------------------------
/netlify.toml:
--------------------------------------------------------------------------------
1 | # Netlify settings
2 | [build]
3 | command = "make netlify-build"
4 | publish = "docs/user-guide/book"
5 |
6 | [build.environment]
7 | GO_VERSION = "1.22.2"
--------------------------------------------------------------------------------
/processes/managing-reviewers.md:
--------------------------------------------------------------------------------
1 | # Managing Reviewers
2 |
3 | ## Status
4 |
5 | implementable
6 |
7 | ## Summary
8 |
9 | After the migration to use OWNERS files to manage reviewers, it is
10 | easier for us to add reviewers to separate repositories. This document
11 | describes the process for adding reviewers to a metal3 project repo.
12 |
13 | ### Goals
14 |
15 | 1. Describe a process for adding reviewers.
16 | 2. Keep the process light-weight.
17 |
18 | ### Non-Goals
19 |
20 | 1. Change the process for adding [maintainers](https://github.com/metal3-io/community/blob/main/maintainers/).
21 |
22 | ## Proposal
23 |
24 | Anyone can propose a patch to update an OWNERS file in a repository to
25 | add a reviewer. The patch should be submitted as a standalone PR,
26 | rather than being linked to any other contribution.
27 |
28 | The reviewer list for each repository will be pruned over time to
29 | remove contributors who are inactive.
30 |
31 | Reviewers may also be removed for behaving in a manner that other
32 | maintainers view as detrimental to the project, following the process
33 | described for maintainers in [Revoking Approval
34 | Access](https://github.com/metal3-io/community/blob/main/maintainers/README.md#revoking-approval-access).
35 |
36 | Pull requests to add or remove reviewers from OWNERS files should be
37 | approved using the same policy as other changes: One person with
38 | approval permission and another with at least reviewer permission must
39 | accept the PR.
40 |
41 | ### Risks and Mitigations
42 |
43 | Ideally new reviewers will have already contributed to the project,
44 | either through code, documentation, reviews, or design
45 | discussions. New contributors can be added to new repositories if they
46 | are helping to launch a new sub-component.
47 |
48 | ### Dependencies
49 |
50 | - [reviewer-permissions-migration](reviewer-permissions-migration.md)
51 |
--------------------------------------------------------------------------------
/processes/reviewer-permissions-migration.md:
--------------------------------------------------------------------------------
1 | # reviewer-permissions-migration
2 |
3 | ## Status
4 |
5 | implementable
6 |
7 | ## Summary
8 |
9 | We should use the OWNERS files in each repository to manage the list
10 | of reviewers, instead of relying on the Github organization
11 | membership.
12 |
13 | ## Motivation
14 |
15 | As the metal3 community expands, we are inevitably going to find that
16 | teams of people focus on different areas and different components. For
17 | example, we recently added the hardware-classification-controller,
18 | which is managed by some existing as well as new contributors. One of
19 | the things we have to balance as we add new contributors is the trust
20 | we extend and the obligations we place on them. I think we have grown
21 | to a point where we need to change how we do that.
22 |
23 | We have been using Github org membership as the way to indicate who
24 | has permission to use /lgtm as part of approving
25 | patches. Unfortunately, that extends to any repository in the org,
26 | which means we have to trust someone quite a lot before we invite them
27 | to be an org member. As we grow, this becomes more difficult to do
28 | with blanket permissions across all of our repositories.
29 |
30 | ### Goals
31 |
32 | 1. Transition from github org membership to OWNERS files for reviewer
33 | permissions.
34 |
35 | ### Non-Goals
36 |
37 | 1. Define a new process for approving reviewers.
38 | 2. Change the process for approving approvers.
39 | 3. Change the permissions for managing the CI infrastructure.
40 |
41 | ## Proposal
42 |
43 | Given the new repositories and teams, I think we should shift as much
44 | as possible to using the OWNERS files in repositories, so that our
45 | teams can manage the list of reviewers in each repository
46 | independently. This will mean that the OWNERS file will manage
47 | permissions for /lgtm as well as /approve.
48 |
49 | ### Implementation Details/Notes/Constraints
50 |
51 | In order to make the change, we need to review the OWNERS file(s) in
52 | each repository and update them to include a list of reviewers. We
53 | have [a process for approval permission](../maintainers) but that does
54 | not apply to reviewers. We should give reviewer permission more easily
55 | than approver permission, as a way to grow our teams without friction,
56 | so we're going to want to have a separate process for that.
57 |
58 | If we focus on the transition for now, we can define that process
59 | separately later. So, I propose that we take the contributors with 10
60 | or more commits according to github’s contribution list (via the page
61 | like `https://github.com/metal3-io/<repo>/graphs/contributors`
62 | )
63 | as the initial set of reviewers for each repo. That will allow us to
64 | complete the migration and we can expand the list further afterwards.
65 |
66 | After we agree on this process, I will propose PRs to each repo to add
67 | reviewers to the owners files. When we have merged those PRs, we can
68 | change Prow’s configuration to have it use the OWNERS file instead of
69 | github org membership for /lgtm permissions. We should also update the
70 | maintainers process document to include instructions for managing the
71 | list of org members and for managing the reviewer list for a repo.
72 |
73 | ### Risks and Mitigations
74 |
75 | If we miss a repository, the list of approvers for that repository
76 | will also have reviewer permission so we can still merge patches.
77 |
78 | ### Work Items
79 |
80 | - Add reviewers to OWNERS files in the root of all repositories
81 | - Update Prow configuration to look for reviewers in the OWNERS files
82 |
83 | ## Alternatives
84 |
85 | This change will not affect the commands to the Jenkins integration
86 | jobs managed by Ericsson, like /test-integration. That tool chain only
87 | looks at org membership, so all members of the organization will still
88 | be able to trigger the Jenkins integration tests. This is however not
89 | the case for the metal3-io/project-infra repository where only a
90 | subset of people can trigger the Jenkins tests due to the sensitivity
91 | of the information available (such as Github tokens).
92 |
--------------------------------------------------------------------------------
/processes/roadmap.md:
--------------------------------------------------------------------------------
1 | # Metal3 Roadmap
2 |
3 | The Metal3 Roadmap is maintained as a Github project and can be found
4 | [here](https://github.com/orgs/metal3-io/projects/2).
5 |
6 | ## Description
7 |
8 | Each column in the project represents the work items for a specific release of
9 | either Baremetal Operator or Cluster API Provider Metal3. In addition there is
10 | a `Feature requests` column that contains items that have not yet been
11 | accepted and the `Backlog` column that contains items that have been accepted
12 | but not yet planned for a specific release.
13 |
14 | An issue can be planned for a specific release if someone volunteers to take
15 | ownership of the feature. The owner is then assigned the issue. An owner
16 | does not have to carry the whole design and implementation process alone,
17 | but must instead make sure that the feature is being worked on and will be
18 | completed by the planned release date.
19 |
20 | ## Proposing a feature
21 |
22 | Proposing a new feature for a specific release of one of the components is done
23 | by opening an issue in the metal3-docs repository, describing the feature and
24 | which component and release are targeted. The new issue will automatically
25 | appear in the `Feature requests` column of the roadmap.
26 |
27 | ## Updating the Roadmap
28 |
29 | Updating the roadmap is done during a community meeting, after a discussion
30 | among the project members, or alternatively through an email thread.
31 | The update is performed by one of the approvers of the metal3-docs project.
32 |
33 | A new feature proposal is moved from the `Feature requests` column to a
34 | component release column if the community agrees and a member volunteers to
35 | take ownership of that feature. If a feature is seen as necessary in the
36 | long term but is not planned for any of the defined releases, it is placed
37 | in the `Backlog` column. An issue from the `Backlog` can be moved to a
38 | specific release when someone volunteers to take ownership of the
39 | issue.
40 |
41 | An inactive issue in one of the releases (marked as stale) can be moved back to
42 | the `Backlog` column, and issues in the `Feature requests` column that are not
43 | actual feature proposals but issues related to the metal3-docs repository can
44 | be removed from the project.
45 |
--------------------------------------------------------------------------------
/processes/triage.md:
--------------------------------------------------------------------------------
1 | # Title
2 |
3 | Metal3 Issue Triage Process
4 |
5 | ## Status
6 |
7 | provisional
8 |
9 | ## Summary
10 |
11 | In order to ensure that issues reported by Metal3 users are reviewed on
12 | a consistent basis, we should meet on a regular schedule in a live
13 | meeting to review newly submitted issues, and on some recurring basis
14 | look at potentially stale issues to consider whether they should be
15 | closed, have their priority increased, etc.
16 |
17 | ## Proposal
18 |
19 | During the triage process, the moderator should go through each of the
20 | subcategories listed below and apply the process to each issue.
21 |
22 | ### New Issue Triage
23 |
24 | [GitHub Search
25 | Query](https://github.com/issues?utf8=%E2%9C%93&q=archived%3Afalse+user%3Ametal3-io+no%3Alabel+is%3Aissue+sort%3Acreated-asc+is%3Aopen):
26 | `archived:false user:metal3-io no:label is:issue sort:created-asc
27 | is:open`
28 |
29 | - Evaluate if the issue is still relevant.
30 | - If not, close the issue.
31 | - Determine the kind and apply the right label, for example bug, feature,
32 |   etc. (see the example comments after this list).
33 | - Make a best guess at priority if the issue isn't actively being worked on.
34 | - If needed, ask for more information from the reporter or a
35 |   developer. Label such issues `priority/awaiting-more-evidence`.
36 | - Mark trivial issues as `good first issue`.
37 |
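38 | As an illustration, and assuming the metal3-io Prow deployment enables the
39 | usual Kubernetes-style label and help plugins, the moderator can apply these
40 | labels by commenting on the relevant issue. The commands below are
41 | hypothetical examples, each posted on a separate issue:
42 |
43 | ```text
44 | /kind bug
45 | /priority awaiting-more-evidence
46 | /good-first-issue
47 | ```
48 |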
38 | ### Awaiting Evidence
39 |
40 | [GitHub Search
41 | Query](https://github.com/issues?utf8=%E2%9C%93&q=archived%3Afalse+user%3Ametal3-io+is%3Aissue+sort%3Acreated-asc+is%3Aopen+label%3Apriority%2Fawaiting-more-evidence): `archived:false
42 | user:metal3-io is:issue sort:created-asc is:open
43 | label:priority/awaiting-more-evidence`
44 |
45 | - Review whether the required evidence has been provided; if so, change the
46 |   priority/kind as needed, or close the issue if it is resolved.
47 |
48 | ### Stale Issues
49 |
50 | [GitHub Search
51 | Query](https://github.com/issues?q=archived%3Afalse+user%3Ametal3-io+is%3Aissue+sort%3Acreated-asc+is%3Aopen+label%3Alifecycle%2Fstale):
52 | `archived:false user:metal3-io is:issue sort:created-asc is:open
53 | label:lifecycle/stale`
54 |
55 | - There are periodic jobs in Prow that mark issues stale after 90 days of
56 | inactivity.
57 | - After 30 additional days of inactivity, issues will be closed.
58 | - Every other triage session (e.g. once per 30 days), the stale issues
59 |   should be reviewed to ensure that no critical issues we want to keep
60 |   open get closed (see the example comment below).
61 |
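62 | For reference, assuming the standard Prow lifecycle plugin is enabled for
63 | metal3-io, a critical issue that should stay open can be kept from
64 | auto-closing with a comment such as:
65 |
66 | ```text
67 | /remove-lifecycle stale
68 | ```
69 |
70 | `/lifecycle frozen` is commonly used instead when an issue should be exempt
71 | from the staleness checks entirely.
72 |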
--------------------------------------------------------------------------------