├── meta
└── runtime.yml
├── .gitignore
├── roles
├── bf2_boot
│ ├── templates
│ │ └── bf.cfg.j2
│ ├── README.md
│ └── tasks
│ │ └── main.yml
├── install_cuda
│ ├── files
│ │ ├── 7fa2af80.gpg
│ │ ├── A024F6F0E6D6A281.gpg
│ │ ├── A4B469963BF863CC.gpg
│ │ ├── F60F4B3D7FA2AF80.gpg
│ │ ├── cuda-repository-pin-600
│ │ └── libnvidia-container.pub.pem
│ ├── templates
│ │ └── cuda-repo.list.j2
│ ├── tasks
│ │ ├── deb_network.yml
│ │ ├── add_mirror_repo.yml
│ │ ├── apt_common.yml
│ │ ├── libnvidia_container.yml
│ │ └── main.yml
│ └── README.md
├── manage_bf2_nic_speed
│ ├── templates
│ │ └── 83-net-speed.rules.j2
│ ├── README.md
│ └── tasks
│ │ └── main.yml
├── prepare_cuda_repo
│ ├── tasks
│ │ ├── main.yml
│ │ ├── get_installer.yml
│ │ └── check_vars.yml
│ └── README.md
├── manage_bf_bmc_fw
│ ├── README.md
│ └── tasks
│ │ └── main.yml
├── force_reboot_armos
│ ├── README.md
│ └── tasks
│ │ └── main.yml
├── install_doca
│ ├── README.md
│ ├── defaults
│ │ └── main.yml
│ └── tasks
│ │ └── main.yml
├── manage_rshim_owner
│ ├── README.md
│ └── tasks
│ │ ├── main.yaml
│ │ └── change_owner.yaml
├── manage_bf2_fw
│ ├── README.md
│ ├── defaults
│ │ └── main.yml
│ └── tasks
│ │ └── main.yml
├── load_bfb
│ ├── templates
│ │ ├── bf_ubuntu.cfg.j2
│ │ └── bf.cfg.j2
│ ├── defaults
│ │ └── main.yml
│ ├── README.md
│ └── tasks
│ │ └── main.yml
├── bf_bmc
│ ├── defaults
│ │ └── main.yml
│ ├── README.md
│ └── tasks
│ │ ├── main.yml
│ │ ├── chassis_power_on.yaml
│ │ ├── chassis_power_off.yaml
│ │ └── powercycle.yml
├── dpu_nvconfig
│ ├── tasks
│ │ ├── run_mlxconfig.yml
│ │ ├── set_embedded_cpu_model.yml
│ │ ├── main.yml
│ │ ├── set_gpu_owner.yml
│ │ ├── nvset.yml
│ │ ├── set_link_type.yml
│ │ └── set_nic_mode.yml
│ └── README.md
└── bf2_mode
│ ├── defaults
│ └── main.yml
│ ├── README.md
│ └── tasks
│ ├── main.yml
│ ├── security.yml
│ └── ownership.yml
├── ansible.cfg
├── .ansible-lint
├── README.md
├── plugins
├── README.md
├── filter
│ └── rshim_filter.py
├── action
│ ├── raw_reboot.py
│ └── raw_upgrade.py
└── modules
│ ├── bf2_facts.py
│ └── bf2_facts_test.py
├── LICENSE
└── galaxy.yml
/meta/runtime.yml:
--------------------------------------------------------------------------------
1 | requires_ansible: ">=2.9,<2.12.0"
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | nvidia-dpu_ops-*.tar.gz
2 | **/__pycache__
3 | .cache
4 |
--------------------------------------------------------------------------------
/roles/bf2_boot/templates/bf.cfg.j2:
--------------------------------------------------------------------------------
1 | BOOT0={{ pxe_boot_dev }}
2 | BOOT1=DISK
3 |
--------------------------------------------------------------------------------
/roles/install_cuda/files/7fa2af80.gpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/ansible-collection-dpu-ops/main/roles/install_cuda/files/7fa2af80.gpg
--------------------------------------------------------------------------------
/ansible.cfg:
--------------------------------------------------------------------------------
1 | # Point Ansible at the roles and plugins shipped in this repository.
2 | [defaults]
3 | roles_path = roles
4 | library = plugins/modules/
5 | action_plugins = plugins/action/
6 | # Filter plugins live in plugins/filter/ (rshim_filter.py), not plugins/modules/.
7 | filter_plugins = plugins/filter/
8 |
--------------------------------------------------------------------------------
/roles/install_cuda/files/A024F6F0E6D6A281.gpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/ansible-collection-dpu-ops/main/roles/install_cuda/files/A024F6F0E6D6A281.gpg
--------------------------------------------------------------------------------
/roles/install_cuda/files/A4B469963BF863CC.gpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/ansible-collection-dpu-ops/main/roles/install_cuda/files/A4B469963BF863CC.gpg
--------------------------------------------------------------------------------
/roles/install_cuda/files/F60F4B3D7FA2AF80.gpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/ansible-collection-dpu-ops/main/roles/install_cuda/files/F60F4B3D7FA2AF80.gpg
--------------------------------------------------------------------------------
/roles/install_cuda/templates/cuda-repo.list.j2:
--------------------------------------------------------------------------------
1 | deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu{{ c_ver }}/{{ c_arch }}/ /
2 | # deb-src https://developer.download.nvidia.com/compute/cuda/repos/ubuntu{{ c_ver }}/{{ c_arch }}/ /
3 |
--------------------------------------------------------------------------------
/roles/manage_bf2_nic_speed/templates/83-net-speed.rules.j2:
--------------------------------------------------------------------------------
1 | SUBSYSTEM=="net", ACTION=="add", NAME=="p0", RUN+="/sbin/ethtool -s p0 {{ p0_nic_speed_options }}"
2 | SUBSYSTEM=="net", ACTION=="add", NAME=="p1", RUN+="/sbin/ethtool -s p1 {{ p1_nic_speed_options }}"
3 |
--------------------------------------------------------------------------------
/roles/prepare_cuda_repo/tasks/main.yml:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | ---
3 | # Step 1: run the tasks in check_vars.yml.
4 | - name: Check vars
5 |   include_tasks:
6 |     file: check_vars.yml
7 |
8 | # Step 2: run the tasks in get_installer.yml.
9 | - name: Get local installer
10 |   include_tasks:
11 |     file: get_installer.yml
12 |
--------------------------------------------------------------------------------
/roles/install_cuda/files/cuda-repository-pin-600:
--------------------------------------------------------------------------------
1 | # https://help.ubuntu.com/community/PinningHowto
2 | Package: nsight-compute
3 | Pin: origin *ubuntu.com*
4 | Pin-Priority: -1
5 |
6 | Package: nsight-systems
7 | Pin: origin *ubuntu.com*
8 | Pin-Priority: -1
9 |
10 | Package: *
11 | Pin: release l=NVIDIA CUDA
12 | Pin-Priority: 600
13 |
14 |
--------------------------------------------------------------------------------
/roles/prepare_cuda_repo/README.md:
--------------------------------------------------------------------------------
1 | # Prepare CUDA local repository
2 |
3 | The `prepare_cuda_repo` role is used to download CUDA "DEB local" installer,
4 | unpack it on the "control plane" host (such as Foreman), and set up the corresponding data.
5 |
6 | This role is used in conjunction with the `install_cuda` role.
7 |
8 | ## Parameters
9 |
10 | * `cuda_release` -- release of CUDA like `11.5.1`, `11.6.2`
11 | * `cuda_arch` -- CPU architecture to install. Allowed values are:
12 | * `amd64`
13 | * `arm64`
14 |
15 |
--------------------------------------------------------------------------------
/.ansible-lint:
--------------------------------------------------------------------------------
1 | exclude_paths:
2 | - ./collections/
3 | - ./.venv/
4 | - ./.cache
5 | - ./.git
6 |
7 | # https://github.com/ansible-community/ansible-lint/blob/master/src/ansiblelint/constants.py
8 | skip_list:
9 | - '204' # Lines should be no longer than 160
10 | - '301' # Commands should not change things if nothing needs
11 | - '302' # Using command rather than an argument to e.g.
12 | - '305' # Use shell only when shell functionality is required
13 | - '503' # Tasks that run when changed should likely be handlers
14 |
--------------------------------------------------------------------------------
/roles/manage_bf_bmc_fw/README.md:
--------------------------------------------------------------------------------
1 | # Update BMC firmware of DPU
2 |
3 | ## Parameters
4 |
5 | * `bmc_url` -- URL of BMC firmware image
6 |
7 | ## Playbook examples
8 |
9 | manage-bf-bmc-fw.yaml
10 |
11 | ---
12 | - hosts: bmc
13 | user: "{{ remote_install_user }}"
14 | gather_facts: no # if using a bmc host, this will fail because ansible is not present
15 | become: true
16 | vars:
17 | bmc_url: "{{ foreman.foreman_mirror }}/{{ bmc.file }}"
18 | roles:
19 | - nvidia.dpu_ops.manage_bf_bmc_fw
20 |
21 |
22 |
--------------------------------------------------------------------------------
/roles/bf2_boot/README.md:
--------------------------------------------------------------------------------
1 | # DPU (BF2) Boot
2 | ## Parameters
3 |
4 | Ansible variable(s) to be defined:
5 |
6 | * `pxe_boot_dev` - name of device to boot DPU from in `/etc/bf.cfg` of the installer.
7 | Allowed values are:
8 | * `NET-OOB-IPV4`
9 | * `NET-NIC_P1-IPV4`
10 |
11 | ## Usage example
12 |
13 | bf2-boot-order.yml
14 |
15 | - hosts: bf2oob
16 | become: true
17 | user: "{{ remote_install_user }}"
18 | vars:
19 | pxe_boot_dev: "{{ bf2.pxe_boot_dev }}"
20 | roles:
21 | - nvidia.dpu_ops.bf2_boot
22 |
23 |
24 |
--------------------------------------------------------------------------------
/roles/force_reboot_armos/README.md:
--------------------------------------------------------------------------------
1 | # Force reboot of ARM OS of DPU
2 | The `force_reboot_armos` role is used to reboot "ARM OS" of DPU
3 | from x86 host side
4 |
5 | ## Parameters
6 |
7 | ## Playbook examples
8 |
9 | force_reboot_armos.yaml
10 |
11 | ---
12 | - hosts: all
13 | user: "{{ remote_install_user }}"
14 | gather_facts: no
15 | become: true
16 | pre_tasks:
17 | - name: set is_bmc
18 | set_fact:
19 | is_bmc: "{{ inventory_hostname.startswith('bmc') }}"
20 | roles:
21 | - nvidia.dpu_ops.force_reboot_armos
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/roles/manage_bf2_nic_speed/README.md:
--------------------------------------------------------------------------------
1 | # Set ethernet link speed of DPU ports
2 |
3 | ## Parameters
4 |
5 | * `p0_nic_speed_options` -- speed for port #1
6 | * `p1_nic_speed_options` -- speed for port #2
7 |
8 | ## Playbook examples
9 |
10 | manage-bf2-nic-speed.yaml
11 |
12 | ---
13 | - hosts: bf2oob
14 | user: "{{ remote_install_user }}"
15 | become: true
16 | vars:
17 | p0_nic_speed_options: "{{ bf2.p0_nic_speed_options }}"
18 | p1_nic_speed_options: "{{ bf2.p1_nic_speed_options }}"
19 | roles:
20 | - nvidia.dpu_ops.manage_bf2_nic_speed
21 |
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/roles/install_cuda/tasks/deb_network.yml:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | #
3 | # Setup CUDA '(deb) network' repo
4 | ---
5 | # Write the apt source entry for NVIDIA's online CUDA repo; c_ver and c_arch fill in the ubuntu<ver>/<arch> path.
6 | - name: copy cuda.list
7 |   copy:
8 |     dest: /etc/apt/sources.list.d/cuda.list
9 |     content: "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu{{ c_ver }}/{{ c_arch }}/ /\n"
10 |     mode: '0644'
11 |     owner: root
12 |     group: root
13 |
14 | # TODO
15 | # 1. run modprobe nvidia; nvidia-smi to check it works
16 | # 2. pre-condition: GPU as PCI device is present
17 | # 3. Install # nvidia-container-runtime if required
18 | # https://nvidia.github.io/libnvidia-container/
19 |
--------------------------------------------------------------------------------
/roles/install_doca/README.md:
--------------------------------------------------------------------------------
1 | # Install DOCA
2 |
3 | The `install_doca` role is used to install [Nvidia DOCA SDK](https://developer.nvidia.com/networking/doca)
4 | on x86 host.
5 |
6 | ## Parameters
7 |
8 | * `doca.version` -- version of DOCA to install
9 | * `doca.package` -- name of RPM/DEB package with DOCA SDK to install on x86 machine
10 |
11 | The role does not parse the HTML download page to guess the name of the DEB/RPM package, so `doca.package` must be given explicitly.
12 |
13 | ## Playbook examples
14 |
15 | manage-doca.yml
16 |
17 | ---
18 | - hosts: x86host
19 | user: "{{ remote_install_user }}"
20 | become: true
21 | vars:
22 | doca:
23 | version: 1.2.1
24 | package: doca-host-repo-ubuntu2004_1.2.1-0.1.5.1.2.006.5.5.2.1.7.0_amd64.deb
25 | roles:
26 | - nvidia.dpu_ops.install_doca
27 |
28 |
29 |
--------------------------------------------------------------------------------
/roles/manage_rshim_owner/README.md:
--------------------------------------------------------------------------------
1 | # Set RSHIM ownership
2 |
3 | The `manage_rshim_owner` role sets the owner of the DPU RSHIM interface.
4 | It could be either BMC or x86 host.
5 |
6 | ## Parameters
7 |
8 | * `bf_target` -- who is the owner of RSHIM: `bmc` or `x86`
9 |
10 | ## Playbook example
11 |
12 | bf2_mode.yml
13 |
14 | - hosts: "bmc"
15 | user: "{{ remote_install_user }}"
16 | become: true
17 | gather_facts: False
18 | vars:
19 | bf_target: "bmc" # internal variable for the non_bf2_host regex
20 | ansible_fqdn: "{{ inventory_hostname }}" # this hack is because facts are not gathered and the non_bf2_host uses it
21 | x86_host: "{{ non_bf2_host }}"
22 | bmc_host: "{{ inventory_hostname }}"
23 | roles:
24 | - nvidia.dpu_ops.manage_rshim_owner
25 |
26 |
27 |
--------------------------------------------------------------------------------
/roles/manage_bf2_fw/README.md:
--------------------------------------------------------------------------------
1 | # Update DPU NIC firmware
2 |
3 | The `manage_bf2_fw` role updates the NIC firmware of the DPU and power-cycles the x86 host.
4 |
5 | ## Playbook examples
6 |
7 | manage-bf2-fw.yaml
8 |
9 | ---
10 | - hosts: bf2oob
11 | user: "{{ remote_install_user }}"
12 | become: true
13 | vars:
14 | bmc_host: "{{ hostvars[non_bf2_host]['bmc_ip'] }}"
15 | bmc_user: "{{ hostvars[non_bf2_host]['bmc_user'] }}"
16 | bmc_password: "{{ hostvars[non_bf2_host]['bmc_password'] }}"
17 | run_on: "{{ groups['foreman'][0] }}"
18 | roles:
19 | - nvidia.dpu_ops.manage_bf2_fw
20 | post_tasks:
21 | - name: wait for machine to be back online
22 | wait_for:
23 | host: "{{ non_bf2_host }}"
24 | port: 22
25 | timeout: 900
26 | delay: 60
27 | delegate_to: "{{ groups['foreman'][0] }}"
28 |
29 |
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Ansible Collection - nvidia.dpu_ops
2 |
3 | The following is a collection of roles that can be used to administer NVIDIA DPU cards.
4 | It contains the following functions:
5 |
6 | * `bf_bmc` - Run arbitrary ipmitool commands on the BMC of a DPU
7 | * `bf2_boot` - Modify the boot order of a DPU
8 | * `bf2_mode` - Modify the security and ownership modes of a DPU
9 | * `force_reboot_armos` - Force reboot the DPU over rshim
10 | * `install_doca` - Install DOCA utilities
11 | * `load_bfb` - Load BFB and bf.cfg over rshim
12 | * `manage_bf_bmc_fw` - Upgrade the firmware of the BMC of the DPU
13 | * `manage_bf2_fw` - Upgrade the firmware of the DPU
14 | * `manage_bf2_nic_speed` - Change settings on the nic speed for a DPU
15 | * `manage_rshim_owner` - Change rshim ownership between a DPU and its host
16 | * `prepare_cuda_repo` - Prepare local repository of CUDA installer
17 | * `install_cuda` - Install CUDA on x86 or DPU
18 | * `dpu_nvconfig` - Set nvconfig parameters of DPU
19 |
--------------------------------------------------------------------------------
/plugins/README.md:
--------------------------------------------------------------------------------
1 | # Collections Plugins Directory
2 |
3 | This directory can be used to ship various plugins inside an Ansible collection. Each plugin is placed in a folder that
4 | is named after the type of plugin it is in. It can also include the `module_utils` and `modules` directory that
5 | would contain module utils and modules respectively.
6 |
7 | Here is an example directory of the majority of plugins currently supported by Ansible:
8 |
9 | ```
10 | └── plugins
11 | ├── action
12 | ├── become
13 | ├── cache
14 | ├── callback
15 | ├── cliconf
16 | ├── connection
17 | ├── filter
18 | ├── httpapi
19 | ├── inventory
20 | ├── lookup
21 | ├── module_utils
22 | ├── modules
23 | ├── netconf
24 | ├── shell
25 | ├── strategy
26 | ├── terminal
27 | ├── test
28 | └── vars
29 | ```
30 |
31 | A full list of plugin types can be found at [Working With Plugins](https://docs.ansible.com/ansible/2.10/plugins/plugins.html).
32 |
--------------------------------------------------------------------------------
/roles/load_bfb/templates/bf_ubuntu.cfg.j2:
--------------------------------------------------------------------------------
1 | {% extends "bf.cfg.j2" %}
2 |
3 | {% block preamble%}
4 | {% endblock %}
5 |
6 | {% block script_begin %}
7 | set +x
8 | {% endblock %}
9 |
10 | {% block cloudinit %}
11 | debug:
12 | verbose: true
13 | timezone: "Etc/UTC"
14 | hostname: {{ ansible_hostname }}
15 | manage_etc_hosts: true
16 | users:
17 | - name: ubuntu
18 | shell: /bin/bash
19 | sudo: ALL=(ALL) NOPASSWD:ALL
20 | lock_passwd: false
21 | passwd: {{ hashed_user_password }}
22 | groups: [adm, audio, cdrom, dialout, dip, floppy, lxd, netdev, plugdev, sudo, video]
23 | {% endblock %}
24 |
25 | {% block cloudinit_extra_commands %}
26 | # - [ systemctl, enable, rshim ]
27 | {% endblock %}
28 |
29 | {% block embedded_network %}
30 | {% endblock %}
31 |
32 | {% block separated_network %}
33 | {% endblock %}
34 |
35 | {% block ovs_config %}
36 | {% endblock %}
37 |
38 | {% block script_end %}
39 | {% endblock %}
40 |
41 | {% block postamble %}
42 | {% endblock %}
43 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2020 NVIDIA Corporation
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | this software and associated documentation files (the "Software"), to deal in
7 | the Software without restriction, including without limitation the rights to
8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/roles/manage_bf2_fw/defaults/main.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 |
26 | force: False  # NOTE(review): presumably forces the firmware update when true -- confirm against tasks/main.yml
27 |
--------------------------------------------------------------------------------
/roles/load_bfb/defaults/main.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 |
26 | bfcfg_template: bf_ubuntu.cfg.j2  # default bf.cfg template; this name matches templates/bf_ubuntu.cfg.j2 in this role
27 |
--------------------------------------------------------------------------------
/roles/bf_bmc/defaults/main.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 |
26 | run_on: localhost  # NOTE(review): presumably the host the BMC commands are run from -- confirm in tasks/
27 | powercycle_delay: 60  # NOTE(review): presumably a wait in seconds used by powercycle.yml -- confirm
28 |
--------------------------------------------------------------------------------
/roles/bf_bmc/README.md:
--------------------------------------------------------------------------------
1 | # BF BMC
2 | This role is used to manage power state of x86 or DPU using IPMI protocol
3 |
4 | ## Parameters
5 |
6 | The `main.yml` tasks file requires the following parameter to be specified:
7 |
8 | * `bmc_action` -- IPMI command to execute on BMC
9 |
10 | ## Standalone tasks
11 |
12 | * `chassis_power_off.yaml`
13 | * `chassis_power_on.yaml`
14 | * `powercycle.yml`
15 |
16 | ## Playbook example
17 |
18 | bf2_mode.yml
19 |
20 | ---
21 | - hosts: "bmc"
22 | user: "{{ remote_install_user }}"
23 | become: true
24 | gather_facts: False
25 | vars:
26 | bmc_action: "chassis power cycle"
27 | bmc_host: "{{ inventory_hostname }}"
28 | bmc_user: "{{ ansible_user }}"
29 | bmc_password: "{{ ansible_password }}"
30 | run_on: "{{ groups['foreman'][0] }}"
31 | roles:
32 | - nvidia.dpu_ops.bf_bmc
33 |
34 |
35 |
36 | powercycle.yml
37 |
38 | ---
39 | - hosts: bf2oob
40 | user: "{{ remote_install_user }}"
41 | become: true
42 | gather_facts: true
43 |
44 | vars:
45 | bmc_host: "{{ hostvars[non_bf2_host]['bmc_ip'] }}"
46 | bmc_user: "{{ hostvars[non_bf2_host]['bmc_user'] }}"
47 | bmc_password: "{{ hostvars[non_bf2_host]['bmc_password'] }}"
48 | run_on: "{{ groups['foreman'][0] }}"
49 | tasks:
50 | - name: power-cycle x86 host
51 | include_role:
52 | name: nvidia.dpu_ops.bf_bmc
53 | tasks_from: powercycle.yml
54 |
55 |
56 |
--------------------------------------------------------------------------------
/roles/install_cuda/tasks/add_mirror_repo.yml:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | #
3 | # Setup CUDA '(deb) local' repo
4 | ---
5 | # Fetch md5sum.txt of the mirrored CUDA repo; the repo package name is
6 | # extracted from its contents below.
7 | - name: Get md5sum.txt from local mirror
8 |   uri:
9 |     url: "{{ foreman.foreman_mirror }}/cuda/{{ cuda_release }}-{{ cuda_arch }}/md5sum.txt"
10 |     return_content: true
11 |     # Accept 404 here so the next task can report a clear error; without this
12 |     # `uri` itself fails on any non-200 status and the check below is never reached.
13 |     status_code: [200, 404]
14 |   register: md5
15 | - name: Check metadata status
16 |   fail:
17 |     msg: "No md5sum.txt found for CUDA {{ cuda_release }}-{{ cuda_arch }} in local mirror"
18 |   when:
19 |     - md5.status == 404
20 |
21 | - name: set dist_ver fact
22 |   set_fact:
23 |     dist_ver: "{{ ansible_distribution | lower }}{{ ansible_distribution_version | regex_replace('\\.', '') }}"
24 | # ubuntu2004, ubuntu1804, ...
25 | # cuda-repo-ubuntu2004-11-5-local_11.5.2-495.29.05-1_amd64.deb
26 | - name: get repo package pattern
27 |   set_fact:
28 |     pkg_pattern: 'cuda-repo-{{ dist_ver }}-.*_{{ cuda_arch }}.deb'
29 | # Keep only the matches of pkg_pattern found in md5sum.txt lines;
30 | # exactly one match is required, otherwise the task fails.
31 | - name: get repo package
32 |   set_fact:
33 |     pkg_name: "{{ md5.content.splitlines() | map('regex_search', pkg_pattern) |select('string') |list }}"
34 |   failed_when: pkg_name |count != 1
35 | # pkg_nn is the package name up to the first underscore, e.g.
36 | # cuda-repo-ubuntu2004-11-5-local. The Python str.split() method is used
37 | # because the `split` filter is not available in Ansible 2.9/2.10, which
38 | # meta/runtime.yml still allows.
39 | - name: get repo local fn
40 |   set_fact:
41 |     pkg_nn: "{{ pkg_name[0].split('_') | first }}"
42 |
43 | # Point apt at the unpacked repo directory under var/ on the mirror.
44 | - name: copy cuda.list
45 |   copy:
46 |     content: |
47 |       deb {{ foreman.foreman_mirror }}/cuda/{{ cuda_release }}-{{ cuda_arch }}/var/{{ pkg_nn }} ./
48 |     dest: /etc/apt/sources.list.d/cuda-repo.list
49 |     owner: root
50 |     group: root
51 |     mode: '0644'
52 |
--------------------------------------------------------------------------------
/roles/dpu_nvconfig/tasks/run_mlxconfig.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # Copyright 2022 NVIDIA Corporation
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | # this software and associated documentation files (the "Software"), to deal in
7 | # the Software without restriction, including without limitation the rights to
8 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 | # the Software, and to permit persons to whom the Software is furnished to do so,
10 | # subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | #
22 | ###############################################################################
23 | ---
24 | # Run `mlxconfig -y s` with the contents of set_mlxconfig against the first entry of bf2_devices.
25 | - name: mlxconfig set {{ set_mlxconfig }}
26 |   register: mlxconfig_set_link_type
27 |   ansible.builtin.shell:
28 |     cmd: mlxconfig -d {{ bf2_devices[0].mst }} -y s {{ set_mlxconfig }}
29 | # Set the should_reboot fact to true.
30 | - name: set should_reboot
31 |   set_fact: {should_reboot: true}
32 |
--------------------------------------------------------------------------------
/roles/dpu_nvconfig/tasks/set_embedded_cpu_model.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # Copyright 2022 NVIDIA Corporation
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | # this software and associated documentation files (the "Software"), to deal in
7 | # the Software without restriction, including without limitation the rights to
8 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 | # the Software, and to permit persons to whom the Software is furnished to do so,
10 | # subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | #
22 | ###############################################################################
23 | ---
24 | # Pass INTERNAL_CPU_MODEL=1 to nvset.yml as `arg`. `include_tasks` replaces the
25 | # deprecated bare `include`, matching the other include in this file.
26 | - name: nvset INTERNAL_CPU_MODEL
27 |   include_tasks: nvset.yml
28 |   vars:
29 |     arg: "{{ item }}"
30 |   with_dict:
31 |     - {INTERNAL_CPU_MODEL: 1}
32 | # Only call run_mlxconfig.yml when set_mlxconfig is non-empty.
33 | - name: run 'mlxconfig set'
34 |   include_tasks: run_mlxconfig.yml
35 |   when: set_mlxconfig | length > 0
36 | # Reset set_mlxconfig to an empty string.
37 | - name: unset fact
38 |   set_fact:
39 |     set_mlxconfig: ""
40 |
--------------------------------------------------------------------------------
/roles/bf2_boot/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 | ---
26 |
# Render the role template bf.cfg.j2 to /etc/bf.cfg (root:root, 0644).
- name: Create bf.cfg
  ansible.builtin.template:
    dest: /etc/bf.cfg
    src: bf.cfg.j2
    mode: "0644"
    group: root
    owner: root

# Invoke bfcfg unconditionally after the file has been written.
# NOTE(review): bfcfg presumably consumes /etc/bf.cfg - confirm.
- name: run bfcfg
  ansible.builtin.command:
    cmd: /usr/bin/bfcfg
37 |
--------------------------------------------------------------------------------
/roles/bf2_mode/defaults/main.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
# Default option string for the role (all four --disable_* switches).
mode_options: "--disable_rshim --disable_tracer --disable_counter_rd --disable_port_owner"

# No reboot is requested unless the caller overrides this.
should_reboot: false

# Interface names associated with each ownership mode.
embedded_port: [enp3s0f0s0, enp3s0f1s0]
separated_port: [p0, p1]
34 |
--------------------------------------------------------------------------------
/roles/install_doca/defaults/main.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 |
# Lower-cased distribution name immediately followed by its version string,
# built from the gathered facts (no separator between the two parts).
distro_version_url: "{{ (ansible_distribution | lower) ~ ansible_distribution_version }}"

# Download URL of the DOCA package for amd64. The trailing backslash is a YAML
# line continuation inside the double-quoted scalar: the value is one
# unbroken URL with no embedded whitespace.
doca_url: "https://linux.mellanox.com/public/repo/doca/{{ doca.version }}/\
  {{ distro_version_url }}/amd64/{{ doca.package }}"
28 |
--------------------------------------------------------------------------------
/roles/bf_bmc/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 | ---
# Run an arbitrary ipmitool sub-command against a BMC over lanplus.
#
# Variables read:
#   bmc_host      - BMC address passed to -H
#   bmc_user      - BMC user passed to -U
#   bmc_password  - BMC password (handed over through the environment)
#   bmc_action    - ipmitool sub-command and its arguments
#   run_on        - host the command is delegated to
#
# The password is supplied through IPMI_PASSWORD and picked up by `-E`
# instead of being given with `-P`, so it does not show up in the process
# list of the delegated host or in the `cmd` field of the task result.
- name: ipmitool command
  command:
    ipmitool -I lanplus -H {{ bmc_host }} -U {{ bmc_user }} -E {{ bmc_action }}
  environment:
    IPMI_PASSWORD: "{{ bmc_password }}"
  register: bmc_output
  delegate_to: "{{ run_on }}"

# Print what ipmitool wrote to stdout.
- name: ipmitool output
  debug:
    var: bmc_output.stdout_lines
35 |
--------------------------------------------------------------------------------
/plugins/filter/rshim_filter.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: MIT
3 | #
4 | # Permission is hereby granted, free of charge, to any person obtaining a
5 | # copy of this software and associated documentation files (the "Software"),
6 | # to deal in the Software without restriction, including without limitation
7 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 | # and/or sell copies of the Software, and to permit persons to whom the
9 | # Software is furnished to do so, subject to the following conditions:
10 | #
11 | # The above copyright notice and this permission notice shall be included in
12 | # all copies or substantial portions of the Software.
13 | #
14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 | # DEALINGS IN THE SOFTWARE.
21 | #
22 | ###############################################################################
23 | r'''
24 | `get_rshim` filter implementation
25 | '''
def get_rshim(bf2_devices, rshim):
    """Return the devices whose ``'rshim'`` entry equals ``rshim``.

    ``bf2_devices`` is iterated once, in order; every element is indexed with
    the key ``'rshim'`` (a missing key raises ``KeyError``). The matching
    elements are returned in a new list, preserving their original order.
    """
    matching = []
    for device in bf2_devices:
        if device['rshim'] == rshim:
            matching.append(device)
    return matching
29 |
30 |
class FilterModule:
    """Ansible filter plugin exposing the `get_rshim` filter."""

    def filters(self):
        """Return the mapping of filter name to implementing callable."""
        exported = dict(get_rshim=get_rshim)
        return exported
36 |
--------------------------------------------------------------------------------
/roles/bf_bmc/tasks/chassis_power_on.yaml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 | ---
# Power the chassis on through its BMC (ipmitool over lanplus).
#
# Variables read: bmc_host, bmc_user, bmc_password, run_on.
#
# The password is supplied through IPMI_PASSWORD and picked up by `-E`
# instead of being given with `-P`, so it does not show up in the process
# list of the delegated host or in the `cmd` field of the task result.
- name: ipmitool command
  command:
    ipmitool -I lanplus -H {{ bmc_host }} -U {{ bmc_user }} -E chassis power on
  environment:
    IPMI_PASSWORD: "{{ bmc_password }}"
  register: bmc_output
  delegate_to: "{{ run_on }}"

# Print what ipmitool wrote to stdout.
- name: ipmitool output
  debug:
    var: bmc_output.stdout_lines
35 |
36 |
--------------------------------------------------------------------------------
/roles/bf_bmc/tasks/chassis_power_off.yaml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 | ---
# Power the chassis off through its BMC (ipmitool over lanplus).
#
# Variables read: bmc_host, bmc_user, bmc_password, run_on.
#
# The password is supplied through IPMI_PASSWORD and picked up by `-E`
# instead of being given with `-P`, so it does not show up in the process
# list of the delegated host or in the `cmd` field of the task result.
- name: ipmitool command
  command:
    ipmitool -I lanplus -H {{ bmc_host }} -U {{ bmc_user }} -E chassis power off
  environment:
    IPMI_PASSWORD: "{{ bmc_password }}"
  register: bmc_output
  delegate_to: "{{ run_on }}"

# Print what ipmitool wrote to stdout.
- name: ipmitool output
  debug:
    var: bmc_output.stdout_lines
35 |
36 |
--------------------------------------------------------------------------------
/roles/install_cuda/tasks/apt_common.yml:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | #
3 | # https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=arm64-sbsa&Compilation=Native&Distribution=Ubuntu&target_version=20.04&target_type=deb_network
4 | ---
# Set two facts depending on the CPU architecture reported by the target:
#   c_arch - `sbsa` on aarch64, `x86_64` on x86_64
#   c_ver  - the distribution version with the dots removed
# Neither fact is set for any other architecture.
- name: c_arch for aarch64
  when: ansible_architecture == 'aarch64'
  ansible.builtin.set_fact:
    c_ver: "{{ ansible_distribution_version | replace('.', '') }}"
    c_arch: sbsa

- name: c_arch for x86_64
  when: ansible_architecture == 'x86_64'
  ansible.builtin.set_fact:
    c_ver: "{{ ansible_distribution_version | replace('.', '') }}"
    c_arch: x86_64
16 |
17 | # A024F6F0E6D6A281: Mellanox Technologies (Mellanox Technologies - Signing Key v3)
18 | # F60F4B3D7FA2AF80: cudatools 0 else 'x86' }}"
34 |
35 | - name: change ownership
36 | include_tasks: change_owner.yaml
37 |
--------------------------------------------------------------------------------
/roles/bf2_mode/README.md:
--------------------------------------------------------------------------------
1 | # DPU (BF2) Mode
2 |
3 | The `bf2_mode` role is used to:
4 | 1. set the security mode: either restrict the host from accessing the DPU or grant it access
5 | 2. change the "ownership", i.e. switch the DPU NIC mode between "separated host" and "smartnic"
6 |
7 | For more information about the DPU modes of operation, see the
8 | [NVIDIA Mellanox BlueField DPU SW Modes of Operation](https://docs.nvidia.com/networking/display/BlueFieldSWv35111601/Modes+of+Operation#ModesofOperation-SeparatedHost) page.
9 |
10 | ## Parameters
11 |
12 | Ansible variable(s) to be defined:
13 |
14 | * `new_bf_mode` - selects the security mode, i.e. whether the host is blocked from accessing the DPU.
15 | Allowed values are:
16 | * `privileged`
17 | * `restricted`
18 |
19 | * `new_bf_ownership` - the DPU may be placed in either separated or embedded ownership mode.
20 | Allowed values are:
21 | * `SEPARATED_HOST`
22 | * `EMBEDDED_CPU`
23 |
24 | ## Playbook examples
25 |
26 | bf2_mode.yml
27 |
28 | ---
29 | - hosts: bf2oob
30 | user: "{{ remote_install_user }}"
31 | become: true
32 | pre_tasks:
33 | - name: Check for required variables
34 | fail:
35 | msg: "Invalid security mode, new_bf_mode should either be restricted or privileged"
36 | when: new_bf_mode not in bf2.security_modes
37 | vars:
38 | bmc_host: "{{ hostvars[non_bf2_host]['bmc_ip'] }}"
39 | bmc_user: "{{ hostvars[non_bf2_host]['bmc_user'] }}"
40 | bmc_password: "{{ hostvars[non_bf2_host]['bmc_password'] }}"
41 | run_on: "{{ groups['foreman'][0] }}"
42 | roles:
43 | - nvidia.dpu_ops.bf2_mode
44 | post_tasks:
45 | - name: wait for machine to be back online
46 | wait_for:
47 | host: "{{ non_bf2_host }}"
48 | port: 22
49 | timeout: 900
50 | delay: 60
51 | delegate_to: "{{ groups['foreman'][0] }}"
52 |
53 |
54 |
--------------------------------------------------------------------------------
/roles/dpu_nvconfig/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # Copyright 2022 NVIDIA Corporation
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | # this software and associated documentation files (the "Software"), to deal in
7 | # the Software without restriction, including without limitation the rights to
8 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 | # the Software, and to permit persons to whom the Software is furnished to do so,
10 | # subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | #
22 | ###############################################################################
# Entry point of the dpu_nvconfig role.
#
# Optional variables:
#   dpu_nic_mode - when defined, set_nic_mode.yml is included
#   gpu_owner    - when defined, set_gpu_owner.yml is included
#
# Facts are gathered only if `bf2_devices` is not already available.
- name: get facts
  bf2_facts:
  when: bf2_devices is not defined
  # bf2[0] since the dpu card only has 1 entry

- name: Change port link type
  include_tasks: set_link_type.yml

- name: "Change NIC mode ({{ dpu_nic_mode }})"
  include_tasks: set_nic_mode.yml
  when: dpu_nic_mode is defined

- name: "Change GPU owner ({{ gpu_owner }})"
  include_tasks: set_gpu_owner.yml
  when: gpu_owner is defined

# default('') keeps the condition from erroring out when none of the includes
# above defined `set_mlxconfig`.
# NOTE(review): `set_mlxconfig` is presumably filled in by the included task
# files - confirm where it is first initialised.
- name: Run 'mlxconfig set'
  include_tasks: run_mlxconfig.yml
  when: set_mlxconfig | default('') | length > 0
42 |
43 |
--------------------------------------------------------------------------------
/roles/bf_bmc/tasks/powercycle.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 | ---
# Power-cycle the x86 host: power off, pause, power on, then wait for SSH.
- name: turn x86 host off
  ansible.builtin.include_tasks: chassis_power_off.yaml

# The pause length comes from `powercycle_delay` (in seconds), regardless of
# the "1 minute" wording in the task name.
- name: Sleep for 1 minute to ensure power off
  ansible.builtin.pause:
    seconds: "{{ powercycle_delay }}"

- name: turn x86 host on
  ansible.builtin.include_tasks: chassis_power_on.yaml

# Probe TCP port 22 of `non_bf2_host` from the first host of the `foreman`
# group: start after `powercycle_delay` seconds, give up after 900 seconds.
- name: wait for machine to be back online
  delegate_to: "{{ groups['foreman'][0] }}"
  ansible.builtin.wait_for:
    port: 22
    host: "{{ non_bf2_host }}"
    delay: "{{ powercycle_delay }}"
    timeout: 900
40 |
--------------------------------------------------------------------------------
/roles/prepare_cuda_repo/tasks/get_installer.yml:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | ---
# Download the CUDA local-installer .deb named by `pkg_name[0]` and unpack it
# under /var/www/cuda/<cuda_release>-<cuda_arch>.
#
# Variables read: pkg_name, cuda_release, cuda_arch, md5 (registered uri
# result whose `content` is stored as md5sum.txt).
- name: get repo local fn
  set_fact:
    pkg_fn: "/var/www/cuda/{{ pkg_name[0] }}"
    # package name = part of the file name before the first underscore
    pkg_nn: "{{ pkg_name[0] | split('_') | first }}"
    dst_dir: "/var/www/cuda/{{ cuda_release }}-{{ cuda_arch }}"

# Separate task because it references `dst_dir`, which is set just above.
- name: set md5sum_fn fact
  set_fact:
    md5sum_fn: "{{ dst_dir }}/md5sum.txt"

# - name: check local copy of md5sum.txt
#   stat:
#     path: "{{ md5sum_fn }}"
#   register: md5_st

- name: prepare local
  block:
    # prepare destination folders
    - name: mkdir
      file:
        path: "{{ item }}"
        state: directory
        mode: '0755'
        owner: root
        group: root
      with_items:
        - /var/www/cuda
        - "{{ dst_dir }}"

    - name: copy store md5sum.txt
      copy:
        content: "{{ md5.content }}"
        dest: "{{ md5sum_fn }}"
        mode: '0644'
        owner: root
        group: root
  # when: not md5_st.stat.exists

# An explicit mode keeps the downloaded file's permissions independent of the
# remote umask.
- name: download CUDA local installer
  get_url:
    url: "https://developer.download.nvidia.com/compute/cuda/{{ cuda_release }}/local_installers/{{ pkg_name[0] }}"
    dest: "{{ pkg_fn }}"
    mode: '0644'
  register: pkg
  # when: not (pkg_st.stat.islnk is defined)

- name: check unpacked dir
  stat:
    path: "{{ dst_dir }}/var/{{ pkg_nn }}"
  register: var_st

# `command` with argv is used instead of `shell`: no shell feature is needed
# and the paths are passed as single arguments without word splitting.
# NOTE(review): `creates` makes the task a no-op when the directory already
# exists, even if `pkg.changed` is true - confirm that a re-downloaded
# package is not expected to be unpacked again.
- name: Unpack local installer
  command:
    argv:
      - dpkg
      - --unpack
      - --force-architecture
      - "--instdir={{ dst_dir }}"
      - "{{ pkg_fn }}"
    chdir: "{{ dst_dir }}"
    creates: "{{ dst_dir }}/var/{{ pkg_nn }}"
  register: unpack
  when: pkg.changed or not (var_st.stat.exists)

- name: debug
  debug:
    msg: "{{ unpack }}"
63 |
--------------------------------------------------------------------------------
/roles/prepare_cuda_repo/tasks/check_vars.yml:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | ---
# Validate the role inputs and resolve the CUDA repo package name.
#
# Required variables:
#   cuda_release - CUDA release used in the download URL
#   cuda_arch    - must be `arm64` or `amd64`
#   cuda_dist    - target distribution; normalised into `dist_ver`
#
# Facts set: dist_ver, pkg_pattern, pkg_name (list with exactly one entry).
# Registered: md5 (the release's md5sum.txt as returned by `uri`).
- name: cuda_release is defined?
  fail:
    msg: "Value of cuda_release isn't defined"
  when:
    - cuda_release is not defined

- name: cuda_arch is defined?
  fail:
    msg: "Value of cuda_arch isn't defined"
  when:
    - cuda_arch is not defined

- name: cuda_arch is correct?
  fail:
    msg: "Value of cuda_arch ({{ cuda_arch }}) isn't correct"
  when:
    - cuda_arch not in ["arm64", "amd64"]

- name: cuda_dist is defined?
  fail:
    msg: "Value of cuda_dist isn't defined"
  when:
    - cuda_dist is not defined

- name: set facts
  set_fact:
    # ubuntu2004, ubuntu1804, ...
    # We can't guess OS distro by `ansible_distribution` from the Foreman host
    # it won't match OS distro running on DPU or x86 host in general
    # dist_ver: "{{ ansible_distribution | lower }}{{ ansible_distribution_version | regex_replace('\\.', '') }}"
    dist_ver: "{{ cuda_dist | lower | regex_replace('[-\\.]', '') }}"

# failed_when is disabled so that the next task can report a readable error.
- name: Get CUDA release metadata
  uri:
    url: "https://developer.download.nvidia.com/compute/cuda/{{ cuda_release }}/docs/sidebar/md5sum.txt"
    return_content: true
  failed_when: false
  register: md5

# Any non-200 result (404, 5xx, connection failure, ...) means there is no
# usable `md5.content`; stop here instead of failing later on an undefined
# attribute.
- name: Check metadata status
  fail:
    msg: "No metadata found for CUDA release {{ cuda_release }} (HTTP status {{ md5.status | default('unknown') }})"
  when:
    - md5.status | default(-1) != 200

# cuda-repo-ubuntu2004-11-5-local_11.5.2-495.29.05-1_amd64.deb
- name: get repo package pattern
  set_fact:
    pkg_pattern: 'cuda-repo-{{ dist_ver }}-.*_{{ cuda_arch }}.deb'

# Keep only the lines of md5sum.txt that match the pattern; exactly one
# package must be found.
- name: get repo package
  set_fact:
    pkg_name: "{{ md5.content.splitlines() | map('regex_search', pkg_pattern) |select('string') |list }}"
  failed_when: pkg_name |count != 1
52 |
--------------------------------------------------------------------------------
/roles/install_cuda/tasks/libnvidia_container.yml:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | #
3 | # https://nvidia.github.io/libnvidia-container/
4 | ---
# Register the libnvidia-container APT repository: install its signing key
# and fetch the distribution-specific sources list.
#
# Facts set: dist_id (lower-cased distribution name + version).
- name: set facts
  set_fact:
    dist_id: "{{ ansible_distribution | lower }}{{ ansible_distribution_version }}"

- name: stat libnvidia-container-gpgkey.pem
  stat:
    path: /root/libnvidia-container-gpgkey.pem
  register: st_pgpkey

# Only runs when the key file is not on the target yet.
- name: copy/install pgpkey
  block:
    - name: install pgp key
      copy:
        src: libnvidia-container.pub.pem
        dest: /root/libnvidia-container-gpgkey.pem
        mode: '0640'
        owner: root
        group: root
      register: gpgkey

    # apt_key module instead of shelling out to `apt-key add`, matching how
    # the install_doca role installs its key.
    - name: import pgp key
      apt_key:
        file: /root/libnvidia-container-gpgkey.pem
        state: present
      when: gpgkey.changed
  when: not st_pgpkey.stat.exists
# The key is shipped with the role; upstream publishes it at
# https://nvidia.github.io/libnvidia-container/gpgkey

- name: stat libnvidia-container.list
  stat:
    path: /etc/apt/sources.list.d/libnvidia-container.list
  register: st_repolist

# Only runs when the sources list is not on the target yet.
- name: copy/install repo file
  block:
    - name: download libnvidia-container.list
      get_url:
        url: "https://nvidia.github.io/libnvidia-container/{{ dist_id }}/libnvidia-container.list"
        dest: "/etc/apt/sources.list.d/libnvidia-container.list"
        mode: '0644'
      register: repo_list
  when: not st_repolist.stat.exists
54 |
55 |
--------------------------------------------------------------------------------
/roles/install_cuda/README.md:
--------------------------------------------------------------------------------
1 | # Install CUDA on x86 or DPU
2 |
3 | The `install_cuda` role is used to install the NVIDIA CUDA SDK
4 | on an x86 host or on a DPU.
5 |
6 | ## Parameters
7 |
8 | * `cuda_release` -- release of CUDA like `11.5.1`, `11.6.2`
9 | * `cuda_arch` -- CPU architecture to install. Allowed values are:
10 | * `amd64`
11 | * `arm64`
12 | * `mode` -- choose "DEB (network)" `deb_network` or "DEB (local)" `deb_local`
13 |
14 |
15 | ## Playbook examples
16 |
17 | setup_cuda_network.yml
18 |
19 | # Usage:
20 | # ansible-playbook setup_cuda_network.yml -v -e cuda_release=11.6.2 -e cuda_arch=arm64
21 | ---
22 | - hosts: "{{ groups['foreman'][0] }}"
23 | user: "{{ remote_install_user }}"
24 | become: true
25 | roles:
26 | - nvidia.dpu_ops.prepare_cuda_repo
27 |
28 | - hosts: bf2oob
29 | user: "{{ remote_install_user }}"
30 | become: true
31 | roles:
32 | - name: nvidia.dpu_ops.install_cuda
33 | mode: deb_network
34 | when: cuda_arch == "arm64"
35 |
36 | - hosts: x86host
37 | user: "{{ remote_install_user }}"
38 | become: true
39 | roles:
40 | - name: nvidia.dpu_ops.install_cuda
41 | mode: deb_network
42 | when: cuda_arch == "amd64"
43 |
44 |
45 |
46 | setup_cuda_local.yml
47 |
48 | # Usage:
49 | # ansible-playbook setup_cuda_local.yml -v -e cuda_release=11.6.2 -e cuda_arch=arm64
50 | ---
51 | - hosts: "{{ groups['foreman'][0] }}"
52 | user: "{{ remote_install_user }}"
53 | become: true
54 | roles:
55 | - nvidia.dpu_ops.prepare_cuda_repo
56 |
57 | - hosts: bf2oob
58 | user: "{{ remote_install_user }}"
59 | become: true
60 | roles:
61 | - name: nvidia.dpu_ops.install_cuda
62 | mode: deb_local
63 | when: cuda_arch == "arm64"
64 |
65 | - hosts: x86host
66 | user: "{{ remote_install_user }}"
67 | become: true
68 | roles:
69 | - name: nvidia.dpu_ops.install_cuda
70 | mode: deb_local
71 | when: cuda_arch == "amd64"
72 |
73 |
74 |
--------------------------------------------------------------------------------
/roles/dpu_nvconfig/tasks/set_gpu_owner.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # Copyright 2022 NVIDIA Corporation
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | # this software and associated documentation files (the "Software"), to deal in
7 | # the Software without restriction, including without limitation the rights to
8 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 | # the Software, and to permit persons to whom the Software is furnished to do so,
10 | # subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | #
22 | ###############################################################################
23 | # Configurations: Next Boot New
24 | # PCI_DOWNSTREAM_PORT_OWNER[4] DEVICE_DEFAULT(0) EMBEDDED_CPU(15)
25 | ---
# Select which CPU owns the GPU by requesting a new value for
# PCI_DOWNSTREAM_PORT_OWNER[4] through nvset.yml:
#   gpu_owner "arm" -> 15 (EMBEDDED_CPU)
#   gpu_owner "x86" -> 0  (DEVICE_DEFAULT)
# The comparison is case-insensitive, so "ARM"/"arm" and "X86"/"x86" keep
# working. Any other value requests nothing.
#
# `include_tasks` replaces the bare `include` action, which was deprecated and
# then removed in ansible-core 2.16.
- name: nvconfig
  set_fact:
    nv: '{{ bf2_devices[0].nvconfig }}'

- name: Show GPU owner
  debug:
    msg: "Current GPU owner is {{ nv['PCI_DOWNSTREAM_PORT_OWNER[4]'] }}"

- name: "Set GPU owned by ARM CPU"
  include_tasks: nvset.yml
  with_dict:
    - {"PCI_DOWNSTREAM_PORT_OWNER[4]": "15"}
  when: gpu_owner | lower == "arm"

- name: "Set GPU owned by x86 CPU"
  include_tasks: nvset.yml
  with_dict:
    - {"PCI_DOWNSTREAM_PORT_OWNER[4]": "0"}
  when: gpu_owner | lower == "x86"
45 |
--------------------------------------------------------------------------------
/roles/install_doca/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 | ---
# Install DOCA from the package referenced by `doca_url`.
# Optional variable: downgrade - when defined and truthy, the doca-*
# packages are removed before being installed again.
- name: Install mellanox gpg key
  ansible.builtin.apt_key:
    state: present
    url: https://www.mellanox.com/downloads/ofed/RPM-GPG-KEY-Mellanox

- name: Install all in one doca package
  ansible.builtin.apt:
    deb: "{{ doca_url }}"

- name: Remove old doca packages
  when: downgrade is defined and downgrade
  ansible.builtin.apt:
    name: [doca-sdk, doca-runtime, doca-tools]
    state: absent
    update_cache: true

- name: Install doca packages
  ansible.builtin.apt:
    name: [doca-sdk, doca-runtime, doca-tools]
    update_cache: true
52 |
--------------------------------------------------------------------------------
/roles/bf2_mode/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 | - name: get facts
26 |   bf2_facts:
27 |   when: bf2_devices is not defined  # bf2[0] since the dpu card only has 1 entry
28 |
29 | - name: Change security mode
30 |   include_tasks: security.yml
31 |   when: new_bf_mode is defined
32 |
33 | - name: Change ownership mode
34 |   include_tasks: ownership.yml
35 |   when: new_bf_ownership is defined
36 |
37 | # Power-cycle the x86 host through the bf_bmc role when should_reboot is set.
38 | # default(false) tolerates an unset flag; bool handles string values.
39 | - name: reboot host
40 |   when: should_reboot | default(false) | bool
41 |   block:
42 |     - name: turn x86 host off
43 |       vars:
44 |         bmc_action: "chassis power off"
45 |       include_role:
46 |         name: bf_bmc
47 |     - name: Sleep for 1 minute to ensure power off
48 |       pause:
49 |         seconds: 60
50 |     - name: turn x86 host on
51 |       vars:
52 |         bmc_action: "chassis power on"
53 |       include_role:
54 |         name: bf_bmc
55 |
--------------------------------------------------------------------------------
/galaxy.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 |
26 | namespace: nvidia
27 |
28 | name: dpu_ops
29 |
30 | version: 1.0.1
31 |
32 | readme: README.md
33 |
34 | authors:
35 | - John Trenholm
36 | - Michael Basnight
37 | - Yurii Shestakov
38 |
39 | description: DPU Ops
40 |
41 | license:
42 | - MIT
43 |
44 | # license_file: LICENSE
45 |
46 | tags: []
47 |
48 | dependencies: {}
49 |
50 | repository: https://github.com/NVIDIA/ansible-collection-dpu-ops
51 |
52 | documentation: https://github.com/NVIDIA/ansible-collection-dpu-ops
53 |
54 | homepage: https://github.com/NVIDIA/ansible-collection-dpu-ops
55 |
56 | issues: https://github.com/NVIDIA/ansible-collection-dpu-ops/issues
57 |
58 | build_ignore:
59 | - '*.tar.gz'
60 | - plugins/modules/bf2_facts_test.py
61 | - poetry.lock
62 | - pyproject.toml
63 |
--------------------------------------------------------------------------------
/roles/bf2_mode/tasks/security.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 | # Remember the permission currently reported for the first device.
26 | - name: set current_bf_mode
27 |   ansible.builtin.set_fact:
28 |     current_bf_mode: "{{ bf2_devices[0].permission }}"
29 |
30 | - name: var
31 |   ansible.builtin.debug:
32 |     msg: "Current: {{ current_bf_mode }} New: {{ new_bf_mode }}"
33 |
34 | # mlxprivhost runs only when the requested mode differs from the current one.
35 | - name: Set mode to restricted
36 |   register: mode_change_restricted
37 |   when: new_bf_mode == 'restricted' and current_bf_mode != new_bf_mode
38 |   ansible.builtin.command: "mlxprivhost -d {{ bf2_devices[0].mst }} r {{ mode_options }}"
39 |
40 | - name: Set mode to privileged
41 |   register: mode_change_priv
42 |   when: new_bf_mode == 'privileged' and current_bf_mode != new_bf_mode
43 |   ansible.builtin.command: "mlxprivhost -d {{ bf2_devices[0].mst }} p"
44 |
45 | # Both results are always registered (a skipped task registers too), so
46 | # `.changed` can be read on either of them.
47 | - name: Set reboot flag
48 |   when: mode_change_priv.changed or mode_change_restricted.changed
49 |   ansible.builtin.set_fact:
50 |     should_reboot: True
51 |
--------------------------------------------------------------------------------
/roles/dpu_nvconfig/tasks/nvset.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # Copyright 2022 NVIDIA Corporation
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | # this software and associated documentation files (the "Software"), to deal in
7 | # the Software without restriction, including without limitation the rights to
8 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 | # the Software, and to permit persons to whom the Software is furnished to do so,
10 | # subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | #
22 | ###############################################################################
23 | ---
24 | # The setting already exists in nv: queue it only if the value changes.
25 | # num_val keeps the digits of a "(n)" group, e.g. ETH(2) -> 2.
26 | - name: "{{ item.key }} is defined"
27 |   when: nv[item.key] is defined
28 |   block:
29 |     - name: set fact
30 |       ansible.builtin.set_fact:
31 |         new_val: "{{ item.value | string }}"
32 |         num_val: "{{ nv[item.key] | regex_replace('^.*\\((\\d+)\\)', '\\1') }}"
33 |     # - debug:
34 |     #     msg: "key {{ item.key }} is defined: {{ nv[item.key] }} | {{ num_val }}"
35 |     - name: set fact
36 |       when: num_val != new_val
37 |       ansible.builtin.set_fact:
38 |         set_mlxconfig: "{{ set_mlxconfig }} {{ item.key }}={{ item.value }}"
39 |
40 | # The setting is missing from nv: always queue it.
41 | - name: "{{ item.key }} is not defined"
42 |   when: nv[item.key] is not defined
43 |   block:
44 |     # - debug:
45 |     #     msg: "key {{ item.key }} is NOT defined, set {{ item.value }}"
46 |     - name: set fact
47 |       ansible.builtin.set_fact:
48 |         set_mlxconfig: "{{ set_mlxconfig }} {{ item.key }}={{ item.value }}"
49 |
50 |
--------------------------------------------------------------------------------
/roles/manage_bf2_fw/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 | # Run the Mellanox firmware updater; `force` appends --force-fw-update.
26 | # One task is used on purpose: with two tasks registering `fw_update`, the
27 | # skipped task overwrote the result of the one that actually ran.
28 | # `force | bool` also accepts string values such as "true"/"false" passed
29 | # on the command line.
30 | - name: FW update
31 |   shell: >-
32 |     /opt/mellanox/mlnx-fw-updater/mlnx_fw_updater.pl
33 |     {{ '--force-fw-update' if (force | bool) else '' }}
34 |   register: fw_update
35 |   # rc 0 is reported as changed and rc 1 as failed; any other return code
36 |   # leaves the task ok/unchanged.
37 |   changed_when:
38 |     - fw_update.rc == 0
39 |   failed_when:
40 |     - fw_update.rc == 1
41 |
42 | # Power-cycle the x86 host through the bf_bmc role only when the updater
43 | # reported a change. The former test `fw_update.changed in [0, 1]` held for
44 | # both True and False, so the host was power-cycled on every run.
45 | - name: reboot host
46 |   when: fw_update is changed
47 |   block:
48 |     - name: turn x86 host off
49 |       vars:
50 |         bmc_action: "chassis power off"
51 |       include_role:
52 |         name: bf_bmc
53 |
54 |     - name: Sleep for 1 minute to ensure power off
55 |       pause:
56 |         seconds: 60
57 |
58 |     - name: turn x86 host on
59 |       vars:
60 |         bmc_action: "chassis power on"
61 |       include_role:
62 |         name: bf_bmc
63 |
--------------------------------------------------------------------------------
/roles/dpu_nvconfig/tasks/set_link_type.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # Copyright 2022 NVIDIA Corporation
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | # this software and associated documentation files (the "Software"), to deal in
7 | # the Software without restriction, including without limitation the rights to
8 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 | # the Software, and to permit persons to whom the Software is furnished to do so,
10 | # subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | #
22 | ###############################################################################
23 | ---
24 | - name: init 'set_mlxconfig'  # start from an empty argument string
25 |   when: set_mlxconfig is not defined
26 |   ansible.builtin.set_fact:
27 |     set_mlxconfig: ""
28 | - when: link_type_p1 is defined  # port 1 is left alone unless requested
29 |   block:
30 |     - name: debug
31 |       ansible.builtin.debug:
32 |         msg: |
33 |           Port 1: {{ bf2_devices[0].nvconfig.LINK_TYPE_P1 }}
34 |           New value: {{ link_type_p1 }}
35 |     - name: Set P1=IB?
36 |       when: bf2_devices[0].nvconfig.LINK_TYPE_P1 == 'ETH(2)' and link_type_p1 == 'IB'
37 |       ansible.builtin.set_fact:
38 |         set_mlxconfig: "{{ set_mlxconfig }} LINK_TYPE_P1=1"
39 |     - name: Set P1=ETH?
40 |       when: bf2_devices[0].nvconfig.LINK_TYPE_P1 == 'IB(1)' and link_type_p1 == 'ETH'
41 |       ansible.builtin.set_fact:
42 |         set_mlxconfig: "{{ set_mlxconfig }} LINK_TYPE_P1=2"
43 |
44 | - when: link_type_p2 is defined  # port 2 is left alone unless requested
45 |   block:
46 |     - name: debug
47 |       ansible.builtin.debug:
48 |         msg: |
49 |           Port 2: {{ bf2_devices[0].nvconfig.LINK_TYPE_P2 }}
50 |           New value: {{ link_type_p2 }}
51 |     - name: Set P2=IB?
52 |       when: bf2_devices[0].nvconfig.LINK_TYPE_P2 == 'ETH(2)' and link_type_p2 == 'IB'
53 |       ansible.builtin.set_fact:
54 |         set_mlxconfig: "{{ set_mlxconfig }} LINK_TYPE_P2=1"
55 |     - name: Set P2=ETH?
56 |       when: bf2_devices[0].nvconfig.LINK_TYPE_P2 == 'IB(1)' and link_type_p2 == 'ETH'
57 |       ansible.builtin.set_fact:
58 |         set_mlxconfig: "{{ set_mlxconfig }} LINK_TYPE_P2=2"
59 |
--------------------------------------------------------------------------------
/roles/manage_rshim_owner/tasks/change_owner.yaml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 | - name: Rshim move from bmc to x86
26 |   when: current_rshim_owner == 'bmc'
27 |   block:
28 |     # The service is stopped and masked on the BMC before the x86 restart.
29 |     - name: Stop rshim on {{ bmc_host }}
30 |       ansible.builtin.raw: systemctl stop rshim; systemctl mask rshim
31 |       delegate_to: "{{ bmc_host }}"
32 |       become: true
33 |     - name: Wait 5 seconds
34 |       ansible.builtin.pause:
35 |         seconds: 5
36 |     - name: Restart rshim on {{ x86_host }}
37 |       ansible.builtin.raw: systemctl restart rshim
38 |       delegate_to: "{{ x86_host }}"
39 |       become: true
40 |
41 | # Reverse direction: stop on x86, bring the service back on the BMC.
42 | - name: Rshim move from x86 to bmc
43 |   when: current_rshim_owner == 'x86'
44 |   block:
45 |     - name: Stop rshim on {{ x86_host }}
46 |       ansible.builtin.raw: systemctl stop rshim
47 |       delegate_to: "{{ x86_host }}"
48 |       become: true
49 |
50 |     - name: Wait 5 seconds
51 |       ansible.builtin.pause:
52 |         seconds: 5
53 |
54 |     # unmask undoes the mask applied when the rshim was moved to the x86 host
55 |     - name: Start rshim on {{ bmc_host }}
56 |       ansible.builtin.raw: systemctl unmask rshim; systemctl enable rshim; systemctl start rshim
57 |       delegate_to: "{{ bmc_host }}"
58 |       become: true
59 |     - name: Wait 5 seconds
60 |       ansible.builtin.pause:
61 |         seconds: 5
62 |
63 |     - name: Start rshim on {{ x86_host }}
64 |       ansible.builtin.raw: systemctl start rshim
65 |       delegate_to: "{{ x86_host }}"
66 |       become: true
67 |
--------------------------------------------------------------------------------
/roles/force_reboot_armos/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
25 | # x86 path: check that the host sees the rshim, then write SW_RESET to it.
26 | - name: reboot armos from x86 host
27 |   when: is_bmc is false
28 |   block:
29 |     - name: get facts
30 |       bf2_facts:
31 |       when: bf2_devices is not defined
32 |       register: bf2_output
33 |     - name: Verify rshim is active on host
34 |       assert:
35 |         that:  # raw Jinja expression; conditionals must not be wrapped in "{{ }}"
36 |           - bf2_devices | nvidia.dpu_ops.get_rshim(rshim.dev) | length > 0
37 |         fail_msg: >
38 |           rshim is not active on the host, which could mean that either
39 |           the BMC on the BF2 card owns the rshim or it is just disabled on the host.
40 |           Please verify the state of your host and BF2 card before proceeding.
41 |     - name: reboot bf2
42 |       shell: echo "SW_RESET 1" > {{ rshim.dev }}/misc
43 |       become: true
44 |
45 | - name: reboot armos from the BMC
46 |   when: is_bmc is true
47 |   block:
48 |     - name: get rshim from BMC
49 |       ansible.builtin.raw: 'find /dev -maxdepth 1 -name "rshim*"'
50 |       register: rshim_output_bmc
51 |
52 |     - name: Verify rshim is active on BMC
53 |       ansible.builtin.assert:
54 |         that: rshim_output_bmc.stdout_lines|length > 0
55 |         fail_msg: >
56 |           rshim is not active on the BMC, which could mean that either the x86 host
57 |           owns the rshim or it is just disabled on the BMC.
58 |           Please verify the state of your host and BF2 card before proceeding.
59 |
60 |     # The reset is always written to /dev/rshim0, whatever name find returned.
61 |     - name: reboot bf2 on bmc
62 |       ansible.builtin.raw: echo "SW_RESET 1" > /dev/rshim0/misc
63 |
--------------------------------------------------------------------------------
/roles/load_bfb/templates/bf.cfg.j2:
--------------------------------------------------------------------------------
1 | {% block preamble %}{% endblock %}
2 | bfb_modify_os()
3 | {
4 | {% block script_begin %}{%endblock %}
5 |
6 |     # Glean the BOOTNIC from the mac passed in for configuring the bf.cfg
7 |     # The devices are not renamed to p{0,1} until first boot
8 |     HOST_MAC={{ boot_mac | lower }}
9 |     for p in /sys/class/net/*
10 |     do
11 |         n=${p##*/}
12 |         # sysfs is expected to print the address in lowercase hex; HOST_MAC is lowercased to match
13 |         mac=$(cat $p/address)
14 |         if [ "$mac" != "$HOST_MAC" ] ; then
15 |             continue
16 |         fi
17 |         dev=$(readlink $p/device)
18 |         case "$dev" in
19 |             *MLNXBF17*)
20 |                 DEVICE=OOB
21 |                 PROVISION_IFC=oob_net0
22 |                 ;;
23 |             *03:00*)
24 |                 port=${dev##*03:00.}
25 |                 DEVICE="NIC_P${port}"
26 |                 PROVISION_IFC="eth${port}"
27 |                 ;;
28 |             *)
29 |                 echo "$dev unknown" >&2
30 |                 ;;
31 |         esac
32 |         break
33 |     done
34 |     if [ -z "$DEVICE" ] ; then
35 |         DEVICE=OOB
36 |         PROVISION_IFC=oob_net0
37 |     fi
38 |     echo "DEVICE=$DEVICE ; PROVISION_IFC=$PROVISION_IFC"
39 |     BOOTNIC=NET-${DEVICE}-IPV4
40 |
41 |     # Note: This heredoc uses an unquoted EOF (no backslash), so the shell expands $VARS
42 |     cat << EOF > /mnt/etc/bf.cfg
43 | BOOT0=${BOOTNIC}
44 | BOOT1=DISK
45 | EOF
46 |
47 |     # Note: the \EOF heredoc below is quoted so that values such as passwords are written verbatim.
48 |     # Because of that, the shell performs no variable substitution on it at BFB install. Please ensure
49 |     # there are no variables in these user-data blocks that are expected to be expanded at BFB install time.
50 |     cat << \EOF > /mnt/var/lib/cloud/seed/nocloud-net/user-data
51 | #cloud-config
52 | {% block cloudinit %}
53 | {% endblock %}
54 | runcmd:
55 | - [ grub-install ]
56 | - [ /usr/bin/bfcfg ]
57 | {% block cloudinit_extra_commands %}{% endblock %}
58 | EOF
59 |
60 |     # mst start
61 |     # DEV=$(/bin/ls -1 /dev/mst/mt*pciconf0)
62 |     DEV=03:00.0
63 |     OWNERSHIP_STATUS=$(mlxconfig -d $DEV q INTERNAL_CPU_MODEL |awk '/INTERNAL_CPU_MODEL/ {print $2}')
64 |
65 |     if [[ $OWNERSHIP_STATUS == 'EMBEDDED_CPU(1)' ]]; then
66 |         cat > /mnt/var/lib/cloud/seed/nocloud-net/network-config.orig << 'EOF'
67 | {% block embedded_network %}{% endblock %}
68 | EOF
69 |     else
70 |         cat > /mnt/var/lib/cloud/seed/nocloud-net/network-config.orig << 'EOF'
71 | {% block separated_network %}{% endblock %}
72 | EOF
73 |     fi
74 |
75 |     [ -s /mnt/var/lib/cloud/seed/nocloud-net/network-config.orig ] && \
76 |         mv /mnt/var/lib/cloud/seed/nocloud-net/network-config.orig /mnt/var/lib/cloud/seed/nocloud-net/network-config
77 |
78 |     # Set mtu for ovs ports
79 |     cat << \EOF >> /mnt/etc/mellanox/mlnx-ovs.conf
80 | {% block ovs_config %}{% endblock %}
81 | EOF
82 |
83 | {% block script_end %}{% endblock %}
84 |
85 | }
86 | {% block postamble %}{% endblock %}
87 |
88 | # Do not remove these trailing spaces or cat bfb will not work
89 |
90 |
91 |
92 |
93 |
--------------------------------------------------------------------------------
/roles/install_cuda/tasks/main.yml:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | #
3 | # https://developer.nvidia.com/cuda-toolkit-archive
4 | # 1. CUDA installation methods:
5 | # - deb (local)
6 | # - deb (network)
7 | # - runfile (local)
8 | # 2. NVidia container runtime
9 | # https://nvidia.github.io/libnvidia-container/
10 | ---
11 |
12 | # Abort early unless 'mode' names one of the supported install methods.
13 | - name: check 'mode' var
14 |   fail:
15 |     msg: "'mode' parameter must be set to 'deb_local' or 'deb_network'"
16 |   # OR, not AND: an unset 'mode' and an unsupported value must both fail.
17 |   when: mode is not defined or mode not in ['deb_local', 'deb_network']
18 |
19 | - name: common apt settings
20 |   ansible.builtin.include_tasks: apt_common.yml
21 |
22 | - name: Add deb (local) repo
23 |   when: mode == 'deb_local'
24 |   ansible.builtin.include_tasks: add_mirror_repo.yml
25 |
26 | - name: Add deb (network) repo
27 |   when: mode == 'deb_network'
28 |   ansible.builtin.include_tasks: deb_network.yml
29 |
30 | # FIXME: libnvidia_container depends on Internet access
31 | - name: Add NVidia container runtime repo
32 |   ansible.builtin.include_tasks: libnvidia_container.yml
33 |
34 | # CUDA packages are uninstalled later only if the installed version differs from the new one
35 | - name: Gather the package facts
36 |   ansible.builtin.package_facts:
37 |     manager: auto
38 |
39 | # Guarded by `is defined`; the error otherwise was: 'dict object' has no attribute 'cuda'
40 | - name: Get list of installed CUDA packages
41 |   when: ansible_facts.packages.cuda is defined
42 |   ansible.builtin.set_fact:
43 |     cuda_packages: "{{ ansible_facts.packages | map('regex_search', 'cuda-.*') |select('string') |list }}"
44 |     has_cuda_ver: "{{ ansible_facts.packages['cuda'][0]['version'].split('-')[0] }}"
45 |
46 | - name: No CUDA installed?
47 |   when: ansible_facts.packages.cuda is not defined
48 |   ansible.builtin.set_fact:
49 |     cuda_packages: []
50 |     has_cuda_ver: "0.0.0"
51 |
52 | - name: Print list of CUDA packages
53 |   ansible.builtin.debug:
54 |     var: cuda_packages
55 |
56 | # Remove an installed CUDA whose version differs from cuda_release ("0.0.0" means none installed).
57 | - name: Uninstall CUDA packages
58 |   when: cuda_release != has_cuda_ver and has_cuda_ver != "0.0.0"
59 |   block:
60 |     - name: uninstall by list
61 |       ansible.builtin.apt:
62 |         name: "{{ cuda_packages | list }}"
63 |         state: absent
64 |     - name: uninstall by name
65 |       ansible.builtin.apt:
66 |         name: ['cuda']
67 |         state: absent
68 |     - name: unload nvidia.ko
69 |       ansible.builtin.shell: |
70 |         sudo modprobe -r nvidia
71 | # Alternative conditions kept for reference:
72 | # when: not cuda_release is version(has_cuda_ver, '=')
73 |
74 | - name: Update apt metadata
75 |   ansible.builtin.apt:
76 |     update_cache: true
77 | - name: Install CUDA package
78 |   register: cuda_pkg
79 |   ansible.builtin.apt:
80 |     name: cuda
81 |     state: present
82 |
83 | # nvbug 3584701
84 | # modprobe: ERROR: could not insert 'nvidia': Operation not permitted
85 | # - name: reload nvidia.ko
86 | #   shell: |
87 | #     modprobe nvidia
88 | #   when: cuda_pkg.changed
89 |
90 | - name: Install nvidia-container-runtime
91 |   ansible.builtin.apt:
92 |     name: nvidia-container-runtime
93 |     state: present
94 |
--------------------------------------------------------------------------------
/roles/load_bfb/README.md:
--------------------------------------------------------------------------------
1 | # Load BFB image into DPU over RSHIM
2 |
3 | ## Parameters
4 |
5 | * `local_bfb` -- path to the BFB image on the local filesystem
6 | * `bfb_url` -- URL from which the BFB image is downloaded onto the x86 host
7 | * `boot_mac` -- MAC address of the network interface to boot from
8 |
9 | ## Playbook examples
10 |
11 | load-bfb.yaml
12 |
13 | ---
14 | - hosts: bf2:bf2oob:bmc
15 | user: "{{ remote_install_user }}"
16 | gather_facts: no # if using a bmc host, this will fail because ansible is not present
17 | become: true
18 | vars:
19 | bfb_url: "{{ foreman.foreman_mirror }}/{{ product_version }}/{{ bfb.file }}"
20 | cloudinit_hostname: "{{ inventory_hostname | regex_replace('bmc-','') }}"
21 | cloudinit_ntp_host: "{{ subnet_dns_primary }}"
22 | cloudinit_dns_host: "{{ subnet_dns_primary }}"
23 | tmfifo_ip: "{{ hostvars[inventory_hostname].tmfifo_ip | default('192.168.100.2') }}/28"
24 | tmfifo_mac: "{{ hostvars[inventory_hostname].tmfifo_mac | default('00:1a:ca:ff:ff:01') }}"
25 | cloudinit_mtu: "{{ network_mtu }}"
26 | ovs_mtu: "{{ network_mtu|int + 50 }}"
27 | cloudinit_domain: "{{ domain }}"
28 | bfcfg_template: "roles/load_bfb/templates/bf2_ndo.cfg.j2"
29 | ansible_fqdn: "{{ inventory_hostname }}" # this hack is because facts are not gathered and the non_bf2_host uses it
30 | pre_tasks:
31 | - name: set is_bmc
32 | set_fact:
33 | is_bmc: "{{ inventory_hostname.startswith('bmc') }}"
34 | - name: bmc operations
35 | block:
36 | - name: set hosts
37 | set_fact:
38 | x86_host: "{{ foreman_url }}"
39 | dpu_host: "{{ inventory_hostname | regex_replace('bmc-','') }}"
40 | - name: set bmc facts
41 | set_fact:
42 | boot_mac: "{{ hostvars[dpu_host]['oob_mac'] if bf2.oob_provision else hostvars[dpu_host]['primary_mac'] }}"
43 | local_bfb: "/var/www/{{ product_version }}/{{ bfb.file }}" # directly manipulate the foreman filesystem
44 | when: inventory_hostname.startswith('bmc')
45 | - name: x86 host operations
46 | block:
47 | - name: set hosts
48 | set_fact:
49 | x86_host: "{{ non_bf2_host }}"
50 | dpu_host: "{{ inventory_hostname | regex_replace('oob-','') }}"
51 | - name: set non bmc facts
52 | set_fact:
53 | boot_mac: "{{ hostvars[dpu_host]['oob_mac'] if bf2.oob_provision else hostvars[dpu_host]['primary_mac'] }}"
54 | local_bfb: "{{ bf2.download_local_path }}/{{ bfb.file }}"
55 | - name: Create bfb temp dir
56 | file:
57 | state: directory
58 | path: "{{ bf2.download_local_path }}"
59 | owner: root
60 | group: root
61 | mode: "0755" # a directory needs the execute bit to be entered
62 | delegate_to: "{{ x86_host }}"
63 | - name: Download bfb from web server
64 | get_url:
65 | url: "{{ bfb_url }}"
66 | dest: "{{ bf2.download_local_path }}"
67 | validate_certs: "{{ foreman.validate_certs }}"
68 | delegate_to: "{{ x86_host }}"
69 | when: not inventory_hostname.startswith('bmc')
70 | roles:
71 | - nvidia.dpu_ops.load_bfb
72 |
73 |
74 |
--------------------------------------------------------------------------------
/roles/install_cuda/files/libnvidia-container.pub.pem:
--------------------------------------------------------------------------------
1 | -----BEGIN PGP PUBLIC KEY BLOCK-----
2 |
3 | mQINBFnNWDEBEACiX68rxIWvqH3h2GykO25oK9BAqV8fDtb6lXEbw3eKx4g87BRz
4 | M3DQBA0S0IfkQ72ovJ33H50+gVTXuu+Zme5muWk72m3pApccZVDLqdzYlpWPruNb
5 | MC+IlWr70yo8Jw8Zr1ihbWjFvMbDJTkgqPt2djNq3xxvdiKoZlgnpLRKIpSu9iBQ
6 | lNoZLHxTQKFH4219L77prRogv2QV1ckBL5lDVOERJuHo4jHE8mm9/NZ6v3m2HGuu
7 | AEZ7T9nWlPGiAIP8Pww4ZRTJcBANcI2EFKPLdfP61HTH6w0kVMkoAaGlemadTDl3
8 | ZcLpUpTFLc+ko/2uQ1qVPx9QYyoMrorS3kUmlXrhsA7FvcB09aIcb+JX6SVkcbO5
9 | A5+baCa3owwUtFBXMHM5hqpLv4P3/GsuW6283YwLZCf53dJY4lJZePqzPGsvs/wS
10 | vhnZrFvb61i/Aqm0hjhVh7h6VNxUiE8geMcjxy29LtzajoyS0EPVxes4xZu0VbS7
11 | 8LQyCNHSpS7TFmtVUQmbXqDN7cpiyr9+yutr0lZOMc7NYQt0nP/3RtYkWEob6wXa
12 | rVImHas1OYzlZymdO1uAnqkediS61E2vSD1OEq37/375FB/Q3AYXuNkQzDjYoJJz
13 | 9wsv7Xp0bdPzQ/daLdIFNQXo5MmVIirsWM07JvbZaJhDOiJxGn0MPf11/QARAQAB
14 | tEBOVklESUEgQ09SUE9SQVRJT04gKE9wZW4gU291cmNlIFByb2plY3RzKSA8Y3Vk
15 | YXRvb2xzQG52aWRpYS5jb20+iQI4BBMBCgAiBQJZzVgxAhsPBgsJCggHAwUVCgkI
16 | CwUWAgMBAAIeAQIXgAAKCRDdyuBE95bssAh6EACgUCww2sr8sOztEHKhvdCsonXu
17 | THYbel3YlWmVDPbh4dA31xoRXlvSJptJzPi/zlTc9fkVSFGbEZbFRR4JjnwYTMLD
18 | ElMh5YRMYAoPVYhWGKIO4earu32GhFuPjfr6h+0xNaQeDPIbr7bPe/AEhLSdJMzI
19 | OuAifr7UaC65A6YlxfeaSqyt0HthYujoQ12cWxP998C5jkc0IN2tyLs/OD7HLHht
20 | +lafqDSylykx63cw7jvsV/15rqZwVwjhkcxZyrKET32MTjXF3cxn7+TGpKS8B1k4
21 | a/EI7uXnncfSoma0dAT9bZM9JZbXQmSzCPDHHuVtnQ/3uh8VyenpigTFnrb20LCy
22 | 6WzJd3O9lAZXLhvwF/By3a07WLzRtTZNaUpt37Anb0js2syr3lohbmK9i3xvuqZN
23 | zhGPbqu9IV+vFgSGyTHRJUSBlHKDGiCdOOHc20MLPW1yRCXbx0F4eS9TWchYyJkJ
24 | NNczD5DnEl/gsvL4NCRxa+oUyUhhJ1HpJ6YNmTsy6nAAKIC+6248o164GiavaR3z
25 | 03RfaQayGHAUrBKi+PJBY7efgsZeYT8f+hyYrIC04MO8poBKS/GvSUL2QtVtj59N
26 | q+95gIptW2mZM8KRpt2huLH+QQ8SKr1vAECbpKJOwseqKmVyxX02iaSE8ifLE+tX
27 | FE8YgS3CZjWwy5PD0LkBDQRdgpCQAQgAx1oxX9tFlv3CIva0CJ0dsZyNF7mgHPgN
28 | szccUYLu0chyWYvwiVU/OlCzivytNX56wgeBgIVV1QzeBuTkrJSgzJ+dSgfrmyg5
29 | RwIDhvH+Dcut0++6+di1LyH9gXQcYPrN3pf4yR8nlRbm6K0Vsp0Z4+br18QelURe
30 | rfAkRordag26aB+MzVLvloHHu3Z6/v321uTGMdFd8CVCjovec5+EdcIAam3U/MmZ
31 | e2mr2M/x6F3st30cE7umq9Bb6UCqc6L8bQcoloxR3rwFzL1u9wUBUzQlaMNmxbe0
32 | BfezkmSQeC8JN4Fku+DtHEpS9uP5JEYNEEQ66K4mJDTMr0whBv1fKQARAQABiQNb
33 | BBgBCgAmAhsCFiEEyVsyG2HojBgJxPdZ3crgRPeW7LAFAl7oD1gFCQNGskgBKcBd
34 | IAQZAQoABgUCXYKQkAAKCRBu2RyjrBFgzZ/WB/9TuD2qzaBO7HlPDWRUTpFlvFgy
35 | Dc3XyfTAC/ISeYbIcPcq5kmVHgpsMdbN9Vvmot5GuT7VWzhHc9sJCmHgL330glBt
36 | NtSRflKzlBYnbiSWxLFYZtu2BtNOk8Ylbw8qw1E6W/iFBrqAwgeZvs2VOcPU3203
37 | Mqfi1JbS+YHC/bgs6cNq0zs/WJraYxiuleclKYExxLt9tRd0058n58GAph+Ki7mR
38 | InO6kxuKpsQannSn1Ku/DiaQcSF2L2TMSo0N9zwvYEZR+hgsKVqyRKT+DkZhusHJ
39 | HYGv96YHSTwo016ZhwYS9t0MLXY9/PgJysuO41Ya4Ii43D3UK1wOHTmyHZHTCRDd
40 | yuBE95bssDpwD/4jV9Pin3vAKa4hhn5GD4e478FNKRD58Q7qF3AhVTBNPIl1m4EF
41 | X7sqI6cXUDG4BjpS70ZRWF2x51ZTiq7DLTV/gGw2okfVjoWjzQY0ebrLd4IoNs80
42 | lIHmXxa+JdwB6WupCUzKCKLcPsX/yPAmswPNGAuIMAv+PWhUUSMVtzOZldnlogGM
43 | hbJ9UD2txFGGh9WoYc2vgX9KAaKryXcC6QMabv7JJU24HEJJDgbJEvtFM5PS8QMF
44 | bXIZsYgICWpQXVChBbduXo9sD2TUDWYAniNaaw4LKxPRG+Ix4HAqkh1oNOLojO30
45 | DO3r1/62FKE5/ykg3iSMTDR0iOES/leXCCIO9fRJT8+eucxyOQoY5ti7tjt1wm3H
46 | nTB+Rz3E/E2qeLs2PN82aseccm1G06pmsMCUiWtmSV6HjdO2XufYprrGLSu0RrT3
47 | sz5WHGUOY2iO40xHhSiXg3TcLZRpv30DQzxoUrx9Ff//rXLFznh+MksuvVD2roUR
48 | BGz/en31FxAcBoex9nNraeOekbFen5b7Xrq9wnzM5xZvJN2QYB3vS0khz/ZgFyy5
49 | 444ALa9gwb29FZCfA4m59S2QoB8uPQGM+8gnusE6J8y4fvI59ugafidIkt86dZ3m
50 | FsEME5XNmBGdNEo2flRVFfpG1IWds2Ba3IsdbYd9nzmbBW7/n0InVRDrIg==
51 | =9QWY
52 | -----END PGP PUBLIC KEY BLOCK-----
53 |
--------------------------------------------------------------------------------
/plugins/action/raw_reboot.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | ###############################################################################
4 | #
5 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
6 | # SPDX-License-Identifier: MIT
7 | #
8 | # Permission is hereby granted, free of charge, to any person obtaining a
9 | # copy of this software and associated documentation files (the "Software"),
10 | # to deal in the Software without restriction, including without limitation
11 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 | # and/or sell copies of the Software, and to permit persons to whom the
13 | # Software is furnished to do so, subject to the following conditions:
14 | #
15 | # The above copyright notice and this permission notice shall be included in
16 | # all copies or substantial portions of the Software.
17 | #
18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 | # DEALINGS IN THE SOFTWARE.
25 | #
26 | ###############################################################################
27 |
28 | import time
29 |
30 | from datetime import datetime, timedelta
31 |
32 | from ansible.errors import AnsibleConnectionFailure
33 | from ansible.plugins.action import ActionBase
34 |
35 |
# Plugin documentation (YAML). The option is nested under ``options`` and
# carries the type/default that ActionModule.run() actually applies.
DOCUMENTATION = r'''
---
module: raw_reboot

short_description: Issue a raw style reboot and wait for the host to come back online

version_added: "1.1.0"

description: Module for raw reboots

options:
    reboot_timeout:
        description: Maximum number of seconds to wait for a reboot
        type: int
        default: 600
'''


EXAMPLES = r'''
- name: raw reboot
  raw_reboot:
    reboot_timeout: 1200
'''
59 |
60 |
class TimeoutException(Exception):
    """Raised when the host does not answer again within ``reboot_timeout`` seconds."""
63 |
64 |
class ActionModule(ActionBase):
    """Action plugin that reboots the target with a raw command and waits for it to answer again.

    Task arguments:
        reboot_timeout: maximum number of seconds to wait for the host to
            come back (default 600).

    The returned result dict carries the ``failed`` and ``rebooted`` keys.
    """

    def _reset_connection(self):
        """Reset the connection after it was dropped; a failure here is not fatal."""
        try:
            self._connection.reset()
        except AnsibleConnectionFailure:
            # The host is still down; the next probe will simply try again.
            pass

    def run(self, tmp=None, task_vars=None, **kwargs):
        """Reboot the host and poll it until a command can be executed again.

        Raises:
            TimeoutException: the host did not answer within ``reboot_timeout`` seconds.
        """
        # ActionBase.run() expects (tmp, task_vars). Forward them as such instead
        # of handing the whole keyword dict over as ``tmp``.
        result = super(ActionModule, self).run(tmp, task_vars)
        result['failed'] = True
        result['rebooted'] = False

        reboot_timeout = int(self._task.args.get('reboot_timeout', 600))
        end_time = datetime.utcnow() + timedelta(seconds=reboot_timeout)

        # Now reboot and then wait
        try:
            self._low_level_execute_command("/sbin/reboot", sudoable=True)
        except AnsibleConnectionFailure:
            # The connection may be torn down before the command returns;
            # that is the expected outcome of a reboot, not an error.
            self._reset_connection()
        # Sleep just in case the reboot takes a few seconds
        time.sleep(30)

        while datetime.utcnow() < end_time:
            try:
                self._low_level_execute_command("/usr/bin/whoami", sudoable=True)
            except AnsibleConnectionFailure:
                # a connection failure is fine here, we are waiting for it to reboot anyway
                # reset it and move on
                self._reset_connection()
            except Exception:
                # Any other error while probing is treated as "not up yet".
                pass
            else:
                result['failed'] = False
                result['rebooted'] = True
                return result

            # Never sleep past the deadline.
            remaining = (end_time - datetime.utcnow()).total_seconds()
            if remaining <= 0:
                break
            time.sleep(min(60, remaining))

        raise TimeoutException("Timed out waiting for the host to reboot timeout seconds {timeout}".format(timeout=reboot_timeout))
97 |
--------------------------------------------------------------------------------
/roles/dpu_nvconfig/README.md:
--------------------------------------------------------------------------------
1 | # Set nvconfig parameters of DPU
2 |
3 | The `dpu_nvconfig` role is used to:
4 | * set link type (ETH or IB)
5 | * set NIC mode
6 | * set GPU owner
7 |
8 | ## Parameters
9 |
10 | 1. Set link type (ETH or IB)
11 | * `link_type_p1`
12 | * `link_type_p2`
13 |
14 | 2. Set NIC mode:
15 | * `dpu_nic_mode` allowed values: `ConnectX` or `SmartNIC`
16 |
17 | 3. Set GPU owner for ROY adapter (DPU+GPU)
18 | * `gpu_owner` allowed values: `ARM` or `X86`
19 |
20 | ## Playbook examples
21 |
22 | set_vpi_mode.yaml
23 |
24 | ---
25 | - hosts: bf2oob
26 | user: "{{ remote_install_user }}"
27 | become: true
28 | gather_facts: true
29 | pre_tasks:
30 | - name: Check for required variables
31 | fail:
32 | msg: "Neither link_type_p1 nor link_type_p2 variables defined. Allowed values: ETH or IB"
33 | when:
34 | - not link_type_p1 is defined
35 | - not link_type_p2 is defined
36 |
37 | vars:
38 | bmc_host: "{{ hostvars[non_bf2_host]['bmc_ip'] }}"
39 | bmc_user: "{{ hostvars[non_bf2_host]['bmc_user'] }}"
40 | bmc_password: "{{ hostvars[non_bf2_host]['bmc_password'] }}"
41 | run_on: "{{ groups['foreman'][0] }}"
42 | link_type_p1: ETH
43 | link_type_p2: IB
44 |
45 | roles:
46 | - nvidia.dpu_ops.dpu_nvconfig
47 |
48 | post_tasks:
49 | - name: reboot x86 host block
50 | block:
51 | - name: notify about reboot
52 | debug:
53 | msg: "!!! Reboot of {{ non_bf2_host }} is scheduled (playbook handler)!!!"
54 | - name: power-cycle x86 host
55 | include_role:
56 | name: nvidia.dpu_ops.bf_bmc
57 | tasks_from: powercycle.yml
58 | when: should_reboot is defined
59 |
60 |
61 |
62 | set_nic_mode-cx.yaml
63 |
64 | ---
65 | - hosts: bf2oob
66 | user: "{{ remote_install_user }}"
67 | become: true
68 | gather_facts: true
69 |
70 | vars:
71 | bmc_host: "{{ hostvars[non_bf2_host]['bmc_ip'] }}"
72 | bmc_user: "{{ hostvars[non_bf2_host]['bmc_user'] }}"
73 | bmc_password: "{{ hostvars[non_bf2_host]['bmc_password'] }}"
74 | run_on: "{{ groups['foreman'][0] }}"
75 |
76 | roles:
77 | - {name: nvidia.dpu_ops.dpu_nvconfig,
78 | dpu_nic_mode: ConnectX}
79 |
80 | post_tasks:
81 | - name: power-cycle x86 host
82 | include_role:
83 | name: nvidia.dpu_ops.bf_bmc
84 | tasks_from: powercycle.yml
85 | when: should_reboot is defined
86 |
87 |
88 |
89 | set_nic_mode-snic.yaml
90 |
91 | ---
92 | - hosts: bf2oob
93 | user: "{{ remote_install_user }}"
94 | become: true
95 | gather_facts: true
96 |
97 | vars:
98 | bmc_host: "{{ hostvars[non_bf2_host]['bmc_ip'] }}"
99 | bmc_user: "{{ hostvars[non_bf2_host]['bmc_user'] }}"
100 | bmc_password: "{{ hostvars[non_bf2_host]['bmc_password'] }}"
101 | run_on: "{{ groups['foreman'][0] }}"
102 |
103 | roles:
104 | - {name: nvidia.dpu_ops.dpu_nvconfig,
105 | dpu_nic_mode: SmartNIC}
106 |
107 | post_tasks:
108 | - name: power-cycle x86 host
109 | include_role:
110 | name: nvidia.dpu_ops.bf_bmc
111 | tasks_from: powercycle.yml
112 | when: should_reboot is defined
113 |
114 |
115 |
116 | set_gpu_mode.yaml
117 |
118 | ---
119 | - hosts: bf2oob
120 | user: "{{ remote_install_user }}"
121 | become: true
122 | gather_facts: true
123 |
124 | vars:
125 | bmc_host: "{{ hostvars[non_bf2_host]['bmc_ip'] }}"
126 | bmc_user: "{{ hostvars[non_bf2_host]['bmc_user'] }}"
127 | bmc_password: "{{ hostvars[non_bf2_host]['bmc_password'] }}"
128 | run_on: "{{ groups['foreman'][0] }}"
129 |
130 | roles:
131 | - {name: nvidia.dpu_ops.dpu_nvconfig,
132 | gpu_owner: ARM}
133 |
134 | post_tasks:
135 | - name: power-cycle x86 host
136 | include_role:
137 | name: nvidia.dpu_ops.bf_bmc
138 | tasks_from: powercycle.yml
139 | when: should_reboot is defined
140 |
141 |
142 |
--------------------------------------------------------------------------------
/plugins/action/raw_upgrade.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | ###############################################################################
4 | #
5 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
6 | # SPDX-License-Identifier: MIT
7 | #
8 | # Permission is hereby granted, free of charge, to any person obtaining a
9 | # copy of this software and associated documentation files (the "Software"),
10 | # to deal in the Software without restriction, including without limitation
11 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 | # and/or sell copies of the Software, and to permit persons to whom the
13 | # Software is furnished to do so, subject to the following conditions:
14 | #
15 | # The above copyright notice and this permission notice shall be included in
16 | # all copies or substantial portions of the Software.
17 | #
18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 | # DEALINGS IN THE SOFTWARE.
25 | #
26 | ###############################################################################
27 |
28 | import time
29 |
30 | from datetime import datetime, timedelta
31 |
32 | from ansible.errors import AnsibleConnectionFailure
33 | from ansible.plugins.action import ActionBase
34 |
35 |
# Plugin documentation (YAML). The options are nested under ``options`` and
# carry the types/defaults that ActionModule.run() actually applies.
DOCUMENTATION = r'''
---
module: raw_upgrade

short_description: Module issuing a raw style upgrade of firmwares

version_added: "1.1.0"

description: Module for raw upgrades

options:
    retries:
        description: Maximum number of retries
        type: int
        default: 100
    delay:
        description: Number of seconds to wait between retries
        type: int
        default: 60
'''


EXAMPLES = r'''
- name: raw upgrade
  raw_upgrade:
    retries: 100
    delay: 60
'''
62 |
63 |
class FailedActivationException(Exception):
    """Raised when the firmware image cannot be activated."""
66 |
67 |
class UnfinishedActivationException(Exception):
    """Raised when the activation neither succeeds nor fails within the allowed retries."""
70 |
71 |
# busctl command templates; ``{}`` is replaced with the image name found in /tmp/images.

# Requests activation of the given software image.
ACTIVATE_LINE = (
    "busctl set-property xyz.openbmc_project.Software.BMC.Updater "
    "/xyz/openbmc_project/software/{} "
    "xyz.openbmc_project.Software.Activation RequestedActivation s "
    "xyz.openbmc_project.Software.Activation.RequestedActivations.Active"
)

# Reads back the current activation state of the given software image.
VERIFY_LINE = (
    "busctl get-property xyz.openbmc_project.Software.BMC.Updater "
    "/xyz/openbmc_project/software/{} "
    "xyz.openbmc_project.Software.Activation Activation"
)
75 |
76 |
class ActionModule(ActionBase):
    """Action plugin that activates the single firmware image found in /tmp/images.

    Task arguments:
        retries: maximum number of activation-state polls (default 100).
        delay: seconds to wait between polls (default 60).

    On success the returned result dict carries ``active: True``.
    """

    def run(self, tmp=None, task_vars=None, **kwargs):
        """Request activation of the image and poll until it is Active or Failed.

        Raises:
            FailedActivationException: /tmp/images cannot be listed, does not hold
                exactly one file, or the activation state becomes Failed.
            UnfinishedActivationException: the state is still not Active after
                ``retries`` polls.
        """
        # ActionBase.run() expects (tmp, task_vars). Forward them as such instead
        # of handing the whole keyword dict over as ``tmp``.
        result = super(ActionModule, self).run(tmp, task_vars)

        retries = int(self._task.args.get('retries', 100))
        delay = int(self._task.args.get('delay', 60))

        listing = self._low_level_execute_command("ls --color=none -t /tmp/images/")
        if listing.get('rc', 0) != 0:
            # Without this an error message printed by ``ls`` could be mistaken for an image name.
            raise FailedActivationException("Unable to list /tmp/images")
        image_lines = [line.strip() for line in listing.get('stdout_lines', []) if line.strip()]
        if not image_lines:
            # Previously surfaced as an opaque IndexError.
            raise FailedActivationException("No file is present in /tmp/images")
        if len(image_lines) > 1:
            raise FailedActivationException("More than one file is present in /tmp/images")
        image_name = image_lines[0]

        self._low_level_execute_command(ACTIVATE_LINE.format(image_name))

        active = False
        for attempt in range(retries):
            verify_out = self._low_level_execute_command(VERIFY_LINE.format(image_name))['stdout']

            if "Activation.Activations.Active" in verify_out:
                active = True
                break
            if "Activation.Activations.Failed" in verify_out:
                raise FailedActivationException("Activation of firmware has failed")
            # No point in waiting after the final poll.
            if attempt + 1 < retries:
                time.sleep(delay)

        if not active:
            raise UnfinishedActivationException("Activation of firmware timed out and stayed in Activating state")

        result['active'] = active
        return result
114 |
--------------------------------------------------------------------------------
/roles/bf2_mode/tasks/ownership.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
# Switch the BlueField-2 between separated-host and embedded-CPU ownership and
# rename the matching interfaces in the netplan files.
- name: set current_bf_ownership
  set_fact:
    current_bf_ownership: "{{ 'separated_host' if bf2_devices[0].ownership == 'separated' else 'embedded_cpu' }}"
    new_ownership_val: "{{ 0 if new_bf_ownership == 'separated_host' else 1 }}"

- name: set interface names on new ownership
  set_fact:
    new_interface: "{{ separated_port if new_bf_ownership == 'separated_host' else embedded_port }}"
    old_interface: "{{ separated_port if current_bf_ownership == 'separated_host' else embedded_port }}"

- name: var
  debug:
    msg: "Current: {{ current_bf_ownership }} New: {{ new_bf_ownership }}"

- name: See if netplan exists
  stat:
    path: /etc/netplan/50-cloud-init.yaml
  register: netplan_file

- name: Fail if netplan does not exist
  fail:
    msg: /etc/netplan/50-cloud-init.yaml did not exist
  when: not netplan_file.stat.exists

# Both PCI functions of the card are reconfigured.
- name: Change bf2 ownership
  command: "{{ item }}"
  with_items:
    - "mlxconfig -y -d {{ bf2_devices[0].mst }} s INTERNAL_CPU_MODEL={{ new_ownership_val }}"
    - "mlxconfig -y -d {{ bf2_devices[0].mst }}.1 s INTERNAL_CPU_MODEL={{ new_ownership_val }}"
  when:
    - current_bf_ownership != new_bf_ownership
  register: set_output_ownership

- name: Set ovs config for separated
  lineinfile:
    path: /etc/mellanox/mlnx-ovs.conf
    regexp: CREATE_OVS_BRIDGES=.*$
    line: CREATE_OVS_BRIDGES="no"
  when:
    - current_bf_ownership != new_bf_ownership and new_bf_ownership == 'separated_host'

- name: cleanup ovs bridges
  command: "/usr/bin/ovs-vsctl --if-exists del-br {{ item }}"
  with_items:
    - "ovsbr1"
    - "ovsbr2"
  when:
    - current_bf_ownership != new_bf_ownership and new_bf_ownership == 'separated_host'

# The leading \b keeps the pattern from matching the tail of a longer interface
# name (single quotes are required: in a double-quoted YAML string \b is a backspace).
- name: Update netplans - p0
  ansible.builtin.replace:
    path: "{{ item }}"
    regexp: '\b{{ old_interface[0] }}:'
    replace: "{{ new_interface[0] }}:"
  with_items:
    - /etc/netplan/50-cloud-init.yaml
    - /etc/netplan/60-mlnx.yaml

- name: Update netplans - p1
  ansible.builtin.replace:
    path: "{{ item }}"
    regexp: '\b{{ old_interface[1] }}:'
    replace: "{{ new_interface[1] }}:"
  with_items:
    - /etc/netplan/50-cloud-init.yaml
    - /etc/netplan/60-mlnx.yaml

- name: Set ovs config for embedded
  lineinfile:
    path: /etc/mellanox/mlnx-ovs.conf
    regexp: CREATE_OVS_BRIDGES=.*$
    line: CREATE_OVS_BRIDGES="yes"
  when:
    - current_bf_ownership != new_bf_ownership and new_bf_ownership == 'embedded_cpu'

- name: update netplan
  command: netplan generate

- name: apply netplan
  command: netplan apply

- name: sync files to disk
  command: sync

# Consumers only test whether should_reboot is defined, so it is set only
# when the ownership was actually changed.
- name: Set reboot flag
  set_fact:
    should_reboot: true
  when: set_output_ownership.changed
113 |
--------------------------------------------------------------------------------
/roles/dpu_nvconfig/tasks/set_nic_mode.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # Copyright 2022 NVIDIA Corporation
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
6 | # this software and associated documentation files (the "Software"), to deal in
7 | # the Software without restriction, including without limitation the rights to
8 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 | # the Software, and to permit persons to whom the Software is furnished to do so,
10 | # subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | #
22 | ###############################################################################
23 | ## Ref: BLUEFIELD AS CONNECTX, Internal Architecture Spec
24 | # 5.2 Transition from SNIC mode to NIC mode
25 | # Transition from SNIC mode (default) to NIC mode should be available from x86, w/o accessing Arm cores.
26 | # 1. Install DPU ConnectX FW.
27 | # 2. NV configurations should be applied
28 | # a. INTERNAL_CPU_PAGE_SUPPLIER = EXT_HOST_PF
29 | # b. INTERNAL_CPU_ESWITCH_MANAGER = EXT_HOST_PF
30 | # c. INTERNAL_CPU_IB_VPORT0 = EXT_HOST_PF
31 | # d. INTERNAL_CPU_OFFLOAD_ENGINE = DISABLED
32 | # 3. Perform FW reset: mlxfwreset -d reset
33 | # a. Note, default reset flow (legacy/sub-1-sec) to be performed depends on system and device configurations.
34 | # b. It should be verified that if supported (mlxfwreset -d query),
35 | # both flows (legacy and sub-1-sec) should work and result in DPU device to be configured to DPU NIC mode.
36 | #
37 | # 5.2.1 Rshim host driver and Rshim PF aspect
38 | # 1. Rshim host driver isn’t must to complete transition and operate in DPU NIC mode
39 | # 2. The one who’d like to restrict Rshim PF should configure INTERNAL_CPU_RSHIM = DISABLED
40 | # a. Note: power cycle should be performed to apply such configuration
41 | #
42 | # 5.3 Transition from NIC mode back to SNIC mode (default) should include
43 | # ...
44 | # 3. NV configurations to be applied
45 | # a. INTERNAL_CPU_PAGE_SUPPLIER = ECPF
46 | # b. INTERNAL_CPU_ESWITCH_MANAGER = ECPF
47 | # c. INTERNAL_CPU_IB_VPORT0 = ECPF
48 | # d. INTERNAL_CPU_OFFLOAD_ENGINE = ENABLED
49 | # e. If INTERNAL_CPU_RSHIM = DISABLED, need to configure INTERNAL_CPU_RSHIM = ENABLED
50 | # i. Note, power cycle should be performed if INTERNAL_CPU_RSHIM is configured
51 | #---
52 | # $ mlxconfig -d /dev/mst/mt41686_pciconf0 i
53 | # ...
54 | # INTERNAL CPU CONF:
55 | # INTERNAL_CPU_ESWITCH_MANAGER=
56 | # Defines the owner of Eth Embedded Switch responsibilities
57 | # 0x0: ECPF
58 | # 0x1: EXT_HOST_PF
59 | # Valid for INTERNAL_CPU_MODEL = EMBEDDED_CPU
60 | ---
# Keep the nvconfig of the first BlueField device at hand for the conditions below.
- name: nvconfig
  set_fact:
    nv: '{{ bf2_devices[0].nvconfig }}'

- name: info
  debug:
    msg: |
      New NIC mode: {{ dpu_nic_mode }}

# if INTERNAL_CPU_MODEL SEPARATED_HOST(0)
# "-E- The Device doesn't support INTERNAL_CPU_PAGE_SUPPLIER parameter"
# so, we need to set INTERNAL_CPU_MODEL=EMBEDDED_CPU(1) before switching to ConnectX mode
#
# `include` is deprecated and removed from current ansible-core; `include_tasks`
# is its dynamic replacement and still passes the loop `item` to nvset.yml.
- name: set nic_mode=ConnectX
  block:
    - include_tasks: set_embedded_cpu_model.yml
      when: nv.INTERNAL_CPU_MODEL == "SEPARATED_HOST(0)"
    - include_tasks: nvset.yml
      with_dict:
        - {INTERNAL_CPU_PAGE_SUPPLIER: 1}
        - {INTERNAL_CPU_ESWITCH_MANAGER: 1}
        - {INTERNAL_CPU_IB_VPORT0: 1}
        - {INTERNAL_CPU_OFFLOAD_ENGINE: 1}
  when: dpu_nic_mode == "ConnectX" or dpu_nic_mode == "CX"

- name: set nic_mode=SmartNIC
  block:
    - include_tasks: set_embedded_cpu_model.yml
      when: nv.INTERNAL_CPU_MODEL == "SEPARATED_HOST(0)"
    - include_tasks: nvset.yml
      with_dict:
        - {INTERNAL_CPU_PAGE_SUPPLIER: 0}
        - {INTERNAL_CPU_ESWITCH_MANAGER: 0}
        - {INTERNAL_CPU_IB_VPORT0: 0}
        - {INTERNAL_CPU_OFFLOAD_ENGINE: 0}
  when: dpu_nic_mode == "SmartNIC" or dpu_nic_mode == "SNIC"
97 |
--------------------------------------------------------------------------------
/roles/load_bfb/tasks/main.yml:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | #
3 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | # SPDX-License-Identifier: MIT
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in
14 | # all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | #
24 | ###############################################################################
# Path 1: push the BFB through the rshim device owned by the x86 host.
- name: Load BFB from x86 host
  block:
    - name: get facts
      bf2_facts:
      when: bf2_devices is not defined
      delegate_to: "{{ x86_host }}"

    # `that` is already evaluated as a Jinja expression, so no {{ }} delimiters.
    - name: Verify rshim is active on host
      assert:
        that: bf2_devices | nvidia.dpu_ops.get_rshim(rshim.dev) | length > 0
        fail_msg: >
          rshim is not active on the host, which could mean that either
          the BMC on the BF2 card owns the rshim or it is just disabled on the host.
          Please verify the state of your host and BF2 card before proceeding.
      delegate_to: "{{ x86_host }}"

    # The temporary bf.cfg is removed in `always` so it does not linger on the
    # host when bfb-install fails.
    - name: Generate bf.cfg, load the BFB and clean up
      block:
        - name: Generate bf.cfg
          template:
            src: "{{ bfcfg_template }}"
            dest: /tmp/bf.cfg
            owner: root
            group: root
            mode: "0755"
          delegate_to: "{{ x86_host }}"

        - name: Load bfb into bf2
          shell: "bfb-install --bfb {{ local_bfb }} --config /tmp/bf.cfg --rshim {{ rshim.dev }}"
          delegate_to: "{{ x86_host }}"
      always:
        - name: Delete temporary bf.cfg
          file:
            path: /tmp/bf.cfg
            state: absent
          delegate_to: "{{ x86_host }}"
  when: is_bmc is false

# Path 2: the BMC owns the rshim. The BFB and bf.cfg are concatenated on the
# x86 host and fetched by the BMC straight into the rshim boot device.
- name: Load BFB from BMC rshim
  block:
    - name: get rshim from BMC
      raw: 'find /dev -maxdepth 1 -name "rshim*"'
      register: rshim_output_bmc

    - name: Verify rshim is active on BMC
      assert:
        that: rshim_output_bmc.stdout_lines|length > 0
        fail_msg: >
          rshim is not active on the BMC, which could mean that either the x86 host
          owns the rshim or it is just disabled on the BMC.
          Please verify the state of your host and BF2 card before proceeding.

    # owner/group/mode have no meaning together with state: absent.
    - name: delete directory to ensure it is empty
      file:
        state: absent
        path: "{{ local_bfb | dirname }}/{{ boot_mac }}"
      delegate_to: "{{ x86_host }}"

    - name: Create directory to append the files
      file:
        state: directory
        path: "{{ local_bfb | dirname }}/{{ boot_mac }}"
        owner: root
        group: root
        mode: "0755"
      delegate_to: "{{ x86_host }}"

    # This copies the BFB image itself; bf.cfg is generated by the next task.
    - name: Copy bfb for appending
      copy:
        src: "{{ local_bfb }}"
        dest: "{{ local_bfb | dirname }}/{{ boot_mac }}"
        owner: root
        group: root
        mode: "0755"
        remote_src: true
      delegate_to: "{{ x86_host }}"

    - name: Generate bf.cfg
      template:
        src: "{{ bfcfg_template }}"
        dest: "{{ local_bfb | dirname }}/{{ boot_mac }}/bf.cfg"
        owner: root
        group: root
        mode: "0755"
      delegate_to: "{{ x86_host }}"

    - name: Assemble combined file
      assemble:
        src: "{{ local_bfb | dirname }}/{{ boot_mac }}"
        dest: "{{ local_bfb | dirname }}/{{ boot_mac }}/bfb-and-config.bfb"
        owner: root
        group: root
        mode: "0755"
      delegate_to: "{{ x86_host }}"

    # NOTE(review): --no-check-certificate disables TLS verification of bfb_url.
    - name: Load bfb into bf2
      raw: "wget --no-check-certificate {{ bfb_url | dirname }}/{{ boot_mac }}/bfb-and-config.bfb -O /dev/rshim0/boot"
      retries: 10
      delay: 1
      register: result
      until: result.rc == 0
  when: is_bmc is true
128 |
--------------------------------------------------------------------------------
/plugins/modules/bf2_facts.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | ###############################################################################
4 | #
5 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
6 | # SPDX-License-Identifier: MIT
7 | #
8 | # Permission is hereby granted, free of charge, to any person obtaining a
9 | # copy of this software and associated documentation files (the "Software"),
10 | # to deal in the Software without restriction, including without limitation
11 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 | # and/or sell copies of the Software, and to permit persons to whom the
13 | # Software is furnished to do so, subject to the following conditions:
14 | #
15 | # The above copyright notice and this permission notice shall be included in
16 | # all copies or substantial portions of the Software.
17 | #
18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 | # DEALINGS IN THE SOFTWARE.
25 | #
26 | ###############################################################################
27 |
28 | from __future__ import (absolute_import, division, print_function)
29 | __metaclass__ = type
30 | import re
31 | import sys
32 |
33 |
# Module documentation (YAML) with the nesting restored and the typo fixed.
DOCUMENTATION = r'''
---
module: bf2_facts

short_description: Module for generating bf2 facts

version_added: "1.1.0"

description: Module for generating bf2 facts

'''


EXAMPLES = r'''
- name: gather bf2 facts
  bf2_facts:
'''


RETURN = r'''
ansible_facts:
  description: Facts to add to ansible_facts.
  returned: always
  type: dict
  contains:
'''


# Value reported when a setting cannot be read from the device configuration.
UNDEFINED = 'UNDEFINED'
63 |
64 |
65 | import shlex
66 | import subprocess
67 | from ansible.module_utils.basic import AnsibleModule
68 |
# Process-wide caches so each device is only queried once:
#   nvconfig_cache: pci/mst device -> parsed mlxconfig dict
#   lspci_cache:    pci device     -> parsed lspci VPD dict
nvconfig_cache = {}
lspci_cache = {}
72 |
73 |
class CommandError(Exception):
    """
    helper class for handling stderr failures

    ``stderr`` is kept exactly as passed in (``execute`` hands over the raw
    bytes captured from the process); ``str()`` of the error always yields text.
    """
    def __init__(self, stderr):
        self.stderr = stderr

    def __str__(self):
        err = self.stderr
        if err is None:
            return ''
        if isinstance(err, bytes):
            # __str__ must return text; returning bytes raises TypeError.
            return err.decode('utf-8', 'replace')
        return str(err)
82 |
83 |
def execute(cmd):
    """
    Executes a command and returns its stdout as text.

    ``cmd`` is either an argument list or a string, which is split with
    ``shlex.split``. The command is given 15 seconds to finish; on timeout it
    is killed and whatever it wrote to stdout so far is returned.

    Raises CommandError (carrying the captured stderr) if the command exits
    with a non-zero return code.
    """
    if isinstance(cmd, str):
        cmd = shlex.split(cmd)
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    try:
        stdout, stderr = proc.communicate(input=None, timeout=15)
        if proc.returncode != 0:
            raise CommandError(stderr)
    except subprocess.TimeoutExpired:
        # Best effort: stop the process and keep the partial output.
        proc.kill()
        stdout, stderr = proc.communicate()
    # Tool output is not guaranteed to be valid UTF-8; replace undecodable
    # bytes instead of aborting the whole fact gathering.
    return stdout.decode('utf-8', 'replace')
102 |
103 |
def get_lines(cmd):
    """Run ``cmd`` through ``execute`` and return its output, stripped of trailing whitespace, as a list of lines."""
    output = execute(cmd)
    return output.rstrip().split('\n')
106 |
107 |
def get_first_result(results, key):
    """Return the first entry of ``results`` that contains ``key``, or None if there is none."""
    return next((entry for entry in results if key in entry), None)
114 |
115 |
def has_query_privhost():
    """Tell whether the help text of ``mlxprivhost`` mentions ``query``."""
    help_lines = get_lines('mlxprivhost -h')
    match = get_first_result(help_lines, 'query')
    return match is not None
119 |
120 |
def get_rshim_output(rshim_path):
    """
    Read the ``misc`` file of an rshim device.

    Returns a tuple ``(full_dev_name, lines)``: the value of the ``DEV_NAME``
    line and all lines read, with an extra ``RSHIM_SLOT <rshim_path>`` line
    appended.

    Raises ValueError if the output carries no usable ``DEV_NAME`` line.
    """
    # File IO in the Popen call is unhappy w/ the special rshim files, so this call command is used
    # The path is quoted because the command goes through a shell.
    subprocess.call("echo 'DISPLAY_LEVEL 1' > {}/misc".format(shlex.quote(rshim_path)), shell=True)
    lines = get_lines("cat {}/misc".format(shlex.quote(rshim_path)))
    # add in the rshim slot for later use
    lines.append("RSHIM_SLOT {}".format(rshim_path))
    dev_name_line = get_first_result(lines, 'DEV_NAME')
    # shlex.split(None) would read stdin (or raise, depending on the Python
    # version); fail with a clear message instead.
    fields = shlex.split(dev_name_line) if dev_name_line is not None else []
    if len(fields) < 2:
        raise ValueError("no DEV_NAME found in {}/misc".format(rshim_path))
    full_dev_name = fields[1]
    return full_dev_name, lines
130 |
131 |
def get_mst_and_pci():
    """
    List the BlueField devices reported by ``mst status -v``.

    Returns a list of tuples built from the second and third column of every
    line mentioning ``BlueField``. With ``-v`` each card shows up twice, the
    second time as ``<device>.1``; entries whose first field contains a period
    are dropped so every card is listed once.
    """
    # FIXME BlueField (1), BlueField3 ?
    candidates = []
    for line in get_lines('mst status -v'):
        if 'BlueField' in line:
            # grab only the mst and pci device columns
            candidates.append(tuple(line.split()[1:3]))

    devices = []
    for entry in candidates:
        # discard the devices with a period in the name
        if '.' not in entry[0]:
            devices.append(entry)
    return devices
144 |
145 |
146 | def _parse_mlxconfig(lines):
147 | """
148 | Input: lines of `mlxconfig -d .. q` output
149 | Output: dict
150 | """
151 | # in_hdr = True
152 | ret = dict()
153 | for l in lines:
154 | # if in_hdr:
155 | # if l.startswith('Configurations'):
156 | # in_hdr = False
157 | # continue
158 | # if not l:
159 | # continue
160 | ary = re.split(r'\s+', l)
161 | # print(repr(ary), file=sys.stderr)
162 | # (x, hdr, val) = re.split(r'\s+', l)
163 | if len(ary) >= 3 and ary[0] == '':
164 | ret[ary[1]] = ary[2]
165 | return ret
166 |
167 |
def get_mlxconfig(mst):
    """Return the parsed ``mlxconfig -d <mst> q`` settings, cached per device.

    Results are stored in the module-level ``nvconfig_cache`` dict keyed by
    ``mst``; a cached entry is returned without running any command.

    When the settings contain ``PCI_DOWNSTREAM_PORT_OWNER``, element ``[4]`` of
    that array is queried separately and added under the key
    ``PCI_DOWNSTREAM_PORT_OWNER[4]`` if the second query reports it.

    Args:
        mst: mst device path passed to ``mlxconfig -d``.

    Returns:
        dict of setting name -> value string.
    """
    if mst in nvconfig_cache:
        return nvconfig_cache[mst]
    lines = get_lines("mlxconfig -d {} q".format(mst))
    ret = _parse_mlxconfig(lines)
    # needed for PRIS and ROY adapters:
    if 'PCI_DOWNSTREAM_PORT_OWNER' in ret:
        k = 'PCI_DOWNSTREAM_PORT_OWNER[4]'
        indexed = _parse_mlxconfig(get_lines("mlxconfig -d {} q {}".format(mst, k)))
        # The indexed query may not report the key; skip it instead of
        # failing the whole fact collection with a KeyError.
        if k in indexed:
            ret[k] = indexed[k]
    nvconfig_cache[mst] = ret
    return ret
182 |
183 |
def get_mode(mst):
    """Map the ``INTERNAL_CPU_MODEL`` setting of ``mst`` to a mode name.

    Returns ``'embedded'`` when the value is exactly ``EMBEDDED_CPU(1)``,
    ``'separated'`` for any other value, and ``UNDEFINED`` when the setting is
    absent from the mlxconfig output.
    """
    # TODO what about NIC_MODE vs SNIC_MODE vs SEPARATED_MODE ?
    model = get_mlxconfig(mst).get('INTERNAL_CPU_MODEL', None)
    if model is None:
        return UNDEFINED
    if model == 'EMBEDDED_CPU(1)':
        return 'embedded'
    return 'separated'
193 |
194 |
def get_vpd(pci):
    """Return the tagged fields found in ``lspci -vvs <pci>`` output, cached per device.

    Lines of the form ``<whitespace>[XX] <label>: <value>`` are collected into
    a dict keyed by the two-character tag ``XX`` (for example ``SN`` and
    ``PN``, which the serial/part number helpers look up). Results are stored
    in the module-level ``lspci_cache`` dict keyed by ``pci``.

    Args:
        pci: PCI address passed to ``lspci -vvs``.

    Returns:
        dict of tag -> value string; empty when no line matches.
    """
    if pci in lspci_cache:
        return lspci_cache[pci]
    lines = get_lines("lspci -vvs {}".format(pci))
    # Raw string: "\s", "\[" and "\w" are invalid escapes in a normal string
    # literal and trigger a warning on current Python versions.
    rx = re.compile(r'^\s+\[(\w\w)\]\s[^:]+:\s(.*?)\s*$')
    ret = dict()
    for line in lines:
        m = rx.search(line)
        if m is None:
            continue
        ret[m.group(1)] = m.group(2)
    lspci_cache[pci] = ret
    return ret
208 |
209 |
def get_serial_number(pci):
    """Return the ``SN`` field of the device's VPD data, or ``UNDEFINED`` if absent."""
    return get_vpd(pci).get('SN', UNDEFINED)
218 |
219 |
220 |
def get_part_number(pci):
    """Return the ``PN`` field of the device's VPD data, or ``UNDEFINED`` if absent."""
    return get_vpd(pci).get('PN', UNDEFINED)
229 |
230 |
def get_rshims_from_fs():
    """List the ``rshim*`` entries found directly under ``/dev``.

    ``find`` output that is empty comes back from ``get_lines`` as a list with
    a single empty string; that case is normalised to an empty list.
    """
    found = get_lines('find /dev -maxdepth 1 -name "rshim*"')
    nothing_found = len(found) == 1 and not found[0]
    return [] if nothing_found else found
237 |
238 |
def get_rshim_from_pci(rshim_outs, pci):
    """Look up the rshim output lines that belong to the card at ``pci``.

    The part of ``pci`` before the first dot is searched for as a substring of
    the keys of ``rshim_outs``, because the rshim device name carries a
    different function number (62:00.0 vs 62:00.2).

    Returns ``None`` when ``rshim_outs`` is empty, an empty list when no key
    matches (a card may have no rshim on the host), and otherwise the value
    stored under the first matching key.
    """
    if not rshim_outs:
        return None
    bus_and_device = pci.partition('.')[0]
    matching_key = get_first_result(rshim_outs.keys(), bus_and_device)
    return [] if matching_key is None else rshim_outs.get(matching_key)
251 |
252 |
def get_mac_from_rshim_output(rshim_out):
    """Return the second token of the first ``PEER_MAC`` line in ``rshim_out``."""
    peer_mac_line = get_first_result(rshim_out, 'PEER_MAC')
    tokens = shlex.split(peer_mac_line)
    return tokens[1]
256 |
257 |
def get_rshim_slot_from_rshim_output(rshim_out):
    """Return the second token of the first ``RSHIM_SLOT`` line in ``rshim_out``."""
    slot_line = get_first_result(rshim_out, 'RSHIM_SLOT')
    tokens = shlex.split(slot_line)
    return tokens[1]
261 |
262 |
def get_restriction_level(mst):
    """Return the lower-cased ``level`` value reported by ``mlxprivhost -d <mst> q``.

    The first output line containing ``level`` is split on ``:`` and the
    second piece is stripped of surrounding whitespace and lower-cased.
    """
    output = get_lines("mlxprivhost -d {} q".format(mst))
    level_line = get_first_result(output, 'level')
    value = level_line.split(":")[1]
    return value.strip().lower()
267 |
268 |
def get_versions(mst):
    """Extract component versions from ``mlxfwmanager -d <mst>`` output.

    A line is used only when its first whitespace-separated token is one of
    ``FW``, ``PXE``, ``UEFI`` or ``UNKNOWN_ROM`` and it has a value column.
    Matching on the first token (rather than a substring anywhere in the
    line) keeps free-text lines that merely mention one of these words from
    producing bogus entries, and each line is handled at most once.

    Args:
        mst: mst device path passed to ``mlxfwmanager -d``.

    Returns:
        dict of component name -> version string from the column following
        the name. ``UEFI Virtio <x>`` rows use the three-word name as key.
    """
    known_components = ('FW', 'PXE', 'UEFI', 'UNKNOWN_ROM')
    versions = {}
    for line in get_lines("mlxfwmanager -d {}".format(mst)):
        fields = line.split()
        # need at least "<name> <version>"; anything shorter has no value
        if len(fields) < 2 or fields[0] not in known_components:
            continue
        if fields[1] == 'Virtio':
            # Some of the UEFI Virtio have 3 words before the version so this
            # takes that into consideration
            if len(fields) >= 4:
                versions[' '.join(fields[:3])] = fields[3]
        else:
            versions[fields[0]] = fields[1]
    return versions
284 |
285 |
def run_module():
    """Collect BlueField device facts and report them through Ansible.

    The module takes no arguments. It starts ``mst``, gathers rshim
    information for every ``/dev/rshim*`` entry, then builds one entry per
    device returned by ``get_mst_and_pci`` under the ``bf2_devices`` fact.

    If the ``mst`` command is not installed, the module exits successfully
    with an empty device list and a warning. Any other exception is reported
    with ``fail_json``, including the error text and the traceback.
    """
    # local import: only needed to format the traceback on the failure path
    import traceback

    ansible_facts = {'bf2_devices': []}
    warnings = []

    module = AnsibleModule(
        argument_spec={},
        supports_check_mode=True
    )
    try:
        try:
            execute('mst start')
        except FileNotFoundError:
            # if mst is not installed on the machine, popen will throw this exception,
            # so it can be handled gracefully
            warnings.append("could not find the mst command, ensure that mlnx-ofed-all is installed")
            module.exit_json(ansible_facts=ansible_facts, warnings=warnings)

        # validate if mlxprivhost can be used for query mode. some versions do not have the query flag
        can_query_privhost = has_query_privhost()

        rshims = get_rshims_from_fs()
        # rshim output will contain a key to the pcie device name with info inside it
        rshim_outs = {}

        # get all the rshim's on a single machine
        for rshim_path in rshims:
            full_dev_name, lines = get_rshim_output(rshim_path)
            rshim_outs[full_dev_name] = lines

        for mst, pci in get_mst_and_pci():
            rshim_out = get_rshim_from_pci(rshim_outs, pci)
            permission = get_restriction_level(mst) if can_query_privhost else UNDEFINED
            if permission == 'privileged':
                # many items only work in privileged mode
                ownership = get_mode(mst)
                versions = get_versions(mst)
            else:
                ownership = UNDEFINED
                versions = UNDEFINED

            ansible_facts['bf2_devices'].append({
                'mst': mst,
                'pci': pci,
                'ownership': ownership,
                'permission': permission,
                'serial_number': get_serial_number(pci),
                'part_number': get_part_number(pci),
                # Sort this out once the mac is not all 00's
                # 'mac': get_mac_from_rshim_output(rshim_out) if rshim_out else UNDEFINED,
                'rshim': get_rshim_slot_from_rshim_output(rshim_out) if rshim_out else UNDEFINED,
                'versions': versions,
                # only populated when get_mode() ran for this device
                'nvconfig': nvconfig_cache.get(mst, {})
            })

        # pass the (empty) list rather than "" so no blank warning is emitted
        module.exit_json(ansible_facts=ansible_facts, warnings=warnings)
    except Exception as e:
        # fail_json's `exception` field carries traceback text; an Exception
        # object is not a serialisable result value.
        module.fail_json(msg='An unhandled error occurred: {}'.format(e),
                         exception=traceback.format_exc())
344 |
345 |
def main():
    """Entry point of the module: delegates to ``run_module``."""
    run_module()


# Execute the module only when this file is run as a script.
if __name__ == '__main__':
    main()
352 |
--------------------------------------------------------------------------------
/plugins/modules/bf2_facts_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | ###############################################################################
4 | #
5 | # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
6 | # SPDX-License-Identifier: MIT
7 | #
8 | # Permission is hereby granted, free of charge, to any person obtaining a
9 | # copy of this software and associated documentation files (the "Software"),
10 | # to deal in the Software without restriction, including without limitation
11 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 | # and/or sell copies of the Software, and to permit persons to whom the
13 | # Software is furnished to do so, subject to the following conditions:
14 | #
15 | # The above copyright notice and this permission notice shall be included in
16 | # all copies or substantial portions of the Software.
17 | #
18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 | # DEALINGS IN THE SOFTWARE.
25 | #
26 | ###############################################################################
27 |
28 | import bf2_facts
29 | import shlex
30 | import unittest
31 | from unittest.mock import patch
32 |
33 |
def generate_rshim_output(mock_execute, pci, rshim, mac='00:00:00:00:00:00'):
    """Feed canned rshim ``misc`` output to ``bf2_facts.get_rshim_output``.

    The canned text is formatted with ``pci`` (placed in the ``DEV_NAME``
    line) and ``mac`` (placed in the ``PEER_MAC`` line), installed as the
    return value of ``mock_execute``, and the result of
    ``bf2_facts.get_rshim_output(rshim)`` is returned.
    """
    template = """DISPLAY_LEVEL 1 (0:basic, 1:advanced, 2:log)
BOOT_MODE 1 (0:rshim, 1:emmc, 2:emmc-boot-swap)
BOOT_TIMEOUT 100 (seconds)
DROP_MODE 0 (0:normal, 1:drop)
SW_RESET 0 (1: reset)
DEV_NAME pcie-0000:{}.2
DEV_INFO BlueField-2(Rev 1)
BOOT_RESET_SKIP 0 (1: skip)
PEER_MAC {} (rw)
PXE_ID 0x00000000 (rw)
VLAN_ID 0 0 (rw)
"""
    rendered = template.format(pci, mac)
    mock_execute.return_value = rendered
    return bf2_facts.get_rshim_output(rshim)
49 |
50 |
class Test(unittest.TestCase):
    """Unit tests for the ``bf2_facts`` helpers.

    Command output is supplied by patching ``bf2_facts.execute`` with canned
    text; ``subprocess.call`` is patched where the helper under test shells out.
    """

    @patch('bf2_facts.execute')
    def test_has_query_privhost_new_version(self, mock_execute):
        """Help text that lists the ``query`` command is detected as query-capable."""
        example = """usage: mlxprivhost [-h] [-v] --device DEVICE [--disable_rshim] [--disable_tracer] [--disable_counter_rd] [--disable_port_owner] {r,restrict,p,privilege,q,query}

restrict or privilege host
Note: New configurations takes effect immediately.
Note: privileged host - host has all supported privileges.
restricted host - host is not allowed to modify global
per port/parameters or access other hosts parametersis.

optional arguments:
-h, --help show this help message and exit
-v, --version show program's version number and exit

Options:
--device DEVICE, -d DEVICE
Device to work with.
--disable_rshim When TRUE, the host does not have an RSHIM function
to access the embedded CPU registers
--disable_tracer When TRUE, the host will not be allowed to own the Tracer
--disable_counter_rd When TRUE, the host will not be allowed to read Physical port counters
--disable_port_owner When TRUE, the host will not be allowed to be Port Owner

Commands:
{r,restrict,p,privilege,q,query}
restrict: Set host 1 (ARM) privileged, host 0 (x86_64) restricted.
privilege: Set host 1 (ARM) privileged, host 0 (x86_64) privileged
(back to default).
query: Query current host configuration.
"""
        mock_execute.return_value = example
        self.assertTrue(bf2_facts.has_query_privhost())

    @patch('bf2_facts.execute')
    def test_has_query_privhost_old_version(self, mock_execute):
        """Help text without a ``query`` command is detected as not query-capable."""
        example = """usage: mlxprivhost [-h] [-v] --device DEVICE [--disable_rshim] [--disable_tracer] [--disable_counter_rd] [--disable_port_owner] {r,restrict,p,privilege}

restrict or privilege host
Note: New configurations takes effect immediately.
Note: privileged host - host has all supported privileges.
restricted host - host is not allowed to modify global
per port/parameters or access other hosts parametersis.

optional arguments:
-h, --help show this help message and exit
-v, --version show program's version number and exit

Options:
--device DEVICE, -d DEVICE
Device to work with.
--disable_rshim When TRUE, the host does not have an RSHIM function
to access the embedded CPU registers
--disable_tracer When TRUE, the host will not be allowed to own the Tracer
--disable_counter_rd When TRUE, the host will not be allowed to read Physical port counters
--disable_port_owner When TRUE, the host will not be allowed to be Port Owner

Commands:
{r,restrict,p,privilege}
restrict: Set host 1 (ARM) privileged, host 0 (x86_64) restricted.
privilege: Set host 1 (ARM) privileged, host 0 (x86_64) privileged
(back to default).
"""
        mock_execute.return_value = example
        self.assertFalse(bf2_facts.has_query_privhost())

    @patch('bf2_facts.execute')
    @patch('subprocess.call')
    def test_get_rshim_output(self, call, mock_execute):
        """The device name is returned and a RSHIM_SLOT line is appended to the output."""
        example = """DISPLAY_LEVEL 1 (0:basic, 1:advanced, 2:log)
BOOT_MODE 1 (0:rshim, 1:emmc, 2:emmc-boot-swap)
BOOT_TIMEOUT 100 (seconds)
DROP_MODE 0 (0:normal, 1:drop)
SW_RESET 0 (1: reset)
DEV_NAME pcie-0000:e2:00.2
DEV_INFO BlueField-2(Rev 1)
BOOT_RESET_SKIP 0 (1: skip)
PEER_MAC 00:00:00:00:00:00 (rw)
PXE_ID 0x00000000 (rw)
VLAN_ID 0 0 (rw)
"""
        # NOTE: generate_rshim_output() below sets mock_execute.return_value again
        mock_execute.return_value = example
        actual_rshim_slot = '/dev/rshim100'
        key, val = generate_rshim_output(mock_execute, 'e2:00', actual_rshim_slot)
        self.assertEqual(key, 'pcie-0000:e2:00.2')
        # 11 lines of canned output plus the appended RSHIM_SLOT line
        self.assertEqual(len(val), 12)
        rshim_slot = shlex.split([l for l in val if 'RSHIM_SLOT' in l][0])[1]
        self.assertEqual(rshim_slot, actual_rshim_slot)

    @patch('bf2_facts.execute')
    def test_get_mst_and_pci(self, mock_execute):
        """Only the device whose mst name has no period is kept."""
        example = """MST modules:
------------
MST PCI module is not loaded
MST PCI configuration module loaded
PCI devices:
------------
DEVICE_TYPE MST PCI RDMA NET NUMA
BlueField2(rev:1) /dev/mst/mt41686_pciconf0.1 e2:00.1 mlx5_1 net-ens7f1 1

BlueField2(rev:1) /dev/mst/mt41686_pciconf0 e2:00.0 mlx5_0 net-ens7f0 1

"""
        mock_execute.return_value = example
        mst_and_pci = bf2_facts.get_mst_and_pci()
        self.assertEqual(len(mst_and_pci), 1)
        self.assertEqual(mst_and_pci[0][0], '/dev/mst/mt41686_pciconf0')
        self.assertEqual(mst_and_pci[0][1], 'e2:00.0')

    @patch('bf2_facts.execute')
    def test_get_mode(self, mock_execute):
        """INTERNAL_CPU_MODEL is mapped to 'embedded' or 'separated'."""
        example = """
Device #1:
----------

Device type: BlueField2
Name: MBF2M516A-EEEO_Ax
Description: BlueField-2 E-Series SmartNIC 100GbE/EDR VPI Dual-Port QSFP56; PCIe Gen4 x16; Crypto Enabled; 16GB on-board DDR; 1GbE OOB management; FHHL
Device: /dev/mst/mt41686_pciconf0

Configurations: Next Boot
MEMIC_BAR_SIZE 0
MEMIC_SIZE_LIMIT _256KB(1)
HOST_CHAINING_MODE DISABLED(0)
HOST_CHAINING_CACHE_DISABLE False(0)
HOST_CHAINING_DESCRIPTORS Array[0..7]
HOST_CHAINING_TOTAL_BUFFER_SIZE Array[0..7]
INTERNAL_CPU_MODEL EMBEDDED_CPU(1)
_INTERNAL_CPU_MODEL SEPARATED_HOST(0)
FLEX_PARSER_PROFILE_ENABLE 0
PROG_PARSE_GRAPH False(0)
FLEX_IPV4_OVER_VXLAN_PORT 0
ROCE_NEXT_PROTOCOL 254
ESWITCH_HAIRPIN_DESCRIPTORS Array[0..7]
ESWITCH_HAIRPIN_TOT_BUFFER_SIZE Array[0..7]
PF_BAR2_SIZE 0
NON_PREFETCHABLE_PF_BAR False(0)
VF_VPD_ENABLE False(0)
PER_PF_NUM_SF False(0)
LINK_TYPE_P1 ETH(2)
LINK_TYPE_P2 ETH(2)
"""
        mock_execute.return_value = example
        mode = bf2_facts.get_mode('/dev/mst/mt41686_pciconf0')
        self.assertEqual(mode, 'embedded')

        # a different mst name is used here so the per-device cache is not hit
        example = """
Device #1:
----------

Device type: BlueField2
Name: MBF2M516A-EEEO_Ax
Description: BlueField-2 E-Series SmartNIC 100GbE/EDR VPI Dual-Port QSFP56; PCIe Gen4 x16; Crypto Enabled; 16GB on-board DDR; 1GbE OOB management; FHHL
Device: /dev/mst/mt41686_pciconf0.1

Configurations: Next Boot
MEMIC_BAR_SIZE 0
INTERNAL_CPU_MODEL SEPARATED_HOST(0)
"""
        mock_execute.return_value = example
        mode = bf2_facts.get_mode('/dev/mst/mt41686_pciconf0.1')
        self.assertEqual(mode, 'separated')

    @patch('bf2_facts.execute')
    def test_get_part_and_serial_number(self, mock_execute):
        """Serial and part numbers are read from the [SN] and [PN] VPD fields."""
        example = """e2:00.0 Ethernet controller: Mellanox Technologies MT42822 BlueField-2 integrated ConnectX-6 Dx network controller (rev 01)
Subsystem: Mellanox Technologies MT42822 BlueField-2 integrated ConnectX-6 Dx network controller
Physical Slot: 7-1
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B- DisINTx+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- [disabled]
Capabilities: [48] Vital Product Data
Product Name: BlueField-2 DPU 100GbE/EDR/HDR100 VPI Dual-Port QSFP56, Crypto Enabled, 16GB on-board DDR, 1GbE OOB management, Tall Bracket

Read-only fields:
[PN] Part number: MBF2M516A-EEEOT
[EC] Engineering changes: A4
[V2] Vendor specific: MBF2M516A-EEEOT
[SN] Serial number: MT2050X00614
[V3] Vendor specific: 9c20a1608d3feb118000043f72ff4c16
[VA] Vendor specific: MLX:MN=MLNX:CSKU=V2:UUID=V3:PCI=V0:MODL=BF2M516A
[V0] Vendor specific: PCIeGen4 x16
[RV] Reserved: checksum good, 1 byte(s) reserved
End
"""
        mock_execute.return_value = example
        bf2_facts.lspci_cache = dict() # need to clean it up
        serial_number = bf2_facts.get_serial_number('e2:00.0')
        self.assertEqual('MT2050X00614', serial_number)
        part_number = bf2_facts.get_part_number('e2:00.0')
        self.assertEqual('MBF2M516A-EEEOT', part_number)


    @patch('bf2_facts.execute')
    def test_no_vpd(self, mock_execute):
        """Without VPD fields both serial and part number fall back to 'UNDEFINED'."""
        example = """e2:00.0 Ethernet controller: Mellanox Technologies MT42822 BlueField-2 integrated ConnectX-6 Dx network controller (rev 01)
Subsystem: Mellanox Technologies MT42822 BlueField-2 integrated ConnectX-6 Dx network controller
Physical Slot: 7-1
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B- DisINTx+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- [disabled]
Capabilities: [48] Vital Product Data
End
"""
        mock_execute.return_value = example
        bf2_facts.lspci_cache = dict() # need to clean it up
        serial_number = bf2_facts.get_serial_number('e2:00.0')
        self.assertEqual('UNDEFINED', serial_number)
        part_number = bf2_facts.get_part_number('e2:00.0')
        self.assertEqual('UNDEFINED', part_number)
    @patch('bf2_facts.execute')
    def test_get_rshims_from_fs(self, mock_execute):
        """Each path printed by find becomes one list entry."""
        example = """/dev/rshim0
/dev/rshim1
/dev/rshim100
"""
        mock_execute.return_value = example
        rshims = bf2_facts.get_rshims_from_fs()
        self.assertEqual(3, len(rshims))

    @patch('bf2_facts.execute')
    @patch('subprocess.call')
    def test_get_rshim_from_pci(self, call, mock_execute):
        """The rshim output whose DEV_NAME matches the pci address is returned."""
        rshim_outs = {}
        pci_1 = 'aa:00'
        pci_2 = 'bb:00'
        for k,v in [(pci_1, '/dev/rshim1'), (pci_2, '/dev/rshim2')]:
            name, lines = generate_rshim_output(mock_execute, k, v)
            rshim_outs[name] = lines
        rshim_out = bf2_facts.get_rshim_from_pci(rshim_outs, pci_1)
        pci = [l for l in rshim_out if 'DEV_NAME' in l][0]
        self.assertTrue(pci_1 in pci)
        # empty case
        self.assertIsNone(bf2_facts.get_rshim_from_pci([], pci_1))

    @patch('bf2_facts.execute')
    @patch('subprocess.call')
    def test_get_mac_from_rshim_output(self, call, mock_execute):
        """The MAC on the PEER_MAC line is returned."""
        # first get some rshim_out data populated
        rshim_outs = {}
        pci = 'aa:00'
        mac = '01:01:01:01:01:01'
        name, lines = generate_rshim_output(mock_execute, pci, '/dev/rshim0', mac=mac)
        rshim_outs[name] = lines
        rshim_out = bf2_facts.get_rshim_from_pci(rshim_outs, pci)

        out_mac = bf2_facts.get_mac_from_rshim_output(rshim_out)
        self.assertEqual(mac, out_mac)

    @patch('bf2_facts.execute')
    @patch('subprocess.call')
    def test_get_rshim_slot_from_rshim_output(self, call, mock_execute):
        """The slot on the appended RSHIM_SLOT line is returned."""
        # first get some rshim_out data populated
        rshim_outs = {}
        pci = 'aa:00'
        rshim_slot = '/dev/rshim100'
        name, lines = generate_rshim_output(mock_execute, pci, rshim_slot)
        rshim_outs[name] = lines
        rshim_out = bf2_facts.get_rshim_from_pci(rshim_outs, pci)

        out_rshim_slot = bf2_facts.get_rshim_slot_from_rshim_output(rshim_out)
        self.assertEqual(rshim_slot, out_rshim_slot)

    @patch('bf2_facts.execute')
    def test_get_restriction_level(self, mock_execute):
        """The ``level`` value is returned lower-cased."""
        example = """Current device configurations:
------------------------------
level : PRIVILEGED

Port functions status:
-----------------------
disable_rshim : FALSE
disable_tracer : FALSE
disable_port_owner : FALSE
disable_counter_rd : FALSE

"""
        mock_execute.return_value = example
        level = bf2_facts.get_restriction_level('/dev/mst/mt41686_pciconf0')
        self.assertEqual(level, 'privileged')

    @patch('bf2_facts.execute')
    def test_get_versions(self, mock_execute):
        """Versions are keyed by component name, including the three-word Virtio name."""
        example = """Querying Mellanox devices firmware ...

Device #1:
----------

Device Type: BlueField2
Part Number: MBF2M516A-EEEO_Ax
Description: BlueField-2 E-Series SmartNIC 100GbE/EDR VPI Dual-Port QSFP56; PCIe Gen4 x16; Crypto Enabled; 16GB on-board DDR; 1GbE OOB management; FHHL
PSID: MT_0000000559
PCI Device Name: /dev/mst/mt41686_pciconf0
Base MAC: 043f72a45a9c
Versions: Current Available
FW 24.29.2008 N/A
PXE 3.6.0205 N/A
UEFI 14.22.0019 N/A
UNKNOWN_ROM 22.1.0011 N/A
UEFI Virtio x 1.2.3.4

Status: No matching image found

"""
        mock_execute.return_value = example
        versions = bf2_facts.get_versions('/dev/mst/mt41686_pciconf0')
        self.assertEqual(versions['FW'], '24.29.2008')
        self.assertEqual(versions['PXE'], '3.6.0205')
        self.assertEqual(versions['UEFI'], '14.22.0019')
        self.assertEqual(versions['UNKNOWN_ROM'], '22.1.0011')
        self.assertEqual(versions['UEFI Virtio x'], '1.2.3.4')
372 |
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
375 |
--------------------------------------------------------------------------------