├── .github ├── dependabot.yml └── workflows │ ├── codeql.yml │ ├── dependency-review.yml │ ├── scorecard.yml │ └── static-scan.yml ├── .gitignore ├── LICENSE ├── README.md ├── SECURITY.md ├── ansible ├── README.md ├── playbooks │ ├── deploy-lowlatency.yml │ ├── deploy-powersaving.yml │ ├── deploy-powerupdown.yml │ ├── deploy-waitpkg.yml │ └── inventory │ │ └── hosts └── roles │ └── commspower-platform-provisioning │ ├── defaults │ └── main.yml │ ├── handlers │ └── main.yml │ ├── meta │ └── main.yml │ └── tasks │ ├── check_cstate_driver.yml │ ├── check_msr_driver.yml │ ├── check_provisioning_dir.yml │ ├── check_pstate_driver.yml │ ├── check_waitpkg_platform_support.yml │ ├── clone_commspower_repo.yml │ ├── configure_waitpkg.yml │ ├── disable_numa_balancing.yml │ ├── install_git.yml │ ├── install_msr_tools.yml │ ├── install_python.yml │ ├── lowlatency.yml │ ├── lowpower.yml │ ├── main.yml │ ├── powersaving.yml │ ├── set_uncore_frequency.yml │ ├── update_grub_reboot.yml │ └── very-lowpower.yml ├── intel_sst_os_interface └── mailbox.md ├── ipm └── patches │ ├── dpdk │ ├── 20.11 │ │ ├── 0001-eal-add-lcore-busyness-telemetry.patch │ │ ├── 0002-eal-add-cpuset-lcore-telemetry-entries.patch │ │ └── 0003-add-capacity-endpoint-to-telemetry-thread.patch │ ├── 21.11 │ │ ├── 0001-eal-add-lcore-busyness-telemetry.patch │ │ ├── 0002-eal-add-cpuset-lcore-telemetry-entries.patch │ │ └── 0003-add-capacity-endpoint-to-telemetry-thread.patch │ ├── 22.11 │ │ ├── 0001-eal-add-lcore-busyness-telemetry.patch │ │ ├── 0002-eal-add-cpuset-lcore-telemetry-entries.patch │ │ └── 0003-add-capacity-endpoint-to-telemetry-thread.patch │ ├── 23.11 │ │ ├── 0001-eal-add-lcore-busyness-telemetry.patch │ │ ├── 0002-eal-add-cpuset-lcore-telemetry-entries.patch │ │ └── 0003-add-capacity-endpoint-to-telemetry-thread.patch │ └── README.md │ └── vpp │ ├── 20.09 │ ├── 0001-Subject-PATCH-1-3-vlib-CPU-load-measurement-and-CLI.patch │ ├── 0002-Subject-PATCH-2-3-stats-Added-CPU-load-and-queue-bur.patch │ ├── 
0003-Subject-PATCH-3-3-stats-encode-cpu-id-in-utilization.patch │ ├── 0004-Subject-PATCH-1-1-stats-Added-capacity-flags.patch │ └── README │ ├── 21.01 │ ├── 0001-Subject-PATCH-1-3-vlib-CPU-load-measurement-and-CLI.patch │ ├── 0002-Subject-PATCH-2-3-stats-Added-CPU-load-and-queue-bur.patch │ ├── 0003-Subject-PATCH-3-3-stats-encode-cpu-id-in-utilization.patch │ └── 0004-Subject-PATCH-1-1-stats-Added-capacity-flags.patch │ ├── 22.02 │ ├── 0001-vlib-CPU-load-measurement-and-CLI.patch │ ├── 0002-stats-Added-CPU-load-and-queue-burst-flag-in-stats.patch │ ├── 0003-stats-encode-cpu-id-in-utilization-metric.patch │ └── 0004-stats-Added-capacity-flag-in-stats.patch │ ├── 23.02 │ ├── 0001-vlib-CPU-load-measurement-and-CLI.patch │ ├── 0002-stats-Added-CPU-load-and-queue-burst-flag-in-stats.patch │ ├── 0003-stats-encode-cpu-id-in-utilization-metric.patch │ └── 0004-stats-Added-capacity-flag-in-stats.patch │ └── 24.02 │ ├── 0001-vlib-CPU-load-measurement-and-CLI.patch │ ├── 0002-stats-Added-CPU-load-and-queue-burst-flag-in-stats.patch │ ├── 0003-stats-encode-cpu-id-in-utilization-metric.patch │ └── 0004-stats-Added-capacity-flag-in-stats.patch ├── msrtool └── rw_msr_tool.py ├── power.md ├── power.py ├── pwr.md ├── pwr ├── pwr.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ ├── not-zip-safe │ └── top_level.txt ├── pwr │ ├── __init__.py │ ├── internal │ │ ├── __init__.py │ │ └── cpuinfo.py │ └── pwr.py └── setup.py ├── sst_bf.md ├── sst_bf.py └── telemetry ├── README.md └── pkgpower.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: daily 7 | 8 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | workflow_dispatch: 5 
| push: 6 | branches: [ "main" ] 7 | pull_request: 8 | branches: [ "main" ] 9 | schedule: 10 | - cron: "37 4 * * 0" 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | analyze: 17 | name: Analyze 18 | runs-on: ubuntu-latest 19 | permissions: 20 | actions: read 21 | contents: read 22 | security-events: write 23 | 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | language: [ Python ] 28 | 29 | steps: 30 | - name: Harden Runner 31 | uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0 32 | with: 33 | egress-policy: audit 34 | 35 | - name: Set up Python 36 | uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 37 | with: 38 | python-version: 3.12.4 39 | 40 | - name: Checkout 41 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 42 | 43 | - name: Initialize CodeQL 44 | uses: github/codeql-action/init@ff0a06e83cb2de871e5a09832bc6a81e7276941f # v3.28.18 45 | with: 46 | languages: ${{ matrix.language }} 47 | queries: +security-and-quality 48 | 49 | - name: Autobuild 50 | uses: github/codeql-action/autobuild@ff0a06e83cb2de871e5a09832bc6a81e7276941f # v3.28.18 51 | 52 | - name: Perform CodeQL Analysis 53 | uses: github/codeql-action/analyze@ff0a06e83cb2de871e5a09832bc6a81e7276941f # v3.28.18 54 | with: 55 | category: "/language:${{ matrix.language }}" 56 | 57 | - name: CodeQL and Dependabot Report Action 58 | if: ${{ github.event_name == 'workflow_dispatch' }} 59 | uses: rsdmike/github-security-report-action@a149b24539044c92786ec39af8ba38c93496495d # v3.0.4 60 | with: 61 | template: report 62 | token: ${{ secrets.SECURITY_TOKEN }} 63 | 64 | - name: GitHub Upload Release Artifacts 65 | if: ${{ github.event_name == 'workflow_dispatch' }} 66 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 67 | with: 68 | name: report 69 | path: | 70 | ./report.pdf 71 | --------------------------------------------------------------------------------
/.github/workflows/dependency-review.yml: -------------------------------------------------------------------------------- 1 | # Dependency Review Action 2 | # 3 | # This Action will scan dependency manifest files that change as part of a Pull Request, 4 | # surfacing known-vulnerable versions of the packages declared or updated in the PR. 5 | # Once installed, if the workflow run is marked as required, 6 | # PRs introducing known-vulnerable packages will be blocked from merging. 7 | # 8 | # Source repository: https://github.com/actions/dependency-review-action 9 | name: 'Dependency Review' 10 | on: [pull_request] 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | dependency-review: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Harden Runner 20 | uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0 21 | with: 22 | egress-policy: audit 23 | 24 | - name: 'Checkout Repository' 25 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v3.5.2 26 | - name: 'Dependency Review' 27 | uses: actions/dependency-review-action@da24556b548a50705dd671f47852072ea4c105d9 # v4.7.1 28 | -------------------------------------------------------------------------------- /.github/workflows/scorecard.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. They are provided 2 | # by a third-party and are governed by separate terms of service, privacy 3 | # policy, and support documentation. 4 | 5 | name: Scorecard supply-chain security 6 | on: 7 | # For Branch-Protection check. Only the default branch is supported. See 8 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection 9 | branch_protection_rule: 10 | # To guarantee Maintained check is occasionally updated. 
See 11 | # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained 12 | # schedule: 13 | # - cron: '32 15 * * 6' 14 | push: 15 | branches: [ "main" ] 16 | 17 | # Declare default permissions as read only. 18 | permissions: read-all 19 | 20 | jobs: 21 | analysis: 22 | name: Scorecard analysis 23 | runs-on: ubuntu-latest 24 | permissions: 25 | # Needed to upload the results to code-scanning dashboard. 26 | security-events: write 27 | # Needed to publish results and get a badge (see publish_results below). 28 | id-token: write 29 | # Uncomment the permissions below if installing in a private repository. 30 | # contents: read 31 | # actions: read 32 | 33 | steps: 34 | - name: Harden Runner 35 | uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0 36 | with: 37 | egress-policy: audit 38 | 39 | - name: "Checkout code" 40 | uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v3.1.0 41 | with: 42 | persist-credentials: false 43 | 44 | - name: "Run analysis" 45 | uses: ossf/scorecard-action@f49aabe0b5af0936a0987cfb85d86b75731b0186 # v2.4.1 46 | with: 47 | results_file: results.sarif 48 | results_format: sarif 49 | # (Optional) "write" PAT token. Uncomment the `repo_token` line below if: 50 | # - you want to enable the Branch-Protection check on a *public* repository, or 51 | # - you are installing Scorecard on a *private* repository 52 | # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action#authentication-with-pat. 53 | # repo_token: ${{ secrets.SCORECARD_TOKEN }} 54 | 55 | # Public repositories: 56 | # - Publish results to OpenSSF REST API for easy access by consumers 57 | # - Allows the repository to include the Scorecard badge. 58 | # - See https://github.com/ossf/scorecard-action#publishing-results. 59 | # For private repositories: 60 | # - `publish_results` will always be set to `false`, regardless 61 | # of the value entered here. 
62 | publish_results: true 63 | 64 | # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF 65 | # format to the repository Actions tab. 66 | - name: "Upload artifact" 67 | uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 68 | with: 69 | name: SARIF file 70 | path: results.sarif 71 | retention-days: 5 72 | 73 | # Upload the results to GitHub's code scanning dashboard. 74 | - name: "Upload to code-scanning" 75 | uses: github/codeql-action/upload-sarif@ff0a06e83cb2de871e5a09832bc6a81e7276941f # v3.28.18 76 | with: 77 | sarif_file: results.sarif 78 | -------------------------------------------------------------------------------- /.github/workflows/static-scan.yml: -------------------------------------------------------------------------------- 1 | name: static-analysis 2 | on: [push, pull_request] 3 | 4 | permissions: 5 | contents: read 6 | 7 | jobs: 8 | shellcheck: 9 | name: Shellcheck 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Harden Runner 13 | uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0 14 | with: 15 | egress-policy: audit 16 | 17 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v3.5.3 18 | - name: Run ShellCheck 19 | uses: ludeeus/action-shellcheck@00cae500b08a931fb5698e11e79bfbd38e612a38 # main 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .* 2 | *.pyc 3 | build 4 | dist 5 | !.github 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Valid-License-Identifier: BSD-3-Clause 2 | SPDX-URL: https://spdx.org/licenses/BSD-3-Clause.html 3 | Usage-Guide: 4 | To use the BSD 3-clause "New" or "Revised" License put the following SPDX 5 | tag/value pair into a 
comment according to the placement guidelines in 6 | the licensing rules documentation: 7 | SPDX-License-Identifier: BSD-3-Clause 8 | License-Text: 9 | 10 | Copyright (c) 2017-2024 Intel Corporation. All rights reserved. 11 | 12 | Redistribution and use in source and binary forms, with or without 13 | modification, are permitted provided that the following conditions are met: 14 | 15 | 1. Redistributions of source code must retain the above copyright notice, 16 | this list of conditions and the following disclaimer. 17 | 18 | 2. Redistributions in binary form must reproduce the above copyright 19 | notice, this list of conditions and the following disclaimer in the 20 | documentation and/or other materials provided with the distribution. 21 | 22 | 3. Neither the name of the copyright holder nor the names of its 23 | contributors may be used to endorse or promote products derived from this 24 | software without specific prior written permission. 25 | 26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 27 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 30 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 | POSSIBILITY OF SUCH DAMAGE. 
37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) 2 | [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/intel/CommsPowerManagement/badge)](https://securityscorecards.dev/viewer/?uri=github.com/intel/CommsPowerManagement) 3 | ![CodeQL](https://github.com/intel/CommsPowerManagement/actions/workflows/codeql.yml/badge.svg?branch=main) 4 | 5 | 6 | # Introduction 7 | 8 | Recent generations of the Intel® Xeon® family processors allow configurations 9 | where: 10 | 11 | 1. Turbo Boost can be enabled on a per-core basis. 12 | 2. Some cores can be given a higher base frequency than others. 13 | 14 | # Overview 15 | 16 | The scripts provided are as follows: 17 | 18 | [power.py](power.md) allows the user 19 | to adjust the frequencies and Turbo-Boost availability on a core-by-core basis. 20 | This script allows the adjustment of P-states, C-states and Turbo-Boost. 21 | 22 | [sst_bf.py](sst_bf.md) allows the user to configure the system for 23 | Intel® Speed Select Technology - Base Frequency (Intel® SST-BF). 24 | This allows some cores to run at a higher base frequency than 25 | others. 26 | 27 | [pwr.py](pwr.md) is a Python library which can be imported into an application, 28 | to measure/modify core frequencies of a CPU to utilize Intel® Speed Select Technology. 29 | 30 | Please click on the links to see more information on the scripts.
31 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation. 3 | 4 | ## Reporting a Vulnerability 5 | Please report any security vulnerabilities in this project utilizing the guidelines [here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html). 6 | -------------------------------------------------------------------------------- /ansible/README.md: -------------------------------------------------------------------------------- 1 | # README for comms_platform_provisioning 2 | October 2020 3 | 4 | ## CONTENTS 5 | 6 | - Introduction 7 | - Requirements 8 | - Role variables 9 | - Example playbook 10 | - Usage 11 | 12 | ## INTRODUCTION 13 | An Ansible collection with a single role, comms_power_provisioning, that 14 | configures the system for `lowlatency`, `powersaving`, `lowpower` and `very-lowpower`. 15 | 16 | ## PREREQUISITES 17 | - System with any of the below listed distros, with python3 and Ansible >= 2.9.4 installed. 18 | - RHEL/CentOS/Debian/SUSE. 19 | 20 | - Required global proxy settings setup according to the distribution. 21 | Inappropriate proxy settings might block the script from cloning repos 22 | or installing required tools packages. 23 | 24 | ## ROLE VARIABLES 25 | All variables are stored in the main.yml file under the role's "defaults" directory. 26 | The parameters have to be adjusted based on user needs. 27 | 28 | **Note**: Example `lowlatency` boot parameters are given in main.yml of the "defaults" 29 | directory. These parameters have to be adjusted based on user system need. 30 | 31 | ## INVENTORY FILE 32 | Ansible uses an inventory file to get the list of servers which should be provisioned.
33 | The user can choose any one of the below listed files as the inventory file and 34 | add the server information to the inventory file as per the ansible guidelines. 35 | 36 | ``` 37 | /etc/ansible/hosts 38 | /etc/ansible/hosts.yaml 39 | ansible/playbooks/inventory/hosts 40 | ``` 41 | 42 | ## EXAMPLE PLAYBOOK 43 | Playbook to deploy `lowlatency`: 44 | ``` 45 | - name: Deploy commspower-platform-provisioning 46 | hosts: webservers 47 | roles: 48 | - role: ../roles/commspower-platform-provisioning 49 | state: lowlatency 50 | ``` 51 | 52 | Playbook to deploy `powersaving`: 53 | ``` 54 | - name: Deploy commspower-platform-provisioning 55 | hosts: webservers 56 | roles: 57 | - role: ../roles/commspower-platform-provisioning 58 | state: powersaving 59 | ``` 60 | 61 | Playbook to deploy either `lowpower` or `very-lowpower` provisioning on selected cores. The "state" will be set to either `lowpower` or `very-lowpower` during the run based on the action variable. More about this playbook can be found in the section below. 62 | ``` 63 | - name: Deploy commspower-platform-provisioning 64 | hosts: webservers 65 | roles: 66 | - role: ../roles/commspower-platform-provisioning 67 | state: lowpower or very-lowpower 68 | ``` 69 | 70 | ## Usage 71 | . Edit the `main.yml` file in `ansible/roles/commspower-platform-provisioning/defaults` to 72 | change the variable values as per the user need. 73 | 74 | . Use either of the below inventory files to define target servers information. 75 | ansible/playbooks/inventory/hosts 76 | 77 | . Run `lowlatency` provisioning as below. The `lowlatency` profile is to configure 78 | the system for measuring latency of the workloads with minimum jitter. This profile 79 | configures the system with low jitter boot parameters 80 | (defined inside `ansible/roles/commspower-platform-provisioning/defaults`) 81 | and reboots it. It also configures the power management features like uncore frequency 82 | and C-states.
It fixes the uncore frequency and disables the C-states on the system to 83 | avoid the jitter caused by uncore frequency changes and C-state exit latencies. 84 | 85 | ``` 86 | ansible-playbook -i 87 | Example: ansible-playbook -i ansible/playbooks/inventory/hosts ansible/playbooks/deploy-lowlatency.yml 88 | ``` 89 | . Run `powersaving` provisioning as below 90 | ``` 91 | ansible-playbook -i 92 | Example: ansible-playbook -i ansible/playbooks/inventory/hosts ansible/playbooks/deploy-powersaving.yml 93 | ``` 94 | . Run `lowpower` and `very-lowpower` provisioning as below. 95 | The playbook `deploy-powerupdown.yml` will take care of setting the 96 | system to below profiles. 97 | 98 | ```lowpower``` 99 | 100 | This profile turns the C6 C-state `on` or `off` for the list of cores the 101 | user wants. User has to specify the list of cores and C6 C-state as 102 | `on` or `off`. 103 | 104 | ```very-lowpower``` 105 | 106 | This profile turns C6 C-state `on` or `off` for the list of cores the user 107 | wants. It will also lower the max frequency of the cores by the given number 108 | of bins. User has to specify the list of cores on which C6 C-state should be 109 | `on` or `off`. Also, user has to specify the number of bins by which max frequency 110 | of the listed cores should be lowered. The user should specify the core list, 111 | C6 C-state, bins and action details as variables inside server group variables 112 | files under the system default path i.e. `/etc/ansible/` as below. 113 | 114 | ``` 115 | Example file: /etc/ansible/group_vars/webservers.yaml 116 | mkdir /etc/ansible/group_vars 117 | touch /etc/ansible/group_vars/ 118 | ``` 119 | The action sections with variables should be added like below to the file created. 120 | The number of actions can vary. 121 | 122 | **Note:** Do not add actions with an empty core list. 
123 | 124 | ``` 125 | actions_to_apply: 126 | - action: lowpower 127 | c6state: 'on' 128 | core_list: 129 | - 10 130 | - 11 131 | - action: very-lowpower 132 | bins_to_lower: 2 133 | c6state: 'off' 134 | core_list: 135 | - 1 136 | - 3 137 | - action: very-lowpower 138 | bins_to_lower: 4 139 | c6state: 'off' 140 | core_list: 141 | - 8 142 | - 9 143 | ``` 144 | 145 | ```waitpkg``` 146 | 147 | This profile configures the WAITPKG instructions on a supported platform. 148 | It enables the C0.2 state and sets the max. wait time in TSC ticks to the value 149 | specified in `defaults/main.yml`. The configuration is done using the SYSFS interface 150 | in `/sys/devices/system/cpu/umwait_control` or via MSR if the SYSFS interface is not 151 | available. The `tsx=on` parameter should be added to the list of GRUB kernel 152 | parameters in the `grub_cmdline_add_args` variable in order to enable monitoring 153 | of multiple addresses. Parameters that are not required may be removed. 154 | The playbook will check whether the parameter is present in the currently running 155 | kernel. If not, the playbook will reconfigure the GRUB entry and reboot into 156 | the reconfigured kernel.
To apply the configuration run the following: 157 | ``` 158 | ansible-playbook -i ansible/playbooks/inventory/hosts ansible/playbooks/deploy-waitpkg.yml 159 | ``` 160 | 161 | ``` 162 | ansible-playbook -i 163 | Example: ansible-playbook -i ansible/playbooks/inventory/hosts ansible/playbooks/deploy-powerupdown.yml 164 | ``` 165 | -------------------------------------------------------------------------------- /ansible/playbooks/deploy-lowlatency.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | --- 4 | - name: Deploy commspower-platform-provisioning 5 | hosts: webservers 6 | roles: 7 | - role: ../roles/commspower-platform-provisioning 8 | state: lowlatency 9 | -------------------------------------------------------------------------------- /ansible/playbooks/deploy-powersaving.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | --- 4 | - name: Deploy commspower-platform-provisioning 5 | hosts: webservers 6 | roles: 7 | - role: ../roles/commspower-platform-provisioning 8 | state: powersaving 9 | -------------------------------------------------------------------------------- /ansible/playbooks/deploy-powerupdown.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2021-22 Intel Corporation 3 | --- 4 | - name: Deploy commspower-platform-provisioning 5 | hosts: all 6 | tasks: 7 | - name: loop over role 8 | include_role: 9 | name: ../roles/commspower-platform-provisioning 10 | vars: 11 | state: "{{action_tobe.action}}" 12 | with_items: "{{actions_to_apply}}" 13 | loop_control: 14 | loop_var: action_tobe 15 | -------------------------------------------------------------------------------- 
/ansible/playbooks/deploy-waitpkg.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | --- 4 | - name: Deploy commspower-platform-provisioning 5 | hosts: webservers 6 | roles: 7 | - role: ../roles/commspower-platform-provisioning 8 | state: waitpkg 9 | -------------------------------------------------------------------------------- /ansible/playbooks/inventory/hosts: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | [webservers] 5 | 6 | # These hostname must be resolvable from deployment host 7 | 8 | #webserver01 9 | #webserver02 10 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/defaults/main.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | # defaults file for lowlatency-platform-provisioning 6 | state: present 7 | 8 | # CommsPowerManagement repo details 9 | commspower_repo: https://github.com/intel/CommsPowerManagement.git 10 | commspower_repo_version: master 11 | 12 | # Directory where CommsPowerManagement repo should be cloned. 13 | commspower_dir: commspower_repo 14 | 15 | # MSR tools for SUSE 16 | msrtools_repo: https://github.com/intel/msr-tools.git 17 | msrtools_version: msr-tools-1.3 18 | msrtools_dir: msrtools_repo 19 | 20 | # list of latency measuring cores. 21 | cores: 22 | - 20 23 | - 21 24 | - 22 25 | 26 | # List of example boot params for low latency system, 27 | # out of which tsc, isolcpus, rcu_nocbs, nohz_full are 28 | # important parameters, rest of the parameters should be 29 | # adjusted as per the need, so understand impact of them 30 | # before using. 
31 | # Any existing grub boot command will be replaced with this. 32 | 33 | grub_cmdline_add_args: 34 | - ro 35 | - rhgb 36 | - quiet 37 | - hugepages=4 38 | - hugepagesz=1G 39 | - intel_iommu=on 40 | - iommu=pt 41 | - mce=off 42 | - nosoftlockup 43 | - nmi_watchdog=0 44 | - hpet=disable 45 | - tsc=reliable 46 | - isolcpus=20-22 47 | - rcu_nocbs=20-22 48 | - nohz_full=20-22 49 | 50 | # grub file path 51 | grub_cmdline_default: /etc/default/grub 52 | 53 | # Paths to various boot config files 54 | efi_boot_config: /boot/efi/EFI 55 | grub_boot_config: /boot/grub 56 | grub2_boot_config: /boot/grub2 57 | grubmkcfg: /usr/sbin/grub*-mkconfig 58 | 59 | # pstate driver path 60 | pstate_sysfs_path: /sys/devices/system/cpu/cpufreq/policy0/scaling_driver 61 | 62 | # cstate driver path 63 | cstate_sysfs_path: /sys/devices/system/cpu/cpuidle/current_driver 64 | 65 | 66 | # From kernel >= 5.6, sysfs path can be used to set the uncore 67 | # frequency. 68 | # ls /sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/ 69 | # initial_max_freq_khz initial_min_freq_khz max_freq_khz min_freq_khz 70 | uncore_sysfs_path: /sys/devices/system/cpu/intel_uncore_frequency 71 | uncore_maxfreq_file: max_freq_khz 72 | uncore_minfreq_file: min_freq_khz 73 | 74 | # uncore p1 frequency (in Khz) to set using sysfs entry 75 | uncore_freq_clx_khz: 1800000 76 | uncore_freq_icx_khz: 1400000 77 | 78 | # The hex values below represents Uncore minimum and maximum Frequency 79 | # to be set using the MSR 0x620. 80 | # Higher byte represents minimum Uncore Frequency. 81 | # Lower byte represents maximum Uncore Frequency. 
82 | # The decimal equivalent of byte 0x12 is 18, so 18*100Mhz=1800Mhz is set 83 | # as the Uncore minimum and maximum Frequency on the CLX system 84 | # Similarly The decimal equivalent of byte 0x0e is 14 , so 14*100Mhz=1400Mhz 85 | # is set as the Uncore minimum and maximum Frequency on the ICX system 86 | uncore_freq_clx: 0x1212 87 | uncore_freq_icx: 0x0e0e 88 | 89 | # Cascade Lake 90 | clx_type: Intel(R) Xeon(R) Gold 6230N CPU @ 2.30GHz 91 | 92 | # provisioning status files will be created under this path. 93 | prov_status_dir: /var/run/intel/platform-provisioning 94 | 95 | cpu_path: /sys/devices/system/cpu 96 | 97 | # WAITPKG sysfs control paths 98 | umwait_sys_path: /sys/devices/system/cpu/umwait_control 99 | umwait_control_max_time_syspath: max_time 100 | umwait_control_c02_enable_syspath: enable_c02 101 | 102 | # maximum number of TSC ticks for WAITPKG instructions (set to 0 to unlimited) 103 | waitpkg_max_time_ticks: 100000 -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/handlers/main.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | # handlers file for commspower-platform-provisioning 6 | 7 | - name: Print nohzcheck 8 | shell: echo "CONFIG_NO_HZ_FULL=y is set" 9 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/meta/main.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | galaxy_info: 6 | author: Reshma Pattan 7 | description: Configures the system for power saving and low latency workloads. 
8 | company: Intel Corporation 9 | 10 | license: BSD-3-Clause 11 | 12 | min_ansible_version: 2.9 13 | 14 | platforms: 15 | - name: Suse 16 | versions: 17 | - 12 18 | - name: CentOS 19 | versions: 20 | - 7 21 | - 8 22 | - name: RHEL 23 | versions: 24 | - 8.2 25 | - 8.3 26 | - name: Debian 27 | 28 | galaxy_tags: [] 29 | 30 | dependencies: [] 31 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/check_cstate_driver.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | 6 | # The driver name is the *content* of the sysfs file, so it has to be read with 7 | # grep; the stat module only tests whether a path exists and cannot run a command. 8 | # A non-zero rc means intel_idle was not found in the file. 9 | # NOTE(review): cstate_driver is now a command result (rc), not a stat result; 10 | # confirm no other task file reads cstate_driver.stat. 11 | - name: Check cstate driver is enabled 12 | command: grep -q 'intel_idle' "{{ cstate_sysfs_path }}" 13 | register: cstate_driver 14 | changed_when: false 15 | failed_when: false 16 | check_mode: no 17 | 18 | - name: Print cstate driver state 19 | debug: 20 | msg: "cstate driver should be enabled" 21 | when: cstate_driver.rc != 0 22 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/check_msr_driver.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | 6 | # Check for CONFIG_X86_MSR 7 | - name: Check for CONFIG_X86_MSR=m 8 | command: grep CONFIG_X86_MSR=m /boot/config-"{{ansible_kernel}}" 9 | register: msrcheck 10 | changed_when: false 11 | ignore_errors: yes 12 | 13 | # Load MSR module 14 | - name: Load MSR 15 | modprobe: 16 | name: msr 17 | state: present 18 | # check if msr is available as module in config file, if so do modprobe 19 | when: msrcheck.rc == 0 20 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/check_provisioning_dir.yml:
-------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | - name: Check profile directory exists 6 | stat: 7 | path: "{{ prov_status_dir }}" 8 | register: profile_folder 9 | 10 | - name: Create profile dir if not exists 11 | file: 12 | path: "{{ prov_status_dir }}" 13 | state: directory 14 | mode: 0755 15 | group: root 16 | owner: root 17 | when: not profile_folder.stat.exists 18 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/check_pstate_driver.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | 6 | # Enable the pstate driver to use HWP registers. 7 | # The driver name is the *content* of the sysfs file, so it has to be read with 8 | # grep; the stat module only tests whether a path exists and cannot run a command. 9 | # acpi[-_]cpufreq accepts both spellings (the kernel reports "acpi-cpufreq"). 10 | # NOTE(review): pstate_driver is now a command result (rc), not a stat result; 11 | # confirm no other task file reads pstate_driver.stat. 12 | - name: Check if frequency scaling is available 13 | command: grep -q -E 'intel_pstate|acpi[-_]cpufreq' "{{ pstate_sysfs_path }}" 14 | register: pstate_driver 15 | changed_when: false 16 | failed_when: false 17 | check_mode: no 18 | 19 | - name: Print pstate driver state 20 | debug: 21 | msg: "pstate driver should be enabled" 22 | when: pstate_driver.rc != 0 23 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/check_waitpkg_platform_support.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2022-23 Intel Corporation 3 | 4 | --- 5 | # check WAITPKG availability 6 | - name: Check if WAITPKG is supported on the platform 7 | command: grep -q 'waitpkg' '/proc/cpuinfo' 8 | register: waitpkg_available 9 | changed_when: false 10 | failed_when: waitpkg_available.rc != 0 11 | check_mode: no 12 | 13 | - debug: 14 | msg: "{{ waitpkg_available }}" 15 | when: ansible_check_mode 16 |
-------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/clone_commspower_repo.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | # Create the directory under /tmp folder, ansible will name the 6 | # dir as ansible*. 7 | - name: Create commspower temporary dir 8 | tempfile: 9 | state: directory 10 | suffix: "{{ commspower_dir }}" 11 | register: commspower_path 12 | 13 | # Clone the commspower repo to /tmp/ansible*. 14 | - name: Clone CommsPowerManagement repo 15 | git: 16 | repo: "{{ commspower_repo }}" 17 | version: "{{ commspower_repo_version }}" 18 | dest: "{{ commspower_path.path }}" 19 | update: yes 20 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/configure_waitpkg.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2022-23 Intel Corporation 3 | 4 | --- 5 | 6 | - import_tasks: check_waitpkg_platform_support.yml 7 | 8 | - name: Check if TSX is enabled 9 | command: grep -q -i "tsx=on" /proc/cmdline 10 | register: tsx_active 11 | changed_when: tsx_active.rc != 0 12 | failed_when: false 13 | check_mode: no 14 | 15 | - name: Fail if TSX switch is not present in ansible GRUB config 16 | fail: 17 | msg: "Cannot reconfigure GRUB to activate TSX. Missing \"tsx=on\" in grub_cmdline_add_args in defaults/main.yml." 
18 | check_mode: no 19 | changed_when: false 20 | when: tsx_active.rc != 0 and grub_cmdline_add_args is not contains('tsx=on') 21 | 22 | - name: Add tsx=on kernel parameter in GRUB if needed 23 | include_tasks: update_grub_reboot.yml 24 | when: tsx_active.rc != 0 25 | 26 | - name: Validate waitpkg_max_time_ticks value in defaults/main.yml 27 | fail: 28 | msg: waitpkg_max_time_ticks value must be divisble by 4 29 | when: waitpkg_max_time_ticks % 4 != 0 30 | 31 | - name: Check availability of WAITPKG sysfs interface 32 | stat: 33 | path: "{{ umwait_sys_path }}" 34 | register: umwait_control_paths 35 | ignore_errors: yes 36 | check_mode: no 37 | 38 | - debug: 39 | msg: "{{ umwait_control_paths }}" 40 | when: ansible_check_mode 41 | 42 | - name: Configure WAITPKG using SYSFS 43 | block: 44 | - name: Read the max_time value 45 | command: cat "{{ umwait_sys_path }}/{{ umwait_control_max_time_syspath }}" 46 | changed_when: false 47 | register: umwait_max_time 48 | check_mode: no 49 | 50 | - debug: 51 | msg: 52 | - "Current max_time: {{ umwait_max_time.stdout }}" 53 | - "Config max_time: {{ waitpkg_max_time_ticks }}" 54 | when: ansible_check_mode 55 | 56 | - name: Write max_time value if different from configuration (SYSFS) 57 | shell: > 58 | echo "{{ waitpkg_max_time_ticks }}" 59 | | tee "{{ umwait_sys_path }}/{{ umwait_control_max_time_syspath }}" 60 | when: waitpkg_max_time_ticks != umwait_max_time.stdout | int 61 | 62 | - name: Enable C0.2 sleep state 63 | shell: > 64 | echo 1 65 | | tee "{{ umwait_sys_path }}/{{ umwait_control_c02_enable_syspath }}" 66 | when: umwait_control_paths.stat.exists 67 | 68 | - name: Configure WAITPKG using MSR tools 69 | block: 70 | - name: Read the max_time value (MSR) 71 | command: rdmsr -f 31:2 -u 0xe1 72 | register: umwait_max_time 73 | changed_when: false 74 | check_mode: no 75 | 76 | - debug: 77 | msg: 78 | - "Current max_time: {{ umwait_max_time.stdout | int * 4 }}" 79 | - "Config max_time: {{ waitpkg_max_time_ticks }}" 80 | when: 
ansible_check_mode and umwait_max_time.rc == 0 81 | 82 | # writing just the value waitpkg_max_time_ticks forces enabling of 83 | # C0.2 state (waitpkg_max_time_ticks must be divisible by 4 84 | # which also unsets the "Disable C0.2" bit) 85 | - name: Write max_time value if different from configuration (MSR) 86 | shell: > 87 | wrmsr -a 0xe1 $(printf '0x%x' 88 | {{ waitpkg_max_time_ticks 89 | if waitpkg_max_time_ticks != (umwait_max_time.stdout | int * 4) 90 | else umwait_max_time.stdout | int * 4 }}) 91 | 92 | when: not umwait_control_paths.stat.exists 93 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/disable_numa_balancing.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2021-22 Intel Corporation 3 | 4 | --- 5 | 6 | # Check for CONFIG_NUMA_BALANCING 7 | - name: Check for CONFIG_NUMA_BALANCING=y 8 | command: grep CONFIG_NUMA_BALANCING=y /boot/config-"{{ansible_kernel}}" 9 | register: numa_blancing_check 10 | changed_when: false 11 | ignore_errors: yes 12 | 13 | # Disable numa balancing 14 | - name: Disable Numa Balancing 15 | shell: 16 | echo "0" > /proc/sys/kernel/numa_balancing 17 | # if numa balancing enabled 18 | when: numa_blancing_check.rc == 0 19 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/install_git.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | - name: Install GIT - RedHat 6 | yum: 7 | name: git 8 | state: present 9 | when: ansible_os_family == 'RedHat' 10 | 11 | - name: Install GIT - CentOS 12 | yum: 13 | name: git 14 | state: present 15 | when: ansible_os_family == 'CentOS' 16 | 17 | - name: Install GIT - SUSE 18 | zypper: 
'name=git-core state=present' 19 | when: ansible_os_family == 'Suse' 20 | 21 | - name: Install GIT - Debian 22 | apt: 23 | name: git 24 | state: present 25 | when: ansible_os_family == 'Debian' 26 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/install_msr_tools.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | # Create the directory under /tmp folder, ansible will name the 6 | # dir as ansible*. 7 | - name: Create msrtools temporary dir 8 | tempfile: 9 | state: directory 10 | suffix: "{{ msrtools_dir }}" 11 | register: msrtools_path 12 | 13 | # Clone the msrtools repo to /tmp/ansible*. 14 | - name: Clone msrtools repo 15 | git: 16 | repo: "{{ msrtools_repo }}" 17 | version: "{{ msrtools_version }}" 18 | dest: "{{ msrtools_path.path }}" 19 | update: yes 20 | 21 | # Build msr-tools 22 | - name: Build and install msrtools repo 23 | make: 24 | chdir: "{{ msrtools_path.path }}" 25 | target: install 26 | 27 | # Remove msr-tools repo 28 | - name: remove msr tools tmp dir 29 | file: 30 | path: "{{ msrtools_path.path }}" 31 | state: absent 32 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/install_python.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | - name: Install python - CentOS 6 | yum: 7 | name: python3 8 | state: present 9 | when: ansible_os_family == 'CentOS' 10 | 11 | - name: Install python - RedHat 12 | yum: 13 | name: python3 14 | state: present 15 | when: ansible_os_family == 'RedHat' 16 | 17 | - name: Install python - SuSE 18 | zypper: 19 | name: python3 20 | state: present 21 | when: ansible_os_family == 
'Suse' 22 | 23 | - name: Install python - Debian 24 | apt: 25 | name: python3 26 | state: present 27 | when: ansible_os_family == 'Debian' 28 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/lowlatency.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | # Install prerequisites. 6 | - import_tasks: install_git.yml 7 | - import_tasks: install_python.yml 8 | - import_tasks: install_msr_tools.yml 9 | 10 | # Make sure the kernel has below flag enabled. 11 | # If not system not suitable for latency testing. 12 | - name: Check for CONFIG_NO_HZ_FULL=y 13 | command: grep CONFIG_NO_HZ_FULL=y /boot/config-"{{ansible_kernel}}" 14 | register: nohzcheck 15 | changed_when: false 16 | 17 | # Update grub with new boot params and reboot 18 | - include_tasks: update_grub_reboot.yml 19 | 20 | # Check msr driver availability 21 | - include_tasks: check_msr_driver.yml 22 | 23 | # Set uncore frequency 24 | - include_tasks: set_uncore_frequency.yml 25 | 26 | # Check Pstate driver enabled 27 | - include_tasks: check_pstate_driver.yml 28 | 29 | # Find the C states sysfs path files for list of latency cores. 30 | - name: Find C states sysfs, state1,state2... stateN, excluding state0. 
31 | find: 32 | paths: /sys/devices/system/cpu/cpu{{ item }}/cpuidle 33 | recurse: no 34 | file_type: directory 35 | excludes: 'state0' 36 | register: cstate_paths 37 | loop: "{{ cores|flatten(levels=1) }}" 38 | changed_when: false 39 | ignore_errors: yes 40 | 41 | # enable this task to print more debug info 42 | # - name: Print idle driver state 43 | # debug: 44 | # msg: "{{ item.files[0].path }} {{ item.files[1].path }} 45 | # {{ item.files[2].path }}" 46 | # msg: "{{ item.files }}" 47 | # msg: "{{ item.matched }}" 48 | # loop: "{{ cstate_paths.results|flatten(levels=2) }}" 49 | 50 | # Disable the c states on latency cores. 51 | - name: disable the cstate 52 | vars: 53 | total: "{{item.matched}}" 54 | shell: | 55 | {% set var = total|int %} 56 | {% for c in range(var) %} 57 | echo "1" > {{ item.files[c].path }}/disable 58 | {% endfor %} 59 | loop: "{{ cstate_paths.results|flatten(levels=2) }}" 60 | 61 | # Disable numa balancing 62 | - include_tasks: disable_numa_balancing.yml 63 | 64 | # Clone the commspowermgmt repo. 65 | - include_tasks: clone_commspower_repo.yml 66 | 67 | # Create provisioning status dir 68 | - include_tasks: check_provisioning_dir.yml 69 | 70 | # Create the empty file to indicate latency provisioning is done. 71 | - name: Create lowlatency provisioning done file 72 | file: 73 | path: "{{ prov_status_dir }}/lowlatency" 74 | state: touch 75 | owner: root 76 | group: root 77 | mode: 0644 78 | 79 | # Additional info for user 80 | - name: Warn with recommened settings. 
81 | debug: 82 | msg: "1.Unbind unused NICs from kernel driver to eliminate unnecessary 83 | interrupts, 84 | 2.Set process shceduling policy/priority by 'chrt -f -p 99 '" 85 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/lowpower.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | # Check Cstate driver enabled 6 | - include_tasks: check_cstate_driver.yml 7 | when: state == "present" 8 | 9 | # Find the C states above C0 from sysfs path files. 10 | - name: Find C states sysfs, state1,state2... stateN, excluding state0. 11 | find: 12 | paths: /sys/devices/system/cpu/cpu{{ core_id }}/cpuidle 13 | recurse: no 14 | file_type: directory 15 | excludes: 'state0' 16 | register: cstate_paths 17 | with_items: "{{ action_tobe.core_list }}" 18 | loop_control: 19 | loop_var: core_id 20 | changed_when: false 21 | ignore_errors: yes 22 | 23 | - name: print c6state 24 | debug: 25 | msg: "{{ action_tobe.c6state}}" 26 | 27 | # enable only C6 state 28 | - name: enable the c6 cstate 29 | shell: | 30 | echo "0" > {{ cspaths1.invocation.module_args.paths[0] }}/\ 31 | state{{cspaths1.matched}}/disable 32 | with_items: "{{ cstate_paths.results|flatten(levels=2) }}" 33 | loop_control: 34 | loop_var: cspaths1 35 | when: (action_tobe.c6state is defined) and (action_tobe.c6state == 'on') 36 | 37 | # disable only C6 state 38 | - name: disable the c6 cstate 39 | shell: | 40 | echo "1" > {{ cspaths1.invocation.module_args.paths[0] }}/\ 41 | state{{cspaths1.matched}}/disable 42 | with_items: "{{ cstate_paths.results|flatten(levels=2) }}" 43 | loop_control: 44 | loop_var: cspaths1 45 | when: (action_tobe.c6state is defined) and (action_tobe.c6state == 'off') 46 | 47 | # Create provisioning status dir 48 | - include_tasks: check_provisioning_dir.yml 49 | 50 | # Create the 
empty file to indicate powersaving provisioning is done. 51 | - name: Create lowpower provisioning done file 52 | file: 53 | path: "{{ prov_status_dir }}/lowpower" 54 | state: touch 55 | owner: root 56 | group: root 57 | mode: 0644 58 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/main.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | # Low latency provissioning 6 | - include_tasks: lowlatency.yml 7 | when: state == "lowlatency" 8 | 9 | # Power saving provissioning 10 | - include_tasks: powersaving.yml 11 | when: state == "powersaving" 12 | 13 | # Power saving provissioning 14 | - include_tasks: lowpower.yml 15 | when: state == "lowpower" 16 | 17 | - include_tasks: very-lowpower.yml 18 | when: state == "very-lowpower" 19 | 20 | # WAITPKG provisioning 21 | - include_tasks: configure_waitpkg.yml 22 | when: state == "waitpkg" 23 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/powersaving.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | # Install prerequisites. 6 | - import_tasks: install_git.yml 7 | 8 | - import_tasks: install_msr_tools.yml 9 | when: (ansible_os_family == "Suse" and 10 | ansible_distribution_major_version == "12") 11 | 12 | # Check msr driver/module availability 13 | - include_tasks: check_msr_driver.yml 14 | when: state == "present" 15 | 16 | # Check HWP enabled. 
17 | - name: Check HWP enabled 18 | shell: test "$(rdmsr -p 0 0x770)" -eq 1 19 | 20 | # Check Pstate driver enabled 21 | - include_tasks: check_pstate_driver.yml 22 | when: state == "present" 23 | 24 | # Check Cstate driver enabled 25 | - include_tasks: check_cstate_driver.yml 26 | when: state == "present" 27 | 28 | # Find the C states above C1 from sysfs path files. 29 | - name: Find C states sysfs, state1,state2... stateN, excluding state0. 30 | find: 31 | paths: /sys/devices/system/cpu/cpu{{ item }}/cpuidle 32 | recurse: no 33 | file_type: directory 34 | excludes: 'state0,state1' 35 | register: cstate_paths 36 | with_sequence: start=0 end={{ansible_processor_count*ansible_processor_cores* 37 | ansible_processor_threads_per_core-1}} 38 | changed_when: false 39 | ignore_errors: yes 40 | 41 | # Disable the C states above C1. 42 | - name: disable the cstate 43 | vars: 44 | total: "{{item.matched}}" 45 | shell: | 46 | {% set var = total|int %} 47 | {% for c in range(var) %} 48 | echo "1" > {{ item.files[c].path }}/disable 49 | {% endfor %} 50 | loop: "{{ cstate_paths.results|flatten(levels=2) }}" 51 | 52 | # Create provisioning status dir 53 | - include_tasks: check_provisioning_dir.yml 54 | 55 | # Create the empty file to indicate powersaving provisioning is done. 
56 | - name: Create powersaving provisioning done file 57 | file: 58 | path: "{{ prov_status_dir }}/powersaving" 59 | state: touch 60 | owner: root 61 | group: root 62 | mode: 0644 63 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/set_uncore_frequency.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | 6 | # Check intel-uncore-frequency driver presence (lsmod lists loaded modules with underscores, so accept both spellings) 7 | - name: Check intel-uncore-frequency driver presence 8 | shell: lsmod | grep -Ei 'intel[-_]uncore[-_]frequency' 9 | register: uncore_freq_driver 10 | ignore_errors: yes 11 | 12 | # Print the current processor model name. 13 | - name: Print processor 14 | debug: 15 | msg: '{{ ansible_processor[2] }}' 16 | 17 | # Set the uncore frequency using msrs if intel-uncore-frequency driver 18 | # unavailable 19 | - name: Set uncore frequency using MSR 20 | vars: 21 | uncore_frequency: "{{ uncore_freq_clx if ansible_processor[2] == 22 | clx_type else uncore_freq_icx}}" 23 | command: wrmsr -a 0x620 "{{ uncore_frequency }}" 24 | when: uncore_freq_driver.rc == 1 25 | ignore_errors: yes 26 | 27 | # Find the sysfs entry for uncore frequency setting 28 | - name: Set uncore frequency using sysfs 29 | find: 30 | paths: "{{ uncore_sysfs_path }}" 31 | recurse: no 32 | file_type: directory 33 | register: uncore_paths 34 | changed_when: false 35 | when: uncore_freq_driver.rc == 0 36 | ignore_errors: yes 37 | 38 | # enable this task to print uncore sysfs paths finding. 39 | # - name: Print uncore sysfs file info 40 | # debug: 41 | # msg: "{{ item.path}}" 42 | # loop: "{{ uncore_paths.files|flatten(levels=2) }}" 43 | # when: uncore_freq_driver.rc == 0 44 | 45 | # Set the uncore frequency using sysfs path. 
46 | - name: set uncore frequency 47 | vars: 48 | uncore_frequency: "{{ uncore_freq_clx_khz if ansible_processor[2] == 49 | clx_type else uncore_freq_icx_khz}}" 50 | shell: | 51 | echo "{{ uncore_frequency }}" > "{{item.path}}"/"{{uncore_maxfreq_file}}"; 52 | echo "{{ uncore_frequency }}" > "{{item.path}}"/"{{uncore_minfreq_file}}"; 53 | echo "{{ uncore_frequency }}" > "{{item.path}}"/"{{uncore_maxfreq_file}}"; 54 | loop: "{{ uncore_paths.files|flatten(levels=2) }}" 55 | when: uncore_freq_driver.rc == 0 56 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/update_grub_reboot.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | # Replace the default grub command line with custom boot parameters. 6 | - name: Update default grub config for RedHat or CentOS 7 | lineinfile: 8 | dest: "{{ grub_cmdline_default }}" 9 | line: GRUB_CMDLINE_LINUX="{{ grub_cmdline_add_args | join(' ') }}" 10 | regexp: '^GRUB_CMDLINE_LINUX="' 11 | when: > 12 | ansible_os_family == "RedHat" or 13 | ansible_os_family == "CentOS" 14 | 15 | # Replace the default grub command line with custom boot parameters. 
16 | - name: Update default grub config for Suse 17 | lineinfile: 18 | dest: "{{ grub_cmdline_default }}" 19 | line: GRUB_CMDLINE_LINUX_DEFAULT="{{ grub_cmdline_add_args | join(' ') }}" 20 | regexp: '^GRUB_CMDLINE_LINUX_DEFAULT="' 21 | when: > 22 | ansible_os_family == "Suse" or 23 | ansible_os_family == "Debian" 24 | 25 | - name: check that EFI config path exists 26 | stat: path={{ efi_boot_config }}/{{ ansible_facts['distribution'] }}/grub.cfg 27 | register: path 28 | 29 | - name: Update EFI config file 30 | shell: "{{ grubmkcfg }} -o 31 | {{ efi_boot_config }}/{{ ansible_facts['distribution'] }}/grub.cfg" 32 | when: path.stat.exists 33 | 34 | - name: check that grub config path exists 35 | stat: path={{ grub_boot_config }}/grub.cfg 36 | register: path 37 | 38 | - name: Update grub config file 39 | shell: "{{ grubmkcfg }} -o 40 | {{ grub_boot_config }}/grub.cfg" 41 | when: path.stat.exists 42 | 43 | - name: check that grub2 config path exists 44 | stat: path={{ grub2_boot_config }}/grub.cfg 45 | register: path 46 | 47 | - name: Update grub2 config file 48 | shell: "{{ grubmkcfg }} -o 49 | {{ grub2_boot_config }}/grub.cfg" 50 | when: path.stat.exists 51 | 52 | - name: Reboot the board for new boot params 53 | reboot: 54 | reboot_timeout: 700 55 | when: > 56 | (ansible_os_family == "RedHat" or 57 | ansible_os_family == "CentOS" or 58 | ansible_os_family == "Suse" or 59 | ansible_os_family == "Debian") 60 | -------------------------------------------------------------------------------- /ansible/roles/commspower-platform-provisioning/tasks/very-lowpower.yml: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: BSD-3-Clause 2 | # Copyright(c) 2020-21 Intel Corporation 3 | 4 | --- 5 | - import_tasks: install_git.yml 6 | - import_tasks: install_msr_tools.yml 7 | 8 | # Check msr driver/module availability 9 | - include_tasks: check_msr_driver.yml 10 | 11 | # Check Cstate driver enabled 12 | - include_tasks: 
check_cstate_driver.yml 13 | 14 | # Find the C states above C0 from sysfs path files. 15 | - name: Find C states sysfs, state1,state2... stateN, excluding state0. 16 | find: 17 | paths: /sys/devices/system/cpu/cpu{{ core_id }}/cpuidle 18 | recurse: no 19 | file_type: directory 20 | excludes: 'state0' 21 | register: cstate_paths 22 | with_items: "{{ action_tobe.core_list }}" 23 | loop_control: 24 | loop_var: core_id 25 | changed_when: false 26 | ignore_errors: yes 27 | 28 | # enable only C6 state 29 | - name: enable the c6 cstate 30 | shell: | 31 | echo "0" > {{ cspaths1.invocation.module_args.paths[0] }}/\ 32 | state{{cspaths1.matched}}/disable 33 | loop: "{{ cstate_paths.results|flatten(levels=2) }}" 34 | loop_control: 35 | loop_var: cspaths1 36 | when: (action_tobe.c6state is defined) and (action_tobe.c6state == 'on') 37 | 38 | # disable only C6 state 39 | - name: disable the c6 cstate 40 | shell: | 41 | echo "1" > {{ cspaths1.invocation.module_args.paths[0] }}/\ 42 | state{{cspaths1.matched}}/disable 43 | loop: "{{ cstate_paths.results|flatten(levels=2) }}" 44 | loop_control: 45 | loop_var: cspaths1 46 | when: (action_tobe.c6state is defined) and (action_tobe.c6state == 'off') 47 | 48 | # Set new max frequency lowered by bins on set of cores. 
49 | - name: get the max frequency of listed cores 50 | shell: 51 | cmd: cat /sys/devices/system/cpu/cpu{{core_id}}/cpufreq/scaling_max_freq 52 | register: core_n_freq_dict 53 | loop: "{{ action_tobe.core_list }}" 54 | loop_control: 55 | loop_var: core_id 56 | 57 | - name: set the new max frequency on listed cores 58 | vars: 59 | # Prepare freqMHz to be lowered from maximum frequency 60 | freq_to_lower: |- 61 | "{{ core_n_freq.stdout|int - action_tobe.bins_to_lower * 100000 }}" 62 | shell: >- 63 | echo "{{freq_to_lower}}" > 64 | /sys/devices/system/cpu/cpu{{core_n_freq.core_id}}/cpufreq/scaling_max_freq 65 | loop: "{{core_n_freq_dict.results}}" 66 | loop_control: 67 | loop_var: core_n_freq 68 | 69 | # Create provisioning status dir 70 | - include_tasks: check_provisioning_dir.yml 71 | 72 | # Create the empty file to indicate provisioning is done. 73 | - name: Create provisioning done file 74 | file: 75 | path: "{{ prov_status_dir }}/very-lowpower" 76 | state: touch 77 | owner: root 78 | group: root 79 | mode: 0644 80 | -------------------------------------------------------------------------------- /ipm/patches/dpdk/20.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch: -------------------------------------------------------------------------------- 1 | From 5c902504efd82545ee41bbeca7ef72682f5f8d65 Mon Sep 17 00:00:00 2001 2 | From: Anatoly Burakov 3 | Date: Fri, 15 Jul 2022 13:12:54 +0000 4 | Subject: [PATCH 2/3] eal: add cpuset lcore telemetry entries 5 | 6 | Expose per-lcore cpuset information to telemetry. 
7 | 8 | Signed-off-by: Anatoly Burakov 9 | --- 10 | .../common/eal_common_lcore_telemetry.c | 48 +++++++++++++++++++ 11 | 1 file changed, 48 insertions(+) 12 | 13 | diff --git a/lib/librte_eal/common/eal_common_lcore_telemetry.c b/lib/librte_eal/common/eal_common_lcore_telemetry.c 14 | index 2e9033bf5a..f01ccd9a65 100644 15 | --- a/lib/librte_eal/common/eal_common_lcore_telemetry.c 16 | +++ b/lib/librte_eal/common/eal_common_lcore_telemetry.c 17 | @@ -19,6 +19,8 @@ int __rte_lcore_telemetry_enabled; 18 | 19 | #ifdef RTE_LCORE_BUSYNESS 20 | 21 | +#include "eal_private.h" 22 | + 23 | struct lcore_telemetry { 24 | int busyness; 25 | /**< Calculated busyness (gets set/returned by the API) */ 26 | @@ -261,6 +263,49 @@ lcore_handle_busyness(const char *cmd __rte_unused, 27 | return 0; 28 | } 29 | 30 | +static int 31 | +lcore_handle_cpuset(const char *cmd __rte_unused, 32 | + const char *params __rte_unused, 33 | + struct rte_tel_data *d) 34 | +{ 35 | + char corenum[64]; 36 | + int i; 37 | + 38 | + rte_tel_data_start_dict(d); 39 | + 40 | + /* Foreach lcore - can't use macro since it excludes ROLE_NON_EAL */ 41 | + for (i = 0; i < RTE_MAX_LCORE; i++) { 42 | + const struct lcore_config *cfg = &lcore_config[i]; 43 | + const rte_cpuset_t *cpuset = &cfg->cpuset; 44 | + struct rte_tel_data *ld; 45 | + unsigned int cpu; 46 | + 47 | + if (!lcore_enabled(i)) 48 | + continue; 49 | + 50 | + /* create an array of integers */ 51 | + ld = rte_tel_data_alloc(); 52 | + if (ld == NULL) 53 | + return -ENOMEM; 54 | + rte_tel_data_start_array(ld, RTE_TEL_INT_VAL); 55 | + 56 | + /* add cpu ID's from cpuset to the array */ 57 | + for (cpu = 0; cpu < CPU_SETSIZE; cpu++) { 58 | + if (!CPU_ISSET(cpu, cpuset)) 59 | + continue; 60 | + rte_tel_data_add_array_int(ld, cpu); 61 | + } 62 | + 63 | + /* add array to the per-lcore container */ 64 | + snprintf(corenum, sizeof(corenum), "%d", i); 65 | + 66 | + /* tell telemetry library to free this array automatically */ 67 | + 
rte_tel_data_add_dict_container(d, corenum, ld, 0); 68 | + } 69 | + 70 | + return 0; 71 | +} 72 | + 73 | void 74 | eal_lcore_telemetry_free(void) 75 | { 76 | @@ -287,6 +332,9 @@ RTE_INIT(lcore_init_telemetry) 77 | rte_telemetry_register_cmd("/eal/lcore/busyness_disable", lcore_busyness_disable, 78 | "disable lcore busyness measurement"); 79 | 80 | + rte_telemetry_register_cmd("/eal/lcore/cpuset", lcore_handle_cpuset, 81 | + "list physical core affinity for each lcore"); 82 | + 83 | __rte_lcore_telemetry_enabled = true; 84 | } 85 | 86 | -- 87 | 2.25.1 88 | 89 | -------------------------------------------------------------------------------- /ipm/patches/dpdk/20.11/0003-add-capacity-endpoint-to-telemetry-thread.patch: -------------------------------------------------------------------------------- 1 | From 81175d27730b2b69d36d00d4083872696db109e4 Mon Sep 17 00:00:00 2001 2 | From: David Hunt 3 | Date: Mon, 16 Sep 2024 14:59:56 +0100 4 | Subject: [PATCH 3/3] add capacity endpoint to telemetry thread 5 | 6 | Busyness is calculated on how busy the current core is, ignoring the 7 | current frequency. So a core that's 50% busy at P1 (e.g. 2GHz), shows 8 | as 100% busy at 1GHz. 9 | 10 | This patch adds a new 'capacity' metric that shows a percentage based on 11 | the P1 (base) freqency of the core, so that if the core is 50% busy at 12 | P1, it should show 50% regardless of what the current frequency is. 
13 | 14 | Signed-off-by: David Hunt 15 | --- 16 | .../common/eal_common_lcore_telemetry.c | 240 ++++++++++++++++++ 17 | lib/librte_eal/include/rte_lcore.h | 21 ++ 18 | lib/librte_eal/version.map | 1 + 19 | 3 files changed, 262 insertions(+) 20 | 21 | diff --git a/lib/librte_eal/common/eal_common_lcore_telemetry.c b/lib/librte_eal/common/eal_common_lcore_telemetry.c 22 | index f01ccd9a65..18dcc40b1e 100644 23 | --- a/lib/librte_eal/common/eal_common_lcore_telemetry.c 24 | +++ b/lib/librte_eal/common/eal_common_lcore_telemetry.c 25 | @@ -10,9 +10,18 @@ 26 | #include 27 | #include 28 | #include 29 | +#include 30 | +#include 31 | +#include 32 | 33 | #ifdef RTE_LCORE_BUSYNESS 34 | #include 35 | +#define MSR_PLATFORM_INFO 0xCE 36 | +#define POWER_SYSFS_CUR_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq" 37 | +#define POWER_SYSFS_BASE_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency" 38 | +#define POWER_SYSFS_SCALING_DRIVER_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_driver" 39 | +#define POWER_SYSFS_SCALING_MAX_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_max_freq" 40 | +#define POWER_SYSFS_MSR_PATH "/dev/cpu/%u/msr" 41 | #endif 42 | 43 | int __rte_lcore_telemetry_enabled; 44 | @@ -47,6 +56,182 @@ static struct lcore_telemetry *telemetry_data; 45 | #define SMOOTH_COEFF 5 46 | #define STATE_CHANGE_OPT 32 47 | 48 | +static int p1_freq[RTE_MAX_LCORE] = {0}; 49 | + 50 | +static int 51 | +try_read_base_frequency(unsigned int lcore_id) 52 | +{ 53 | + char path[PATH_MAX]; 54 | + int fd; 55 | + snprintf(path, sizeof(path), POWER_SYSFS_BASE_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); 56 | + 57 | + fd = open(path, O_RDONLY); 58 | + if (fd == -1) { 59 | + return -1; 60 | + } 61 | + char buffer[16]; 62 | + ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); 63 | + if (bytesRead == -1) { 64 | + close(fd); 65 | + return -1; 66 | + } 67 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 68 | + close(fd); 69 | + 70 | + 
p1_freq[lcore_id] = atoi(buffer); 71 | + return p1_freq[lcore_id]; 72 | + 73 | + 74 | +} 75 | + 76 | +static int 77 | +try_read_scaling_max_freq(unsigned int lcore_id) 78 | +{ 79 | + char path[PATH_MAX]; 80 | + int freq; 81 | + int fd; 82 | + 83 | + /* 84 | + * If the driver is acpi-cpufreq, we can read the scaling_max_freq file 85 | + */ 86 | + snprintf(path, sizeof(path), POWER_SYSFS_SCALING_DRIVER_PATH, rte_lcore_to_cpu_id(lcore_id)); 87 | + fd = open(path, O_RDONLY); 88 | + if (fd == -1) { 89 | + return -1; 90 | + } 91 | + char buffer[16]; 92 | + ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); 93 | + if (bytesRead == -1) { 94 | + close(fd); 95 | + return -1; 96 | + } 97 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 98 | + 99 | + close(fd); 100 | + 101 | + if (strncmp(buffer, "acpi-cpufreq", 12) == 0) { 102 | + /* we can use the scaling_max_freq to get the p1 */ 103 | + snprintf(path, sizeof(path), POWER_SYSFS_SCALING_MAX_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); 104 | + fd = open(path, O_RDONLY); 105 | + if (fd == -1) { 106 | + return -1; 107 | + } 108 | + ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); 109 | + if (bytesRead == -1) { 110 | + close(fd); 111 | + return -1; 112 | + } 113 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 114 | + close(fd); 115 | + freq = atoi(buffer) / 1000; /* sysfs value is in kHz; convert to MHz */ 116 | + 117 | + /* 118 | + * If the MHz value ends with '1', then turbo is enabled. 119 | + * Clearing the low bit drops that marker. Otherwise use the value as is. 
120 | + */ 121 | + return (freq & ~1) * 1000; /* convert back to kHz */ 122 | + } 123 | + return -1; 124 | +} 125 | + 126 | +static int 127 | +try_read_msr(unsigned int lcore_id) 128 | +{ 129 | + char path[PATH_MAX]; 130 | + int fd; 131 | + int freq; 132 | + uint64_t data; 133 | + 134 | + /* 135 | + * If the msr driver is present, we can read p1 from MSR_PLATFORM_INFO register 136 | + */ 137 | + snprintf(path, sizeof(path), POWER_SYSFS_MSR_PATH, rte_lcore_to_cpu_id(lcore_id)); 138 | + fd = open(path, O_RDONLY); 139 | + if (fd < 0) { 140 | + return -1; 141 | + } 142 | + 143 | + if (pread(fd, &data, sizeof(data), MSR_PLATFORM_INFO) != sizeof(data)) { 144 | + close(fd); 145 | + return -1; 146 | + } 147 | + 148 | + close(fd); 149 | + 150 | + freq = ((data >> 8) & 0xff) * 100 * 1000; /* NOTE(review): presumably ratio * 100 MHz expressed in kHz - confirm */ 151 | + 152 | + return freq; 153 | +} 154 | + 155 | + 156 | +static 157 | +int read_sysfs_p1_freq(unsigned int lcore_id) { 158 | + int freq; 159 | + 160 | + /* Return the cached p1 frequency if one was already found. */ 161 | + if (p1_freq[lcore_id] != 0) 162 | + return p1_freq[lcore_id]; 163 | + 164 | + /* 165 | + * Check the base_frequency file, if it's there 166 | + */ 167 | + freq = try_read_base_frequency(lcore_id); 168 | + if (freq != -1) { 169 | + p1_freq[lcore_id] = freq; 170 | + return freq; 171 | + } 172 | + 173 | + /* 174 | + * Check the scaling_max_freq file for the acpi-cpufreq driver 175 | + */ 176 | + freq = try_read_scaling_max_freq(lcore_id); 177 | + if (freq != -1) { 178 | + p1_freq[lcore_id] = freq; 179 | + return freq; 180 | + } 181 | + 182 | + /* 183 | + * Try reading from the MSR register 184 | + */ 185 | + freq = try_read_msr(lcore_id); 186 | + if (freq != -1) { 187 | + p1_freq[lcore_id] = freq; 188 | + return freq; 189 | + } 190 | + 191 | + RTE_LOG(ERR, EAL, "Capacity telemetry for lcore %d not supported: no p1 frequency found", 192 | + lcore_id); 193 | + 194 | + return -1; 195 | +} 196 | + 197 | + 198 | +int current_fds[RTE_MAX_LCORE] = {0}; 199 | + 200 | +static 201 | +int 
read_sysfs_cur_freq(unsigned int lcore_id) { 202 | + char path[PATH_MAX]; 203 | + 204 | + if (current_fds[lcore_id] == 0) { 205 | + snprintf(path, sizeof(path), POWER_SYSFS_CUR_PATH, rte_lcore_to_cpu_id(lcore_id)); 206 | + current_fds[lcore_id] = open(path, O_RDONLY); 207 | + if (current_fds[lcore_id] == -1) { 208 | + return -1; 209 | + } 210 | + } 211 | + 212 | + char buffer[16]; 213 | + ssize_t bytesRead = pread(current_fds[lcore_id], buffer, sizeof(buffer) - 1, 0); 214 | + if (bytesRead == -1) { 215 | + return -1; 216 | + } 217 | + 218 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 219 | + 220 | + int value = atoi(buffer); 221 | + return value; 222 | +} 223 | + 224 | /* Helper function to check if the lcore is enabled. 225 | * Cannot use rte_lcore_is_enabled since it only catches ROLE_RTE threads which 226 | * does not include ROLE_NON_EAL threads which some application threads, for 227 | @@ -102,6 +287,33 @@ int rte_lcore_busyness(unsigned int lcore_id) 228 | return telemetry_data[lcore_id].busyness; 229 | } 230 | 231 | +int rte_lcore_capacity(unsigned int lcore_id) 232 | +{ 233 | + const uint64_t active_thresh = RTE_LCORE_BUSYNESS_PERIOD * 1000; 234 | + struct lcore_telemetry *tdata; 235 | + 236 | + if (lcore_id >= RTE_MAX_LCORE) 237 | + return -EINVAL; 238 | + tdata = &telemetry_data[lcore_id]; 239 | + 240 | + /* if the lcore is not active */ 241 | + if (tdata->interval_ts == 0) 242 | + return LCORE_BUSYNESS_NOT_SET; 243 | + /* if the core hasn't been active in a while */ 244 | + else if ((rte_rdtsc() - tdata->interval_ts) > active_thresh) 245 | + return LCORE_BUSYNESS_NOT_SET; 246 | + 247 | + int cur_freq = read_sysfs_cur_freq(rte_lcore_to_cpu_id(lcore_id)); 248 | + int busy = telemetry_data[lcore_id].busyness; 249 | + int p1 = read_sysfs_p1_freq(lcore_id) ; 250 | + 251 | + if ((busy == -1) || (p1 <= 0)) { 252 | + return -1; 253 | + } else { 254 | + return busy * cur_freq / p1; 255 | + } 256 | +} 257 | + 258 | int rte_lcore_busyness_enabled(void) 
259 | { 260 | return __rte_lcore_telemetry_enabled; 261 | @@ -263,6 +475,26 @@ lcore_handle_busyness(const char *cmd __rte_unused, 262 | return 0; 263 | } 264 | 265 | +static int 266 | +lcore_handle_capacity(const char *cmd __rte_unused, 267 | + const char *params __rte_unused, struct rte_tel_data *d) 268 | +{ 269 | + char corenum[64]; 270 | + int i; 271 | + 272 | + rte_tel_data_start_dict(d); 273 | + 274 | + /* Foreach lcore - can't use macro since it excludes ROLE_NON_EAL */ 275 | + for (i = 0; i < RTE_MAX_LCORE; i++) { 276 | + if (!lcore_enabled(i)) 277 | + continue; 278 | + snprintf(corenum, sizeof(corenum), "%d", i); 279 | + rte_tel_data_add_dict_int(d, corenum, rte_lcore_capacity(i)); 280 | + } 281 | + 282 | + return 0; 283 | +} 284 | + 285 | static int 286 | lcore_handle_cpuset(const char *cmd __rte_unused, 287 | const char *params __rte_unused, 288 | @@ -326,6 +558,9 @@ RTE_INIT(lcore_init_telemetry) 289 | rte_telemetry_register_cmd("/eal/lcore/busyness", lcore_handle_busyness, 290 | "return percentage busyness of cores"); 291 | 292 | + rte_telemetry_register_cmd("/eal/lcore/capacity_used", lcore_handle_capacity, 293 | + "return percentage capacity of cores"); 294 | + 295 | rte_telemetry_register_cmd("/eal/lcore/busyness_enable", lcore_busyness_enable, 296 | "enable lcore busyness measurement"); 297 | 298 | @@ -340,6 +575,11 @@ RTE_INIT(lcore_init_telemetry) 299 | 300 | #else 301 | 302 | +int rte_lcore_capacity(unsigned int lcore_id __rte_unused) 303 | +{ 304 | + return -ENOTSUP; 305 | +} 306 | + 307 | int rte_lcore_busyness(unsigned int lcore_id __rte_unused) 308 | { 309 | return -ENOTSUP; 310 | diff --git a/lib/librte_eal/include/rte_lcore.h b/lib/librte_eal/include/rte_lcore.h 311 | index 90c2aa037a..dddc529ccd 100644 312 | --- a/lib/librte_eal/include/rte_lcore.h 313 | +++ b/lib/librte_eal/include/rte_lcore.h 314 | @@ -487,6 +487,27 @@ __rte_experimental 315 | int 316 | rte_lcore_busyness(unsigned int lcore_id); 317 | 318 | +/** 319 | + * @warning 320 | 
+ * @b EXPERIMENTAL: this API may change without prior notice. 321 | + * 322 | + * Read capacity value corresponding to an lcore. 323 | + * This differs from busyness in that it is related to the current usage 324 | + * of the lcore compared to P1 frequency, not the current frequency. 325 | + * 326 | + * @param lcore_id 327 | + * Lcore to read capacity value for. 328 | + * @return 329 | + * - value between 0 and 100 on success 330 | + * - -1 if lcore is not active 331 | + * - -EINVAL if lcore is invalid 332 | + * - -ENOMEM if not enough memory available 333 | + * - -ENOTSUP if not supported 334 | + */ 335 | +__rte_experimental 336 | +int 337 | +rte_lcore_capacity(unsigned int lcore_id); 338 | + 339 | /** 340 | * @warning 341 | * @b EXPERIMENTAL: this API may change without prior notice. 342 | diff --git a/lib/librte_eal/version.map b/lib/librte_eal/version.map 343 | index d828a0d791..cac187ffdd 100644 344 | --- a/lib/librte_eal/version.map 345 | +++ b/lib/librte_eal/version.map 346 | @@ -406,6 +406,7 @@ EXPERIMENTAL { 347 | 348 | __rte_lcore_telemetry_timestamp; 349 | __rte_lcore_telemetry_enabled; 350 | + rte_lcore_capacity; 351 | rte_lcore_busyness; 352 | rte_lcore_busyness_enabled; 353 | rte_lcore_busyness_enabled_set; 354 | -- 355 | 2.25.1 356 | 357 | -------------------------------------------------------------------------------- /ipm/patches/dpdk/21.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch: -------------------------------------------------------------------------------- 1 | From ac2b8db5f2dc2578b99a63b0abaea703c092ab42 Mon Sep 17 00:00:00 2001 2 | From: Anatoly Burakov 3 | Date: Fri, 15 Jul 2022 13:12:45 +0000 4 | Subject: [PATCH 2/3] eal: add cpuset lcore telemetry entries 5 | 6 | Expose per-lcore cpuset information to telemetry. 
7 | 8 | Signed-off-by: Anatoly Burakov 9 | --- 10 | lib/eal/common/eal_common_lcore_telemetry.c | 48 +++++++++++++++++++++ 11 | 1 file changed, 48 insertions(+) 12 | 13 | diff --git a/lib/eal/common/eal_common_lcore_telemetry.c b/lib/eal/common/eal_common_lcore_telemetry.c 14 | index 2e9033bf5a..f01ccd9a65 100644 15 | --- a/lib/eal/common/eal_common_lcore_telemetry.c 16 | +++ b/lib/eal/common/eal_common_lcore_telemetry.c 17 | @@ -19,6 +19,8 @@ int __rte_lcore_telemetry_enabled; 18 | 19 | #ifdef RTE_LCORE_BUSYNESS 20 | 21 | +#include "eal_private.h" 22 | + 23 | struct lcore_telemetry { 24 | int busyness; 25 | /**< Calculated busyness (gets set/returned by the API) */ 26 | @@ -261,6 +263,49 @@ lcore_handle_busyness(const char *cmd __rte_unused, 27 | return 0; 28 | } 29 | 30 | +static int 31 | +lcore_handle_cpuset(const char *cmd __rte_unused, 32 | + const char *params __rte_unused, 33 | + struct rte_tel_data *d) 34 | +{ 35 | + char corenum[64]; 36 | + int i; 37 | + 38 | + rte_tel_data_start_dict(d); 39 | + 40 | + /* Foreach lcore - can't use macro since it excludes ROLE_NON_EAL */ 41 | + for (i = 0; i < RTE_MAX_LCORE; i++) { 42 | + const struct lcore_config *cfg = &lcore_config[i]; 43 | + const rte_cpuset_t *cpuset = &cfg->cpuset; 44 | + struct rte_tel_data *ld; 45 | + unsigned int cpu; 46 | + 47 | + if (!lcore_enabled(i)) 48 | + continue; 49 | + 50 | + /* create an array of integers */ 51 | + ld = rte_tel_data_alloc(); 52 | + if (ld == NULL) 53 | + return -ENOMEM; 54 | + rte_tel_data_start_array(ld, RTE_TEL_INT_VAL); 55 | + 56 | + /* add cpu ID's from cpuset to the array */ 57 | + for (cpu = 0; cpu < CPU_SETSIZE; cpu++) { 58 | + if (!CPU_ISSET(cpu, cpuset)) 59 | + continue; 60 | + rte_tel_data_add_array_int(ld, cpu); 61 | + } 62 | + 63 | + /* add array to the per-lcore container */ 64 | + snprintf(corenum, sizeof(corenum), "%d", i); 65 | + 66 | + /* tell telemetry library to free this array automatically */ 67 | + rte_tel_data_add_dict_container(d, corenum, ld, 0); 
68 | + } 69 | + 70 | + return 0; 71 | +} 72 | + 73 | void 74 | eal_lcore_telemetry_free(void) 75 | { 76 | @@ -287,6 +332,9 @@ RTE_INIT(lcore_init_telemetry) 77 | rte_telemetry_register_cmd("/eal/lcore/busyness_disable", lcore_busyness_disable, 78 | "disable lcore busyness measurement"); 79 | 80 | + rte_telemetry_register_cmd("/eal/lcore/cpuset", lcore_handle_cpuset, 81 | + "list physical core affinity for each lcore"); 82 | + 83 | __rte_lcore_telemetry_enabled = true; 84 | } 85 | 86 | -- 87 | 2.25.1 88 | 89 | -------------------------------------------------------------------------------- /ipm/patches/dpdk/21.11/0003-add-capacity-endpoint-to-telemetry-thread.patch: -------------------------------------------------------------------------------- 1 | From 644d8d946ce5e31c9a818da9661f4e0658f57754 Mon Sep 17 00:00:00 2001 2 | From: David Hunt 3 | Date: Mon, 16 Sep 2024 14:28:18 +0100 4 | Subject: [PATCH 3/3] add capacity endpoint to telemetry thread 5 | 6 | Busyness is calculated on how busy the current core is, ignoring the 7 | current frequency. So a core that's 50% busy at P1 (e.g. 2GHz), shows 8 | as 100% busy at 1GHz. 9 | 10 | This patch adds a new 'capacity' metric that shows a percentage based on 11 | the P1 (base) frequency of the core, so that if the core is 50% busy at 12 | P1, it should show 50% regardless of what the current frequency is. 
13 | 14 | Signed-off-by: David Hunt 15 | --- 16 | lib/eal/common/eal_common_lcore_telemetry.c | 240 ++++++++++++++++++++ 17 | lib/eal/include/rte_lcore.h | 21 ++ 18 | lib/eal/version.map | 1 + 19 | 3 files changed, 262 insertions(+) 20 | 21 | diff --git a/lib/eal/common/eal_common_lcore_telemetry.c b/lib/eal/common/eal_common_lcore_telemetry.c 22 | index f01ccd9a65..18dcc40b1e 100644 23 | --- a/lib/eal/common/eal_common_lcore_telemetry.c 24 | +++ b/lib/eal/common/eal_common_lcore_telemetry.c 25 | @@ -10,9 +10,18 @@ 26 | #include 27 | #include 28 | #include 29 | +#include 30 | +#include 31 | +#include 32 | 33 | #ifdef RTE_LCORE_BUSYNESS 34 | #include 35 | +#define MSR_PLATFORM_INFO 0xCE 36 | +#define POWER_SYSFS_CUR_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq" 37 | +#define POWER_SYSFS_BASE_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency" 38 | +#define POWER_SYSFS_SCALING_DRIVER_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_driver" 39 | +#define POWER_SYSFS_SCALING_MAX_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_max_freq" 40 | +#define POWER_SYSFS_MSR_PATH "/dev/cpu/%u/msr" 41 | #endif 42 | 43 | int __rte_lcore_telemetry_enabled; 44 | @@ -47,6 +56,182 @@ static struct lcore_telemetry *telemetry_data; 45 | #define SMOOTH_COEFF 5 46 | #define STATE_CHANGE_OPT 32 47 | 48 | +static int p1_freq[RTE_MAX_LCORE] = {0}; 49 | + 50 | +static int 51 | +try_read_base_frequency(unsigned int lcore_id) 52 | +{ 53 | + char path[PATH_MAX]; 54 | + int fd; 55 | + snprintf(path, sizeof(path), POWER_SYSFS_BASE_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); 56 | + 57 | + fd = open(path, O_RDONLY); 58 | + if (fd == -1) { 59 | + return -1; 60 | + } 61 | + char buffer[16]; 62 | + ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); 63 | + if (bytesRead == -1) { 64 | + close(fd); 65 | + return -1; 66 | + } 67 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 68 | + close(fd); 69 | + 70 | + p1_freq[lcore_id] = atoi(buffer); 71 | 
+ return p1_freq[lcore_id]; 72 | + 73 | + 74 | +} 75 | + 76 | +static int 77 | +try_read_scaling_max_freq(unsigned int lcore_id) 78 | +{ 79 | + char path[PATH_MAX]; 80 | + int freq; 81 | + int fd; 82 | + 83 | + /* 84 | + * If the driver is acpi_cpufreq, we can read the scaling_max_freq file 85 | + */ 86 | + snprintf(path, sizeof(path), POWER_SYSFS_SCALING_DRIVER_PATH, rte_lcore_to_cpu_id(lcore_id)); 87 | + fd = open(path, O_RDONLY); 88 | + if (fd == -1) { 89 | + return -1; 90 | + } 91 | + char buffer[16]; 92 | + ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); 93 | + if (bytesRead == -1) { 94 | + close(fd); 95 | + return -1; 96 | + } 97 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 98 | + 99 | + close(fd); 100 | + 101 | + if (strncmp(buffer, "acpi-cpufreq", 12) == 0) { 102 | + /* we can use the scaling_max_freq to get the p1 */ 103 | + snprintf(path, sizeof(path), POWER_SYSFS_SCALING_MAX_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); 104 | + fd = open(path, O_RDONLY); 105 | + if (fd == -1) { 106 | + return -1; 107 | + } 108 | + ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); 109 | + if (bytesRead == -1) { 110 | + close(fd); 111 | + return -1; 112 | + } 113 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 114 | + close(fd); 115 | + freq = atoi(buffer) / 1000; /* convert to KHz */ 116 | + 117 | + /* 118 | + * If the freq value ends with '1', then, turbo is enabled. 119 | + * Round it down to the nearest 100. Otherwuse use the value. 
120 | + */ 121 | + return (freq & ~1) * 1000; /* convert to Hz */ 122 | + } 123 | + return -1; 124 | +} 125 | + 126 | +static int 127 | +try_read_msr(unsigned int lcore_id) 128 | +{ 129 | + char path[PATH_MAX]; 130 | + int fd; 131 | + int freq; 132 | + uint64_t data; 133 | + 134 | + /* 135 | + * If the msr driver is present, we can read p1 from MSR_PLATFORM_INFO register 136 | + */ 137 | + snprintf(path, sizeof(path), POWER_SYSFS_MSR_PATH, rte_lcore_to_cpu_id(lcore_id)); 138 | + fd = open(path, O_RDONLY); 139 | + if (fd < 0) { 140 | + return -1; 141 | + } 142 | + 143 | + if (pread(fd, &data, sizeof(data), MSR_PLATFORM_INFO) != sizeof(data)) { 144 | + close(fd); 145 | + return -1; 146 | + } 147 | + 148 | + close(fd); 149 | + 150 | + freq = ((data >> 8) & 0xff) * 100 * 1000; 151 | + 152 | + return freq; 153 | +} 154 | + 155 | + 156 | +static 157 | +int read_sysfs_p1_freq(unsigned int lcore_id) { 158 | + int freq; 159 | + 160 | + /* We've previously got the p1 frequency. */ 161 | + if (p1_freq[lcore_id] != 0) 162 | + return p1_freq[lcore_id]; 163 | + 164 | + /* 165 | + * Check the base_frequency file, if it's there 166 | + */ 167 | + freq = try_read_base_frequency(lcore_id); 168 | + if (freq != -1) { 169 | + p1_freq[lcore_id] = freq; 170 | + return freq; 171 | + } 172 | + 173 | + /* 174 | + * Check the scaling_max_freq file for the acpi-freq driver 175 | + */ 176 | + freq = try_read_scaling_max_freq(lcore_id); 177 | + if (freq != -1) { 178 | + p1_freq[lcore_id] = freq; 179 | + return freq; 180 | + } 181 | + 182 | + /* 183 | + * Try reading from the MSR register 184 | + */ 185 | + freq = try_read_msr(lcore_id); 186 | + if (freq != -1) { 187 | + p1_freq[lcore_id] = freq; 188 | + return freq; 189 | + } 190 | + 191 | + RTE_LOG(ERR, EAL, "Capacity telemetry for lcore %d not supported: no p1 frequency found", 192 | + lcore_id); 193 | + 194 | + return -1; 195 | +} 196 | + 197 | + 198 | +int current_fds[RTE_MAX_LCORE] = {0}; 199 | + 200 | +static 201 | +int 
read_sysfs_cur_freq(unsigned int lcore_id) { 202 | + char path[PATH_MAX]; 203 | + 204 | + if (current_fds[lcore_id] == 0) { 205 | + snprintf(path, sizeof(path), POWER_SYSFS_CUR_PATH, rte_lcore_to_cpu_id(lcore_id)); 206 | + current_fds[lcore_id] = open(path, O_RDONLY); 207 | + if (current_fds[lcore_id] == -1) { 208 | + return -1; 209 | + } 210 | + } 211 | + 212 | + char buffer[16]; 213 | + ssize_t bytesRead = pread(current_fds[lcore_id], buffer, sizeof(buffer) - 1, 0); 214 | + if (bytesRead == -1) { 215 | + return -1; 216 | + } 217 | + 218 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 219 | + 220 | + int value = atoi(buffer); 221 | + return value; 222 | +} 223 | + 224 | /* Helper function to check if the lcore is enabled. 225 | * Cannot use rte_lcore_is_enabled since it only catches ROLE_RTE threads which 226 | * does not include ROLE_NON_EAL threads which some application threads, for 227 | @@ -102,6 +287,33 @@ int rte_lcore_busyness(unsigned int lcore_id) 228 | return telemetry_data[lcore_id].busyness; 229 | } 230 | 231 | +int rte_lcore_capacity(unsigned int lcore_id) 232 | +{ 233 | + const uint64_t active_thresh = RTE_LCORE_BUSYNESS_PERIOD * 1000; 234 | + struct lcore_telemetry *tdata; 235 | + 236 | + if (lcore_id >= RTE_MAX_LCORE) 237 | + return -EINVAL; 238 | + tdata = &telemetry_data[lcore_id]; 239 | + 240 | + /* if the lcore is not active */ 241 | + if (tdata->interval_ts == 0) 242 | + return LCORE_BUSYNESS_NOT_SET; 243 | + /* if the core hasn't been active in a while */ 244 | + else if ((rte_rdtsc() - tdata->interval_ts) > active_thresh) 245 | + return LCORE_BUSYNESS_NOT_SET; 246 | + 247 | + int cur_freq = read_sysfs_cur_freq(rte_lcore_to_cpu_id(lcore_id)); 248 | + int busy = telemetry_data[lcore_id].busyness; 249 | + int p1 = read_sysfs_p1_freq(lcore_id) ; 250 | + 251 | + if ((busy == -1) || (p1 <= 0)) { 252 | + return -1; 253 | + } else { 254 | + return busy * cur_freq / p1; 255 | + } 256 | +} 257 | + 258 | int rte_lcore_busyness_enabled(void) 
259 | { 260 | return __rte_lcore_telemetry_enabled; 261 | @@ -263,6 +475,26 @@ lcore_handle_busyness(const char *cmd __rte_unused, 262 | return 0; 263 | } 264 | 265 | +static int 266 | +lcore_handle_capacity(const char *cmd __rte_unused, 267 | + const char *params __rte_unused, struct rte_tel_data *d) 268 | +{ 269 | + char corenum[64]; 270 | + int i; 271 | + 272 | + rte_tel_data_start_dict(d); 273 | + 274 | + /* Foreach lcore - can't use macro since it excludes ROLE_NON_EAL */ 275 | + for (i = 0; i < RTE_MAX_LCORE; i++) { 276 | + if (!lcore_enabled(i)) 277 | + continue; 278 | + snprintf(corenum, sizeof(corenum), "%d", i); 279 | + rte_tel_data_add_dict_int(d, corenum, rte_lcore_capacity(i)); 280 | + } 281 | + 282 | + return 0; 283 | +} 284 | + 285 | static int 286 | lcore_handle_cpuset(const char *cmd __rte_unused, 287 | const char *params __rte_unused, 288 | @@ -326,6 +558,9 @@ RTE_INIT(lcore_init_telemetry) 289 | rte_telemetry_register_cmd("/eal/lcore/busyness", lcore_handle_busyness, 290 | "return percentage busyness of cores"); 291 | 292 | + rte_telemetry_register_cmd("/eal/lcore/capacity_used", lcore_handle_capacity, 293 | + "return percentage capacity of cores"); 294 | + 295 | rte_telemetry_register_cmd("/eal/lcore/busyness_enable", lcore_busyness_enable, 296 | "enable lcore busyness measurement"); 297 | 298 | @@ -340,6 +575,11 @@ RTE_INIT(lcore_init_telemetry) 299 | 300 | #else 301 | 302 | +int rte_lcore_capacity(unsigned int lcore_id __rte_unused) 303 | +{ 304 | + return -ENOTSUP; 305 | +} 306 | + 307 | int rte_lcore_busyness(unsigned int lcore_id __rte_unused) 308 | { 309 | return -ENOTSUP; 310 | diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h 311 | index 85d6e38f4e..4a631e9645 100644 312 | --- a/lib/eal/include/rte_lcore.h 313 | +++ b/lib/eal/include/rte_lcore.h 314 | @@ -443,6 +443,27 @@ __rte_experimental 315 | int 316 | rte_lcore_busyness(unsigned int lcore_id); 317 | 318 | +/** 319 | + * @warning 320 | + * @b EXPERIMENTAL: this 
API may change without prior notice. 321 | + * 322 | + * Read capacity value corresponding to an lcore. 323 | + * This differs from busyness in that it is related to the current usage 324 | + * of the lcore compared to P1 frequency, not the current frequency. 325 | + * 326 | + * @param lcore_id 327 | + * Lcore to read capacity value for. 328 | + * @return 329 | + * - value between 0 and 100 on success 330 | + * - -1 if lcore is not active 331 | + * - -EINVAL if lcore is invalid 332 | + * - -ENOMEM if not enough memory available 333 | + * - -ENOTSUP if not supported 334 | + */ 335 | +__rte_experimental 336 | +int 337 | +rte_lcore_capacity(unsigned int lcore_id); 338 | + 339 | /** 340 | * @warning 341 | * @b EXPERIMENTAL: this API may change without prior notice. 342 | diff --git a/lib/eal/version.map b/lib/eal/version.map 343 | index a06a9c2a47..a405bfb319 100644 344 | --- a/lib/eal/version.map 345 | +++ b/lib/eal/version.map 346 | @@ -424,6 +424,7 @@ EXPERIMENTAL { 347 | # Telemetry patch set APIs 348 | __rte_lcore_telemetry_timestamp; 349 | __rte_lcore_telemetry_enabled; 350 | + rte_lcore_capacity; 351 | rte_lcore_busyness; 352 | rte_lcore_busyness_enabled; 353 | rte_lcore_busyness_enabled_set; 354 | -- 355 | 2.25.1 356 | 357 | -------------------------------------------------------------------------------- /ipm/patches/dpdk/22.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch: -------------------------------------------------------------------------------- 1 | From 810d87bf69d79351cfa3089df920e4b726f269a5 Mon Sep 17 00:00:00 2001 2 | From: Anatoly Burakov 3 | Date: Fri, 15 Jul 2022 13:12:45 +0000 4 | Subject: [PATCH 2/3] eal: add cpuset lcore telemetry entries 5 | 6 | Expose per-lcore cpuset information to telemetry. 
7 | 8 | Signed-off-by: Anatoly Burakov 9 | --- 10 | lib/eal/common/eal_common_lcore_telemetry.c | 46 +++++++++++++++++++++ 11 | 1 file changed, 46 insertions(+) 12 | 13 | diff --git a/lib/eal/common/eal_common_lcore_telemetry.c b/lib/eal/common/eal_common_lcore_telemetry.c 14 | index 1478e5a48a..f01ccd9a65 100644 15 | --- a/lib/eal/common/eal_common_lcore_telemetry.c 16 | +++ b/lib/eal/common/eal_common_lcore_telemetry.c 17 | @@ -263,6 +263,49 @@ lcore_handle_busyness(const char *cmd __rte_unused, 18 | return 0; 19 | } 20 | 21 | +static int 22 | +lcore_handle_cpuset(const char *cmd __rte_unused, 23 | + const char *params __rte_unused, 24 | + struct rte_tel_data *d) 25 | +{ 26 | + char corenum[64]; 27 | + int i; 28 | + 29 | + rte_tel_data_start_dict(d); 30 | + 31 | + /* Foreach lcore - can't use macro since it excludes ROLE_NON_EAL */ 32 | + for (i = 0; i < RTE_MAX_LCORE; i++) { 33 | + const struct lcore_config *cfg = &lcore_config[i]; 34 | + const rte_cpuset_t *cpuset = &cfg->cpuset; 35 | + struct rte_tel_data *ld; 36 | + unsigned int cpu; 37 | + 38 | + if (!lcore_enabled(i)) 39 | + continue; 40 | + 41 | + /* create an array of integers */ 42 | + ld = rte_tel_data_alloc(); 43 | + if (ld == NULL) 44 | + return -ENOMEM; 45 | + rte_tel_data_start_array(ld, RTE_TEL_INT_VAL); 46 | + 47 | + /* add cpu ID's from cpuset to the array */ 48 | + for (cpu = 0; cpu < CPU_SETSIZE; cpu++) { 49 | + if (!CPU_ISSET(cpu, cpuset)) 50 | + continue; 51 | + rte_tel_data_add_array_int(ld, cpu); 52 | + } 53 | + 54 | + /* add array to the per-lcore container */ 55 | + snprintf(corenum, sizeof(corenum), "%d", i); 56 | + 57 | + /* tell telemetry library to free this array automatically */ 58 | + rte_tel_data_add_dict_container(d, corenum, ld, 0); 59 | + } 60 | + 61 | + return 0; 62 | +} 63 | + 64 | void 65 | eal_lcore_telemetry_free(void) 66 | { 67 | @@ -289,6 +332,9 @@ RTE_INIT(lcore_init_telemetry) 68 | rte_telemetry_register_cmd("/eal/lcore/busyness_disable", lcore_busyness_disable, 69 | 
"disable lcore busyness measurement"); 70 | 71 | + rte_telemetry_register_cmd("/eal/lcore/cpuset", lcore_handle_cpuset, 72 | + "list physical core affinity for each lcore"); 73 | + 74 | __rte_lcore_telemetry_enabled = true; 75 | } 76 | 77 | -- 78 | 2.25.1 79 | 80 | -------------------------------------------------------------------------------- /ipm/patches/dpdk/22.11/0003-add-capacity-endpoint-to-telemetry-thread.patch: -------------------------------------------------------------------------------- 1 | From ea2762b20c60cd66378758559af90bb48c9a8ee5 Mon Sep 17 00:00:00 2001 2 | From: David Hunt 3 | Date: Fri, 23 Aug 2024 09:07:08 +0100 4 | Subject: [PATCH 3/3] add capacity endpoint to telemetry thread 5 | 6 | Busyness is calculated on how busy the current core is, ignoring the 7 | current frequency. So a core that's 50% busy at P1 (e.g. 2GHz), shows 8 | as 100% busy at 1GHz. 9 | 10 | This patch adds a new 'capacity' metric that shows a percentage based on 11 | the P1 (base) freqency of the core, so that if the core is 50% busy at 12 | P1, it should show 50% regardless of what the current frequency is. 
13 | 14 | Signed-off-by: David Hunt 15 | --- 16 | lib/eal/common/eal_common_lcore_telemetry.c | 240 ++++++++++++++++++++ 17 | lib/eal/include/rte_lcore.h | 21 ++ 18 | lib/eal/version.map | 1 + 19 | 3 files changed, 262 insertions(+) 20 | 21 | diff --git a/lib/eal/common/eal_common_lcore_telemetry.c b/lib/eal/common/eal_common_lcore_telemetry.c 22 | index f01ccd9a65..18dcc40b1e 100644 23 | --- a/lib/eal/common/eal_common_lcore_telemetry.c 24 | +++ b/lib/eal/common/eal_common_lcore_telemetry.c 25 | @@ -10,9 +10,18 @@ 26 | #include 27 | #include 28 | #include 29 | +#include 30 | +#include 31 | +#include 32 | 33 | #ifdef RTE_LCORE_BUSYNESS 34 | #include 35 | +#define MSR_PLATFORM_INFO 0xCE 36 | +#define POWER_SYSFS_CUR_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq" 37 | +#define POWER_SYSFS_BASE_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency" 38 | +#define POWER_SYSFS_SCALING_DRIVER_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_driver" 39 | +#define POWER_SYSFS_SCALING_MAX_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_max_freq" 40 | +#define POWER_SYSFS_MSR_PATH "/dev/cpu/%u/msr" 41 | #endif 42 | 43 | int __rte_lcore_telemetry_enabled; 44 | @@ -47,6 +56,182 @@ static struct lcore_telemetry *telemetry_data; 45 | #define SMOOTH_COEFF 5 46 | #define STATE_CHANGE_OPT 32 47 | 48 | +static int p1_freq[RTE_MAX_LCORE] = {0}; 49 | + 50 | +static int 51 | +try_read_base_frequency(unsigned int lcore_id) 52 | +{ 53 | + char path[PATH_MAX]; 54 | + int fd; 55 | + snprintf(path, sizeof(path), POWER_SYSFS_BASE_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); 56 | + 57 | + fd = open(path, O_RDONLY); 58 | + if (fd == -1) { 59 | + return -1; 60 | + } 61 | + char buffer[16]; 62 | + ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); 63 | + if (bytesRead == -1) { 64 | + close(fd); 65 | + return -1; 66 | + } 67 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 68 | + close(fd); 69 | + 70 | + p1_freq[lcore_id] = atoi(buffer); 71 | 
+ return p1_freq[lcore_id]; 72 | + 73 | + 74 | +} 75 | + 76 | +static int 77 | +try_read_scaling_max_freq(unsigned int lcore_id) 78 | +{ 79 | + char path[PATH_MAX]; 80 | + int freq; 81 | + int fd; 82 | + 83 | + /* 84 | + * If the driver is acpi_cpufreq, we can read the scaling_max_freq file 85 | + */ 86 | + snprintf(path, sizeof(path), POWER_SYSFS_SCALING_DRIVER_PATH, rte_lcore_to_cpu_id(lcore_id)); 87 | + fd = open(path, O_RDONLY); 88 | + if (fd == -1) { 89 | + return -1; 90 | + } 91 | + char buffer[16]; 92 | + ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); 93 | + if (bytesRead == -1) { 94 | + close(fd); 95 | + return -1; 96 | + } 97 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 98 | + 99 | + close(fd); 100 | + 101 | + if (strncmp(buffer, "acpi-cpufreq", 12) == 0) { 102 | + /* we can use the scaling_max_freq to get the p1 */ 103 | + snprintf(path, sizeof(path), POWER_SYSFS_SCALING_MAX_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); 104 | + fd = open(path, O_RDONLY); 105 | + if (fd == -1) { 106 | + return -1; 107 | + } 108 | + ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); 109 | + if (bytesRead == -1) { 110 | + close(fd); 111 | + return -1; 112 | + } 113 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 114 | + close(fd); 115 | + freq = atoi(buffer) / 1000; /* convert to KHz */ 116 | + 117 | + /* 118 | + * If the freq value ends with '1', then, turbo is enabled. 119 | + * Round it down to the nearest 100. Otherwuse use the value. 
120 | + */ 121 | + return (freq & ~1) * 1000; /* convert to Hz */ 122 | + } 123 | + return -1; 124 | +} 125 | + 126 | +static int 127 | +try_read_msr(unsigned int lcore_id) 128 | +{ 129 | + char path[PATH_MAX]; 130 | + int fd; 131 | + int freq; 132 | + uint64_t data; 133 | + 134 | + /* 135 | + * If the msr driver is present, we can read p1 from MSR_PLATFORM_INFO register 136 | + */ 137 | + snprintf(path, sizeof(path), POWER_SYSFS_MSR_PATH, rte_lcore_to_cpu_id(lcore_id)); 138 | + fd = open(path, O_RDONLY); 139 | + if (fd < 0) { 140 | + return -1; 141 | + } 142 | + 143 | + if (pread(fd, &data, sizeof(data), MSR_PLATFORM_INFO) != sizeof(data)) { 144 | + close(fd); 145 | + return -1; 146 | + } 147 | + 148 | + close(fd); 149 | + 150 | + freq = ((data >> 8) & 0xff) * 100 * 1000; 151 | + 152 | + return freq; 153 | +} 154 | + 155 | + 156 | +static 157 | +int read_sysfs_p1_freq(unsigned int lcore_id) { 158 | + int freq; 159 | + 160 | + /* We've previously got the p1 frequency. */ 161 | + if (p1_freq[lcore_id] != 0) 162 | + return p1_freq[lcore_id]; 163 | + 164 | + /* 165 | + * Check the base_frequency file, if it's there 166 | + */ 167 | + freq = try_read_base_frequency(lcore_id); 168 | + if (freq != -1) { 169 | + p1_freq[lcore_id] = freq; 170 | + return freq; 171 | + } 172 | + 173 | + /* 174 | + * Check the scaling_max_freq file for the acpi-freq driver 175 | + */ 176 | + freq = try_read_scaling_max_freq(lcore_id); 177 | + if (freq != -1) { 178 | + p1_freq[lcore_id] = freq; 179 | + return freq; 180 | + } 181 | + 182 | + /* 183 | + * Try reading from the MSR register 184 | + */ 185 | + freq = try_read_msr(lcore_id); 186 | + if (freq != -1) { 187 | + p1_freq[lcore_id] = freq; 188 | + return freq; 189 | + } 190 | + 191 | + RTE_LOG(ERR, EAL, "Capacity telemetry for lcore %d not supported: no p1 frequency found", 192 | + lcore_id); 193 | + 194 | + return -1; 195 | +} 196 | + 197 | + 198 | +int current_fds[RTE_MAX_LCORE] = {0}; 199 | + 200 | +static 201 | +int 
read_sysfs_cur_freq(unsigned int lcore_id) { 202 | + char path[PATH_MAX]; 203 | + 204 | + if (current_fds[lcore_id] == 0) { 205 | + snprintf(path, sizeof(path), POWER_SYSFS_CUR_PATH, rte_lcore_to_cpu_id(lcore_id)); 206 | + current_fds[lcore_id] = open(path, O_RDONLY); 207 | + if (current_fds[lcore_id] == -1) { 208 | + return -1; 209 | + } 210 | + } 211 | + 212 | + char buffer[16]; 213 | + ssize_t bytesRead = pread(current_fds[lcore_id], buffer, sizeof(buffer) - 1, 0); 214 | + if (bytesRead == -1) { 215 | + return -1; 216 | + } 217 | + 218 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 219 | + 220 | + int value = atoi(buffer); 221 | + return value; 222 | +} 223 | + 224 | /* Helper function to check if the lcore is enabled. 225 | * Cannot use rte_lcore_is_enabled since it only catches ROLE_RTE threads which 226 | * does not include ROLE_NON_EAL threads which some application threads, for 227 | @@ -102,6 +287,33 @@ int rte_lcore_busyness(unsigned int lcore_id) 228 | return telemetry_data[lcore_id].busyness; 229 | } 230 | 231 | +int rte_lcore_capacity(unsigned int lcore_id) 232 | +{ 233 | + const uint64_t active_thresh = RTE_LCORE_BUSYNESS_PERIOD * 1000; 234 | + struct lcore_telemetry *tdata; 235 | + 236 | + if (lcore_id >= RTE_MAX_LCORE) 237 | + return -EINVAL; 238 | + tdata = &telemetry_data[lcore_id]; 239 | + 240 | + /* if the lcore is not active */ 241 | + if (tdata->interval_ts == 0) 242 | + return LCORE_BUSYNESS_NOT_SET; 243 | + /* if the core hasn't been active in a while */ 244 | + else if ((rte_rdtsc() - tdata->interval_ts) > active_thresh) 245 | + return LCORE_BUSYNESS_NOT_SET; 246 | + 247 | + int cur_freq = read_sysfs_cur_freq(rte_lcore_to_cpu_id(lcore_id)); 248 | + int busy = telemetry_data[lcore_id].busyness; 249 | + int p1 = read_sysfs_p1_freq(lcore_id) ; 250 | + 251 | + if ((busy == -1) || (p1 <= 0)) { 252 | + return -1; 253 | + } else { 254 | + return busy * cur_freq / p1; 255 | + } 256 | +} 257 | + 258 | int rte_lcore_busyness_enabled(void) 
259 | { 260 | return __rte_lcore_telemetry_enabled; 261 | @@ -263,6 +475,26 @@ lcore_handle_busyness(const char *cmd __rte_unused, 262 | return 0; 263 | } 264 | 265 | +static int 266 | +lcore_handle_capacity(const char *cmd __rte_unused, 267 | + const char *params __rte_unused, struct rte_tel_data *d) 268 | +{ 269 | + char corenum[64]; 270 | + int i; 271 | + 272 | + rte_tel_data_start_dict(d); 273 | + 274 | + /* Foreach lcore - can't use macro since it excludes ROLE_NON_EAL */ 275 | + for (i = 0; i < RTE_MAX_LCORE; i++) { 276 | + if (!lcore_enabled(i)) 277 | + continue; 278 | + snprintf(corenum, sizeof(corenum), "%d", i); 279 | + rte_tel_data_add_dict_int(d, corenum, rte_lcore_capacity(i)); 280 | + } 281 | + 282 | + return 0; 283 | +} 284 | + 285 | static int 286 | lcore_handle_cpuset(const char *cmd __rte_unused, 287 | const char *params __rte_unused, 288 | @@ -326,6 +558,9 @@ RTE_INIT(lcore_init_telemetry) 289 | rte_telemetry_register_cmd("/eal/lcore/busyness", lcore_handle_busyness, 290 | "return percentage busyness of cores"); 291 | 292 | + rte_telemetry_register_cmd("/eal/lcore/capacity_used", lcore_handle_capacity, 293 | + "return percentage capacity of cores"); 294 | + 295 | rte_telemetry_register_cmd("/eal/lcore/busyness_enable", lcore_busyness_enable, 296 | "enable lcore busyness measurement"); 297 | 298 | @@ -340,6 +575,11 @@ RTE_INIT(lcore_init_telemetry) 299 | 300 | #else 301 | 302 | +int rte_lcore_capacity(unsigned int lcore_id __rte_unused) 303 | +{ 304 | + return -ENOTSUP; 305 | +} 306 | + 307 | int rte_lcore_busyness(unsigned int lcore_id __rte_unused) 308 | { 309 | return -ENOTSUP; 310 | diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h 311 | index 9f4bd6e22f..132cdb9139 100644 312 | --- a/lib/eal/include/rte_lcore.h 313 | +++ b/lib/eal/include/rte_lcore.h 314 | @@ -437,6 +437,27 @@ __rte_experimental 315 | int 316 | rte_lcore_busyness(unsigned int lcore_id); 317 | 318 | +/** 319 | + * @warning 320 | + * @b EXPERIMENTAL: this 
API may change without prior notice. 321 | + * 322 | + * Read capacity value corresponding to an lcore. 323 | + * This differs from busyness in that it is related to the current usage 324 | + * of the lcore compared to P1 frequency, not the current frequency. 325 | + * 326 | + * @param lcore_id 327 | + * Lcore to read capacity value for. 328 | + * @return 329 | + * - value between 0 and 100 on success 330 | + * - -1 if lcore is not active 331 | + * - -EINVAL if lcore is invalid 332 | + * - -ENOMEM if not enough memory available 333 | + * - -ENOTSUP if not supported 334 | + */ 335 | +__rte_experimental 336 | +int 337 | +rte_lcore_capacity(unsigned int lcore_id); 338 | + 339 | /** 340 | * @warning 341 | * @b EXPERIMENTAL: this API may change without prior notice. 342 | diff --git a/lib/eal/version.map b/lib/eal/version.map 343 | index 7791f59314..5bb8429b29 100644 344 | --- a/lib/eal/version.map 345 | +++ b/lib/eal/version.map 346 | @@ -444,6 +444,7 @@ EXPERIMENTAL { 347 | # Added for busyness telemetry 348 | __rte_lcore_telemetry_timestamp; 349 | __rte_lcore_telemetry_enabled; 350 | + rte_lcore_capacity; 351 | rte_lcore_busyness; 352 | rte_lcore_busyness_enabled; 353 | rte_lcore_busyness_enabled_set; 354 | -- 355 | 2.25.1 356 | 357 | -------------------------------------------------------------------------------- /ipm/patches/dpdk/23.11/0002-eal-add-cpuset-lcore-telemetry-entries.patch: -------------------------------------------------------------------------------- 1 | From f689846f602caddf6a0f6c013c3dbb6f0974dec2 Mon Sep 17 00:00:00 2001 2 | From: Hoang Nguyen 3 | Date: Thu, 1 Aug 2024 16:11:56 +0000 4 | Subject: [PATCH 2/3] eal: add cpuset lcore telemetry entries 5 | 6 | Expose per-lcore cpuset information to telemetry. 
7 | 8 | Signed-off-by: Anatoly Burakov 9 | --- 10 | lib/eal/common/eal_common_lcore_telemetry.c | 46 +++++++++++++++++++++ 11 | 1 file changed, 46 insertions(+) 12 | 13 | diff --git a/lib/eal/common/eal_common_lcore_telemetry.c b/lib/eal/common/eal_common_lcore_telemetry.c 14 | index 1478e5a48a..f01ccd9a65 100644 15 | --- a/lib/eal/common/eal_common_lcore_telemetry.c 16 | +++ b/lib/eal/common/eal_common_lcore_telemetry.c 17 | @@ -263,6 +263,49 @@ lcore_handle_busyness(const char *cmd __rte_unused, 18 | return 0; 19 | } 20 | 21 | +static int 22 | +lcore_handle_cpuset(const char *cmd __rte_unused, 23 | + const char *params __rte_unused, 24 | + struct rte_tel_data *d) 25 | +{ 26 | + char corenum[64]; 27 | + int i; 28 | + 29 | + rte_tel_data_start_dict(d); 30 | + 31 | + /* Foreach lcore - can't use macro since it excludes ROLE_NON_EAL */ 32 | + for (i = 0; i < RTE_MAX_LCORE; i++) { 33 | + const struct lcore_config *cfg = &lcore_config[i]; 34 | + const rte_cpuset_t *cpuset = &cfg->cpuset; 35 | + struct rte_tel_data *ld; 36 | + unsigned int cpu; 37 | + 38 | + if (!lcore_enabled(i)) 39 | + continue; 40 | + 41 | + /* create an array of integers */ 42 | + ld = rte_tel_data_alloc(); 43 | + if (ld == NULL) 44 | + return -ENOMEM; 45 | + rte_tel_data_start_array(ld, RTE_TEL_INT_VAL); 46 | + 47 | + /* add cpu ID's from cpuset to the array */ 48 | + for (cpu = 0; cpu < CPU_SETSIZE; cpu++) { 49 | + if (!CPU_ISSET(cpu, cpuset)) 50 | + continue; 51 | + rte_tel_data_add_array_int(ld, cpu); 52 | + } 53 | + 54 | + /* add array to the per-lcore container */ 55 | + snprintf(corenum, sizeof(corenum), "%d", i); 56 | + 57 | + /* tell telemetry library to free this array automatically */ 58 | + rte_tel_data_add_dict_container(d, corenum, ld, 0); 59 | + } 60 | + 61 | + return 0; 62 | +} 63 | + 64 | void 65 | eal_lcore_telemetry_free(void) 66 | { 67 | @@ -289,6 +332,9 @@ RTE_INIT(lcore_init_telemetry) 68 | rte_telemetry_register_cmd("/eal/lcore/busyness_disable", lcore_busyness_disable, 69 | 
"disable lcore busyness measurement"); 70 | 71 | + rte_telemetry_register_cmd("/eal/lcore/cpuset", lcore_handle_cpuset, 72 | + "list physical core affinity for each lcore"); 73 | + 74 | __rte_lcore_telemetry_enabled = true; 75 | } 76 | 77 | -- 78 | 2.25.1 79 | 80 | -------------------------------------------------------------------------------- /ipm/patches/dpdk/23.11/0003-add-capacity-endpoint-to-telemetry-thread.patch: -------------------------------------------------------------------------------- 1 | From 2a8e1d477157e299f02fb9e64aa5d197d2caee16 Mon Sep 17 00:00:00 2001 2 | From: David Hunt 3 | Date: Fri, 20 Sep 2024 09:11:45 +0100 4 | Subject: [PATCH 3/3] add capacity endpoint to telemetry thread 5 | 6 | Busyness is calculated on how busy the current core is, ignoring the 7 | current frequency. So a core that's 50% busy at P1 (e.g. 2GHz), shows 8 | as 100% busy at 1GHz. 9 | 10 | This patch adds a new 'capacity' metric that shows a percentage based on 11 | the P1 (base) freqency of the core, so that if the core is 50% busy at 12 | P1, it should show 50% regardless of what the current frequency is. 
13 | 14 | Signed-off-by: David Hunt 15 | --- 16 | lib/eal/common/eal_common_lcore_telemetry.c | 241 ++++++++++++++++++++ 17 | lib/eal/include/rte_lcore.h | 21 ++ 18 | lib/eal/version.map | 1 + 19 | 3 files changed, 263 insertions(+) 20 | 21 | diff --git a/lib/eal/common/eal_common_lcore_telemetry.c b/lib/eal/common/eal_common_lcore_telemetry.c 22 | index f01ccd9a65..1c6d085a55 100644 23 | --- a/lib/eal/common/eal_common_lcore_telemetry.c 24 | +++ b/lib/eal/common/eal_common_lcore_telemetry.c 25 | @@ -10,9 +10,18 @@ 26 | #include 27 | #include 28 | #include 29 | +#include 30 | +#include 31 | +#include 32 | 33 | #ifdef RTE_LCORE_BUSYNESS 34 | #include 35 | +#define MSR_PLATFORM_INFO 0xCE 36 | +#define POWER_SYSFS_CUR_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq" 37 | +#define POWER_SYSFS_BASE_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency" 38 | +#define POWER_SYSFS_SCALING_DRIVER_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_driver" 39 | +#define POWER_SYSFS_SCALING_MAX_FREQ_PATH "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_max_freq" 40 | +#define POWER_SYSFS_MSR_PATH "/dev/cpu/%u/msr" 41 | #endif 42 | 43 | int __rte_lcore_telemetry_enabled; 44 | @@ -47,6 +56,183 @@ static struct lcore_telemetry *telemetry_data; 45 | #define SMOOTH_COEFF 5 46 | #define STATE_CHANGE_OPT 32 47 | 48 | +static int p1_freq[RTE_MAX_LCORE] = {0}; 49 | + 50 | +static int 51 | +try_read_base_frequency(unsigned int lcore_id) 52 | +{ 53 | + char path[PATH_MAX]; 54 | + int fd; 55 | + snprintf(path, sizeof(path), POWER_SYSFS_BASE_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); 56 | + 57 | + fd = open(path, O_RDONLY); 58 | + if (fd == -1) { 59 | + return -1; 60 | + } 61 | + char buffer[16]; 62 | + ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); 63 | + if (bytesRead == -1) { 64 | + close(fd); 65 | + return -1; 66 | + } 67 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 68 | + close(fd); 69 | + 70 | + p1_freq[lcore_id] = atoi(buffer); 71 | 
+ return p1_freq[lcore_id]; 72 | + 73 | + 74 | +} 75 | + 76 | +static int 77 | +try_read_scaling_max_freq(unsigned int lcore_id) 78 | +{ 79 | + char path[PATH_MAX]; 80 | + int freq; 81 | + int fd; 82 | + 83 | + /* 84 | + * If the driver is acpi_cpufreq, we can read the scaling_max_freq file 85 | + */ 86 | + 87 | + snprintf(path, sizeof(path), POWER_SYSFS_SCALING_DRIVER_PATH, rte_lcore_to_cpu_id(lcore_id)); 88 | + fd = open(path, O_RDONLY); 89 | + if (fd == -1) { 90 | + return -1; 91 | + } 92 | + char buffer[16]; 93 | + ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); 94 | + if (bytesRead == -1) { 95 | + close(fd); 96 | + return -1; 97 | + } 98 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 99 | + 100 | + close(fd); 101 | + 102 | + if (strncmp(buffer, "acpi-cpufreq", 12) == 0) { 103 | + /* we can use the scaling_max_freq to get the p1 */ 104 | + snprintf(path, sizeof(path), POWER_SYSFS_SCALING_MAX_FREQ_PATH, rte_lcore_to_cpu_id(lcore_id)); 105 | + fd = open(path, O_RDONLY); 106 | + if (fd == -1) { 107 | + return -1; 108 | + } 109 | + ssize_t bytesRead = pread(fd, buffer, sizeof(buffer) - 1, 0); 110 | + if (bytesRead == -1) { 111 | + close(fd); 112 | + return -1; 113 | + } 114 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 115 | + close(fd); 116 | + freq = atoi(buffer) / 1000; /* convert to KHz */ 117 | + 118 | + /* 119 | + * If the freq value ends with '1', then, turbo is enabled. 120 | + * Round it down to the nearest 100. Otherwise use the value. 
121 | + */ 122 | + return (freq & ~1) * 1000; /* convert to Hz */ 123 | + } 124 | + return -1; 125 | +} 126 | + 127 | +static int 128 | +try_read_msr(unsigned int lcore_id) 129 | +{ 130 | + char path[PATH_MAX]; 131 | + int fd; 132 | + int freq; 133 | + uint64_t data; 134 | + 135 | + /* 136 | + * If the msr driver is present, we can read p1 from MSR_PLATFORM_INFO register 137 | + */ 138 | + snprintf(path, sizeof(path), POWER_SYSFS_MSR_PATH, rte_lcore_to_cpu_id(lcore_id)); 139 | + fd = open(path, O_RDONLY); 140 | + if (fd < 0) { 141 | + return -1; 142 | + } 143 | + 144 | + if (pread(fd, &data, sizeof(data), MSR_PLATFORM_INFO) != sizeof(data)) { 145 | + close(fd); 146 | + return -1; 147 | + } 148 | + 149 | + close(fd); 150 | + 151 | + freq = ((data >> 8) & 0xff) * 100 * 1000; 152 | + 153 | + return freq; 154 | +} 155 | + 156 | + 157 | +static 158 | +int read_sysfs_p1_freq(unsigned int lcore_id) { 159 | + int freq; 160 | + 161 | + /* We've previously got the p1 frequency. */ 162 | + if (p1_freq[lcore_id] != 0) 163 | + return p1_freq[lcore_id]; 164 | + 165 | + /* 166 | + * Check the base_frequency file, if it's there 167 | + */ 168 | + freq = try_read_base_frequency(lcore_id); 169 | + if (freq != -1) { 170 | + p1_freq[lcore_id] = freq; 171 | + return freq; 172 | + } 173 | + 174 | + /* 175 | + * Check the scaling_max_freq file for the acpi-freq driver 176 | + */ 177 | + freq = try_read_scaling_max_freq(lcore_id); 178 | + if (freq != -1) { 179 | + p1_freq[lcore_id] = freq; 180 | + return freq; 181 | + } 182 | + 183 | + /* 184 | + * Try reading from the MSR register 185 | + */ 186 | + freq = try_read_msr(lcore_id); 187 | + if (freq != -1) { 188 | + p1_freq[lcore_id] = freq; 189 | + return freq; 190 | + } 191 | + 192 | + RTE_LOG(ERR, EAL, "Capacity telemetry for lcore %d not supported: no p1 frequency found", 193 | + lcore_id); 194 | + 195 | + return -1; 196 | +} 197 | + 198 | + 199 | +int current_fds[RTE_MAX_LCORE] = {0}; 200 | + 201 | +static 202 | +int 
read_sysfs_cur_freq(unsigned int lcore_id) { 203 | + char path[PATH_MAX]; 204 | + 205 | + if (current_fds[lcore_id] == 0) { 206 | + snprintf(path, sizeof(path), POWER_SYSFS_CUR_PATH, rte_lcore_to_cpu_id(lcore_id)); 207 | + current_fds[lcore_id] = open(path, O_RDONLY); 208 | + if (current_fds[lcore_id] == -1) { 209 | + return -1; 210 | + } 211 | + } 212 | + 213 | + char buffer[16]; 214 | + ssize_t bytesRead = pread(current_fds[lcore_id], buffer, sizeof(buffer) - 1, 0); 215 | + if (bytesRead == -1) { 216 | + return -1; 217 | + } 218 | + 219 | + buffer[bytesRead] = '\0'; // Null-terminate the buffer 220 | + 221 | + int value = atoi(buffer); 222 | + return value; 223 | +} 224 | + 225 | /* Helper function to check if the lcore is enabled. 226 | * Cannot use rte_lcore_is_enabled since it only catches ROLE_RTE threads which 227 | * does not include ROLE_NON_EAL threads which some application threads, for 228 | @@ -102,6 +288,33 @@ int rte_lcore_busyness(unsigned int lcore_id) 229 | return telemetry_data[lcore_id].busyness; 230 | } 231 | 232 | +int rte_lcore_capacity(unsigned int lcore_id) 233 | +{ 234 | + const uint64_t active_thresh = RTE_LCORE_BUSYNESS_PERIOD * 1000; 235 | + struct lcore_telemetry *tdata; 236 | + 237 | + if (lcore_id >= RTE_MAX_LCORE) 238 | + return -EINVAL; 239 | + tdata = &telemetry_data[lcore_id]; 240 | + 241 | + /* if the lcore is not active */ 242 | + if (tdata->interval_ts == 0) 243 | + return LCORE_BUSYNESS_NOT_SET; 244 | + /* if the core hasn't been active in a while */ 245 | + else if ((rte_rdtsc() - tdata->interval_ts) > active_thresh) 246 | + return LCORE_BUSYNESS_NOT_SET; 247 | + 248 | + int cur_freq = read_sysfs_cur_freq(rte_lcore_to_cpu_id(lcore_id)); 249 | + int busy = telemetry_data[lcore_id].busyness; 250 | + int p1 = read_sysfs_p1_freq(lcore_id) ; 251 | + 252 | + if ((busy == -1) || (p1 <= 0)) { 253 | + return -1; 254 | + } else { 255 | + return busy * cur_freq / p1; 256 | + } 257 | +} 258 | + 259 | int rte_lcore_busyness_enabled(void) 
260 | { 261 | return __rte_lcore_telemetry_enabled; 262 | @@ -263,6 +476,26 @@ lcore_handle_busyness(const char *cmd __rte_unused, 263 | return 0; 264 | } 265 | 266 | +static int 267 | +lcore_handle_capacity(const char *cmd __rte_unused, 268 | + const char *params __rte_unused, struct rte_tel_data *d) 269 | +{ 270 | + char corenum[64]; 271 | + int i; 272 | + 273 | + rte_tel_data_start_dict(d); 274 | + 275 | + /* Foreach lcore - can't use macro since it excludes ROLE_NON_EAL */ 276 | + for (i = 0; i < RTE_MAX_LCORE; i++) { 277 | + if (!lcore_enabled(i)) 278 | + continue; 279 | + snprintf(corenum, sizeof(corenum), "%d", i); 280 | + rte_tel_data_add_dict_int(d, corenum, rte_lcore_capacity(i)); 281 | + } 282 | + 283 | + return 0; 284 | +} 285 | + 286 | static int 287 | lcore_handle_cpuset(const char *cmd __rte_unused, 288 | const char *params __rte_unused, 289 | @@ -326,6 +559,9 @@ RTE_INIT(lcore_init_telemetry) 290 | rte_telemetry_register_cmd("/eal/lcore/busyness", lcore_handle_busyness, 291 | "return percentage busyness of cores"); 292 | 293 | + rte_telemetry_register_cmd("/eal/lcore/capacity_used", lcore_handle_capacity, 294 | + "return percentage capacity of cores"); 295 | + 296 | rte_telemetry_register_cmd("/eal/lcore/busyness_enable", lcore_busyness_enable, 297 | "enable lcore busyness measurement"); 298 | 299 | @@ -340,6 +576,11 @@ RTE_INIT(lcore_init_telemetry) 300 | 301 | #else 302 | 303 | +int rte_lcore_capacity(unsigned int lcore_id __rte_unused) 304 | +{ 305 | + return -ENOTSUP; 306 | +} 307 | + 308 | int rte_lcore_busyness(unsigned int lcore_id __rte_unused) 309 | { 310 | return -ENOTSUP; 311 | diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h 312 | index 3c64774bcb..dffb7d1ab5 100644 313 | --- a/lib/eal/include/rte_lcore.h 314 | +++ b/lib/eal/include/rte_lcore.h 315 | @@ -426,6 +426,27 @@ __rte_experimental 316 | int 317 | rte_lcore_busyness(unsigned int lcore_id); 318 | 319 | +/** 320 | + * @warning 321 | + * @b EXPERIMENTAL: this 
API may change without prior notice. 322 | + * 323 | + * Read capacity value corresponding to an lcore. 324 | + * This differs from busyness in that it is related to the current usage 325 | + * of the lcore compared to P1 frequency, not the current frequency. 326 | + * 327 | + * @param lcore_id 328 | + * Lcore to read capacity value for. 329 | + * @return 330 | + * - value between 0 and 100 on success 331 | + * - -1 if lcore is not active 332 | + * - -EINVAL if lcore is invalid 333 | + * - -ENOMEM if not enough memory available 334 | + * - -ENOTSUP if not supported 335 | + */ 336 | +__rte_experimental 337 | +int 338 | +rte_lcore_capacity(unsigned int lcore_id); 339 | + 340 | /** 341 | * @warning 342 | * @b EXPERIMENTAL: this API may change without prior notice. 343 | diff --git a/lib/eal/version.map b/lib/eal/version.map 344 | index a4451d58eb..a2a3ba045f 100644 345 | --- a/lib/eal/version.map 346 | +++ b/lib/eal/version.map 347 | @@ -440,6 +440,7 @@ EXPERIMENTAL { 348 | # added in 20.11 349 | __rte_lcore_telemetry_timestamp; 350 | __rte_lcore_telemetry_enabled; 351 | + rte_lcore_capacity; 352 | rte_lcore_busyness; 353 | rte_lcore_busyness_enabled; 354 | rte_lcore_busyness_enabled_set; 355 | -- 356 | 2.25.1 357 | 358 | -------------------------------------------------------------------------------- /ipm/patches/dpdk/README.md: -------------------------------------------------------------------------------- 1 | # DPDK Patches 2 | 3 | Apply the patches using ```git am {patch}.patch```. 4 | 1. ```20.11 directory``` are a set of patches that add the busyness telemetry to DPDK 20.11.9 5 | 2. ```21.11 directory``` are a set of patches that add the busyness telemetry to DPDK 21.11.8 6 | 3. ```22.11 directory``` are a set of patches that add the busyness telemetry to DPDK 22.11.6 7 | 4. 
```23.11 directory``` are a set of patches that add the busyness telemetry to DPDK 23.11.2 8 | -------------------------------------------------------------------------------- /ipm/patches/vpp/20.09/0001-Subject-PATCH-1-3-vlib-CPU-load-measurement-and-CLI.patch: -------------------------------------------------------------------------------- 1 | From f2539b2ceabd50939fd3fcc4e1c2afd090c36e23 Mon Sep 17 00:00:00 2001 2 | From: Katelyn Donnellan 3 | Date: Fri, 24 Mar 2023 13:32:00 +0000 4 | Subject: [PATCH 1/3] Subject: [PATCH 1/3] vlib: CPU load measurement and CLI 5 | 6 | The patch calculates CPU load based on number of ticks elapsed in 7 | processing packets by main/worker thread. 8 | 9 | New CLI command to query CPU load: 10 | `show cpu load` 11 | --- 12 | src/vlib/cli.c | 33 +++++++++++++++++++++++++++++++++ 13 | src/vlib/global_funcs.h | 16 ++++++++++++++++ 14 | src/vlib/main.c | 16 ++++++++++++++++ 15 | src/vlib/main.h | 6 ++++++ 16 | 4 files changed, 71 insertions(+) 17 | 18 | diff --git a/src/vlib/cli.c b/src/vlib/cli.c 19 | index 2697c0ae0..0267f4e58 100644 20 | --- a/src/vlib/cli.c 21 | +++ b/src/vlib/cli.c 22 | @@ -883,6 +883,39 @@ VLIB_CLI_COMMAND (show_memory_usage_command, static) = { 23 | }; 24 | /* *INDENT-ON* */ 25 | 26 | +static clib_error_t * 27 | +show_cpu_load (vlib_main_t * vm, unformat_input_t * input, 28 | + vlib_cli_command_t * cmd) 29 | +{ 30 | + uword i; 31 | + 32 | + vlib_cli_output (vm, "%10s | %10s | %12s", "Thread", "Core", "Load %"); 33 | + 34 | + for (i = 0; i < vlib_get_n_threads (); i++) 35 | + { 36 | + vlib_main_t *vm_i; 37 | + 38 | + vm_i = vlib_get_main_by_index (i); 39 | + if (!vm_i) 40 | + continue; 41 | + 42 | + vlib_cli_output (vm, "%8u | %8u | %8.2f", i, vm_i->cpu_id, 43 | + (f64)vm_i->cpu_load_points / 100.0); 44 | + } 45 | + 46 | + return 0; 47 | +} 48 | + 49 | +/* *INDENT-OFF* */ 50 | +VLIB_CLI_COMMAND (show_cpu_load_command, static) = { 51 | + .path = "show cpu load", 52 | + .short_help = "Show cpu load", 53 | + 
.function = show_cpu_load, 54 | + .is_mp_safe = 1, 55 | +}; 56 | +/* *INDENT-ON* */ 57 | + 58 | + 59 | static clib_error_t * 60 | show_cpu (vlib_main_t * vm, unformat_input_t * input, 61 | vlib_cli_command_t * cmd) 62 | diff --git a/src/vlib/global_funcs.h b/src/vlib/global_funcs.h 63 | index 9dd01fbfb..b8ad35760 100644 64 | --- a/src/vlib/global_funcs.h 65 | +++ b/src/vlib/global_funcs.h 66 | @@ -19,6 +19,22 @@ 67 | #ifndef included_vlib_global_funcs_h_ 68 | #define included_vlib_global_funcs_h_ 69 | 70 | +always_inline u32 71 | +vlib_get_n_threads () 72 | +{ 73 | + return vec_len (vlib_mains); 74 | +} 75 | + 76 | +always_inline vlib_main_t * 77 | +vlib_get_main_by_index (u32 thread_index) 78 | +{ 79 | + vlib_main_t *vm; 80 | + vm = vlib_mains[thread_index]; 81 | + ASSERT (vm); 82 | + return vm; 83 | +} 84 | + 85 | + 86 | always_inline vlib_main_t * 87 | vlib_get_main (void) 88 | { 89 | diff --git a/src/vlib/main.c b/src/vlib/main.c 90 | index bfe97953a..6af6e1ad8 100644 91 | --- a/src/vlib/main.c 92 | +++ b/src/vlib/main.c 93 | @@ -1209,6 +1209,9 @@ dispatch_node (vlib_main_t * vm, 94 | /* n_vectors */ n, 95 | /* n_clocks */ t - last_time_stamp); 96 | 97 | + if (n) 98 | + vm->cpu_load_clocks += t - last_time_stamp; 99 | + 100 | /* When in interrupt mode and vector rate crosses threshold switch to 101 | polling mode. */ 102 | if (PREDICT_FALSE ((dispatch_state == VLIB_NODE_STATE_INTERRUPT) 103 | @@ -1938,6 +1941,19 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) 104 | /* Record time stamp in case there are no enabled nodes and above 105 | calls do not update time stamp. */ 106 | cpu_time_now = clib_cpu_time_now (); 107 | + /* Time to update cpu load? 
*/ 108 | + if (PREDICT_FALSE (cpu_time_now >= vm->cpu_load_interval_end) ) 109 | + { 110 | + if (vm->cpu_load_interval_start) 111 | + { 112 | + vm->cpu_load_points = (vm->cpu_load_clocks * 1e4) / 113 | + (cpu_time_now - vm->cpu_load_interval_start); 114 | + } 115 | + vm->cpu_load_interval_start = cpu_time_now; 116 | + vm->cpu_load_interval_end = cpu_time_now + 1e9; 117 | + vm->cpu_load_clocks = 0; 118 | + } 119 | + 120 | vm->loops_this_reporting_interval++; 121 | now = clib_time_now_internal (&vm->clib_time, cpu_time_now); 122 | /* Time to update loops_per_second? */ 123 | diff --git a/src/vlib/main.h b/src/vlib/main.h 124 | index 45a521a86..985e0c3e3 100644 125 | --- a/src/vlib/main.h 126 | +++ b/src/vlib/main.h 127 | @@ -132,6 +132,12 @@ typedef struct vlib_main_t 128 | /* Time stamp when main loop was entered (time 0). */ 129 | u64 cpu_time_main_loop_start; 130 | 131 | + /* CPU load measurement */ 132 | + u64 cpu_load_interval_start; 133 | + u64 cpu_load_interval_end; 134 | + u64 cpu_load_clocks; 135 | + u32 cpu_load_points; 136 | + 137 | /* Incremented once for each main loop. */ 138 | volatile u32 main_loop_count; 139 | 140 | -- 141 | 2.17.1 142 | 143 | -------------------------------------------------------------------------------- /ipm/patches/vpp/20.09/0002-Subject-PATCH-2-3-stats-Added-CPU-load-and-queue-bur.patch: -------------------------------------------------------------------------------- 1 | From db46276f403f9a7811042782d78979c978726e82 Mon Sep 17 00:00:00 2001 2 | From: Katelyn Donnellan 3 | Date: Fri, 24 Mar 2023 13:32:52 +0000 4 | Subject: [PATCH 2/3] Subject: [PATCH 2/3] stats: Added CPU load and queue 5 | burst flag in stats 6 | 7 | This patch adds following capabilities: 8 | - flag to indicate when number of packets in DPDK queue cross 9 | configurable queue threshold. 10 | - Stats config parameter to configure interval for CPU load 11 | measurement. 12 | `cpuload-interval