├── .github └── dependabot.yml ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── cmd ├── ctlplane-agent.go └── ctlplane.go ├── docker └── Dockerfile ├── go.mod ├── go.sum ├── hack └── fuzz_all.sh ├── manifest └── ctlplane-daemon.yaml ├── pkg ├── agent │ ├── agent.go │ ├── agent_test.go │ ├── resources.go │ └── resources_test.go ├── cpudaemon │ ├── daemon.go │ ├── daemon_allocators.go │ ├── daemon_allocators_test.go │ ├── daemon_cpuset.go │ ├── daemon_cpuset_test.go │ ├── daemon_fuzz_test.go │ ├── daemon_numa_allocator.go │ ├── daemon_numa_allocator_test.go │ ├── daemon_numa_namespace_allocator.go │ ├── daemon_numa_namespace_allocator_test.go │ ├── daemon_state.go │ ├── daemon_state_test.go │ ├── daemon_test.go │ ├── static_policy.go │ ├── static_policy_test.go │ └── testdata │ │ ├── daemon_cpuset.state │ │ ├── no_state │ │ ├── cpuset.cpus │ │ ├── cpuset.cpus.effective │ │ └── cpuset │ │ │ ├── cpuset.cpus │ │ │ └── test │ │ │ ├── cpuset.cpus │ │ │ └── cpuset.mems │ │ ├── node_info │ │ ├── node0 │ │ │ ├── cpu1 │ │ │ │ └── topology │ │ │ │ │ ├── core_id │ │ │ │ │ ├── die_id │ │ │ │ │ └── package_id │ │ │ ├── cpu3 │ │ │ │ └── topology │ │ │ │ │ ├── core_id │ │ │ │ │ ├── die_id │ │ │ │ │ └── package_id │ │ │ ├── cpu5 │ │ │ │ └── topology │ │ │ │ │ ├── core_id │ │ │ │ │ ├── die_id │ │ │ │ │ └── package_id │ │ │ └── cpu7 │ │ │ │ └── topology │ │ │ │ ├── core_id │ │ │ │ ├── die_id │ │ │ │ └── package_id │ │ └── node1 │ │ │ ├── cpu2 │ │ │ └── topology │ │ │ │ ├── core_id │ │ │ │ ├── die_id │ │ │ │ └── package_id │ │ │ ├── cpu4 │ │ │ └── topology │ │ │ │ ├── core_id │ │ │ │ ├── die_id │ │ │ │ └── package_id │ │ │ ├── cpu6 │ │ │ └── topology │ │ │ │ ├── core_id │ │ │ │ ├── die_id │ │ │ │ └── package_id │ │ │ └── cpu8 │ │ │ └── topology │ │ │ ├── core_id │ │ │ ├── die_id │ │ │ └── package_id │ │ └── with_state │ │ ├── cpuset.cpus.effective │ │ ├── cpuset │ │ └── cpuset.cpus │ │ └── daemon.state ├── ctlplaneapi │ ├── controlplane.pb.go │ ├── controlplane.proto │ ├── controlplane_grpc.pb.go │ ├── ctrlplaneapi_test.go │ ├── ctrplaneapi_server.go │ ├── validation.go │ └── validation_test.go ├── numautils │ ├── dirutils.go │ ├── dirutils_test.go │ ├── discover.go │ ├── discover_test.go │ ├── numa.go │ ├── numa_test.go │ ├── topology.go │ └── topology_test.go └── utils │ ├── fileutils.go │ └── fileutils_test.go └── security.md /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "gomod" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
6 | ## 0.1.2 [01.06.2023] 7 | ### Version Update 8 | - update golang version to 1.20.4 9 | ## 0.1.1 [24.01.2023] 10 | ### Bugfixes 11 | - numa-namespace policies correctly handle more namespaces than numa zones by grouping them 12 | - report error when runtime agent configuration does not match the system runtime 13 | - correct representation of memory resources 14 | - correct cgroup v1 path validation 15 | 16 | ## 0.1 [02.12.2022] 17 | ### Added 18 | - Add support for cgroups v2 19 | - Add support for cgroupfs cgroup driver 20 | - Add exclusive variant of numa-namespace policy. It gives guaranteed pods exclusive access to cpus. 21 | - Add numa-namespace policy. It enables guaranteed, burstable and best-effort pod isolation in a numa zone based on namespace. 22 | - Add numa policy. It enables single-numa allocation. 23 | - Add default policy. It enables standard static cpu management mode without topology management. 24 | - Add support for Kind cluster for running integration tests 25 | - Use klog logging 26 | - Add support for containerd 27 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or advances of 31 | any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email address, 35 | without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate.
50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | CommunityCodeOfConduct AT intel DOT com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series of 86 | actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or permanent 93 | ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within the 113 | community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.1, available at 119 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 120 | 121 | Community Impact Guidelines were inspired by 122 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 123 | 124 | For answers to common questions about this code of conduct, see the FAQ at 125 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 126 | [https://www.contributor-covenant.org/translations][translations]. 
127 | 128 | [homepage]: https://www.contributor-covenant.org 129 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 130 | [Mozilla CoC]: https://github.com/mozilla/diversity 131 | [FAQ]: https://www.contributor-covenant.org/faq 132 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | ### License 4 | 5 | CPU Control Plane Plugin for Kubernetes is licensed under the terms in [LICENSE](LICENSE). By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. 6 | 7 | ### Sign your work 8 | 9 | Please use the sign-off line at the end of the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch. The rules are pretty simple: if you can certify 10 | the below (from [developercertificate.org](http://developercertificate.org/)): 11 | 12 | ``` 13 | Developer Certificate of Origin 14 | Version 1.1 15 | 16 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 17 | 660 York Street, Suite 102, 18 | San Francisco, CA 94110 USA 19 | 20 | Everyone is permitted to copy and distribute verbatim copies of this 21 | license document, but changing it is not allowed. 22 | 23 | Developer's Certificate of Origin 1.1 24 | 25 | By making a contribution to this project, I certify that: 26 | 27 | (a) The contribution was created in whole or in part by me and I 28 | have the right to submit it under the open source license 29 | indicated in the file; or 30 | 31 | (b) The contribution is based upon previous work that, to the best 32 | of my knowledge, is covered under an appropriate open source 33 | license and I have the right under that license to submit that 34 | work with modifications, whether created in whole or in part 35 | by me, under the same open source license (unless I am 36 | permitted to submit under a different license), as indicated 37 | in the file; or 38 | 39 | (c) The contribution was provided directly to me by some other 40 | person who certified (a), (b) or (c) and I have not modified 41 | it. 42 | 43 | (d) I understand and agree that this project and the contribution 44 | are public and that a record of the contribution (including all 45 | personal information I submit with it, including my sign-off) is 46 | maintained indefinitely and may be redistributed consistent with 47 | this project or the open source license(s) involved. 48 | ``` 49 | 50 | Then you just add a line to every git commit message: 51 | 52 | Signed-off-by: Joe Smith 53 | 54 | Use your real name (sorry, no pseudonyms or anonymous contributions.) 55 | 56 | If you set your `user.name` and `user.email` git configs, you can sign your 57 | commit automatically with `git commit -s`. 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | DOCKER_IMAGE_VERSION=0.1 2 | 3 | msg: 4 | echo "Building resourcemanagement.controlplane" 5 | 6 | proto: 7 | protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative pkg/ctlplaneapi/controlplane.proto 8 | 9 | coverage: 10 | go test -count=1 -coverprofile=coverage.out ./... 11 | go tool cover -html=coverage.out -o coverage.html 12 | 13 | image: 14 | docker build -t ctlplane:${DOCKER_IMAGE_VERSION} -f docker/Dockerfile . 15 | 16 | build: 17 | CGO_ENABLED=0 go build -o bin/ctlplane cmd/ctlplane-agent.go cmd/ctlplane.go 18 | 19 | utest: 20 | go test -count=1 -v ./... 21 | 22 | race: 23 | go test -count=1 -race -v ./... 24 | 25 | itest: 26 | go test -count=1 -tags=integration -v ./pkg/integrationtests 27 | 28 | fuzz: 29 | hack/fuzz_all.sh 30 | 31 | clean: 32 | go clean --cache 33 | 34 | golangci: 35 | golangci-lint run ./pkg/... 36 | 37 | all: msg build 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DISCONTINUATION OF PROJECT 2 | This project will no longer be maintained by Intel. 3 | Intel has ceased development and contributions including, but not limited to, maintenance, bug fixes, new releases, or updates, to this project. 4 | Intel no longer accepts patches to this project. 5 | If you have an ongoing need to use this project, are interested in independently developing it, or would like to maintain patches for the open source software community, please create your own fork of this project. 6 | 7 | # CPU Control Plane Plugin for Kubernetes 8 | 9 | ## Requirements: 10 | * Ubuntu 20.04 11 | * Docker: 20.10.14 12 | * Containerd with systemd: 1.5.11 or newer 13 | * Kubernetes 1.23 or newer 14 | 15 | The CPU Control Plane Plugin is a k8s component which enables 16 | fine-grained control of CPU resources in terms of cpu and/or memory pinning. 17 | The component consists of two parts: 18 | * a privileged daemonset responsible for control of the cgroups for a given set of pods and containers 19 | * an agent responsible for watching pod CRUD events 20 | 21 | The current release supports [different allocation strategies](#cpu-policies) for guaranteed, best-effort and burstable containers. 22 | 23 | ## Installation: 24 | 25 | To proceed with installation: 26 | 1. Run `make image` -- this will create a docker image 27 | 2. Push the created image to your preferred registry 28 | 3. Change the registry path in `manifest/ctlplane-daemon.yaml` 29 | 4. Install required components and configurations by invoking `kubectl apply -f manifest/ctlplane-daemon.yaml` 30 | 31 | > **NOTE**: The controlplane component requires admin privileges to function properly.
32 | > Those are installed by default for the `ctlplane` 33 | 34 | ## CPU policies: 35 | 36 | The `allocator` flag currently supports four policies: 37 | 38 | * **default** this policy assigns each guaranteed container to an exclusive subset of cpus. Cpus are taken sequentially 39 | (0, 1, 2, ...) from the list of available cpus. Burstable and best-effort containers are not pinned. 40 | 41 | * **numa** this policy assigns each guaranteed container to an exclusive subset of cpus with minimal topology distance. 42 | Burstable and best-effort containers are not pinned. 43 | 44 | * **numa-namespace:** this policy will isolate each namespace in a separate NUMA zone. 45 | It is required that the system supports a sufficient number of NUMA zones to assign separate zones to 46 | each namespace. Guaranteed containers' cpus are shared with burstable and best-effort containers, but not 47 | with other guaranteed containers. 48 | 49 | * **numa-namespace-exclusive:** same as numa-namespace, except it assigns exclusive cpus 50 | to guaranteed pods (they are not shared with burstable and best-effort containers) 51 | 52 | 53 | ## Configuration options: 54 | 55 | ### CPU policy: 56 | The policies can be switched inside `ctlplane-daemon.yaml` in the `ctlplane-daemonset` container by modifying the `allocator` flag: 57 | 58 | ``` 59 | name: ctlplane-daemonset 60 | (...) 61 | args: [(...), "-allocator", "numa-namespace=2"] 62 | ``` 63 | 64 | This configuration will use **numa-namespace** with 2 namespaces supported at a given time. 65 | 66 | 67 | ### Memory pinning: 68 | User can enable memory pinning when using NUMA-aware allocators. This can be done by invoking the ctlplane daemon with the `-mem` option 69 | ``` 70 | name: ctlplane-daemonset 71 | (...) 72 | args: [(...), "-allocator", "numa-namespace=2", "-mem"] 73 | ``` 74 | 75 | ### CGroup driver: 76 | User can select which cgroup driver is used by the cluster. This can be done by invoking the ctlplane daemon with the `-cgroup-driver DRIVER` option, where `DRIVER` can be either `systemd` or `cgroupfs`. `systemd` is the default if the option is not present. 77 | ``` 78 | name: ctlplane-daemonset 79 | (...) 80 | args: [(...), "-cgroup-driver", "cgroupfs"] 81 | ``` 82 | 83 | ### Container runtime: 84 | User can select which container runtime is used by the cluster. This can be done by invoking the ctlplane daemon with the `-runtime RUNTIME` option, where `RUNTIME` can be either `containerd` or `docker`. Additionally, we support `kind` as a container runtime to be used when Kind is used to set up the cluster. 85 | ``` 86 | name: ctlplane-daemonset 87 | (...) 88 | args: [(...), "-runtime", "containerd"] 89 | ``` 90 | 91 | 92 | ### Agent namespace filter: 93 | The agent can be configured to listen only to CRUD events inside namespaces with a given prefix. This can be configured inside `ctlplane-daemon.yaml` in the `ctlplane-agent` container. 94 | 95 | ``` 96 | name: ctlplane-agent 97 | (...) 98 | args: [(...), "
-namespace-prefix", "test-"] 99 | ``` 100 | 101 | ### Other options 102 | 103 | | Parameter | Possible values | Description | Used by | 104 | | - | - | - | - | 105 | | `-dport` | 0..65353 | Port used by the daemon gRPC server | daemon & agent | 106 | | `-cpath` | string | path to cgroups main directory, usually /sys/fs/cgroup | daemon | 107 | | `-npath` | string | path to sysfs node info, usually /sys/devices/system/node | daemon | 108 | | `-spath` | string | path to daemon state file | daemon | 109 | | `-agent-host` | string | hostname used by the agent, if environment variable `NODE_NAME` is set, this option is overriten | agent | 110 | 111 | ## How to invoke unit tests 112 | 113 | 1. Invoke `make utest` 114 | 115 | ## How to invoke integration tests 116 | 117 | 1. Deploy CPU control plane **daemon** with `numa-namespace-exclusive=2` allocator, and **agent** with `-namespace-prefix test-` 118 | 2. Invoke `make itest` 119 | -------------------------------------------------------------------------------- /cmd/ctlplane-agent.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "os/signal" 8 | 9 | "github.com/go-logr/logr" 10 | "google.golang.org/grpc" 11 | "google.golang.org/grpc/credentials/insecure" 12 | "k8s.io/client-go/kubernetes" 13 | "k8s.io/client-go/rest" 14 | "k8s.io/klog/v2" 15 | "resourcemanagement.controlplane/pkg/agent" 16 | "resourcemanagement.controlplane/pkg/ctlplaneapi" 17 | ) 18 | 19 | func runAgent(daemonPort int, nodeName string, namespacePrefix string, logger logr.Logger) { 20 | config, err := rest.InClusterConfig() 21 | if err != nil { 22 | klog.Fatal(err) 23 | } 24 | clusterClient, err := kubernetes.NewForConfig(config) 25 | if err != nil { 26 | klog.Fatal(err) 27 | } 28 | 29 | logger.Info("connecting to ctlplane daemon gRPC", "address", "localhost", "port", daemonPort) 30 | conn, err := grpc.Dial(fmt.Sprintf("localhost:%d", daemonPort), grpc.WithTransportCredentials(insecure.NewCredentials())) 31 | if err != nil { 32 | klog.Fatal(err) 33 | } 34 | defer conn.Close() 35 | 36 | ctlPlaneClient = ctlplaneapi.NewControlPlaneClient(conn) 37 | ctx, ctxCancel := context.WithCancel(logr.NewContext(context.Background(), logger)) 38 | defer ctxCancel() 39 | 40 | agent := agent.NewAgent(ctx, ctlPlaneClient, namespacePrefix) 41 | if err := agent.Run(clusterClient, nodeName); err != nil { 42 | klog.Fatal(err) 43 | } 44 | 45 | signalChan := make(chan os.Signal, 1) 46 | signal.Notify(signalChan, os.Interrupt) 47 | <-signalChan 48 | } 49 | -------------------------------------------------------------------------------- /cmd/ctlplane.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "flag" 6 | "fmt" 7 | "net" 8 | "os" 9 | "strconv" 10 | "strings" 11 | 12 | "github.com/go-logr/logr" 13 | "k8s.io/klog/v2" 14 | "k8s.io/klog/v2/klogr" 15 | "resourcemanagement.controlplane/pkg/ctlplaneapi" 16 | "resourcemanagement.controlplane/pkg/numautils" 17 | "resourcemanagement.controlplane/pkg/utils" 18 | 19 | "resourcemanagement.controlplane/pkg/cpudaemon" 20 | 21 | "google.golang.org/grpc" 22 | "google.golang.org/grpc/health" 23 | "google.golang.org/grpc/health/grpc_health_v1" 24 | ) 25 | 26 | const defaultDaemonPort = 31000 27 | 28 | var ( 29 | ctlPlaneClient ctlplaneapi.ControlPlaneClient 30 | ) 31 | 32 | type ctlParameters struct { 33 | daemonPort int // ctlplane daemon port 34 | memoryPinning bool // also do 
memory pinning 35 | runtime string // container runtime 36 | cgroupPath string // path to the system cgroup fs 37 | nodeName string // agent node name 38 | numaPath string // path to the sysfs node info 39 | statePath string // path to the state file 40 | allocator string // allocator to use 41 | namespacePrefix string // required namespace prefix 42 | cgroupDriver string // either cgroupfs or systemd 43 | logger logr.Logger // logger 44 | } 45 | 46 | func readNumberFromCommandOrPanic(cmd, prefix string) int { 47 | numNamespaces, err := strconv.Atoi(cmd[len(prefix)+1:]) 48 | if err != nil { 49 | klog.Fatalf("cannot read number of namespaces %s. format is %s=[0-9]+", cmd, prefix) 50 | } 51 | if numNamespaces <= 0 { 52 | klog.Fatalf("number of namespaces must be greater than 0. it is %d", numNamespaces) 53 | } 54 | return numNamespaces 55 | } 56 | 57 | func getAllocator(args ctlParameters) cpudaemon.Allocator { 58 | cR := parseRuntime(args.runtime) 59 | driver := parseCGroupDriver(args.cgroupDriver) 60 | 61 | cgroupController := cpudaemon.NewCgroupController(cR, driver, args.logger) 62 | 63 | if args.allocator == "default" { 64 | if args.memoryPinning { 65 | klog.Fatal("option 'use memory pinning' is available only for numa-aware allocators") 66 | } 67 | return cpudaemon.NewDefaultAllocator(cgroupController) 68 | } 69 | if args.allocator == "numa" { 70 | return cpudaemon.NewNumaAwareAllocator(cgroupController, args.memoryPinning) 71 | } 72 | if strings.HasPrefix(args.allocator, "numa-namespace=") { 73 | numNamespaces := readNumberFromCommandOrPanic(args.allocator, "numa-namespace") 74 | return cpudaemon.NewNumaPerNamespaceAllocator( 75 | numNamespaces, 76 | cgroupController, 77 | false, 78 | args.memoryPinning, 79 | args.logger, 80 | ) 81 | } 82 | if strings.HasPrefix(args.allocator, "numa-namespace-exclusive=") { 83 | numNamespaces := readNumberFromCommandOrPanic(args.allocator, "numa-namespace-exclusive") 84 | return cpudaemon.NewNumaPerNamespaceAllocator( 85 | numNamespaces, 86 | cgroupController, 87 | true, 88 | args.memoryPinning, 89 | args.logger, 90 | ) 91 | } 92 | klog.Fatalf("unknown allocator %s", args.allocator) 93 | return nil 94 | } 95 | 96 | func parseRuntime(runtime string) cpudaemon.ContainerRuntime { 97 | val, ok := map[string]cpudaemon.ContainerRuntime{ 98 | "containerd": cpudaemon.ContainerdRunc, 99 | "kind": cpudaemon.Kind, 100 | "docker": cpudaemon.Docker, 101 | }[runtime] 102 | if !ok { 103 | klog.Fatalf("unknown runtime %s", runtime) 104 | } 105 | return val 106 | } 107 | 108 | func parseCGroupDriver(driver string) cpudaemon.CGroupDriver { 109 | val, ok := map[string]cpudaemon.CGroupDriver{ 110 | "systemd": cpudaemon.DriverSystemd, 111 | "cgroupfs": cpudaemon.DriverCgroupfs, 112 | }[driver] 113 | if !ok { 114 | klog.Fatalf("unknown cgroup driver %s", driver) 115 | } 116 | return val 117 | } 118 | 119 | func runDaemon(args ctlParameters) { 120 | l, err := net.Listen("tcp", fmt.Sprintf(":%d", args.daemonPort)) 121 | if err != nil { 122 | klog.Fatal(err.Error()) 123 | } 124 | 125 | srv := grpc.NewServer() 126 | allocator := getAllocator(args) 127 | policy := cpudaemon.NewStaticPolocy(allocator) 128 | 129 | args.logger.Info( 130 | "starting control plane server", 131 | "nodeName", 132 | args.nodeName, 133 | "allocator", 134 | args.allocator, 135 | "policy", 136 | "static", 137 | ) 138 | 139 | daemon, err := cpudaemon.New(args.cgroupPath, args.numaPath, args.statePath, policy, args.logger) 140 | if err != nil { 141 | klog.Fatal(err) 142 | } 143 | 144 | svc := 
ctlplaneapi.NewServer(daemon) 145 | healthSvc := health.NewServer() 146 | 147 | ctlplaneapi.RegisterControlPlaneServer(srv, svc) 148 | grpc_health_v1.RegisterHealthServer(srv, healthSvc) //nolint: nosnakecase 149 | 150 | err = srv.Serve(l) 151 | if err != nil { 152 | klog.Fatal(err) 153 | } 154 | } 155 | 156 | func runAgentMode(args ctlParameters) { 157 | if os.Getenv("NODE_NAME") != "" { 158 | args.nodeName = os.Getenv("NODE_NAME") 159 | } else if args.nodeName == "" { 160 | klog.Fatal("Running in agent mode with unknown agent node name!") 161 | } 162 | runAgent(args.daemonPort, args.nodeName, args.namespacePrefix, args.logger) 163 | } 164 | 165 | func createLogger() logr.Logger { 166 | flags := flag.NewFlagSet("klog", flag.ContinueOnError) 167 | klog.InitFlags(flags) 168 | _ = flags.Parse([]string{"-v", "3"}) 169 | return klogr.NewWithOptions(klogr.WithFormat(klogr.FormatKlog)) 170 | } 171 | 172 | // normalizePath returns absolute path with symlinks evaluated. 173 | func normalizePath(path string, notExistOk bool) string { 174 | realPath, err := utils.EvaluateRealPath(path) 175 | if err != nil { 176 | if notExistOk && errors.Is(err, os.ErrNotExist) { // file does not exist, 177 | return path 178 | } 179 | klog.Fatal(err) 180 | } 181 | return realPath 182 | } 183 | 184 | func main() { 185 | args := ctlParameters{} 186 | agentMode := false 187 | 188 | flag.BoolVar(&agentMode, "a", false, "Run Controlplane agent") 189 | flag.BoolVar( 190 | &args.memoryPinning, 191 | "mem", 192 | false, 193 | "Pin memory togeter with cpu (valid only for numa-aware allocators)", 194 | ) 195 | flag.IntVar(&args.daemonPort, "dport", defaultDaemonPort, "Specify Control Plane Daemon port") 196 | flag.StringVar( 197 | &args.allocator, 198 | "allocator", 199 | "default", 200 | "Allocator to use. Available are: default, numa, numa-namespace=NUM_NAMESPACES", 201 | ) 202 | flag.StringVar(&args.cgroupPath, "cpath", "/sys/fs/cgroup/", "Specify Path to cgroupds") 203 | flag.StringVar(&args.numaPath, "npath", numautils.LinuxTopologyPath, "Specify Path to sysfs node info") 204 | flag.StringVar(&args.statePath, "spath", "daemon.state", "Specify path to state file") 205 | flag.StringVar(&args.nodeName, "agent-host", "", "Agent node name") 206 | flag.StringVar(&args.namespacePrefix, "namespace-prefix", "", "If set, serves only namespaces with given prefix") 207 | flag.StringVar( 208 | &args.runtime, 209 | "runtime", 210 | "containerd", 211 | "Container Runtime (Default: containerd, Possible values: containerd, docker, kind)", 212 | ) 213 | flag.StringVar(&args.cgroupDriver, "cgroup-driver", "systemd", "Set cgroup driver used by kubelet. 
Values: systemd, cgroupfs") 214 | 215 | flag.Parse() // after declaring flags we need to call it 216 | args.logger = createLogger() 217 | 218 | defer func() { 219 | err := recover() 220 | if err != nil { 221 | args.logger.Info("Fatal error", "value", err) 222 | } 223 | }() 224 | 225 | args.cgroupPath = normalizePath(args.cgroupPath, false) 226 | args.numaPath = normalizePath(args.numaPath, false) 227 | args.statePath = normalizePath(args.statePath, true) 228 | 229 | switch { 230 | case agentMode: 231 | runAgentMode(args) 232 | default: 233 | runDaemon(args) 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 Intel Corporation 2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | FROM golang:1.19.1 AS builder 5 | 6 | WORKDIR /ctlplane 7 | COPY . ./ 8 | RUN go mod download && make build 9 | 10 | FROM scratch 11 | WORKDIR / 12 | COPY --from=builder /ctlplane/bin/ctlplane . 13 | EXPOSE 31000 14 | ENTRYPOINT ["/ctlplane"] 15 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module resourcemanagement.controlplane 2 | 3 | go 1.20 4 | 5 | require ( 6 | github.com/containerd/cgroups v1.1.0 7 | github.com/go-logr/logr v1.2.4 8 | github.com/opencontainers/runtime-spec v1.0.2 9 | github.com/stretchr/testify v1.8.4 10 | google.golang.org/grpc v1.55.0 11 | google.golang.org/protobuf v1.30.0 12 | k8s.io/api v0.27.2 13 | k8s.io/apimachinery v0.27.2 14 | k8s.io/client-go v0.27.2 15 | k8s.io/klog/v2 v2.100.1 16 | ) 17 | 18 | require ( 19 | github.com/cilium/ebpf v0.10.0 // indirect 20 | github.com/coreos/go-systemd/v22 v22.5.0 // indirect 21 | github.com/davecgh/go-spew v1.1.1 // indirect 22 | github.com/docker/go-units v0.5.0 // indirect 23 | github.com/emicklei/go-restful/v3 v3.10.2 // indirect 24 | github.com/go-openapi/jsonpointer v0.19.6 // indirect 25 | github.com/go-openapi/jsonreference v0.20.2 // indirect 26 | github.com/go-openapi/swag v0.22.3 // indirect 27 | github.com/godbus/dbus/v5 v5.1.0 // indirect 28 | github.com/gogo/protobuf v1.3.2 // indirect 29 | github.com/golang/protobuf v1.5.3 // indirect 30 | github.com/google/gnostic v0.6.9 // indirect 31 | github.com/google/go-cmp v0.5.9 // indirect 32 | github.com/google/gofuzz v1.2.0 // indirect 33 | github.com/google/uuid v1.3.0 // indirect 34 | github.com/josharian/intern v1.0.0 // indirect 35 | github.com/json-iterator/go v1.1.12 // indirect 36 | github.com/mailru/easyjson v0.7.7 // indirect 37 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 38 | github.com/modern-go/reflect2 v1.0.2 // indirect 39 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 40 | github.com/pmezard/go-difflib v1.0.0 // indirect 41 | github.com/sirupsen/logrus v1.9.2 // indirect 42 | github.com/stretchr/objx v0.5.0 // indirect 43 | golang.org/x/net v0.10.0 // indirect 44 | golang.org/x/oauth2 v0.8.0 // indirect 45 | golang.org/x/sys v0.8.0 // indirect 46 | golang.org/x/term v0.8.0 // indirect 47 | golang.org/x/text v0.9.0 // indirect 48 | golang.org/x/time v0.3.0 // indirect 49 | google.golang.org/appengine v1.6.7 // indirect 50 | google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19 // indirect 51 | gopkg.in/inf.v0 v0.9.1 // indirect 52 | gopkg.in/yaml.v2 v2.4.0 // indirect 53 | 
gopkg.in/yaml.v3 v3.0.1 // indirect 54 | k8s.io/kube-openapi v0.0.0-20230525220651-2546d827e515 // indirect 55 | k8s.io/utils v0.0.0-20230505201702-9f6742963106 // indirect 56 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect 57 | sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect 58 | sigs.k8s.io/yaml v1.3.0 // indirect 59 | ) 60 | -------------------------------------------------------------------------------- /hack/fuzz_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Modified from Ethan Davidson 3 | ## https://stackoverflow.com/questions/71584005/ 4 | ## how-to-run-multi-fuzz-test-cases-wirtten-in-one-source-file-with-go1-18 5 | 6 | # clean all subprocesses on ctl-c 7 | trap "trap - SIGTERM && kill -- -$$ || true" SIGINT SIGTERM 8 | 9 | set -e 10 | 11 | fuzzTime="${1:-1}"m # read from argument list or fallback to default - 1 minute 12 | 13 | files=$(grep -r --include='**_test.go' --files-with-matches 'func Fuzz' .) 14 | 15 | logsdir="$(dirname "$0")/../fuzzlogs" 16 | mkdir -p "${logsdir}" 17 | 18 | cat <= maxUnsuccesfullAttempts { 213 | klog.Fatal("Exceeded maximum number of unsuccessful attempts") 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /pkg/agent/agent_test.go: -------------------------------------------------------------------------------- 1 | package agent 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "testing" 7 | "time" 8 | 9 | "github.com/go-logr/logr" 10 | "github.com/stretchr/testify/assert" 11 | "github.com/stretchr/testify/mock" 12 | "github.com/stretchr/testify/require" 13 | "google.golang.org/grpc" 14 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 15 | 16 | "resourcemanagement.controlplane/pkg/ctlplaneapi" 17 | ) 18 | 19 | type ControlPlaneClientMock struct { 20 | mock.Mock 21 | } 22 | 23 | func (c *ControlPlaneClientMock) CreatePod( 24 | ctx context.Context, 25 | in *ctlplaneapi.CreatePodRequest, 26 | opts ...grpc.CallOption, 27 | ) (*ctlplaneapi.PodAllocationReply, error) { 28 | args := c.Called(ctx, in) 29 | return args.Get(0).(*ctlplaneapi.PodAllocationReply), args.Error(1) 30 | } 31 | 32 | func (c *ControlPlaneClientMock) UpdatePod( 33 | ctx context.Context, 34 | in *ctlplaneapi.UpdatePodRequest, 35 | opts ...grpc.CallOption, 36 | ) (*ctlplaneapi.PodAllocationReply, error) { 37 | args := c.Called(ctx, in) 38 | return args.Get(0).(*ctlplaneapi.PodAllocationReply), args.Error(1) 39 | } 40 | 41 | func (c *ControlPlaneClientMock) DeletePod( 42 | ctx context.Context, 43 | in *ctlplaneapi.DeletePodRequest, 44 | opts ...grpc.CallOption, 45 | ) (*ctlplaneapi.PodAllocationReply, error) { 46 | args := c.Called(ctx, in) 47 | return args.Get(0).(*ctlplaneapi.PodAllocationReply), args.Error(1) 48 | } 49 | 50 | var _ ctlplaneapi.ControlPlaneClient = &ControlPlaneClientMock{} 51 | var testCtx = logr.NewContext(context.TODO(), logr.Discard()) 52 | 53 | func TestCreatePodPasses(t *testing.T) { 54 | cpMock := ControlPlaneClientMock{} 55 | pod := genTestPods() 56 | podRequest, err := GetCreatePodRequest(&pod) 57 | require.Nil(t, err) 58 | cpMock.On("CreatePod", mock.Anything, podRequest).Return(&ctlplaneapi.PodAllocationReply{}, nil) 59 | agent := NewAgent(testCtx, &cpMock, "") 60 | 61 | agent.update(struct{}{}, &pod) 62 | 63 | cpMock.AssertExpectations(t) 64 | } 65 | 66 | func TestUpdateIgnoresDeletingPods(t *testing.T) { 67 | mock := ControlPlaneClientMock{} 68 | pod := genTestPods() 69 | pod.DeletionTimestamp = &metav1.Time{Time: 
time.Unix(0, 0)} 70 | agent := NewAgent(testCtx, &mock, "") 71 | 72 | agent.update(struct{}{}, &pod) 73 | 74 | mock.AssertExpectations(t) 75 | } 76 | 77 | func TestUpdateIgnoresNamespaceWithWrongPrefix(t *testing.T) { 78 | mock := ControlPlaneClientMock{} 79 | pod := genTestPods() 80 | agent := NewAgent(testCtx, &mock, "test") 81 | 82 | agent.update(struct{}{}, &pod) 83 | 84 | mock.AssertExpectations(t) 85 | } 86 | 87 | func TestUpdateIgnoresInitializingPods(t *testing.T) { 88 | mock := ControlPlaneClientMock{} 89 | pod := genTestPods() 90 | pod.Status.ContainerStatuses[0].Ready = false 91 | agent := NewAgent(testCtx, &mock, "") 92 | 93 | agent.update(struct{}{}, &pod) 94 | 95 | mock.AssertExpectations(t) 96 | } 97 | 98 | func TestUpdatePodPasses(t *testing.T) { 99 | cpMock := ControlPlaneClientMock{} 100 | pod := genTestPods() 101 | podCreateRequest, err := GetCreatePodRequest(&pod) 102 | require.Nil(t, err) 103 | podUpdateRequest, err := GetUpdatePodRequest(&pod) 104 | require.Nil(t, err) 105 | agent := NewAgent(testCtx, &cpMock, "") 106 | 107 | cpMock.On("CreatePod", mock.Anything, podCreateRequest).Return(&ctlplaneapi.PodAllocationReply{}, nil) 108 | agent.update(struct{}{}, &pod) 109 | cpMock.On("UpdatePod", mock.Anything, podUpdateRequest).Return(&ctlplaneapi.PodAllocationReply{}, nil) 110 | agent.update(struct{}{}, &pod) 111 | 112 | cpMock.AssertExpectations(t) 113 | } 114 | 115 | func TestUpdatePodPassesWithError(t *testing.T) { 116 | cpMock := ControlPlaneClientMock{} 117 | pod := genTestPods() 118 | podCreateRequest, err := GetCreatePodRequest(&pod) 119 | require.Nil(t, err) 120 | podUpdateRequest, err := GetUpdatePodRequest(&pod) 121 | require.Nil(t, err) 122 | agent := NewAgent(testCtx, &cpMock, "") 123 | 124 | cpMock.On("CreatePod", mock.Anything, podCreateRequest).Return(&ctlplaneapi.PodAllocationReply{}, nil) 125 | agent.update(struct{}{}, &pod) 126 | err = errors.New("some update error") //nolint 127 | cpMock.On("UpdatePod", mock.Anything, podUpdateRequest).Return(&ctlplaneapi.PodAllocationReply{}, err) 128 | agent.update(struct{}{}, &pod) 129 | assert.Equal(t, agent.numConsecutiveUnsuccessfulAttempts, uint(1)) 130 | } 131 | 132 | func TestDeletePodPasses(t *testing.T) { 133 | cpMock := ControlPlaneClientMock{} 134 | pod := genTestPods() 135 | podCreateRequest, err := GetCreatePodRequest(&pod) 136 | require.Nil(t, err) 137 | podDeleteRequest := GetDeletePodRequest(&pod) 138 | agent := NewAgent(testCtx, &cpMock, "") 139 | 140 | cpMock.On("CreatePod", mock.Anything, podCreateRequest).Return(&ctlplaneapi.PodAllocationReply{}, nil) 141 | agent.update(struct{}{}, &pod) 142 | cpMock.On("DeletePod", mock.Anything, podDeleteRequest).Return(&ctlplaneapi.PodAllocationReply{}, nil) 143 | agent.delete(&pod) 144 | 145 | cpMock.AssertExpectations(t) 146 | } 147 | 148 | func TestDeletePodIfNotAddedPreviously(t *testing.T) { 149 | cpMock := ControlPlaneClientMock{} 150 | pod := genTestPods() 151 | podDeleteRequest := GetDeletePodRequest(&pod) 152 | agent := NewAgent(testCtx, &cpMock, "") 153 | err := errors.New("unsuccessful deletion") //nolint 154 | cpMock.On("DeletePod", mock.Anything, podDeleteRequest).Return(&ctlplaneapi.PodAllocationReply{}, err) 155 | agent.delete(&pod) 156 | assert.Equal(t, agent.numConsecutiveUnsuccessfulAttempts, uint(1)) 157 | cpMock.AssertExpectations(t) 158 | } 159 | 160 | func TestDeleteIgnoresNamespaceWithWrongPrefix(t *testing.T) { 161 | mock := ControlPlaneClientMock{} 162 | pod := genTestPods() 163 | agent := NewAgent(testCtx, &mock, "test") 164 | 165 | 
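// genTestPods uses the "default" namespace, which does not match the "test" prefix configured above, so the agent is expected to skip the delete and make no DeletePod call (the mock has no expectations set).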
agent.delete(&pod) 166 | 167 | mock.AssertExpectations(t) 168 | } 169 | -------------------------------------------------------------------------------- /pkg/agent/resources.go: -------------------------------------------------------------------------------- 1 | package agent 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "math" 7 | 8 | corev1 "k8s.io/api/core/v1" 9 | "k8s.io/apimachinery/pkg/api/resource" 10 | "resourcemanagement.controlplane/pkg/ctlplaneapi" 11 | ) 12 | 13 | var ( 14 | ErrNotRepresentable = errors.New("value not representable as int64") 15 | ErrCountingOverflow = errors.New("values sum is not representable as int32") 16 | ) 17 | 18 | // GetCreatePodRequest creates CreatePodRequest from pod spec. 19 | func GetCreatePodRequest(pod *corev1.Pod) (*ctlplaneapi.CreatePodRequest, error) { 20 | podID := pod.GetUID() 21 | 22 | containerInfo, resourceInfo, err := createPodResources(pod) 23 | 24 | if err != nil { 25 | return nil, err 26 | } 27 | 28 | createPodRequest := &ctlplaneapi.CreatePodRequest{ 29 | PodId: string(podID), 30 | PodName: pod.Name, 31 | PodNamespace: pod.Namespace, 32 | Resources: resourceInfo, 33 | Containers: containerInfo, 34 | } 35 | 36 | return createPodRequest, nil 37 | } 38 | 39 | // GetUpdatePodRequest creates UpdatePodRequest from pod spec. 40 | func GetUpdatePodRequest(pod *corev1.Pod) (*ctlplaneapi.UpdatePodRequest, error) { 41 | podID := pod.GetUID() 42 | 43 | containerInfo, resourceInfo, err := createPodResources(pod) 44 | 45 | if err != nil { 46 | return nil, err 47 | } 48 | 49 | updatePodRequest := &ctlplaneapi.UpdatePodRequest{ 50 | PodId: string(podID), 51 | Resources: resourceInfo, 52 | Containers: containerInfo, 53 | } 54 | 55 | return updatePodRequest, nil 56 | } 57 | 58 | // GetDeletePodRequest creates DeletePodRequest from pod spec. 
59 | func GetDeletePodRequest(pod *corev1.Pod) *ctlplaneapi.DeletePodRequest { 60 | podID := pod.GetUID() 61 | 62 | deletePodRequest := &ctlplaneapi.DeletePodRequest{ 63 | PodId: string(podID), 64 | } 65 | 66 | return deletePodRequest 67 | } 68 | func addContainerMemoryToPod(cInfo *ctlplaneapi.ContainerInfo, 69 | podRequestedMemory *resource.Quantity, 70 | podLimitMemory *resource.Quantity) error { 71 | rm := resource.Quantity{} 72 | err := rm.Unmarshal(cInfo.Resources.RequestedMemory) 73 | var zero resource.Quantity 74 | if err != nil { 75 | return err 76 | } 77 | if rm.Cmp(zero) < 0 { 78 | return fmt.Errorf("mem request: %w", ErrCountingOverflow) 79 | } 80 | podRequestedMemory.Add(rm) 81 | lm := resource.Quantity{} 82 | err = lm.Unmarshal(cInfo.Resources.LimitMemory) 83 | if err != nil { 84 | return err 85 | } 86 | if lm.Cmp(zero) < 0 { 87 | return fmt.Errorf("mem limit: %w", ErrCountingOverflow) 88 | } 89 | podLimitMemory.Add(lm) 90 | return nil 91 | } 92 | func createPodResources(pod *corev1.Pod) ([]*ctlplaneapi.ContainerInfo, *ctlplaneapi.ResourceInfo, error) { 93 | var podRequestedCpus int32 94 | var podLimitCpus int32 95 | var podRequestedMemory resource.Quantity 96 | var podLimitMemory resource.Quantity 97 | 98 | containerInfo := make([]*ctlplaneapi.ContainerInfo, 0) 99 | 100 | for _, container := range pod.Spec.Containers { 101 | container := container // prevent implicit memory alignment of iterator 102 | cInfo, err := getContainerInfo(&container) 103 | if err != nil { 104 | return []*ctlplaneapi.ContainerInfo{}, nil, err 105 | } 106 | cID := getContainerID(container.Name, pod) 107 | cInfo.ContainerId = cID 108 | 109 | podRequestedCpus += cInfo.Resources.RequestedCpus 110 | if podRequestedCpus < 0 { 111 | return containerInfo, nil, fmt.Errorf("cpus request: %w", ErrCountingOverflow) 112 | } 113 | podLimitCpus += cInfo.Resources.LimitCpus 114 | if podLimitCpus < 0 { 115 | return containerInfo, nil, fmt.Errorf("cpus limit: %w", ErrCountingOverflow) 116 | } 117 | 118 | err = addContainerMemoryToPod(cInfo, &podRequestedMemory, &podLimitMemory) 119 | if err != nil { 120 | return []*ctlplaneapi.ContainerInfo{}, nil, err 121 | } 122 | containerInfo = append(containerInfo, cInfo) 123 | } 124 | rm, err := podRequestedMemory.Marshal() 125 | if err != nil { 126 | return containerInfo, nil, err 127 | } 128 | lm, err := podLimitMemory.Marshal() 129 | if err != nil { 130 | return containerInfo, nil, err 131 | } 132 | resourceInfo := &ctlplaneapi.ResourceInfo{ 133 | RequestedCpus: podRequestedCpus, 134 | LimitCpus: podLimitCpus, 135 | RequestedMemory: rm, 136 | LimitMemory: lm, 137 | } 138 | 139 | return containerInfo, resourceInfo, nil 140 | } 141 | 142 | func getContainerInfo(container *corev1.Container) (*ctlplaneapi.ContainerInfo, error) { 143 | containerResuestedCpus, containerRequestedMemory, err := getContainerResources(container.Resources.Requests) 144 | if err != nil { 145 | return nil, fmt.Errorf("requested resources error: %w", err) 146 | } 147 | containerLimitCpus, containerLimitMemory, err := getContainerResources(container.Resources.Limits) 148 | if err != nil { 149 | return nil, fmt.Errorf("limit resources error: %w", err) 150 | } 151 | 152 | containerInfo := &ctlplaneapi.ContainerInfo{ 153 | ContainerName: container.Name, 154 | Resources: &ctlplaneapi.ResourceInfo{ 155 | RequestedCpus: containerResuestedCpus, 156 | LimitCpus: containerLimitCpus, 157 | RequestedMemory: containerRequestedMemory, 158 | LimitMemory: containerLimitMemory, 159 | }, 160 | } 161 | 162 | return 
containerInfo, nil 163 | } 164 | 165 | func getContainerResources(resourceList corev1.ResourceList) (int32, []byte, error) { 166 | cpusQuantity := resourceList.Cpu() 167 | cpus, representable := cpusQuantity.AsInt64() 168 | 169 | if !representable || cpus > math.MaxInt32 || cpus < 0 { 170 | return 0, nil, fmt.Errorf("cpu quantity %v: %w", cpusQuantity, ErrNotRepresentable) 171 | } 172 | 173 | memoryQuantity := resourceList.Memory() 174 | memory, err := memoryQuantity.Marshal() 175 | if err != nil { 176 | return 0, nil, err 177 | } 178 | return int32(cpus), memory, nil 179 | } 180 | 181 | func getContainerID(name string, pod *corev1.Pod) string { 182 | for _, containerStatus := range pod.Status.ContainerStatuses { 183 | if containerStatus.Name == name { 184 | return containerStatus.ContainerID 185 | } 186 | } 187 | 188 | return "" 189 | } 190 | -------------------------------------------------------------------------------- /pkg/agent/resources_test.go: -------------------------------------------------------------------------------- 1 | package agent 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "strconv" 7 | "testing" 8 | "time" 9 | 10 | "github.com/stretchr/testify/assert" 11 | "github.com/stretchr/testify/require" 12 | corev1 "k8s.io/api/core/v1" 13 | "k8s.io/apimachinery/pkg/api/resource" 14 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 15 | "resourcemanagement.controlplane/pkg/ctlplaneapi" 16 | ) 17 | 18 | type resourceSpec struct { 19 | reqCpu string 20 | reqMem string 21 | limCpu string 22 | limMem string 23 | } 24 | 25 | func genPodsFromSpec(containersResources []resourceSpec) corev1.Pod { 26 | containers := make([]corev1.Container, 0, len(containersResources)) 27 | statuses := make([]corev1.ContainerStatus, 0, len(containersResources)) 28 | for i, container := range containersResources { 29 | containers = append(containers, corev1.Container{ 30 | Name: fmt.Sprintf("test container %d", i+1), 31 | Resources: corev1.ResourceRequirements{ 32 | Requests: corev1.ResourceList{ 33 | corev1.ResourceCPU: resource.MustParse(container.reqCpu), 34 | corev1.ResourceMemory: resource.MustParse(container.reqMem), 35 | }, 36 | Limits: corev1.ResourceList{ 37 | corev1.ResourceCPU: resource.MustParse(container.limCpu), 38 | corev1.ResourceMemory: resource.MustParse(container.limMem), 39 | }, 40 | }, 41 | }) 42 | 43 | statuses = append(statuses, corev1.ContainerStatus{ 44 | ContainerID: fmt.Sprintf("id test container %d", i+1), 45 | Name: fmt.Sprintf("test container %d", i+1), 46 | Ready: true, 47 | State: corev1.ContainerState{ 48 | Running: &corev1.ContainerStateRunning{ 49 | StartedAt: metav1.Time{Time: time.Now()}, 50 | }, 51 | }, 52 | }) 53 | } 54 | 55 | pod := corev1.Pod{ 56 | ObjectMeta: metav1.ObjectMeta{ 57 | Name: "mypod", 58 | Namespace: "default", 59 | UID: "123", 60 | }, 61 | Spec: corev1.PodSpec{ 62 | Containers: containers, 63 | }, 64 | Status: corev1.PodStatus{ 65 | Phase: corev1.PodRunning, 66 | ContainerStatuses: statuses, 67 | }, 68 | } 69 | return pod 70 | } 71 | 72 | func genTestPods() corev1.Pod { 73 | return genPodsFromSpec( 74 | []resourceSpec{ 75 | { 76 | reqCpu: "2000", 77 | reqMem: "32Mi", 78 | limCpu: "3000", 79 | limMem: "64Mi", 80 | }, 81 | { 82 | reqCpu: "3000", 83 | reqMem: "24Mi", 84 | limCpu: "4000", 85 | limMem: "48Mi", 86 | }, 87 | { 88 | reqCpu: "3000", 89 | reqMem: "128G", 90 | limCpu: "4000", 91 | limMem: "256Gi", 92 | }, 93 | }, 94 | ) 95 | } 96 | func bytesToQuantity(b []byte) resource.Quantity { 97 | res := resource.Quantity{} 98 | _ = res.Unmarshal(b) 99 | 
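// test helper: the Unmarshal error is deliberately ignored; invalid bytes simply leave res as a zero Quantity.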
return res 100 | } 101 | func totalMemory(args ...string) *resource.Quantity { 102 | tmem := resource.Quantity{} 103 | for _, a := range args { 104 | lmem, _ := resource.ParseQuantity(a) 105 | tmem.Add(lmem) 106 | } 107 | return &tmem 108 | } 109 | func assertResourcesEqualWithTestPod(t *testing.T, ri *ctlplaneapi.ResourceInfo) { 110 | assert.Equal(t, int32(8000), ri.RequestedCpus) 111 | assert.Equal(t, int32(11000), ri.LimitCpus) 112 | assert.Equal(t, totalMemory("56Mi", "128G").Cmp(bytesToQuantity(ri.RequestedMemory)), 0) 113 | assert.Equal(t, totalMemory("112Mi", "256Gi").Cmp(bytesToQuantity(ri.LimitMemory)), 0) 114 | } 115 | 116 | func assertContainersEqualWithTestPod(t *testing.T, ci []*ctlplaneapi.ContainerInfo) { 117 | assert.Equal(t, 3, len(ci)) 118 | assert.Equal(t, "id test container 1", ci[0].ContainerId) 119 | assert.Equal(t, int32(2000), ci[0].Resources.RequestedCpus) 120 | assert.Equal(t, int32(3000), ci[0].Resources.LimitCpus) 121 | assert.Equal(t, totalMemory("32Mi").Cmp(bytesToQuantity(ci[0].Resources.RequestedMemory)), 0) 122 | assert.Equal(t, totalMemory("64Mi").Cmp(bytesToQuantity(ci[0].Resources.LimitMemory)), 0) 123 | assert.Equal(t, int32(3000), ci[1].Resources.RequestedCpus) 124 | assert.Equal(t, int32(4000), ci[1].Resources.LimitCpus) 125 | assert.Equal(t, totalMemory("24Mi").Cmp(bytesToQuantity(ci[1].Resources.RequestedMemory)), 0) 126 | assert.Equal(t, totalMemory("48Mi").Cmp(bytesToQuantity(ci[1].Resources.LimitMemory)), 0) 127 | assert.Equal(t, int32(3000), ci[2].Resources.RequestedCpus) 128 | assert.Equal(t, int32(4000), ci[2].Resources.LimitCpus) 129 | assert.Equal(t, totalMemory("128G").Cmp(bytesToQuantity(ci[2].Resources.RequestedMemory)), 0) 130 | assert.Equal(t, totalMemory("256Gi").Cmp(bytesToQuantity(ci[2].Resources.LimitMemory)), 0) 131 | } 132 | 133 | func TestGetCreatePodRequest(t *testing.T) { 134 | pod := genTestPods() 135 | pR, err := GetCreatePodRequest(&pod) 136 | require.Nil(t, err) 137 | assert.Equal(t, "123", pR.PodId) 138 | assertResourcesEqualWithTestPod(t, pR.Resources) 139 | assertContainersEqualWithTestPod(t, pR.Containers) 140 | } 141 | 142 | func TestGetUpdatePodRequest(t *testing.T) { 143 | pod := genTestPods() 144 | pR, err := GetUpdatePodRequest(&pod) 145 | require.Nil(t, err) 146 | assert.Equal(t, "123", pR.PodId) 147 | assertResourcesEqualWithTestPod(t, pR.Resources) 148 | assertContainersEqualWithTestPod(t, pR.Containers) 149 | } 150 | 151 | func TestGetDeletePodRequest(t *testing.T) { 152 | pod := genTestPods() 153 | pR := GetDeletePodRequest(&pod) 154 | assert.Equal(t, string(pod.GetUID()), pR.PodId) 155 | } 156 | 157 | func TestResourceCountingOverflow(t *testing.T) { 158 | limits := [][]int{{1, 1, 1, 1}, {math.MaxInt32, 1, 1, 1}} 159 | 160 | // jump over memory as it can Mi, Gi ... 
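// only even shifts are used, so the math.MaxInt32 entry always lands on a cpu field:
// shift 0 puts it into reqCpu, shift 2 into limCpu; summing it with the first
// container's value overflows int32 and GetCreatePodRequest must report ErrCountingOverflow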
161 | for i := 0; i < 4; i += 2 { // for each shift of limit indicies 162 | specs := []resourceSpec{} 163 | for _, spec := range limits { 164 | specs = append(specs, resourceSpec{ 165 | reqCpu: strconv.Itoa(spec[(i+0)%4]), 166 | reqMem: strconv.Itoa(spec[(i+1)%4]), 167 | limCpu: strconv.Itoa(spec[(i+2)%4]), 168 | limMem: strconv.Itoa(spec[(i+3)%4]), 169 | }) 170 | } 171 | t.Run(fmt.Sprintf("Shift %d", i), func(t *testing.T) { 172 | pod := genPodsFromSpec(specs) 173 | _, err := GetCreatePodRequest(&pod) 174 | assert.ErrorIs(t, err, ErrCountingOverflow) 175 | }) 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /pkg/cpudaemon/daemon_allocators.go: -------------------------------------------------------------------------------- 1 | package cpudaemon 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path" 7 | "strconv" 8 | "strings" 9 | 10 | "resourcemanagement.controlplane/pkg/ctlplaneapi" 11 | "resourcemanagement.controlplane/pkg/utils" 12 | 13 | "github.com/containerd/cgroups" 14 | cgroupsv2 "github.com/containerd/cgroups/v2" 15 | "github.com/go-logr/logr" 16 | "github.com/opencontainers/runtime-spec/specs-go" 17 | ) 18 | 19 | // ResourceNotSet is used as default resource allocation in CgroupController.UpdateCPUSet invocations. 20 | const ResourceNotSet = "" 21 | 22 | // Allocator interface to take cpu. 23 | type Allocator interface { 24 | takeCpus(c Container, s *DaemonState) error 25 | freeCpus(c Container, s *DaemonState) error 26 | clearCpus(c Container, s *DaemonState) error 27 | } 28 | 29 | // CgroupControllerImpl CgroupController interface implementation. 30 | type CgroupControllerImpl struct { 31 | containerRuntime ContainerRuntime 32 | cgroupDriver CGroupDriver 33 | logger logr.Logger 34 | } 35 | 36 | // NewCgroupController returns initialized CgroupControllerImpl instance. 37 | func NewCgroupController(containerRuntime ContainerRuntime, cgroupDriver CGroupDriver, logger logr.Logger) CgroupControllerImpl { 38 | return CgroupControllerImpl{containerRuntime, cgroupDriver, logger.WithName("cgroupController")} 39 | } 40 | 41 | // CgroupController interface to cgroup library to control cpusets. 42 | type CgroupController interface { 43 | UpdateCPUSet(path string, c Container, cpuSet string, memSet string) error 44 | } 45 | 46 | var _ CgroupController = CgroupControllerImpl{} 47 | 48 | // DefaultAllocator simple static allocator without NUMA. 49 | type DefaultAllocator struct { 50 | ctrl CgroupController 51 | } 52 | 53 | var _ Allocator = &DefaultAllocator{} 54 | 55 | // NewDefaultAllocator constructs default cpu allocator. 56 | func NewDefaultAllocator(controller CgroupController) *DefaultAllocator { 57 | return newAllocator(controller) 58 | } 59 | 60 | func newAllocator(ct CgroupController) *DefaultAllocator { 61 | d := DefaultAllocator{ 62 | ctrl: ct, 63 | } 64 | return &d 65 | } 66 | 67 | // SliceName returns path to container cgroup leaf slice in cgroupfs. 
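// The resulting path depends on both the container runtime and the cgroup driver.
// For instance, a burstable containerd pod driven by systemd resolves to
//
//	/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod<pod_uid>.slice/cri-containerd-<container_id>.scope
//
// (dashes in the pod UID are replaced with underscores), while the same pod under
// the cgroupfs driver resolves to /kubepods/burstable/pod<pod_uid>/<container_id>.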
68 | func SliceName(c Container, r ContainerRuntime, d CGroupDriver) string { 69 | if r == Kind { 70 | return sliceNameKind(c) 71 | } 72 | if d == DriverSystemd { 73 | return sliceNameDockerContainerdWithSystemd(c, r) 74 | } 75 | return sliceNameDockerContainerdWithCgroupfs(c, r) 76 | } 77 | 78 | func sliceNameKind(c Container) string { 79 | podType := [3]string{"", "besteffort/", "burstable/"} 80 | return fmt.Sprintf( 81 | "kubelet/kubepods/%spod%s/%s", 82 | podType[c.QS], 83 | c.PID, 84 | strings.ReplaceAll(c.CID, "containerd://", ""), 85 | ) 86 | } 87 | 88 | func sliceNameDockerContainerdWithSystemd(c Container, r ContainerRuntime) string { 89 | sliceType := [3]string{"", "kubepods-besteffort.slice/", "kubepods-burstable.slice/"} 90 | podType := [3]string{"", "-besteffort", "-burstable"} 91 | runtimeTypePrefix := [2]string{"docker", "cri-containerd"} 92 | runtimeURLPrefix := [2]string{"docker://", "containerd://"} 93 | return fmt.Sprintf( 94 | "/kubepods.slice/%skubepods%s-pod%s.slice/%s-%s.scope", 95 | sliceType[c.QS], 96 | podType[c.QS], 97 | strings.ReplaceAll(c.PID, "-", "_"), 98 | runtimeTypePrefix[r], 99 | strings.ReplaceAll(c.CID, runtimeURLPrefix[r], ""), 100 | ) 101 | } 102 | 103 | func sliceNameDockerContainerdWithCgroupfs(c Container, r ContainerRuntime) string { 104 | sliceType := [3]string{"", "besteffort/", "burstable/"} 105 | runtimeURLPrefix := [2]string{"docker://", "containerd://"} 106 | return fmt.Sprintf( 107 | "/kubepods/%spod%s/%s", 108 | sliceType[c.QS], 109 | c.PID, 110 | strings.ReplaceAll(c.CID, runtimeURLPrefix[r], ""), 111 | ) 112 | } 113 | 114 | func (d *DefaultAllocator) takeCpus(c Container, s *DaemonState) error { 115 | if c.QS != Guaranteed { 116 | return nil 117 | } 118 | for i, b := range s.AvailableCPUs { 119 | if b.EndCPU-b.StartCPU+1-c.Cpus > 0 { 120 | sCPU := b.StartCPU 121 | eCPU := b.StartCPU + c.Cpus - 1 122 | s.AvailableCPUs[i].StartCPU = eCPU + 1 123 | s.Allocated[c.CID] = []ctlplaneapi.CPUBucket{ 124 | { 125 | StartCPU: sCPU, 126 | EndCPU: eCPU, 127 | }, 128 | } 129 | 130 | var t string 131 | if sCPU == eCPU { 132 | t = strconv.Itoa(sCPU) 133 | } else { 134 | t = strconv.Itoa(sCPU) + "-" + strconv.Itoa(eCPU) 135 | } 136 | return d.ctrl.UpdateCPUSet(s.CGroupPath, c, t, ResourceNotSet) 137 | } 138 | } 139 | return DaemonError{ 140 | ErrorType: CpusNotAvailable, 141 | ErrorMessage: "No available cpus for take request", 142 | } 143 | } 144 | 145 | func (d *DefaultAllocator) freeCpus(c Container, s *DaemonState) error { 146 | if c.QS != Guaranteed { 147 | return nil 148 | } 149 | 150 | v, ok := s.Allocated[c.CID] 151 | if !ok { 152 | return DaemonError{ 153 | ErrorType: ContainerNotFound, 154 | ErrorMessage: "Container " + c.CID + " not available for deletion", 155 | } 156 | } 157 | 158 | delete(s.Allocated, c.CID) 159 | for i := 0; i < len(s.AvailableCPUs); i++ { 160 | if v[0].EndCPU == s.AvailableCPUs[i].StartCPU-1 { 161 | s.AvailableCPUs[i].StartCPU = v[0].StartCPU 162 | } 163 | } 164 | return nil 165 | } 166 | 167 | func (d *DefaultAllocator) clearCpus(c Container, s *DaemonState) error { 168 | var allCpus []ctlplaneapi.CPUBucket 169 | allCpus = append(allCpus, s.AvailableCPUs...) 170 | for _, allocated := range s.Allocated { 171 | allCpus = append(allCpus, allocated...) 172 | } 173 | cpuSet := CPUSetFromBucketList(allCpus) 174 | return d.ctrl.UpdateCPUSet(s.CGroupPath, c, cpuSet.ToCpuString(), ResourceNotSet) 175 | } 176 | 177 | // UpdateCPUSet updates the cpu set of a given child process. 
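// The container's CID prefix (docker:// or containerd://) must match the configured
// runtime unless the runtime is Kind; otherwise a ConfigurationError is returned.
// The target slice is resolved via SliceName and cpuset.cpus / cpuset.mems are
// written through the v1 or unified (v2) hierarchy depending on cgroups.Mode();
// with cgroups v1 a non-empty memSet additionally enables cpuset.memory_migrate.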
178 | func (cgc CgroupControllerImpl) UpdateCPUSet(pPath string, c Container, cSet string, memSet string) error { 179 | runtimeURLPrefix := [2]string{"docker://", "containerd://"} 180 | if cgc.containerRuntime == Kind || cgc.containerRuntime != Kind && 181 | strings.Contains(c.CID, runtimeURLPrefix[cgc.containerRuntime]) { 182 | slice := SliceName(c, cgc.containerRuntime, cgc.cgroupDriver) 183 | cgc.logger.V(2).Info("allocating cgroup", "cgroupPath", pPath, "slicePath", slice, "cpuSet", cSet, "memSet", memSet) 184 | 185 | if cgroups.Mode() == cgroups.Unified { 186 | return cgc.updateCgroupsV2(pPath, slice, cSet, memSet) 187 | } 188 | return cgc.updateCgroupsV1(pPath, slice, cSet, memSet) 189 | } 190 | 191 | return DaemonError{ 192 | ErrorType: ConfigurationError, 193 | ErrorMessage: "Control Plane configured runtime does not match pod runtime", 194 | } 195 | } 196 | 197 | func (cgc CgroupControllerImpl) updateCgroupsV1(pPath, slice, cSet, memSet string) error { 198 | outputPath := path.Join(pPath, "cpuset", slice) 199 | if err := utils.ValidatePathInsideBase(outputPath, pPath); err != nil { 200 | return err 201 | } 202 | 203 | ctrl := cgroups.NewCpuset(pPath) 204 | err := ctrl.Update(slice, &specs.LinuxResources{ 205 | CPU: &specs.LinuxCPU{ 206 | Cpus: cSet, 207 | Mems: memSet, 208 | }, 209 | }) 210 | // if we set the memory pinning we should enable memory_migrate in cgroups v1 211 | if err == nil && memSet != "" { 212 | migratePath := path.Join(pPath, "cpuset", slice, "cpuset.memory_migrate") 213 | err = os.WriteFile(migratePath, []byte("1"), os.FileMode(0)) 214 | } 215 | return err 216 | } 217 | 218 | func (cgc CgroupControllerImpl) updateCgroupsV2(pPath, slice, cSet, memSet string) error { 219 | outputPath := path.Join(pPath, slice) 220 | if err := utils.ValidatePathInsideBase(outputPath, pPath); err != nil { 221 | return err 222 | } 223 | 224 | res := cgroupsv2.Resources{CPU: &cgroupsv2.CPU{Cpus: cSet, Mems: memSet}} 225 | _, err := cgroupsv2.NewManager(pPath, slice, &res) 226 | // memory migration in cgroups v2 is always enabled, no need to set it as in cgroupsv1 227 | return err 228 | } 229 | -------------------------------------------------------------------------------- /pkg/cpudaemon/daemon_allocators_test.go: -------------------------------------------------------------------------------- 1 | package cpudaemon 2 | 3 | import ( 4 | "strconv" 5 | "testing" 6 | 7 | "resourcemanagement.controlplane/pkg/ctlplaneapi" 8 | 9 | "github.com/go-logr/logr" 10 | "github.com/stretchr/testify/assert" 11 | "github.com/stretchr/testify/mock" 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | type CgroupsMock struct { 16 | mock.Mock 17 | } 18 | 19 | func (m *CgroupsMock) UpdateCPUSet(pP string, c Container, cpu string, mem string) error { 20 | args := m.Called(pP, c, cpu, mem) 21 | return args.Error(0) 22 | } 23 | 24 | func newMockedPolicy(m CgroupController) *DefaultAllocator { 25 | return newAllocator(m) 26 | } 27 | 28 | func takeCPUs(t *testing.T, d *DefaultAllocator, ctrl *CgroupsMock, st *DaemonState, c Container, s int, e int) { 29 | ctrl.On("UpdateCPUSet", st.CGroupPath, c, strconv.Itoa(s)+"-"+strconv.Itoa(e), ResourceNotSet).Return(nil) 30 | // check no error 31 | assert.Nil(t, d.takeCpus(c, st)) 32 | // check list of allocated containers 33 | v, ok := st.Allocated[c.CID] 34 | assert.True(t, ok) 35 | assert.Equal(t, []ctlplaneapi.CPUBucket{ 36 | { 37 | StartCPU: s, 38 | EndCPU: e, 39 | }, 40 | }, v, "TakeCPU returned unexpected cpu bucket!") 41 | // check list of available cpus 42 
| assert.Equal(t, 43 | []ctlplaneapi.CPUBucket{ 44 | { 45 | StartCPU: e + 1, 46 | EndCPU: 127, 47 | }, 48 | }, st.AvailableCPUs) 49 | // check stored state 50 | } 51 | 52 | func deleteContainer(t *testing.T, d *DefaultAllocator, st *DaemonState, c Container, nS int) { 53 | assert.Nil(t, d.freeCpus(c, st)) 54 | _, ok := st.Allocated[c.CID] 55 | assert.False(t, ok) 56 | assert.Equal(t, 57 | []ctlplaneapi.CPUBucket{ 58 | { 59 | StartCPU: nS, 60 | EndCPU: 127, 61 | }, 62 | }, st.AvailableCPUs) 63 | } 64 | 65 | func TestDefaultAllocatorTakeCPU(t *testing.T) { 66 | daemonStateFile, tearDown := setupTest() 67 | defer tearDown(t) 68 | mockCtrl := CgroupsMock{} 69 | st, err := newState("testdata/no_state", "testdata/node_info", daemonStateFile) 70 | assert.Nil(t, err) 71 | d := newMockedPolicy(&mockCtrl) 72 | c := Container{ 73 | PID: "test_pod_id1", 74 | CID: "test_container_iud1", 75 | Cpus: 10, 76 | QS: Guaranteed, 77 | } 78 | takeCPUs(t, d, &mockCtrl, st, c, 0, 9) 79 | c = Container{ 80 | PID: "test_pod_id2", 81 | CID: "test_container_iud2", 82 | Cpus: 10, 83 | QS: Guaranteed, 84 | } 85 | takeCPUs(t, d, &mockCtrl, st, c, 10, 19) 86 | } 87 | 88 | func TestErrorNoCPUsAvailableOnTake(t *testing.T) { 89 | daemonStateFile, tearDown := setupTest() 90 | defer tearDown(t) 91 | s, err := newState("testdata/no_state", "testdata/node_info", daemonStateFile) 92 | assert.Nil(t, err) 93 | 94 | d := NewDefaultAllocator(NewCgroupController(Docker, DriverSystemd, logr.Discard())) 95 | assert.NotNil(t, d) 96 | c := Container{ 97 | PID: "test_pod_id", 98 | CID: "test_container_id", 99 | Cpus: 129, 100 | QS: Guaranteed, 101 | } 102 | err = d.takeCpus(c, s) 103 | assert.Equal(t, DaemonError{ 104 | ErrorType: CpusNotAvailable, 105 | ErrorMessage: "No available cpus for take request", 106 | }, err) 107 | } 108 | 109 | func TestErrorWrongRuntimeConfiguration(t *testing.T) { 110 | daemonStateFile, tearDown := setupTest() 111 | defer tearDown(t) 112 | st, err := newState("testdata/no_state", "testdata/node_info", daemonStateFile) 113 | assert.Nil(t, err) 114 | d := NewDefaultAllocator(NewCgroupController(Docker, DriverSystemd, logr.Discard())) 115 | assert.NotNil(t, d) 116 | c := Container{ 117 | PID: "test_pod_id1", 118 | CID: "containerd://test_container_iud1", 119 | Cpus: 10, 120 | QS: Guaranteed, 121 | } 122 | err = d.takeCpus(c, st) 123 | assert.Equal(t, DaemonError{ 124 | ErrorType: ConfigurationError, 125 | ErrorMessage: "Control Plane configured runtime does not match pod runtime", 126 | }, err) 127 | } 128 | func TestTakeAndDeleteContainer(t *testing.T) { 129 | daemonStateFile, tearDown := setupTest() 130 | defer tearDown(t) 131 | mockCtrl := CgroupsMock{} 132 | st, err := newState("testdata/no_state", "testdata/node_info", daemonStateFile) 133 | assert.Nil(t, err) 134 | 135 | d := newMockedPolicy(&mockCtrl) 136 | assert.NotNil(t, d) 137 | c := Container{ 138 | PID: "test_pod_id1", 139 | CID: "test_container_iud1", 140 | Cpus: 10, 141 | QS: Guaranteed, 142 | } 143 | takeCPUs(t, d, &mockCtrl, st, c, 0, 9) 144 | c = Container{ 145 | PID: "test_pod_id2", 146 | CID: "test_container_iud2", 147 | Cpus: 10, 148 | QS: Guaranteed, 149 | } 150 | takeCPUs(t, d, &mockCtrl, st, c, 10, 19) 151 | deleteContainer(t, d, st, c, 10) 152 | } 153 | 154 | func TestDefaultAllocatorClearCPU(t *testing.T) { 155 | daemonStateFile, tearDown := setupTest() 156 | defer tearDown(t) 157 | mockCtrl := CgroupsMock{} 158 | st, err := newState("testdata/no_state", "testdata/node_info", daemonStateFile) 159 | assert.Nil(t, err) 160 | d := 
newMockedPolicy(&mockCtrl) 161 | c := Container{ 162 | PID: "test_pod_id1", 163 | CID: "test_container_iud1", 164 | Cpus: 10, 165 | QS: Guaranteed, 166 | } 167 | expectedCpuSet, err := CPUSetFromString("0-127") 168 | require.Nil(t, err) 169 | 170 | mockCtrl.On("UpdateCPUSet", st.CGroupPath, c, expectedCpuSet.ToCpuString(), ResourceNotSet).Return(nil) 171 | assert.Nil(t, d.clearCpus(c, st)) 172 | 173 | mockCtrl.AssertExpectations(t) 174 | } 175 | 176 | func TestSliceNameKind(t *testing.T) { 177 | container := Container{CID: "containerd://cid", PID: "pid-01", QS: Burstable} 178 | expectedSlice := "kubelet/kubepods/burstable/podpid-01/cid" 179 | assert.Equal(t, expectedSlice, SliceName(container, Kind, DriverCgroupfs)) 180 | } 181 | 182 | func TestSliceNameSystemd(t *testing.T) { 183 | container := Container{CID: "containerd://cid", PID: "pid-01", QS: Burstable} 184 | expectedSlice := "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podpid_01.slice/cri-containerd-cid.scope" 185 | assert.Equal(t, expectedSlice, SliceName(container, ContainerdRunc, DriverSystemd)) 186 | } 187 | 188 | func TestSliceNameCgroupfs(t *testing.T) { 189 | container := Container{CID: "docker://cid", PID: "pid-01", QS: Burstable} 190 | expectedSlice := "/kubepods/burstable/podpid-01/cid" 191 | assert.Equal(t, expectedSlice, SliceName(container, Docker, DriverCgroupfs)) 192 | } 193 | -------------------------------------------------------------------------------- /pkg/cpudaemon/daemon_cpuset.go: -------------------------------------------------------------------------------- 1 | package cpudaemon 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "sort" 7 | "strconv" 8 | "strings" 9 | 10 | "resourcemanagement.controlplane/pkg/ctlplaneapi" 11 | ) 12 | 13 | func getValues(path string, cpusetFileName string) ([]ctlplaneapi.CPUBucket, error) { 14 | return LoadCpuSet(filepath.Join(path, cpusetFileName)) 15 | } 16 | 17 | // LoadCpuSet loads and parses cpuset from given path. 18 | func LoadCpuSet(path string) ([]ctlplaneapi.CPUBucket, error) { 19 | cpus, err := os.ReadFile(path) 20 | if err != nil { 21 | return nil, err 22 | } 23 | return LoadCpuSetFromString(string(cpus)) 24 | } 25 | 26 | // LoadCpuSetFromString parses cpuset from given string. 27 | func LoadCpuSetFromString(cpuSet string) ([]ctlplaneapi.CPUBucket, error) { 28 | res := []ctlplaneapi.CPUBucket{} 29 | cStr := strings.Trim(strings.Trim(cpuSet, " "), "\n") 30 | if cStr == "" { 31 | return res, nil 32 | } 33 | s := strings.Split(cStr, ",") 34 | for _, v := range s { 35 | v = strings.TrimSpace(v) 36 | c := strings.Split(v, "-") 37 | a, err := strconv.Atoi(c[0]) 38 | if err != nil { 39 | return []ctlplaneapi.CPUBucket{}, err 40 | } 41 | e := a 42 | if len(c) > 1 { 43 | e, err = strconv.Atoi(c[1]) 44 | if err != nil { 45 | return []ctlplaneapi.CPUBucket{}, err 46 | } 47 | } 48 | 49 | b := ctlplaneapi.CPUBucket{ 50 | StartCPU: a, 51 | EndCPU: e, 52 | } 53 | res = append(res, b) 54 | } 55 | return res, nil 56 | } 57 | 58 | // CPUSet represents set of cpuids. 59 | type CPUSet map[int]struct{} 60 | 61 | func (c CPUSet) String() string { 62 | return c.ToCpuString() 63 | } 64 | 65 | // CPUSetFromBucketList creates CPUSet based on list of ctlplaneapi.CPUBucket. 
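// Buckets are expanded inclusively and duplicates collapse, e.g.
//
//	CPUSetFromBucketList([]ctlplaneapi.CPUBucket{
//		{StartCPU: 0, EndCPU: 3},
//		{StartCPU: 3, EndCPU: 4},
//	}).Sorted() // [0 1 2 3 4]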
66 | func CPUSetFromBucketList(buckets []ctlplaneapi.CPUBucket) CPUSet { 67 | bucketSet := make(CPUSet) 68 | for _, bucket := range buckets { 69 | for cpu := bucket.StartCPU; cpu <= bucket.EndCPU; cpu++ { 70 | bucketSet[cpu] = struct{}{} 71 | } 72 | } 73 | return bucketSet 74 | } 75 | 76 | // CPUSetFromString creates CPUSet based on cgroup cpuset string. 77 | func CPUSetFromString(cpuSetStr string) (CPUSet, error) { 78 | buckets, err := LoadCpuSetFromString(cpuSetStr) 79 | if err != nil { 80 | return CPUSet{}, err 81 | } 82 | return CPUSetFromBucketList(buckets), nil 83 | } 84 | 85 | // Contains checks if given cpuid exists in CPUSet. 86 | func (c CPUSet) Contains(cpu int) bool { 87 | _, ok := c[cpu] 88 | return ok 89 | } 90 | 91 | // Add adds given cpuid to CPUSet. If it's already added this is noop. 92 | func (c CPUSet) Add(cpu int) { 93 | c[cpu] = struct{}{} 94 | } 95 | 96 | // Remove removes given cpuid from CPUSet. If CPUSet does not contain given cpuid this is noop. 97 | func (c CPUSet) Remove(cpu int) { 98 | delete(c, cpu) 99 | } 100 | 101 | // ToBucketList converts CPUSet back to CPUBucket list, sorted by cpuid. 102 | func (c CPUSet) ToBucketList() []ctlplaneapi.CPUBucket { 103 | newBuckets := make([]ctlplaneapi.CPUBucket, 0, c.Count()) 104 | for _, cpu := range c.Sorted() { 105 | newBuckets = append(newBuckets, ctlplaneapi.CPUBucket{StartCPU: cpu, EndCPU: cpu}) 106 | } 107 | return newBuckets 108 | } 109 | 110 | // Merge sums all cpus from two sets. 111 | func (c CPUSet) Merge(other CPUSet) CPUSet { 112 | for cpu := range other { 113 | c[cpu] = struct{}{} 114 | } 115 | return c 116 | } 117 | 118 | // RemoveAll removes all cpus that exist in other. 119 | func (c CPUSet) RemoveAll(other CPUSet) CPUSet { 120 | for cpu := range other { 121 | delete(c, cpu) 122 | } 123 | return c 124 | } 125 | 126 | // Count returns count of cpus in CPUSet. 127 | func (c CPUSet) Count() int { 128 | return len(c) 129 | } 130 | 131 | // Clone returns new CPUSet with same content. 132 | func (c CPUSet) Clone() CPUSet { 133 | o := CPUSet{} 134 | for cpu := range c { 135 | o[cpu] = struct{}{} 136 | } 137 | return o 138 | } 139 | 140 | // Sorted returns sorted list of cpu ids. 141 | func (c CPUSet) Sorted() []int { 142 | keys := make([]int, 0, len(c)) 143 | for k := range c { 144 | keys = append(keys, k) 145 | } 146 | sort.Ints(keys) 147 | return keys 148 | } 149 | 150 | // ToCpuString converts CPUSet to cgroup cpuset compatible string, sorted by cpuid. 
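// Cpu ids are emitted individually rather than compacted into ranges, e.g. the set
// {0, 1, 4} renders as "0,1,4" and an empty set renders as "".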
151 | func (c CPUSet) ToCpuString() string { 152 | if c.Count() == 0 { 153 | return "" 154 | } 155 | b := strings.Builder{} 156 | for _, cpu := range c.Sorted() { 157 | b.WriteString(strconv.Itoa(cpu)) 158 | b.WriteString(",") 159 | } 160 | result := b.String() 161 | return result[:len(result)-1] 162 | } 163 | -------------------------------------------------------------------------------- /pkg/cpudaemon/daemon_cpuset_test.go: -------------------------------------------------------------------------------- 1 | package cpudaemon 2 | 3 | import ( 4 | "testing" 5 | 6 | "resourcemanagement.controlplane/pkg/ctlplaneapi" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestGetCPUSetThrowOnMissingGroup(t *testing.T) { 12 | p := "testdata/" 13 | b, e := getValues(p, "cpuset.cpus") 14 | assert.Nil(t, b) 15 | assert.NotNil(t, e) 16 | } 17 | 18 | func TestGetCPUSet(t *testing.T) { 19 | p := "testdata/no_state" 20 | b, e := getValues(p, "cpuset.cpus") 21 | assert.Nil(t, e) 22 | assert.Equal(t, []ctlplaneapi.CPUBucket{ 23 | { 24 | StartCPU: 0, 25 | EndCPU: 127, 26 | }, 27 | }, b, "Missmatch to expected get cpu value") 28 | } 29 | 30 | func TestCPUSetFromBuckets(t *testing.T) { 31 | buckets := []ctlplaneapi.CPUBucket{ 32 | {StartCPU: 1, EndCPU: 1}, 33 | {StartCPU: 8, EndCPU: 8}, 34 | {StartCPU: 5, EndCPU: 5}, 35 | } 36 | expectedSet := []int{1, 5, 8} 37 | 38 | assert.Equal(t, expectedSet, CPUSetFromBucketList(buckets).Sorted()) 39 | } 40 | 41 | func TestCPUSetFromString(t *testing.T) { 42 | cpuSet, err := CPUSetFromString("1,2-5,7") 43 | assert.Nil(t, err) 44 | assert.Equal(t, []int{1, 2, 3, 4, 5, 7}, cpuSet.Sorted()) 45 | } 46 | 47 | func TestCPUSetContains(t *testing.T) { 48 | cpuSet, err := CPUSetFromString("1,3,6") 49 | assert.Nil(t, err) 50 | 51 | assert.True(t, cpuSet.Contains(1)) 52 | assert.False(t, cpuSet.Contains(2)) 53 | } 54 | 55 | func TestCPUSetAdd(t *testing.T) { 56 | cpuSet := CPUSet{} 57 | cpuSet.Add(1) 58 | 59 | assert.Equal(t, []int{1}, cpuSet.Sorted()) 60 | } 61 | 62 | func TestCPUSetRemove(t *testing.T) { 63 | cpuSet := CPUSet{} 64 | cpuSet.Add(1) 65 | cpuSet.Add(2) 66 | cpuSet.Remove(1) 67 | 68 | assert.Equal(t, []int{2}, cpuSet.Sorted()) 69 | } 70 | 71 | func TestCPUSetToBucketList(t *testing.T) { 72 | cpuSet := CPUSet{} 73 | cpuSet.Add(1) 74 | cpuSet.Add(3) 75 | 76 | assert.Equal(t, []ctlplaneapi.CPUBucket{{StartCPU: 1, EndCPU: 1}, {StartCPU: 3, EndCPU: 3}}, cpuSet.ToBucketList()) 77 | } 78 | 79 | func TestCPUSetMerge(t *testing.T) { 80 | fst, err := CPUSetFromString("1-5") 81 | assert.Nil(t, err) 82 | snd, err := CPUSetFromString("4-8") 83 | assert.Nil(t, err) 84 | 85 | merged := fst.Merge(snd) 86 | assert.Equal(t, []int{1, 2, 3, 4, 5, 6, 7, 8}, merged.Sorted()) 87 | assert.Equal(t, fst, merged) // merge is in-place 88 | } 89 | 90 | func TestCPUSetRemoveAll(t *testing.T) { 91 | fst, err := CPUSetFromString("1-5") 92 | assert.Nil(t, err) 93 | snd, err := CPUSetFromString("4-8") 94 | assert.Nil(t, err) 95 | 96 | removed := fst.RemoveAll(snd) 97 | assert.Equal(t, []int{1, 2, 3}, removed.Sorted()) 98 | assert.Equal(t, fst, removed) // remove is in-place 99 | } 100 | 101 | func TestCPUSetCount(t *testing.T) { 102 | c := CPUSet{} 103 | assert.Equal(t, 0, c.Count()) 104 | c.Add(5) 105 | assert.Equal(t, 1, c.Count()) 106 | } 107 | 108 | func TestCPUSetClone(t *testing.T) { 109 | c := CPUSet{} 110 | c2 := c.Clone() 111 | c2.Add(5) 112 | 113 | assert.Equal(t, 0, c.Count()) 114 | assert.Equal(t, 1, c2.Count()) 115 | } 116 | 117 | func TestCPUSetSorted(t *testing.T) { 118 | 
c, err := CPUSetFromString("7,4,124,8,1,0") 119 | assert.Nil(t, err) 120 | 121 | assert.Equal(t, []int{0, 1, 4, 7, 8, 124}, c.Sorted()) 122 | } 123 | 124 | func TestCPUSetSortedEmpty(t *testing.T) { 125 | assert.Equal(t, []int{}, CPUSet{}.Sorted()) 126 | } 127 | 128 | func TestCPUSetToCpuString(t *testing.T) { 129 | c, err := CPUSetFromString("7,4,124,8,1,0") 130 | assert.Nil(t, err) 131 | 132 | assert.Equal(t, "0,1,4,7,8,124", c.ToCpuString()) 133 | } 134 | 135 | func TestCPUSetToCpuStringEmpty(t *testing.T) { 136 | assert.Equal(t, "", CPUSet{}.ToCpuString()) 137 | } 138 | 139 | func TestCPUSetFromStringWithNewline(t *testing.T) { 140 | fst, err := CPUSetFromString("\n") 141 | assert.Nil(t, err) 142 | 143 | assert.Equal(t, []int{}, fst.Sorted()) 144 | } 145 | -------------------------------------------------------------------------------- /pkg/cpudaemon/daemon_fuzz_test.go: -------------------------------------------------------------------------------- 1 | package cpudaemon 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | "path" 8 | "strings" 9 | "testing" 10 | 11 | "github.com/go-logr/logr" 12 | "github.com/stretchr/testify/mock" 13 | "github.com/stretchr/testify/require" 14 | "k8s.io/apimachinery/pkg/api/resource" 15 | "resourcemanagement.controlplane/pkg/ctlplaneapi" 16 | ) 17 | 18 | func updatePodRequestFromCreate(t *testing.T, base *ctlplaneapi.CreatePodRequest, numDeletes, numUpdates uint) *ctlplaneapi.UpdatePodRequest { 19 | req := ctlplaneapi.UpdatePodRequest{ 20 | PodId: base.PodId, 21 | Resources: &ctlplaneapi.ResourceInfo{}, 22 | } 23 | trm := resource.Quantity{} 24 | tlm := resource.Quantity{} 25 | for i, container := range base.Containers { 26 | c := ctlplaneapi.ContainerInfo{ 27 | ContainerId: container.ContainerId, 28 | ContainerName: container.ContainerName, 29 | Resources: &ctlplaneapi.ResourceInfo{ 30 | RequestedCpus: container.Resources.RequestedCpus, 31 | LimitCpus: container.Resources.LimitCpus, 32 | RequestedMemory: container.Resources.RequestedMemory, 33 | LimitMemory: container.Resources.LimitMemory, 34 | }, 35 | } 36 | crm := resource.Quantity{} 37 | clm := resource.Quantity{} 38 | err := crm.Unmarshal(c.Resources.RequestedMemory) 39 | if err != nil { 40 | t.Fatal("Error Unmarshaling container memory request") 41 | } 42 | err = clm.Unmarshal(c.Resources.LimitMemory) 43 | if err != nil { 44 | t.Fatal("Error Unmarshaling container memory limit") 45 | } 46 | iu := uint(i) 47 | if iu < numDeletes { 48 | continue 49 | } 50 | if iu < numDeletes+numUpdates { 51 | c.Resources.RequestedCpus += 1 52 | c.Resources.LimitCpus += 1 53 | } 54 | req.Containers = append(req.Containers, &c) 55 | req.Resources.LimitCpus += c.Resources.LimitCpus 56 | trm.Add(crm) 57 | req.Resources.RequestedCpus += c.Resources.RequestedCpus 58 | tlm.Add(clm) 59 | } 60 | req.Resources.RequestedMemory, _ = trm.Marshal() 61 | req.Resources.LimitMemory, _ = tlm.Marshal() 62 | return &req 63 | } 64 | 65 | func createPodRequestForFuzzing( 66 | pid, podName, namespace, cid, containerName string, 67 | numContainers uint, 68 | reqCpu, limCpu, reqMem, limMem int32, 69 | ) *ctlplaneapi.CreatePodRequest { 70 | numContainers32 := int32(numContainers) 71 | req := ctlplaneapi.CreatePodRequest{ 72 | PodId: pid, 73 | PodName: podName, 74 | PodNamespace: namespace, 75 | Resources: &ctlplaneapi.ResourceInfo{ 76 | RequestedCpus: reqCpu * numContainers32, 77 | LimitCpus: limCpu * numContainers32, 78 | RequestedMemory: newQuantityAsBytes(int64(reqMem * numContainers32)), 79 | LimitMemory: 
newQuantityAsBytes(int64(limMem * numContainers32)), 80 | }, 81 | Containers: []*ctlplaneapi.ContainerInfo{}, 82 | } 83 | for i := uint(0); i < numContainers; i++ { 84 | req.Containers = append(req.Containers, &ctlplaneapi.ContainerInfo{ 85 | ContainerId: fmt.Sprintf("%s-%d", cid, i), 86 | ContainerName: fmt.Sprintf("%s-name-%d", containerName, i), 87 | Resources: &ctlplaneapi.ResourceInfo{ 88 | RequestedCpus: reqCpu, 89 | LimitCpus: limCpu, 90 | RequestedMemory: newQuantityAsBytes(int64(reqMem)), 91 | LimitMemory: newQuantityAsBytes(int64(limMem)), 92 | }, 93 | }) 94 | } 95 | return &req 96 | } 97 | 98 | func FuzzCreatePod(f *testing.F) { 99 | f.Fuzz(func(t *testing.T, pid, podName, namespace, cid, containerName string, numContainers uint, reqCpu, limCpu, reqMem, limMem int32) { 100 | numContainers %= 100 101 | dir := t.TempDir() 102 | daemonStateFile := path.Join(dir, "daemon.state") 103 | defer os.Remove(daemonStateFile) 104 | 105 | m := MockedPolicy{} 106 | d, err := New("testdata/no_state", "testdata/node_info", daemonStateFile, &m, logr.Discard()) 107 | require.Nil(t, err) 108 | 109 | m.On("AssignContainer", mock.Anything, &d.state).Return(nil).Run(func(args mock.Arguments) { 110 | c := args.Get(0).(Container) 111 | require.Equal(t, int(reqCpu), c.Cpus) 112 | if !strings.HasPrefix(c.CID, cid) { 113 | require.Fail(t, "CID does not have proper prefix", "cid", c.CID, "prefix", cid) 114 | } 115 | if !strings.HasPrefix(c.PID, pid) { 116 | require.Fail(t, "PID does not have proper prefix", "pid", c.PID, "prefix", pid) 117 | } 118 | if !strings.HasPrefix(c.Name, containerName) { 119 | require.Fail(t, "container name does not have proper prefix", "name", c.Name, "prefix", containerName) 120 | } 121 | }) 122 | 123 | req := createPodRequestForFuzzing(pid, podName, namespace, cid, containerName, numContainers, reqCpu, limCpu, reqMem, limMem) 124 | 125 | resp, err := d.CreatePod(req) 126 | 127 | if err != nil { 128 | derr := DaemonError{} 129 | if !errors.As(err, &derr) { 130 | t.Fatal("Error is not of type DaemonError") 131 | } 132 | if derr.ErrorType != PodSpecError { 133 | t.Fatal("Error is of different type than PodSpecError") 134 | } 135 | } else { 136 | require.Equal(t, numContainers, uint(len(resp.ContainerResources))) 137 | m.AssertNumberOfCalls(t, "AssignContainer", int(numContainers)) 138 | } 139 | }) 140 | } 141 | 142 | func FuzzDeletePod(f *testing.F) { 143 | f.Fuzz(func(t *testing.T, pid string, podInState bool) { 144 | dir := t.TempDir() 145 | daemonStateFile := path.Join(dir, "daemon.state") 146 | defer os.Remove(daemonStateFile) 147 | 148 | m := MockedPolicy{} 149 | d, err := New("testdata/no_state", "testdata/node_info", daemonStateFile, &m, logr.Discard()) 150 | require.Nil(t, err) 151 | 152 | if pid != "" && podInState { 153 | d.state.Pods[pid] = PodMetadata{ 154 | PID: "pid", 155 | Name: "name", 156 | Namespace: "namespace", 157 | Containers: []Container{{ 158 | CID: "cid", 159 | PID: "pid", 160 | Name: "name", 161 | Cpus: 3, 162 | }}, 163 | } 164 | m.On("DeleteContainer", d.state.Pods[pid].Containers[0], &d.state).Return(nil).Once() 165 | } 166 | 167 | req := ctlplaneapi.DeletePodRequest{PodId: pid} 168 | err = d.DeletePod(&req) 169 | 170 | if err != nil { 171 | derr := DaemonError{} 172 | if !errors.As(err, &derr) { 173 | t.Fatal("Error is not of type DaemonError") 174 | } 175 | if derr.ErrorType == PodSpecError { 176 | return 177 | } 178 | if podInState { 179 | t.Fatal("Pod is in state and error is of different type than PodSpecError") 180 | } 181 | } 182 | }) 183 | } 184 
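// FuzzUpdatePod creates a pod via CreatePod and then replays an UpdatePodRequest
// derived from it: the first numDel containers are removed and the following
// numUpdate containers get one extra requested/limit cpu. The policy mock is then
// expected to have seen numDel+numUpdate DeleteContainer calls and
// numContainers+numUpdate AssignContainer calls.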
| 185 | func FuzzUpdatePod(f *testing.F) { 186 | f.Fuzz(func( 187 | t *testing.T, pid, podName, namespace, cid, containerName string, numContainers uint, 188 | reqCpu, limCpu, reqMem, limMem int32, numDel uint, numUpdate uint, 189 | ) { 190 | numContainers %= 100 191 | numDel %= 10 192 | numUpdate %= 10 193 | 194 | if numDel+numUpdate > numContainers || numDel == numContainers { 195 | return 196 | } 197 | 198 | dir := t.TempDir() 199 | daemonStateFile := path.Join(dir, "daemon.state") 200 | defer os.Remove(daemonStateFile) 201 | 202 | m := MockedPolicy{} 203 | 204 | d, err := New("testdata/no_state", "testdata/node_info", daemonStateFile, &m, logr.Discard()) 205 | require.Nil(t, err) 206 | 207 | m.On("DeleteContainer", mock.Anything, &d.state).Return(nil) 208 | m.On("AssignContainer", mock.Anything, &d.state).Return(nil).Run(func(args mock.Arguments) { 209 | c := args.Get(0).(Container) 210 | rc := int(reqCpu) 211 | if c.Cpus != rc && c.Cpus != rc+1 { 212 | require.Fail(t, "Wrong number of CPUs", "cpus", c.Cpus, "expected", []int{rc, rc + 1}) 213 | } 214 | if !strings.HasPrefix(c.CID, cid) { 215 | require.Fail(t, "CID does not have proper prefix", "cid", c.CID, "prefix", cid) 216 | } 217 | if !strings.HasPrefix(c.PID, pid) { 218 | require.Fail(t, "PID does not have proper prefix", "pid", c.PID, "prefix", pid) 219 | } 220 | if !strings.HasPrefix(c.Name, containerName) { 221 | require.Fail(t, "container name does not have proper prefix", "name", c.Name, "prefix", containerName) 222 | } 223 | }) 224 | 225 | req := createPodRequestForFuzzing(pid, podName, namespace, cid, containerName, numContainers, reqCpu, limCpu, reqMem, limMem) 226 | _, err = d.CreatePod(req) 227 | 228 | // We add pod and want to continue only if it was added successfully 229 | if err != nil { 230 | return 231 | } 232 | 233 | reqUpdate := updatePodRequestFromCreate(t, req, numDel, numUpdate) 234 | t.Log(reqUpdate) 235 | resp, err := d.UpdatePod(reqUpdate) 236 | 237 | require.Nil(t, err) 238 | require.Equal(t, numUpdate, uint(len(resp.ContainerResources))) 239 | m.AssertNumberOfCalls(t, "DeleteContainer", int(numDel+numUpdate)) 240 | m.AssertNumberOfCalls(t, "AssignContainer", int(numContainers+numUpdate)) 241 | }) 242 | } 243 | -------------------------------------------------------------------------------- /pkg/cpudaemon/daemon_numa_allocator.go: -------------------------------------------------------------------------------- 1 | package cpudaemon 2 | 3 | import ( 4 | "strconv" 5 | "strings" 6 | 7 | "resourcemanagement.controlplane/pkg/ctlplaneapi" 8 | "resourcemanagement.controlplane/pkg/numautils" 9 | ) 10 | 11 | // NumaAwareAllocator allocates cpus based on node numa topology. The topology is represented as tree 12 | // whose leafs are cpus and nodes are next levels of topology organization. For each request alllocator 13 | // find such allocation, that will minimize the topology distance between cpus. In our case the topology 14 | // distance between n leafs is defined as maximal path length from any of those leafs to the nearest 15 | // common predecessor. 16 | type NumaAwareAllocator struct { 17 | ctrl CgroupController 18 | memoryPinning bool 19 | } 20 | 21 | var _ Allocator = &NumaAwareAllocator{} 22 | 23 | // NewNumaAwareAllocator Creates new numa-aware allocator with default cgroup controller. 
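// When memoryPinning is true every cpuset update also pins cpuset.mems to the NUMA
// nodes owning the allocated cpus; when false no memory-node pinning is requested.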
24 | func NewNumaAwareAllocator(cgroupController CgroupController, memoryPinning bool) *NumaAwareAllocator { 25 | return &NumaAwareAllocator{ 26 | ctrl: cgroupController, 27 | memoryPinning: memoryPinning, 28 | } 29 | } 30 | 31 | func getMemoryPinningIfEnabledFromCpuSet(memoryPinning bool, topology *numautils.NumaTopology, cpus CPUSet) string { 32 | if !memoryPinning { 33 | return "" 34 | } 35 | 36 | return getMemoryPinning(topology, cpus.Sorted()) 37 | } 38 | 39 | func getMemoryPinningIfEnabled(memoryPinning bool, topology *numautils.NumaTopology, cpuIds []int) string { 40 | if !memoryPinning { 41 | return "" 42 | } 43 | 44 | return getMemoryPinning(topology, cpuIds) 45 | } 46 | 47 | func getMemoryPinning(topology *numautils.NumaTopology, cpuIds []int) string { 48 | nodesSet := map[int]struct{}{} 49 | 50 | for _, cpu := range cpuIds { 51 | nodesSet[topology.CpuInformation[cpu].Node] = struct{}{} 52 | } 53 | 54 | nodesList := make([]string, 0, len(nodesSet)) 55 | for k := range nodesSet { 56 | nodesList = append(nodesList, strconv.Itoa(k)) 57 | } 58 | return strings.Join(nodesList, ",") 59 | } 60 | 61 | func (d *NumaAwareAllocator) takeCpus(c Container, s *DaemonState) error { 62 | if c.QS != Guaranteed { 63 | return nil 64 | } 65 | 66 | cpuIds, err := s.Topology.Take(c.Cpus) 67 | if err != nil { 68 | return DaemonError{ 69 | ErrorType: CpusNotAvailable, 70 | ErrorMessage: err.Error(), 71 | } 72 | } 73 | 74 | allocatedList := s.Allocated[c.CID] 75 | cpuSetList := make([]string, 0, c.Cpus) 76 | for _, cpuID := range cpuIds { 77 | allocatedList = append(allocatedList, ctlplaneapi.CPUBucket{ 78 | StartCPU: cpuID, 79 | EndCPU: cpuID, 80 | }) 81 | cpuSetList = append(cpuSetList, strconv.Itoa(cpuID)) 82 | } 83 | s.Allocated[c.CID] = allocatedList 84 | 85 | return d.ctrl.UpdateCPUSet( 86 | s.CGroupPath, 87 | c, 88 | strings.Join(cpuSetList, ","), 89 | getMemoryPinningIfEnabled(d.memoryPinning, &s.Topology, cpuIds), 90 | ) 91 | } 92 | 93 | func (d *NumaAwareAllocator) freeCpus(c Container, s *DaemonState) error { 94 | if c.QS != Guaranteed { 95 | return nil 96 | } 97 | 98 | v, ok := s.Allocated[c.CID] 99 | if !ok { 100 | return DaemonError{ 101 | ErrorType: ContainerNotFound, 102 | ErrorMessage: "Container " + c.CID + " not available for deletion", 103 | } 104 | } 105 | 106 | delete(s.Allocated, c.CID) 107 | for _, cpuBucket := range v { 108 | for cpu := cpuBucket.StartCPU; cpu <= cpuBucket.EndCPU; cpu++ { 109 | err := s.Topology.Return(cpu) 110 | if err != nil { 111 | return DaemonError{ 112 | ErrorType: CpusNotAvailable, 113 | ErrorMessage: err.Error(), 114 | } 115 | } 116 | } 117 | } 118 | return nil 119 | } 120 | 121 | func (d *NumaAwareAllocator) clearCpus(c Container, s *DaemonState) error { 122 | allCpus := s.Topology.Topology.GetLeafs() 123 | cpuSet := CPUSet{} 124 | for _, leaf := range allCpus { 125 | cpuSet.Add(leaf.Value) 126 | } 127 | 128 | return d.ctrl.UpdateCPUSet( 129 | s.CGroupPath, 130 | c, 131 | cpuSet.ToCpuString(), 132 | getMemoryPinningIfEnabledFromCpuSet(d.memoryPinning, &s.Topology, cpuSet), 133 | ) 134 | } 135 | -------------------------------------------------------------------------------- /pkg/cpudaemon/daemon_numa_allocator_test.go: -------------------------------------------------------------------------------- 1 | package cpudaemon 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func newMockedNumaAllocator() *NumaAwareAllocator { 12 | cgroupMock := CgroupsMock{} 13 | 
allocator := &NumaAwareAllocator{ 14 | ctrl: &cgroupMock, 15 | memoryPinning: true, 16 | } 17 | return allocator 18 | } 19 | 20 | func TestNumaTakeCpuWithoutMemoryPinning(t *testing.T) { 21 | dir, err := os.MkdirTemp("", "test_cpu") 22 | require.Nil(t, err) 23 | defer os.RemoveAll(dir) 24 | 25 | s := getTestDaemonState(dir, 2) 26 | s.Topology = oneLevelTopology(2) 27 | 28 | allocator := newMockedNumaAllocator() 29 | allocator.memoryPinning = false 30 | container := baseContainer(1) 31 | container.Cpus = 2 32 | 33 | mock := allocator.ctrl.(*CgroupsMock) 34 | mock.On("UpdateCPUSet", s.CGroupPath, container, "0,1", "").Return(nil) 35 | 36 | assert.Nil(t, allocator.takeCpus(container, s)) 37 | 38 | assertCpuState(t, s, &container, "0,1") 39 | mock.AssertExpectations(t) 40 | } 41 | 42 | func TestNumaTakeCpu(t *testing.T) { 43 | dir, err := os.MkdirTemp("", "test_cpu") 44 | require.Nil(t, err) 45 | defer os.RemoveAll(dir) 46 | 47 | s := getTestDaemonState(dir, 2) 48 | s.Topology = oneLevelTopology(2) 49 | 50 | allocator := newMockedNumaAllocator() 51 | container := baseContainer(1) 52 | container.Cpus = 2 53 | 54 | mock := allocator.ctrl.(*CgroupsMock) 55 | mock.On("UpdateCPUSet", s.CGroupPath, container, "0,1", "0").Return(nil) 56 | 57 | assert.Nil(t, allocator.takeCpus(container, s)) 58 | 59 | assertCpuState(t, s, &container, "0,1") 60 | mock.AssertExpectations(t) 61 | } 62 | 63 | func TestNumaTakeCpuFailsIfTooMuchCpus(t *testing.T) { 64 | dir, err := os.MkdirTemp("", "test_cpu") 65 | require.Nil(t, err) 66 | defer os.RemoveAll(dir) 67 | 68 | s := getTestDaemonState(dir, 2) 69 | s.Topology = oneLevelTopology(2) 70 | 71 | allocator := newMockedNumaAllocator() 72 | container := baseContainer(1) 73 | container.Cpus = 3 74 | 75 | assert.NotNil(t, allocator.takeCpus(container, s)) 76 | } 77 | 78 | func TestNumaFreeCpu(t *testing.T) { 79 | dir, err := os.MkdirTemp("", "test_cpu") 80 | require.Nil(t, err) 81 | defer os.RemoveAll(dir) 82 | 83 | s := getTestDaemonState(dir, 2) 84 | s.Topology = oneLevelTopology(2) 85 | 86 | allocator := newMockedNumaAllocator() 87 | 88 | container := baseContainer(1) 89 | 90 | mock := allocator.ctrl.(*CgroupsMock) 91 | mock.On("UpdateCPUSet", s.CGroupPath, container, "0", "0").Return(nil) 92 | 93 | assert.Nil(t, allocator.takeCpus(container, s)) 94 | assert.Contains(t, s.Allocated, container.CID) 95 | 96 | assert.Nil(t, allocator.freeCpus(container, s)) 97 | assert.NotContains(t, s.Allocated, container.CID) 98 | mock.AssertExpectations(t) 99 | } 100 | 101 | func TestNumaClearCpu(t *testing.T) { 102 | dir, err := os.MkdirTemp("", "test_cpu") 103 | require.Nil(t, err) 104 | defer os.RemoveAll(dir) 105 | 106 | s := getTestDaemonState(dir, 2) 107 | s.Topology = oneLevelTopology(2) 108 | 109 | allocator := newMockedNumaAllocator() 110 | container := baseContainer(1) 111 | container.Cpus = 2 112 | 113 | mock := allocator.ctrl.(*CgroupsMock) 114 | mock.On("UpdateCPUSet", s.CGroupPath, container, "0,1", "0").Return(nil) 115 | 116 | assert.Nil(t, allocator.clearCpus(container, s)) 117 | 118 | mock.AssertExpectations(t) 119 | } 120 | -------------------------------------------------------------------------------- /pkg/cpudaemon/daemon_numa_namespace_allocator.go: -------------------------------------------------------------------------------- 1 | package cpudaemon 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "strconv" 7 | "strings" 8 | 9 | "github.com/go-logr/logr" 10 | "resourcemanagement.controlplane/pkg/ctlplaneapi" 11 | "resourcemanagement.controlplane/pkg/numautils" 12 | ) 
13 | 14 | var ErrNamespaceNotEmpty = errors.New("namespace") 15 | var ErrNotEnoughSpaceInBucket = errors.New("not enough free cpus in namespace bucket") 16 | var ErrContainerNotFound = errors.New("cannot find container") 17 | var ErrBucketNotFound = errors.New("namespace cpu bucket not found") 18 | 19 | // NumaPerNamespaceAllocator allocates cpus in N isolated sub-pools, based on namespace. Sub-pools are 20 | // created by splitting topology tree leafs into N buckets. Cpus in a bucket are later assigned 21 | // sequentially to new containers. Only one guaranteed container can be pinned to each cpu, but each 22 | // non-guaranteed container is pinned to all cpus in sub-pool. 23 | type NumaPerNamespaceAllocator struct { 24 | ctrl CgroupController 25 | logger logr.Logger 26 | memoryPinning bool 27 | exclusive bool 28 | NumBuckets int 29 | NamespaceToBucket map[string]int 30 | BucketToNumContainers map[int]int 31 | globalBucket int 32 | } 33 | 34 | var _ Allocator = &NumaPerNamespaceAllocator{} 35 | 36 | // NewNumaPerNamespaceAllocator initializes all fields of the allocator, uses default cgroup controller. 37 | func NewNumaPerNamespaceAllocator( 38 | numNamespaces int, 39 | cgroupController CgroupController, 40 | exclusive bool, 41 | memoryPinning bool, 42 | logger logr.Logger, 43 | ) *NumaPerNamespaceAllocator { 44 | return &NumaPerNamespaceAllocator{ 45 | ctrl: cgroupController, 46 | logger: logger.WithName("numaPerNamespaceAllocator"), 47 | NumBuckets: numNamespaces, 48 | NamespaceToBucket: make(map[string]int), 49 | BucketToNumContainers: make(map[int]int), 50 | exclusive: exclusive, 51 | memoryPinning: memoryPinning, 52 | globalBucket: 0, 53 | } 54 | } 55 | 56 | // getBucket returns list of cpus associated with given namespace. 57 | func (d *NumaPerNamespaceAllocator) getBucket(s *DaemonState, namespace string) ([]*numautils.TopologyNode, error) { 58 | leafs := s.Topology.Topology.GetLeafs() 59 | bucketSize := len(leafs) / d.NumBuckets 60 | 61 | namespaceBucket, ok := d.NamespaceToBucket[namespace] 62 | 63 | if !ok { 64 | return []*numautils.TopologyNode{}, ErrBucketNotFound 65 | } 66 | 67 | if namespaceBucket == d.NumBuckets-1 { // it is last bucket, might be larger 68 | return leafs[bucketSize*namespaceBucket:], nil 69 | } 70 | return leafs[bucketSize*namespaceBucket : bucketSize*(namespaceBucket+1)], nil 71 | } 72 | 73 | func (d *NumaPerNamespaceAllocator) takeCpus(c Container, s *DaemonState) error { 74 | if c.QS == Guaranteed && c.Cpus == 0 { 75 | return DaemonError{ 76 | ErrorType: NotImplemented, 77 | ErrorMessage: "number of guaranteed container cpus shall be greater than 0", 78 | } 79 | } 80 | 81 | podMetadata, ok := s.Pods[c.PID] 82 | if !ok { 83 | return DaemonError{ 84 | ErrorType: PodNotFound, 85 | ErrorMessage: fmt.Sprintf("cannot retrieve pod %s metadata", c.PID), 86 | } 87 | } 88 | 89 | if _, ok := d.NamespaceToBucket[podMetadata.Namespace]; !ok { 90 | if err := d.newNamespace(podMetadata.Namespace); err != nil { 91 | return DaemonError{ 92 | ErrorType: CpusNotAvailable, 93 | ErrorMessage: err.Error(), 94 | } 95 | } 96 | } 97 | 98 | bucket, err := d.getBucket(s, podMetadata.Namespace) 99 | if err != nil { 100 | return DaemonError{ 101 | ErrorType: CpusNotAvailable, 102 | ErrorMessage: err.Error(), 103 | } 104 | } 105 | 106 | namespaceBucket := d.NamespaceToBucket[podMetadata.Namespace] 107 | d.BucketToNumContainers[namespaceBucket]++ 108 | 109 | var cpuIds []int 110 | if c.QS == Guaranteed { 111 | cpuIds, err = d.takeGuaranteedCpusFromBucket(bucket, c) 112 | } else { 113 
| cpuIds, err = d.takeAllCpusFromBucket(bucket, c) 114 | } 115 | if err != nil { 116 | return DaemonError{ 117 | ErrorType: CpusNotAvailable, 118 | ErrorMessage: err.Error(), 119 | } 120 | } 121 | allocatedList := make([]ctlplaneapi.CPUBucket, 0, len(cpuIds)) 122 | cpuSetList := make([]string, 0, len(cpuIds)) 123 | for _, cpuID := range cpuIds { 124 | allocatedList = append(allocatedList, ctlplaneapi.CPUBucket{ 125 | StartCPU: cpuID, 126 | EndCPU: cpuID, 127 | }) 128 | cpuSetList = append(cpuSetList, strconv.Itoa(cpuID)) 129 | } 130 | 131 | s.Allocated[c.CID] = allocatedList 132 | if err = d.ctrl.UpdateCPUSet(s.CGroupPath, c, strings.Join(cpuSetList, ","), getMemoryPinningIfEnabled(d.memoryPinning, &s.Topology, cpuIds)); err != nil { 133 | return err 134 | } 135 | 136 | if d.exclusive && c.QS == Guaranteed { 137 | return d.removeCpusFromCommonPool(s, podMetadata.Namespace, CPUSetFromBucketList(allocatedList)) 138 | } 139 | return nil 140 | } 141 | 142 | func (d *NumaPerNamespaceAllocator) takeGuaranteedCpusFromBucket( 143 | bucket []*numautils.TopologyNode, 144 | c Container, 145 | ) ([]int, error) { 146 | // we firstly check if we are able to allocate daemon 147 | numAvailable := 0 148 | for _, cpu := range bucket { 149 | if cpu.Available() { 150 | numAvailable++ 151 | if numAvailable == c.Cpus { 152 | break // no need to count all 153 | } 154 | } 155 | } 156 | 157 | if numAvailable < c.Cpus { 158 | return []int{}, 159 | fmt.Errorf( 160 | "%w: cannot allocate %d cpus, only %d processors available in bucket", 161 | ErrNotEnoughSpaceInBucket, 162 | c.Cpus, 163 | numAvailable, 164 | ) 165 | } 166 | 167 | // now we can take cpus without having to return them in case if we are unable to allocate them 168 | var cpuIds = make([]int, 0, c.Cpus) 169 | for _, cpu := range bucket { 170 | if cpu.Available() { 171 | cpuIds = append(cpuIds, cpu.Value) 172 | if err := cpu.Take(); err != nil { 173 | return cpuIds, err 174 | } 175 | if len(cpuIds) == c.Cpus { 176 | break 177 | } 178 | } 179 | } 180 | return cpuIds, nil 181 | } 182 | 183 | func (d *NumaPerNamespaceAllocator) takeAllCpusFromBucket( 184 | bucket []*numautils.TopologyNode, 185 | c Container, 186 | ) ([]int, error) { 187 | var cpuIds = make([]int, 0, c.Cpus) 188 | for _, cpu := range bucket { 189 | if !d.exclusive || cpu.Available() { // for exlusive assignment take only cpus not taken exclusively 190 | cpuIds = append(cpuIds, cpu.Value) 191 | } 192 | } 193 | return cpuIds, nil 194 | } 195 | 196 | func (d *NumaPerNamespaceAllocator) freeCpus(c Container, s *DaemonState) error { 197 | v, ok := s.Allocated[c.CID] 198 | if !ok { 199 | return DaemonError{ 200 | ErrorType: ContainerNotFound, 201 | ErrorMessage: "Container " + c.CID + " not available for deletion", 202 | } 203 | } 204 | delete(s.Allocated, c.CID) 205 | 206 | podMetadata, ok := s.Pods[c.PID] 207 | if !ok { 208 | return DaemonError{ 209 | ErrorType: PodNotFound, 210 | ErrorMessage: fmt.Sprintf("cannot retrieve pod %s metadata", c.PID), 211 | } 212 | } 213 | 214 | namespaceBucket := d.NamespaceToBucket[podMetadata.Namespace] 215 | d.BucketToNumContainers[namespaceBucket]-- 216 | if d.BucketToNumContainers[namespaceBucket] == 0 { 217 | if err := d.freeNamespace(podMetadata.Namespace); err != nil { 218 | return DaemonError{RuntimeError, err.Error()} 219 | } 220 | } 221 | 222 | for _, cpuBucket := range v { 223 | for cpu := cpuBucket.StartCPU; cpu <= cpuBucket.EndCPU; cpu++ { 224 | err := s.Topology.Return(cpu) 225 | if err != nil { 226 | return DaemonError{ 227 | ErrorType: 
CpusNotAvailable, 228 | ErrorMessage: err.Error(), 229 | } 230 | } 231 | } 232 | } 233 | if d.exclusive && c.QS == Guaranteed { 234 | return d.addCpusToCommonPool(s, podMetadata.Namespace, CPUSetFromBucketList(v)) 235 | } 236 | return nil 237 | } 238 | 239 | func (d *NumaPerNamespaceAllocator) clearCpus(c Container, s *DaemonState) error { 240 | allCpus := s.Topology.Topology.GetLeafs() 241 | cpuSet := CPUSet{} 242 | for _, leaf := range allCpus { 243 | cpuSet.Add(leaf.Value) 244 | } 245 | return d.ctrl.UpdateCPUSet( 246 | s.CGroupPath, 247 | c, 248 | cpuSet.ToCpuString(), 249 | getMemoryPinningIfEnabledFromCpuSet(d.memoryPinning, &s.Topology, cpuSet), 250 | ) 251 | } 252 | 253 | func (d *NumaPerNamespaceAllocator) newNamespace(namespace string) error { 254 | d.NamespaceToBucket[namespace] = d.globalBucket % d.NumBuckets 255 | d.globalBucket++ 256 | d.logger.Info("created namespace bucket", "name", namespace) 257 | return nil 258 | } 259 | 260 | func (d *NumaPerNamespaceAllocator) freeNamespace(namespace string) error { 261 | namespaceBucket := d.NamespaceToBucket[namespace] 262 | if d.BucketToNumContainers[namespaceBucket] > 0 { 263 | return ErrNamespaceNotEmpty 264 | } 265 | 266 | delete(d.BucketToNumContainers, namespaceBucket) 267 | delete(d.NamespaceToBucket, namespace) 268 | d.logger.Info("deleted namespace bucket", "name", namespace) 269 | return nil 270 | } 271 | 272 | func (d *NumaPerNamespaceAllocator) removeCpusFromCommonPool(s *DaemonState, namespace string, cpus CPUSet) error { 273 | for cid, allocatedList := range s.Allocated { 274 | c, err := findContainer(s, cid) 275 | if err != nil { 276 | d.logger.Error(err, "cannot find container") 277 | continue 278 | } 279 | if s.Pods[c.PID].Namespace != namespace || c.QS == Guaranteed { 280 | continue 281 | } 282 | 283 | originalCPUs := CPUSetFromBucketList(allocatedList) 284 | newCPUs := originalCPUs.Clone().RemoveAll(cpus) 285 | d.logger.Info( 286 | "reallocating container", 287 | "reason", 288 | "remove", 289 | "cid", 290 | cid, 291 | "originalBuckets", 292 | originalCPUs, 293 | "newBucket", 294 | newCPUs, 295 | ) 296 | err = d.ctrl.UpdateCPUSet( 297 | s.CGroupPath, 298 | c, 299 | newCPUs.ToCpuString(), 300 | getMemoryPinningIfEnabledFromCpuSet(d.memoryPinning, &s.Topology, newCPUs), 301 | ) 302 | if err != nil { 303 | d.logger.Error(err, "could not remove cpus from common pool", "cid", cid) 304 | return err 305 | } 306 | s.Allocated[cid] = newCPUs.ToBucketList() 307 | } 308 | return nil 309 | } 310 | 311 | func (d *NumaPerNamespaceAllocator) addCpusToCommonPool(s *DaemonState, namespace string, cpus CPUSet) error { 312 | for cid, allocatedList := range s.Allocated { 313 | c, err := findContainer(s, cid) 314 | if err != nil { 315 | d.logger.Error(err, "cannot find container") 316 | continue 317 | } 318 | if s.Pods[c.PID].Namespace != namespace || c.QS == Guaranteed { 319 | continue 320 | } 321 | 322 | originalCPUs := CPUSetFromBucketList(allocatedList) 323 | newCPUs := originalCPUs.Clone().Merge(cpus) 324 | d.logger.Info( 325 | "reallocating container", 326 | "reason", 327 | "add", 328 | "cid", 329 | cid, 330 | "originalBuckets", 331 | originalCPUs, 332 | "newBucket", 333 | newCPUs, 334 | ) 335 | err = d.ctrl.UpdateCPUSet( 336 | s.CGroupPath, 337 | c, 338 | newCPUs.ToCpuString(), 339 | getMemoryPinningIfEnabledFromCpuSet(d.memoryPinning, &s.Topology, newCPUs), 340 | ) 341 | if err != nil { 342 | return err 343 | } 344 | s.Allocated[cid] = newCPUs.ToBucketList() 345 | } 346 | return nil 347 | } 348 | 349 | func findContainer(s 
*DaemonState, cid string) (Container, error) { 350 | for _, podMeta := range s.Pods { 351 | for _, container := range podMeta.Containers { 352 | if container.CID == cid { 353 | return container, nil 354 | } 355 | } 356 | } 357 | return Container{}, fmt.Errorf("%w %s", ErrContainerNotFound, cid) 358 | } 359 | -------------------------------------------------------------------------------- /pkg/cpudaemon/daemon_numa_namespace_allocator_test.go: -------------------------------------------------------------------------------- 1 | package cpudaemon 2 | 3 | import ( 4 | "os" 5 | "strconv" 6 | "testing" 7 | 8 | "github.com/go-logr/logr" 9 | "github.com/stretchr/testify/assert" 10 | "github.com/stretchr/testify/mock" 11 | "github.com/stretchr/testify/require" 12 | "resourcemanagement.controlplane/pkg/ctlplaneapi" 13 | "resourcemanagement.controlplane/pkg/numautils" 14 | ) 15 | 16 | func oneLevelTopology(numCpus int) numautils.NumaTopology { 17 | topology := numautils.NumaTopology{ 18 | CpuInformation: make(map[int]numautils.CpuInfo), 19 | } 20 | 21 | cpus := []numautils.CpuInfo{} 22 | for i := 0; i < numCpus; i++ { 23 | cpus = append(cpus, numautils.CpuInfo{ 24 | Cpu: i, 25 | }) 26 | } 27 | 28 | if err := topology.LoadFromCpuInfo(cpus); err != nil { 29 | panic(err) 30 | } 31 | return topology 32 | } 33 | 34 | func getTestDaemonState(tempDir string, numCpus int) *DaemonState { 35 | s := DaemonState{ 36 | Allocated: map[string][]ctlplaneapi.CPUBucket{}, 37 | Pods: map[string]PodMetadata{ 38 | "pod1": { 39 | PID: "pod1", 40 | Name: "pod1_name", 41 | Namespace: "pod1_namespace", 42 | }, 43 | "pod2": { 44 | PID: "pod2", 45 | Name: "pod2_name", 46 | Namespace: "pod2_namespace", 47 | }, 48 | "pod3": { 49 | PID: "pod3", 50 | Name: "pod3_name", 51 | Namespace: "pod3_namespace", 52 | }, 53 | }, 54 | Topology: numautils.NumaTopology{}, 55 | CGroupPath: tempDir, 56 | } 57 | s.Topology = oneLevelTopology(numCpus) 58 | 59 | return &s 60 | } 61 | 62 | func newMockedNumaPerNamespaceAllocator(numBuckets int, exclusive bool) *NumaPerNamespaceAllocator { 63 | cgroupMock := CgroupsMock{} 64 | allocator := &NumaPerNamespaceAllocator{ 65 | ctrl: &cgroupMock, 66 | logger: logr.Discard(), 67 | exclusive: exclusive, 68 | NumBuckets: numBuckets, 69 | NamespaceToBucket: map[string]int{}, 70 | BucketToNumContainers: map[int]int{}, 71 | memoryPinning: true, 72 | } 73 | return allocator 74 | } 75 | 76 | func baseContainer(num int) Container { 77 | numStr := strconv.Itoa(num) 78 | return Container{ 79 | CID: "cid" + numStr, 80 | PID: "pod" + numStr, 81 | Name: "cid" + numStr + "_name", 82 | Cpus: 1, 83 | QS: Guaranteed, 84 | } 85 | } 86 | 87 | func getGuaranteedAndBurstableContainers() (Container, Container) { 88 | guaranteed := baseContainer(1) 89 | burstable := baseContainer(2) 90 | burstable.PID = "pod1" 91 | burstable.QS = Burstable 92 | return guaranteed, burstable 93 | } 94 | 95 | func addContainerToState(s *DaemonState, c Container) { 96 | podMeta := s.Pods[c.PID] 97 | podMeta.Containers = append(podMeta.Containers, c) 98 | s.Pods[c.PID] = podMeta 99 | } 100 | 101 | func assertCpuState(t *testing.T, s *DaemonState, container *Container, cpuString string) { 102 | expectedCpus, err := CPUSetFromString(cpuString) 103 | require.Nil(t, err) 104 | assert.Equal(t, expectedCpus, CPUSetFromBucketList(s.Allocated[container.CID])) 105 | } 106 | 107 | func TestNumaNamespaceTakeCpuWithoutMemoryPinning(t *testing.T) { 108 | dir, err := os.MkdirTemp("", "test_cpu") 109 | require.Nil(t, err) 110 | defer os.RemoveAll(dir) 111 | 112 | s 
:= getTestDaemonState(dir, 2) 113 | 114 | allocator := newMockedNumaPerNamespaceAllocator(2, false) 115 | allocator.memoryPinning = false 116 | containerNs1 := baseContainer(1) 117 | containerNs2 := baseContainer(2) 118 | 119 | mock := allocator.ctrl.(*CgroupsMock) 120 | mock.On("UpdateCPUSet", s.CGroupPath, containerNs1, "0", "").Return(nil) 121 | mock.On("UpdateCPUSet", s.CGroupPath, containerNs2, "1", "").Return(nil) 122 | 123 | assert.Nil(t, allocator.takeCpus(containerNs1, s)) 124 | assert.Nil(t, allocator.takeCpus(containerNs2, s)) 125 | 126 | assertCpuState(t, s, &containerNs1, "0") 127 | assertCpuState(t, s, &containerNs2, "1") 128 | } 129 | 130 | func TestNumaNamespaceTakeCpu(t *testing.T) { 131 | dir, err := os.MkdirTemp("", "test_cpu") 132 | require.Nil(t, err) 133 | defer os.RemoveAll(dir) 134 | 135 | s := getTestDaemonState(dir, 2) 136 | 137 | allocator := newMockedNumaPerNamespaceAllocator(2, false) 138 | containerNs1 := baseContainer(1) 139 | containerNs2 := baseContainer(2) 140 | 141 | mock := allocator.ctrl.(*CgroupsMock) 142 | mock.On("UpdateCPUSet", s.CGroupPath, containerNs1, "0", "0").Return(nil) 143 | mock.On("UpdateCPUSet", s.CGroupPath, containerNs2, "1", "0").Return(nil) 144 | 145 | assert.Nil(t, allocator.takeCpus(containerNs1, s)) 146 | assert.Nil(t, allocator.takeCpus(containerNs2, s)) 147 | 148 | assertCpuState(t, s, &containerNs1, "0") 149 | assertCpuState(t, s, &containerNs2, "1") 150 | } 151 | 152 | func TestNumaNamespaceOversubscribedTakeCpu(t *testing.T) { 153 | dir, err := os.MkdirTemp("", "test_cpu") 154 | require.Nil(t, err) 155 | defer os.RemoveAll(dir) 156 | 157 | s := getTestDaemonState(dir, 4) 158 | 159 | allocator := newMockedNumaPerNamespaceAllocator(2, false) 160 | containerNs1 := baseContainer(1) 161 | containerNs2 := baseContainer(2) 162 | containerNs3 := baseContainer(3) 163 | 164 | mock := allocator.ctrl.(*CgroupsMock) 165 | mock.On("UpdateCPUSet", s.CGroupPath, containerNs1, "0", "0").Return(nil) 166 | mock.On("UpdateCPUSet", s.CGroupPath, containerNs2, "2", "0").Return(nil) 167 | mock.On("UpdateCPUSet", s.CGroupPath, containerNs3, "1", "0").Return(nil) 168 | 169 | assert.Nil(t, allocator.takeCpus(containerNs1, s)) 170 | assert.Nil(t, allocator.takeCpus(containerNs2, s)) 171 | assert.Nil(t, allocator.takeCpus(containerNs3, s)) 172 | 173 | assertCpuState(t, s, &containerNs1, "0") 174 | assertCpuState(t, s, &containerNs2, "2") 175 | assertCpuState(t, s, &containerNs3, "1") 176 | } 177 | 178 | func TestNumaNamespaceExclusiveTakeCpu(t *testing.T) { 179 | dir, err := os.MkdirTemp("", "test_cpu") 180 | require.Nil(t, err) 181 | defer os.RemoveAll(dir) 182 | 183 | s := getTestDaemonState(dir, 8) 184 | 185 | allocator := newMockedNumaPerNamespaceAllocator(2, true) 186 | containerGuaranteed, containerBurstable := getGuaranteedAndBurstableContainers() 187 | containerBurstable2 := containerBurstable 188 | containerBurstable2.CID = "pod3" 189 | 190 | mock := allocator.ctrl.(*CgroupsMock) 191 | mock.On("UpdateCPUSet", s.CGroupPath, containerGuaranteed, "0", "0").Return(nil) 192 | mock.On("UpdateCPUSet", s.CGroupPath, containerBurstable, "1,2,3", "0").Return(nil) 193 | mock.On("UpdateCPUSet", s.CGroupPath, containerBurstable2, "1,2,3", "0").Return(nil) 194 | 195 | assert.Nil(t, allocator.takeCpus(containerGuaranteed, s)) 196 | assert.Nil(t, allocator.takeCpus(containerBurstable, s)) 197 | assert.Nil(t, allocator.takeCpus(containerBurstable2, s)) 198 | mock.AssertExpectations(t) 199 | 200 | assertCpuState(t, s, &containerGuaranteed, "0") 201 | 
assertCpuState(t, s, &containerBurstable, "1,2,3") 202 | assertCpuState(t, s, &containerBurstable2, "1,2,3") 203 | } 204 | 205 | func TestNumaNamespaceExclusiveTakeCpuWithReallocation(t *testing.T) { 206 | dir, err := os.MkdirTemp("", "test_cpu") 207 | require.Nil(t, err) 208 | defer os.RemoveAll(dir) 209 | 210 | s := getTestDaemonState(dir, 4) 211 | 212 | allocator := newMockedNumaPerNamespaceAllocator(2, true) 213 | containerGuaranteed, containerBurstable := getGuaranteedAndBurstableContainers() 214 | 215 | mock := allocator.ctrl.(*CgroupsMock) 216 | 217 | mock.On("UpdateCPUSet", s.CGroupPath, containerBurstable, "0,1", "0").Return(nil) // 1st allocation of burstable 218 | assert.Nil(t, allocator.takeCpus(containerBurstable, s)) 219 | assertCpuState(t, s, &containerBurstable, "0,1") 220 | addContainerToState(s, containerBurstable) 221 | 222 | mock.On("UpdateCPUSet", s.CGroupPath, containerGuaranteed, "0", "0").Return(nil) // allocation of guaranteed 223 | mock.On("UpdateCPUSet", s.CGroupPath, containerBurstable, "1", "0").Return(nil) // reallocation of burstable 224 | assert.Nil(t, allocator.takeCpus(containerGuaranteed, s)) 225 | mock.AssertExpectations(t) 226 | 227 | assertCpuState(t, s, &containerBurstable, "1") 228 | assertCpuState(t, s, &containerGuaranteed, "0") 229 | } 230 | 231 | func TestNumaNamespaceTakeCpuNonGuaranteed(t *testing.T) { 232 | dir, err := os.MkdirTemp("", "test_cpu") 233 | require.Nil(t, err) 234 | defer os.RemoveAll(dir) 235 | 236 | s := getTestDaemonState(dir, 2) 237 | s.Topology = oneLevelTopology(4) 238 | 239 | allocator := newMockedNumaPerNamespaceAllocator(2, false) 240 | container := baseContainer(1) 241 | container.QS = Burstable 242 | 243 | mock := allocator.ctrl.(*CgroupsMock) 244 | mock.On("UpdateCPUSet", s.CGroupPath, container, "0,1", "0").Return(nil) 245 | 246 | assert.Nil(t, allocator.takeCpus(container, s)) 247 | mock.AssertExpectations(t) 248 | 249 | assertCpuState(t, s, &container, "0,1") 250 | } 251 | 252 | func TestNumaNamespaceFreeCpu(t *testing.T) { 253 | dir, err := os.MkdirTemp("", "test_cpu") 254 | require.Nil(t, err) 255 | defer os.RemoveAll(dir) 256 | 257 | s := getTestDaemonState(dir, 2) 258 | 259 | allocator := newMockedNumaPerNamespaceAllocator(2, false) 260 | 261 | container := baseContainer(1) 262 | 263 | mock := allocator.ctrl.(*CgroupsMock) 264 | mock.On("UpdateCPUSet", s.CGroupPath, container, "0", "0").Return(nil) 265 | 266 | assert.Nil(t, allocator.takeCpus(container, s)) 267 | assert.Contains(t, s.Allocated, container.CID) 268 | 269 | assert.Nil(t, allocator.freeCpus(container, s)) 270 | assert.NotContains(t, s.Allocated, container.CID) 271 | mock.AssertExpectations(t) 272 | } 273 | 274 | func TestNumaNamespaceExclusiveFreeCpu(t *testing.T) { 275 | dir, err := os.MkdirTemp("", "test_cpu") 276 | require.Nil(t, err) 277 | defer os.RemoveAll(dir) 278 | 279 | s := getTestDaemonState(dir, 4) 280 | 281 | allocator := newMockedNumaPerNamespaceAllocator(1, true) 282 | containerGuaranteed, containerBurstable := getGuaranteedAndBurstableContainers() 283 | 284 | mock := allocator.ctrl.(*CgroupsMock) 285 | 286 | // add guaranteed container for cpu 0 287 | mock.On("UpdateCPUSet", s.CGroupPath, containerGuaranteed, "0", "0").Return(nil) 288 | assert.Nil(t, allocator.takeCpus(containerGuaranteed, s)) 289 | addContainerToState(s, containerGuaranteed) 290 | 291 | // add burstable container for cpu 1,2,3 292 | mock.On("UpdateCPUSet", s.CGroupPath, containerBurstable, "1,2,3", "0").Return(nil) 293 | assert.Nil(t, 
allocator.takeCpus(containerBurstable, s)) 294 | addContainerToState(s, containerBurstable) 295 | 296 | assert.Contains(t, s.Allocated, containerGuaranteed.CID) 297 | 298 | // remove guaranteed container, the burstable container shall now be reassigned to cpus 0,1,2,3 299 | mock.On("UpdateCPUSet", s.CGroupPath, containerBurstable, "0,1,2,3", "0").Return(nil) 300 | assert.Nil(t, allocator.freeCpus(containerGuaranteed, s)) 301 | 302 | assert.NotContains(t, s.Allocated, containerGuaranteed.CID) 303 | 304 | mock.AssertExpectations(t) 305 | } 306 | 307 | func TestNumaNamespaceTakeCpuFailsIfNotEnoughSpace(t *testing.T) { 308 | dir, err := os.MkdirTemp("", "test_cpu") 309 | require.Nil(t, err) 310 | defer os.RemoveAll(dir) 311 | 312 | s := getTestDaemonState(dir, 2) 313 | 314 | allocator := newMockedNumaPerNamespaceAllocator(2, false) 315 | 316 | assert.Error(t, allocator.takeCpus(Container{ 317 | CID: "cid1", 318 | PID: "pod1", 319 | Name: "cid1_name", 320 | Cpus: 2, 321 | QS: Guaranteed, 322 | }, s)) 323 | } 324 | 325 | func TestNumaNamespaceTakeCpuFailsIfAllBucketsTaken(t *testing.T) { 326 | dir, err := os.MkdirTemp("", "test_cpu") 327 | require.Nil(t, err) 328 | defer os.RemoveAll(dir) 329 | 330 | s := getTestDaemonState(dir, 2) 331 | 332 | allocator := newMockedNumaPerNamespaceAllocator(2, false) 333 | cmock := allocator.ctrl.(*CgroupsMock) 334 | cmock.On("UpdateCPUSet", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) 335 | 336 | assert.Nil(t, allocator.takeCpus(baseContainer(1), s)) 337 | assert.Nil(t, allocator.takeCpus(baseContainer(2), s)) 338 | assert.Error(t, allocator.takeCpus(baseContainer(3), s)) 339 | cmock.AssertExpectations(t) 340 | } 341 | 342 | func TestNumaNamespaceClearCpu(t *testing.T) { 343 | dir, err := os.MkdirTemp("", "test_cpu") 344 | require.Nil(t, err) 345 | defer os.RemoveAll(dir) 346 | 347 | s := getTestDaemonState(dir, 2) 348 | s.Topology = oneLevelTopology(4) 349 | 350 | allocator := newMockedNumaPerNamespaceAllocator(2, false) 351 | container := baseContainer(1) 352 | container.QS = Burstable 353 | 354 | mock := allocator.ctrl.(*CgroupsMock) 355 | mock.On("UpdateCPUSet", s.CGroupPath, container, "0,1,2,3", "0").Return(nil) 356 | 357 | assert.Nil(t, allocator.clearCpus(container, s)) 358 | mock.AssertExpectations(t) 359 | } 360 | -------------------------------------------------------------------------------- /pkg/cpudaemon/daemon_state.go: -------------------------------------------------------------------------------- 1 | package cpudaemon 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "io" 7 | "os" 8 | 9 | "github.com/containerd/cgroups" 10 | "resourcemanagement.controlplane/pkg/ctlplaneapi" 11 | "resourcemanagement.controlplane/pkg/numautils" 12 | "resourcemanagement.controlplane/pkg/utils" 13 | ) 14 | 15 | const daemonFilePermission = 0600 16 | 17 | // DaemonState struct holding the current daemon state. 
18 | type DaemonState struct { 19 | AvailableCPUs []ctlplaneapi.CPUBucket // Used only with default allocator 20 | Allocated map[string][]ctlplaneapi.CPUBucket // Maps container id to allocated cpus 21 | Pods map[string]PodMetadata // Maps pod id to its metadata 22 | Topology numautils.NumaTopology // Used with numa and numa-namespace allocators 23 | CGroupPath string // Path to cgroup main folder (usually /sys/fs/cgroup) 24 | StatePath string // Path to state file where DaemonState is marshalled/unmarshalled 25 | } 26 | 27 | func newState(cgroupPath string, numaPath string, statePath string) (*DaemonState, error) { 28 | s := DaemonState{ 29 | CGroupPath: cgroupPath, 30 | Allocated: make(map[string][]ctlplaneapi.CPUBucket), 31 | Pods: make(map[string]PodMetadata), 32 | StatePath: statePath, 33 | } 34 | 35 | var ( 36 | gCgroupPath string 37 | gCpusetFilePath string 38 | ) 39 | if cgroups.Mode() != cgroups.Unified { 40 | gCgroupPath = cgroupPath + "/cpuset" 41 | gCpusetFilePath = "cpuset.cpus" 42 | } else { 43 | gCgroupPath = cgroupPath 44 | gCpusetFilePath = "cpuset.cpus.effective" 45 | } 46 | c, err := getValues(gCgroupPath, gCpusetFilePath) 47 | 48 | if err == nil { 49 | s.AvailableCPUs = c 50 | } else { 51 | return nil, DaemonError{ 52 | ErrorType: MissingCgroup, 53 | ErrorMessage: err.Error(), 54 | } 55 | } 56 | 57 | err = s.Topology.Load(numaPath) 58 | 59 | if err != nil { 60 | return nil, DaemonError{ 61 | ErrorType: NotImplemented, 62 | ErrorMessage: err.Error(), 63 | } 64 | } 65 | _, errSt := os.Stat(statePath) 66 | if errSt != nil && errors.Is(errSt, os.ErrNotExist) { 67 | err = s.SaveState() 68 | } else { 69 | err = s.LoadState() 70 | } 71 | _ = errSt 72 | if err != nil { 73 | return nil, err 74 | } 75 | return &s, err 76 | } 77 | 78 | // SaveState saves state to file given in StatePath. 79 | func (d *DaemonState) SaveState() error { 80 | b, err := json.Marshal(d) 81 | if err != nil { 82 | return err 83 | } 84 | err = os.WriteFile(d.StatePath, b, daemonFilePermission) 85 | return err 86 | } 87 | 88 | // LoadState loads state from StatePath. StatePath value is always preserved. 89 | func (d *DaemonState) LoadState() error { 90 | statePath := d.StatePath 91 | if err := utils.ErrorIfSymlink(statePath); err != nil { 92 | return err 93 | } 94 | b, err := os.ReadFile(statePath) 95 | if err != nil { 96 | return err 97 | } 98 | err = json.Unmarshal(b, d) 99 | d.StatePath = statePath // do not modify statePath, even if different (e.g. state file was copied) 100 | return err 101 | } 102 | 103 | // DaemonStateFromReader loads the state of the daemon from a stream.
104 | func DaemonStateFromReader(reader io.Reader) (DaemonState, error) { 105 | d := DaemonState{} 106 | b, err := io.ReadAll(reader) 107 | if err != nil { 108 | return DaemonState{}, err 109 | } 110 | err = json.Unmarshal(b, &d) 111 | return d, err 112 | } 113 | -------------------------------------------------------------------------------- /pkg/cpudaemon/daemon_state_test.go: -------------------------------------------------------------------------------- 1 | package cpudaemon 2 | 3 | import ( 4 | "os" 5 | "path" 6 | "testing" 7 | 8 | "resourcemanagement.controlplane/pkg/ctlplaneapi" 9 | "resourcemanagement.controlplane/pkg/utils" 10 | 11 | "github.com/stretchr/testify/assert" 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func TestNewState(t *testing.T) { 16 | daemonStateFile, tearDown := setupTest() 17 | defer tearDown(t) 18 | s, err := newState("testdata/no_state", "testdata/node_info", daemonStateFile) 19 | assert.Nil(t, err) 20 | assert.NotNil(t, s) 21 | assert.NotNil(t, s.Allocated) 22 | assert.Equal(t, len(s.Allocated), 0) 23 | assert.Equal(t, len(s.AvailableCPUs), 1) 24 | } 25 | 26 | func TestThrowLoadState(t *testing.T) { 27 | d := DaemonState{ 28 | StatePath: "daemon_cpuset_not_exist.state", 29 | } 30 | err := d.LoadState() 31 | assert.Equal(t, err != nil, true) 32 | } 33 | 34 | func TestMissingCGroup(t *testing.T) { 35 | daemonStateFile, tearDown := setupTest() 36 | defer tearDown(t) 37 | s, err := newState("testdata/no_cgroup", "testdata/node_info", daemonStateFile) 38 | assert.NotNil(t, err) 39 | assert.Nil(t, s) 40 | assert.IsType(t, DaemonError{}, err) 41 | assert.Equal(t, MissingCgroup, err.(DaemonError).ErrorType) //nolint: errorlint 42 | } 43 | 44 | func TestSaveAndLoadDaemonState(t *testing.T) { 45 | tempFile, err := os.CreateTemp("", "test") 46 | require.Nil(t, err) 47 | defer tempFile.Close() 48 | defer os.Remove(tempFile.Name()) 49 | 50 | expectedState := DaemonState{ 51 | StatePath: tempFile.Name(), 52 | } 53 | expectedState.AvailableCPUs = []ctlplaneapi.CPUBucket{ 54 | { 55 | StartCPU: 0, 56 | EndCPU: 127, 57 | }, 58 | } 59 | 60 | savedState := DaemonState{ 61 | StatePath: tempFile.Name(), 62 | } 63 | savedState.AvailableCPUs = expectedState.AvailableCPUs 64 | require.Nil(t, savedState.SaveState()) 65 | 66 | loadedState := DaemonState{ 67 | StatePath: tempFile.Name(), 68 | } 69 | require.Nil(t, loadedState.LoadState()) 70 | 71 | assert.Equal(t, expectedState, loadedState) 72 | } 73 | 74 | func TestDoNotLoadDaemonStateIfSymlink(t *testing.T) { 75 | dir := t.TempDir() 76 | 77 | sourcePath := path.Join(dir, "data.json") 78 | symlinkPath := path.Join(dir, "symlink.json") 79 | 80 | require.Nil(t, os.Symlink(sourcePath, symlinkPath)) 81 | 82 | state := DaemonState{ 83 | StatePath: symlinkPath, 84 | } 85 | 86 | require.ErrorIs(t, state.LoadState(), utils.ErrFileIsSymlink) 87 | } 88 | -------------------------------------------------------------------------------- /pkg/cpudaemon/static_policy.go: -------------------------------------------------------------------------------- 1 | package cpudaemon 2 | 3 | // Policy interface of cpu management policies. 4 | type Policy interface { 5 | AssignContainer(c Container, s *DaemonState) error 6 | DeleteContainer(c Container, s *DaemonState) error 7 | ClearContainer(c Container, s *DaemonState) error 8 | } 9 | 10 | // StaticPolicy Static Policy type holding assigned containers. 
11 | type StaticPolicy struct { 12 | allocator Allocator 13 | } 14 | 15 | var _ Policy = &StaticPolicy{} 16 | 17 | // NewStaticPolocy constructs a new static policy. 18 | func NewStaticPolocy(a Allocator) *StaticPolicy { 19 | p := StaticPolicy{ 20 | allocator: a, 21 | } 22 | return &p 23 | } 24 | 25 | // AssignContainer tries to allocate a container. 26 | func (p *StaticPolicy) AssignContainer(c Container, s *DaemonState) error { 27 | return p.allocator.takeCpus(c, s) 28 | } 29 | 30 | // DeleteContainer deletes allocated containers (without deleting cgroup config - it will be cleared by k8s GC). 31 | func (p *StaticPolicy) DeleteContainer(c Container, s *DaemonState) error { 32 | return p.allocator.freeCpus(c, s) 33 | } 34 | 35 | // ClearContainer reverts cpuset configuration to the default one (use all available cpus). It does not 36 | // remove the container from the state - this should be done with DeleteContainer. 37 | func (p *StaticPolicy) ClearContainer(c Container, s *DaemonState) error { 38 | return p.allocator.clearCpus(c, s) 39 | } 40 | -------------------------------------------------------------------------------- /pkg/cpudaemon/static_policy_test.go: -------------------------------------------------------------------------------- 1 | package cpudaemon 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | "github.com/stretchr/testify/mock" 8 | ) 9 | 10 | type AllocatorMock struct { 11 | mock.Mock 12 | } 13 | 14 | var _ Allocator = &AllocatorMock{} 15 | 16 | func (m *AllocatorMock) takeCpus(c Container, s *DaemonState) error { 17 | args := m.Called(c, s) 18 | return args.Error(0) 19 | } 20 | 21 | func (m *AllocatorMock) freeCpus(c Container, s *DaemonState) error { 22 | args := m.Called(c, s) 23 | return args.Error(0) 24 | } 25 | 26 | func (m *AllocatorMock) clearCpus(c Container, s *DaemonState) error { 27 | args := m.Called(c, s) 28 | return args.Error(0) 29 | } 30 | 31 | func TestNewStaticPolicy(t *testing.T) { 32 | s := NewStaticPolocy(nil) 33 | assert.NotNil(t, s) 34 | } 35 | 36 | func TestAssignContainerMocked(t *testing.T) { 37 | a := AllocatorMock{} 38 | s := NewStaticPolocy(&a) 39 | 40 | // check a new container 41 | c := Container{ 42 | CID: "test-contaier", 43 | PID: "test-pod", 44 | Cpus: 42, 45 | QS: Guaranteed, 46 | } 47 | st := DaemonState{} 48 | a.On("takeCpus", c, &st).Return(nil) 49 | err := s.AssignContainer(c, &st) 50 | assert.Nil(t, err) 51 | c.QS = BestEffort 52 | a.On("takeCpus", c, &st).Return(nil) 53 | err = s.AssignContainer(c, &st) 54 | assert.Nil(t, err) 55 | a.AssertNumberOfCalls(t, "takeCpus", 2) 56 | } 57 | 58 | func TestDeleteContainerMocked(t *testing.T) { 59 | a := AllocatorMock{} 60 | s := NewStaticPolocy(&a) 61 | 62 | // check a new container 63 | c := Container{ 64 | CID: "test-contaier", 65 | PID: "test-pod", 66 | Cpus: 42, 67 | QS: Guaranteed, 68 | } 69 | st := DaemonState{} 70 | a.On("freeCpus", c, &st).Return(nil) 71 | assert.Nil(t, s.DeleteContainer(c, &st)) 72 | c.QS = BestEffort 73 | a.On("freeCpus", c, &st).Return(nil) 74 | assert.Nil(t, s.DeleteContainer(c, &st)) 75 | a.AssertNumberOfCalls(t, "freeCpus", 2) 76 | } 77 | -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/daemon_cpuset.state: -------------------------------------------------------------------------------- 1 | {"AvailableCPUs":[{"StartCPU":0,"EndCPU":127}],"Allocated":null,"Pods":null,"Topology":{"Topology":null,"CpuInformation":null},"CGroupPath":"","StatePath":"testdata/daemon_cpuset.state"}
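The daemon_cpuset.state JSON above is the on-disk form of DaemonState. Below is a minimal sketch of reading such a file through the exported DaemonStateFromReader helper; the file path and the module import path (taken from the imports used in the test files) are assumptions, not part of the repository.

package main

import (
	"fmt"
	"os"

	// Assumed module path, mirroring the imports in the test files.
	"resourcemanagement.controlplane/pkg/cpudaemon"
)

func main() {
	// Open a serialized daemon state, e.g. pkg/cpudaemon/testdata/daemon_cpuset.state.
	f, err := os.Open("pkg/cpudaemon/testdata/daemon_cpuset.state")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// DaemonStateFromReader unmarshals the JSON document into a DaemonState.
	state, err := cpudaemon.DaemonStateFromReader(f)
	if err != nil {
		panic(err)
	}

	// AvailableCPUs holds the CPU buckets used by the default allocator.
	for _, b := range state.AvailableCPUs {
		fmt.Printf("available cpus: %d-%d\n", b.StartCPU, b.EndCPU)
	}
}

Run against the testdata file shown above, this would print "available cpus: 0-127".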
-------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/no_state/cpuset.cpus: -------------------------------------------------------------------------------- 1 | 0-127 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/no_state/cpuset.cpus.effective: -------------------------------------------------------------------------------- 1 | 0-127 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/no_state/cpuset/cpuset.cpus: -------------------------------------------------------------------------------- 1 | 0-127 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/no_state/cpuset/test/cpuset.cpus: -------------------------------------------------------------------------------- 1 | 0-9 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/no_state/cpuset/test/cpuset.mems: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/intel/cpu-control-plane-plugin-for-kubernetes/dcf4449796fa720e3d24996ec596da6483441f92/pkg/cpudaemon/testdata/no_state/cpuset/test/cpuset.mems -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node0/cpu1/topology/core_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node0/cpu1/topology/die_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node0/cpu1/topology/package_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node0/cpu3/topology/core_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node0/cpu3/topology/die_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node0/cpu3/topology/package_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node0/cpu5/topology/core_id: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node0/cpu5/topology/die_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node0/cpu5/topology/package_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node0/cpu7/topology/core_id: 
-------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node0/cpu7/topology/die_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node0/cpu7/topology/package_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node1/cpu2/topology/core_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node1/cpu2/topology/die_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node1/cpu2/topology/package_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node1/cpu4/topology/core_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node1/cpu4/topology/die_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node1/cpu4/topology/package_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node1/cpu6/topology/core_id: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node1/cpu6/topology/die_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node1/cpu6/topology/package_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node1/cpu8/topology/core_id: -------------------------------------------------------------------------------- 1 | 1 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node1/cpu8/topology/die_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/node_info/node1/cpu8/topology/package_id: -------------------------------------------------------------------------------- 1 | 0 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/with_state/cpuset.cpus.effective: -------------------------------------------------------------------------------- 1 | 
0-55, 76-78, 99 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/with_state/cpuset/cpuset.cpus: -------------------------------------------------------------------------------- 1 | 0-55, 76-78, 99 -------------------------------------------------------------------------------- /pkg/cpudaemon/testdata/with_state/daemon.state: -------------------------------------------------------------------------------- 1 | {"AvailableCPUs":[{"StartCPU":0,"EndCPU":55},{"StartCPU":76,"EndCPU":78},{"StartCPU":99,"EndCPU":99}],"Allocated":{},"Pods":{},"CGroupPath":"testdata/with_state/","StatePath":"testdata/with_state/daemon.state"} -------------------------------------------------------------------------------- /pkg/ctlplaneapi/controlplane.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | package ctlplaneapi; 3 | option go_package = "./ctlplaneapi"; 4 | 5 | 6 | // Control Plane Interface to allocate pods and containers 7 | service ControlPlane { 8 | // Request allocation of a pod on creation event 9 | rpc CreatePod(CreatePodRequest) returns (PodAllocationReply) {} 10 | // Updates pod allocation; also used for container deletion 11 | rpc UpdatePod(UpdatePodRequest) returns (PodAllocationReply) {} 12 | // Deallocates a pod 13 | rpc DeletePod(DeletePodRequest) returns (PodAllocationReply) {} 14 | } 15 | 16 | message CreatePodRequest { 17 | string podId = 1; 18 | string podName = 2; 19 | string podNamespace = 3; 20 | ResourceInfo resources = 4; 21 | repeated ContainerInfo containers = 5; 22 | } 23 | 24 | message UpdatePodRequest { 25 | string podId = 1; 26 | ResourceInfo resources = 2; 27 | repeated ContainerInfo containers = 3; 28 | } 29 | 30 | message DeletePodRequest { 31 | string podId = 1; 32 | } 33 | 34 | enum AllocationState{ 35 | CREATED = 0; 36 | UPDATED = 1; 37 | DELETED = 2; 38 | } 39 | 40 | enum Placement { 41 | DEFAULT = 0; 42 | COMPACT = 1; 43 | SCATTER = 2; 44 | POOL = 3; 45 | } 46 | 47 | message ResourceInfo{ 48 | int32 requestedCpus = 1; 49 | int32 limitCpus = 2; 50 | bytes requestedMemory = 3; 51 | bytes limitMemory = 4; 52 | Placement cpuAffinity = 5; 53 | } 54 | 55 | message ContainerInfo { 56 | string containerId = 1; 57 | string containerName = 2; 58 | ResourceInfo resources = 3; 59 | } 60 | 61 | message ContainerAllocationInfo{ 62 | string containerId = 1; 63 | AllocationState allocState = 2; 64 | repeated CPUSet cpuSet = 3; 65 | } 66 | 67 | message CPUSet { 68 | int32 startCPU = 1; 69 | int32 endCPU = 2; 70 | } 71 | 72 | message PodAllocationReply{ 73 | string podId = 1; 74 | AllocationState allocState = 2; 75 | repeated CPUSet cpuSet = 3; 76 | repeated ContainerAllocationInfo containersAllocations = 4; 77 | } 78 | -------------------------------------------------------------------------------- /pkg/ctlplaneapi/controlplane_grpc.pb.go: -------------------------------------------------------------------------------- 1 | // Code generated by protoc-gen-go-grpc. DO NOT EDIT. 2 | // versions: 3 | // - protoc-gen-go-grpc v1.2.0 4 | // - protoc v3.21.6 5 | // source: pkg/ctlplaneapi/controlplane.proto 6 | 7 | package ctlplaneapi 8 | 9 | import ( 10 | context "context" 11 | grpc "google.golang.org/grpc" 12 | codes "google.golang.org/grpc/codes" 13 | status "google.golang.org/grpc/status" 14 | ) 15 | 16 | // This is a compile-time assertion to ensure that this generated file 17 | // is compatible with the grpc package it is being compiled against. 
18 | // Requires gRPC-Go v1.32.0 or later. 19 | const _ = grpc.SupportPackageIsVersion7 20 | 21 | // ControlPlaneClient is the client API for ControlPlane service. 22 | // 23 | // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. 24 | type ControlPlaneClient interface { 25 | // Request allocation of a pod on creation event 26 | CreatePod(ctx context.Context, in *CreatePodRequest, opts ...grpc.CallOption) (*PodAllocationReply, error) 27 | // Updates pod allocation; also used for container deletion 28 | UpdatePod(ctx context.Context, in *UpdatePodRequest, opts ...grpc.CallOption) (*PodAllocationReply, error) 29 | // Deallocates a pod 30 | DeletePod(ctx context.Context, in *DeletePodRequest, opts ...grpc.CallOption) (*PodAllocationReply, error) 31 | } 32 | 33 | type controlPlaneClient struct { 34 | cc grpc.ClientConnInterface 35 | } 36 | 37 | func NewControlPlaneClient(cc grpc.ClientConnInterface) ControlPlaneClient { 38 | return &controlPlaneClient{cc} 39 | } 40 | 41 | func (c *controlPlaneClient) CreatePod(ctx context.Context, in *CreatePodRequest, opts ...grpc.CallOption) (*PodAllocationReply, error) { 42 | out := new(PodAllocationReply) 43 | err := c.cc.Invoke(ctx, "/ctlplaneapi.ControlPlane/CreatePod", in, out, opts...) 44 | if err != nil { 45 | return nil, err 46 | } 47 | return out, nil 48 | } 49 | 50 | func (c *controlPlaneClient) UpdatePod(ctx context.Context, in *UpdatePodRequest, opts ...grpc.CallOption) (*PodAllocationReply, error) { 51 | out := new(PodAllocationReply) 52 | err := c.cc.Invoke(ctx, "/ctlplaneapi.ControlPlane/UpdatePod", in, out, opts...) 53 | if err != nil { 54 | return nil, err 55 | } 56 | return out, nil 57 | } 58 | 59 | func (c *controlPlaneClient) DeletePod(ctx context.Context, in *DeletePodRequest, opts ...grpc.CallOption) (*PodAllocationReply, error) { 60 | out := new(PodAllocationReply) 61 | err := c.cc.Invoke(ctx, "/ctlplaneapi.ControlPlane/DeletePod", in, out, opts...) 62 | if err != nil { 63 | return nil, err 64 | } 65 | return out, nil 66 | } 67 | 68 | // ControlPlaneServer is the server API for ControlPlane service. 69 | // All implementations must embed UnimplementedControlPlaneServer 70 | // for forward compatibility 71 | type ControlPlaneServer interface { 72 | // Request allocation of a pod on creation event 73 | CreatePod(context.Context, *CreatePodRequest) (*PodAllocationReply, error) 74 | // Updates pod allocation; also used for container deletion 75 | UpdatePod(context.Context, *UpdatePodRequest) (*PodAllocationReply, error) 76 | // Deallocates a pod 77 | DeletePod(context.Context, *DeletePodRequest) (*PodAllocationReply, error) 78 | mustEmbedUnimplementedControlPlaneServer() 79 | } 80 | 81 | // UnimplementedControlPlaneServer must be embedded to have forward compatible implementations. 
82 | type UnimplementedControlPlaneServer struct { 83 | } 84 | 85 | func (UnimplementedControlPlaneServer) CreatePod(context.Context, *CreatePodRequest) (*PodAllocationReply, error) { 86 | return nil, status.Errorf(codes.Unimplemented, "method CreatePod not implemented") 87 | } 88 | func (UnimplementedControlPlaneServer) UpdatePod(context.Context, *UpdatePodRequest) (*PodAllocationReply, error) { 89 | return nil, status.Errorf(codes.Unimplemented, "method UpdatePod not implemented") 90 | } 91 | func (UnimplementedControlPlaneServer) DeletePod(context.Context, *DeletePodRequest) (*PodAllocationReply, error) { 92 | return nil, status.Errorf(codes.Unimplemented, "method DeletePod not implemented") 93 | } 94 | func (UnimplementedControlPlaneServer) mustEmbedUnimplementedControlPlaneServer() {} 95 | 96 | // UnsafeControlPlaneServer may be embedded to opt out of forward compatibility for this service. 97 | // Use of this interface is not recommended, as added methods to ControlPlaneServer will 98 | // result in compilation errors. 99 | type UnsafeControlPlaneServer interface { 100 | mustEmbedUnimplementedControlPlaneServer() 101 | } 102 | 103 | func RegisterControlPlaneServer(s grpc.ServiceRegistrar, srv ControlPlaneServer) { 104 | s.RegisterService(&ControlPlane_ServiceDesc, srv) 105 | } 106 | 107 | func _ControlPlane_CreatePod_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { 108 | in := new(CreatePodRequest) 109 | if err := dec(in); err != nil { 110 | return nil, err 111 | } 112 | if interceptor == nil { 113 | return srv.(ControlPlaneServer).CreatePod(ctx, in) 114 | } 115 | info := &grpc.UnaryServerInfo{ 116 | Server: srv, 117 | FullMethod: "/ctlplaneapi.ControlPlane/CreatePod", 118 | } 119 | handler := func(ctx context.Context, req interface{}) (interface{}, error) { 120 | return srv.(ControlPlaneServer).CreatePod(ctx, req.(*CreatePodRequest)) 121 | } 122 | return interceptor(ctx, in, info, handler) 123 | } 124 | 125 | func _ControlPlane_UpdatePod_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { 126 | in := new(UpdatePodRequest) 127 | if err := dec(in); err != nil { 128 | return nil, err 129 | } 130 | if interceptor == nil { 131 | return srv.(ControlPlaneServer).UpdatePod(ctx, in) 132 | } 133 | info := &grpc.UnaryServerInfo{ 134 | Server: srv, 135 | FullMethod: "/ctlplaneapi.ControlPlane/UpdatePod", 136 | } 137 | handler := func(ctx context.Context, req interface{}) (interface{}, error) { 138 | return srv.(ControlPlaneServer).UpdatePod(ctx, req.(*UpdatePodRequest)) 139 | } 140 | return interceptor(ctx, in, info, handler) 141 | } 142 | 143 | func _ControlPlane_DeletePod_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { 144 | in := new(DeletePodRequest) 145 | if err := dec(in); err != nil { 146 | return nil, err 147 | } 148 | if interceptor == nil { 149 | return srv.(ControlPlaneServer).DeletePod(ctx, in) 150 | } 151 | info := &grpc.UnaryServerInfo{ 152 | Server: srv, 153 | FullMethod: "/ctlplaneapi.ControlPlane/DeletePod", 154 | } 155 | handler := func(ctx context.Context, req interface{}) (interface{}, error) { 156 | return srv.(ControlPlaneServer).DeletePod(ctx, req.(*DeletePodRequest)) 157 | } 158 | return interceptor(ctx, in, info, handler) 159 | } 160 | 161 | // ControlPlane_ServiceDesc is the grpc.ServiceDesc for 
ControlPlane service. 162 | // It's only intended for direct use with grpc.RegisterService, 163 | // and not to be introspected or modified (even as a copy) 164 | var ControlPlane_ServiceDesc = grpc.ServiceDesc{ 165 | ServiceName: "ctlplaneapi.ControlPlane", 166 | HandlerType: (*ControlPlaneServer)(nil), 167 | Methods: []grpc.MethodDesc{ 168 | { 169 | MethodName: "CreatePod", 170 | Handler: _ControlPlane_CreatePod_Handler, 171 | }, 172 | { 173 | MethodName: "UpdatePod", 174 | Handler: _ControlPlane_UpdatePod_Handler, 175 | }, 176 | { 177 | MethodName: "DeletePod", 178 | Handler: _ControlPlane_DeletePod_Handler, 179 | }, 180 | }, 181 | Streams: []grpc.StreamDesc{}, 182 | Metadata: "pkg/ctlplaneapi/controlplane.proto", 183 | } 184 | -------------------------------------------------------------------------------- /pkg/ctlplaneapi/ctrlplaneapi_test.go: -------------------------------------------------------------------------------- 1 | package ctlplaneapi 2 | 3 | import ( 4 | context "context" 5 | "fmt" 6 | "net" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | "github.com/stretchr/testify/mock" 11 | "google.golang.org/grpc" 12 | "google.golang.org/grpc/codes" 13 | "google.golang.org/grpc/credentials/insecure" 14 | "google.golang.org/grpc/status" 15 | "google.golang.org/grpc/test/bufconn" 16 | "google.golang.org/protobuf/proto" 17 | "k8s.io/apimachinery/pkg/api/resource" 18 | ) 19 | 20 | type DaemonMock struct { 21 | mock.Mock 22 | } 23 | 24 | func (m *DaemonMock) CreatePod(req *CreatePodRequest) (*AllocatedPodResources, error) { 25 | args := m.Called(req) 26 | return createTestCPUAllocation(req.Containers), args.Error(0) 27 | } 28 | 29 | func (m *DaemonMock) DeletePod(req *DeletePodRequest) error { 30 | args := m.Called(req) 31 | return args.Error(0) 32 | } 33 | 34 | func (m *DaemonMock) UpdatePod(req *UpdatePodRequest) (*AllocatedPodResources, error) { 35 | args := m.Called(req) 36 | return modifyCPUAllocation(req.Containers), args.Error(0) 37 | } 38 | 39 | // Creates a bufconn grpc server for testing. 
40 | func NewMockedServer(ctx context.Context) (ControlPlaneClient, func(), *DaemonMock) { 41 | buffer := 1024 * 1024 42 | listener := bufconn.Listen(buffer) 43 | s := grpc.NewServer() 44 | m := DaemonMock{} 45 | RegisterControlPlaneServer(s, NewServer(&m)) 46 | go func() { 47 | if err := s.Serve(listener); err != nil { 48 | panic(err) 49 | } 50 | }() 51 | 52 | conn, _ := grpc.DialContext(ctx, "", grpc.WithContextDialer(func(context.Context, string) (net.Conn, error) { 53 | return listener.Dial() 54 | }), grpc.WithTransportCredentials(insecure.NewCredentials())) 55 | 56 | closer := func() { 57 | listener.Close() 58 | s.Stop() 59 | } 60 | 61 | client := NewControlPlaneClient(conn) 62 | 63 | return client, closer, &m 64 | } 65 | 66 | func createTestDeletion(m *DaemonMock, pid string, err error) (*DeletePodRequest, *PodAllocationReply) { 67 | m.On("DeletePod", &DeletePodRequest{PodId: pid}).Return(err).Once() 68 | return &DeletePodRequest{ 69 | PodId: pid, 70 | }, &PodAllocationReply{ 71 | PodId: pid, 72 | AllocState: AllocationState_DELETED, 73 | } 74 | } 75 | 76 | func modifyCPUAllocation(container []*ContainerInfo) *AllocatedPodResources { 77 | a := createTestCPUAllocation(container) 78 | a.CPUSet = []CPUBucket{ 79 | { 80 | StartCPU: 0, 81 | EndCPU: 128, 82 | }, 83 | } 84 | return a 85 | } 86 | 87 | func createTestCPUAllocation(container []*ContainerInfo) *AllocatedPodResources { 88 | defaultBuckets := []CPUBucket{ 89 | { 90 | StartCPU: 0, 91 | EndCPU: 12, 92 | }, 93 | { 94 | StartCPU: 13, 95 | EndCPU: 24, 96 | }, 97 | } 98 | cResources := []AllocatedContainerResource{} 99 | for _, c := range container { 100 | cResources = append(cResources, 101 | AllocatedContainerResource{ 102 | ContainerID: c.ContainerId, 103 | CPUSet: defaultBuckets, 104 | }, 105 | ) 106 | } 107 | return &AllocatedPodResources{ 108 | CPUSet: defaultBuckets, 109 | ContainerResources: cResources, 110 | } 111 | } 112 | 113 | func toGRPCHelper4Containers(c []AllocatedContainerResource) []*ContainerAllocationInfo { 114 | res := []*ContainerAllocationInfo{} 115 | for _, it := range c { 116 | res = append(res, 117 | &ContainerAllocationInfo{ 118 | ContainerId: it.ContainerID, 119 | CpuSet: toGRPCHelper4CPUSet(it.CPUSet), 120 | }) 121 | } 122 | return res 123 | } 124 | 125 | func validateAllocatedPodReply(t *testing.T, eReply *PodAllocationReply, reply *PodAllocationReply) { 126 | assert.Equal(t, eReply.PodId, reply.PodId) 127 | assert.Equal(t, len(eReply.CpuSet), len(reply.CpuSet)) 128 | assert.Equal(t, eReply.AllocState, reply.AllocState) 129 | for i := 0; i < len(eReply.CpuSet); i++ { 130 | assert.Equal(t, eReply.CpuSet[i].StartCPU, reply.CpuSet[i].StartCPU) 131 | assert.Equal(t, eReply.CpuSet[i].EndCPU, reply.CpuSet[i].EndCPU) 132 | } 133 | } 134 | 135 | func newQuantityAsBytes(v int64) []byte { 136 | rm := resource.NewQuantity(v, resource.DecimalSI) 137 | r, _ := rm.Marshal() 138 | return r 139 | } 140 | 141 | func modifyContainers(c []*ContainerInfo) []*ContainerInfo { 142 | res := []*ContainerInfo{} 143 | for i := 0; i < len(c); i++ { 144 | modResource := ResourceInfo{ 145 | RequestedCpus: 1, 146 | LimitCpus: 2, 147 | RequestedMemory: newQuantityAsBytes(3), 148 | LimitMemory: newQuantityAsBytes(4), 149 | CpuAffinity: Placement_DEFAULT, 150 | } 151 | res = append(res, 152 | &ContainerInfo{ 153 | ContainerId: c[i].ContainerId, 154 | Resources: &modResource, 155 | }, 156 | ) 157 | } 158 | return res 159 | } 160 | 161 | // helper function to create some containers and resources allocations. 
162 | func createContainers(n int, a []Placement) []*ContainerInfo { 163 | containers := []*ContainerInfo{} 164 | for i := 0; i < n; i++ { 165 | cid := fmt.Sprintf("testCid-%d", i) 166 | cRInfo := ResourceInfo{ 167 | RequestedCpus: 2, 168 | LimitCpus: 4, 169 | RequestedMemory: newQuantityAsBytes(8), 170 | LimitMemory: newQuantityAsBytes(16), 171 | CpuAffinity: a[i%len(a)], 172 | } 173 | containers = append(containers, 174 | &ContainerInfo{ 175 | ContainerId: cid, 176 | Resources: &cRInfo, 177 | }, 178 | ) 179 | } 180 | return containers 181 | } 182 | 183 | func updateTestPodRequest(t *testing.T, m *DaemonMock, cReq *CreatePodRequest, c []*ContainerInfo, err error) (*UpdatePodRequest, *PodAllocationReply) { 184 | ePodAllock := modifyCPUAllocation(c) 185 | modifiedRInfo := ResourceInfo{ 186 | RequestedCpus: 2, 187 | LimitCpus: 1, 188 | RequestedMemory: newQuantityAsBytes(5), 189 | LimitMemory: newQuantityAsBytes(32), 190 | CpuAffinity: Placement_DEFAULT, 191 | } 192 | request := UpdatePodRequest{ 193 | PodId: cReq.PodId, 194 | Resources: &modifiedRInfo, 195 | Containers: c, 196 | } 197 | m.On("UpdatePod", 198 | mock.MatchedBy(func(r *UpdatePodRequest) bool { 199 | return proto.Equal(r, &request) 200 | }), 201 | ).Return(err) 202 | return &request, &PodAllocationReply{ 203 | PodId: cReq.PodId, 204 | CpuSet: toGRPCHelper4CPUSet(ePodAllock.CPUSet), 205 | ContainersAllocations: toGRPCHelper4Containers(ePodAllock.ContainerResources), 206 | AllocState: AllocationState_UPDATED, 207 | } 208 | } 209 | 210 | func createTestPodRequest(t *testing.T, pName, pNamespace string, m *DaemonMock, a Placement, 211 | c []*ContainerInfo, err error) (*CreatePodRequest, *PodAllocationReply) { 212 | pid := "testPid" 213 | rInfo := ResourceInfo{ 214 | RequestedCpus: 2, 215 | LimitCpus: 4, 216 | RequestedMemory: newQuantityAsBytes(8), 217 | LimitMemory: newQuantityAsBytes(16), 218 | CpuAffinity: a, 219 | } 220 | 221 | request := CreatePodRequest{ 222 | PodId: pid, 223 | PodName: pName, 224 | PodNamespace: pNamespace, 225 | Resources: &rInfo, 226 | Containers: c, 227 | } 228 | m.On("CreatePod", mock.MatchedBy(func(r *CreatePodRequest) bool { 229 | return proto.Equal(r, &request) 230 | })).Return(err) 231 | ePodAllock := createTestCPUAllocation(c) 232 | return &request, &PodAllocationReply{ 233 | PodId: pid, 234 | CpuSet: toGRPCHelper4CPUSet(ePodAllock.CPUSet), 235 | ContainersAllocations: toGRPCHelper4Containers(ePodAllock.ContainerResources), 236 | AllocState: AllocationState_CREATED, 237 | } 238 | } 239 | 240 | func TestCreateAndUpdatePodNoError(t *testing.T) { 241 | ctx := context.Background() 242 | assert := assert.New(t) 243 | client, closer, mDaemon := NewMockedServer(ctx) 244 | defer closer() 245 | affTestCases := []Placement{Placement_DEFAULT, Placement_COMPACT, Placement_SCATTER, Placement_POOL} 246 | containers := createContainers(4, affTestCases) 247 | modifiedContainers := modifyContainers(containers) 248 | for _, a := range affTestCases { 249 | pReq, exReply := createTestPodRequest(t, "test1", "test2", mDaemon, a, containers, nil) 250 | reply, err := client.CreatePod(ctx, pReq) 251 | assert.Nil(err) 252 | assert.NotNil(reply) 253 | validateAllocatedPodReply(t, exReply, reply) 254 | uReq, exReply := updateTestPodRequest(t, mDaemon, pReq, modifiedContainers, nil) 255 | reply, err = client.UpdatePod(ctx, uReq) 256 | assert.Nil(err) 257 | assert.NotNil(reply) 258 | validateAllocatedPodReply(t, exReply, reply) 259 | } 260 | } 261 | 262 | func TestCreatePodError(t *testing.T) { 263 | ctx := context.Background() 
264 | assert := assert.New(t) 265 | client, closer, mDaemon := NewMockedServer(ctx) 266 | defer closer() 267 | affTestCases := []Placement{Placement_DEFAULT} 268 | containers := createContainers(1, affTestCases) 269 | for _, a := range affTestCases { 270 | pErr := status.Error(codes.Aborted, "error") 271 | pReq, _ := createTestPodRequest(t, "test1", "test2", mDaemon, a, containers, pErr) 272 | reply, err := client.CreatePod(ctx, pReq) 273 | assert.NotNil(err) 274 | assert.Contains(err.Error(), pErr.Error()) 275 | assert.Nil(reply) 276 | } 277 | } 278 | 279 | func TestDeletePodNotFound(t *testing.T) { 280 | ctx := context.Background() 281 | assert := assert.New(t) 282 | client, closer, mDaemon := NewMockedServer(ctx) 283 | defer closer() 284 | containers := createContainers(1, []Placement{Placement_DEFAULT}) 285 | pErr := status.Error(codes.Aborted, "error") 286 | pReq, _ := createTestPodRequest(t, "test1", "test2", mDaemon, Placement_DEFAULT, containers, nil) 287 | req, _ := createTestDeletion(mDaemon, pReq.PodId, pErr) 288 | reply, err := client.DeletePod(ctx, req) 289 | assert.NotNil(err) 290 | assert.Contains(err.Error(), pErr.Error()) 291 | assert.Nil(reply) 292 | } 293 | 294 | func TestDeletePod(t *testing.T) { 295 | ctx := context.Background() 296 | assert := assert.New(t) 297 | client, closer, mDaemon := NewMockedServer(ctx) 298 | defer closer() 299 | containers := createContainers(1, []Placement{Placement_DEFAULT}) 300 | pReq, _ := createTestPodRequest(t, "test1", "test2", mDaemon, Placement_DEFAULT, containers, nil) 301 | _, err := client.CreatePod(ctx, pReq) 302 | assert.Nil(err) 303 | req, eReply := createTestDeletion(mDaemon, pReq.PodId, nil) 304 | reply, err := client.DeletePod(ctx, req) 305 | validateAllocatedPodReply(t, eReply, reply) 306 | assert.Nil(err) 307 | } 308 | -------------------------------------------------------------------------------- /pkg/ctlplaneapi/ctrplaneapi_server.go: -------------------------------------------------------------------------------- 1 | // Package ctlplaneapi creates a control plane api grpc server 2 | package ctlplaneapi 3 | 4 | import ( 5 | "context" 6 | 7 | "google.golang.org/grpc/codes" 8 | "google.golang.org/grpc/status" 9 | ) 10 | 11 | // CPUBucket A cpu bucket describes a bucket of cpus by a given start CPU ID and 12 | // end CPU ID. The bucket includes all cpus in the range 13 | // [start CPU ID - end CPU ID]. 14 | type CPUBucket struct { 15 | StartCPU int 16 | EndCPU int 17 | } 18 | 19 | // AllocatedContainerResource represents a single container allocation. 20 | type AllocatedContainerResource struct { 21 | ContainerID string 22 | CPUSet []CPUBucket 23 | } 24 | 25 | // AllocatedPodResources represents a pod allocation, together with container sub-allocations. 26 | type AllocatedPodResources struct { 27 | CPUSet []CPUBucket 28 | ContainerResources []AllocatedContainerResource 29 | } 30 | 31 | // CtlPlane is an interface to be implemented by the Daemon. 32 | type CtlPlane interface { 33 | // Creates a pod with the given resource allocation for the parent pod and all its containers 34 | CreatePod(req *CreatePodRequest) (*AllocatedPodResources, error) 35 | // Deletes the pod and its child container allocations 36 | DeletePod(req *DeletePodRequest) error 37 | // Updates the resource allocation of the parent pod and all its containers 38 | UpdatePod(req *UpdatePodRequest) (*AllocatedPodResources, error) 39 | } 40 | 41 | // Server implements CtlPlane GRPC Server protocol.
42 | type Server struct { 43 | UnimplementedControlPlaneServer 44 | ctl CtlPlane 45 | } 46 | 47 | // NewServer initializes new ctlplaneapi.Server. 48 | func NewServer(c CtlPlane) *Server { 49 | return &Server{ 50 | ctl: c, 51 | } 52 | } 53 | 54 | // DeletePod deletes pod from allocator. 55 | func (d *Server) DeletePod(ctx context.Context, cP *DeletePodRequest) (*PodAllocationReply, error) { 56 | if err := d.ctl.DeletePod(cP); err != nil { 57 | return nil, status.Error(codes.Unavailable, err.Error()) 58 | } 59 | reply := PodAllocationReply{ 60 | PodId: cP.PodId, 61 | AllocState: AllocationState_DELETED, 62 | } 63 | return &reply, nil 64 | } 65 | 66 | // CreatePod creates pod inside allocator. 67 | func (d *Server) CreatePod(ctx context.Context, cP *CreatePodRequest) (*PodAllocationReply, error) { 68 | podResources, err := d.ctl.CreatePod(cP) 69 | if err != nil { 70 | return nil, status.Error(codes.Unavailable, err.Error()) 71 | } 72 | reply := PodAllocationReply{ 73 | PodId: cP.PodId, 74 | CpuSet: toGRPCHelper4CPUSet(podResources.CPUSet), 75 | AllocState: AllocationState_CREATED, 76 | } 77 | return &reply, nil 78 | } 79 | 80 | // UpdatePod reallocates all changed containers of a pod. 81 | func (d *Server) UpdatePod(ctx context.Context, cP *UpdatePodRequest) (*PodAllocationReply, error) { 82 | podResources, err := d.ctl.UpdatePod(cP) 83 | if err != nil { 84 | return nil, status.Error(codes.Unavailable, err.Error()) 85 | } 86 | reply := PodAllocationReply{ 87 | PodId: cP.PodId, 88 | CpuSet: toGRPCHelper4CPUSet(podResources.CPUSet), 89 | AllocState: AllocationState_UPDATED, 90 | } 91 | return &reply, nil 92 | } 93 | 94 | func toGRPCHelper4CPUSet(b []CPUBucket) []*CPUSet { 95 | res := []*CPUSet{} 96 | for _, it := range b { 97 | res = append(res, 98 | &CPUSet{ 99 | StartCPU: int32(it.StartCPU), 100 | EndCPU: int32(it.EndCPU), 101 | }) 102 | } 103 | return res 104 | } 105 | -------------------------------------------------------------------------------- /pkg/ctlplaneapi/validation.go: -------------------------------------------------------------------------------- 1 | package ctlplaneapi 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | 7 | "k8s.io/apimachinery/pkg/api/resource" 8 | ) 9 | 10 | var ( 11 | ErrEmptyString = errors.New("string is empty") 12 | ErrLessThanZero = errors.New("value cannot be less than 0") 13 | ErrLimitSmallerThanRequest = errors.New("limit cannot be smaller than request") 14 | ErrNoContainers = errors.New("pod spec does not include any containers") 15 | ) 16 | 17 | // ValidateResourceInfo checks if resource info fulfills following requirements: 18 | // - request and limit cpu/memory cannot be less than zero 19 | // - requested cpu/memory cannot be larger than their limit 20 | func ValidateResourceInfo(info *ResourceInfo) error { 21 | rm := resource.Quantity{} 22 | lm := resource.Quantity{} 23 | zero := resource.Quantity{} 24 | err := rm.Unmarshal(info.RequestedMemory) 25 | if err != nil { 26 | return err 27 | } 28 | err = lm.Unmarshal(info.LimitMemory) 29 | if err != nil { 30 | return err 31 | } 32 | if err := returnErrorIfLessThanZero([]lessThanZeroValidatorEntry{ 33 | {info.RequestedCpus, "request CPU"}, 34 | {info.LimitCpus, "limit CPU"}, 35 | {int32(rm.Cmp(zero)), "request memory"}, 36 | {int32(lm.Cmp(zero)), "limit memory"}, 37 | }); err != nil { 38 | return err 39 | } 40 | 41 | if info.LimitCpus < info.RequestedCpus { 42 | return fmt.Errorf("CPU: %w. 
%d vs %d", ErrLimitSmallerThanRequest, info.LimitCpus, info.RequestedCpus) 43 | } 44 | 45 | if lm.Cmp(rm) < 0 { 46 | return fmt.Errorf("memory: %w", ErrLimitSmallerThanRequest) 47 | } 48 | 49 | return nil 50 | } 51 | 52 | // ValidateContainers checks if a slice of container infos fulfills the following requirements: 53 | // - container id and name cannot be empty 54 | // - container resources fulfill requirements of ValidateResourceInfo 55 | func ValidateContainers(containers []*ContainerInfo) error { 56 | for _, container := range containers { 57 | if err := returnErrorIfEmptyString([]emptyStringValidatorEntry{ 58 | {container.ContainerId, "container id cannot be nil"}, 59 | {container.ContainerName, "container name cannot be nil"}, 60 | }); err != nil { 61 | return err 62 | } 63 | 64 | if err := ValidateResourceInfo(container.Resources); err != nil { 65 | return err 66 | } 67 | } 68 | return nil 69 | } 70 | 71 | // ValidateCreatePodRequest checks if CreatePodRequest fulfills the following requirements: 72 | // - number of containers must be greater than 0 73 | // - pod id, name, namespace cannot be empty 74 | // - pod resources fulfill requirements of ValidateResourceInfo 75 | // - all containers must fulfill requirements of ValidateContainers 76 | func ValidateCreatePodRequest(req *CreatePodRequest) error { 77 | if len(req.Containers) == 0 { 78 | return ErrNoContainers 79 | } 80 | 81 | if err := returnErrorIfEmptyString([]emptyStringValidatorEntry{ 82 | {req.PodId, "pod id cannot be nil"}, 83 | {req.PodName, "pod name cannot be nil"}, 84 | {req.PodNamespace, "pod namespace cannot be nil"}, 85 | }); err != nil { 86 | return err 87 | } 88 | 89 | if err := ValidateResourceInfo(req.Resources); err != nil { 90 | return err 91 | } 92 | 93 | if err := ValidateContainers(req.Containers); err != nil { 94 | return err 95 | } 96 | 97 | return nil 98 | } 99 | 100 | // ValidateDeletePodRequest checks if DeletePodRequest fulfills the following requirements: 101 | // - PodId cannot be empty string 102 | func ValidateDeletePodRequest(req *DeletePodRequest) error { 103 | if req.PodId == "" { 104 | return fmt.Errorf("pod id error: %w", ErrEmptyString) 105 | } 106 | return nil 107 | } 108 | 109 | // ValidateUpdatePodRequest checks if UpdatePodRequest fulfills the following requirements: 110 | // - number of containers must be greater than 0 111 | // - pod id cannot be empty 112 | // - pod resources fulfill requirements of ValidateResourceInfo 113 | // - all containers must fulfill requirements of ValidateContainers 114 | func ValidateUpdatePodRequest(req *UpdatePodRequest) error { 115 | if len(req.Containers) == 0 { 116 | return ErrNoContainers 117 | } 118 | 119 | if req.PodId == "" { 120 | return fmt.Errorf("pod id error: %w", ErrEmptyString) 121 | } 122 | 123 | if err := ValidateResourceInfo(req.Resources); err != nil { 124 | return err 125 | } 126 | 127 | if err := ValidateContainers(req.Containers); err != nil { 128 | return err 129 | } 130 | 131 | return nil 132 | } 133 | 134 | type emptyStringValidatorEntry struct { 135 | s string 136 | err string 137 | } 138 | 139 | func returnErrorIfEmptyString(entries []emptyStringValidatorEntry) error { 140 | for _, entry := range entries { 141 | if entry.s == "" { 142 | return fmt.Errorf("%w: %s", ErrEmptyString, entry.err) 143 | } 144 | } 145 | return nil 146 | } 147 | 148 | type lessThanZeroValidatorEntry struct { 149 | i int32 150 | err string 151 | } 152 | 153 | func returnErrorIfLessThanZero(entries []lessThanZeroValidatorEntry) error { 154 | for _, entry := range entries {
155 | if entry.i < 0 { 156 | return fmt.Errorf("%w: %s", ErrLessThanZero, entry.err) 157 | } 158 | } 159 | return nil 160 | } 161 | -------------------------------------------------------------------------------- /pkg/ctlplaneapi/validation_test.go: -------------------------------------------------------------------------------- 1 | package ctlplaneapi 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func properResourceInfo() *ResourceInfo { 11 | return &ResourceInfo{ 12 | RequestedCpus: 1, 13 | LimitCpus: 1, 14 | RequestedMemory: newQuantityAsBytes(1), 15 | LimitMemory: newQuantityAsBytes(1), 16 | } 17 | } 18 | 19 | func properContainers() []*ContainerInfo { 20 | return []*ContainerInfo{ 21 | { 22 | ContainerId: "ci", 23 | ContainerName: "cn", 24 | Resources: &ResourceInfo{ 25 | RequestedCpus: 1, 26 | LimitCpus: 1, 27 | RequestedMemory: newQuantityAsBytes(1), 28 | LimitMemory: newQuantityAsBytes(1), 29 | }, 30 | }, 31 | } 32 | } 33 | 34 | func TestValidateResourceInfo(t *testing.T) { 35 | require.Nil(t, ValidateResourceInfo(properResourceInfo())) 36 | 37 | testCases := []struct { 38 | modifier func(*ResourceInfo) 39 | expectedErr error 40 | }{ 41 | { 42 | modifier: func(ri *ResourceInfo) { ri.LimitCpus = 2 }, 43 | expectedErr: nil, 44 | }, 45 | { 46 | modifier: func(ri *ResourceInfo) { ri.LimitMemory = newQuantityAsBytes(2) }, 47 | expectedErr: nil, 48 | }, 49 | { 50 | modifier: func(ri *ResourceInfo) { ri.RequestedCpus = -1 }, 51 | expectedErr: ErrLessThanZero, 52 | }, 53 | { 54 | modifier: func(ri *ResourceInfo) { ri.LimitCpus = -1 }, 55 | expectedErr: ErrLessThanZero, 56 | }, 57 | { 58 | modifier: func(ri *ResourceInfo) { ri.RequestedMemory = newQuantityAsBytes(-1) }, 59 | expectedErr: ErrLessThanZero, 60 | }, 61 | { 62 | modifier: func(ri *ResourceInfo) { ri.LimitMemory = newQuantityAsBytes(-1) }, 63 | expectedErr: ErrLessThanZero, 64 | }, 65 | { 66 | modifier: func(ri *ResourceInfo) { ri.LimitMemory = newQuantityAsBytes(0) }, 67 | expectedErr: ErrLimitSmallerThanRequest, 68 | }, 69 | { 70 | modifier: func(ri *ResourceInfo) { ri.LimitCpus = 0 }, 71 | expectedErr: ErrLimitSmallerThanRequest, 72 | }, 73 | } 74 | 75 | for _, testCase := range testCases { 76 | req := properResourceInfo() 77 | testCase.modifier(req) 78 | 79 | err := ValidateResourceInfo(req) 80 | assert.ErrorIs(t, err, testCase.expectedErr) 81 | } 82 | } 83 | 84 | func TestValidateContainers(t *testing.T) { 85 | require.Nil(t, ValidateContainers(properContainers())) 86 | 87 | testCases := []struct { 88 | modifier func([]*ContainerInfo) 89 | expectedErr error 90 | }{ 91 | { 92 | modifier: func(ci []*ContainerInfo) { ci[0].ContainerId = "" }, 93 | expectedErr: ErrEmptyString, 94 | }, 95 | { 96 | modifier: func(ci []*ContainerInfo) { ci[0].ContainerName = "" }, 97 | expectedErr: ErrEmptyString, 98 | }, 99 | { 100 | modifier: func(ci []*ContainerInfo) { ci[0].Resources.LimitCpus = -1 }, 101 | expectedErr: ErrLessThanZero, 102 | }, 103 | } 104 | 105 | for _, testCase := range testCases { 106 | req := properContainers() 107 | testCase.modifier(req) 108 | 109 | err := ValidateContainers(req) 110 | assert.ErrorIs(t, err, testCase.expectedErr) 111 | } 112 | } 113 | 114 | func TestValidateCreatePodRequest(t *testing.T) { 115 | properPodRequest := func() *CreatePodRequest { 116 | return &CreatePodRequest{ 117 | PodId: "i", 118 | PodName: "n", 119 | PodNamespace: "ns", 120 | Resources: properResourceInfo(), 121 | Containers: properContainers(), 122 | } 
123 | } 124 | 125 | require.Nil(t, ValidateCreatePodRequest(properPodRequest())) 126 | 127 | testCases := []struct { 128 | modifier func(*CreatePodRequest) 129 | expectedErr error 130 | }{ 131 | { 132 | modifier: func(cpr *CreatePodRequest) { cpr.Containers = []*ContainerInfo{} }, 133 | expectedErr: ErrNoContainers, 134 | }, 135 | { 136 | modifier: func(cpr *CreatePodRequest) { cpr.PodId = "" }, 137 | expectedErr: ErrEmptyString, 138 | }, 139 | { 140 | modifier: func(cpr *CreatePodRequest) { cpr.PodName = "" }, 141 | expectedErr: ErrEmptyString, 142 | }, 143 | { 144 | modifier: func(cpr *CreatePodRequest) { cpr.PodNamespace = "" }, 145 | expectedErr: ErrEmptyString, 146 | }, 147 | { 148 | modifier: func(cpr *CreatePodRequest) { cpr.Resources.LimitCpus = -1 }, 149 | expectedErr: ErrLessThanZero, 150 | }, 151 | { 152 | modifier: func(cpr *CreatePodRequest) { cpr.Containers[0].ContainerId = "" }, 153 | expectedErr: ErrEmptyString, 154 | }, 155 | } 156 | 157 | for _, testCase := range testCases { 158 | req := properPodRequest() 159 | testCase.modifier(req) 160 | 161 | err := ValidateCreatePodRequest(req) 162 | assert.ErrorIs(t, err, testCase.expectedErr) 163 | } 164 | } 165 | 166 | func TestValidateDeletePodRequest(t *testing.T) { 167 | assert.Nil(t, ValidateDeletePodRequest(&DeletePodRequest{PodId: "i"})) 168 | assert.ErrorIs(t, ValidateDeletePodRequest(&DeletePodRequest{}), ErrEmptyString) 169 | } 170 | 171 | func TestValidateUpdatePodRequest(t *testing.T) { 172 | properPodRequest := func() *UpdatePodRequest { 173 | return &UpdatePodRequest{ 174 | PodId: "i", 175 | Resources: properResourceInfo(), 176 | Containers: properContainers(), 177 | } 178 | } 179 | 180 | require.Nil(t, ValidateUpdatePodRequest(properPodRequest())) 181 | 182 | testCases := []struct { 183 | modifier func(*UpdatePodRequest) 184 | expectedErr error 185 | }{ 186 | { 187 | modifier: func(cpr *UpdatePodRequest) { cpr.Containers = []*ContainerInfo{} }, 188 | expectedErr: ErrNoContainers, 189 | }, 190 | { 191 | modifier: func(cpr *UpdatePodRequest) { cpr.PodId = "" }, 192 | expectedErr: ErrEmptyString, 193 | }, 194 | { 195 | modifier: func(cpr *UpdatePodRequest) { cpr.Resources.LimitCpus = -1 }, 196 | expectedErr: ErrLessThanZero, 197 | }, 198 | { 199 | modifier: func(cpr *UpdatePodRequest) { cpr.Containers[0].ContainerName = "" }, 200 | expectedErr: ErrEmptyString, 201 | }, 202 | } 203 | 204 | for _, testCase := range testCases { 205 | req := properPodRequest() 206 | testCase.modifier(req) 207 | 208 | err := ValidateUpdatePodRequest(req) 209 | assert.ErrorIs(t, err, testCase.expectedErr) 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /pkg/numautils/dirutils.go: -------------------------------------------------------------------------------- 1 | package numautils 2 | 3 | import ( 4 | "os" 5 | "strconv" 6 | "strings" 7 | ) 8 | 9 | // For strings in form prefixX returns X if X is a number. 10 | func getNameWithPrefixAndNumber(name, prefix string) (bool, int) { 11 | if strings.HasPrefix(name, prefix) { 12 | nodeID, err := strconv.Atoi(name[len(prefix):]) 13 | if err != nil { 14 | return false, 0 15 | } 16 | return true, nodeID 17 | } 18 | return false, 0 19 | } 20 | 21 | // Returns list of files/directories with name prefix[0-9]+. 22 | // For example, (file, [file1, file2, filetest, nofile3]) returns [1, 2]. 
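// An illustrative call, mirroring how discover.go uses this helper to enumerate NUMA nodes
// (the concrete path and the resulting ids are examples, not values read from a real machine):
//
//	nodeIDs, err := getEntriesWithPrefixAndNumber("/sys/devices/system/node", "node")
//	// on a two-node machine nodeIDs would be []int{0, 1}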
23 | func getEntriesWithPrefixAndNumber(path, prefix string) ([]int, error) { 24 | dir, err := os.Open(path) 25 | if err != nil { 26 | return []int{}, err 27 | } 28 | defer dir.Close() 29 | 30 | dirContents, err := dir.Readdirnames(0) 31 | if err != nil { 32 | return []int{}, err 33 | } 34 | 35 | entries := []int{} 36 | for _, directory := range dirContents { 37 | if isValid, nodeID := getNameWithPrefixAndNumber(directory, prefix); isValid { 38 | entries = append(entries, nodeID) 39 | } 40 | } 41 | return entries, nil 42 | } 43 | -------------------------------------------------------------------------------- /pkg/numautils/dirutils_test.go: -------------------------------------------------------------------------------- 1 | package numautils 2 | 3 | import ( 4 | "os" 5 | "path" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestGetNameWithPrefixAndNumber(t *testing.T) { 12 | testCases := []struct { 13 | name string 14 | prefix string 15 | expectedIsPrefix bool 16 | expectedSuffixNum int 17 | }{ 18 | {"test12", "test", true, 12}, 19 | {"123", "", true, 123}, 20 | {"xx1t", "xx", false, 0}, 21 | {"test123", "zest", false, 0}, 22 | {"test", "test", false, 0}, 23 | } 24 | 25 | for _, testCase := range testCases { 26 | name := testCase.name + "_with_prefix_" + testCase.prefix 27 | t.Run( 28 | name, func(t *testing.T) { 29 | isPrefix, suffixNum := getNameWithPrefixAndNumber(testCase.name, testCase.prefix) 30 | assert.Equal( 31 | t, 32 | testCase.expectedIsPrefix, 33 | isPrefix, 34 | ) 35 | assert.Equal( 36 | t, 37 | testCase.expectedSuffixNum, 38 | suffixNum, 39 | ) 40 | }, 41 | ) 42 | } 43 | } 44 | 45 | func TestGetEntriesWithPrefixAndNumber(t *testing.T) { 46 | fileNames := []string{"test1", "xtest", "test5", "test3x", "54"} 47 | expectedNumbers := []int{1, 5} 48 | 49 | dir, err := os.MkdirTemp("", "dirutils_test") 50 | assert.Nil(t, err) 51 | 52 | defer os.RemoveAll(dir) 53 | 54 | for _, fileName := range fileNames { 55 | err = os.Mkdir(path.Join(dir, fileName), 0750) 56 | assert.Nil(t, err) 57 | } 58 | 59 | result, err := getEntriesWithPrefixAndNumber(dir, "test") 60 | 61 | assert.Nil(t, err) 62 | assert.ElementsMatch(t, expectedNumbers, result) 63 | } 64 | -------------------------------------------------------------------------------- /pkg/numautils/discover.go: -------------------------------------------------------------------------------- 1 | package numautils 2 | 3 | import ( 4 | "path" 5 | "strconv" 6 | "strings" 7 | 8 | "resourcemanagement.controlplane/pkg/utils" 9 | ) 10 | 11 | // LinuxTopologyPath is a path where kernels exposes machine topology information. 12 | const LinuxTopologyPath = "/sys/devices/system/node" 13 | 14 | const ( 15 | nodePrefix = "node" 16 | cpuPrefix = "cpu" 17 | topologyDir = "topology" 18 | packageFile = "package_id" 19 | dieFile = "die_id" 20 | coreFile = "core_id" 21 | ) 22 | 23 | // CpuInfo stores topology information about single CPU. 
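// As an illustration of how discover.go fills it in (the ids below are made up, not read from
// any particular machine): a CPU exposed under node0/cpu3 whose topology files contain
// package_id=0, die_id=0 and core_id=1 is represented as
//
//	CpuInfo{Node: 0, Cpu: 3, Package: 0, Die: 0, Core: 1}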
24 | type CpuInfo struct { 25 | Node int 26 | Package int 27 | Die int 28 | Core int 29 | Cpu int 30 | } 31 | 32 | func loadNodes(topologyPath string) ([]int, error) { 33 | return getEntriesWithPrefixAndNumber(topologyPath, nodePrefix) 34 | } 35 | 36 | func listCpusFromNode(topologyPath string, node int) ([]CpuInfo, error) { 37 | cpuIDs, err := getEntriesWithPrefixAndNumber(getNodeDirPath(topologyPath, node), cpuPrefix) 38 | if err != nil { 39 | return []CpuInfo{}, err 40 | } 41 | cpus := []CpuInfo{} 42 | for _, cpu := range cpuIDs { 43 | cpuTopologyBase := path.Join(getCPUDirPath(topologyPath, node, cpu), topologyDir) 44 | readOrDefault := func(fileName string) int { 45 | data, err := readIntFromFile(cpuTopologyBase, fileName) 46 | if err != nil { 47 | return 0 48 | } 49 | return data 50 | } 51 | cpu := CpuInfo{ 52 | Cpu: cpu, 53 | Node: node, 54 | Package: readOrDefault(packageFile), 55 | Die: readOrDefault(dieFile), 56 | Core: readOrDefault(coreFile), 57 | } 58 | cpus = append(cpus, cpu) 59 | } 60 | 61 | return cpus, nil 62 | } 63 | 64 | func getNodeDirPath(topologyPath string, node int) string { 65 | return path.Join(topologyPath, nodePrefix+strconv.Itoa(node)) 66 | } 67 | 68 | func getCPUDirPath(topologyPath string, node int, cpu int) string { 69 | return path.Join(getNodeDirPath(topologyPath, node), cpuPrefix+strconv.Itoa(cpu)) 70 | } 71 | 72 | func readIntFromFile(basePath, filename string) (int, error) { 73 | data, err := utils.ReadFileAt(basePath, filename) 74 | if err != nil { 75 | return 0, err 76 | } 77 | return strconv.Atoi(strings.TrimSpace(string(data))) 78 | } 79 | -------------------------------------------------------------------------------- /pkg/numautils/discover_test.go: -------------------------------------------------------------------------------- 1 | package numautils 2 | 3 | import ( 4 | "os" 5 | "path" 6 | "strconv" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | const dirMode = 0700 14 | const fileMode = 0600 15 | 16 | type optionalCpuInfo struct { 17 | packageID int 18 | dieID int 19 | coreID int 20 | } 21 | 22 | type testNode struct { 23 | nodeNum int 24 | cpus map[int]optionalCpuInfo 25 | } 26 | 27 | func createNodeFiles(dir string, node testNode) error { 28 | nodePath := path.Join(dir, nodePrefix+strconv.Itoa(node.nodeNum)) 29 | if err := os.Mkdir(nodePath, 0750); err != nil { 30 | return err 31 | } 32 | 33 | for cpuID, cpuData := range node.cpus { 34 | cpuPath := path.Join(nodePath, cpuPrefix+strconv.Itoa(cpuID)) 35 | 36 | if err := os.Mkdir(cpuPath, dirMode); err != nil { 37 | return err 38 | } 39 | 40 | topologyPath := path.Join(cpuPath, topologyDir) 41 | if err := os.Mkdir(topologyPath, dirMode); err != nil { 42 | return err 43 | } 44 | 45 | createFileIfValueSet := func(fname string, value int) error { 46 | if value < 0 { 47 | return nil 48 | } 49 | filePath := path.Join(topologyPath, fname) 50 | valueString := strconv.Itoa(value) 51 | return os.WriteFile(filePath, []byte(valueString), fileMode) 52 | } 53 | 54 | if err := createFileIfValueSet(packageFile, cpuData.packageID); err != nil { 55 | return err 56 | } 57 | 58 | if err := createFileIfValueSet(dieFile, cpuData.dieID); err != nil { 59 | return err 60 | } 61 | 62 | if err := createFileIfValueSet(coreFile, cpuData.coreID); err != nil { 63 | return err 64 | } 65 | } 66 | return nil 67 | } 68 | 69 | func TestLoadNodes(t *testing.T) { 70 | testDir, err := os.MkdirTemp("", "test") 71 | assert.Nil(t, err) 72 | defer 
os.RemoveAll(testDir) 73 | 74 | err = createNodeFiles(testDir, testNode{ 75 | nodeNum: 41, 76 | }) 77 | require.Nil(t, err) 78 | err = createNodeFiles(testDir, testNode{ 79 | nodeNum: 5, 80 | }) 81 | require.Nil(t, err) 82 | 83 | nodes, err := loadNodes(testDir) 84 | assert.Nil(t, err) 85 | assert.ElementsMatch(t, []int{41, 5}, nodes) 86 | } 87 | 88 | func TestReadIntFromFiles(t *testing.T) { 89 | testCases := []struct { 90 | content string 91 | result int 92 | isError bool 93 | }{ 94 | {"123", 123, false}, 95 | {"123\n", 123, false}, 96 | {"test", 0, true}, 97 | {"", 0, true}, 98 | {"-1", -1, false}, 99 | } 100 | 101 | for _, testCase := range testCases { 102 | t.Run(testCase.content, func(t *testing.T) { 103 | file, err := os.CreateTemp("", "test") 104 | assert.Nil(t, err) 105 | defer os.Remove(file.Name()) 106 | 107 | _, err = file.WriteString(testCase.content) 108 | assert.Nil(t, err) 109 | 110 | value, err := readIntFromFile(file.Name(), "") 111 | if testCase.isError { 112 | assert.NotNil(t, err) 113 | } else { 114 | assert.Nil(t, err) 115 | } 116 | 117 | assert.Equal(t, testCase.result, value) 118 | }) 119 | } 120 | } 121 | 122 | func TestListCpusFromNodeTestpath(t *testing.T) { 123 | testDir, err := os.MkdirTemp("", "test") 124 | assert.Nil(t, err) 125 | defer os.RemoveAll(testDir) 126 | 127 | err = createNodeFiles(testDir, testNode{ 128 | nodeNum: 41, 129 | cpus: map[int]optionalCpuInfo{ 130 | 1: { 131 | packageID: -1, 132 | dieID: 1, 133 | coreID: 0, 134 | }, 135 | 3: { 136 | packageID: -1, 137 | dieID: 1, 138 | coreID: 0, 139 | }, 140 | 5: { 141 | packageID: -1, 142 | dieID: 1, 143 | coreID: 1, 144 | }, 145 | 8: { 146 | packageID: -1, 147 | dieID: 2, 148 | coreID: 1, 149 | }, 150 | }, 151 | }) 152 | require.Nil(t, err) 153 | expectedCpus := []CpuInfo{ 154 | { 155 | Cpu: 1, 156 | Node: 41, 157 | Package: 0, 158 | Die: 1, 159 | Core: 0, 160 | }, 161 | { 162 | Cpu: 3, 163 | Node: 41, 164 | Package: 0, 165 | Die: 1, 166 | Core: 0, 167 | }, 168 | { 169 | Cpu: 5, 170 | Node: 41, 171 | Package: 0, 172 | Die: 1, 173 | Core: 1, 174 | }, 175 | { 176 | Cpu: 8, 177 | Node: 41, 178 | Package: 0, 179 | Die: 2, 180 | Core: 1, 181 | }, 182 | } 183 | 184 | cpuInfos, err := listCpusFromNode(testDir, 41) 185 | assert.Nil(t, err) 186 | 187 | assert.ElementsMatch(t, expectedCpus, cpuInfos) 188 | } 189 | -------------------------------------------------------------------------------- /pkg/numautils/numa.go: -------------------------------------------------------------------------------- 1 | // Package numautils reads topology as seen in /sys/devices/system/node (documentation available at 2 | // https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node). It then represents it as 3 | // tree whose leafs are cpus. 4 | package numautils 5 | 6 | import ( 7 | "errors" 8 | "fmt" 9 | ) 10 | 11 | // ErrNotAvailable is returned when it is impossible to allocate cpus. 12 | var ErrNotAvailable = errors.New("not enough cpus available") 13 | 14 | // ErrNotFound is returned when cpu information cannot be found. 15 | var ErrNotFound = errors.New("cpu not found") 16 | 17 | // ErrLoadError is returned when loading topology information from kernel failed. 18 | var ErrLoadError = errors.New("cannot read topology information") 19 | 20 | // NumaTopology holds topology information of the machine. User should invoke `Load` method to 21 | // initialize topology information. 
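// A minimal usage sketch (illustrative only; error handling is abbreviated and the request for
// two CPUs is an arbitrary example):
//
//	var topo NumaTopology
//	if err := topo.Load(LinuxTopologyPath); err != nil {
//		// handle topology discovery failure
//	}
//	cpus, err := topo.Take(2) // two CPUs that are topologically as close as possible
//	if err != nil {
//		// not enough free CPUs
//	}
//	// ... use the CPUs, then hand them back:
//	for _, id := range cpus {
//		_ = topo.Return(id)
//	}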
22 | type NumaTopology struct {
23 | 	Topology       *TopologyNode
24 | 	CpuInformation map[int]CpuInfo
25 | }
26 | 
27 | // Take finds n unused CPUs in the topology tree. It finds an allocation that minimizes the topology
28 | // distance between the CPUs. In our case the topology distance between n leaves is defined as the maximal
29 | // path length from any of those leaves to their nearest common ancestor.
30 | func (t *NumaTopology) Take(n int) ([]int, error) {
31 | 	l, _ := t.Topology.findLowestNodeWithEnoughAvailability(n, 0)
32 | 	if l == nil {
33 | 		return []int{}, ErrNotAvailable
34 | 	}
35 | 	leaves, err := l.takeLeaves(n)
36 | 	// takeLeaves updated NumAvailable from l down to the leaves.
37 | 	// We must now update l's predecessors.
38 | 	if l != t.Topology {
39 | 		path := t.Topology.find(func(tl *TopologyNode) bool { return tl == l })
40 | 		for _, node := range path[1:] { // 1st is l itself
41 | 			node.NumAvailable -= n
42 | 		}
43 | 	}
44 | 	if err != nil {
45 | 		return []int{}, ErrNotAvailable
46 | 	}
47 | 	cpuIDs := make([]int, 0, n)
48 | 	for _, leaf := range leaves {
49 | 		cpuIDs = append(cpuIDs, leaf.Value)
50 | 	}
51 | 	return cpuIDs, nil
52 | }
53 | 
54 | // FindCpu returns the TopologyNode of the given cpu. The node is guaranteed to be a leaf of the topology
55 | // tree.
56 | func (t *NumaTopology) FindCpu(cpuID int) (*TopologyNode, error) {
57 | 	path := t.Topology.find(func(tl *TopologyNode) bool { return tl.IsLeaf() && tl.Value == cpuID })
58 | 	if len(path) == 0 {
59 | 		return nil, ErrNotFound
60 | 	}
61 | 	return path[0], nil
62 | }
63 | 
64 | // Return returns the given cpu to the pool of available cpus.
65 | func (t *NumaTopology) Return(cpuID int) error {
66 | 	path := t.Topology.find(func(tl *TopologyNode) bool { return tl.IsLeaf() && tl.Value == cpuID })
67 | 	if len(path) == 0 {
68 | 		return ErrNotFound
69 | 	}
70 | 	if path[0].NumAvailable == 0 {
71 | 		for _, node := range path {
72 | 			node.NumAvailable++
73 | 		}
74 | 	}
75 | 
76 | 	return nil
77 | }
78 | 
79 | // Load loads topology information from the given topology path (usually `LinuxTopologyPath`).
80 | func (t *NumaTopology) Load(topologyPath string) error {
81 | 	nodes, err := loadNodes(topologyPath)
82 | 
83 | 	if err != nil {
84 | 		return fmt.Errorf("%w: %v", ErrLoadError, err)
85 | 	}
86 | 
87 | 	cpuInfos := []CpuInfo{}
88 | 	for _, node := range nodes {
89 | 		nodeCpus, err := listCpusFromNode(topologyPath, node)
90 | 		if err != nil {
91 | 			return fmt.Errorf("%w: cannot load cpus information for node %d, %v", ErrLoadError, node, err)
92 | 		}
93 | 		cpuInfos = append(cpuInfos, nodeCpus...)
94 | 	}
95 | 
96 | 	return t.LoadFromCpuInfo(cpuInfos)
97 | }
98 | 
99 | // LoadFromCpuInfo loads the topology tree information given a list of cpus.
100 | func (t *NumaTopology) LoadFromCpuInfo(cpus []CpuInfo) error {
101 | 	t.cpuInfoToTopology(cpus)
102 | 
103 | 	t.CpuInformation = make(map[int]CpuInfo)
104 | 	for _, cpuInfo := range cpus {
105 | 		t.CpuInformation[cpuInfo.Cpu] = cpuInfo
106 | 	}
107 | 
108 | 	return nil
109 | }
110 | 
111 | // cpuInfoToTopology builds the node topology tree from the given CPU information.
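// Levels on which every CPU reports the same value are dropped by getUsedTopoTypes, so (as an
// illustrative example, not a guarantee about any particular machine) CPUs that all share
// package 0 and die 0 but differ in node, core and cpu ids produce a tree of the shape
//
//	machine -> node -> core -> cpu
//
// rather than one that also carries the constant package and die levels.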
112 | func (t *NumaTopology) cpuInfoToTopology(cpuInfos []CpuInfo) { 113 | t.Topology = &TopologyNode{ 114 | nodeInfo: nodeInfo{Type: Machine}, 115 | } 116 | 117 | topoTypes := getUsedTopoTypes(cpuInfos) 118 | 119 | for _, cpu := range cpuInfos { 120 | t.Topology.append(cpuInfoToNodeInfoList(cpu, topoTypes)) 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /pkg/numautils/numa_test.go: -------------------------------------------------------------------------------- 1 | package numautils 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | var expectedTree = &TopologyNode{ 12 | nodeInfo: nodeInfo{Machine, 0}, 13 | NumAvailable: 8, 14 | Children: ChildList{ 15 | &TopologyNode{ 16 | nodeInfo: nodeInfo{Node, 0}, 17 | NumAvailable: 4, 18 | Children: ChildList{ 19 | &TopologyNode{ 20 | nodeInfo: nodeInfo{Core, 0}, 21 | NumAvailable: 2, 22 | Children: ChildList{ 23 | &TopologyNode{ 24 | nodeInfo: nodeInfo{Cpu, 1}, 25 | NumAvailable: 1, 26 | }, 27 | &TopologyNode{ 28 | nodeInfo: nodeInfo{Cpu, 3}, 29 | NumAvailable: 1, 30 | }, 31 | }, 32 | }, 33 | &TopologyNode{ 34 | nodeInfo: nodeInfo{Core, 1}, 35 | NumAvailable: 2, 36 | Children: ChildList{ 37 | &TopologyNode{ 38 | nodeInfo: nodeInfo{Cpu, 5}, 39 | NumAvailable: 1, 40 | }, 41 | &TopologyNode{ 42 | nodeInfo: nodeInfo{Cpu, 7}, 43 | NumAvailable: 1, 44 | }, 45 | }, 46 | }, 47 | }, 48 | }, 49 | &TopologyNode{ 50 | nodeInfo: nodeInfo{Node, 1}, 51 | NumAvailable: 4, 52 | Children: ChildList{ 53 | &TopologyNode{ 54 | nodeInfo: nodeInfo{Core, 0}, 55 | NumAvailable: 2, 56 | Children: ChildList{ 57 | &TopologyNode{ 58 | nodeInfo: nodeInfo{Cpu, 2}, 59 | NumAvailable: 1, 60 | }, 61 | &TopologyNode{ 62 | nodeInfo: nodeInfo{Cpu, 4}, 63 | NumAvailable: 1, 64 | }, 65 | }, 66 | }, 67 | &TopologyNode{ 68 | nodeInfo: nodeInfo{Core, 1}, 69 | NumAvailable: 2, 70 | Children: ChildList{ 71 | &TopologyNode{ 72 | nodeInfo: nodeInfo{Cpu, 6}, 73 | NumAvailable: 1, 74 | }, 75 | &TopologyNode{ 76 | nodeInfo: nodeInfo{Cpu, 8}, 77 | NumAvailable: 1, 78 | }, 79 | }, 80 | }, 81 | }, 82 | }, 83 | }, 84 | } 85 | 86 | func setupNumaTest(t *testing.T) (string, func()) { 87 | testDir, err := os.MkdirTemp("", "test") 88 | assert.Nil(t, err) 89 | teardownFunc := func() { os.RemoveAll(testDir) } 90 | 91 | err = createNodeFiles(testDir, testNode{ 92 | nodeNum: 0, 93 | cpus: map[int]optionalCpuInfo{ 94 | 1: { 95 | coreID: 0, 96 | }, 97 | 3: { 98 | coreID: 0, 99 | }, 100 | 5: { 101 | coreID: 1, 102 | }, 103 | 7: { 104 | coreID: 1, 105 | }, 106 | }, 107 | }) 108 | require.Nil(t, err) 109 | err = createNodeFiles(testDir, testNode{ 110 | nodeNum: 1, 111 | cpus: map[int]optionalCpuInfo{ 112 | 2: { 113 | coreID: 0, 114 | }, 115 | 4: { 116 | coreID: 0, 117 | }, 118 | 6: { 119 | coreID: 1, 120 | }, 121 | 8: { 122 | coreID: 1, 123 | }, 124 | }, 125 | }) 126 | require.Nil(t, err) 127 | 128 | return testDir, teardownFunc 129 | } 130 | 131 | func newNuma(t *testing.T) NumaTopology { 132 | tree, err := cloneTree(expectedTree) 133 | assert.Nil(t, err) 134 | return NumaTopology{ 135 | Topology: tree, 136 | } 137 | } 138 | 139 | func TestLoad(t *testing.T) { 140 | testDir, teardownFunc := setupNumaTest(t) 141 | defer teardownFunc() 142 | 143 | numa := NumaTopology{} 144 | err := numa.Load(testDir) 145 | require.Nil(t, err) 146 | 147 | assertEqualTrees(t, expectedTree, numa.Topology) 148 | } 149 | 150 | func TestTake(t *testing.T) { 151 | type takeCase struct { 152 
| 		n               int
153 | 		expectedIsError bool
154 | 		expectedCpus    []int
155 | 	}
156 | 
157 | 	testCases := []struct {
158 | 		name  string
159 | 		takes []takeCase
160 | 	}{
161 | 		{"1", []takeCase{{1, false, []int{1}}}},
162 | 		{"1,2", []takeCase{
163 | 			{1, false, []int{1}},
164 | 			{2, false, []int{5, 7}},
165 | 		}},
166 | 		{"1,5", []takeCase{
167 | 			{1, false, []int{1}},
168 | 			{5, false, []int{3, 5, 7, 2, 4}},
169 | 		}},
170 | 		{"2,1,2", []takeCase{
171 | 			{2, false, []int{1, 3}},
172 | 			{1, false, []int{5}},
173 | 			{2, false, []int{2, 4}},
174 | 		}},
175 | 		{"1, 8", []takeCase{
176 | 			{1, false, []int{1}},
177 | 			{8, true, []int{}},
178 | 		}},
179 | 	}
180 | 
181 | 	for _, testCase := range testCases {
182 | 		t.Run(testCase.name, func(t *testing.T) {
183 | 			numa := newNuma(t)
184 | 			for _, takeCase := range testCase.takes {
185 | 				cpus, err := numa.Take(takeCase.n)
186 | 				if takeCase.expectedIsError {
187 | 					assert.NotNil(t, err)
188 | 				} else {
189 | 					assert.Nil(t, err)
190 | 					assert.Equal(t, takeCase.expectedCpus, cpus)
191 | 				}
192 | 				assert.True(t, verifyNumAvailable(numa.Topology))
193 | 			}
194 | 		})
195 | 	}
196 | }
197 | 
198 | func TestReturnCorrect(t *testing.T) {
199 | 	numa := newNuma(t)
200 | 	ids, err := numa.Take(2)
201 | 	assert.Nil(t, err)
202 | 
203 | 	for _, id := range ids {
204 | 		assert.Nil(t, numa.Return(id))
205 | 		assert.True(t, verifyNumAvailable(numa.Topology))
206 | 	}
207 | }
208 | 
209 | func TestReturnIncorrect(t *testing.T) {
210 | 	numa := newNuma(t)
211 | 	assert.Nil(t, numa.Return(1))
212 | 	assert.True(t, verifyNumAvailable(numa.Topology))
213 | }
214 | 
--------------------------------------------------------------------------------
/pkg/numautils/topology.go:
--------------------------------------------------------------------------------
1 | package numautils
2 | 
3 | import (
4 | 	"errors"
5 | 	"fmt"
6 | 	"strings"
7 | 
8 | 	"k8s.io/klog/v2"
9 | )
10 | 
11 | var ErrNotALeaf = errors.New("node is not a leaf")
12 | 
13 | // TopologyEntryType identifies the level of a topology entry (e.g. Node/Package/Die).
14 | type TopologyEntryType int
15 | 
16 | // TopologyEntryType enum.
17 | const (
18 | 	Machine TopologyEntryType = iota
19 | 	Node
20 | 	Package
21 | 	Die
22 | 	Core
23 | 	Cpu
24 | )
25 | 
26 | var topoTypeByImportance = []TopologyEntryType{Node, Package, Die, Core, Cpu}
27 | 
28 | func (t TopologyEntryType) String() string {
29 | 	switch t {
30 | 	case Machine:
31 | 		return "machine"
32 | 	case Node:
33 | 		return "node"
34 | 	case Package:
35 | 		return "package"
36 | 	case Die:
37 | 		return "die"
38 | 	case Core:
39 | 		return "core"
40 | 	case Cpu:
41 | 		return "cpu"
42 | 	default:
43 | 		return "UNKNOWN"
44 | 	}
45 | }
46 | 
47 | type nodeInfo struct {
48 | 	Type  TopologyEntryType
49 | 	Value int
50 | }
51 | 
52 | // TopologyNode holds information about a single node in the topology tree. It tracks the number of
53 | // available leaves, defined as follows: a leaf has NumAvailable = 1 and, for each non-leaf node,
54 | // NumAvailable is the sum of its children's NumAvailable. When a leaf is flagged as not available its value
55 | // becomes 0 and the counters of all its ancestors are updated.
56 | type TopologyNode struct {
57 | 	nodeInfo
58 | 	NumAvailable int
59 | 	Children     []*TopologyNode
60 | }
61 | 
62 | func (t *TopologyNode) String() string {
63 | 	return t.toString(1)
64 | }
65 | 
66 | // IsLeaf returns true if the node is a leaf node, i.e. it has no children.
67 | func (t *TopologyNode) IsLeaf() bool {
68 | 	return len(t.Children) == 0
69 | }
70 | 
71 | // GetLeafs returns the list of tree leaves, collected in breadth-first order.
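// The traversal below is breadth-first, so for the usual topology trees, where every CPU leaf
// sits at the same depth, leaves come back in left-to-right child order; for the testTree
// fixture in topology_test.go, for instance, the result is the cpu leaves 0, 0, 1 and 44.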
72 | func (t *TopologyNode) GetLeafs() []*TopologyNode { 73 | leafs := []*TopologyNode{} 74 | queue := []*TopologyNode{t} 75 | var node *TopologyNode 76 | for len(queue) > 0 { 77 | node = queue[0] 78 | if node.IsLeaf() { 79 | leafs = append(leafs, node) 80 | } else { 81 | queue = append(queue, node.Children...) 82 | } 83 | queue = queue[1:] 84 | } 85 | return leafs 86 | } 87 | 88 | // Available returns true if node has available leafs. 89 | func (t *TopologyNode) Available() bool { 90 | return t.NumAvailable > 0 91 | } 92 | 93 | // Take marks leaf as non-available. Returns error if node is not a leaf. 94 | func (t *TopologyNode) Take() error { 95 | if !t.IsLeaf() { 96 | return ErrNotALeaf 97 | } 98 | t.NumAvailable-- 99 | return nil 100 | } 101 | 102 | // Return marks leaf as available. Returns error if node is not a leaf. 103 | func (t *TopologyNode) Return() error { 104 | if !t.IsLeaf() { 105 | return ErrNotALeaf 106 | } 107 | t.NumAvailable++ 108 | return nil 109 | } 110 | 111 | func (t TopologyEntryType) valueFromCpuInfo(c CpuInfo) int { 112 | switch t { 113 | case Node: 114 | return c.Node 115 | case Package: 116 | return c.Package 117 | case Die: 118 | return c.Die 119 | case Core: 120 | return c.Core 121 | case Cpu: 122 | return c.Cpu 123 | default: 124 | klog.Fatalf("dont know how to get topology type %v", t) 125 | } 126 | return -1 127 | } 128 | 129 | func (t *TopologyNode) toString(level int) string { 130 | var builder strings.Builder 131 | builder.WriteString( 132 | fmt.Sprintf("%s %s %d (%d)\n", strings.Repeat(" ", level), t.Type, t.Value, t.NumAvailable), 133 | ) 134 | nextLevel := level + 1 135 | for _, child := range t.Children { 136 | builder.WriteString(child.toString(nextLevel)) 137 | } 138 | return builder.String() 139 | } 140 | 141 | func (t *TopologyNode) append(nodeInfoPath []nodeInfo) { 142 | if len(nodeInfoPath) == 0 { // leaf 143 | t.NumAvailable = 1 144 | return 145 | } 146 | var nextChild *TopologyNode 147 | for _, child := range t.Children { 148 | if child.Value == nodeInfoPath[0].Value { 149 | nextChild = child 150 | break 151 | } 152 | } 153 | if nextChild == nil { 154 | nextChild = &TopologyNode{ 155 | NumAvailable: 0, 156 | nodeInfo: nodeInfoPath[0], 157 | } 158 | t.Children = append(t.Children, nextChild) 159 | } 160 | t.NumAvailable++ 161 | nextChild.append(nodeInfoPath[1:]) 162 | } 163 | 164 | func (t *TopologyNode) findLowestNodeWithEnoughAvailability(n int, currentLevel int) (*TopologyNode, int) { 165 | if t.NumAvailable < n { 166 | return nil, -1 167 | } 168 | var ( 169 | bestLevel *TopologyNode 170 | bestLevelNum int 171 | ) 172 | 173 | for _, child := range t.Children { 174 | level, levelNum := child.findLowestNodeWithEnoughAvailability(n, currentLevel+1) 175 | if level != nil && levelNum > bestLevelNum { 176 | bestLevel, bestLevelNum = level, levelNum 177 | } 178 | } 179 | 180 | if bestLevel == nil { 181 | return t, currentLevel 182 | } 183 | return bestLevel, bestLevelNum 184 | } 185 | 186 | func (t *TopologyNode) takeLeaves(n int) ([]*TopologyNode, error) { 187 | if n > t.NumAvailable { 188 | return []*TopologyNode{}, ErrNotAvailable 189 | } 190 | if t.IsLeaf() { 191 | t.NumAvailable = 0 192 | return []*TopologyNode{t}, nil 193 | } 194 | 195 | leaves := make([]*TopologyNode, 0, n) 196 | for _, child := range t.Children { 197 | if child.NumAvailable == 0 { 198 | continue 199 | } 200 | leavesToTake := n - len(leaves) 201 | if child.NumAvailable < leavesToTake { 202 | leavesToTake = child.NumAvailable 203 | } 204 | 205 | takenLeaves, err := 
child.takeLeaves(leavesToTake) 206 | if err != nil { 207 | return []*TopologyNode{t}, err 208 | } 209 | leaves = append(leaves, takenLeaves...) 210 | 211 | if len(leaves) == n { 212 | break 213 | } 214 | } 215 | t.NumAvailable -= n 216 | return leaves, nil 217 | } 218 | 219 | type nodeComparator func(*TopologyNode) bool 220 | 221 | func (t *TopologyNode) find(comparator nodeComparator) []*TopologyNode { 222 | if comparator(t) { 223 | return []*TopologyNode{t} 224 | } 225 | for _, child := range t.Children { 226 | path := child.find(comparator) 227 | if len(path) > 0 { 228 | path = append(path, t) 229 | return path 230 | } 231 | } 232 | return []*TopologyNode{} 233 | } 234 | 235 | func cpuInfoToNodeInfoList(c CpuInfo, topoTypes []TopologyEntryType) []nodeInfo { 236 | info := make([]nodeInfo, 0, len(topoTypes)) 237 | for _, topoType := range topoTypes { 238 | info = append(info, nodeInfo{topoType, topoType.valueFromCpuInfo(c)}) 239 | } 240 | return info 241 | } 242 | 243 | // If all cpus have the same value for given topology level (node, die, etc.) let's ignore it. 244 | func getUsedTopoTypes(cpus []CpuInfo) []TopologyEntryType { 245 | if len(cpus) == 0 { 246 | return []TopologyEntryType{} 247 | } 248 | 249 | areValuesTheSame := func(topoType TopologyEntryType) bool { 250 | value := topoType.valueFromCpuInfo(cpus[0]) 251 | for _, cpu := range cpus[1:] { 252 | if topoType.valueFromCpuInfo(cpu) != value { 253 | return false 254 | } 255 | } 256 | return true 257 | } 258 | 259 | result := []TopologyEntryType{} 260 | for _, topoType := range topoTypeByImportance { 261 | if !areValuesTheSame(topoType) { 262 | result = append(result, topoType) 263 | } 264 | } 265 | return result 266 | } 267 | -------------------------------------------------------------------------------- /pkg/numautils/topology_test.go: -------------------------------------------------------------------------------- 1 | package numautils 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "sort" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | type ChildList []*TopologyNode 13 | 14 | var testTree = &TopologyNode{ 15 | nodeInfo: nodeInfo{Node, 0}, 16 | NumAvailable: 4, 17 | Children: ChildList{ 18 | &TopologyNode{ 19 | nodeInfo: nodeInfo{Die, 0}, 20 | NumAvailable: 3, 21 | Children: ChildList{ 22 | &TopologyNode{ 23 | nodeInfo: nodeInfo{Core, 1}, 24 | NumAvailable: 1, 25 | Children: ChildList{ 26 | &TopologyNode{ 27 | nodeInfo: nodeInfo{Cpu, 0}, 28 | NumAvailable: 1, 29 | }, 30 | }, 31 | }, 32 | &TopologyNode{ 33 | nodeInfo: nodeInfo{Core, 0}, 34 | NumAvailable: 2, 35 | Children: ChildList{ 36 | &TopologyNode{ 37 | nodeInfo: nodeInfo{Cpu, 0}, 38 | NumAvailable: 1, 39 | }, 40 | &TopologyNode{ 41 | nodeInfo: nodeInfo{Cpu, 1}, 42 | NumAvailable: 1, 43 | }, 44 | }, 45 | }, 46 | }, 47 | }, 48 | &TopologyNode{ 49 | nodeInfo: nodeInfo{Die, 1}, 50 | NumAvailable: 1, 51 | Children: ChildList{ 52 | &TopologyNode{ 53 | nodeInfo: nodeInfo{Core, 1}, 54 | NumAvailable: 1, 55 | Children: ChildList{ 56 | &TopologyNode{ 57 | nodeInfo: nodeInfo{Cpu, 44}, 58 | NumAvailable: 1, 59 | }, 60 | }, 61 | }, 62 | }, 63 | }, 64 | }, 65 | } 66 | 67 | var testTreeExpectedString = ` node 0 (4) 68 | die 0 (3) 69 | core 1 (1) 70 | cpu 0 (1) 71 | core 0 (2) 72 | cpu 0 (1) 73 | cpu 1 (1) 74 | die 1 (1) 75 | core 1 (1) 76 | cpu 44 (1) 77 | ` 78 | 79 | func sortChildren(tree *TopologyNode) { 80 | if len(tree.Children) > 0 { 81 | sort.Slice(tree.Children, func(i, j int) bool { 82 | return tree.Children[i].Value < tree.Children[j].Value 83 | 
}) 84 | for _, child := range tree.Children { 85 | sortChildren(child) 86 | } 87 | } 88 | } 89 | 90 | func cloneTree(tree *TopologyNode) (*TopologyNode, error) { 91 | jsonData, err := json.Marshal(tree) 92 | if err != nil { 93 | return nil, err 94 | } 95 | clonedTree := &TopologyNode{} 96 | err = json.Unmarshal(jsonData, clonedTree) 97 | if err != nil { 98 | return nil, err 99 | } 100 | return clonedTree, nil 101 | } 102 | 103 | func assertEqualTrees(t *testing.T, expected *TopologyNode, actual *TopologyNode) { 104 | treeToStandarizedForm := func(tree *TopologyNode) *TopologyNode { 105 | cloned, err := cloneTree(tree) 106 | assert.Nil(t, err) 107 | sortChildren(cloned) 108 | return cloned 109 | } 110 | 111 | expected = treeToStandarizedForm(expected) 112 | actual = treeToStandarizedForm(actual) 113 | 114 | assert.Equal(t, expected, actual) 115 | } 116 | 117 | func verifyNumAvailable(node *TopologyNode) bool { 118 | if node.IsLeaf() { 119 | return node.NumAvailable == 0 || node.NumAvailable == 1 120 | } 121 | numAvailable := 0 122 | for _, child := range node.Children { 123 | verified := verifyNumAvailable(child) 124 | if !verified { 125 | return false 126 | } 127 | numAvailable += child.NumAvailable 128 | } 129 | return numAvailable == node.NumAvailable 130 | } 131 | 132 | func TestAppend(t *testing.T) { 133 | appendList := [][]nodeInfo{ 134 | { 135 | {Node, 0}, 136 | {Die, 0}, 137 | {Core, 1}, 138 | {Cpu, 0}, 139 | }, 140 | { 141 | {Node, 0}, 142 | {Die, 0}, 143 | {Core, 0}, 144 | {Cpu, 0}, 145 | }, 146 | { 147 | {Node, 0}, 148 | {Die, 0}, 149 | {Core, 0}, 150 | {Cpu, 1}, 151 | }, 152 | { 153 | {Node, 0}, 154 | {Die, 1}, 155 | {Core, 1}, 156 | {Cpu, 44}, 157 | }, 158 | } 159 | 160 | root := TopologyNode{} 161 | for _, infoPath := range appendList { 162 | root.append(infoPath) 163 | } 164 | 165 | assert.Equal(t, len(root.Children), 1) 166 | assertEqualTrees(t, root.Children[0], testTree) 167 | } 168 | 169 | func TestIsLeaf(t *testing.T) { 170 | testCases := []struct { 171 | name string 172 | tree *TopologyNode 173 | IsLeaf bool 174 | }{ 175 | {"height 4", testTree, false}, 176 | {"height 2", testTree.Children[0].Children[0], false}, 177 | {"leaf", testTree.Children[0].Children[0].Children[0], true}, 178 | } 179 | 180 | for _, testCase := range testCases { 181 | t.Run( 182 | testCase.name, func(t *testing.T) { 183 | assert.Equal(t, testCase.IsLeaf, testCase.tree.IsLeaf()) 184 | }, 185 | ) 186 | } 187 | } 188 | 189 | func TestFindLowestNodeWithEnoughAvailability(t *testing.T) { 190 | testCases := []struct { 191 | name string 192 | n int 193 | expectedNode *TopologyNode 194 | }{ 195 | {"one cpu", 1, testTree.Children[0].Children[0].Children[0]}, 196 | {"two cpus", 2, testTree.Children[0].Children[1]}, 197 | {"three cpus", 3, testTree.Children[0]}, 198 | {"maximum number", 4, testTree}, 199 | {"more than maximum", 5, nil}, 200 | } 201 | 202 | for _, testCase := range testCases { 203 | t.Run(testCase.name, func(t *testing.T) { 204 | node, _ := testTree.findLowestNodeWithEnoughAvailability(testCase.n, 0) 205 | assert.Same(t, testCase.expectedNode, node) 206 | }) 207 | } 208 | } 209 | 210 | func TestTakeLeaves(t *testing.T) { 211 | tree, err := cloneTree(testTree) 212 | assert.Nil(t, err) 213 | allLeafs := []*TopologyNode{ 214 | tree.Children[0].Children[0].Children[0], 215 | tree.Children[0].Children[1].Children[0], 216 | tree.Children[0].Children[1].Children[1], 217 | tree.Children[1].Children[0].Children[0], 218 | } 219 | for _, chld := range allLeafs { 220 | chld.NumAvailable = 0 221 | } 222 
| 
223 | 	for numLeaves := 1; numLeaves <= len(allLeafs); numLeaves++ {
224 | 		t.Run(
225 | 			fmt.Sprintf("take %d leaves", numLeaves),
226 | 			func(t *testing.T) {
227 | 				tree, err := cloneTree(testTree)
228 | 				assert.Nil(t, err)
229 | 
230 | 				leaves, err := tree.takeLeaves(numLeaves)
231 | 				assert.Nil(t, err)
232 | 
233 | 				assert.Equal(t, leaves, allLeafs[:numLeaves])
234 | 				assert.True(t, verifyNumAvailable(tree))
235 | 			},
236 | 		)
237 | 	}
238 | }
239 | 
240 | func TestTakeMoreLeavesThanAvailable(t *testing.T) {
241 | 	numAvailable := testTree.NumAvailable
242 | 	trees, err := testTree.takeLeaves(numAvailable + 1)
243 | 	assert.Empty(t, trees)
244 | 	assert.NotNil(t, err)
245 | 
246 | 	// check that we did not mark any child as taken
247 | 	assert.Equal(t, testTree.NumAvailable, numAvailable)
248 | 	assert.True(t, verifyNumAvailable(testTree))
249 | }
250 | 
251 | func TestGetLeavesTestTree(t *testing.T) {
252 | 	leafs := testTree.GetLeafs()
253 | 	expectedLeafs := []*TopologyNode{
254 | 		{
255 | 			nodeInfo:     nodeInfo{Cpu, 0},
256 | 			NumAvailable: 1,
257 | 		},
258 | 		{
259 | 			nodeInfo:     nodeInfo{Cpu, 0},
260 | 			NumAvailable: 1,
261 | 		},
262 | 		{
263 | 			nodeInfo:     nodeInfo{Cpu, 1},
264 | 			NumAvailable: 1,
265 | 		},
266 | 		{
267 | 			nodeInfo:     nodeInfo{Cpu, 44},
268 | 			NumAvailable: 1,
269 | 		},
270 | 	}
271 | 
272 | 	assert.Equal(t, expectedLeafs, leafs)
273 | }
274 | 
275 | func TestToString(t *testing.T) {
276 | 	s := testTree.String()
277 | 	assert.Equal(t, testTreeExpectedString, s)
278 | }
279 | 
--------------------------------------------------------------------------------
/pkg/utils/fileutils.go:
--------------------------------------------------------------------------------
1 | // Package utils holds various utility functions used across other packages.
2 | package utils
3 | 
4 | import (
5 | 	"errors"
6 | 	"io/fs"
7 | 	"os"
8 | 	"path"
9 | 	"path/filepath"
10 | 	"strings"
11 | )
12 | 
13 | var (
14 | 	ErrPathNotInBase = errors.New("final path goes outside base directory")
15 | 	ErrFileIsSymlink = errors.New("file cannot be a symlink")
16 | )
17 | 
18 | // EvaluateRealPath returns the absolute path with symlinks evaluated.
19 | func EvaluateRealPath(path string) (string, error) {
20 | 	pathEval, err := filepath.EvalSymlinks(path)
21 | 	if err != nil {
22 | 		return path, err
23 | 	}
24 | 	pathAbs, err := filepath.Abs(pathEval)
25 | 	if err != nil {
26 | 		return path, err
27 | 	}
28 | 	return pathAbs, nil
29 | }
30 | 
31 | // ValidatePathInsideBase checks that the given path, after evaluating all symbolic links, does not go outside baseDir.
32 | func ValidatePathInsideBase(filePath string, baseDir string) error {
33 | 	absRealPath, err := EvaluateRealPath(filePath)
34 | 	if err != nil {
35 | 		return err
36 | 	}
37 | 	if !strings.HasPrefix(absRealPath, baseDir) {
38 | 		return ErrPathNotInBase
39 | 	}
40 | 	return nil
41 | }
42 | 
43 | // ReadFileAt reads the file contents only if the target file is inside baseDir.
44 | func ReadFileAt(baseDir string, fileName string) ([]byte, error) {
45 | 	filePath := path.Join(baseDir, fileName)
46 | 	if err := ValidatePathInsideBase(filePath, baseDir); err != nil {
47 | 		return []byte{}, err
48 | 	}
49 | 	return os.ReadFile(filePath)
50 | }
51 | 
52 | // ErrorIfSymlink returns an error if the path is a symlink or doesn't exist.
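// Illustrative use only (the path below is a made-up example, not one taken from this project's
// configuration):
//
//	if err := ErrorIfSymlink("/sys/fs/cgroup/cpuset.cpus"); err != nil {
//		// refuse to operate on a symlinked or missing file
//	}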
53 | func ErrorIfSymlink(path string) error { 54 | finfo, err := os.Lstat(path) 55 | if err != nil { 56 | return err 57 | } 58 | if finfo.Mode()&fs.ModeSymlink != 0 { 59 | return ErrFileIsSymlink 60 | } 61 | return nil 62 | } 63 | -------------------------------------------------------------------------------- /pkg/utils/fileutils_test.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "os" 5 | "path" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/assert" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestEvaluateRealPathOfFile(t *testing.T) { 13 | dir := t.TempDir() 14 | file := path.Join(dir, "test.txt") 15 | createFile(t, file) 16 | 17 | f, err := EvaluateRealPath(file) 18 | assert.Nil(t, err) 19 | assert.Equal(t, file, f) 20 | } 21 | 22 | func TestEvaluateRealPathOfSymlink(t *testing.T) { 23 | dir := t.TempDir() 24 | file := path.Join(dir, "test.txt") 25 | symlink := path.Join(dir, "test-symlink.txt") 26 | createFile(t, file) 27 | require.Nil(t, os.Symlink(file, symlink)) 28 | 29 | f, err := EvaluateRealPath(symlink) 30 | assert.Nil(t, err) 31 | assert.Equal(t, file, f) 32 | } 33 | 34 | func TestValidatePathPasses(t *testing.T) { 35 | dir := t.TempDir() 36 | file := path.Join(dir, "test.txt") 37 | createFile(t, file) 38 | assert.Nil(t, ValidatePathInsideBase(file, dir)) 39 | } 40 | 41 | func TestValidatePathSymlinkOutsideBase(t *testing.T) { 42 | dir := t.TempDir() 43 | outsideFile := path.Join(dir, "test_outside.txt") 44 | dir1 := path.Join(dir, "dir1") 45 | insideSymlink := path.Join(dir1, "test_inside.txt") 46 | 47 | require.Nil(t, os.Mkdir(dir1, 0700)) 48 | createFile(t, outsideFile) 49 | require.Nil(t, os.Symlink(outsideFile, insideSymlink)) 50 | 51 | assert.ErrorIs(t, ValidatePathInsideBase(insideSymlink, dir1), ErrPathNotInBase) 52 | } 53 | 54 | func createFile(t *testing.T, path string) { 55 | f, err := os.Create(path) 56 | require.Nil(t, err) 57 | f.Close() 58 | } 59 | 60 | func TestReadFileAt(t *testing.T) { 61 | dir := t.TempDir() 62 | file := path.Join(dir, "test.txt") 63 | createFile(t, file) 64 | content, err := ReadFileAt(dir, "test.txt") 65 | assert.Nil(t, err) 66 | assert.Empty(t, content) 67 | } 68 | 69 | func TestValidateReadFileAtFails(t *testing.T) { 70 | dir := t.TempDir() 71 | outsideFile := path.Join(dir, "test_outside.txt") 72 | dir1 := path.Join(dir, "dir1") 73 | insideSymlink := path.Join(dir1, "test_inside.txt") 74 | 75 | require.Nil(t, os.Mkdir(dir1, 0700)) 76 | createFile(t, outsideFile) 77 | require.Nil(t, os.Symlink(outsideFile, insideSymlink)) 78 | 79 | _, err := ReadFileAt(dir1, "test_inside.txt") 80 | assert.ErrorIs(t, err, ErrPathNotInBase) 81 | } 82 | 83 | func TestNoErrorIfSymlinkNormalFile(t *testing.T) { 84 | dir := t.TempDir() 85 | file := path.Join(dir, "test.txt") 86 | createFile(t, file) 87 | 88 | assert.Nil(t, ErrorIfSymlink(file)) 89 | } 90 | 91 | func TestErrorIfSymlinkDoesntExist(t *testing.T) { 92 | dir := t.TempDir() 93 | file := path.Join(dir, "test.txt") 94 | 95 | assert.ErrorIs(t, ErrorIfSymlink(file), os.ErrNotExist) 96 | } 97 | 98 | func TestErrorIfSymlink(t *testing.T) { 99 | dir := t.TempDir() 100 | file := path.Join(dir, "test.txt") 101 | symlink := path.Join(dir, "sym.txt") 102 | createFile(t, file) 103 | require.Nil(t, os.Symlink(file, symlink)) 104 | 105 | assert.ErrorIs(t, ErrorIfSymlink(symlink), ErrFileIsSymlink) 106 | } 107 | -------------------------------------------------------------------------------- /security.md: 
-------------------------------------------------------------------------------- 1 | # Security Policy 2 | Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation. 3 | 4 | ## Reporting a Vulnerability 5 | Please report any security vulnerabilities in this project [utilizing the guidelines here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html). 6 | --------------------------------------------------------------------------------