├── .github └── workflows │ └── pullrequest.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── kube-burner-workload ├── README.md ├── SCALE_PROFILES.md ├── calico │ ├── README.md │ ├── calico-dashboard.png │ ├── convergence_tracker.yml │ ├── env │ ├── grafana_dash.json │ ├── metrics.yml │ ├── monitoring.yaml │ ├── policy-tracker │ │ ├── Dockerfile │ │ ├── policy-tracker.py │ │ └── requirements.txt │ └── test_limit.sh ├── convergence_waiter.sh ├── egress-np.yml ├── env ├── ingress-np.yml ├── kind-metrics │ ├── README.md │ ├── env │ ├── grafana.png │ ├── grafana_dash.json │ ├── metrics.yml │ └── monitoring.yaml ├── network-policy.yaml ├── openshift │ ├── README.md │ ├── convergence_tracker.yml │ ├── env │ ├── grafana.png │ ├── grafana_dash.json │ ├── metrics.yml │ ├── openflow-tracker │ │ ├── Dockerfile │ │ ├── openflow-tracker.py │ │ └── requirements.txt │ └── test_limit.sh ├── ovn-kubernetes │ ├── README.md │ ├── convergence_tracker.yml │ └── openflow-tracker │ │ ├── Dockerfile │ │ ├── openflow-tracker.py │ │ └── requirements.txt └── pod.yml └── yaml-analysis ├── README.md ├── analyze.go ├── go.mod ├── go.sum ├── helpers.go ├── netpol_config.go ├── profile.go ├── profiles_example.csv └── stats.go /.github/workflows/pullrequest.yml: -------------------------------------------------------------------------------- 1 | # pullrequest.yml 2 | name: Lint Workflow 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request_target: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | linters: 13 | runs-on: ubuntu-latest 14 | steps: 15 | 16 | - name: Check out code 17 | uses: actions/checkout@v3 18 | with: 19 | fetch-depth: 1 20 | ref: ${{ github.event.pull_request.head.sha }} 21 | persist-credentials: false 22 | 23 | - name: Install pre-commit 24 | run: pip install pre-commit 25 | 26 | - name: Run pre-commit hooks 27 | run: pre-commit run --all-files 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #IDE (GoLand) specific 2 | .idea/ 3 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/golangci/golangci-lint 3 | rev: v1.51.2 4 | hooks: 5 | - id: golangci-lint 6 | entry: bash -c 'cd yaml-analysis && golangci-lint run --timeout=5m' 7 | - repo: https://github.com/igorshubovych/markdownlint-cli 8 | rev: v0.34.0 9 | hooks: 10 | - id: markdownlint 11 | args: [--disable, MD013, MD002] 12 | - repo: https://github.com/jumanjihouse/pre-commit-hooks 13 | rev: 3.0.0 14 | hooks: 15 | - id: shellcheck 16 | - repo: https://github.com/pre-commit/pre-commit-hooks 17 | rev: v4.4.0 18 | hooks: 19 | - id: check-json 20 | - repo: https://github.com/psf/black 21 | rev: 22.10.0 22 | hooks: 23 | - id: black -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## k8s-netpol-scale 2 | 3 | This repository contains tools for k8s Network Policy scale testing. 4 | In the [./kube-burner-workload](./kube-burner-workload) folder you will find a configurable network policy workload that may be run by 5 | [kube-burner](https://github.com/cloud-bulldozer/kube-burner). 6 | 7 | In the [./yaml-analysis](./yaml-analysis) folder you will find tools to analyze network policies based on their YAMLs, 8 | and predict whether a given workload will be properly handled by a cluster based on provided scale profiles data. 9 | -------------------------------------------------------------------------------- /kube-burner-workload/README.md: -------------------------------------------------------------------------------- 1 | ## Intro 2 | 3 | The network-policy workload is used to generate a large number of non-trivial network policies. 4 | The workload is defined in [./network-policy.yaml](./network-policy.yaml) and consists of 5 | - `convergence-tracker` job: these are the pods that track when the created objects are applied. 6 | - `network-policy-perf` job: this job creates the workload after the `convergence-tracker` resources are created. 7 | 8 | ## Workload details 9 | 10 | The `network-policy-perf` job creates a given number of namespaces named `network-policy-perf-<iteration>` (iteration starts from 0) with the same contents. 11 | Every namespace has a given number of pods named `test-pod-<replica>` (replica starts from 1) based on [./pod.yml](./pod.yml). 12 | Every pod is labeled with `1: "true", ..., <replica>: "true"` to select the required number of pods in the future. 13 | The `test-pod: "true"` label is added for `podAntiAffinity` to spread the workload pods as evenly as possible across the nodes. 14 | A network policy always selects the first N pods by pod selector, therefore we add a `num=<replica>` label to ensure 15 | equal distribution of selected pods.
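For illustration, here is a minimal sketch (a hypothetical helper, not part of the workload; the actual labels are set by the pod template) of the label set attached to each pod replica:

```python
def pod_labels(replica: int) -> dict:
    """Sketch of the labeling scheme described above: labels "1".."<replica>"
    set to "true", plus num=<replica> and the podAntiAffinity label."""
    labels = {str(i): "true" for i in range(1, replica + 1)}
    labels["num"] = str(replica)
    labels["test-pod"] = "true"
    return labels

print(pod_labels(3))  # {'1': 'true', '2': 'true', '3': 'true', 'num': '3', 'test-pod': 'true'}
```

The kubectl output below shows the same scheme on a live namespace (kube-burner adds its own bookkeeping labels as well):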
16 | 17 | ``` 18 | kubectl get pods -n network-policy-perf-0 --show-labels 19 | NAME READY STATUS RESTARTS AGE LABELS 20 | test-pod-1 1/1 Running 0 2m52s 1=true,kube-burner-index=0,kube-burner-job=network-policy-perf,kube-burner-runid=4310adad-84eb-4d5b-a984-f408b2b1cd4e,kube-burner-uuid=106b3aff-4b90-4e0d-b69f-495e9f24e8d5,num=1,test-pod=true 21 | test-pod-2 1/1 Running 0 2m52s 1=true,2=true,kube-burner-index=0,kube-burner-job=network-policy-perf,kube-burner-runid=4310adad-84eb-4d5b-a984-f408b2b1cd4e,kube-burner-uuid=106b3aff-4b90-4e0d-b69f-495e9f24e8d5,num=2,test-pod=true 22 | test-pod-3 1/1 Running 0 2m52s 1=true,2=true,3=true,kube-burner-index=0,kube-burner-job=network-policy-perf,kube-burner-runid=4310adad-84eb-4d5b-a984-f408b2b1cd4e,kube-burner-uuid=106b3aff-4b90-4e0d-b69f-495e9f24e8d5,num=3,test-pod=true 23 | ``` 24 | 25 | Every namespace has a given number of network policies named `ingress-<idx>` and `egress-<idx>` (idx starts from 1) 26 | based on [./ingress-np.yml](./ingress-np.yml) and [./egress-np.yml](./egress-np.yml); 27 | corresponding ingress and egress policies have exactly the same peers for now (this may change in the future). 28 | 29 | The NetworkPolicy template takes the following args: 30 | - local_pods: number of pods to select by `spec.podSelector`, the same set of pods for every policy in the namespace 31 | - single_ports: number of single ports for every gress rule 32 | - port_ranges: number of port ranges for every gress rule 33 | - pod_selectors: number of selector-based gress rules, every rule has only 1 selector and may have multiple ports 34 | - peer_namespaces: number of selected namespaces for every pod_selector peer 35 | - peer_pods: number of selected pods for every pod_selector peer 36 | - cidr_rules: number of CIDR-based gress rules 37 | 38 | To increase the real load and reduce the number of possible internal optimizations, we need to generate different peers. 39 | CIDRs are all different inside one namespace: they start from `1.0.0.0/24` and are incremented by 1: `1.0.1.0/24`, `1.0.2.0/24`, etc. 40 | Peer selectors always select the same pods based on the required number, but always have different namespace selectors. 41 | We have BinomialCoefficient(NAMESPACES, PEER_NAMESPACES) different peer namespace sets, which don't repeat across network policies 42 | and namespaces unless we have more selector-based rules than that. 43 | 44 | Example: if we have 5 namespaces and every policy selects 3 namespaces for every peer, we have BinomialCoefficient(5, 3) = 10, 45 | which gives the following choices (think peer namespace indexes): 46 | 47 | [1] 1,2,3\ 48 | [2] 1,2,4\ 49 | [3] 1,2,5\ 50 | [4] 1,3,4\ 51 | [5] 1,3,5\ 52 | [6] 1,4,5\ 53 | [7] 2,3,4\ 54 | [8] 2,3,5\ 55 | [9] 2,4,5\ 56 | [10] 3,4,5 57 | 58 | If we create 5 namespaces with 1 network policy each and 2 peer selectors with 3 peer_namespaces each, we will have 59 | 60 | ns1.np1.peer1 selects namespaces 1,2,3 [1]\ 61 | ns1.np1.peer2 selects namespaces 1,2,4 [2]\ 62 | ns2.np1.peer1 selects namespaces 1,2,5 [3]\ 63 | ns2.np1.peer2 selects namespaces 1,3,4 [4]\ 64 | ...\ 65 | ns5.np1.peer1 selects namespaces 2,4,5 [9]\ 66 | ns5.np1.peer2 selects namespaces 3,4,5 [10] 67 | 68 |
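To make the mapping from a running peer index to a unique namespace combination concrete, here is a minimal Python sketch (for illustration only; the workload itself relies on the `Binomial` and `IndexToCombination` template helpers used in ingress-np.yml and egress-np.yml, and this sketch simply assumes the lexicographic order shown above):

```python
from math import comb

def index_to_combination(index: int, n: int, k: int) -> list:
    """Map index in [0, comb(n, k)) to the index-th k-combination of 1..n
    in lexicographic order, e.g. 0 -> [1, 2, 3] for n=5, k=3."""
    combination, start = [], 1
    for remaining in range(k, 0, -1):
        for candidate in range(start, n + 1):
            # number of combinations that put `candidate` in the current slot
            block = comb(n - candidate, remaining - 1)
            if index < block:
                combination.append(candidate)
                start = candidate + 1
                break
            index -= block
    return combination

# Reproduces the 10 choices listed above for 5 namespaces and 3 peer namespaces:
print([index_to_combination(i, 5, 3) for i in range(comb(5, 3))])
```

Each selector-based gress rule advances the running index by one (wrapping around modulo `Binomial(NAMESPACES, PEER_NAMESPACES)`), which is why peer namespace sets only start repeating once all combinations have been used.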
69 | In the cluster it will look like (ingress-only) 70 | 71 | ``` 72 | items: 73 | - apiVersion: networking.k8s.io/v1 74 | kind: NetworkPolicy 75 | metadata: 76 | creationTimestamp: "2023-08-23T09:35:39Z" 77 | generation: 1 78 | labels: 79 | kube-burner-index: "1" 80 | kube-burner-job: network-policy-perf 81 | kube-burner-runid: 39baa8cb-07c6-441f-add6-07a56404a14b 82 | kube-burner-uuid: 9c3cdf2e-4fd7-470a-b1b0-2d1c1a7b5c32 83 | name: ingress-1 84 | namespace: network-policy-perf-0 85 | resourceVersion: "66063" 86 | uid: 5abd93b5-906e-44e0-be1f-1be30b6bbeed 87 | spec: 88 | ingress: 89 | - from: 90 | - namespaceSelector: 91 | matchExpressions: 92 | - key: kubernetes.io/metadata.name 93 | operator: In 94 | values: 95 | - network-policy-perf-1 96 | - network-policy-perf-2 97 | - network-policy-perf-3 98 | podSelector: 99 | matchLabels: 100 | "1": "true" 101 | - from: 102 | - namespaceSelector: 103 | matchExpressions: 104 | - key: kubernetes.io/metadata.name 105 | operator: In 106 | values: 107 | - network-policy-perf-1 108 | - network-policy-perf-2 109 | - network-policy-perf-4 110 | podSelector: 111 | matchLabels: 112 | "1": "true" 113 | podSelector: 114 | matchLabels: 115 | "1": "true" 116 | policyTypes: 117 | - Ingress 118 | status: {} 119 | - apiVersion: networking.k8s.io/v1 120 | kind: NetworkPolicy 121 | metadata: 122 | creationTimestamp: "2023-08-23T09:35:39Z" 123 | generation: 1 124 | labels: 125 | kube-burner-index: "1" 126 | kube-burner-job: network-policy-perf 127 | kube-burner-runid: 39baa8cb-07c6-441f-add6-07a56404a14b 128 | kube-burner-uuid: 9c3cdf2e-4fd7-470a-b1b0-2d1c1a7b5c32 129 | name: ingress-1 130 | namespace: network-policy-perf-1 131 | resourceVersion: "66068" 132 | uid: f5f26105-125b-4436-8a97-3d2fdace15bb 133 | spec: 134 | ingress: 135 | - from: 136 | - namespaceSelector: 137 | matchExpressions: 138 | - key: kubernetes.io/metadata.name 139 | operator: In 140 | values: 141 | - network-policy-perf-1 142 | - network-policy-perf-2 143 | - network-policy-perf-5 144 | podSelector: 145 | matchLabels: 146 | "1": "true" 147 | - from: 148 | - namespaceSelector: 149 | matchExpressions: 150 | - key: kubernetes.io/metadata.name 151 | operator: In 152 | values: 153 | - network-policy-perf-1 154 | - network-policy-perf-3 155 | - network-policy-perf-4 156 | podSelector: 157 | matchLabels: 158 | "1": "true" 159 | podSelector: 160 | matchLabels: 161 | "1": "true" 162 | policyTypes: 163 | - Ingress 164 | status: {} 165 | - apiVersion: networking.k8s.io/v1 166 | kind: NetworkPolicy 167 | metadata: 168 | creationTimestamp: "2023-08-23T09:35:39Z" 169 | generation: 1 170 | labels: 171 | kube-burner-index: "1" 172 | kube-burner-job: network-policy-perf 173 | kube-burner-runid: 39baa8cb-07c6-441f-add6-07a56404a14b 174 | kube-burner-uuid: 9c3cdf2e-4fd7-470a-b1b0-2d1c1a7b5c32 175 | name: ingress-1 176 | namespace: network-policy-perf-2 177 | resourceVersion: "66071" 178 | uid: e74b9e03-311c-4dcb-b1f1-22424ace949d 179 | spec: 180 | ingress: 181 | - from: 182 | - namespaceSelector: 183 | matchExpressions: 184 | - key: kubernetes.io/metadata.name 185 | operator: In 186 | values: 187 | - network-policy-perf-1 188 | - network-policy-perf-3 189 | - network-policy-perf-5 190 | podSelector: 191 | matchLabels: 192 | "1": "true" 193 | - from: 194 | - namespaceSelector: 195 | matchExpressions: 196 | - key: kubernetes.io/metadata.name 197 | operator: In 198 | values: 199 | - network-policy-perf-1 200 | - network-policy-perf-4 201 | - network-policy-perf-5 202 | podSelector: 203 | matchLabels: 204 | "1": 
"true" 205 | podSelector: 206 | matchLabels: 207 | "1": "true" 208 | policyTypes: 209 | - Ingress 210 | status: {} 211 | - apiVersion: networking.k8s.io/v1 212 | kind: NetworkPolicy 213 | metadata: 214 | creationTimestamp: "2023-08-23T09:35:39Z" 215 | generation: 1 216 | labels: 217 | kube-burner-index: "1" 218 | kube-burner-job: network-policy-perf 219 | kube-burner-runid: 39baa8cb-07c6-441f-add6-07a56404a14b 220 | kube-burner-uuid: 9c3cdf2e-4fd7-470a-b1b0-2d1c1a7b5c32 221 | name: ingress-1 222 | namespace: network-policy-perf-3 223 | resourceVersion: "66079" 224 | uid: c1c3b966-390c-4c44-8fc9-c106fb036e64 225 | spec: 226 | ingress: 227 | - from: 228 | - namespaceSelector: 229 | matchExpressions: 230 | - key: kubernetes.io/metadata.name 231 | operator: In 232 | values: 233 | - network-policy-perf-2 234 | - network-policy-perf-3 235 | - network-policy-perf-4 236 | podSelector: 237 | matchLabels: 238 | "1": "true" 239 | - from: 240 | - namespaceSelector: 241 | matchExpressions: 242 | - key: kubernetes.io/metadata.name 243 | operator: In 244 | values: 245 | - network-policy-perf-2 246 | - network-policy-perf-3 247 | - network-policy-perf-5 248 | podSelector: 249 | matchLabels: 250 | "1": "true" 251 | podSelector: 252 | matchLabels: 253 | "1": "true" 254 | policyTypes: 255 | - Ingress 256 | status: {} 257 | - apiVersion: networking.k8s.io/v1 258 | kind: NetworkPolicy 259 | metadata: 260 | creationTimestamp: "2023-08-23T09:35:39Z" 261 | generation: 1 262 | labels: 263 | kube-burner-index: "1" 264 | kube-burner-job: network-policy-perf 265 | kube-burner-runid: 39baa8cb-07c6-441f-add6-07a56404a14b 266 | kube-burner-uuid: 9c3cdf2e-4fd7-470a-b1b0-2d1c1a7b5c32 267 | name: ingress-1 268 | namespace: network-policy-perf-4 269 | resourceVersion: "66086" 270 | uid: db941c7d-e6a7-48db-8575-3d2a3da6f2bf 271 | spec: 272 | ingress: 273 | - from: 274 | - namespaceSelector: 275 | matchExpressions: 276 | - key: kubernetes.io/metadata.name 277 | operator: In 278 | values: 279 | - network-policy-perf-2 280 | - network-policy-perf-4 281 | - network-policy-perf-5 282 | podSelector: 283 | matchLabels: 284 | "1": "true" 285 | - from: 286 | - namespaceSelector: 287 | matchExpressions: 288 | - key: kubernetes.io/metadata.name 289 | operator: In 290 | values: 291 | - network-policy-perf-3 292 | - network-policy-perf-4 293 | - network-policy-perf-5 294 | podSelector: 295 | matchLabels: 296 | "1": "true" 297 | podSelector: 298 | matchLabels: 299 | "1": "true" 300 | policyTypes: 301 | - Ingress 302 | status: {} 303 | 304 | ``` 305 | 306 |
307 | 308 |
309 | env 310 | 311 | ``` 312 | NAMESPACES=5 313 | PODS_PER_NAMESPACE=1 314 | NETPOLS_PER_NAMESPACE=1 315 | 316 | LOCAL_PODS=1 317 | SINGLE_PORTS=0 318 | PORT_RANGES=0 319 | POD_SELECTORS=2 320 | PEER_NAMESPACES=3 321 | PEER_PODS=1 322 | CIDRS=0 323 | ``` 324 |
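As a quick back-of-the-envelope check (not part of the workload) that this ingress-only example uses every distinct peer namespace set exactly once:

```python
from math import comb

NAMESPACES, NETPOLS_PER_NAMESPACE, POD_SELECTORS, PEER_NAMESPACES = 5, 1, 2, 3

used_peer_sets = NAMESPACES * NETPOLS_PER_NAMESPACE * POD_SELECTORS  # 5 * 1 * 2 = 10
distinct_peer_sets = comb(NAMESPACES, PEER_NAMESPACES)               # C(5, 3) = 10
print(used_peer_sets, distinct_peer_sets)  # 10 10 -> no peer set needs to be repeated
```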
325 | 326 | ## Different Platforms 327 | 328 | Different clusters may require different techniques to do the scale testing. While this framework may be used 329 | as is by any Kubernetes cluster, extra features like the convergence tracker and metrics may also be enabled. 330 | Since the config for these features usually differs based on the network plugin and cluster type, we have added 331 | a `PLATFORM` env variable and corresponding `platform_name` folders that may be used as an example by other platforms, 332 | and may also be reused and improved by the same platform as a part of this framework. 333 | 334 | Every platform may have its own README. 335 | 336 | ### Comparing different platforms 337 | 338 | To ensure results for different platforms are comparable, set up the convergence tracker logic to be as similar as possible; 339 | all timeouts and variables defining a successful test run should be the same. 340 | Cluster-specific parameters, like resource quotas, enabled services (e.g. observability), and node configurations, may also 341 | affect the results. 342 | 343 | ## Tracking the end of the test 344 | 345 | The `CONVERGENCE_TRACKER` env variable enables the `convergence-tracker` job. 346 | The right way to track convergence may differ based on the network plugin or cluster type; you can use the existing platforms 347 | as an example. 348 | For example, the ovn-kubernetes network plugin uses OVS flows underneath, therefore this job spins up a pod 349 | on every node and tracks the number of OVS flows; when this number stops changing, it considers the config to be applied. 350 | There is a `CONVERGENCE_PERIOD` parameter that defines for how long it waits to consider the number of flows converged. 351 | In addition, `CONVERGENCE_TIMEOUT` sets the hard deadline for convergence tracking. 352 | `convergence_waiter.sh` is a script that waits up to `CONVERGENCE_TIMEOUT + CONVERGENCE_PERIOD` for all convergence-tracker pods to complete, 353 | before deleting the workload. 354 | 355 | ## Running 356 | 357 | 1. Install kube-burner v1.9.4+ 358 | 359 | 1.1 You can download kube-burner from https://github.com/cloud-bulldozer/kube-burner/releases 360 | 361 | 1.2 You can build it from source [kube-burner](https://github.com/cloud-bulldozer/kube-burner/tree/main) with 362 | `make build` 363 | 2. `cd ./kube-burner-workload` 364 | 3. Set env variables with the test config in the `env` file\ 365 | **NOTE**: some corner cases may not work 366 | 4. `source ./env` 367 | 5. `kube-burner init -c ./network-policy.yaml` 368 | 369 | **Note**: Every platform may have its own README with more details 370 | 371 | To clean up resources created by this workload, run `kubectl delete ns -l kube-burner-job` 372 | 373 | ## Result 374 | 375 | When using metrics collection, you can create a dashboard that will give you details about the test config and 376 | cluster state during the test, including performance metrics. An example dashboard for the `./openshift` profile is 377 | 378 | ![image](openshift/grafana.png) 379 | -------------------------------------------------------------------------------- /kube-burner-workload/SCALE_PROFILES.md: -------------------------------------------------------------------------------- 1 | ## NetworkPolicy variables 2 | 3 | All variables that this framework currently has may be grouped as 4 | 1. NetworkPolicy config 5 | - `LOCAL_PODS` 6 | - `SINGLE_PORTS` 7 | - `PORT_RANGES` 8 | - `POD_SELECTORS` 9 | - `PEER_NAMESPACES` 10 | - `PEER_PODS` 11 | - `CIDRS` 12 | 13 | These parameters define the scale impact of a single NetworkPolicy. 14 | 2.
Namespace config and scale 15 | - `PODS_PER_NAMESPACE` 16 | - `INGRESS` 17 | - `EGRESS` 18 | - `NAMESPACES` 19 | - `NETPOLS_PER_NAMESPACE` 20 | 21 | These variables define a namespace config and may be used to find the scalability limit. 22 | `PODS_PER_NAMESPACE` also serves as a restriction for some NetworkPolicy parameters (like `LOCAL_PODS`) but increases the per-namespace 23 | workload at the same time. The `NAMESPACES` parameter also limits the potential values of `PEER_NAMESPACES`. 24 | 25 | There are some extra test parameters composed of the env variables, where `I()` is an indicator function: 26 | - Number of network policies = `NAMESPACES * NETPOLS_PER_NAMESPACE * (I(INGRESS) + I(EGRESS))` 27 | - Number of used peer namespace selectors = `Number of network policies * POD_SELECTORS` 28 | - Number of different peer namespace selectors = `Binomial(NAMESPACES, PEER_NAMESPACES)` 29 | - % of used different peer selectors = `Number of used peer namespace selectors / Number of different peer namespace selectors` 30 | 31 | When the last parameter reaches or exceeds 100%, some peer namespace selectors will be repeated. A small worked sketch of these composite parameters is included at the end of this file. 32 | 33 | ## Scale testing 34 | 35 | To find the scalability limit for a cluster, we can iteratively increase the workload until the test fails (different 36 | clusters/platforms may have different definitions of failure). Considering we are trying to answer the question 37 | "How many network policies can I create?", we want the result to be a network policy count. 38 | 39 | Therefore, the easiest way to do so is to keep all parameter values fixed, except for `NETPOLS_PER_NAMESPACE`. 40 | Then, by increasing only `NETPOLS_PER_NAMESPACE`, we leave everything else exactly the same. 41 | 42 | You can copy a [helper spreadsheet](https://docs.google.com/spreadsheets/d/1Kq1w8c8Z_wlhBOb_EID2nhvmwEi8H6pSxvtpDcbf-1M/edit?usp=sharing) to track test results. 43 | 44 | ## Scale testing profiles 45 | 46 | While this framework may be used to define a network policy config based on a specific customer's request, 47 | we also want to provide pre-defined scale testing results that will help customers understand what kind of 48 | workload can be handled. 49 | 50 | To do so, we can create a set of scale testing profiles by defining all variable values. We will code them as 51 | `<LOCAL_PODS>-<SINGLE_PORTS>-<PORT_RANGES>-<POD_SELECTORS>-<PEER_NAMESPACES>-<PEER_PODS>-<CIDRS>`. 52 | Here are some examples: 53 | 54 | MINIMAL 55 | - CIDR-only (1-0-0-0-0-0-1) 56 | - port+range+CIDR (1-1-1-0-0-0-1) 57 | - pod-selector-only (1-0-0-1-3-1-0) 58 | - port+range+pod-selector (1-1-1-1-3-1-0) 59 | - pod-selector+CIDR (1-0-0-1-3-1-1) 60 | - port+range+pod-selector+CIDR (1-1-1-1-3-1-1) 61 | 62 | MEDIUM 63 | - CIDR-only (10- 0- 0- 0- 0- 0-10) 64 | - port+range+CIDR (10-10-10- 0- 0- 0-10) 65 | - pod-selector-only (10- 0- 0-10-10-10- 0) 66 | - port+range+pod-selector (10-10-10-10-10-10- 0) 67 | - pod-selector+CIDR (10- 0- 0-10-10-10-10) 68 | - port+range+pod-selector+CIDR (10-10-10-10-10-10-10) 69 | 70 | 71 | ## Spreadsheet 72 | 73 | To simplify results tracking, you can copy a [spreadsheet](https://docs.google.com/spreadsheets/d/1Kq1w8c8Z_wlhBOb_EID2nhvmwEi8H6pSxvtpDcbf-1M/edit#gid=16759354) 74 | that shows an example of increasing the workload and finding the best result. 75 | 76 | The `export` sheets may be used with the [yaml_analysis](../yaml-analysis) tools; check its [README](../yaml-analysis/README.md) for more details.
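As referenced above, here is a small worked sketch (a hypothetical helper, not part of the repository tooling) of the composite test parameters defined in the NetworkPolicy variables section:

```python
from math import comb

def composite_parameters(namespaces, netpols_per_namespace, pod_selectors,
                         peer_namespaces, ingress=True, egress=False):
    """Compute the composite test parameters from the env-style variables."""
    policies = namespaces * netpols_per_namespace * (int(ingress) + int(egress))
    used_selectors = policies * pod_selectors
    different_selectors = comb(namespaces, peer_namespaces)
    return {
        "network_policies": policies,
        "used_peer_namespace_selectors": used_selectors,
        "different_peer_namespace_selectors": different_selectors,
        "used_vs_different_percent": 100.0 * used_selectors / different_selectors,
    }

# Hypothetical run: 20 namespaces, 10 ingress+egress policies each, 2 selector rules,
# 3 peer namespaces per selector.
print(composite_parameters(20, 10, 2, 3, ingress=True, egress=True))
# -> 400 policies, 800 used selectors, C(20, 3) = 1140 different ones (~70% used)
```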
-------------------------------------------------------------------------------- /kube-burner-workload/calico/README.md: -------------------------------------------------------------------------------- 1 | ## Running 2 | 3 | 1. This profile assumes you have a calico cluster, and the KUBECONFIG that can be used in the scale test. 4 | 2. Set env variables with the test config in the `env` file 5 | 6 | 2.1 Set env file variable PLATFORM=calico 7 | 8 | 3. Set env variables in the `calico/env` file 9 | 4. `source ./env` 10 | 5. Run the test: `kube-burner init -m ./calico/metrics.yml -c ./network-policy.yaml -u https://[prometheus url] --log-level=debug` 11 | 6. When the test finishes, metrics should be collected by the ES_SERVER 12 | 13 | ## Finding the limit 14 | 15 | To automate finding the limit, [test_limit.sh](./test_limit.sh) script may be used. 16 | It can run multiple iterations increasing the number of network policies until test fails. 17 | It waits for full cleanup after every iteration to ensure the cluster is ready for the next one. 18 | 19 | ## Metrics and Dashboards 20 | 21 | Metrics in this folder are calico-specific, but may be tweaked for other clusters, e.g. by changing 22 | filtered namespaces for `containerCPU` metrics. 23 | 24 | `./grafana_dash.json` has the JSON model that defines the dashboard. It uses metrics defined in `./metrics.yml` 25 | and may be used as an example to define dashboard for other clusters. 26 | -------------------------------------------------------------------------------- /kube-burner-workload/calico/calico-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npinaeva/k8s-netpol-scale/3d1aabaf4511f27966b567ba8192f8cce6b52375/kube-burner-workload/calico/calico-dashboard.png -------------------------------------------------------------------------------- /kube-burner-workload/calico/convergence_tracker.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: convergence-tracker-{{.Replica}} 5 | labels: 6 | app: convergence-tracker 7 | spec: 8 | topologySpreadConstraints: 9 | - maxSkew: 1 10 | topologyKey: kubernetes.io/hostname 11 | whenUnsatisfiable: DoNotSchedule 12 | labelSelector: 13 | matchLabels: 14 | app: convergence-tracker 15 | restartPolicy: Never 16 | hostNetwork: true 17 | containers: 18 | - name: tracker 19 | # image built with the ./policy-tracker/Dockerfile 20 | image: gcr.io/unique-caldron-775/netpol-benchmark/convergence:latest 21 | securityContext: 22 | privileged: true 23 | command: [ "/bin/bash", "-c", "python policy-tracker.py"] 24 | imagePullPolicy: Always 25 | env: 26 | - name: CONVERGENCE_PERIOD 27 | value: "{{.convergence_period}}" 28 | - name: CONVERGENCE_TIMEOUT 29 | value: "{{.convergence_timeout}}" 30 | - name: ES_SERVER 31 | value: {{.es_server}} 32 | - name: ES_INDEX_NETPOL 33 | value: {{.es_index}} 34 | - name: UUID 35 | value: {{.UUID}} 36 | - name: METADATA 37 | value: "{{.metadata}}" 38 | - name: MY_NODE_NAME 39 | valueFrom: 40 | fieldRef: 41 | fieldPath: spec.nodeName 42 | -------------------------------------------------------------------------------- /kube-burner-workload/calico/env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -a 3 | ES_SERVER=http://localhost:9200 4 | ES_INDEX=calico-benchmark 5 | set +a 6 | 
-------------------------------------------------------------------------------- /kube-burner-workload/calico/metrics.yml: -------------------------------------------------------------------------------- 1 | # API server 2 | 3 | - query: irate(apiserver_request_total{verb="POST", resource="pods", subresource="binding",code="201"}[2m]) > 0 4 | metricName: schedulingThroughput 5 | 6 | - query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"LIST|GET", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0 7 | metricName: readOnlyAPICallsLatency 8 | 9 | - query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"POST|PUT|DELETE|PATCH", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0 10 | metricName: mutatingAPICallsLatency 11 | 12 | - query: sum(irate(apiserver_request_total{apiserver="kube-apiserver",verb!="WATCH"}[2m])) by (verb,resource,code) > 0 13 | metricName: APIRequestRate 14 | 15 | # Containers & pod metrics 16 | - query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"kube-system|calico-system"}[2m]) * 100) by (container, pod, namespace, node)) > 0 17 | metricName: containerCPU 18 | 19 | - query: (sum(container_memory_rss{name!="",container!="POD",namespace=~"kube-system|calico-system"}) by (container, pod, namespace, node)) > 0 20 | metricName: containerMemory 21 | 22 | # Cluster metrics 23 | 24 | - query: max_over_time( count(kube_pod_labels{label_kube_burner_job="network-policy-perf"})[{{ .elapsed }}:] ) 25 | metricName: podCount 26 | 27 | - query: max_over_time( count(kube_namespace_labels{label_kube_burner_job="network-policy-perf"})[{{ .elapsed }}:] ) 28 | metricName: namespaceCount 29 | 30 | - query: max_over_time( count(kube_networkpolicy_labels{networkpolicy=~"ingress.*"})[{{ .elapsed }}:] ) 31 | metricName: netpolIngressCount 32 | 33 | - query: max_over_time( count(kube_networkpolicy_labels{networkpolicy=~"egress.*"})[{{ .elapsed }}:] ) 34 | metricName: netpolEgressCount 35 | 36 | - query: kube_node_role 37 | metricName: nodeRoles 38 | 39 | - query: sum(kube_node_status_condition{status="true"}) by (condition) 40 | metricName: nodeStatus 41 | 42 | - query: kubernetes_build_info 43 | metricName: k8sVersion 44 | instant: true 45 | 46 | # Calico metrics 47 | 48 | - query: max_over_time( count(felix_cluster_num_hosts)[{{ .elapsed }}:] ) 49 | metricName: felix_cluster_num_hosts 50 | 51 | - query: felix_active_local_endpoints 52 | metricName: felix_active_local_endpoints 53 | 54 | - query: felix_active_local_policies 55 | metricName: felix_active_local_policies 56 | 57 | - query: felix_active_local_selectors 58 | metricName: felix_active_local_selectors 59 | 60 | - query: felix_label_index_num_endpoints 61 | metricName: felix_label_index_num_endpoints 62 | 63 | - query: felix_label_index_num_active_selectors{optimized="true"} 64 | metricName: felix_label_index_num_active_selectors_optimized 65 | 66 | - query: felix_label_index_num_active_selectors{optimized="false"} 67 | metricName: felix_label_index_num_active_selectors_not_optimized 68 | -------------------------------------------------------------------------------- /kube-burner-workload/calico/monitoring.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: calico-monitoring 6 | --- 7 | 
apiVersion: v1 8 | kind: Service 9 | metadata: 10 | name: prometheus-service 11 | namespace: calico-monitoring 12 | annotations: 13 | prometheus.io/scrape: 'true' 14 | prometheus.io/port: '9090' 15 | spec: 16 | selector: 17 | app: prometheus-server 18 | type: NodePort 19 | ports: 20 | - port: 8080 21 | targetPort: 9090 22 | --- 23 | apiVersion: rbac.authorization.k8s.io/v1 24 | kind: ClusterRole 25 | metadata: 26 | name: prometheus 27 | rules: 28 | - apiGroups: [""] 29 | resources: 30 | - nodes 31 | - nodes/proxy 32 | - services 33 | - endpoints 34 | - pods 35 | verbs: ["get", "list", "watch"] 36 | - apiGroups: 37 | - extensions 38 | resources: 39 | - ingresses 40 | verbs: ["get", "list", "watch"] 41 | - nonResourceURLs: ["/metrics"] 42 | verbs: ["get"] 43 | --- 44 | apiVersion: rbac.authorization.k8s.io/v1 45 | kind: ClusterRoleBinding 46 | metadata: 47 | name: prometheus 48 | roleRef: 49 | apiGroup: rbac.authorization.k8s.io 50 | kind: ClusterRole 51 | name: prometheus 52 | subjects: 53 | - kind: ServiceAccount 54 | name: default 55 | namespace: calico-monitoring 56 | --- 57 | apiVersion: v1 58 | kind: ConfigMap 59 | metadata: 60 | name: prometheus-server-conf 61 | labels: 62 | name: prometheus-server-conf 63 | namespace: calico-monitoring 64 | data: 65 | prometheus.yml: |- 66 | global: 67 | scrape_interval: 5s 68 | evaluation_interval: 5s 69 | scrape_configs: 70 | - job_name: 'kubernetes-apiservers' 71 | kubernetes_sd_configs: 72 | - role: endpoints 73 | scheme: https 74 | tls_config: 75 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 76 | insecure_skip_verify: true 77 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 78 | relabel_configs: 79 | - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] 80 | action: keep 81 | regex: default;kubernetes;https 82 | 83 | - job_name: 'kubernetes-controller-manager' 84 | honor_labels: true 85 | scheme: https 86 | tls_config: 87 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 88 | insecure_skip_verify: true 89 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 90 | static_configs: 91 | - targets: 92 | - 127.0.0.1:10257 93 | 94 | - job_name: 'kubernetes-nodes' 95 | scheme: https 96 | tls_config: 97 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 98 | insecure_skip_verify: true 99 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 100 | kubernetes_sd_configs: 101 | - role: node 102 | relabel_configs: 103 | - action: labelmap 104 | regex: __meta_kubernetes_node_label_(.+) 105 | - target_label: __address__ 106 | replacement: localhost:6443 107 | - source_labels: [__meta_kubernetes_node_name] 108 | regex: (.+) 109 | target_label: __metrics_path__ 110 | replacement: /api/v1/nodes/${1}/proxy/metrics 111 | 112 | - job_name: 'calico-nodes' 113 | scheme: http 114 | kubernetes_sd_configs: 115 | - role: node 116 | relabel_configs: 117 | - action: labelmap 118 | regex: __meta_kubernetes_node_label_(.+) 119 | - source_labels: [__meta_kubernetes_node_address_InternalIP] 120 | target_label: __address__ 121 | replacement: $1:9091 122 | - source_labels: [__meta_kubernetes_node_name] 123 | regex: (.+) 124 | target_label: __metrics_path__ 125 | replacement: /metrics 126 | 127 | - job_name: 'kubernetes-cadvisor' 128 | scheme: https 129 | tls_config: 130 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 131 | insecure_skip_verify: true 132 | bearer_token_file: 
/var/run/secrets/kubernetes.io/serviceaccount/token 133 | kubernetes_sd_configs: 134 | - role: node 135 | relabel_configs: 136 | - action: labelmap 137 | regex: __meta_kubernetes_node_label_(.+) 138 | - target_label: __address__ 139 | replacement: localhost:6443 140 | - source_labels: [__meta_kubernetes_node_name] 141 | regex: (.+) 142 | target_label: __metrics_path__ 143 | replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor 144 | 145 | - job_name: 'kube-state-metrics' 146 | honor_timestamps: true 147 | scrape_interval: 1m 148 | scrape_timeout: 1m 149 | metrics_path: /metrics 150 | scheme: http 151 | static_configs: 152 | - targets: 153 | - kube-state-metrics.kube-system.svc.cluster.local:8080 154 | 155 | --- 156 | apiVersion: v1 157 | kind: Pod 158 | metadata: 159 | name: prometheus 160 | namespace: calico-monitoring 161 | labels: 162 | app: prometheus-server 163 | spec: 164 | hostNetwork: true 165 | nodeSelector: 166 | node-role.kubernetes.io/control-plane: "" 167 | tolerations: 168 | - key: CriticalAddonsOnly 169 | operator: Exists 170 | - effect: NoSchedule 171 | key: node-role.kubernetes.io/master 172 | - effect: NoSchedule 173 | key: node-role.kubernetes.io/control-plane 174 | containers: 175 | - name: prometheus 176 | image: prom/prometheus:latest 177 | args: 178 | - "--config.file=/etc/prometheus/prometheus.yml" 179 | - "--storage.tsdb.path=/prometheus/" 180 | - "--web.enable-admin-api" 181 | ports: 182 | - containerPort: 9090 183 | volumeMounts: 184 | - name: prometheus-config-volume 185 | mountPath: /etc/prometheus/ 186 | - name: prometheus-storage-volume 187 | mountPath: /prometheus/ 188 | volumes: 189 | - name: prometheus-config-volume 190 | configMap: 191 | defaultMode: 420 192 | name: prometheus-server-conf 193 | - name: prometheus-storage-volume 194 | emptyDir: {} 195 | -------------------------------------------------------------------------------- /kube-burner-workload/calico/policy-tracker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:slim 2 | RUN apt update && \ 3 | apt install -y curl iptables ipset 4 | COPY policy-tracker.py policy-tracker.py 5 | COPY ./requirements.txt requirements.txt 6 | RUN pip install -r requirements.txt 7 | -------------------------------------------------------------------------------- /kube-burner-workload/calico/policy-tracker/policy-tracker.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import os 4 | import ssl 5 | import sys 6 | import time 7 | import subprocess 8 | 9 | from opensearchpy import OpenSearch 10 | 11 | 12 | def index_result(payload, retry_count=30): 13 | logging.info( 14 | f"Sending metric to es server {es_server} with index {es_index}\n{payload}" 15 | ) 16 | while retry_count > 0: 17 | try: 18 | ssl_ctx = ssl.create_default_context() 19 | ssl_ctx.check_hostname = False 20 | ssl_ctx.verify_mode = ssl.CERT_NONE 21 | es = OpenSearch([es_server]) 22 | es.index(index=es_index, body=payload) 23 | retry_count = 0 24 | except Exception as e: 25 | logging.info("Failed Indexing", e) 26 | logging.info("Retrying to index...") 27 | retry_count -= 1 28 | 29 | 30 | def get_number_of_filter_rules(): 31 | result = get_iptables_rules("filter") 32 | return result.count("\n") 33 | 34 | 35 | def get_number_of_raw_rules(): 36 | result = get_iptables_rules("raw") 37 | return result.count("\n") 38 | 39 | 40 | def get_iptables_rules(table="filter"): 41 | try: 42 | output = subprocess.run( 43 | 
["iptables-legacy", "--list-rules", "-t", table], 44 | capture_output=True, 45 | text=True, 46 | ) 47 | return output.stdout 48 | except Exception as e: 49 | logging.error(f"Failed getting iptables rules in table {table}: {e}") 50 | return "" 51 | 52 | 53 | def get_ipsets_len(): 54 | result = get_all_ipsets() 55 | return result.count("\n") 56 | 57 | 58 | def get_all_ipsets(): 59 | try: 60 | output = subprocess.run( 61 | ["ipset", "list"], 62 | capture_output=True, 63 | text=True, 64 | ) 65 | return output.stdout 66 | except Exception as e: 67 | logging.error(f"Failed listing ipsets: {e}") 68 | return "" 69 | 70 | 71 | # poll_interval in seconds, float 72 | # convergence_period in seconds, for how long number of flows shouldn't change to consider it stable 73 | # convergence_timeout in seconds, for how long number to wait for stabilisation before timing out 74 | def wait_for_rules_to_stabilize( 75 | poll_interval, convergence_period, convergence_timeout, node_name 76 | ): 77 | timeout = convergence_timeout + convergence_period 78 | start = time.time() 79 | last_changed = time.time() 80 | filter_rules_num = get_number_of_filter_rules() 81 | raw_rules_num = get_number_of_raw_rules() 82 | changed = False 83 | ipsets_len = get_ipsets_len() 84 | while time.time() - last_changed < convergence_period: 85 | if time.time() - start >= timeout: 86 | logging.info(f"TIMEOUT: {node_name} {timeout} seconds passed") 87 | return 1 88 | 89 | new_raw_rules_num = get_number_of_raw_rules() 90 | if new_raw_rules_num != raw_rules_num: 91 | raw_rules_num = new_raw_rules_num 92 | last_changed = time.time() 93 | changed = True 94 | logging.info(f"{node_name}: iptables raw table rules={raw_rules_num}") 95 | 96 | new_filter_rules_num = get_number_of_filter_rules() 97 | if new_filter_rules_num != filter_rules_num: 98 | filter_rules_num = new_filter_rules_num 99 | last_changed = time.time() 100 | changed = True 101 | logging.info(f"{node_name}: iptables filter table rules={filter_rules_num}") 102 | 103 | new_ipsets_len = get_ipsets_len() 104 | if new_ipsets_len != ipsets_len: 105 | ipsets_len = new_ipsets_len 106 | last_changed = time.time() 107 | changed = True 108 | logging.info(f"{node_name}: length of ipset list={ipsets_len}") 109 | 110 | if changed: 111 | doc = { 112 | "metricName": "convergence_tracker", 113 | "timestamp": datetime.datetime.now(datetime.UTC), 114 | "workload": "network-policy-perf", 115 | "uuid": uuid, 116 | "source_name": node_name, 117 | "convergence_timestamp": datetime.datetime.fromtimestamp(last_changed), 118 | "iptables_filter_rules": filter_rules_num, 119 | "iptables_raw_rules": raw_rules_num, 120 | "ipsets_list_len": ipsets_len, 121 | } 122 | index_result(doc) 123 | changed = False 124 | 125 | time.sleep(poll_interval) 126 | 127 | stabilize_datetime = datetime.datetime.fromtimestamp(last_changed) 128 | logging.info( 129 | f"RESULT: time={stabilize_datetime.isoformat(sep=' ', timespec='milliseconds')} {node_name} " 130 | f"finished with {filter_rules_num} rules in filter table, and {raw_rules_num} rules in raw table " 131 | f"and with {ipsets_len} lines in ipset list." 
132 | ) 133 | doc = { 134 | "metricName": "convergence_tracker", 135 | "timestamp": datetime.datetime.now(datetime.UTC), 136 | "workload": "network-policy-perf", 137 | "uuid": uuid, 138 | "source_name": node_name, 139 | "convergence_timestamp": datetime.datetime.fromtimestamp(last_changed), 140 | "iptables_filter_rules": filter_rules_num, 141 | "iptables_raw_rules": raw_rules_num, 142 | "ipsets_list_len": ipsets_len, 143 | } 144 | index_result(doc) 145 | return 0 146 | 147 | 148 | def main(): 149 | global es_server, es_index, start_time, uuid 150 | es_server = os.getenv("ES_SERVER") 151 | es_index = os.getenv("ES_INDEX_NETPOL") 152 | node_name = os.getenv("MY_NODE_NAME") 153 | uuid = os.getenv("UUID") 154 | convergence_period = int(os.getenv("CONVERGENCE_PERIOD")) 155 | convergence_timeout = int(os.getenv("CONVERGENCE_TIMEOUT")) 156 | start_time = datetime.datetime.now() 157 | 158 | logging.basicConfig( 159 | format="%(asctime)s %(levelname)-8s %(message)s", 160 | level=logging.INFO, 161 | datefmt="%Y-%m-%d %H:%M:%S", 162 | ) 163 | doc = { 164 | "metricName": "convergence_tracker_info", 165 | "timestamp": datetime.datetime.now(datetime.UTC), 166 | "workload": "network-policy-perf", 167 | "uuid": uuid, 168 | "source_name": node_name, 169 | "convergence_period": convergence_period, 170 | "convergence_timeout": convergence_timeout, 171 | "test_metadata": os.getenv("METADATA"), 172 | } 173 | index_result(doc) 174 | 175 | logging.info( 176 | f"Start calico-tracker {node_name}, convergence_period {convergence_period}, convergence timeout {convergence_timeout}" 177 | ) 178 | timeout = wait_for_rules_to_stabilize( 179 | 10, convergence_period, convergence_timeout, node_name 180 | ) 181 | sys.exit(timeout) 182 | 183 | 184 | if __name__ == "__main__": 185 | main() 186 | -------------------------------------------------------------------------------- /kube-burner-workload/calico/policy-tracker/requirements.txt: -------------------------------------------------------------------------------- 1 | datetime 2 | requests 3 | kubernetes 4 | opensearch-py 5 | -------------------------------------------------------------------------------- /kube-burner-workload/calico/test_limit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | check_and_wait () { 4 | pause=30 5 | echo "============================================================" 6 | echo "> Iteration with $NETPOLS_PER_NAMESPACE network policies per ns finished. Status: $status" 7 | if [ "$status" -ne "$expectedStatus" ]; then 8 | echo "> Test failed. Exiting..." 9 | exit 0 10 | fi 11 | echo "> Test passed. Waiting for $pause seconds for next iteration." 12 | sleep $pause 13 | } 14 | 15 | find_prometheus() { 16 | prometheus_port=$(kubectl get svc prometheus-service -n calico-monitoring -ojsonpath="{.spec.ports[0].nodePort}") 17 | prometheus_addr=$(kubectl get node -ojsonpath="{.items[0].status.addresses[0].address}") 18 | prometheus_url="http://$prometheus_addr:$prometheus_port" 19 | echo "> Promtheus URL=$prometheus_url" 20 | } 21 | 22 | 23 | cd .. 
24 | source ./env 25 | kubectl apply -f "$PLATFORM/monitoring.yaml" 26 | kubectl patch felixconfiguration default --type='merge' -p '{"spec":{"prometheusMetricsEnabled":true}}' 27 | sleep 10 28 | 29 | NETPOLS_PER_NAMESPACE=0 30 | STEP=100 31 | expectedStatus=0 32 | status=$expectedStatus 33 | find_prometheus 34 | 35 | while true; do 36 | NETPOLS_PER_NAMESPACE=$((NETPOLS_PER_NAMESPACE + STEP)) 37 | echo "> Starting iteration with $NETPOLS_PER_NAMESPACE network policies per ns." 38 | echo "============================================================" 39 | kube-burner init -m "$PLATFORM/metrics.yml" -c ./network-policy.yaml -u "$prometheus_url" 40 | status=$? 41 | check_and_wait 42 | done 43 | -------------------------------------------------------------------------------- /kube-burner-workload/convergence_waiter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TIME_SPENT=0 4 | TIMEOUT=$((CONVERGENCE_TIMEOUT + CONVERGENCE_PERIOD)) 5 | while [ $TIME_SPENT -le "$TIMEOUT" ]; do 6 | FAILED_COUNT=$(kubectl get pods -n convergence-tracker-0 --field-selector status.phase=Failed -o name | wc -l) 7 | if [ "$FAILED_COUNT" -ne 0 ]; then 8 | echo "ERROR: convergence tracker pod reported failure" 9 | kubectl get pods -n convergence-tracker-0 --field-selector status.phase=Failed -o name 10 | exit 1 11 | fi 12 | RUNNING_COUNT=$(kubectl get pods -n convergence-tracker-0 --field-selector status.phase!=Succeeded -o name | wc -l) 13 | if [ "$RUNNING_COUNT" -eq 0 ]; then 14 | echo "DONE" 15 | exit 0 16 | fi 17 | sleep 30 18 | TIME_SPENT=$((TIME_SPENT + 30)) 19 | done 20 | exit 1 21 | -------------------------------------------------------------------------------- /kube-burner-workload/egress-np.yml: -------------------------------------------------------------------------------- 1 | {{- $podNum := add .pods_per_namespace 1 }} 2 | {{- $podNum = sub $podNum .peer_pods }} 3 | {{- $podDict := dict (toString $podNum) "true"}} 4 | {{- $podLabel := toJson $podDict }} 5 | {{- $localPodNum := add .pods_per_namespace 1 }} 6 | {{- $localPodNum = sub $localPodNum .local_pods }} 7 | {{- $localPodDict := dict (toString $localPodNum) "true"}} 8 | {{- $localPodLabel := toJson $localPodDict }} 9 | {{- $binomial := Binomial $.namespaces $.peer_namespaces }} 10 | kind: NetworkPolicy 11 | apiVersion: networking.k8s.io/v1 12 | metadata: 13 | name: egress-{{.Replica}} 14 | spec: 15 | podSelector: 16 | matchLabels: {{$localPodLabel}} 17 | egress: 18 | {{- $startIdx := mul $.Iteration .pod_selectors .netpols_per_namespace }} 19 | {{- $nsShift := mul (sub $.Replica 1) .pod_selectors }} 20 | {{- $startIdx = add $startIdx $nsShift -1 }} 21 | {{- range $i, $e := until .pod_selectors }} 22 | {{- $startIdx = add $startIdx 1 }} 23 | {{- if ge $startIdx $binomial }} 24 | {{- $startIdx = mod $startIdx $binomial }} 25 | {{- end }} 26 | {{- $nsIdxList := IndexToCombination nil (int $startIdx) $.namespaces $.peer_namespaces }} 27 | {{- $nsList := list }} 28 | {{- range $i, $nextNs := $nsIdxList }} 29 | {{- $next_namespace := print "network-policy-perf-" (add $nextNs 1) }} 30 | {{- $nsList = append $nsList $next_namespace }} 31 | {{- end }} 32 | {{- $nsNames := toJson $nsList }} 33 | - to: 34 | - podSelector: 35 | matchLabels: {{$podLabel}} 36 | namespaceSelector: 37 | matchExpressions: 38 | - key: kubernetes.io/metadata.name 39 | operator: In 40 | values: {{$nsNames}} 41 | ports: 42 | {{- $single_port := 1000 }} 43 | {{- range $i, $e := until $.single_ports }} 44 | {{- $single_port = add 
$single_port 1 }} 45 | - protocol: TCP 46 | port: {{$single_port}} 47 | {{- end }} 48 | {{- $rangeStart := 5000 }} 49 | {{- range $i, $e := until $.port_ranges }} 50 | {{- $rangeEnd := add $rangeStart 5 }} 51 | - protocol: TCP 52 | port: {{$rangeStart}} 53 | endPort: {{$rangeEnd}} 54 | {{ $rangeStart = add $rangeStart 10}} 55 | {{- end }} 56 | {{- end }} 57 | {{- if gt .cidr_rules 0 }} 58 | {{- $subnetIdx := add (mul $.Replica $.cidr_rules) 1 }} 59 | {{- range $i, $e := until .cidr_rules }} 60 | - to: 61 | - ipBlock: 62 | cidr: {{GetSubnet24 (int $subnetIdx) }} 63 | ports: 64 | {{- $single_port := 1000 }} 65 | {{- range $i, $e := until $.single_ports }} 66 | {{- $single_port = add $single_port 1 }} 67 | - protocol: TCP 68 | port: {{$single_port}} 69 | {{- end }} 70 | {{- $rangeStart := 5000 }} 71 | {{- range $i, $e := until $.port_ranges }} 72 | {{- $rangeEnd := add $rangeStart 5 }} 73 | - protocol: TCP 74 | port: {{$rangeStart}} 75 | endPort: {{$rangeEnd}} 76 | {{ $rangeStart = add $rangeStart 10}} 77 | {{- end }} 78 | {{- $subnetIdx = add $subnetIdx 1 }} 79 | {{- end }} 80 | {{- end }} 81 | policyTypes: 82 | - Egress 83 | -------------------------------------------------------------------------------- /kube-burner-workload/env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -a 3 | # minimal example 4 | NAMESPACES=1 5 | PODS_PER_NAMESPACE=1 6 | NETPOLS_PER_NAMESPACE=1 7 | 8 | # netpol config 9 | INGRESS=true 10 | EGRESS=false 11 | LOCAL_PODS=1 12 | SINGLE_PORTS=0 13 | PORT_RANGES=0 14 | POD_SELECTORS=0 15 | PEER_NAMESPACES=0 16 | PEER_PODS=0 17 | CIDRS=1 18 | 19 | # set kubeconfig 20 | KUBECONFIG= 21 | 22 | # PLATFORM is one of the folders under network-policy workload 23 | PLATFORM=kind-metrics 24 | # Convergence tracker settings 25 | CONVERGENCE_TRACKER= 26 | # CONVERGENCE_PERIOD and CONVERGENCE_TIMEOUT are convergence tracker parameters. 27 | # CONVERGENCE_PERIOD specifies for how long the system should be stable to be considered converged and 28 | # CONVERGENCE_TIMEOUT is a timer specifying the hard deadline for policy convergence. 29 | # A test failure will be reported by convergence tracker in CONVERGENCE_TIMEOUT + CONVERGENCE_PERIOD seconds. 30 | CONVERGENCE_PERIOD=60 31 | CONVERGENCE_TIMEOUT=3600 32 | 33 | # Number of nodes to run convergence tracker. Doesn't have effect if CONVERGENCE_TRACKER is false 34 | NODES_COUNT=3 35 | 36 | # JOB_PAUSE defines for how long he workload won't be deleted after the test is done 37 | # default behaviour is to wait for 5 minutes after job completion to see how the system 38 | # behaves some time after all work is done 39 | JOB_PAUSE=5m 40 | # to debug, use longer interval 41 | #JOB_PAUSE=1h 42 | 43 | # variables that should be filled by platform or stay empty 44 | JOB_NAMESPACE_LABELS= 45 | ES_SERVER= 46 | ES_INDEX= 47 | 48 | if [[ ! 
-z $PLATFORM ]]; then 49 | if test -f $PLATFORM/env; then 50 | source $PLATFORM/env 51 | fi 52 | fi 53 | set +a 54 | -------------------------------------------------------------------------------- /kube-burner-workload/ingress-np.yml: -------------------------------------------------------------------------------- 1 | {{- $podNum := add .pods_per_namespace 1 }} 2 | {{- $podNum = sub $podNum .peer_pods }} 3 | {{- $podDict := dict (toString $podNum) "true"}} 4 | {{- $podLabel := toJson $podDict }} 5 | {{- $localPodNum := add .pods_per_namespace 1 }} 6 | {{- $localPodNum = sub $localPodNum .local_pods }} 7 | {{- $localPodDict := dict (toString $localPodNum) "true"}} 8 | {{- $localPodLabel := toJson $localPodDict }} 9 | {{- $binomial := Binomial $.namespaces $.peer_namespaces }} 10 | kind: NetworkPolicy 11 | apiVersion: networking.k8s.io/v1 12 | metadata: 13 | name: ingress-{{.Replica}} 14 | spec: 15 | podSelector: 16 | matchLabels: {{$localPodLabel}} 17 | ingress: 18 | {{- $startIdx := mul $.Iteration .pod_selectors .netpols_per_namespace }} 19 | {{- $nsShift := mul (sub $.Replica 1) .pod_selectors }} 20 | {{- $startIdx = add $startIdx $nsShift -1 }} 21 | {{- range $i, $e := until .pod_selectors }} 22 | {{- $startIdx = add $startIdx 1 }} 23 | {{- if ge $startIdx $binomial }} 24 | {{- $startIdx = mod $startIdx $binomial }} 25 | {{- end }} 26 | {{- $nsIdxList := IndexToCombination nil (int $startIdx) $.namespaces $.peer_namespaces }} 27 | {{- $nsList := list }} 28 | {{- range $i, $nextNs := $nsIdxList }} 29 | {{- $next_namespace := print "network-policy-perf-" (add $nextNs 1) }} 30 | {{- $nsList = append $nsList $next_namespace }} 31 | {{- end }} 32 | {{- $nsNames := toJson $nsList }} 33 | - from: 34 | - podSelector: 35 | matchLabels: {{$podLabel}} 36 | namespaceSelector: 37 | matchExpressions: 38 | - key: kubernetes.io/metadata.name 39 | operator: In 40 | values: {{$nsNames}} 41 | ports: 42 | {{- $single_port := 1000 }} 43 | {{- range $i, $e := until $.single_ports }} 44 | {{- $single_port = add $single_port 1 }} 45 | - protocol: TCP 46 | port: {{$single_port}} 47 | {{- end }} 48 | {{- $rangeStart := 5000 }} 49 | {{- range $i, $e := until $.port_ranges }} 50 | {{- $rangeEnd := add $rangeStart 5 }} 51 | - protocol: TCP 52 | port: {{$rangeStart}} 53 | endPort: {{$rangeEnd}} 54 | {{ $rangeStart = add $rangeStart 10}} 55 | {{- end }} 56 | {{- end }} 57 | {{- if gt .cidr_rules 0 }} 58 | {{- $subnetIdx := add (mul $.Replica $.cidr_rules) 1 }} 59 | {{- range $i, $e := until .cidr_rules }} 60 | - from: 61 | - ipBlock: 62 | cidr: {{GetSubnet24 (int $subnetIdx) }} 63 | ports: 64 | {{- $single_port := 1000 }} 65 | {{- range $i, $e := until $.single_ports }} 66 | {{- $single_port = add $single_port 1 }} 67 | - protocol: TCP 68 | port: {{$single_port}} 69 | {{- end }} 70 | {{- $rangeStart := 5000 }} 71 | {{- range $i, $e := until $.port_ranges }} 72 | {{- $rangeEnd := add $rangeStart 5 }} 73 | - protocol: TCP 74 | port: {{$rangeStart}} 75 | endPort: {{$rangeEnd}} 76 | {{ $rangeStart = add $rangeStart 10}} 77 | {{- end }} 78 | {{- $subnetIdx = add $subnetIdx 1 }} 79 | {{- end }} 80 | {{- end }} 81 | policyTypes: 82 | - Ingress 83 | -------------------------------------------------------------------------------- /kube-burner-workload/kind-metrics/README.md: -------------------------------------------------------------------------------- 1 | This folder helps you enable metric collection for scale tests. 2 | It consists of the following steps: 3 | 1. Install Prometheus in a KinD cluster 4 | 2. 
Run Elasticsearch locally with docker 5 | 3. Run Grafana locally with docker 6 | 4. Run kube-burner with metrics collection 7 | 5. Configure Grafana dashboard to collect data from Elasticsearch 8 | 9 | 10 | You may have some of the mentioned steps already done, then just replace IPs and ports in the following steps. 11 | 12 | 1. Install Prometheus in a KinD cluster 13 | `kubectl apply -f monitoring.yaml` 14 | This command will create a monitoring namespace, Prometheus pod and a NodePort service. 15 | You can check Prometheus interface at : 16 | 17 | 2,3. Run Elasticsearch and Grafana locally with docker 18 | 19 | To ensure collected data outlives the KinD cluster, we run Elasticsearch and Grafana as external containers. 20 | 21 | ```shell 22 | docker run -d --name=elasticsearch -p 9200:9200 -e "discovery.type=single-node" -e "xpack.security.enabled=false" docker.elastic.co/elasticsearch/elasticsearch:8.12.1 23 | docker run -d --name=grafana -p 3000:3000 grafana/grafana:latest 24 | ``` 25 | 26 | To get docker container IP, use 27 | `docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' ` 28 | 29 | Grafana should be running at localhost:3000, credentials are admin/admin. 30 | 31 | 4. Run kube-burner with metrics collection 32 | 33 | ```shell 34 | cd ./kube-burner-workload 35 | source ./env 36 | kube-burner init -m ./kind-metrics/metrics.yml -c ./network-policy.yaml -u http://: 37 | ``` 38 | 39 | Wait for kube-burner to finish (takes around 6 minutes, waiting time is configured with JOB_PAUSE). 40 | 41 | 5. Configure Grafana dashboard to collect data from Elasticsearch 42 | 43 | - Log into Grafana (see step 3), go to configure Data Sources (Menu > Connections > Data sources) 44 | - Add data source of type elasticsearch 45 | - Configure 46 | 47 | URL = `http://admin:admin@:9200`\ 48 | No Authentication\ 49 | Elasticsearch details > 50 | - Index name = `kube-burner` 51 | - Time field name = `timestamp` 52 | 53 | - Click `Save & test`, expect "Data source successfully connected." 54 | - Create a dashboard, Menu > Dashboards > New > Import Dashboard 55 | - Import json form [./grafana_dash.json](./grafana_dash.json) 56 | 57 | You should see something like ![image](./grafana.png) 58 | If not, try to click on UUID dropdown and see if there is something to select. 59 | 60 | 6. More metrics may be added with e.g. 
https://github.com/kubernetes/kube-state-metrics 61 | -------------------------------------------------------------------------------- /kube-burner-workload/kind-metrics/env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -a 3 | ES_SERVER=http://admin:admin@localhost:9200 4 | ES_INDEX=kube-burner 5 | set +a 6 | -------------------------------------------------------------------------------- /kube-burner-workload/kind-metrics/grafana.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npinaeva/k8s-netpol-scale/3d1aabaf4511f27966b567ba8192f8cce6b52375/kube-burner-workload/kind-metrics/grafana.png -------------------------------------------------------------------------------- /kube-burner-workload/kind-metrics/metrics.yml: -------------------------------------------------------------------------------- 1 | # API server 2 | 3 | - query: irate(apiserver_request_total{verb="POST", resource="pods", subresource="binding",code="201"}[2m]) > 0 4 | metricName: schedulingThroughput 5 | 6 | - query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"LIST|GET", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0 7 | metricName: readOnlyAPICallsLatency 8 | 9 | - query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"POST|PUT|DELETE|PATCH", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0 10 | metricName: mutatingAPICallsLatency 11 | 12 | - query: sum(irate(apiserver_request_total{apiserver="kube-apiserver",verb!="WATCH"}[2m])) by (verb,resource,code) > 0 13 | metricName: APIRequestRate 14 | 15 | # Containers & pod metrics 16 | - query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace="kube-system"}[2m]) * 100) by (container, pod, namespace, node)) > 0 17 | metricName: containerCPU 18 | 19 | - query: (sum(container_memory_rss{name!="",container!="POD",namespace="kube-system"}) by (container, pod, namespace, node)) > 0 20 | metricName: containerMemory 21 | 22 | # Cluster metrics 23 | 24 | - query: kubernetes_build_info 25 | metricName: k8sVersion 26 | instant: true 27 | -------------------------------------------------------------------------------- /kube-burner-workload/kind-metrics/monitoring.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: monitoring 6 | --- 7 | apiVersion: v1 8 | kind: Service 9 | metadata: 10 | name: prometheus-service 11 | namespace: monitoring 12 | annotations: 13 | prometheus.io/scrape: 'true' 14 | prometheus.io/port: '9090' 15 | spec: 16 | selector: 17 | app: prometheus-server 18 | type: NodePort 19 | ports: 20 | - port: 8080 21 | targetPort: 9090 22 | --- 23 | apiVersion: rbac.authorization.k8s.io/v1 24 | kind: ClusterRole 25 | metadata: 26 | name: prometheus 27 | rules: 28 | - apiGroups: [""] 29 | resources: 30 | - nodes 31 | - nodes/proxy 32 | - services 33 | - endpoints 34 | - pods 35 | verbs: ["get", "list", "watch"] 36 | - apiGroups: 37 | - extensions 38 | resources: 39 | - ingresses 40 | verbs: ["get", "list", "watch"] 41 | - nonResourceURLs: ["/metrics"] 42 | verbs: ["get"] 43 | --- 44 | apiVersion: rbac.authorization.k8s.io/v1 45 | kind: ClusterRoleBinding 46 | metadata: 47 | name: prometheus 48 | 
roleRef: 49 | apiGroup: rbac.authorization.k8s.io 50 | kind: ClusterRole 51 | name: prometheus 52 | subjects: 53 | - kind: ServiceAccount 54 | name: default 55 | namespace: monitoring 56 | --- 57 | apiVersion: v1 58 | kind: ConfigMap 59 | metadata: 60 | name: prometheus-server-conf 61 | labels: 62 | name: prometheus-server-conf 63 | namespace: monitoring 64 | data: 65 | prometheus.yml: |- 66 | global: 67 | scrape_interval: 5s 68 | evaluation_interval: 5s 69 | scrape_configs: 70 | - job_name: 'kubernetes-apiservers' 71 | kubernetes_sd_configs: 72 | - role: endpoints 73 | scheme: https 74 | tls_config: 75 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 76 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 77 | relabel_configs: 78 | - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] 79 | action: keep 80 | regex: default;kubernetes;https 81 | 82 | - job_name: 'kubernetes-controller-manager' 83 | honor_labels: true 84 | scheme: https 85 | tls_config: 86 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 87 | insecure_skip_verify: true 88 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 89 | static_configs: 90 | - targets: 91 | - 127.0.0.1:10257 92 | 93 | - job_name: 'kubernetes-nodes' 94 | scheme: https 95 | tls_config: 96 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 97 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 98 | kubernetes_sd_configs: 99 | - role: node 100 | relabel_configs: 101 | - action: labelmap 102 | regex: __meta_kubernetes_node_label_(.+) 103 | - target_label: __address__ 104 | replacement: localhost:6443 105 | - source_labels: [__meta_kubernetes_node_name] 106 | regex: (.+) 107 | target_label: __metrics_path__ 108 | replacement: /api/v1/nodes/${1}/proxy/metrics 109 | 110 | - job_name: 'kubernetes-cadvisor' 111 | scheme: https 112 | tls_config: 113 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 114 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 115 | kubernetes_sd_configs: 116 | - role: node 117 | relabel_configs: 118 | - action: labelmap 119 | regex: __meta_kubernetes_node_label_(.+) 120 | - target_label: __address__ 121 | replacement: localhost:6443 122 | - source_labels: [__meta_kubernetes_node_name] 123 | regex: (.+) 124 | target_label: __metrics_path__ 125 | replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor 126 | 127 | --- 128 | apiVersion: v1 129 | kind: Pod 130 | metadata: 131 | name: prometheus 132 | namespace: monitoring 133 | labels: 134 | app: prometheus-server 135 | spec: 136 | hostNetwork: true 137 | nodeSelector: 138 | node-role.kubernetes.io/control-plane: "" 139 | tolerations: 140 | - key: CriticalAddonsOnly 141 | operator: Exists 142 | - effect: NoSchedule 143 | key: node-role.kubernetes.io/master 144 | - effect: NoSchedule 145 | key: node-role.kubernetes.io/control-plane 146 | containers: 147 | - name: prometheus 148 | image: prom/prometheus:latest 149 | args: 150 | - "--config.file=/etc/prometheus/prometheus.yml" 151 | - "--storage.tsdb.path=/prometheus/" 152 | - "--web.enable-admin-api" 153 | ports: 154 | - containerPort: 9090 155 | volumeMounts: 156 | - name: prometheus-config-volume 157 | mountPath: /etc/prometheus/ 158 | - name: prometheus-storage-volume 159 | mountPath: /prometheus/ 160 | volumes: 161 | - name: prometheus-config-volume 162 | configMap: 163 | defaultMode: 420 164 | name: prometheus-server-conf 165 | - name: 
prometheus-storage-volume 166 | emptyDir: {} 167 | -------------------------------------------------------------------------------- /kube-burner-workload/network-policy.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | {{- if .ES_SERVER }} 3 | global: 4 | gc: true 5 | indexerConfig: 6 | esServers: ["{{.ES_SERVER}}"] 7 | insecureSkipVerify: true 8 | defaultIndex: {{.ES_INDEX}} 9 | type: elastic 10 | {{- end }} 11 | jobs: 12 | {{- if .CONVERGENCE_TRACKER }} 13 | - name: convergence-tracker 14 | namespace: convergence-tracker 15 | jobIterations: 1 16 | podWait: false 17 | waitWhenFinished: false 18 | preLoadImages: false 19 | churn: false 20 | cleanup: true 21 | {{- if .JOB_NAMESPACE_LABELS }} 22 | namespaceLabels: {{.JOB_NAMESPACE_LABELS}} 23 | {{- end}} 24 | jobPause: 30s 25 | objects: 26 | - objectTemplate: {{.PLATFORM}}/convergence_tracker.yml 27 | replicas: {{.NODES_COUNT}} 28 | inputVars: 29 | convergence_period: "{{.CONVERGENCE_PERIOD}}" 30 | convergence_timeout: "{{.CONVERGENCE_TIMEOUT}}" 31 | es_server: "{{.ES_SERVER}}" 32 | es_index: {{.ES_INDEX}} 33 | metadata: "netpols_per_namespace: {{.NETPOLS_PER_NAMESPACE}}, pods_per_namespace: {{.PODS_PER_NAMESPACE}}, 34 | local_pods: {{.LOCAL_PODS}}, pod_selectors: {{.POD_SELECTORS}}, 35 | single_ports: {{.SINGLE_PORTS}}, port_ranges: {{.PORT_RANGES}}, 36 | peer_namespaces: {{.PEER_NAMESPACES}}, peer_pods: {{.PEER_PODS}}, cidr_rules: {{.CIDRS}}" 37 | {{- end }} 38 | - name: network-policy-perf 39 | namespace: network-policy-perf 40 | jobIterations: {{.NAMESPACES}} 41 | qps: 300 42 | burst: 300 43 | namespacedIterations: true 44 | podWait: false 45 | waitWhenFinished: true 46 | # preLoadImages: true 47 | preLoadImages: false 48 | # preLoadPeriod: 30s 49 | # jobIterationDelay: 1m 50 | churn: false 51 | jobPause: "{{.JOB_PAUSE}}" 52 | {{- if .CONVERGENCE_TRACKER }} 53 | beforeCleanup: "convergence_waiter.sh" 54 | {{- end }} 55 | cleanup: true 56 | {{- if .JOB_NAMESPACE_LABELS }} 57 | namespaceLabels: {{.JOB_NAMESPACE_LABELS}} 58 | {{- end }} 59 | objects: 60 | - objectTemplate: pod.yml 61 | replicas: {{.PODS_PER_NAMESPACE}} 62 | {{- if eq .INGRESS "true" }} 63 | - objectTemplate: ingress-np.yml 64 | replicas: {{.NETPOLS_PER_NAMESPACE}} 65 | inputVars: 66 | namespaces: {{.NAMESPACES}} 67 | pods_per_namespace: {{.PODS_PER_NAMESPACE}} 68 | netpols_per_namespace: {{.NETPOLS_PER_NAMESPACE}} 69 | local_pods: {{.LOCAL_PODS}} 70 | pod_selectors: {{.POD_SELECTORS}} 71 | single_ports: {{.SINGLE_PORTS}} 72 | port_ranges: {{.PORT_RANGES}} 73 | peer_namespaces: {{.PEER_NAMESPACES}} 74 | peer_pods: {{.PEER_PODS}} 75 | cidr_rules: {{.CIDRS}} 76 | {{- end }} 77 | {{- if eq .EGRESS "true" }} 78 | - objectTemplate: egress-np.yml 79 | replicas: {{.NETPOLS_PER_NAMESPACE}} 80 | inputVars: 81 | namespaces: {{.NAMESPACES}} 82 | pods_per_namespace: {{.PODS_PER_NAMESPACE}} 83 | netpols_per_namespace: {{.NETPOLS_PER_NAMESPACE}} 84 | local_pods: {{.LOCAL_PODS}} 85 | pod_selectors: {{.POD_SELECTORS}} 86 | single_ports: {{.SINGLE_PORTS}} 87 | port_ranges: {{.PORT_RANGES}} 88 | peer_namespaces: {{.PEER_NAMESPACES}} 89 | peer_pods: {{.PEER_PODS}} 90 | cidr_rules: {{.CIDRS}} 91 | {{- end }} 92 | -------------------------------------------------------------------------------- /kube-burner-workload/openshift/README.md: -------------------------------------------------------------------------------- 1 | ## Running 2 | 3 | 1. This profile assumes you have an openshift cluster, and the KUBECONFIG that can be used in the scale test. 
4 | 2. Build kube-burner from the current branch 5 | `make build` 6 | 3. `cd ./examples/workloads/network-policy` 7 | 4. Set env variables with the test config in the `env` file 8 | 9 | 4.1 Set env file variable PLATFORM=openshift 10 | 11 | 5. Set env variables in the `openshift/env` file 12 | 6. `source ./env` 13 | 7. This command uses `oc` binary which is an Openshift CLI similar to kubectl 14 | `kube-burner init -m ./openshift/metrics.yml -c ./network-policy.yaml -u https://$(oc get route prometheus-k8s -n openshift-monitoring -o jsonpath="{.spec.host}") --log-level=debug --token=$(oc create token prometheus-k8s -n openshift-monitoring)` 15 | 8. When the test finishes, metrics should be collected by the ES_SERVER 16 | 17 | ## Finding the limit 18 | 19 | To automate finding the limit, [test_limit.sh](./test_limit.sh) script may be used. 20 | It can run multiple iterations increasing the number of network policies until test fails. 21 | It waits for full cleanup after every iteration to ensure the cluster is ready for the next one. 22 | 23 | ## Metrics and Dashboards 24 | 25 | Metrics in this folder are Openshift-specific, but may be tweaked for other clusters, e.g. by changing 26 | filtered namespaces for `containerCPU` metrics. 27 | 28 | `./grafana_dash.json` has the JSON model that defines the dashboard. It uses metrics defined in `./metrics.yml` 29 | and may be used as an example to define dashboard for other clusters. -------------------------------------------------------------------------------- /kube-burner-workload/openshift/convergence_tracker.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: convergence-tracker-{{.Replica}} 5 | labels: 6 | app: convergence-tracker 7 | spec: 8 | topologySpreadConstraints: 9 | - maxSkew: 1 10 | topologyKey: kubernetes.io/hostname 11 | whenUnsatisfiable: DoNotSchedule 12 | labelSelector: 13 | matchLabels: 14 | app: convergence-tracker 15 | tolerations: 16 | - key: "node-role.kubernetes.io/master" 17 | operator: "Exists" 18 | volumes: 19 | - name: openvswitch 20 | hostPath: 21 | path: /var/run/openvswitch 22 | - name: ovn 23 | hostPath: 24 | path: /var/run/ovn/ 25 | - name: ovn-ic 26 | hostPath: 27 | path: /var/run/ovn-ic/ 28 | - name: ovn-kubernetes 29 | hostPath: 30 | path: /var/run/ovn-kubernetes 31 | - name: host-var-log-ovs 32 | hostPath: 33 | path: /var/log/openvswitch 34 | - name: pod-logs 35 | hostPath: 36 | path: /var/log/pods 37 | restartPolicy: Never 38 | containers: 39 | - name: tracker 40 | # image built with the ./openflow-tracker/Dockerfile 41 | image: quay.io/npinaeva/netpol-scale:openshift 42 | securityContext: 43 | privileged: true 44 | command: [ "/bin/bash", "-c", "python openflow-tracker.py"] 45 | imagePullPolicy: Always 46 | volumeMounts: 47 | - name: openvswitch 48 | mountPath: /var/run/openvswitch 49 | - name: host-var-log-ovs 50 | mountPath: /var/log/openvswitch 51 | - name: ovn 52 | mountPath: /var/run/ovn 53 | - name: ovn-ic 54 | mountPath: /var/run/ovn-ic 55 | - name: pod-logs 56 | mountPath: /var/log/pods 57 | env: 58 | - name: CONVERGENCE_PERIOD 59 | value: "{{.convergence_period}}" 60 | - name: CONVERGENCE_TIMEOUT 61 | value: "{{.convergence_timeout}}" 62 | - name: ES_SERVER 63 | value: {{.es_server}} 64 | - name: ES_INDEX_NETPOL 65 | value: {{.es_index}} 66 | - name: UUID 67 | value: {{.UUID}} 68 | - name: METADATA 69 | value: "{{.metadata}}" 70 | - name: MY_NODE_NAME 71 | valueFrom: 72 | fieldRef: 73 | fieldPath: 
spec.nodeName 74 | -------------------------------------------------------------------------------- /kube-burner-workload/openshift/env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -a 3 | ES_SERVER=https://example.com:443 4 | ES_INDEX=ripsaw-kube-burner 5 | JOB_NAMESPACE_LABELS=" 6 | security.openshift.io/scc.podSecurityLabelSync: false 7 | pod-security.kubernetes.io/enforce: privileged 8 | pod-security.kubernetes.io/audit: privileged 9 | pod-security.kubernetes.io/warn: privileged 10 | " 11 | set +a 12 | -------------------------------------------------------------------------------- /kube-burner-workload/openshift/grafana.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npinaeva/k8s-netpol-scale/3d1aabaf4511f27966b567ba8192f8cce6b52375/kube-burner-workload/openshift/grafana.png -------------------------------------------------------------------------------- /kube-burner-workload/openshift/metrics.yml: -------------------------------------------------------------------------------- 1 | # API server 2 | 3 | - query: irate(apiserver_request_total{verb="POST", resource="pods", subresource="binding",code="201"}[2m]) > 0 4 | metricName: schedulingThroughput 5 | 6 | - query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"LIST|GET", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0 7 | metricName: readOnlyAPICallsLatency 8 | 9 | - query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"POST|PUT|DELETE|PATCH", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0 10 | metricName: mutatingAPICallsLatency 11 | 12 | - query: sum(irate(apiserver_request_total{apiserver="kube-apiserver",verb!="WATCH"}[2m])) by (verb,resource,code) > 0 13 | metricName: APIRequestRate 14 | 15 | # Containers & pod metrics 16 | - query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"openshift-(etcd|oauth-apiserver|sdn|ovn-kubernetes|.*apiserver|authentication|.*controller-manager|.*scheduler|image-registry|operator-lifecycle-manager)"}[2m]) * 100) by (container, pod, namespace, node) and on (node) kube_node_role{role="master"}) > 0 17 | metricName: containerCPU-Masters 18 | 19 | - query: (avg(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|ingress)"}[2m]) * 100 and on (node) kube_node_role{role="worker"}) by (namespace, pod, container, node)) > 0 20 | metricName: containerCPU-AggregatedWorkers 21 | 22 | - query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"openshift-(monitoring|sdn|ovn-kubernetes|ingress)"}[2m]) * 100) by (container, pod, namespace, node) and on (node) kube_node_role{role="infra"}) > 0 23 | metricName: containerCPU-Infra 24 | 25 | - query: (sum(container_memory_rss{name!="",container!="POD",namespace=~"openshift-(etcd|oauth-apiserver|.*apiserver|ovn-kubernetes|sdn|ingress|authentication|.*controller-manager|.*scheduler|image-registry|operator-lifecycle-manager)"}) by (container, pod, namespace, node) and on (node) kube_node_role{role="master"}) > 0 26 | metricName: containerMemory-Masters 27 | 28 | - query: avg(container_memory_rss{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|ingress)"} and on (node) 
kube_node_role{role="worker"}) by (pod, container, namespace, node) 29 | metricName: containerMemory-AggregatedWorkers 30 | 31 | - query: (sum(container_memory_rss{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|ingress|monitoring|image-registry)"}) by (container, pod, namespace, node) and on (node) kube_node_role{role="infra"}) > 0 32 | metricName: containerMemory-Infra 33 | 34 | # Node metrics: CPU & Memory 35 | 36 | - query: (sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)")) > 0 37 | metricName: nodeCPU-Masters 38 | 39 | - query: (avg((sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="worker"}, "instance", "$1", "node", "(.+)"))) by (mode)) > 0 40 | metricName: nodeCPU-AggregatedWorkers 41 | 42 | - query: (sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)")) > 0 43 | metricName: nodeCPU-Infra 44 | 45 | # We compute memory utilization by substrating available memory to the total 46 | 47 | - query: avg((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) and on (instance) label_replace(kube_node_role{role="worker"}, "instance", "$1", "node", "(.+)")) 48 | metricName: nodeMemoryUtilization-AggregatedWorkers 49 | 50 | - query: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)") 51 | metricName: nodeMemoryUtilization-Masters 52 | 53 | - query: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)") 54 | metricName: nodeMemoryUtilization-Infra 55 | 56 | # Kubelet & CRI-O runtime metrics 57 | 58 | - query: irate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[2m]) * 100 and on (node) topk(3,avg_over_time(irate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[2m])[{{ .elapsed }}:]) and on (node) kube_node_role{role="worker"}) 59 | metricName: kubeletCPU 60 | 61 | - query: process_resident_memory_bytes{service="kubelet",job="kubelet"} and on (node) topk(3,max_over_time(irate(process_resident_memory_bytes{service="kubelet",job="kubelet"}[2m])[{{ .elapsed }}:]) and on (node) kube_node_role{role="worker"}) 62 | metricName: kubeletMemory 63 | 64 | - query: irate(process_cpu_seconds_total{service="kubelet",job="crio"}[2m]) * 100 and on (node) topk(3,avg_over_time(irate(process_cpu_seconds_total{service="kubelet",job="crio"}[2m])[{{ .elapsed }}:]) and on (node) kube_node_role{role="worker"}) 65 | metricName: crioCPU 66 | 67 | - query: process_resident_memory_bytes{service="kubelet",job="crio"} and on (node) topk(3,max_over_time(irate(process_resident_memory_bytes{service="kubelet",job="crio"}[2m])[{{ .elapsed }}:]) and on (node) kube_node_role{role="worker"}) 68 | metricName: crioMemory 69 | 70 | # Etcd metrics 71 | 72 | - query: sum(rate(etcd_server_leader_changes_seen_total[2m])) 73 | metricName: etcdLeaderChangesRate 74 | 75 | - query: histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[2m])) 76 | metricName: 99thEtcdDiskBackendCommitDurationSeconds 77 | 78 | - query: histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[2m])) 79 | metricName: 99thEtcdDiskWalFsyncDurationSeconds 80 | 81 | - query: histogram_quantile(0.99, 
rate(etcd_network_peer_round_trip_time_seconds_bucket[5m])) 82 | metricName: 99thEtcdRoundTripTimeSeconds 83 | 84 | - query: sum by (cluster_version)(etcd_cluster_version) 85 | metricName: etcdVersion 86 | instant: true 87 | 88 | - query: cluster_version{type="completed"} 89 | metricName: clusterVersion 90 | instant: true 91 | 92 | # Cluster metrics 93 | 94 | - query: max_over_time( count(kube_pod_labels{label_kube_burner_job="network-policy-perf"})[{{ .elapsed }}:] ) 95 | metricName: podCount 96 | 97 | - query: max_over_time( count(kube_namespace_labels{label_kube_burner_job="network-policy-perf"})[{{ .elapsed }}:] ) 98 | metricName: namespaceCount 99 | 100 | - query: max_over_time( count(kube_networkpolicy_labels{networkpolicy=~"ingress.*"})[{{ .elapsed }}:] ) 101 | metricName: netpolIngressCount 102 | 103 | - query: max_over_time( count(kube_networkpolicy_labels{networkpolicy=~"egress.*"})[{{ .elapsed }}:] ) 104 | metricName: netpolEgressCount 105 | 106 | - query: kube_node_role 107 | metricName: nodeRoles 108 | 109 | - query: sum(kube_node_status_condition{status="true"}) by (condition) 110 | metricName: nodeStatus 111 | 112 | - query: kubernetes_build_info 113 | metricName: k8sVersion 114 | instant: true 115 | 116 | # Prometheus metrics 117 | 118 | - query: openshift:prometheus_tsdb_head_series:sum{job="prometheus-k8s"} 119 | metricName: prometheus-timeseriestotal 120 | 121 | - query: openshift:prometheus_tsdb_head_samples_appended_total:sum{job="prometheus-k8s"} 122 | metricName: prometheus-ingestionrate 123 | 124 | # OVS metrics 125 | - query: (sum(irate(container_cpu_usage_seconds_total{id=~"/system.slice/ovs-vswitchd.service"}[2m]) * 100) by (node)) > 0 126 | metricName: ovsVswitchdCPU 127 | 128 | - query: (sum(irate(container_cpu_usage_seconds_total{id=~"/system.slice/ovsdb-server.service"}[2m]) * 100) by (node)) > 0 129 | metricName: ovsdbServerCPU 130 | 131 | - query: ovs_vswitchd_bridge_flows_total 132 | metricName: ovsFlowsCounter 133 | 134 | - query: ovs_vswitchd_rconn_discarded 135 | metricName: ovsVswitchdRconnDiscarded 136 | 137 | - query: ovs_vswitchd_rconn_overflow 138 | metricName: ovsVswitchdRconnOverflow 139 | 140 | - query: ovs_vswitchd_stream_open 141 | metricName: ovsVswitchdStreamOpen 142 | 143 | # OVN metrics 144 | - query: ovn_controller_rconn_discarded 145 | metricName: ovnControllerRconnDiscarded 146 | 147 | - query: ovn_controller_rconn_overflow 148 | metricName: ovnControllerRconnOverflow 149 | 150 | - query: ovn_controller_flow_generation_95th_percentile 151 | metricName: ovnControllerFlowGeneration95Perc 152 | 153 | - query: ovn_controller_flow_installation_95th_percentile 154 | metricName: ovnControllerFlowInstallation95Perc -------------------------------------------------------------------------------- /kube-burner-workload/openshift/openflow-tracker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:slim 2 | RUN apt update && \ 3 | apt install -y curl openvswitch-switch ovn-central 4 | COPY openflow-tracker.py openflow-tracker.py 5 | COPY ./requirements.txt requirements.txt 6 | RUN pip install -r requirements.txt 7 | -------------------------------------------------------------------------------- /kube-burner-workload/openshift/openflow-tracker/openflow-tracker.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import os 4 | import ssl 5 | import sys 6 | import time 7 | import subprocess 8 | 9 | from opensearchpy 
import OpenSearch 10 | 11 | 12 | def index_result(payload, retry_count=3): 13 | print(f"Indexing documents in {es_index}") 14 | while retry_count > 0: 15 | try: 16 | ssl_ctx = ssl.create_default_context() 17 | ssl_ctx.check_hostname = False 18 | ssl_ctx.verify_mode = ssl.CERT_NONE 19 | es = OpenSearch([es_server]) 20 | es.index(index=es_index, body=payload) 21 | retry_count = 0 22 | except Exception as e: 23 | logging.info("Failed Indexing", e) 24 | logging.info("Retrying to index...") 25 | retry_count -= 1 26 | 27 | 28 | def get_number_of_ovs_flows(): 29 | try: 30 | output = subprocess.run( 31 | ["ovs-ofctl", "dump-aggregate", "br-int"], capture_output=True, text=True 32 | ) 33 | result = output.stdout 34 | return int(result.split("flow_count=")[1]) 35 | except Exception as e: 36 | logging.info(f"Failed getting flows count: {e}") 37 | return 0 38 | 39 | 40 | def get_number_of_logical_flows(): 41 | output = subprocess.run( 42 | ["ovn-sbctl", "--no-leader-only", "--columns=_uuid", "list", "logical_flow"], 43 | capture_output=True, 44 | text=True, 45 | ) 46 | if len(output.stderr) != 0: 47 | return 0 48 | output_lines = output.stdout.splitlines() 49 | return len(output_lines) // 2 + 1 50 | 51 | 52 | # poll_interval in seconds, float 53 | # convergence_period in seconds, for how long number of flows shouldn't change to consider it stable 54 | # convergence_timeout in seconds, for how long number to wait for stabilisation before timing out 55 | # timout in seconds 56 | def wait_for_flows_to_stabilize( 57 | poll_interval, convergence_period, convergence_timeout, node_name 58 | ): 59 | timed_out = False 60 | timeout = convergence_timeout + convergence_period 61 | start = time.time() 62 | last_changed = time.time() 63 | ovs_flows_num = get_number_of_ovs_flows() 64 | ovs_flows_converged_num = ovs_flows_num 65 | logical_flows_num = get_number_of_logical_flows() 66 | logical_flows_converged_num = logical_flows_num 67 | while ( 68 | time.time() - last_changed < convergence_period 69 | and time.time() - start < timeout 70 | ): 71 | new_logical_flows_num = get_number_of_logical_flows() 72 | if new_logical_flows_num != logical_flows_num: 73 | if abs(new_logical_flows_num - logical_flows_converged_num) > 50: 74 | # allow minor fluctuations within 50 logical flows range to not interrupt convergence 75 | last_changed = time.time() 76 | logical_flows_converged_num = new_logical_flows_num 77 | logical_flows_num = new_logical_flows_num 78 | logging.info( 79 | f"{node_name}: logical flows={new_logical_flows_num}, " 80 | f"convergence flows={logical_flows_converged_num}" 81 | ) 82 | else: 83 | new_ovs_flows_num = get_number_of_ovs_flows() 84 | if new_ovs_flows_num != ovs_flows_num: 85 | if abs(new_ovs_flows_num - ovs_flows_converged_num) > 100: 86 | # allow minor fluctuations within 100 OVS flows range to not interrupt convergence 87 | last_changed = time.time() 88 | ovs_flows_converged_num = new_ovs_flows_num 89 | ovs_flows_num = new_ovs_flows_num 90 | logging.info( 91 | f"{node_name}: OVS flows={new_ovs_flows_num}, " 92 | f"convergence flows={ovs_flows_converged_num}" 93 | ) 94 | 95 | time.sleep(poll_interval) 96 | if time.time() - start >= timeout: 97 | timed_out = True 98 | logging.info(f"TIMEOUT: {node_name} {timeout} seconds passed") 99 | return last_changed, ovs_flows_num, timed_out 100 | 101 | 102 | def get_db_data(): 103 | results = {} 104 | for table in ["acl", "port_group", "address_set"]: 105 | output = subprocess.run( 106 | ["ovn-nbctl", "--no-leader-only", "--columns=_uuid", "list", table], 107 | 
capture_output=True, 108 | text=True, 109 | ) 110 | if len(output.stderr) != 0: 111 | continue 112 | output_lines = output.stdout.splitlines() 113 | results[table] = len(output_lines) // 2 + 1 114 | for table in ["logical_flow"]: 115 | output = subprocess.run( 116 | ["ovn-sbctl", "--no-leader-only", "--columns=_uuid", "list", table], 117 | capture_output=True, 118 | text=True, 119 | ) 120 | if len(output.stderr) != 0: 121 | continue 122 | output_lines = output.stdout.splitlines() 123 | results[table] = len(output_lines) // 2 + 1 124 | return results 125 | 126 | 127 | def is_ovnic(): 128 | output = subprocess.run(["ls", "/var/run/ovn-ic"], capture_output=True, text=True) 129 | return len(output.stdout.splitlines()) != 0 130 | 131 | 132 | def update_rundir(): 133 | output = subprocess.run( 134 | ["mount", "--bind", "/var/run/ovn-ic", "/var/run/ovn"], 135 | capture_output=True, 136 | text=True, 137 | ) 138 | if output.stderr != "": 139 | print("failed to update /var/run/ovn", output.stderr) 140 | return 1 141 | return 0 142 | 143 | 144 | def check_ovn_health(): 145 | ovn_ic = is_ovnic() 146 | concerning_logs = [] 147 | files = {"vswitchd": "/var/log/openvswitch/ovs-vswitchd.log"} 148 | output = subprocess.run(["ls", "/var/log/pods"], capture_output=True, text=True) 149 | for output_line in output.stdout.splitlines(): 150 | if "ovnkube-master" in output_line: 151 | files["northd"] = f"/var/log/pods/{output_line}/northd/0.log" 152 | if "ovnkube-node" in output_line: 153 | files[ 154 | "ovn-controller" 155 | ] = f"/var/log/pods/{output_line}/ovn-controller/0.log" 156 | if ovn_ic: 157 | files["northd"] = f"/var/log/pods/{output_line}/northd/0.log" 158 | for name, file in files.items(): 159 | output = subprocess.run(["cat", file], capture_output=True, text=True) 160 | if len(output.stderr) != 0: 161 | concerning_logs.append(f"failed to open {file}: {output.stderr}") 162 | else: 163 | output_lines = output.stdout.splitlines() 164 | for log_line in output_lines: 165 | if "no response to inactivity probe" in log_line: 166 | s = log_line.split("stderr F ") 167 | if len(s) > 1: 168 | timestamp = s[1] 169 | else: 170 | timestamp = s[0] 171 | timestamp = timestamp.split("|")[0] 172 | format_string = "%Y-%m-%dT%H:%M:%S.%fZ" 173 | datetime_object = datetime.datetime.strptime( 174 | timestamp, format_string 175 | ) 176 | if start_time < datetime_object: 177 | concerning_logs.append(name + ": " + log_line) 178 | return concerning_logs 179 | 180 | 181 | def main(): 182 | global es_server, es_index, start_time 183 | es_server = os.getenv("ES_SERVER") 184 | es_index = os.getenv("ES_INDEX_NETPOL") 185 | node_name = os.getenv("MY_NODE_NAME") 186 | uuid = os.getenv("UUID") 187 | convergence_period = int(os.getenv("CONVERGENCE_PERIOD")) 188 | convergence_timeout = int(os.getenv("CONVERGENCE_TIMEOUT")) 189 | start_time = datetime.datetime.now() 190 | 191 | logging.basicConfig( 192 | format="%(asctime)s %(levelname)-8s %(message)s", 193 | level=logging.INFO, 194 | datefmt="%Y-%m-%d %H:%M:%S", 195 | ) 196 | doc = { 197 | "metricName": "convergence_tracker_info", 198 | "timestamp": datetime.datetime.now(datetime.UTC), 199 | "workload": "network-policy-perf", 200 | "uuid": uuid, 201 | "source_name": node_name, 202 | "convergence_period": convergence_period, 203 | "convergence_timeout": convergence_timeout, 204 | "test_metadata": os.getenv("METADATA"), 205 | } 206 | index_result(doc) 207 | 208 | logging.info( 209 | f"Start openflow-tracker {node_name}, convergence_period {convergence_period}, convergence timeout 
{convergence_timeout}" 210 | ) 211 | 212 | if is_ovnic(): 213 | if update_rundir() != 0: 214 | sys.exit(1) 215 | stabilize_time, flow_num, timed_out = wait_for_flows_to_stabilize( 216 | 1, convergence_period, convergence_timeout, node_name 217 | ) 218 | stabilize_datetime = datetime.datetime.fromtimestamp(stabilize_time) 219 | nbdb_data = get_db_data() 220 | logging.info( 221 | f"RESULT: time={stabilize_datetime.isoformat(sep=' ', timespec='milliseconds')} {node_name} " 222 | f"finished with {flow_num} flows, nbdb data: {nbdb_data}" 223 | ) 224 | ovn_health_logs = check_ovn_health() 225 | if len(ovn_health_logs) == 0: 226 | logging.info(f"HEALTHCHECK: {node_name} has no problems") 227 | else: 228 | logging.info(f"HEALTHCHECK: {node_name} has concerning logs: {ovn_health_logs}") 229 | 230 | doc = { 231 | "metricName": "convergence_tracker", 232 | "timestamp": datetime.datetime.now(datetime.UTC), 233 | "workload": "network-policy-perf", 234 | "uuid": uuid, 235 | "source_name": node_name, 236 | "convergence_timestamp": stabilize_datetime, 237 | "nbdb": nbdb_data, 238 | "ovs_flows": flow_num, 239 | "unhealthy_logs": ovn_health_logs, 240 | } 241 | index_result(doc) 242 | sys.exit(int(timed_out)) 243 | 244 | 245 | if __name__ == "__main__": 246 | main() 247 | -------------------------------------------------------------------------------- /kube-burner-workload/openshift/openflow-tracker/requirements.txt: -------------------------------------------------------------------------------- 1 | datetime 2 | requests 3 | kubernetes 4 | opensearch-py 5 | -------------------------------------------------------------------------------- /kube-burner-workload/openshift/test_limit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | wait_cleanup () { 4 | IFS=" " read -r -a POD_NAMES <<< "$(oc get pods -n openshift-ovn-kubernetes -l app=ovnkube-node -o jsonpath='{.items[*].metadata.name}')" 5 | # POD_NAMES=($(oc get pods -n openshift-ovn-kubernetes -l app=ovnkube-node -o jsonpath='{.items[*].metadata.name}')) 6 | FLOW_COUNT=0 7 | for POD_NAME in "${POD_NAMES[@]}"; do 8 | POD_FLOW_COUNT=$(oc exec -n openshift-ovn-kubernetes "$POD_NAME" -c ovn-controller -- curl -s "127.0.0.1:29105/metrics"|grep ovs_vswitchd_bridge_flows_total|grep br-int|rev|cut -f1 -d' '|rev) 9 | if [ "$POD_FLOW_COUNT" -gt $FLOW_COUNT ]; then 10 | FLOW_COUNT=$POD_FLOW_COUNT 11 | fi 12 | done 13 | echo "$FLOW_COUNT" 14 | 15 | while [ "$FLOW_COUNT" -ge 10000 ]; do 16 | FLOW_COUNT=0 17 | for POD_NAME in "${POD_NAMES[@]}"; do 18 | POD_FLOW_COUNT=$(oc exec -n openshift-ovn-kubernetes "$POD_NAME" -c ovn-controller -- curl -s "127.0.0.1:29105/metrics"|grep ovs_vswitchd_bridge_flows_total|grep br-int|rev|cut -f1 -d' '|rev) 19 | if [ "$POD_FLOW_COUNT" -gt $FLOW_COUNT ]; then 20 | FLOW_COUNT=$POD_FLOW_COUNT 21 | fi 22 | done 23 | echo "$FLOW_COUNT" 24 | sleep 60 25 | done 26 | echo "shutdown succeeded" 27 | } 28 | 29 | pushd .. 30 | source ./env 31 | NETPOLS_PER_NAMESPACE=50 32 | STEP=50 33 | expectedStatus=0 34 | status=$expectedStatus 35 | while [ $status -eq $expectedStatus ]; do 36 | echo "Network Policies per namespace=$NETPOLS_PER_NAMESPACE" 37 | wait_cleanup 38 | kube-burner init -m ./openshift/metrics.yml -c ./network-policy.yaml -u "https://$(oc get route prometheus-k8s -n openshift-monitoring -o jsonpath="{.spec.host}")" --token="$(oc create token prometheus-k8s -n openshift-monitoring)" 39 | status=$? 
40 | if [ $STEP -eq 0 ]; then 41 | echo "One iteration is finished" 42 | exit 0 43 | fi 44 | NETPOLS_PER_NAMESPACE=$((NETPOLS_PER_NAMESPACE + STEP)) 45 | done 46 | popd || exit -------------------------------------------------------------------------------- /kube-burner-workload/ovn-kubernetes/README.md: -------------------------------------------------------------------------------- 1 | ## Running 2 | 3 | 1. Get ovn-kubernetes code from https://github.com/ovn-org/ovn-kubernetes/tree/master and start a KIND cluster with ./contrib/kind.sh 4 | (more details in https://github.com/ovn-org/ovn-kubernetes/blob/master/docs/kind.md). 5 | This should give you a local kubeconfig that can be used in the scale test. 6 | 7 | 2. Follow [network-policy instructions](../README.md#running) to run the workload 8 | 9 | 2.1 Set env file variable PLATFORM=ovn-kubernetes 10 | 11 | 3. Track convergence with `kubectl logs -l app=convergence-tracker -n convergence-tracker-0 -f` 12 | -------------------------------------------------------------------------------- /kube-burner-workload/ovn-kubernetes/convergence_tracker.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: convergence-tracker-{{.Replica}} 5 | labels: 6 | app: convergence-tracker 7 | spec: 8 | topologySpreadConstraints: 9 | - maxSkew: 1 10 | topologyKey: kubernetes.io/hostname 11 | whenUnsatisfiable: DoNotSchedule 12 | labelSelector: 13 | matchLabels: 14 | app: convergence-tracker 15 | volumes: 16 | - name: openvswitch 17 | hostPath: 18 | path: /var/run/openvswitch 19 | - name: host-var-log-ovs 20 | hostPath: 21 | path: /var/log/openvswitch 22 | restartPolicy: Never 23 | containers: 24 | - name: tracker 25 | # image built with the ./openflow-tracker/Dockerfile 26 | image: quay.io/npinaeva/netpol-scale:ovn-kubernetes 27 | command: [ "/bin/bash", "-c", "python openflow-tracker.py"] 28 | imagePullPolicy: Always 29 | volumeMounts: 30 | - name: openvswitch 31 | mountPath: /var/run/openvswitch 32 | - name: openvswitch 33 | mountPath: /var/run/ovn 34 | - name: host-var-log-ovs 35 | mountPath: /var/log/openvswitch 36 | env: 37 | - name: CONVERGENCE_PERIOD 38 | value: "{{.convergence_period}}" 39 | - name: CONVERGENCE_TIMEOUT 40 | value: "{{.convergence_timeout}}" 41 | - name: ES_SERVER 42 | value: {{.es_server}} 43 | - name: ES_INDEX_NETPOL 44 | value: {{.es_index}} 45 | - name: UUID 46 | value: {{.UUID}} 47 | - name: MY_NODE_NAME 48 | valueFrom: 49 | fieldRef: 50 | fieldPath: spec.nodeName 51 | -------------------------------------------------------------------------------- /kube-burner-workload/ovn-kubernetes/openflow-tracker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:slim 2 | RUN apt update && \ 3 | apt install -y curl openvswitch-switch ovn-central 4 | COPY openflow-tracker.py openflow-tracker.py 5 | COPY ./requirements.txt requirements.txt 6 | RUN pip install -r requirements.txt 7 | -------------------------------------------------------------------------------- /kube-burner-workload/ovn-kubernetes/openflow-tracker/openflow-tracker.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import os 4 | import sys 5 | import time 6 | import subprocess 7 | 8 | 9 | def get_number_of_flows(): 10 | try: 11 | output = subprocess.run( 12 | ["ovs-ofctl", "dump-aggregate", "br-int"], capture_output=True, text=True 13 | ) 14 | result = 
output.stdout 15 | return int(result.split("flow_count=")[1]) 16 | except Exception as e: 17 | logging.info(f"Failed getting flows count: {e}") 18 | return 0 19 | 20 | 21 | # poll_interval in seconds, float 22 | # convergence_period in seconds, for how long number of flows shouldn't change to consider it stable 23 | # convergence_timeout in seconds, for how long number to wait for stabilisation before timing out 24 | def wait_for_flows_to_stabilize( 25 | poll_interval, convergence_period, convergence_timeout, node_name 26 | ): 27 | timed_out = False 28 | timeout = convergence_timeout + convergence_period 29 | start = time.time() 30 | last_changed = time.time() 31 | flows_num = get_number_of_flows() 32 | while ( 33 | time.time() - last_changed < convergence_period 34 | and time.time() - start < timeout 35 | ): 36 | new_flows_num = get_number_of_flows() 37 | if new_flows_num != flows_num: 38 | flows_num = new_flows_num 39 | last_changed = time.time() 40 | logging.info(f"{node_name}: {new_flows_num}") 41 | 42 | time.sleep(poll_interval) 43 | if time.time() - start >= timeout: 44 | timed_out = True 45 | logging.info(f"TIMEOUT: {node_name} {timeout} seconds passed") 46 | return last_changed, flows_num, timed_out 47 | 48 | 49 | def get_db_data(): 50 | results = {} 51 | for table in ["acl", "port_group", "address_set"]: 52 | output = subprocess.run( 53 | ["ovn-nbctl", "--no-leader-only", "--columns=_uuid", "list", table], 54 | capture_output=True, 55 | text=True, 56 | ) 57 | if len(output.stderr) != 0: 58 | continue 59 | output_lines = output.stdout.splitlines() 60 | results[table] = len(output_lines) // 2 + 1 61 | for table in ["logical_flow"]: 62 | output = subprocess.run( 63 | ["ovn-sbctl", "--no-leader-only", "--columns=_uuid", "list", table], 64 | capture_output=True, 65 | text=True, 66 | ) 67 | if len(output.stderr) != 0: 68 | continue 69 | output_lines = output.stdout.splitlines() 70 | results[table] = len(output_lines) // 2 + 1 71 | return results 72 | 73 | 74 | def check_ovn_health(): 75 | concerning_logs = [] 76 | for file in [ 77 | "/var/log/openvswitch/ovn-controller.log", 78 | "/var/log/openvswitch/ovs-vswitchd.log", 79 | "/var/log/openvswitch/ovn-northd.log", 80 | ]: 81 | output = subprocess.run(["cat", file], capture_output=True, text=True) 82 | if len(output.stderr) != 0: 83 | continue 84 | else: 85 | output_lines = output.stdout.splitlines() 86 | for log_line in output_lines: 87 | if "no response to inactivity probe" in log_line: 88 | concerning_logs.append(log_line) 89 | return concerning_logs 90 | 91 | 92 | def main(): 93 | node_name = os.getenv("MY_NODE_NAME") 94 | convergence_period = int(os.getenv("CONVERGENCE_PERIOD")) 95 | convergence_timeout = int(os.getenv("CONVERGENCE_TIMEOUT")) 96 | 97 | logging.basicConfig( 98 | format="%(asctime)s %(levelname)-8s %(message)s", 99 | level=logging.INFO, 100 | datefmt="%Y-%m-%d %H:%M:%S", 101 | ) 102 | 103 | logging.info( 104 | f"Start openflow-tracker {node_name}, convergence_period {convergence_period}, convergence timeout {convergence_timeout}" 105 | ) 106 | stabilize_time, flow_num, timed_out = wait_for_flows_to_stabilize( 107 | 1, convergence_period, convergence_timeout, node_name 108 | ) 109 | stabilize_datetime = datetime.datetime.fromtimestamp(stabilize_time) 110 | nbdb_data = get_db_data() 111 | logging.info( 112 | f"RESULT: time={stabilize_datetime.isoformat(sep=' ', timespec='milliseconds')} {node_name} " 113 | f"finished with {flow_num} flows, nbdb data: {nbdb_data}" 114 | ) 115 | ovn_health_logs = check_ovn_health() 116 | 
if len(ovn_health_logs) == 0: 117 | logging.info(f"HEALTHCHECK: {node_name} has no problems") 118 | else: 119 | logging.info(f"HEALTHCHECK: {node_name} has concerning logs: {ovn_health_logs}") 120 | sys.exit(int(timed_out)) 121 | 122 | 123 | if __name__ == "__main__": 124 | main() 125 | -------------------------------------------------------------------------------- /kube-burner-workload/ovn-kubernetes/openflow-tracker/requirements.txt: -------------------------------------------------------------------------------- 1 | datetime 2 | requests 3 | kubernetes 4 | opensearch-py 5 | -------------------------------------------------------------------------------- /kube-burner-workload/pod.yml: -------------------------------------------------------------------------------- 1 | {{- $myDict := dict "test-pod" "true" "num" (toString .Replica) }} 2 | {{- $replicas := int .Replica }} 3 | {{- range $i, $e := until $replicas }} 4 | {{- $num := add $i 1 }} 5 | {{- $_ := set $myDict (toString $num) "true" }} 6 | {{- end }} 7 | {{- $labels := toJson $myDict }} 8 | apiVersion: v1 9 | kind: Pod 10 | metadata: 11 | name: test-pod-{{.Replica}} 12 | labels: {{$labels}} 13 | spec: 14 | affinity: 15 | podAntiAffinity: 16 | preferredDuringSchedulingIgnoredDuringExecution: 17 | - weight: 10 18 | podAffinityTerm: 19 | labelSelector: 20 | matchLabels: 21 | test-pod: "true" 22 | namespaceSelector: {} 23 | topologyKey: kubernetes.io/hostname 24 | - weight: 10 25 | podAffinityTerm: 26 | labelSelector: 27 | matchLabels: 28 | num: "{{.Replica}}" 29 | namespaceSelector: {} 30 | topologyKey: kubernetes.io/hostname 31 | # nodeAffinity: 32 | # requiredDuringSchedulingIgnoredDuringExecution: 33 | # nodeSelectorTerms: 34 | # - matchExpressions: 35 | # - key: node-role.kubernetes.io/control-plane 36 | # operator: DoesNotExist 37 | containers: 38 | - name: sleeper 39 | args: 40 | - sleep 41 | - infinity 42 | image: registry.k8s.io/pause:3.1 43 | imagePullPolicy: IfNotPresent 44 | ports: 45 | - containerPort: 8080 46 | protocol: TCP 47 | -------------------------------------------------------------------------------- /yaml-analysis/README.md: -------------------------------------------------------------------------------- 1 | ## Build and run 2 | 3 | To run this tool just build a binary with 4 | `go build .` 5 | and you will get `netpol_analysis` binary. 
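For example, a minimal build-and-run sketch (run from the repository root; the binary name assumes the module builds to `netpol_analysis` as stated above):

```shell
cd yaml-analysis
go build .
./netpol_analysis -h
```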
To see existing docs, use `netpol_analysis -h` 6 | 7 | ## Get statistics for given yamls 8 | `-print-graphs` option can display statistics about network policies, given yaml output of 9 | `kubectl get pods,namespace,networkpolicies -A -oyaml` 10 | 11 | ```shell 12 | ./netpol_analysis -print-graphs -yaml="path/to/file" 13 | Found: 5413 Pods, 604 Namespaces, 13678 NetworkPolicies 14 | Empty netpols: 3559, peers: 15423, deny-only netpols 495 15 | Average network policy profile: local pods=13.143703241895262 16 | cidrs=0.5431498411463399, single ports=0.8810941271118262, port ranges=0.0033789219629927593 17 | pod selectors=0.6241327886922129, peer pods=35.43462206776716, single ports=0.3548001737619461, port ranges=0.00021720243266724586 18 | 19 | Median network policy profile: local pods=6 20 | cidrs=1, single ports=1, port ranges=0 21 | pod selectors=1, peer pods=2, single ports=0, port ranges=0 22 | 23 | Local pods distribution 24 | 25 | 1 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 1436.0 26 | 2 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 559.0 27 | 3 pod(s): ▇▇ 54.0 28 | 4 pod(s): ▇▇▇▇▇▇▇▇▇ 243.0 29 | 5 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 2512.0 30 | 6 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 496.0 31 | 7 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 927.0 32 | 8 pod(s): ▇▇▇▇▇▇▇ 196.0 33 | 9 pod(s): ▇▇▇▇▇▇▇▇▇ 240.0 34 | 10 pod(s): ▇▇▇▇▇▇▇▇ 211.0 35 | 11 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 572.0 36 | 12 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 482.0 37 | 13 pod(s): ▇▇ 60.0 38 | 14 pod(s): ▇ 33.0 39 | 15 pod(s): ▇ 47.0 40 | 16 pod(s): ▇▇ 57.0 41 | 17 pod(s): ▇▇▇ 100.0 42 | 18 pod(s): ▇ 39.0 43 | 19 pod(s): ▇▇▇ 84.0 44 | 20 pod(s): ▇▇▇ 99.0 45 | 21 pod(s): ▇▇▇▇ 116.0 46 | 22 pod(s): ▇▇▇▇▇ 136.0 47 | 23 pod(s): ▇ 30.0 48 | 24 pod(s): ▇ 50.0 49 | 25 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇ 339.0 50 | 26 pod(s): ▇ 41.0 51 | 27 pod(s): 9.0 52 | 28 pod(s): 2.0 53 | 33 pod(s): 2.0 54 | 34 pod(s): 2.0 55 | 36 pod(s): 1.0 56 | 38 pod(s): 2.0 57 | 53 pod(s): 1.0 58 | 58 pod(s): 2.0 59 | 80 pod(s): 1.0 60 | 81 pod(s): 1.0 61 | 87 pod(s): 2.0 62 | 127 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 431.0 63 | 154 pod(s): 9.0 64 | Total: 9624 65 | 66 | CIDR peers distribution 67 | 68 | 0 CIDR(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 9208.0 69 | 1 CIDR(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 5521.0 70 | 2 CIDR(s): 72.0 71 | 3 CIDR(s): ▇▇▇ 346.0 72 | 4 CIDR(s): 7.0 73 | 5 CIDR(s): 1.0 74 | 6 CIDR(s): ▇▇ 263.0 75 | 7 CIDR(s): 2.0 76 | 14 CIDR(s): 2.0 77 | 21 CIDR(s): 1.0 78 | Total: 15423 79 | 80 | Pod selector peers distribution 81 | 82 | 0 pod selector(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 6215.0 83 | 1 pod selector(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 8790.0 84 | 2 pod selector(s): ▇▇▇▇ 418.0 85 | Total: 15423 86 | 87 | Peer pods distribution 88 | 89 | 1 peer pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇ 590.0 90 | 2 peer pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 4621.0 91 | 3 peer pod(s): ▇▇▇▇▇▇▇▇ 393.0 92 | 4 peer pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 649.0 93 | 5 peer pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 868.0 94 | 6 peer pod(s): ▇▇▇▇▇▇▇▇▇ 433.0 95 | 7 peer pod(s): ▇▇▇▇▇▇▇▇ 384.0 96 | 8 peer pod(s): ▇ 62.0 97 | 9 peer pod(s): ▇ 63.0 98 | 10 peer pod(s): ▇ 75.0 99 | 11 peer pod(s): 18.0 100 | 12 peer pod(s): ▇ 75.0 101 | 13 peer pod(s): ▇▇▇▇▇▇▇ 346.0 102 | 14 peer pod(s): 10.0 103 | 15 
peer pod(s): 10.0 104 | 16 peer pod(s): 12.0 105 | 17 peer pod(s): 22.0 106 | 18 peer pod(s): 10.0 107 | 19 peer pod(s): 20.0 108 | 20 peer pod(s): 25.0 109 | 21 peer pod(s): 27.0 110 | 22 peer pod(s): 33.0 111 | 23 peer pod(s): 10.0 112 | 24 peer pod(s): 15.0 113 | 25 peer pod(s): 12.0 114 | 26 peer pod(s): 10.0 115 | 27 peer pod(s): 3.0 116 | 28 peer pod(s): 1.0 117 | 34 peer pod(s): 1.0 118 | 36 peer pod(s): 1.0 119 | 42 peer pod(s): 6.0 120 | 58 peer pod(s): 1.0 121 | 80 peer pod(s): 40.0 122 | 94 peer pod(s): 1.0 123 | 127 peer pod(s): ▇▇▇▇▇▇ 288.0 124 | 154 peer pod(s): 3.0 125 | 3578 peer pod(s): ▇ 70.0 126 | Total: 9208 127 | 128 | Single port peers distribution (CIDRs) 129 | 130 | 0 single port(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 1147.0 131 | 1 single port(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 4710.0 132 | 2 single port(s): ▇▇▇▇▇▇▇ 341.0 133 | 4 single port(s): 1.0 134 | 5 single port(s): 16.0 135 | Total: 6215 136 | 137 | Single port peers distribution (pod selectors) 138 | 139 | 0 single port(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 6370.0 140 | 1 single port(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 2417.0 141 | 2 single port(s): ▇▇▇▇▇▇ 416.0 142 | 3 single port(s): 3.0 143 | 4 single port(s): 1.0 144 | 5 single port(s): 1.0 145 | Total: 9208 146 | 147 | Port range peers distribution (CIDRs) 148 | 149 | 0 port ranges(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 6194.0 150 | 1 port ranges(s): 21.0 151 | Total: 6215 152 | 153 | Port range peers distribution (pod selectors) 154 | 155 | 0 port ranges(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 9206.0 156 | 1 port ranges(s): 2.0 157 | Total: 9208 158 | ``` 159 | 160 | To see which NetworkPolicies are empty (don't affect any connections) use `-print-empty-np` flag. 161 | 162 | ## Use scale profile results to predict if a workload can be handled 163 | 164 | ### Minimal profiles to cover all possible configurations 165 | 166 | We will use testing profile notation from the [SCALE_PROFILES](../kube-burner-workload/SCALE_PROFILES.md) 167 | `------`. 168 | 169 | We have 2 peers types: `cidr` and `pod_selector`, they may be joined in one profile, or split into separate profiles, 170 | but we need at least 1 profile that has non-zero value for these fields. 171 | 172 | For every peer type we need at least one profile with 0 single port and 0 port range, and at least one profile 173 | with non-zero single port and non-zero port ranges. 174 | 175 | The smallest profiles set to cover everything is 176 | - (1-1-0-0-1-1-1) - `cidr` + `pod_selector`, no ports 177 | - (1-1-1-1-1-1-1) - `cidr` + `pod_selector` + 1 single port + 1 port range 178 | 179 | ### Generating scale profiles results 180 | 181 | Scale profiles files can be generated using iterative test results tracked by [helper spreadsheet](https://docs.google.com/spreadsheets/d/1Kq1w8c8Z_wlhBOb_EID2nhvmwEi8H6pSxvtpDcbf-1M/edit?usp=sharing). 182 | To generate the file, put the name of a tab which contains test results [here](https://docs.google.com/spreadsheets/d/1Kq1w8c8Z_wlhBOb_EID2nhvmwEi8H6pSxvtpDcbf-1M/edit#gid=285018284&range=B1), 183 | it will populate the sheet with the results marked as ["BEST RESULT"](https://docs.google.com/spreadsheets/d/1Kq1w8c8Z_wlhBOb_EID2nhvmwEi8H6pSxvtpDcbf-1M/edit#gid=16759354&range=X:X)=true from the linked tab. 
184 | To get a file you can use with the `netpol_analysis` script (similar to the example [./profiles_example.csv](./profiles_example.csv)), 185 | go to the [export](https://docs.google.com/spreadsheets/d/1Kq1w8c8Z_wlhBOb_EID2nhvmwEi8H6pSxvtpDcbf-1M/edit#gid=1319766064) tab and save it as `csv`. 186 | You can also fill in a similar document manually. 187 | 188 | With the `-perf-profiles` flag, you will get a **safe** estimation for the set of network policies given via the `-yaml` option, 189 | plus some statistics about the heaviest network policies for the given set of performance profiles. 190 | 191 | The estimation uses the concept of a network policy "weight" to reflect the scale impact of a given policy. A cluster can only 192 | handle a set of network policies whose total weight is <= 1. If a performance profile says the cluster can handle 100 network policies with a 193 | given scale profile, then one such network policy weighs 1/100 = 0.01. 194 | 195 | #### Safe estimation 196 | 197 | The estimation is safe in the sense that if the workload is accepted (weight < 1), it is guaranteed to work 198 | based on the given profiles data. When the weight is greater than 1, the workload won't necessarily fail, 199 | because the approximation adds some overhead when simplifying a generic network policy to the set of given profiles. 200 | 201 | ```shell 202 | ./netpol_analysis -yaml="path/to/file" -perf-profiles=./profiles_example.csv 203 | Found: 5413 Pods, 604 Namespaces, 13678 NetworkPolicies 204 | Empty netpols: 3559, peers: 15423, deny-only netpols 495 205 | Matched 9624 netpols with given profiles 206 | Final Weight=3.639694444444388, if < 1, the workload is accepted 207 | 208 | 5 heaviest netpols are (profile idx start with 1): 209 | namespace-1/netpol-1 210 | config: localpods=127, rules: 211 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 212 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 213 | matched profiles: 214 | {idx:11 copies:936 weight:0.1872} 215 | {idx:5 copies:127 weight:0.0015875000000000002} 216 | weight: 0.1887875 217 | namespace-1/netpol-2 218 | config: localpods=15, rules: 219 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 220 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:2}] 221 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 222 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 223 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 224 | matched profiles: 225 | {idx:11 copies:144 weight:0.028800000000000003} 226 | {idx:5 copies:30 weight:0.000375} 227 | {idx:5 copies:15 weight:0.0001875} 228 | {idx:11 copies:144 weight:0.028800000000000003} 229 | {idx:5 copies:15 weight:0.0001875} 230 | weight: 0.058350000000000006 231 | namespace-2/netpol-1 232 | config: localpods=14, rules: 233 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 234 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:2}] 235 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 236 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 237 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 238 | matched profiles: 239 | {idx:11 copies:144 weight:0.028800000000000003} 240 | {idx:5 copies:28 weight:0.00035} 241 | {idx:5 copies:14 weight:0.000175} 242 | {idx:11 copies:144 weight:0.028800000000000003} 243 | {idx:5
copies:14 weight:0.000175} 244 | weight: 0.05830000000000001 245 | namespace-3/netpol-4 246 | config: localpods=12, rules: 247 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 248 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 249 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 250 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:2}] 251 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 252 | matched profiles: 253 | {idx:7 copies:864 weight:0.028800000000000003} 254 | {idx:5 copies:12 weight:0.00015000000000000001} 255 | {idx:7 copies:864 weight:0.028800000000000003} 256 | {idx:5 copies:24 weight:0.00030000000000000003} 257 | {idx:5 copies:12 weight:0.00015000000000000001} 258 | weight: 0.05820000000000001 259 | namespace-1/netpol-5 260 | config: localpods=33, rules: 261 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 262 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 263 | matched profiles: 264 | {idx:11 copies:288 weight:0.057600000000000005} 265 | {idx:5 copies:33 weight:0.0004125} 266 | weight: 0.05801250000000001 267 | 268 | Initial 15423 peers were split into 174057 profiles. 269 | Used profiles statistics (number of copies) 270 | 271 | 1st profile: 326.0 272 | 2nd profile: ▇▇ 1943.0 273 | 3rd profile: 1.0 274 | 4th profile: 840.0 275 | 5th profile: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 94519.0 276 | 6th profile: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 30789.0 277 | 7th profile: ▇▇▇▇▇▇▇▇▇▇▇▇▇ 12959.0 278 | 8th profile: ▇▇▇▇▇▇▇▇▇▇▇▇ 11663.0 279 | 9th profile: ▇ 1456.0 280 | 10th profile: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 14212.0 281 | 11th profile: ▇▇▇▇ 4373.0 282 | 15th profile: 5.0 283 | 16th profile: ▇ 971.0 284 | 285 | 5th profile (5436 peers) stats: 286 | 1st heaviest weight: 0.00385000 used by 1 peer(s) 287 | localpods=154 288 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:2}] 289 | 2nd heaviest weight: 0.00192500 used by 6 peer(s) 290 | localpods=154 291 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 292 | localpods=154 293 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 294 | localpods=154 295 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:2}] 296 | localpods=154 297 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:2}] 298 | localpods=154 299 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:2}] 300 | 3rd heaviest weight: 0.00158750 used by 418 peer(s) 301 | localpods=127 302 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 303 | localpods=127 304 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:2}] 305 | localpods=127 306 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:2}] 307 | localpods=127 308 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:2}] 309 | localpods=127 310 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:2}] 311 | 4th heaviest weight: 0.00072500 used by 1 peer(s) 312 | localpods=58 313 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 314 | 5th heaviest weight: 0.00062500 used by 1 peer(s) 315 | localpods=25 316 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:2}] 317 | 6th profile (2118 peers) stats: 318 | 1st 
heaviest weight: 0.00200000 used by 1 peer(s) 319 | localpods=80 320 | ports=[single: 2, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:1}] 321 | 2nd heaviest weight: 0.00065000 used by 8 peer(s) 322 | localpods=26 323 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:6}] 324 | localpods=26 325 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 326 | localpods=26 327 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:5}] 328 | localpods=26 329 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 330 | localpods=26 331 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:6}] 332 | 3rd heaviest weight: 0.00062500 used by 174 peer(s) 333 | localpods=25 334 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:6}] 335 | localpods=25 336 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 337 | localpods=25 338 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:5}] 339 | localpods=25 340 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 341 | localpods=25 342 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:6}] 343 | 4th heaviest weight: 0.00060000 used by 12 peer(s) 344 | localpods=24 345 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:6}] 346 | localpods=24 347 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 348 | localpods=24 349 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:5}] 350 | localpods=24 351 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 352 | localpods=24 353 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:6}] 354 | 5th heaviest weight: 0.00057500 used by 4 peer(s) 355 | localpods=23 356 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:6}] 357 | localpods=23 358 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 359 | localpods=23 360 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:5}] 361 | localpods=23 362 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 363 | 10th profile (4086 peers) stats: 364 | 1st heaviest weight: 0.00546000 used by 1 peer(s) 365 | localpods=127 366 | ports=[single: 1, ranges: 0], peers=[{cidrs:21 podSelectors:0 peerPods:0}] 367 | 2nd heaviest weight: 0.00384000 used by 1 peer(s) 368 | localpods=154 369 | ports=[single: 4, ranges: 0], peers=[{cidrs:3 podSelectors:0 peerPods:0}] 370 | 3rd heaviest weight: 0.00364000 used by 2 peer(s) 371 | localpods=127 372 | ports=[single: 1, ranges: 0], peers=[{cidrs:14 podSelectors:0 peerPods:0}] 373 | localpods=127 374 | ports=[single: 1, ranges: 0], peers=[{cidrs:14 podSelectors:0 peerPods:0}] 375 | 4th heaviest weight: 0.00192000 used by 1 peer(s) 376 | localpods=154 377 | ports=[single: 2, ranges: 0], peers=[{cidrs:3 podSelectors:0 peerPods:0}] 378 | 5th heaviest weight: 0.00130000 used by 5 peer(s) 379 | localpods=127 380 | ports=[single: 5, ranges: 0], peers=[{cidrs:1 podSelectors:0 peerPods:0}] 381 | localpods=127 382 | ports=[single: 5, ranges: 0], peers=[{cidrs:1 podSelectors:0 peerPods:0}] 383 | localpods=127 384 | ports=[single: 5, ranges: 0], peers=[{cidrs:1 podSelectors:0 peerPods:0}] 385 | localpods=127 386 | ports=[single: 1, ranges: 0], peers=[{cidrs:5 podSelectors:0 peerPods:0}] 387 | localpods=127 388 | ports=[single: 5, ranges: 0], peers=[{cidrs:1 
podSelectors:0 peerPods:0}] 389 | 7th profile (469 peers) stats: 390 | 1st heaviest weight: 0.02880000 used by 2 peer(s) 391 | localpods=12 392 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 393 | localpods=12 394 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 395 | 2nd heaviest weight: 0.01440000 used by 1 peer(s) 396 | localpods=6 397 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 398 | 3rd heaviest weight: 0.01200000 used by 1 peer(s) 399 | localpods=5 400 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 401 | 4th heaviest weight: 0.00960000 used by 3 peer(s) 402 | localpods=4 403 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 404 | localpods=4 405 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 406 | localpods=4 407 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 408 | 5th heaviest weight: 0.00720000 used by 9 peer(s) 409 | localpods=3 410 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 411 | localpods=3 412 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 413 | localpods=3 414 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 415 | localpods=3 416 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 417 | localpods=3 418 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 419 | 8th profile (719 peers) stats: 420 | 1st heaviest weight: 0.04233333 used by 1 peer(s) 421 | localpods=127 422 | ports=[single: 5, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:127}] 423 | 2nd heaviest weight: 0.02540000 used by 1 peer(s) 424 | localpods=127 425 | ports=[single: 3, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:127}] 426 | 3rd heaviest weight: 0.01693333 used by 2 peer(s) 427 | localpods=127 428 | ports=[single: 2, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:127}] 429 | localpods=127 430 | ports=[single: 2, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:127}] 431 | 4th heaviest weight: 0.01270000 used by 2 peer(s) 432 | localpods=127 433 | ports=[single: 3, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:13}] 434 | localpods=127 435 | ports=[single: 3, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:13}] 436 | 5th heaviest weight: 0.00846667 used by 5 peer(s) 437 | localpods=127 438 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:127}] 439 | localpods=127 440 | ports=[single: 2, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:13}] 441 | localpods=127 442 | ports=[single: 2, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:13}] 443 | localpods=127 444 | ports=[single: 2, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:13}] 445 | localpods=127 446 | ports=[single: 2, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:94}] 447 | 11th profile (464 peers) stats: 448 | 1st heaviest weight: 0.18720000 used by 1 peer(s) 449 | localpods=127 450 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 451 | 2nd heaviest weight: 0.05760000 used by 1 peer(s) 452 | localpods=33 453 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 454 | 3rd heaviest weight: 0.04320000 used by 1 peer(s) 455 | localpods=27 456 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 457 | 4th heaviest weight: 0.02880000 used by 7 
peer(s) 458 | localpods=14 459 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 460 | localpods=14 461 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 462 | localpods=13 463 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 464 | localpods=17 465 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 466 | localpods=17 467 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 468 | 5th heaviest weight: 0.01440000 used by 8 peer(s) 469 | localpods=7 470 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 471 | localpods=7 472 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 473 | localpods=9 474 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 475 | localpods=9 476 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 477 | localpods=10 478 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 479 | 480 | ``` 481 | 482 | You can adjust the number of heaviest network policies to print with the `-print-heavy-np` flag (default 5). 483 | 484 | ### Most common value ranges 485 | 486 | - SINGLE_PORTS = 0-10 487 | - PORT_RANGE = 0-5 488 | - LOCAL_PODS = 1-250 (max pods per namespace) 489 | - CIDRS = 1-10 490 | - POD_SELECTORS = 1-10 491 | - selected pods = PEER_PODS*PEER_NAMESPACES = 1-3500 (all pods in the cluster) 492 | -------------------------------------------------------------------------------- /yaml-analysis/analyze.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | 7 | v1 "k8s.io/api/core/v1" 8 | networkingv1 "k8s.io/api/networking/v1" 9 | ) 10 | 11 | // findClosestProfile returns profilesMatch with minimal weight for a given netpolConfig and a set of profiles. 12 | // It also updates stats for a given netpolConfig.
13 | func findClosestProfile(npConfig *netpolConfig, existingProfiles []*perfProfile, stat *stats) (matchedProfiles profilesMatch, emptyPol bool) { 14 | if npConfig.localPods == 0 || len(npConfig.gressRules) == 0 { 15 | // that policy doesn't do anything 16 | emptyPol = true 17 | return 18 | } 19 | stat.localPods[npConfig.localPods] += 1 20 | // 2 local pods <= 2 netpol * 1 local pod 21 | // 2 pods selectors <= 2 netpol * 1 pods selector (gress rules) 22 | // 2 pods selected for a peer <= 2 peers * 1 pod 23 | // 2 CIDRs <= 2 peers with 1 cidr 24 | // 2 ports <= 2 peers with 1 port 25 | // same for ranges 26 | // CIDRs and pod selectors may be split into different profiles 27 | 28 | for _, peer := range npConfig.gressRules { 29 | stat.peersCounter += 1 30 | stat.singlePorts.Increment(peer.singlePorts, peer.cidrs > 0, peer.podSelectors > 0) 31 | stat.portRanges.Increment(peer.portRanges, peer.cidrs > 0, peer.podSelectors > 0) 32 | stat.cidrs[peer.cidrs] += 1 33 | stat.podSelectors[peer.podSelectors] += 1 34 | if peer.podSelectors != 0 { 35 | stat.peerPods[peer.peerPods] += 1 36 | } 37 | 38 | if len(existingProfiles) > 0 { 39 | // network policy may be split into CIDR-only and pod-selector-only profiles or 40 | // be fully Matched by one profile 41 | fullProfile := &profileMatch{} 42 | cidrProfile := &profileMatch{} 43 | podSelProfile := &profileMatch{} 44 | for idx, profile := range existingProfiles { 45 | // find the number of copies needed to match given peer 46 | copiesFull, copiesCIDR, copiesPodSel := matchProfile(profile, peer) 47 | if peer.cidrs == 0 && copiesPodSel != 0 { 48 | // if peer doesn't have cidrs, then podSelector match is full match 49 | copiesFull = copiesPodSel 50 | } 51 | if peer.podSelectors == 0 && copiesCIDR != 0 { 52 | // if peer doesn't have podSelectors, then CIDR match is full match 53 | copiesFull = copiesCIDR 54 | } 55 | if debug { 56 | fmt.Printf("DEBUG: matchProfile for %+v localpods %v %+v is %v %v %v\n", profile, npConfig.localPods, peer, copiesFull, copiesCIDR, copiesPodSel) 57 | } 58 | // check if current profile match has less weight and update running minimum 59 | updateMinimalMatch(fullProfile, npConfig.localPods, copiesFull, idx, profile) 60 | updateMinimalMatch(cidrProfile, npConfig.localPods, copiesCIDR, idx, profile) 61 | updateMinimalMatch(podSelProfile, npConfig.localPods, copiesPodSel, idx, profile) 62 | } 63 | 64 | // if network policy was split into CIDR-only and pod-selector-only profiles, the final weight 65 | // needs to be summarized 66 | combinedWeight := cidrProfile.weight + podSelProfile.weight 67 | // compare and accumulate, check for no match 68 | if (cidrProfile.copies == 0 || podSelProfile.copies == 0) && fullProfile.copies == 0 { 69 | // no match was found 70 | matchedProfiles = nil 71 | return 72 | } 73 | result := []*profileMatch{} 74 | if fullProfile.copies != 0 && fullProfile.weight <= combinedWeight { 75 | // use full match 76 | result = append(result, fullProfile) 77 | } else { 78 | // use cidr + selector 79 | result = append(result, cidrProfile, podSelProfile) 80 | } 81 | matchedProfiles = append(matchedProfiles, result...) 
82 | 83 | for _, profile := range result { 84 | if _, ok := stat.profilesToNetpols[profile.idx]; !ok { 85 | stat.profilesToNetpols[profile.idx] = map[float64][]*gressWithLocalPods{} 86 | } 87 | stat.profilesToNetpols[profile.idx][profile.weight] = append(stat.profilesToNetpols[profile.idx][profile.weight], 88 | &gressWithLocalPods{peer, npConfig.localPods}) 89 | } 90 | } 91 | } 92 | if debug { 93 | fmt.Printf("matched %v profiles:\n", len(matchedProfiles)) 94 | matchedProfiles.print("") 95 | } 96 | return 97 | } 98 | 99 | // updateMinimalMatch compares current match with minimal weight and updates it is newProfile's weight is less. 100 | func updateMinimalMatch(currentMin *profileMatch, localPods int, newCopies, newIdx int, newProfile *perfProfile) { 101 | localPodsMultiplier := topDiv(localPods, newProfile.localPods) 102 | newCopies = newCopies * localPodsMultiplier 103 | newWeight := float64(newCopies) * newProfile.weight 104 | if newCopies > 0 && (newWeight < currentMin.weight || currentMin.copies == 0) { 105 | currentMin.copies = newCopies 106 | currentMin.weight = newWeight 107 | currentMin.idx = newIdx 108 | } 109 | } 110 | 111 | func matchProfile(profile *perfProfile, peer *gressRule) (copiesFull, copiesCIDR, copiesSel int) { 112 | // check if ports config is correct 113 | // TODO may be improved to split profiles for single ports and port ranges in a similar way as 114 | // cidrs and pod selectors are split 115 | if peer.singlePorts != 0 && profile.singlePorts == 0 || peer.portRanges != 0 && profile.portRanges == 0 || 116 | (peer.singlePorts == 0 && peer.portRanges == 0 && (profile.singlePorts != 0 || profile.portRanges != 0)) { 117 | //fmt.Printf("ports config doesn't match\n") 118 | return 119 | } 120 | 121 | // can do full match 122 | portCopies := maxInt(topDiv(peer.singlePorts, profile.singlePorts), topDiv(peer.portRanges, profile.portRanges)) 123 | selectorMul := 0 124 | cidrMul := 0 125 | if peer.podSelectors > 0 && profile.podSelectors > 0 { 126 | selectorMul = topDiv(peer.podSelectors, profile.podSelectors) 127 | selectorMul *= topDiv(peer.peerPods, profile.peerPods) 128 | } 129 | if peer.cidrs > 0 && profile.CIDRs > 0 { 130 | cidrMul = topDiv(peer.cidrs, profile.CIDRs) 131 | } 132 | copiesFull = portCopies * selectorMul * cidrMul 133 | copiesSel = portCopies * selectorMul 134 | copiesCIDR = portCopies * cidrMul 135 | return 136 | } 137 | 138 | func analyze(netpolList []*networkingv1.NetworkPolicy, existingProfiles []*perfProfile, countSelected podsCounter) *stats { 139 | stat := newStats() 140 | // log every 10% progress 141 | logMul := len(netpolList) / 10 142 | nextLog := logMul 143 | if len(netpolList) < 500 { 144 | // don't log if there are not many netpols 145 | nextLog = -1 146 | } 147 | for i, netpol := range netpolList { 148 | if i == nextLog { 149 | fmt.Printf("INFO: %v Network Policies handled\n", i) 150 | nextLog += logMul 151 | } 152 | npConfig := getNetpolConfig(netpol, countSelected) 153 | matchedProfiles, emtyPol := findClosestProfile(npConfig, existingProfiles, stat) 154 | if emtyPol { 155 | if len(netpol.Spec.Egress) == 0 && len(netpol.Spec.Ingress) == 0 { 156 | stat.noPeersNetpols[netpol.Namespace] = append(stat.noPeersNetpols[netpol.Namespace], netpol.Name) 157 | stat.noPeersCounter += 1 158 | } else { 159 | stat.emptyNetpols[netpol.Namespace] = append(stat.emptyNetpols[netpol.Namespace], netpol.Name) 160 | stat.emptyCounter += 1 161 | } 162 | } else if len(existingProfiles) > 0 { 163 | if len(matchedProfiles) == 0 { 164 | fmt.Printf("ERROR: Closest 
profile for policy %s/%s not found\n", netpol.Namespace, netpol.Name) 165 | npConfig.print("") 166 | } else { 167 | stat.matchedNetpols += 1 168 | stat.weights = append(stat.weights, &netpolWeight{npConfig, matchedProfiles, matchedProfiles.weight(), netpol.Namespace + "/" + netpol.Name}) 169 | } 170 | } 171 | } 172 | return stat 173 | } 174 | 175 | var debug bool 176 | 177 | func main() { 178 | filePath := flag.String("yaml", "", "Required. Path to the yaml output of \"kubectl get pods,namespace,networkpolicies -A -oyaml\"") 179 | printEmptyNetpols := flag.Bool("print-empty-np", false, "Print empty network policies that don't have any effect.\n"+ 180 | "It may be useful to delete them if they are not needed.") 181 | printGraphs := flag.Bool("print-graphs", false, "Print statistics for netpol parameters.\n"+ 182 | "It may help you understand how network policies from a given file are configured, and which performance profiles will "+ 183 | "suit this workload the best.") 184 | profilesPath := flag.String("perf-profiles", "", "Path to the cvs-formatted test results.\n"+ 185 | "Expected data format: local_pods, gress_rules, single_ports, port_ranges, peer_pods, peer_namespaces, CIDRs, result") 186 | printHeavyNetpols := flag.Int("print-heavy-np", 5, "Print a given number of the heaviest network policies.\n"+ 187 | "It may be useful to review which network policies are considered the heaviest for a given set of performance profiles,\n"+ 188 | "and which new performance profiles may help better approximate this workload.\n"+ 189 | "Can only be used with -perf-profiles.") 190 | debugFlag := flag.Bool("debug", false, "Print debug info for profiles matching") 191 | flag.Parse() 192 | debug = *debugFlag 193 | 194 | pods := []*v1.Pod{} 195 | namespaces := []*v1.Namespace{} 196 | netpols := []*networkingv1.NetworkPolicy{} 197 | parseYamls(*filePath, &pods, &namespaces, &netpols) 198 | if len(namespaces) == 0 { 199 | fmt.Printf("WARNING: No namespaces are given\n") 200 | } 201 | fmt.Printf("Found: %v Pods, %v Namespaces, %v NetworkPolicies\n", len(pods), len(namespaces), len(netpols)) 202 | 203 | existingProfiles := []*perfProfile{} 204 | if *profilesPath != "" { 205 | existingProfiles = parseProfiles(*profilesPath) 206 | } 207 | 208 | statistics := analyze(netpols, existingProfiles, getPodsCounter(pods, namespaces)) 209 | statistics.print(*printEmptyNetpols, *printGraphs, *printHeavyNetpols, len(existingProfiles) == 0) 210 | } 211 | -------------------------------------------------------------------------------- /yaml-analysis/go.mod: -------------------------------------------------------------------------------- 1 | module netpol_analysis 2 | 3 | go 1.21 4 | 5 | toolchain go1.21.3 6 | 7 | require ( 8 | github.com/daoleno/tgraph v0.0.2 9 | k8s.io/api v0.29.0 10 | k8s.io/apimachinery v0.29.0 11 | k8s.io/client-go v0.29.0 12 | ) 13 | 14 | require ( 15 | github.com/fatih/color v1.9.0 // indirect 16 | github.com/go-logr/logr v1.3.0 // indirect 17 | github.com/gogo/protobuf v1.3.2 // indirect 18 | github.com/google/gofuzz v1.2.0 // indirect 19 | github.com/json-iterator/go v1.1.12 // indirect 20 | github.com/mattn/go-colorable v0.1.4 // indirect 21 | github.com/mattn/go-isatty v0.0.11 // indirect 22 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 23 | github.com/modern-go/reflect2 v1.0.2 // indirect 24 | golang.org/x/net v0.17.0 // indirect 25 | golang.org/x/sys v0.13.0 // indirect 26 | golang.org/x/text v0.13.0 // indirect 27 | gopkg.in/inf.v0 v0.9.1 // indirect 28 | 
gopkg.in/yaml.v2 v2.4.0 // indirect 29 | k8s.io/klog/v2 v2.110.1 // indirect 30 | k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect 31 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect 32 | sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect 33 | sigs.k8s.io/yaml v1.3.0 // indirect 34 | ) 35 | -------------------------------------------------------------------------------- /yaml-analysis/go.sum: -------------------------------------------------------------------------------- 1 | github.com/daoleno/tgraph v0.0.2 h1:/mhUodtmMfM5Nnc6eOLlwUhXCNP689RgqOkfgOB4uR0= 2 | github.com/daoleno/tgraph v0.0.2/go.mod h1:Me3/RyLAEy5g8q8rKfJGsm04eM007cb3e6kOnCQSFQI= 3 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 5 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 6 | github.com/fatih/color v1.9.0 h1:8xPHl4/q1VyqGIPif1F+1V3Y3lSmrq01EabUW3CoW5s= 7 | github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= 8 | github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= 9 | github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 10 | github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= 11 | github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= 12 | github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 13 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 14 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 15 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 16 | github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= 17 | github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 18 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 19 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 20 | github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= 21 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 22 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 23 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 24 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 25 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 26 | github.com/mattn/go-colorable v0.1.4 h1:snbPLB8fVfU9iwbbo30TPtbLRzwWu6aJS6Xh4eaaviA= 27 | github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= 28 | github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= 29 | github.com/mattn/go-isatty v0.0.11 h1:FxPOTFNqGkuDUGi3H/qkUbQO4ZiBa2brKq5r0l8TGeM= 30 | github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= 31 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 32 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 33 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod 
h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 34 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 35 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 36 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 37 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 38 | github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= 39 | github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= 40 | github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 41 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 42 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 43 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 44 | github.com/stretchr/testify v1.6.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 45 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 46 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 47 | github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 48 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 49 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 50 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 51 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 52 | golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 53 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 54 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 55 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 56 | golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 57 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 58 | golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= 59 | golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= 60 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 61 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 62 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 63 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 64 | golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 65 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 66 | golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 67 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 68 | golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= 69 | 
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 70 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 71 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 72 | golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= 73 | golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= 74 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 75 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 76 | golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= 77 | golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 78 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 79 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 80 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 81 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 82 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 83 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 84 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 85 | gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= 86 | gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= 87 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 88 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 89 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 90 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 91 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 92 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 93 | k8s.io/api v0.29.0 h1:NiCdQMY1QOp1H8lfRyeEf8eOwV6+0xA6XEE44ohDX2A= 94 | k8s.io/api v0.29.0/go.mod h1:sdVmXoz2Bo/cb77Pxi71IPTSErEW32xa4aXwKH7gfBA= 95 | k8s.io/apimachinery v0.29.0 h1:+ACVktwyicPz0oc6MTMLwa2Pw3ouLAfAon1wPLtG48o= 96 | k8s.io/apimachinery v0.29.0/go.mod h1:eVBxQ/cwiJxH58eK/jd/vAk4mrxmVlnpBH5J2GbMeis= 97 | k8s.io/client-go v0.29.0 h1:KmlDtFcrdUzOYrBhXHgKw5ycWzc3ryPX5mQe0SkG3y8= 98 | k8s.io/client-go v0.29.0/go.mod h1:yLkXH4HKMAywcrD82KMSmfYg2DlE8mepPR4JGSo5n38= 99 | k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= 100 | k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= 101 | k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= 102 | k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= 103 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= 104 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= 105 | sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= 106 | 
sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= 107 | sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= 108 | sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= 109 | -------------------------------------------------------------------------------- /yaml-analysis/helpers.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "sort" 7 | 8 | v1 "k8s.io/api/core/v1" 9 | networkingv1 "k8s.io/api/networking/v1" 10 | "k8s.io/client-go/kubernetes/scheme" 11 | ) 12 | 13 | func parseYamls(filename string, pods *[]*v1.Pod, namespaces *[]*v1.Namespace, netpols *[]*networkingv1.NetworkPolicy) { 14 | content, err := os.ReadFile(filename) 15 | if err != nil { 16 | fmt.Printf("ERROR: failed to read file %s: %v\n", filename, err) 17 | return 18 | } 19 | decode := scheme.Codecs.UniversalDeserializer().Decode 20 | obj, _, err := decode(content, nil, nil) 21 | if err != nil { 22 | fmt.Printf("ERROR: failed to decode yaml file %s: %v\n", filename, err) 23 | return 24 | } 25 | for _, rawObj := range obj.(*v1.List).Items { 26 | obj, _, err := decode(rawObj.Raw, nil, nil) 27 | if err != nil { 28 | fmt.Printf("ERROR: failed to decode object %s: %v\n", string(rawObj.Raw), err) 29 | return 30 | } 31 | if pod, ok := obj.(*v1.Pod); ok { 32 | *pods = append(*pods, pod) 33 | } else if namespace, ok := obj.(*v1.Namespace); ok { 34 | *namespaces = append(*namespaces, namespace) 35 | } else if netpol, ok := obj.(*networkingv1.NetworkPolicy); ok { 36 | *netpols = append(*netpols, netpol) 37 | } else { 38 | fmt.Printf("WARN: unexpected type %T\n", obj) 39 | } 40 | } 41 | } 42 | 43 | func maxInt(a, b int) int { 44 | if a > b { 45 | return a 46 | } 47 | return b 48 | } 49 | 50 | type numeric interface { 51 | int | float64 52 | } 53 | 54 | func sortedMap[T1 numeric, T2 any](m map[T1]T2, reverse bool) (keys []T1, values []T2) { 55 | for k := range m { 56 | keys = append(keys, k) 57 | } 58 | sort.Slice(keys, func(i, j int) bool { 59 | if !reverse { 60 | return keys[i] < keys[j] 61 | } else { 62 | return keys[i] > keys[j] 63 | } 64 | }) 65 | for _, k := range keys { 66 | values = append(values, m[k]) 67 | } 68 | return 69 | } 70 | 71 | type pair[T1 comparable, T2 numeric] struct { 72 | key T1 73 | value T2 74 | } 75 | 76 | func sortedMapByValue[T1 comparable, T2 numeric](m map[T1]T2, reverse bool) []pair[T1, T2] { 77 | pairs := []pair[T1, T2]{} 78 | for k, v := range m { 79 | pairs = append(pairs, pair[T1, T2]{k, v}) 80 | } 81 | sort.Slice(pairs, func(i, j int) bool { 82 | if !reverse { 83 | return pairs[i].value < pairs[j].value 84 | } else { 85 | return pairs[i].value > pairs[j].value 86 | } 87 | }) 88 | return pairs 89 | } 90 | 91 | func printMap[T1 comparable, T2 any](m map[T1]T2) string { 92 | s := "" 93 | for k, v := range m { 94 | s += fmt.Sprintf("%v: %v\n", k, v) 95 | } 96 | return s 97 | } 98 | 99 | func topDiv(a, b int) int { 100 | if a == 0 { 101 | return 1 102 | } 103 | res := a / b 104 | if a%b > 0 { 105 | res += 1 106 | } 107 | return res 108 | } 109 | 110 | func ordinalString(i int) string { 111 | switch i { 112 | case 1: 113 | return "1st" 114 | case 2: 115 | return "2nd" 116 | case 3: 117 | return "3rd" 118 | default: 119 | return fmt.Sprintf("%dth", i) 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /yaml-analysis/netpol_config.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | v1 "k8s.io/api/core/v1" 7 | networkingv1 "k8s.io/api/networking/v1" 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | "k8s.io/apimachinery/pkg/labels" 10 | "k8s.io/apimachinery/pkg/util/sets" 11 | ) 12 | 13 | type portConfig struct { 14 | singlePorts int 15 | portRanges int 16 | } 17 | 18 | type peerConfig struct { 19 | cidrs int 20 | podSelectors int 21 | peerPods int 22 | } 23 | 24 | type gressRule struct { 25 | portConfig 26 | peerConfig 27 | } 28 | 29 | func (r *gressRule) print(indent string) { 30 | fmt.Printf("%s\tports=[single: %v, ranges: %v], peers=[%+v]\n", indent, r.singlePorts, r.portRanges, r.peerConfig) 31 | } 32 | 33 | func (pc *peerConfig) join(pc2 *peerConfig) *peerConfig { 34 | if pc2 == nil { 35 | return pc 36 | } 37 | pc.cidrs += pc2.cidrs 38 | pc.podSelectors += pc2.podSelectors 39 | pc.peerPods = maxInt(pc.peerPods, pc2.peerPods) 40 | return pc 41 | } 42 | 43 | type netpolConfig struct { 44 | // TODO: differentiate ingress and egress? 45 | localPods int 46 | gressRules []*gressRule 47 | } 48 | 49 | func (c *netpolConfig) print(indent string) { 50 | fmt.Printf("%sconfig: localpods=%v, rules:\n", indent, c.localPods) 51 | for _, peer := range c.gressRules { 52 | peer.print(indent) 53 | } 54 | } 55 | 56 | func getGressRuleConfig(netpolNs string, policyPorts []networkingv1.NetworkPolicyPort, peers []networkingv1.NetworkPolicyPeer, 57 | countSelected podsCounter) (*portConfig, *peerConfig) { 58 | CIDRs := 0 59 | podSelectors := 0 60 | maxSelectedPods := 0 61 | 62 | ports := 0 63 | portRanges := 0 64 | for _, port := range policyPorts { 65 | if port.EndPort != nil { 66 | portRanges += 1 67 | } else { 68 | ports += 1 69 | } 70 | } 71 | for _, peer := range peers { 72 | if peer.IPBlock != nil { 73 | CIDRs += 1 74 | } else { 75 | podSelectors += 1 76 | selectedPods := countSelected(peer.PodSelector, netpolNs, peer.NamespaceSelector) 77 | maxSelectedPods = maxInt(maxSelectedPods, selectedPods) 78 | } 79 | } 80 | if CIDRs == 0 && (podSelectors == 0 || maxSelectedPods == 0) { 81 | return nil, nil 82 | } 83 | return &portConfig{ports, portRanges}, 84 | &peerConfig{CIDRs, 85 | podSelectors, 86 | maxSelectedPods, 87 | } 88 | } 89 | 90 | func getNetpolConfig(netpol *networkingv1.NetworkPolicy, countSelected podsCounter) *netpolConfig { 91 | localPods := countSelected(&netpol.Spec.PodSelector, netpol.Namespace, nil) 92 | portPeers := map[*portConfig]*peerConfig{} 93 | 94 | for _, egress := range netpol.Spec.Egress { 95 | portConf, peerConf := getGressRuleConfig(netpol.Namespace, egress.Ports, egress.To, countSelected) 96 | if portConf != nil { 97 | portPeers[portConf] = peerConf.join(portPeers[portConf]) 98 | } 99 | } 100 | for _, ingress := range netpol.Spec.Ingress { 101 | portConf, peerConf := getGressRuleConfig(netpol.Namespace, ingress.Ports, ingress.From, countSelected) 102 | if portConf != nil { 103 | portPeers[portConf] = peerConf.join(portPeers[portConf]) 104 | } 105 | } 106 | peers := []*gressRule{} 107 | for portConf, peerConf := range portPeers { 108 | peers = append(peers, &gressRule{ 109 | *portConf, *peerConf, 110 | }) 111 | } 112 | 113 | return &netpolConfig{ 114 | localPods: localPods, 115 | gressRules: peers, 116 | } 117 | } 118 | 119 | type podsCounter func(podSelector *metav1.LabelSelector, namespace string, namespaceSelector *metav1.LabelSelector) int 120 | 121 | // returns podsCounter 122 | func getPodsCounter(podsList 
[]*v1.Pod, nsList []*v1.Namespace) func(podSelector *metav1.LabelSelector, namespace string, namespaceSelector *metav1.LabelSelector) int { 123 | selectedCounter := map[string]int{} 124 | return func(podSelector *metav1.LabelSelector, namespace string, namespaceSelector *metav1.LabelSelector) int { 125 | stringSelector := podSelector.String() + namespace + namespaceSelector.String() 126 | if result, ok := selectedCounter[stringSelector]; ok { 127 | return result 128 | } 129 | matchPodSelector := func(pod *v1.Pod) bool { 130 | if podSelector != nil { 131 | sel, err := metav1.LabelSelectorAsSelector(podSelector) 132 | if err != nil { 133 | fmt.Println("ERROR") 134 | return false 135 | } 136 | return sel.Matches(labels.Set(pod.Labels)) 137 | } else { 138 | return true 139 | } 140 | } 141 | matchNamespace := func(ns *v1.Namespace) bool { 142 | if namespaceSelector != nil { 143 | sel, err := metav1.LabelSelectorAsSelector(namespaceSelector) 144 | if err != nil { 145 | fmt.Println("ERROR") 146 | return false 147 | } 148 | return sel.Matches(labels.Set(ns.Labels)) 149 | } else if namespace != "" { 150 | return ns.Name == namespace 151 | } else { 152 | return true 153 | } 154 | } 155 | result := 0 156 | matchedNamespaces := sets.Set[string]{} 157 | for _, ns := range nsList { 158 | if matchNamespace(ns) { 159 | matchedNamespaces.Insert(ns.Name) 160 | } 161 | } 162 | 163 | matchPod := func(pod *v1.Pod) bool { 164 | return matchPodSelector(pod) && matchedNamespaces.Has(pod.Namespace) 165 | } 166 | if len(nsList) == 0 { 167 | matchPod = func(pod *v1.Pod) bool { 168 | return matchPodSelector(pod) && (namespace == "" || pod.Namespace == namespace) 169 | } 170 | } 171 | 172 | for _, pod := range podsList { 173 | if matchPod(pod) { 174 | result += 1 175 | } 176 | } 177 | selectedCounter[stringSelector] = result 178 | return result 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /yaml-analysis/profile.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "os" 7 | "strconv" 8 | ) 9 | 10 | // ------ 11 | type perfProfile struct { 12 | localPods int 13 | singlePorts int 14 | portRanges int 15 | podSelectors int 16 | // peer namespace just affects the number of peer pods in the end 17 | // now used for now 18 | //peerNamespaces int 19 | peerPods int 20 | CIDRs int 21 | // weight = 1/number of policies with this profile 22 | weight float64 23 | } 24 | 25 | func newProfile(localPods, podSelectors, singlePorts, portRanges, peerPods, peerNamespaces, CIDRs, amount int) *perfProfile { 26 | return &perfProfile{ 27 | localPods: localPods, 28 | singlePorts: singlePorts, 29 | portRanges: portRanges, 30 | podSelectors: podSelectors, 31 | peerPods: peerPods * peerNamespaces, 32 | CIDRs: CIDRs, 33 | weight: 1.0 / float64(amount), 34 | } 35 | } 36 | 37 | type profileMatch struct { 38 | // profile index in a given file, indexing starts with 0 39 | idx int 40 | copies int 41 | // summarized weight for copies 42 | weight float64 43 | } 44 | 45 | type profilesMatch []*profileMatch 46 | 47 | func (matches profilesMatch) print(indent string) { 48 | fmt.Printf("%smatched profiles:\n", indent) 49 | for _, match := range matches { 50 | readableMatch := *match 51 | readableMatch.idx += 1 52 | fmt.Printf("%s\t%+v\n", indent, readableMatch) 53 | } 54 | } 55 | 56 | func (matches profilesMatch) weight() float64 { 57 | res := 0.0 58 | for _, match := range matches { 59 | res += 
match.weight 60 | } 61 | return res 62 | } 63 | 64 | func parseProfiles(filename string) []*perfProfile { 65 | f, err := os.Open(filename) 66 | if err != nil { 67 | fmt.Printf("ERROR: failed to read file %s: %v\n", filename, err) 68 | return nil 69 | } 70 | defer f.Close() 71 | csvReader := csv.NewReader(f) 72 | records, err := csvReader.ReadAll() 73 | if err != nil { 74 | fmt.Printf("ERROR: failed parse profiles: %v\n", err) 75 | return nil 76 | } 77 | profiles := []*perfProfile{} 78 | for _, record := range records { 79 | ints := []int{} 80 | for _, strInt := range record { 81 | counter, err := strconv.Atoi(strInt) 82 | if err != nil { 83 | fmt.Printf("ERROR: failed to convert str %s to int: %v\n", strInt, err) 84 | return nil 85 | } 86 | ints = append(ints, counter) 87 | } 88 | if len(ints) != 8 { 89 | fmt.Printf("ERROR: failed to read a profile: expected 8 ints, got %v\n", len(ints)) 90 | return nil 91 | } 92 | profiles = append(profiles, newProfile(ints[0], ints[1], ints[2], ints[3], ints[4], ints[5], ints[6], ints[7])) 93 | } 94 | return profiles 95 | } 96 | -------------------------------------------------------------------------------- /yaml-analysis/profiles_example.csv: -------------------------------------------------------------------------------- 1 | 1,0,0,0,0,0,1,100000 2 | 1,0,1,0,0,0,1,100000 3 | 1,0,0,1,0,0,1,90000 4 | 1,0,1,1,0,0,1,90000 5 | 1,1,0,0,1,3,0,80000 6 | 1,1,1,0,1,3,0,80000 7 | 1,1,0,0,10,10,0,30000 8 | 1,1,1,0,10,10,0,30000 9 | 10,0,0,0,0,0,1,50000 10 | 10,0,1,0,0,0,1,50000 11 | 10,1,0,0,10,10,1,5000 12 | 1,1,0,0,1,3,1,60000 13 | 10,1,0,0,10,10,1,5000 14 | 1,0,0,0,0,0,10,20000 15 | 1,0,1,0,0,0,10,20000 16 | 1,1,1,1,1,1,0,30000 -------------------------------------------------------------------------------- /yaml-analysis/stats.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | 7 | "github.com/daoleno/tgraph" 8 | ) 9 | 10 | type portStats struct { 11 | cidrs map[int]int 12 | podSelectors map[int]int 13 | } 14 | 15 | func newPortStats() portStats { 16 | return portStats{ 17 | cidrs: map[int]int{}, 18 | podSelectors: map[int]int{}, 19 | } 20 | } 21 | 22 | func (s *portStats) Increment(key int, cidrs, podSelectors bool) { 23 | if cidrs { 24 | s.cidrs[key] += 1 25 | } 26 | if podSelectors { 27 | s.podSelectors[key] += 1 28 | } 29 | } 30 | 31 | type netpolWeight struct { 32 | npConfig *netpolConfig 33 | result profilesMatch 34 | weight float64 35 | netpolName string 36 | } 37 | 38 | func (w *netpolWeight) print() { 39 | fmt.Printf("%v\n", w.netpolName) 40 | indent := " " 41 | w.npConfig.print(indent) 42 | w.result.print(indent) 43 | fmt.Printf("%sweight: %v\n", indent, w.weight) 44 | } 45 | 46 | type stats struct { 47 | singlePorts portStats 48 | portRanges portStats 49 | cidrs map[int]int 50 | podSelectors map[int]int 51 | peerPods map[int]int 52 | peersCounter int 53 | localPods map[int]int 54 | matchedNetpols int 55 | // emptyNetpols are netpols that have some peers defined, but it doesn't have real effect. 
56 | // it can happen if either no local pods are selected or all peers don't select any enpdoints 57 | emptyNetpols map[string][]string 58 | emptyCounter int 59 | // noPeersNetpols are netpol that have zero peers defined, they may be used as deny-all policy and are not 60 | // invalid 61 | noPeersNetpols map[string][]string 62 | noPeersCounter int 63 | weights []*netpolWeight 64 | 65 | // [profile idx][match weight][peers with given weight] 66 | profilesToNetpols map[int]map[float64][]*gressWithLocalPods 67 | } 68 | 69 | type gressWithLocalPods struct { 70 | *gressRule 71 | localPods int 72 | } 73 | 74 | func newStats() *stats { 75 | return &stats{ 76 | localPods: map[int]int{}, 77 | singlePorts: newPortStats(), 78 | portRanges: newPortStats(), 79 | cidrs: map[int]int{}, 80 | podSelectors: map[int]int{}, 81 | peerPods: map[int]int{}, 82 | emptyNetpols: map[string][]string{}, 83 | noPeersNetpols: map[string][]string{}, 84 | profilesToNetpols: map[int]map[float64][]*gressWithLocalPods{}, 85 | } 86 | } 87 | 88 | func toTgraphData(input map[int]int, getLabel func(key int) string) ([][]float64, []string) { 89 | data := [][]float64{} 90 | labels := []string{} 91 | sortedKeys, sortedValues := sortedMap[int, int](input, false) 92 | for i, key := range sortedKeys { 93 | data = append(data, []float64{float64(sortedValues[i])}) 94 | labels = append(labels, getLabel(key)) 95 | } 96 | return data, labels 97 | } 98 | 99 | type graphData struct { 100 | input map[int]int 101 | label string 102 | title string 103 | } 104 | 105 | func median(data map[int]int, ignoreZeros bool) int { 106 | inlinedData := []int{} 107 | for value, counter := range data { 108 | if ignoreZeros && value == 0 { 109 | continue 110 | } 111 | for i := 0; i < counter; i++ { 112 | inlinedData = append(inlinedData, value) 113 | } 114 | } 115 | 116 | sort.Ints(inlinedData) 117 | 118 | l := len(inlinedData) 119 | if l == 0 { 120 | return 0 121 | } else { 122 | return inlinedData[l/2] 123 | } 124 | } 125 | 126 | func average(data map[int]int) float64 { 127 | sum := 0 128 | samplesCounter := 0 129 | for value, counter := range data { 130 | sum += value * counter 131 | samplesCounter += counter 132 | } 133 | return float64(sum) / float64(samplesCounter) 134 | } 135 | 136 | func (stat *stats) print(printEmptyNetpols, printGraphs bool, heaviestNetpols int, noProfiles bool) { 137 | fmt.Printf("Empty netpols: %v, peers: %v, deny-only netpols %v\n", stat.emptyCounter, stat.peersCounter, stat.noPeersCounter) 138 | if printEmptyNetpols { 139 | fmt.Printf("\nEmpty netpols (namespace:[netpol names]):\n%s\n", printMap[string, []string](stat.emptyNetpols)) 140 | } 141 | 142 | if printGraphs { 143 | fmt.Printf("Average network policy profile: local pods=%v\n"+ 144 | "\tcidrs=%v, single ports=%v, port ranges=%v\n"+ 145 | "\tpod selectors=%v, peer pods=%v, single ports=%v, port ranges=%v\n\n", 146 | average(stat.localPods), 147 | average(stat.cidrs), average(stat.singlePorts.cidrs), average(stat.portRanges.cidrs), 148 | average(stat.podSelectors), average(stat.peerPods), average(stat.singlePorts.podSelectors), average(stat.portRanges.podSelectors), 149 | ) 150 | 151 | fmt.Printf("Median network policy profile: local pods=%v\n"+ 152 | "\tcidrs=%v, single ports=%v, port ranges=%v\n"+ 153 | "\tpod selectors=%v, peer pods=%v, single ports=%v, port ranges=%v\n\n", 154 | median(stat.localPods, true), 155 | median(stat.cidrs, true), median(stat.singlePorts.cidrs, false), median(stat.portRanges.cidrs, false), 156 | median(stat.podSelectors, true), 
median(stat.peerPods, true), median(stat.singlePorts.podSelectors, false), median(stat.portRanges.podSelectors, false), 157 | ) 158 | 159 | for _, gData := range []graphData{ 160 | {stat.localPods, "pod(s)", "Local pods distribution"}, 161 | {stat.cidrs, "CIDR(s)", "CIDR peers distribution"}, 162 | {stat.podSelectors, "pod selector(s)", "Pod selector peers distribution"}, 163 | {stat.peerPods, "peer pod(s)", "Peer pods distribution"}, 164 | {stat.singlePorts.cidrs, "single port(s)", "Single port peers distribution (CIDRs)"}, 165 | {stat.singlePorts.podSelectors, "single port(s)", "Single port peers distribution (pod selectors)"}, 166 | {stat.portRanges.cidrs, "port ranges(s)", "Port range peers distribution (CIDRs)"}, 167 | {stat.portRanges.podSelectors, "port ranges(s)", "Port range peers distribution (pod selectors)"}, 168 | } { 169 | data, labels := toTgraphData(gData.input, func(key int) string { return fmt.Sprintf("%d %s", key, gData.label) }) 170 | tgraph.Chart(gData.title, labels, data, nil, 171 | nil, 100, false, "▇") 172 | total := 0 173 | for _, i := range gData.input { 174 | total += i 175 | } 176 | fmt.Println("Total: ", total) 177 | fmt.Println() 178 | } 179 | } 180 | 181 | if !noProfiles { 182 | fmt.Printf("Matched %v netpols with given profiles\n", stat.matchedNetpols) 183 | 184 | sumWeight := 0.0 185 | for _, npWeight := range stat.weights { 186 | sumWeight += npWeight.weight 187 | } 188 | fmt.Printf("Final Weight=%v, if < 1, the workload is accepted\n\n", sumWeight) 189 | sort.Slice(stat.weights, func(i, j int) bool { 190 | return stat.weights[i].weight > stat.weights[j].weight 191 | }) 192 | 193 | if heaviestNetpols > 0 { 194 | fmt.Printf("%v heaviest netpols are (profile idx start with 1):\n", heaviestNetpols) 195 | weightsToPrint := heaviestNetpols 196 | if len(stat.weights) < weightsToPrint { 197 | weightsToPrint = len(stat.weights) 198 | } 199 | for _, npWeight := range stat.weights[:weightsToPrint] { 200 | npWeight.print() 201 | } 202 | fmt.Println() 203 | } 204 | 205 | profileCopies := map[int]int{} 206 | totalProfiles := 0 207 | 208 | for _, npWeight := range stat.weights { 209 | for _, result := range npWeight.result { 210 | // use idx + 1 to count profiles from 1, which should be easier to read 211 | profileCopies[result.idx+1] += result.copies 212 | totalProfiles += result.copies 213 | } 214 | } 215 | fmt.Printf("Initial %v peers were split into %v profiles.\n", stat.peersCounter, totalProfiles) 216 | data, labels := toTgraphData(profileCopies, func(key int) string { return fmt.Sprintf("%s profile", ordinalString(key)) }) 217 | tgraph.Chart("Used profiles statistics (number of copies)", labels, data, nil, 218 | nil, 100, false, "▇") 219 | fmt.Println() 220 | 221 | // [pair(key=profile idx, value=number of copies)] 222 | sortedCopies := sortedMapByValue[int, int](profileCopies, true) 223 | totalPeers := 0 224 | for _, profileCopy := range sortedCopies { 225 | profilesToNetpolsIdx := profileCopy.key - 1 226 | weightToPeers := stat.profilesToNetpols[profilesToNetpolsIdx] 227 | 228 | profilePeers := 0 229 | for _, copies := range weightToPeers { 230 | profilePeers += len(copies) 231 | } 232 | totalPeers += profilePeers 233 | fmt.Printf("%s profile (%v peers) stats: \n", ordinalString(profileCopy.key), profilePeers) 234 | 235 | sortedWeights, _ := sortedMap[float64, []*gressWithLocalPods](weightToPeers, true) 236 | 237 | weightsToPrint := 5 238 | if len(sortedWeights) < weightsToPrint { 239 | weightsToPrint = len(sortedWeights) 240 | } 241 | 242 | for i, weight := 
range sortedWeights[:weightsToPrint] { 243 | weightUsages := stat.profilesToNetpols[profilesToNetpolsIdx][weight] 244 | fmt.Printf("%s heaviest weight: %.8f used by %v peer(s)\n", ordinalString(i+1), weight, len(weightUsages)) 245 | for _, rule := range weightUsages[:min(5, len(weightUsages))] { 246 | fmt.Printf("\tlocalpods=%v\n", rule.localPods) 247 | rule.print("") 248 | } 249 | } 250 | } 251 | //fmt.Printf("Total peers: %v", totalPeers) 252 | } 253 | } 254 | --------------------------------------------------------------------------------
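
The weight arithmetic described in the README section above can be sketched in isolation. Below is a minimal, hypothetical Go example (not part of the repository) that mirrors the ceiling-division helper `topDiv` from `yaml-analysis/helpers.go` and the copies-times-profile-weight idea from `analyze.go`; the profile and workload numbers are made up, and ports and CIDRs are omitted for brevity, so treat it as an illustration rather than the tool's exact matching algorithm.

```go
package main

import "fmt"

// topDiv is ceiling division, as in yaml-analysis/helpers.go:
// how many profile "units" are needed to cover n items.
func topDiv(n, per int) int {
	if n == 0 {
		return 1
	}
	res := n / per
	if n%per > 0 {
		res++
	}
	return res
}

func main() {
	// Hypothetical performance profile: 1 local pod, 1 pod selector, 1 peer pod,
	// with a measured limit of 100 such policies per cluster.
	profLocalPods, profPodSelectors, profPeerPods := 1, 1, 1
	profWeight := 1.0 / 100.0 // one policy of this profile weighs 0.01

	// Hypothetical workload peer: a policy selecting 3 local pods, with a rule
	// containing 2 pod selectors that together select 10 peer pods.
	localPods, podSelectors, peerPods := 3, 2, 10

	// Number of profile copies needed to cover the peer (ports/CIDRs omitted).
	copies := topDiv(podSelectors, profPodSelectors) *
		topDiv(peerPods, profPeerPods) *
		topDiv(localPods, profLocalPods)
	weight := float64(copies) * profWeight

	fmt.Printf("copies=%d weight=%.2f\n", copies, weight) // copies=60 weight=0.60
	// The workload as a whole is accepted only if the sum of all such
	// per-policy weights stays below 1.
}
```

In this sketch a single policy already consumes 0.60 of the cluster's capacity for the assumed profile, which is the same kind of per-policy figure that appears in the "heaviest netpols" output above before being summed into the final weight.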