├── .github └── workflows │ └── pullrequest.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── kube-burner-workload ├── README.md ├── SCALE_PROFILES.md ├── calico │ ├── README.md │ ├── calico-dashboard.png │ ├── convergence_tracker.yml │ ├── env │ ├── grafana_dash.json │ ├── metrics.yml │ ├── monitoring.yaml │ ├── policy-tracker │ │ ├── Dockerfile │ │ ├── policy-tracker.py │ │ └── requirements.txt │ └── test_limit.sh ├── convergence_waiter.sh ├── egress-np.yml ├── env ├── ingress-np.yml ├── kind-metrics │ ├── README.md │ ├── env │ ├── grafana.png │ ├── grafana_dash.json │ ├── metrics.yml │ └── monitoring.yaml ├── network-policy.yaml ├── openshift │ ├── README.md │ ├── convergence_tracker.yml │ ├── env │ ├── grafana.png │ ├── grafana_dash.json │ ├── metrics.yml │ ├── openflow-tracker │ │ ├── Dockerfile │ │ ├── openflow-tracker.py │ │ └── requirements.txt │ └── test_limit.sh ├── ovn-kubernetes │ ├── README.md │ ├── convergence_tracker.yml │ └── openflow-tracker │ │ ├── Dockerfile │ │ ├── openflow-tracker.py │ │ └── requirements.txt └── pod.yml └── yaml-analysis ├── README.md ├── analyze.go ├── go.mod ├── go.sum ├── helpers.go ├── netpol_config.go ├── profile.go ├── profiles_example.csv └── stats.go /.github/workflows/pullrequest.yml: -------------------------------------------------------------------------------- 1 | # pullrequest.yml 2 | name: Lint Workflow 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request_target: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | linters: 13 | runs-on: ubuntu-latest 14 | steps: 15 | 16 | - name: Check out code 17 | uses: actions/checkout@v3 18 | with: 19 | fetch-depth: 1 20 | ref: ${{ github.event.pull_request.head.sha }} 21 | persist-credentials: false 22 | 23 | - name: Install pre-commit 24 | run: pip install pre-commit 25 | 26 | - name: Run pre-commit hooks 27 | run: pre-commit run --all-files 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #IDE (GoLand) specific 2 | .idea/ 3 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/golangci/golangci-lint 3 | rev: v1.51.2 4 | hooks: 5 | - id: golangci-lint 6 | entry: bash -c 'cd yaml-analysis && golangci-lint run --timeout=5m' 7 | - repo: https://github.com/igorshubovych/markdownlint-cli 8 | rev: v0.34.0 9 | hooks: 10 | - id: markdownlint 11 | args: [--disable, MD013, MD002] 12 | - repo: https://github.com/jumanjihouse/pre-commit-hooks 13 | rev: 3.0.0 14 | hooks: 15 | - id: shellcheck 16 | - repo: https://github.com/pre-commit/pre-commit-hooks 17 | rev: v4.4.0 18 | hooks: 19 | - id: check-json 20 | - repo: https://github.com/psf/black 21 | rev: 22.10.0 22 | hooks: 23 | - id: black -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## k8s-netpol-scale 2 | 3 | This repository contains tools for k8s Network Policy scale testing. 4 | In the [./kube-burner-workload](./kube-burner-workload) folder you will find a configurable network policy workload that may be run by 5 | [kube-burner](https://github.com/cloud-bulldozer/kube-burner). 6 | 7 | In the [./yaml-analysis](./yaml-analysis) folder you will find tools to analyze network policies based on their YAMLs, 8 | and predict whether a given workload will be properly handled by a cluster based on provided scale profiles data. 9 | -------------------------------------------------------------------------------- /kube-burner-workload/README.md: -------------------------------------------------------------------------------- 1 | ## Intro 2 | 3 | The network-policy workload is used to generate a large number of non-trivial network policies. 4 | The workload is defined in [./network-policy.yaml](./network-policy.yaml) and consists of 5 | - `convergence-tracker` job: these are the pods that track when the created objects are applied. 6 | - `network-policy-perf` job: this job creates the workload after the `convergence-tracker` resources are created. 7 | 8 | ## Workload details 9 | 10 | The `network-policy-perf` job creates a given number of namespaces named `network-policy-perf-<iteration>` (iteration starts from 0) with the same contents. 11 | Every namespace has a given number of pods named `test-pod-<replica>` (replica starts from 1) based on [./pod.yml](./pod.yml). 12 | Every pod is labeled with `1: "true", ..., <replica>: "true"` to select the required number of pods in the future. 13 | The `test-pod: "true"` label is added for `podAntiAffinity` to spread the workload pods as evenly as possible across the nodes. 14 | A network policy always selects the first N pods by pod selector, therefore we add a `num=<replica>` label to ensure 15 | equal distribution of selected pods.
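For illustration, here is a minimal sketch (a hypothetical helper, not part of the workload; the actual labels are set by the pod template) of the label set attached to each pod replica:

```python
def pod_labels(replica: int) -> dict:
    """Sketch of the labeling scheme described above: labels "1".."<replica>"
    set to "true", plus num=<replica> and the podAntiAffinity label."""
    labels = {str(i): "true" for i in range(1, replica + 1)}
    labels["num"] = str(replica)
    labels["test-pod"] = "true"
    return labels

print(pod_labels(3))  # {'1': 'true', '2': 'true', '3': 'true', 'num': '3', 'test-pod': 'true'}
```

The kubectl output below shows the same scheme on a live namespace (kube-burner adds its own bookkeeping labels as well):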
16 | 17 | ``` 18 | kubectl get pods -n network-policy-perf-0 --show-labels 19 | NAME READY STATUS RESTARTS AGE LABELS 20 | test-pod-1 1/1 Running 0 2m52s 1=true,kube-burner-index=0,kube-burner-job=network-policy-perf,kube-burner-runid=4310adad-84eb-4d5b-a984-f408b2b1cd4e,kube-burner-uuid=106b3aff-4b90-4e0d-b69f-495e9f24e8d5,num=1,test-pod=true 21 | test-pod-2 1/1 Running 0 2m52s 1=true,2=true,kube-burner-index=0,kube-burner-job=network-policy-perf,kube-burner-runid=4310adad-84eb-4d5b-a984-f408b2b1cd4e,kube-burner-uuid=106b3aff-4b90-4e0d-b69f-495e9f24e8d5,num=2,test-pod=true 22 | test-pod-3 1/1 Running 0 2m52s 1=true,2=true,3=true,kube-burner-index=0,kube-burner-job=network-policy-perf,kube-burner-runid=4310adad-84eb-4d5b-a984-f408b2b1cd4e,kube-burner-uuid=106b3aff-4b90-4e0d-b69f-495e9f24e8d5,num=3,test-pod=true 23 | ``` 24 | 25 | Every namespace has a given number of network policies named `ingress-<idx>` and `egress-<idx>` (idx starts from 1) 26 | based on [./ingress-np.yml](./ingress-np.yml) and [./egress-np.yml](./egress-np.yml); 27 | corresponding ingress and egress policies have exactly the same peers for now (this may change in the future). 28 | 29 | The NetworkPolicy template takes the following args: 30 | - local_pods: number of pods to select by `spec.podSelector`, the same set of pods for every policy in the namespace 31 | - single_ports: number of single ports for every gress rule 32 | - port_ranges: number of port ranges for every gress rule 33 | - pod_selectors: number of selector-based gress rules, every rule has only 1 selector and may have multiple ports 34 | - peer_namespaces: number of selected namespaces for every pod_selector peer 35 | - peer_pods: number of selected pods for every pod_selector peer 36 | - cidr_rules: number of CIDR-based gress rules 37 | 38 | To increase the real load and reduce the number of possible internal optimizations, we need to generate different peers. 39 | CIDRs are all different inside one namespace: they start from `1.0.0.0/24` and are incremented by 1: `1.0.1.0/24`, `1.0.2.0/24`, etc. 40 | Peer selectors always select the same pods based on the required number, but always have different namespace selectors. 41 | We have BinomialCoefficient(NAMESPACES, PEER_NAMESPACES) different peer namespace sets, which don't repeat across network policies 42 | and namespaces unless we have more selector-based rules than that. 43 | 44 | Example: if we have 5 namespaces and every policy selects 3 namespaces for every peer, we have BinomialCoefficient(5, 3) = 10, 45 | which gives the following choices (think peer namespace indexes): 46 | 47 | [1] 1,2,3\ 48 | [2] 1,2,4\ 49 | [3] 1,2,5\ 50 | [4] 1,3,4\ 51 | [5] 1,3,5\ 52 | [6] 1,4,5\ 53 | [7] 2,3,4\ 54 | [8] 2,3,5\ 55 | [9] 2,4,5\ 56 | [10] 3,4,5 57 | 58 | If we create 5 namespaces with 1 network policy each and 2 peer selectors with 3 peer_namespaces each, we will have 59 | 60 | ns1.np1.peer1 selects namespaces 1,2,3 [1]\ 61 | ns1.np1.peer2 selects namespaces 1,2,4 [2]\ 62 | ns2.np1.peer1 selects namespaces 1,2,5 [3]\ 63 | ns2.np1.peer2 selects namespaces 1,3,4 [4]\ 64 | ...\ 65 | ns5.np1.peer1 selects namespaces 2,4,5 [9]\ 66 | ns5.np1.peer2 selects namespaces 3,4,5 [10] 67 | 68 |
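To make the mapping from a running peer index to a unique namespace combination concrete, here is a minimal Python sketch (for illustration only; the workload itself relies on the `Binomial` and `IndexToCombination` template helpers used in ingress-np.yml and egress-np.yml, and this sketch simply assumes the lexicographic order shown above):

```python
from math import comb

def index_to_combination(index: int, n: int, k: int) -> list:
    """Map index in [0, comb(n, k)) to the index-th k-combination of 1..n
    in lexicographic order, e.g. 0 -> [1, 2, 3] for n=5, k=3."""
    combination, start = [], 1
    for remaining in range(k, 0, -1):
        for candidate in range(start, n + 1):
            # number of combinations that put `candidate` in the current slot
            block = comb(n - candidate, remaining - 1)
            if index < block:
                combination.append(candidate)
                start = candidate + 1
                break
            index -= block
    return combination

# Reproduces the 10 choices listed above for 5 namespaces and 3 peer namespaces:
print([index_to_combination(i, 5, 3) for i in range(comb(5, 3))])
```

Each selector-based gress rule advances the running index by one (wrapping around modulo `Binomial(NAMESPACES, PEER_NAMESPACES)`), which is why peer namespace sets only start repeating once all combinations have been used.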
69 | In the cluster it will look like (ingress-only) 70 | 71 | ``` 72 | items: 73 | - apiVersion: networking.k8s.io/v1 74 | kind: NetworkPolicy 75 | metadata: 76 | creationTimestamp: "2023-08-23T09:35:39Z" 77 | generation: 1 78 | labels: 79 | kube-burner-index: "1" 80 | kube-burner-job: network-policy-perf 81 | kube-burner-runid: 39baa8cb-07c6-441f-add6-07a56404a14b 82 | kube-burner-uuid: 9c3cdf2e-4fd7-470a-b1b0-2d1c1a7b5c32 83 | name: ingress-1 84 | namespace: network-policy-perf-0 85 | resourceVersion: "66063" 86 | uid: 5abd93b5-906e-44e0-be1f-1be30b6bbeed 87 | spec: 88 | ingress: 89 | - from: 90 | - namespaceSelector: 91 | matchExpressions: 92 | - key: kubernetes.io/metadata.name 93 | operator: In 94 | values: 95 | - network-policy-perf-1 96 | - network-policy-perf-2 97 | - network-policy-perf-3 98 | podSelector: 99 | matchLabels: 100 | "1": "true" 101 | - from: 102 | - namespaceSelector: 103 | matchExpressions: 104 | - key: kubernetes.io/metadata.name 105 | operator: In 106 | values: 107 | - network-policy-perf-1 108 | - network-policy-perf-2 109 | - network-policy-perf-4 110 | podSelector: 111 | matchLabels: 112 | "1": "true" 113 | podSelector: 114 | matchLabels: 115 | "1": "true" 116 | policyTypes: 117 | - Ingress 118 | status: {} 119 | - apiVersion: networking.k8s.io/v1 120 | kind: NetworkPolicy 121 | metadata: 122 | creationTimestamp: "2023-08-23T09:35:39Z" 123 | generation: 1 124 | labels: 125 | kube-burner-index: "1" 126 | kube-burner-job: network-policy-perf 127 | kube-burner-runid: 39baa8cb-07c6-441f-add6-07a56404a14b 128 | kube-burner-uuid: 9c3cdf2e-4fd7-470a-b1b0-2d1c1a7b5c32 129 | name: ingress-1 130 | namespace: network-policy-perf-1 131 | resourceVersion: "66068" 132 | uid: f5f26105-125b-4436-8a97-3d2fdace15bb 133 | spec: 134 | ingress: 135 | - from: 136 | - namespaceSelector: 137 | matchExpressions: 138 | - key: kubernetes.io/metadata.name 139 | operator: In 140 | values: 141 | - network-policy-perf-1 142 | - network-policy-perf-2 143 | - network-policy-perf-5 144 | podSelector: 145 | matchLabels: 146 | "1": "true" 147 | - from: 148 | - namespaceSelector: 149 | matchExpressions: 150 | - key: kubernetes.io/metadata.name 151 | operator: In 152 | values: 153 | - network-policy-perf-1 154 | - network-policy-perf-3 155 | - network-policy-perf-4 156 | podSelector: 157 | matchLabels: 158 | "1": "true" 159 | podSelector: 160 | matchLabels: 161 | "1": "true" 162 | policyTypes: 163 | - Ingress 164 | status: {} 165 | - apiVersion: networking.k8s.io/v1 166 | kind: NetworkPolicy 167 | metadata: 168 | creationTimestamp: "2023-08-23T09:35:39Z" 169 | generation: 1 170 | labels: 171 | kube-burner-index: "1" 172 | kube-burner-job: network-policy-perf 173 | kube-burner-runid: 39baa8cb-07c6-441f-add6-07a56404a14b 174 | kube-burner-uuid: 9c3cdf2e-4fd7-470a-b1b0-2d1c1a7b5c32 175 | name: ingress-1 176 | namespace: network-policy-perf-2 177 | resourceVersion: "66071" 178 | uid: e74b9e03-311c-4dcb-b1f1-22424ace949d 179 | spec: 180 | ingress: 181 | - from: 182 | - namespaceSelector: 183 | matchExpressions: 184 | - key: kubernetes.io/metadata.name 185 | operator: In 186 | values: 187 | - network-policy-perf-1 188 | - network-policy-perf-3 189 | - network-policy-perf-5 190 | podSelector: 191 | matchLabels: 192 | "1": "true" 193 | - from: 194 | - namespaceSelector: 195 | matchExpressions: 196 | - key: kubernetes.io/metadata.name 197 | operator: In 198 | values: 199 | - network-policy-perf-1 200 | - network-policy-perf-4 201 | - network-policy-perf-5 202 | podSelector: 203 | matchLabels: 204 | "1": 
"true" 205 | podSelector: 206 | matchLabels: 207 | "1": "true" 208 | policyTypes: 209 | - Ingress 210 | status: {} 211 | - apiVersion: networking.k8s.io/v1 212 | kind: NetworkPolicy 213 | metadata: 214 | creationTimestamp: "2023-08-23T09:35:39Z" 215 | generation: 1 216 | labels: 217 | kube-burner-index: "1" 218 | kube-burner-job: network-policy-perf 219 | kube-burner-runid: 39baa8cb-07c6-441f-add6-07a56404a14b 220 | kube-burner-uuid: 9c3cdf2e-4fd7-470a-b1b0-2d1c1a7b5c32 221 | name: ingress-1 222 | namespace: network-policy-perf-3 223 | resourceVersion: "66079" 224 | uid: c1c3b966-390c-4c44-8fc9-c106fb036e64 225 | spec: 226 | ingress: 227 | - from: 228 | - namespaceSelector: 229 | matchExpressions: 230 | - key: kubernetes.io/metadata.name 231 | operator: In 232 | values: 233 | - network-policy-perf-2 234 | - network-policy-perf-3 235 | - network-policy-perf-4 236 | podSelector: 237 | matchLabels: 238 | "1": "true" 239 | - from: 240 | - namespaceSelector: 241 | matchExpressions: 242 | - key: kubernetes.io/metadata.name 243 | operator: In 244 | values: 245 | - network-policy-perf-2 246 | - network-policy-perf-3 247 | - network-policy-perf-5 248 | podSelector: 249 | matchLabels: 250 | "1": "true" 251 | podSelector: 252 | matchLabels: 253 | "1": "true" 254 | policyTypes: 255 | - Ingress 256 | status: {} 257 | - apiVersion: networking.k8s.io/v1 258 | kind: NetworkPolicy 259 | metadata: 260 | creationTimestamp: "2023-08-23T09:35:39Z" 261 | generation: 1 262 | labels: 263 | kube-burner-index: "1" 264 | kube-burner-job: network-policy-perf 265 | kube-burner-runid: 39baa8cb-07c6-441f-add6-07a56404a14b 266 | kube-burner-uuid: 9c3cdf2e-4fd7-470a-b1b0-2d1c1a7b5c32 267 | name: ingress-1 268 | namespace: network-policy-perf-4 269 | resourceVersion: "66086" 270 | uid: db941c7d-e6a7-48db-8575-3d2a3da6f2bf 271 | spec: 272 | ingress: 273 | - from: 274 | - namespaceSelector: 275 | matchExpressions: 276 | - key: kubernetes.io/metadata.name 277 | operator: In 278 | values: 279 | - network-policy-perf-2 280 | - network-policy-perf-4 281 | - network-policy-perf-5 282 | podSelector: 283 | matchLabels: 284 | "1": "true" 285 | - from: 286 | - namespaceSelector: 287 | matchExpressions: 288 | - key: kubernetes.io/metadata.name 289 | operator: In 290 | values: 291 | - network-policy-perf-3 292 | - network-policy-perf-4 293 | - network-policy-perf-5 294 | podSelector: 295 | matchLabels: 296 | "1": "true" 297 | podSelector: 298 | matchLabels: 299 | "1": "true" 300 | policyTypes: 301 | - Ingress 302 | status: {} 303 | 304 | ``` 305 | 306 |
307 | 308 |
309 | env 310 | 311 | ``` 312 | NAMESPACES=5 313 | PODS_PER_NAMESPACE=1 314 | NETPOLS_PER_NAMESPACE=1 315 | 316 | LOCAL_PODS=1 317 | SINGLE_PORTS=0 318 | PORT_RANGES=0 319 | POD_SELECTORS=2 320 | PEER_NAMESPACES=3 321 | PEER_PODS=1 322 | CIDRS=0 323 | ``` 324 |
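As a quick back-of-the-envelope check (not part of the workload) that this ingress-only example uses every distinct peer namespace set exactly once:

```python
from math import comb

NAMESPACES, NETPOLS_PER_NAMESPACE, POD_SELECTORS, PEER_NAMESPACES = 5, 1, 2, 3

used_peer_sets = NAMESPACES * NETPOLS_PER_NAMESPACE * POD_SELECTORS  # 5 * 1 * 2 = 10
distinct_peer_sets = comb(NAMESPACES, PEER_NAMESPACES)               # C(5, 3) = 10
print(used_peer_sets, distinct_peer_sets)  # 10 10 -> no peer set needs to be repeated
```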
325 | 326 | ## Different Platforms 327 | 328 | Different clusters may require different techniques to do the scale testing. While this framework may be used 329 | as is by any Kubernetes cluster, extra features like the convergence tracker and metrics may also be enabled. 330 | Since the config for these features usually differs based on the network plugin and cluster type, we have added 331 | a `PLATFORM` env variable and corresponding `platform_name` folders that may be used as an example by other platforms, 332 | and may also be reused and improved by the same platform as a part of this framework. 333 | 334 | Every platform may have its own README. 335 | 336 | ### Comparing different platforms 337 | 338 | To ensure results for different platforms are comparable, set up the convergence tracker logic to be as similar as possible; 339 | all timeouts and variables defining a successful test run should be the same. 340 | Cluster-specific parameters, like resource quotas, enabled services (e.g. observability), and node configurations, may also 341 | affect the results. 342 | 343 | ## Tracking the end of the test 344 | 345 | The `CONVERGENCE_TRACKER` env variable enables the `convergence-tracker` job. 346 | The right way to track convergence may differ based on the network plugin or cluster type; you can use the existing platforms 347 | as an example. 348 | For example, the ovn-kubernetes network plugin uses OVS flows underneath, therefore this job spins up a pod 349 | on every node and tracks the number of OVS flows; when this number stops changing, it considers the config to be applied. 350 | There is a `CONVERGENCE_PERIOD` parameter that defines for how long it waits to consider the number of flows converged. 351 | In addition, `CONVERGENCE_TIMEOUT` sets the hard deadline for convergence tracking. 352 | `convergence_waiter.sh` is a script that waits up to `CONVERGENCE_TIMEOUT + CONVERGENCE_PERIOD` for all convergence-tracker pods to complete, 353 | before deleting the workload. 354 | 355 | ## Running 356 | 357 | 1. Install kube-burner v1.9.4+ 358 | 359 | 1.1 You can download kube-burner from https://github.com/cloud-bulldozer/kube-burner/releases 360 | 361 | 1.2 You can build it from source [kube-burner](https://github.com/cloud-bulldozer/kube-burner/tree/main) with 362 | `make build` 363 | 2. `cd ./kube-burner-workload` 364 | 3. Set env variables with the test config in the `env` file\ 365 | **NOTE**: some corner cases may not work 366 | 4. `source ./env` 367 | 5. `kube-burner init -c ./network-policy.yaml` 368 | 369 | **Note**: Every platform may have its own README with more details 370 | 371 | To clean up resources created by this workload, run `kubectl delete ns -l kube-burner-job` 372 | 373 | ## Result 374 | 375 | When using metrics collection, you can create a dashboard that will give you details about the test config and 376 | cluster state during the test, including performance metrics. An example dashboard for the `./openshift` profile is 377 | 378 | ![image](openshift/grafana.png) 379 | -------------------------------------------------------------------------------- /kube-burner-workload/SCALE_PROFILES.md: -------------------------------------------------------------------------------- 1 | ## NetworkPolicy variables 2 | 3 | All variables that this framework currently has may be grouped as 4 | 1. NetworkPolicy config 5 | - `LOCAL_PODS` 6 | - `SINGLE_PORTS` 7 | - `PORT_RANGES` 8 | - `POD_SELECTORS` 9 | - `PEER_NAMESPACES` 10 | - `PEER_PODS` 11 | - `CIDRS` 12 | 13 | These parameters define the scale impact of a single NetworkPolicy. 14 | 2.
Namespace config and scale 15 | - `PODS_PER_NAMESPACE` 16 | - `INGRESS` 17 | - `EGRESS` 18 | - `NAMESPACES` 19 | - `NETPOLS_PER_NAMESPACE` 20 | 21 | These variables define a namespace config and may be used to find the scalability limit. 22 | `PODS_PER_NAMESPACE` also serves as a restriction for some NetworkPolicy parameters (like `LOCAL_PODS`) but increases the per-namespace 23 | workload at the same time. The `NAMESPACES` parameter also limits the potential values of `PEER_NAMESPACES`. 24 | 25 | There are some extra test parameters composed of the env variables, where `I()` is an indicator function: 26 | - Number of network policies = `NAMESPACES * NETPOLS_PER_NAMESPACE * (I(INGRESS) + I(EGRESS))` 27 | - Number of used peer namespace selectors = `Number of network policies * POD_SELECTORS` 28 | - Number of different peer namespace selectors = `Binomial(NAMESPACES, PEER_NAMESPACES)` 29 | - % of used different peer selectors = `Number of used peer namespace selectors / Number of different peer namespace selectors` 30 | 31 | When the last parameter reaches or exceeds 100%, some peer namespace selectors will be repeated. A small worked sketch of these composite parameters is included at the end of this file. 32 | 33 | ## Scale testing 34 | 35 | To find the scalability limit for a cluster, we can iteratively increase the workload until the test fails (different 36 | clusters/platforms may have different definitions of failure). Considering we are trying to answer the question 37 | "How many network policies can I create?", we want the result to be a network policy count. 38 | 39 | Therefore, the easiest way to do so is to keep all parameter values fixed, except for `NETPOLS_PER_NAMESPACE`. 40 | Then, by increasing only `NETPOLS_PER_NAMESPACE`, we leave everything else exactly the same. 41 | 42 | You can copy a [helper spreadsheet](https://docs.google.com/spreadsheets/d/1Kq1w8c8Z_wlhBOb_EID2nhvmwEi8H6pSxvtpDcbf-1M/edit?usp=sharing) to track test results. 43 | 44 | ## Scale testing profiles 45 | 46 | While this framework may be used to define a network policy config based on a specific customer's request, 47 | we also want to provide pre-defined scale testing results that will help customers understand what kind of 48 | workload can be handled. 49 | 50 | To do so, we can create a set of scale testing profiles by defining all variable values. We will code them as 51 | `<LOCAL_PODS>-<SINGLE_PORTS>-<PORT_RANGES>-<POD_SELECTORS>-<PEER_NAMESPACES>-<PEER_PODS>-<CIDRS>`. 52 | Here are some examples: 53 | 54 | MINIMAL 55 | - CIDR-only (1-0-0-0-0-0-1) 56 | - port+range+CIDR (1-1-1-0-0-0-1) 57 | - pod-selector-only (1-0-0-1-3-1-0) 58 | - port+range+pod-selector (1-1-1-1-3-1-0) 59 | - pod-selector+CIDR (1-0-0-1-3-1-1) 60 | - port+range+pod-selector+CIDR (1-1-1-1-3-1-1) 61 | 62 | MEDIUM 63 | - CIDR-only (10- 0- 0- 0- 0- 0-10) 64 | - port+range+CIDR (10-10-10- 0- 0- 0-10) 65 | - pod-selector-only (10- 0- 0-10-10-10- 0) 66 | - port+range+pod-selector (10-10-10-10-10-10- 0) 67 | - pod-selector+CIDR (10- 0- 0-10-10-10-10) 68 | - port+range+pod-selector+CIDR (10-10-10-10-10-10-10) 69 | 70 | 71 | ## Spreadsheet 72 | 73 | To simplify results tracking, you can copy a [spreadsheet](https://docs.google.com/spreadsheets/d/1Kq1w8c8Z_wlhBOb_EID2nhvmwEi8H6pSxvtpDcbf-1M/edit#gid=16759354) 74 | that shows an example of increasing the workload and finding the best result. 75 | 76 | The `export` sheets may be used with the [yaml_analysis](../yaml-analysis) tools; check its [README](../yaml-analysis/README.md) for more details.
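As referenced above, here is a small worked sketch (a hypothetical helper, not part of the repository tooling) of the composite test parameters defined in the NetworkPolicy variables section:

```python
from math import comb

def composite_parameters(namespaces, netpols_per_namespace, pod_selectors,
                         peer_namespaces, ingress=True, egress=False):
    """Compute the composite test parameters from the env-style variables."""
    policies = namespaces * netpols_per_namespace * (int(ingress) + int(egress))
    used_selectors = policies * pod_selectors
    different_selectors = comb(namespaces, peer_namespaces)
    return {
        "network_policies": policies,
        "used_peer_namespace_selectors": used_selectors,
        "different_peer_namespace_selectors": different_selectors,
        "used_vs_different_percent": 100.0 * used_selectors / different_selectors,
    }

# Hypothetical run: 20 namespaces, 10 ingress+egress policies each, 2 selector rules,
# 3 peer namespaces per selector.
print(composite_parameters(20, 10, 2, 3, ingress=True, egress=True))
# -> 400 policies, 800 used selectors, C(20, 3) = 1140 different ones (~70% used)
```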
-------------------------------------------------------------------------------- /kube-burner-workload/calico/README.md: -------------------------------------------------------------------------------- 1 | ## Running 2 | 3 | 1. This profile assumes you have a calico cluster, and the KUBECONFIG that can be used in the scale test. 4 | 2. Set env variables with the test config in the `env` file 5 | 6 | 2.1 Set env file variable PLATFORM=calico 7 | 8 | 3. Set env variables in the `calico/env` file 9 | 4. `source ./env` 10 | 5. Run the test: `kube-burner init -m ./calico/metrics.yml -c ./network-policy.yaml -u https://[prometheus url] --log-level=debug` 11 | 6. When the test finishes, metrics should be collected by the ES_SERVER 12 | 13 | ## Finding the limit 14 | 15 | To automate finding the limit, [test_limit.sh](./test_limit.sh) script may be used. 16 | It can run multiple iterations increasing the number of network policies until test fails. 17 | It waits for full cleanup after every iteration to ensure the cluster is ready for the next one. 18 | 19 | ## Metrics and Dashboards 20 | 21 | Metrics in this folder are calico-specific, but may be tweaked for other clusters, e.g. by changing 22 | filtered namespaces for `containerCPU` metrics. 23 | 24 | `./grafana_dash.json` has the JSON model that defines the dashboard. It uses metrics defined in `./metrics.yml` 25 | and may be used as an example to define dashboard for other clusters. 26 | -------------------------------------------------------------------------------- /kube-burner-workload/calico/calico-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npinaeva/k8s-netpol-scale/3d1aabaf4511f27966b567ba8192f8cce6b52375/kube-burner-workload/calico/calico-dashboard.png -------------------------------------------------------------------------------- /kube-burner-workload/calico/convergence_tracker.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: convergence-tracker-{{.Replica}} 5 | labels: 6 | app: convergence-tracker 7 | spec: 8 | topologySpreadConstraints: 9 | - maxSkew: 1 10 | topologyKey: kubernetes.io/hostname 11 | whenUnsatisfiable: DoNotSchedule 12 | labelSelector: 13 | matchLabels: 14 | app: convergence-tracker 15 | restartPolicy: Never 16 | hostNetwork: true 17 | containers: 18 | - name: tracker 19 | # image built with the ./policy-tracker/Dockerfile 20 | image: gcr.io/unique-caldron-775/netpol-benchmark/convergence:latest 21 | securityContext: 22 | privileged: true 23 | command: [ "/bin/bash", "-c", "python policy-tracker.py"] 24 | imagePullPolicy: Always 25 | env: 26 | - name: CONVERGENCE_PERIOD 27 | value: "{{.convergence_period}}" 28 | - name: CONVERGENCE_TIMEOUT 29 | value: "{{.convergence_timeout}}" 30 | - name: ES_SERVER 31 | value: {{.es_server}} 32 | - name: ES_INDEX_NETPOL 33 | value: {{.es_index}} 34 | - name: UUID 35 | value: {{.UUID}} 36 | - name: METADATA 37 | value: "{{.metadata}}" 38 | - name: MY_NODE_NAME 39 | valueFrom: 40 | fieldRef: 41 | fieldPath: spec.nodeName 42 | -------------------------------------------------------------------------------- /kube-burner-workload/calico/env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -a 3 | ES_SERVER=http://localhost:9200 4 | ES_INDEX=calico-benchmark 5 | set +a 6 | 
-------------------------------------------------------------------------------- /kube-burner-workload/calico/metrics.yml: -------------------------------------------------------------------------------- 1 | # API server 2 | 3 | - query: irate(apiserver_request_total{verb="POST", resource="pods", subresource="binding",code="201"}[2m]) > 0 4 | metricName: schedulingThroughput 5 | 6 | - query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"LIST|GET", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0 7 | metricName: readOnlyAPICallsLatency 8 | 9 | - query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"POST|PUT|DELETE|PATCH", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0 10 | metricName: mutatingAPICallsLatency 11 | 12 | - query: sum(irate(apiserver_request_total{apiserver="kube-apiserver",verb!="WATCH"}[2m])) by (verb,resource,code) > 0 13 | metricName: APIRequestRate 14 | 15 | # Containers & pod metrics 16 | - query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"kube-system|calico-system"}[2m]) * 100) by (container, pod, namespace, node)) > 0 17 | metricName: containerCPU 18 | 19 | - query: (sum(container_memory_rss{name!="",container!="POD",namespace=~"kube-system|calico-system"}) by (container, pod, namespace, node)) > 0 20 | metricName: containerMemory 21 | 22 | # Cluster metrics 23 | 24 | - query: max_over_time( count(kube_pod_labels{label_kube_burner_job="network-policy-perf"})[{{ .elapsed }}:] ) 25 | metricName: podCount 26 | 27 | - query: max_over_time( count(kube_namespace_labels{label_kube_burner_job="network-policy-perf"})[{{ .elapsed }}:] ) 28 | metricName: namespaceCount 29 | 30 | - query: max_over_time( count(kube_networkpolicy_labels{networkpolicy=~"ingress.*"})[{{ .elapsed }}:] ) 31 | metricName: netpolIngressCount 32 | 33 | - query: max_over_time( count(kube_networkpolicy_labels{networkpolicy=~"egress.*"})[{{ .elapsed }}:] ) 34 | metricName: netpolEgressCount 35 | 36 | - query: kube_node_role 37 | metricName: nodeRoles 38 | 39 | - query: sum(kube_node_status_condition{status="true"}) by (condition) 40 | metricName: nodeStatus 41 | 42 | - query: kubernetes_build_info 43 | metricName: k8sVersion 44 | instant: true 45 | 46 | # Calico metrics 47 | 48 | - query: max_over_time( count(felix_cluster_num_hosts)[{{ .elapsed }}:] ) 49 | metricName: felix_cluster_num_hosts 50 | 51 | - query: felix_active_local_endpoints 52 | metricName: felix_active_local_endpoints 53 | 54 | - query: felix_active_local_policies 55 | metricName: felix_active_local_policies 56 | 57 | - query: felix_active_local_selectors 58 | metricName: felix_active_local_selectors 59 | 60 | - query: felix_label_index_num_endpoints 61 | metricName: felix_label_index_num_endpoints 62 | 63 | - query: felix_label_index_num_active_selectors{optimized="true"} 64 | metricName: felix_label_index_num_active_selectors_optimized 65 | 66 | - query: felix_label_index_num_active_selectors{optimized="false"} 67 | metricName: felix_label_index_num_active_selectors_not_optimized 68 | -------------------------------------------------------------------------------- /kube-burner-workload/calico/monitoring.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: calico-monitoring 6 | --- 7 | 
apiVersion: v1 8 | kind: Service 9 | metadata: 10 | name: prometheus-service 11 | namespace: calico-monitoring 12 | annotations: 13 | prometheus.io/scrape: 'true' 14 | prometheus.io/port: '9090' 15 | spec: 16 | selector: 17 | app: prometheus-server 18 | type: NodePort 19 | ports: 20 | - port: 8080 21 | targetPort: 9090 22 | --- 23 | apiVersion: rbac.authorization.k8s.io/v1 24 | kind: ClusterRole 25 | metadata: 26 | name: prometheus 27 | rules: 28 | - apiGroups: [""] 29 | resources: 30 | - nodes 31 | - nodes/proxy 32 | - services 33 | - endpoints 34 | - pods 35 | verbs: ["get", "list", "watch"] 36 | - apiGroups: 37 | - extensions 38 | resources: 39 | - ingresses 40 | verbs: ["get", "list", "watch"] 41 | - nonResourceURLs: ["/metrics"] 42 | verbs: ["get"] 43 | --- 44 | apiVersion: rbac.authorization.k8s.io/v1 45 | kind: ClusterRoleBinding 46 | metadata: 47 | name: prometheus 48 | roleRef: 49 | apiGroup: rbac.authorization.k8s.io 50 | kind: ClusterRole 51 | name: prometheus 52 | subjects: 53 | - kind: ServiceAccount 54 | name: default 55 | namespace: calico-monitoring 56 | --- 57 | apiVersion: v1 58 | kind: ConfigMap 59 | metadata: 60 | name: prometheus-server-conf 61 | labels: 62 | name: prometheus-server-conf 63 | namespace: calico-monitoring 64 | data: 65 | prometheus.yml: |- 66 | global: 67 | scrape_interval: 5s 68 | evaluation_interval: 5s 69 | scrape_configs: 70 | - job_name: 'kubernetes-apiservers' 71 | kubernetes_sd_configs: 72 | - role: endpoints 73 | scheme: https 74 | tls_config: 75 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 76 | insecure_skip_verify: true 77 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 78 | relabel_configs: 79 | - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] 80 | action: keep 81 | regex: default;kubernetes;https 82 | 83 | - job_name: 'kubernetes-controller-manager' 84 | honor_labels: true 85 | scheme: https 86 | tls_config: 87 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 88 | insecure_skip_verify: true 89 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 90 | static_configs: 91 | - targets: 92 | - 127.0.0.1:10257 93 | 94 | - job_name: 'kubernetes-nodes' 95 | scheme: https 96 | tls_config: 97 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 98 | insecure_skip_verify: true 99 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 100 | kubernetes_sd_configs: 101 | - role: node 102 | relabel_configs: 103 | - action: labelmap 104 | regex: __meta_kubernetes_node_label_(.+) 105 | - target_label: __address__ 106 | replacement: localhost:6443 107 | - source_labels: [__meta_kubernetes_node_name] 108 | regex: (.+) 109 | target_label: __metrics_path__ 110 | replacement: /api/v1/nodes/${1}/proxy/metrics 111 | 112 | - job_name: 'calico-nodes' 113 | scheme: http 114 | kubernetes_sd_configs: 115 | - role: node 116 | relabel_configs: 117 | - action: labelmap 118 | regex: __meta_kubernetes_node_label_(.+) 119 | - source_labels: [__meta_kubernetes_node_address_InternalIP] 120 | target_label: __address__ 121 | replacement: $1:9091 122 | - source_labels: [__meta_kubernetes_node_name] 123 | regex: (.+) 124 | target_label: __metrics_path__ 125 | replacement: /metrics 126 | 127 | - job_name: 'kubernetes-cadvisor' 128 | scheme: https 129 | tls_config: 130 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 131 | insecure_skip_verify: true 132 | bearer_token_file: 
/var/run/secrets/kubernetes.io/serviceaccount/token 133 | kubernetes_sd_configs: 134 | - role: node 135 | relabel_configs: 136 | - action: labelmap 137 | regex: __meta_kubernetes_node_label_(.+) 138 | - target_label: __address__ 139 | replacement: localhost:6443 140 | - source_labels: [__meta_kubernetes_node_name] 141 | regex: (.+) 142 | target_label: __metrics_path__ 143 | replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor 144 | 145 | - job_name: 'kube-state-metrics' 146 | honor_timestamps: true 147 | scrape_interval: 1m 148 | scrape_timeout: 1m 149 | metrics_path: /metrics 150 | scheme: http 151 | static_configs: 152 | - targets: 153 | - kube-state-metrics.kube-system.svc.cluster.local:8080 154 | 155 | --- 156 | apiVersion: v1 157 | kind: Pod 158 | metadata: 159 | name: prometheus 160 | namespace: calico-monitoring 161 | labels: 162 | app: prometheus-server 163 | spec: 164 | hostNetwork: true 165 | nodeSelector: 166 | node-role.kubernetes.io/control-plane: "" 167 | tolerations: 168 | - key: CriticalAddonsOnly 169 | operator: Exists 170 | - effect: NoSchedule 171 | key: node-role.kubernetes.io/master 172 | - effect: NoSchedule 173 | key: node-role.kubernetes.io/control-plane 174 | containers: 175 | - name: prometheus 176 | image: prom/prometheus:latest 177 | args: 178 | - "--config.file=/etc/prometheus/prometheus.yml" 179 | - "--storage.tsdb.path=/prometheus/" 180 | - "--web.enable-admin-api" 181 | ports: 182 | - containerPort: 9090 183 | volumeMounts: 184 | - name: prometheus-config-volume 185 | mountPath: /etc/prometheus/ 186 | - name: prometheus-storage-volume 187 | mountPath: /prometheus/ 188 | volumes: 189 | - name: prometheus-config-volume 190 | configMap: 191 | defaultMode: 420 192 | name: prometheus-server-conf 193 | - name: prometheus-storage-volume 194 | emptyDir: {} 195 | -------------------------------------------------------------------------------- /kube-burner-workload/calico/policy-tracker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:slim 2 | RUN apt update && \ 3 | apt install -y curl iptables ipset 4 | COPY policy-tracker.py policy-tracker.py 5 | COPY ./requirements.txt requirements.txt 6 | RUN pip install -r requirements.txt 7 | -------------------------------------------------------------------------------- /kube-burner-workload/calico/policy-tracker/policy-tracker.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import os 4 | import ssl 5 | import sys 6 | import time 7 | import subprocess 8 | 9 | from opensearchpy import OpenSearch 10 | 11 | 12 | def index_result(payload, retry_count=30): 13 | logging.info( 14 | f"Sending metric to es server {es_server} with index {es_index}\n{payload}" 15 | ) 16 | while retry_count > 0: 17 | try: 18 | ssl_ctx = ssl.create_default_context() 19 | ssl_ctx.check_hostname = False 20 | ssl_ctx.verify_mode = ssl.CERT_NONE 21 | es = OpenSearch([es_server]) 22 | es.index(index=es_index, body=payload) 23 | retry_count = 0 24 | except Exception as e: 25 | logging.info("Failed Indexing", e) 26 | logging.info("Retrying to index...") 27 | retry_count -= 1 28 | 29 | 30 | def get_number_of_filter_rules(): 31 | result = get_iptables_rules("filter") 32 | return result.count("\n") 33 | 34 | 35 | def get_number_of_raw_rules(): 36 | result = get_iptables_rules("raw") 37 | return result.count("\n") 38 | 39 | 40 | def get_iptables_rules(table="filter"): 41 | try: 42 | output = subprocess.run( 43 | 
["iptables-legacy", "--list-rules", "-t", table], 44 | capture_output=True, 45 | text=True, 46 | ) 47 | return output.stdout 48 | except Exception as e: 49 | logging.error(f"Failed getting iptables rules in table {table}: {e}") 50 | return "" 51 | 52 | 53 | def get_ipsets_len(): 54 | result = get_all_ipsets() 55 | return result.count("\n") 56 | 57 | 58 | def get_all_ipsets(): 59 | try: 60 | output = subprocess.run( 61 | ["ipset", "list"], 62 | capture_output=True, 63 | text=True, 64 | ) 65 | return output.stdout 66 | except Exception as e: 67 | logging.error(f"Failed listing ipsets: {e}") 68 | return "" 69 | 70 | 71 | # poll_interval in seconds, float 72 | # convergence_period in seconds, for how long number of flows shouldn't change to consider it stable 73 | # convergence_timeout in seconds, for how long number to wait for stabilisation before timing out 74 | def wait_for_rules_to_stabilize( 75 | poll_interval, convergence_period, convergence_timeout, node_name 76 | ): 77 | timeout = convergence_timeout + convergence_period 78 | start = time.time() 79 | last_changed = time.time() 80 | filter_rules_num = get_number_of_filter_rules() 81 | raw_rules_num = get_number_of_raw_rules() 82 | changed = False 83 | ipsets_len = get_ipsets_len() 84 | while time.time() - last_changed < convergence_period: 85 | if time.time() - start >= timeout: 86 | logging.info(f"TIMEOUT: {node_name} {timeout} seconds passed") 87 | return 1 88 | 89 | new_raw_rules_num = get_number_of_raw_rules() 90 | if new_raw_rules_num != raw_rules_num: 91 | raw_rules_num = new_raw_rules_num 92 | last_changed = time.time() 93 | changed = True 94 | logging.info(f"{node_name}: iptables raw table rules={raw_rules_num}") 95 | 96 | new_filter_rules_num = get_number_of_filter_rules() 97 | if new_filter_rules_num != filter_rules_num: 98 | filter_rules_num = new_filter_rules_num 99 | last_changed = time.time() 100 | changed = True 101 | logging.info(f"{node_name}: iptables filter table rules={filter_rules_num}") 102 | 103 | new_ipsets_len = get_ipsets_len() 104 | if new_ipsets_len != ipsets_len: 105 | ipsets_len = new_ipsets_len 106 | last_changed = time.time() 107 | changed = True 108 | logging.info(f"{node_name}: length of ipset list={ipsets_len}") 109 | 110 | if changed: 111 | doc = { 112 | "metricName": "convergence_tracker", 113 | "timestamp": datetime.datetime.now(datetime.UTC), 114 | "workload": "network-policy-perf", 115 | "uuid": uuid, 116 | "source_name": node_name, 117 | "convergence_timestamp": datetime.datetime.fromtimestamp(last_changed), 118 | "iptables_filter_rules": filter_rules_num, 119 | "iptables_raw_rules": raw_rules_num, 120 | "ipsets_list_len": ipsets_len, 121 | } 122 | index_result(doc) 123 | changed = False 124 | 125 | time.sleep(poll_interval) 126 | 127 | stabilize_datetime = datetime.datetime.fromtimestamp(last_changed) 128 | logging.info( 129 | f"RESULT: time={stabilize_datetime.isoformat(sep=' ', timespec='milliseconds')} {node_name} " 130 | f"finished with {filter_rules_num} rules in filter table, and {raw_rules_num} rules in raw table " 131 | f"and with {ipsets_len} lines in ipset list." 
132 | ) 133 | doc = { 134 | "metricName": "convergence_tracker", 135 | "timestamp": datetime.datetime.now(datetime.UTC), 136 | "workload": "network-policy-perf", 137 | "uuid": uuid, 138 | "source_name": node_name, 139 | "convergence_timestamp": datetime.datetime.fromtimestamp(last_changed), 140 | "iptables_filter_rules": filter_rules_num, 141 | "iptables_raw_rules": raw_rules_num, 142 | "ipsets_list_len": ipsets_len, 143 | } 144 | index_result(doc) 145 | return 0 146 | 147 | 148 | def main(): 149 | global es_server, es_index, start_time, uuid 150 | es_server = os.getenv("ES_SERVER") 151 | es_index = os.getenv("ES_INDEX_NETPOL") 152 | node_name = os.getenv("MY_NODE_NAME") 153 | uuid = os.getenv("UUID") 154 | convergence_period = int(os.getenv("CONVERGENCE_PERIOD")) 155 | convergence_timeout = int(os.getenv("CONVERGENCE_TIMEOUT")) 156 | start_time = datetime.datetime.now() 157 | 158 | logging.basicConfig( 159 | format="%(asctime)s %(levelname)-8s %(message)s", 160 | level=logging.INFO, 161 | datefmt="%Y-%m-%d %H:%M:%S", 162 | ) 163 | doc = { 164 | "metricName": "convergence_tracker_info", 165 | "timestamp": datetime.datetime.now(datetime.UTC), 166 | "workload": "network-policy-perf", 167 | "uuid": uuid, 168 | "source_name": node_name, 169 | "convergence_period": convergence_period, 170 | "convergence_timeout": convergence_timeout, 171 | "test_metadata": os.getenv("METADATA"), 172 | } 173 | index_result(doc) 174 | 175 | logging.info( 176 | f"Start calico-tracker {node_name}, convergence_period {convergence_period}, convergence timeout {convergence_timeout}" 177 | ) 178 | timeout = wait_for_rules_to_stabilize( 179 | 10, convergence_period, convergence_timeout, node_name 180 | ) 181 | sys.exit(timeout) 182 | 183 | 184 | if __name__ == "__main__": 185 | main() 186 | -------------------------------------------------------------------------------- /kube-burner-workload/calico/policy-tracker/requirements.txt: -------------------------------------------------------------------------------- 1 | datetime 2 | requests 3 | kubernetes 4 | opensearch-py 5 | -------------------------------------------------------------------------------- /kube-burner-workload/calico/test_limit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | check_and_wait () { 4 | pause=30 5 | echo "============================================================" 6 | echo "> Iteration with $NETPOLS_PER_NAMESPACE network policies per ns finished. Status: $status" 7 | if [ "$status" -ne "$expectedStatus" ]; then 8 | echo "> Test failed. Exiting..." 9 | exit 0 10 | fi 11 | echo "> Test passed. Waiting for $pause seconds for next iteration." 12 | sleep $pause 13 | } 14 | 15 | find_prometheus() { 16 | prometheus_port=$(kubectl get svc prometheus-service -n calico-monitoring -ojsonpath="{.spec.ports[0].nodePort}") 17 | prometheus_addr=$(kubectl get node -ojsonpath="{.items[0].status.addresses[0].address}") 18 | prometheus_url="http://$prometheus_addr:$prometheus_port" 19 | echo "> Promtheus URL=$prometheus_url" 20 | } 21 | 22 | 23 | cd .. 
24 | source ./env 25 | kubectl apply -f "$PLATFORM/monitoring.yaml" 26 | kubectl patch felixconfiguration default --type='merge' -p '{"spec":{"prometheusMetricsEnabled":true}}' 27 | sleep 10 28 | 29 | NETPOLS_PER_NAMESPACE=0 30 | STEP=100 31 | expectedStatus=0 32 | status=$expectedStatus 33 | find_prometheus 34 | 35 | while true; do 36 | NETPOLS_PER_NAMESPACE=$((NETPOLS_PER_NAMESPACE + STEP)) 37 | echo "> Starting iteration with $NETPOLS_PER_NAMESPACE network policies per ns." 38 | echo "============================================================" 39 | kube-burner init -m "$PLATFORM/metrics.yml" -c ./network-policy.yaml -u "$prometheus_url" 40 | status=$? 41 | check_and_wait 42 | done 43 | -------------------------------------------------------------------------------- /kube-burner-workload/convergence_waiter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TIME_SPENT=0 4 | TIMEOUT=$((CONVERGENCE_TIMEOUT + CONVERGENCE_PERIOD)) 5 | while [ $TIME_SPENT -le "$TIMEOUT" ]; do 6 | FAILED_COUNT=$(kubectl get pods -n convergence-tracker-0 --field-selector status.phase=Failed -o name | wc -l) 7 | if [ "$FAILED_COUNT" -ne 0 ]; then 8 | echo "ERROR: convergence tracker pod reported failure" 9 | kubectl get pods -n convergence-tracker-0 --field-selector status.phase=Failed -o name 10 | exit 1 11 | fi 12 | RUNNING_COUNT=$(kubectl get pods -n convergence-tracker-0 --field-selector status.phase!=Succeeded -o name | wc -l) 13 | if [ "$RUNNING_COUNT" -eq 0 ]; then 14 | echo "DONE" 15 | exit 0 16 | fi 17 | sleep 30 18 | TIME_SPENT=$((TIME_SPENT + 30)) 19 | done 20 | exit 1 21 | -------------------------------------------------------------------------------- /kube-burner-workload/egress-np.yml: -------------------------------------------------------------------------------- 1 | {{- $podNum := add .pods_per_namespace 1 }} 2 | {{- $podNum = sub $podNum .peer_pods }} 3 | {{- $podDict := dict (toString $podNum) "true"}} 4 | {{- $podLabel := toJson $podDict }} 5 | {{- $localPodNum := add .pods_per_namespace 1 }} 6 | {{- $localPodNum = sub $localPodNum .local_pods }} 7 | {{- $localPodDict := dict (toString $localPodNum) "true"}} 8 | {{- $localPodLabel := toJson $localPodDict }} 9 | {{- $binomial := Binomial $.namespaces $.peer_namespaces }} 10 | kind: NetworkPolicy 11 | apiVersion: networking.k8s.io/v1 12 | metadata: 13 | name: egress-{{.Replica}} 14 | spec: 15 | podSelector: 16 | matchLabels: {{$localPodLabel}} 17 | egress: 18 | {{- $startIdx := mul $.Iteration .pod_selectors .netpols_per_namespace }} 19 | {{- $nsShift := mul (sub $.Replica 1) .pod_selectors }} 20 | {{- $startIdx = add $startIdx $nsShift -1 }} 21 | {{- range $i, $e := until .pod_selectors }} 22 | {{- $startIdx = add $startIdx 1 }} 23 | {{- if ge $startIdx $binomial }} 24 | {{- $startIdx = mod $startIdx $binomial }} 25 | {{- end }} 26 | {{- $nsIdxList := IndexToCombination nil (int $startIdx) $.namespaces $.peer_namespaces }} 27 | {{- $nsList := list }} 28 | {{- range $i, $nextNs := $nsIdxList }} 29 | {{- $next_namespace := print "network-policy-perf-" (add $nextNs 1) }} 30 | {{- $nsList = append $nsList $next_namespace }} 31 | {{- end }} 32 | {{- $nsNames := toJson $nsList }} 33 | - to: 34 | - podSelector: 35 | matchLabels: {{$podLabel}} 36 | namespaceSelector: 37 | matchExpressions: 38 | - key: kubernetes.io/metadata.name 39 | operator: In 40 | values: {{$nsNames}} 41 | ports: 42 | {{- $single_port := 1000 }} 43 | {{- range $i, $e := until $.single_ports }} 44 | {{- $single_port = add 
$single_port 1 }} 45 | - protocol: TCP 46 | port: {{$single_port}} 47 | {{- end }} 48 | {{- $rangeStart := 5000 }} 49 | {{- range $i, $e := until $.port_ranges }} 50 | {{- $rangeEnd := add $rangeStart 5 }} 51 | - protocol: TCP 52 | port: {{$rangeStart}} 53 | endPort: {{$rangeEnd}} 54 | {{ $rangeStart = add $rangeStart 10}} 55 | {{- end }} 56 | {{- end }} 57 | {{- if gt .cidr_rules 0 }} 58 | {{- $subnetIdx := add (mul $.Replica $.cidr_rules) 1 }} 59 | {{- range $i, $e := until .cidr_rules }} 60 | - to: 61 | - ipBlock: 62 | cidr: {{GetSubnet24 (int $subnetIdx) }} 63 | ports: 64 | {{- $single_port := 1000 }} 65 | {{- range $i, $e := until $.single_ports }} 66 | {{- $single_port = add $single_port 1 }} 67 | - protocol: TCP 68 | port: {{$single_port}} 69 | {{- end }} 70 | {{- $rangeStart := 5000 }} 71 | {{- range $i, $e := until $.port_ranges }} 72 | {{- $rangeEnd := add $rangeStart 5 }} 73 | - protocol: TCP 74 | port: {{$rangeStart}} 75 | endPort: {{$rangeEnd}} 76 | {{ $rangeStart = add $rangeStart 10}} 77 | {{- end }} 78 | {{- $subnetIdx = add $subnetIdx 1 }} 79 | {{- end }} 80 | {{- end }} 81 | policyTypes: 82 | - Egress 83 | -------------------------------------------------------------------------------- /kube-burner-workload/env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -a 3 | # minimal example 4 | NAMESPACES=1 5 | PODS_PER_NAMESPACE=1 6 | NETPOLS_PER_NAMESPACE=1 7 | 8 | # netpol config 9 | INGRESS=true 10 | EGRESS=false 11 | LOCAL_PODS=1 12 | SINGLE_PORTS=0 13 | PORT_RANGES=0 14 | POD_SELECTORS=0 15 | PEER_NAMESPACES=0 16 | PEER_PODS=0 17 | CIDRS=1 18 | 19 | # set kubeconfig 20 | KUBECONFIG= 21 | 22 | # PLATFORM is one of the folders under network-policy workload 23 | PLATFORM=kind-metrics 24 | # Convergence tracker settings 25 | CONVERGENCE_TRACKER= 26 | # CONVERGENCE_PERIOD and CONVERGENCE_TIMEOUT are convergence tracker parameters. 27 | # CONVERGENCE_PERIOD specifies for how long the system should be stable to be considered converged and 28 | # CONVERGENCE_TIMEOUT is a timer specifying the hard deadline for policy convergence. 29 | # A test failure will be reported by convergence tracker in CONVERGENCE_TIMEOUT + CONVERGENCE_PERIOD seconds. 30 | CONVERGENCE_PERIOD=60 31 | CONVERGENCE_TIMEOUT=3600 32 | 33 | # Number of nodes to run convergence tracker. Doesn't have effect if CONVERGENCE_TRACKER is false 34 | NODES_COUNT=3 35 | 36 | # JOB_PAUSE defines for how long he workload won't be deleted after the test is done 37 | # default behaviour is to wait for 5 minutes after job completion to see how the system 38 | # behaves some time after all work is done 39 | JOB_PAUSE=5m 40 | # to debug, use longer interval 41 | #JOB_PAUSE=1h 42 | 43 | # variables that should be filled by platform or stay empty 44 | JOB_NAMESPACE_LABELS= 45 | ES_SERVER= 46 | ES_INDEX= 47 | 48 | if [[ ! 
-z $PLATFORM ]]; then 49 | if test -f $PLATFORM/env; then 50 | source $PLATFORM/env 51 | fi 52 | fi 53 | set +a 54 | -------------------------------------------------------------------------------- /kube-burner-workload/ingress-np.yml: -------------------------------------------------------------------------------- 1 | {{- $podNum := add .pods_per_namespace 1 }} 2 | {{- $podNum = sub $podNum .peer_pods }} 3 | {{- $podDict := dict (toString $podNum) "true"}} 4 | {{- $podLabel := toJson $podDict }} 5 | {{- $localPodNum := add .pods_per_namespace 1 }} 6 | {{- $localPodNum = sub $localPodNum .local_pods }} 7 | {{- $localPodDict := dict (toString $localPodNum) "true"}} 8 | {{- $localPodLabel := toJson $localPodDict }} 9 | {{- $binomial := Binomial $.namespaces $.peer_namespaces }} 10 | kind: NetworkPolicy 11 | apiVersion: networking.k8s.io/v1 12 | metadata: 13 | name: ingress-{{.Replica}} 14 | spec: 15 | podSelector: 16 | matchLabels: {{$localPodLabel}} 17 | ingress: 18 | {{- $startIdx := mul $.Iteration .pod_selectors .netpols_per_namespace }} 19 | {{- $nsShift := mul (sub $.Replica 1) .pod_selectors }} 20 | {{- $startIdx = add $startIdx $nsShift -1 }} 21 | {{- range $i, $e := until .pod_selectors }} 22 | {{- $startIdx = add $startIdx 1 }} 23 | {{- if ge $startIdx $binomial }} 24 | {{- $startIdx = mod $startIdx $binomial }} 25 | {{- end }} 26 | {{- $nsIdxList := IndexToCombination nil (int $startIdx) $.namespaces $.peer_namespaces }} 27 | {{- $nsList := list }} 28 | {{- range $i, $nextNs := $nsIdxList }} 29 | {{- $next_namespace := print "network-policy-perf-" (add $nextNs 1) }} 30 | {{- $nsList = append $nsList $next_namespace }} 31 | {{- end }} 32 | {{- $nsNames := toJson $nsList }} 33 | - from: 34 | - podSelector: 35 | matchLabels: {{$podLabel}} 36 | namespaceSelector: 37 | matchExpressions: 38 | - key: kubernetes.io/metadata.name 39 | operator: In 40 | values: {{$nsNames}} 41 | ports: 42 | {{- $single_port := 1000 }} 43 | {{- range $i, $e := until $.single_ports }} 44 | {{- $single_port = add $single_port 1 }} 45 | - protocol: TCP 46 | port: {{$single_port}} 47 | {{- end }} 48 | {{- $rangeStart := 5000 }} 49 | {{- range $i, $e := until $.port_ranges }} 50 | {{- $rangeEnd := add $rangeStart 5 }} 51 | - protocol: TCP 52 | port: {{$rangeStart}} 53 | endPort: {{$rangeEnd}} 54 | {{ $rangeStart = add $rangeStart 10}} 55 | {{- end }} 56 | {{- end }} 57 | {{- if gt .cidr_rules 0 }} 58 | {{- $subnetIdx := add (mul $.Replica $.cidr_rules) 1 }} 59 | {{- range $i, $e := until .cidr_rules }} 60 | - from: 61 | - ipBlock: 62 | cidr: {{GetSubnet24 (int $subnetIdx) }} 63 | ports: 64 | {{- $single_port := 1000 }} 65 | {{- range $i, $e := until $.single_ports }} 66 | {{- $single_port = add $single_port 1 }} 67 | - protocol: TCP 68 | port: {{$single_port}} 69 | {{- end }} 70 | {{- $rangeStart := 5000 }} 71 | {{- range $i, $e := until $.port_ranges }} 72 | {{- $rangeEnd := add $rangeStart 5 }} 73 | - protocol: TCP 74 | port: {{$rangeStart}} 75 | endPort: {{$rangeEnd}} 76 | {{ $rangeStart = add $rangeStart 10}} 77 | {{- end }} 78 | {{- $subnetIdx = add $subnetIdx 1 }} 79 | {{- end }} 80 | {{- end }} 81 | policyTypes: 82 | - Ingress 83 | -------------------------------------------------------------------------------- /kube-burner-workload/kind-metrics/README.md: -------------------------------------------------------------------------------- 1 | This folder helps you enable metric collection for scale tests. 2 | It consists of the following steps: 3 | 1. Install Prometheus in a KinD cluster 4 | 2. 
Run Elasticsearch locally with docker 5 | 3. Run Grafana locally with docker 6 | 4. Run kube-burner with metrics collection 7 | 5. Configure Grafana dashboard to collect data from Elasticsearch 8 | 9 | 10 | You may have some of the mentioned steps already done, then just replace IPs and ports in the following steps. 11 | 12 | 1. Install Prometheus in a KinD cluster 13 | `kubectl apply -f monitoring.yaml` 14 | This command will create a monitoring namespace, Prometheus pod and a NodePort service. 15 | You can check Prometheus interface at : 16 | 17 | 2,3. Run Elasticsearch and Grafana locally with docker 18 | 19 | To ensure collected data outlives the KinD cluster, we run Elasticsearch and Grafana as external containers. 20 | 21 | ```shell 22 | docker run -d --name=elasticsearch -p 9200:9200 -e "discovery.type=single-node" -e "xpack.security.enabled=false" docker.elastic.co/elasticsearch/elasticsearch:8.12.1 23 | docker run -d --name=grafana -p 3000:3000 grafana/grafana:latest 24 | ``` 25 | 26 | To get docker container IP, use 27 | `docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' ` 28 | 29 | Grafana should be running at localhost:3000, credentials are admin/admin. 30 | 31 | 4. Run kube-burner with metrics collection 32 | 33 | ```shell 34 | cd ./kube-burner-workload 35 | source ./env 36 | kube-burner init -m ./kind-metrics/metrics.yml -c ./network-policy.yaml -u http://: 37 | ``` 38 | 39 | Wait for kube-burner to finish (takes around 6 minutes, waiting time is configured with JOB_PAUSE). 40 | 41 | 5. Configure Grafana dashboard to collect data from Elasticsearch 42 | 43 | - Log into Grafana (see step 3), go to configure Data Sources (Menu > Connections > Data sources) 44 | - Add data source of type elasticsearch 45 | - Configure 46 | 47 | URL = `http://admin:admin@:9200`\ 48 | No Authentication\ 49 | Elasticsearch details > 50 | - Index name = `kube-burner` 51 | - Time field name = `timestamp` 52 | 53 | - Click `Save & test`, expect "Data source successfully connected." 54 | - Create a dashboard, Menu > Dashboards > New > Import Dashboard 55 | - Import json form [./grafana_dash.json](./grafana_dash.json) 56 | 57 | You should see something like ![image](./grafana.png) 58 | If not, try to click on UUID dropdown and see if there is something to select. 59 | 60 | 6. More metrics may be added with e.g. 
https://github.com/kubernetes/kube-state-metrics 61 | -------------------------------------------------------------------------------- /kube-burner-workload/kind-metrics/env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -a 3 | ES_SERVER=http://admin:admin@localhost:9200 4 | ES_INDEX=kube-burner 5 | set +a 6 | -------------------------------------------------------------------------------- /kube-burner-workload/kind-metrics/grafana.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npinaeva/k8s-netpol-scale/3d1aabaf4511f27966b567ba8192f8cce6b52375/kube-burner-workload/kind-metrics/grafana.png -------------------------------------------------------------------------------- /kube-burner-workload/kind-metrics/metrics.yml: -------------------------------------------------------------------------------- 1 | # API server 2 | 3 | - query: irate(apiserver_request_total{verb="POST", resource="pods", subresource="binding",code="201"}[2m]) > 0 4 | metricName: schedulingThroughput 5 | 6 | - query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"LIST|GET", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0 7 | metricName: readOnlyAPICallsLatency 8 | 9 | - query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"POST|PUT|DELETE|PATCH", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0 10 | metricName: mutatingAPICallsLatency 11 | 12 | - query: sum(irate(apiserver_request_total{apiserver="kube-apiserver",verb!="WATCH"}[2m])) by (verb,resource,code) > 0 13 | metricName: APIRequestRate 14 | 15 | # Containers & pod metrics 16 | - query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace="kube-system"}[2m]) * 100) by (container, pod, namespace, node)) > 0 17 | metricName: containerCPU 18 | 19 | - query: (sum(container_memory_rss{name!="",container!="POD",namespace="kube-system"}) by (container, pod, namespace, node)) > 0 20 | metricName: containerMemory 21 | 22 | # Cluster metrics 23 | 24 | - query: kubernetes_build_info 25 | metricName: k8sVersion 26 | instant: true 27 | -------------------------------------------------------------------------------- /kube-burner-workload/kind-metrics/monitoring.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Namespace 4 | metadata: 5 | name: monitoring 6 | --- 7 | apiVersion: v1 8 | kind: Service 9 | metadata: 10 | name: prometheus-service 11 | namespace: monitoring 12 | annotations: 13 | prometheus.io/scrape: 'true' 14 | prometheus.io/port: '9090' 15 | spec: 16 | selector: 17 | app: prometheus-server 18 | type: NodePort 19 | ports: 20 | - port: 8080 21 | targetPort: 9090 22 | --- 23 | apiVersion: rbac.authorization.k8s.io/v1 24 | kind: ClusterRole 25 | metadata: 26 | name: prometheus 27 | rules: 28 | - apiGroups: [""] 29 | resources: 30 | - nodes 31 | - nodes/proxy 32 | - services 33 | - endpoints 34 | - pods 35 | verbs: ["get", "list", "watch"] 36 | - apiGroups: 37 | - extensions 38 | resources: 39 | - ingresses 40 | verbs: ["get", "list", "watch"] 41 | - nonResourceURLs: ["/metrics"] 42 | verbs: ["get"] 43 | --- 44 | apiVersion: rbac.authorization.k8s.io/v1 45 | kind: ClusterRoleBinding 46 | metadata: 47 | name: prometheus 48 | 
roleRef: 49 | apiGroup: rbac.authorization.k8s.io 50 | kind: ClusterRole 51 | name: prometheus 52 | subjects: 53 | - kind: ServiceAccount 54 | name: default 55 | namespace: monitoring 56 | --- 57 | apiVersion: v1 58 | kind: ConfigMap 59 | metadata: 60 | name: prometheus-server-conf 61 | labels: 62 | name: prometheus-server-conf 63 | namespace: monitoring 64 | data: 65 | prometheus.yml: |- 66 | global: 67 | scrape_interval: 5s 68 | evaluation_interval: 5s 69 | scrape_configs: 70 | - job_name: 'kubernetes-apiservers' 71 | kubernetes_sd_configs: 72 | - role: endpoints 73 | scheme: https 74 | tls_config: 75 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 76 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 77 | relabel_configs: 78 | - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] 79 | action: keep 80 | regex: default;kubernetes;https 81 | 82 | - job_name: 'kubernetes-controller-manager' 83 | honor_labels: true 84 | scheme: https 85 | tls_config: 86 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 87 | insecure_skip_verify: true 88 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 89 | static_configs: 90 | - targets: 91 | - 127.0.0.1:10257 92 | 93 | - job_name: 'kubernetes-nodes' 94 | scheme: https 95 | tls_config: 96 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 97 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 98 | kubernetes_sd_configs: 99 | - role: node 100 | relabel_configs: 101 | - action: labelmap 102 | regex: __meta_kubernetes_node_label_(.+) 103 | - target_label: __address__ 104 | replacement: localhost:6443 105 | - source_labels: [__meta_kubernetes_node_name] 106 | regex: (.+) 107 | target_label: __metrics_path__ 108 | replacement: /api/v1/nodes/${1}/proxy/metrics 109 | 110 | - job_name: 'kubernetes-cadvisor' 111 | scheme: https 112 | tls_config: 113 | ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt 114 | bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token 115 | kubernetes_sd_configs: 116 | - role: node 117 | relabel_configs: 118 | - action: labelmap 119 | regex: __meta_kubernetes_node_label_(.+) 120 | - target_label: __address__ 121 | replacement: localhost:6443 122 | - source_labels: [__meta_kubernetes_node_name] 123 | regex: (.+) 124 | target_label: __metrics_path__ 125 | replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor 126 | 127 | --- 128 | apiVersion: v1 129 | kind: Pod 130 | metadata: 131 | name: prometheus 132 | namespace: monitoring 133 | labels: 134 | app: prometheus-server 135 | spec: 136 | hostNetwork: true 137 | nodeSelector: 138 | node-role.kubernetes.io/control-plane: "" 139 | tolerations: 140 | - key: CriticalAddonsOnly 141 | operator: Exists 142 | - effect: NoSchedule 143 | key: node-role.kubernetes.io/master 144 | - effect: NoSchedule 145 | key: node-role.kubernetes.io/control-plane 146 | containers: 147 | - name: prometheus 148 | image: prom/prometheus:latest 149 | args: 150 | - "--config.file=/etc/prometheus/prometheus.yml" 151 | - "--storage.tsdb.path=/prometheus/" 152 | - "--web.enable-admin-api" 153 | ports: 154 | - containerPort: 9090 155 | volumeMounts: 156 | - name: prometheus-config-volume 157 | mountPath: /etc/prometheus/ 158 | - name: prometheus-storage-volume 159 | mountPath: /prometheus/ 160 | volumes: 161 | - name: prometheus-config-volume 162 | configMap: 163 | defaultMode: 420 164 | name: prometheus-server-conf 165 | - name: 
prometheus-storage-volume 166 | emptyDir: {} 167 | -------------------------------------------------------------------------------- /kube-burner-workload/network-policy.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | {{- if .ES_SERVER }} 3 | global: 4 | gc: true 5 | indexerConfig: 6 | esServers: ["{{.ES_SERVER}}"] 7 | insecureSkipVerify: true 8 | defaultIndex: {{.ES_INDEX}} 9 | type: elastic 10 | {{- end }} 11 | jobs: 12 | {{- if .CONVERGENCE_TRACKER }} 13 | - name: convergence-tracker 14 | namespace: convergence-tracker 15 | jobIterations: 1 16 | podWait: false 17 | waitWhenFinished: false 18 | preLoadImages: false 19 | churn: false 20 | cleanup: true 21 | {{- if .JOB_NAMESPACE_LABELS }} 22 | namespaceLabels: {{.JOB_NAMESPACE_LABELS}} 23 | {{- end}} 24 | jobPause: 30s 25 | objects: 26 | - objectTemplate: {{.PLATFORM}}/convergence_tracker.yml 27 | replicas: {{.NODES_COUNT}} 28 | inputVars: 29 | convergence_period: "{{.CONVERGENCE_PERIOD}}" 30 | convergence_timeout: "{{.CONVERGENCE_TIMEOUT}}" 31 | es_server: "{{.ES_SERVER}}" 32 | es_index: {{.ES_INDEX}} 33 | metadata: "netpols_per_namespace: {{.NETPOLS_PER_NAMESPACE}}, pods_per_namespace: {{.PODS_PER_NAMESPACE}}, 34 | local_pods: {{.LOCAL_PODS}}, pod_selectors: {{.POD_SELECTORS}}, 35 | single_ports: {{.SINGLE_PORTS}}, port_ranges: {{.PORT_RANGES}}, 36 | peer_namespaces: {{.PEER_NAMESPACES}}, peer_pods: {{.PEER_PODS}}, cidr_rules: {{.CIDRS}}" 37 | {{- end }} 38 | - name: network-policy-perf 39 | namespace: network-policy-perf 40 | jobIterations: {{.NAMESPACES}} 41 | qps: 300 42 | burst: 300 43 | namespacedIterations: true 44 | podWait: false 45 | waitWhenFinished: true 46 | # preLoadImages: true 47 | preLoadImages: false 48 | # preLoadPeriod: 30s 49 | # jobIterationDelay: 1m 50 | churn: false 51 | jobPause: "{{.JOB_PAUSE}}" 52 | {{- if .CONVERGENCE_TRACKER }} 53 | beforeCleanup: "convergence_waiter.sh" 54 | {{- end }} 55 | cleanup: true 56 | {{- if .JOB_NAMESPACE_LABELS }} 57 | namespaceLabels: {{.JOB_NAMESPACE_LABELS}} 58 | {{- end }} 59 | objects: 60 | - objectTemplate: pod.yml 61 | replicas: {{.PODS_PER_NAMESPACE}} 62 | {{- if eq .INGRESS "true" }} 63 | - objectTemplate: ingress-np.yml 64 | replicas: {{.NETPOLS_PER_NAMESPACE}} 65 | inputVars: 66 | namespaces: {{.NAMESPACES}} 67 | pods_per_namespace: {{.PODS_PER_NAMESPACE}} 68 | netpols_per_namespace: {{.NETPOLS_PER_NAMESPACE}} 69 | local_pods: {{.LOCAL_PODS}} 70 | pod_selectors: {{.POD_SELECTORS}} 71 | single_ports: {{.SINGLE_PORTS}} 72 | port_ranges: {{.PORT_RANGES}} 73 | peer_namespaces: {{.PEER_NAMESPACES}} 74 | peer_pods: {{.PEER_PODS}} 75 | cidr_rules: {{.CIDRS}} 76 | {{- end }} 77 | {{- if eq .EGRESS "true" }} 78 | - objectTemplate: egress-np.yml 79 | replicas: {{.NETPOLS_PER_NAMESPACE}} 80 | inputVars: 81 | namespaces: {{.NAMESPACES}} 82 | pods_per_namespace: {{.PODS_PER_NAMESPACE}} 83 | netpols_per_namespace: {{.NETPOLS_PER_NAMESPACE}} 84 | local_pods: {{.LOCAL_PODS}} 85 | pod_selectors: {{.POD_SELECTORS}} 86 | single_ports: {{.SINGLE_PORTS}} 87 | port_ranges: {{.PORT_RANGES}} 88 | peer_namespaces: {{.PEER_NAMESPACES}} 89 | peer_pods: {{.PEER_PODS}} 90 | cidr_rules: {{.CIDRS}} 91 | {{- end }} 92 | -------------------------------------------------------------------------------- /kube-burner-workload/openshift/README.md: -------------------------------------------------------------------------------- 1 | ## Running 2 | 3 | 1. This profile assumes you have an openshift cluster, and the KUBECONFIG that can be used in the scale test. 
4 | 2. Build kube-burner from the current branch 5 | `make build` 6 | 3. `cd ./examples/workloads/network-policy` 7 | 4. Set env variables with the test config in the `env` file 8 | 9 | 4.1 Set env file variable PLATFORM=openshift 10 | 11 | 5. Set env variables in the `openshift/env` file 12 | 6. `source ./env` 13 | 7. This command uses `oc` binary which is an Openshift CLI similar to kubectl 14 | `kube-burner init -m ./openshift/metrics.yml -c ./network-policy.yaml -u https://$(oc get route prometheus-k8s -n openshift-monitoring -o jsonpath="{.spec.host}") --log-level=debug --token=$(oc create token prometheus-k8s -n openshift-monitoring)` 15 | 8. When the test finishes, metrics should be collected by the ES_SERVER 16 | 17 | ## Finding the limit 18 | 19 | To automate finding the limit, [test_limit.sh](./test_limit.sh) script may be used. 20 | It can run multiple iterations increasing the number of network policies until test fails. 21 | It waits for full cleanup after every iteration to ensure the cluster is ready for the next one. 22 | 23 | ## Metrics and Dashboards 24 | 25 | Metrics in this folder are Openshift-specific, but may be tweaked for other clusters, e.g. by changing 26 | filtered namespaces for `containerCPU` metrics. 27 | 28 | `./grafana_dash.json` has the JSON model that defines the dashboard. It uses metrics defined in `./metrics.yml` 29 | and may be used as an example to define dashboard for other clusters. -------------------------------------------------------------------------------- /kube-burner-workload/openshift/convergence_tracker.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: convergence-tracker-{{.Replica}} 5 | labels: 6 | app: convergence-tracker 7 | spec: 8 | topologySpreadConstraints: 9 | - maxSkew: 1 10 | topologyKey: kubernetes.io/hostname 11 | whenUnsatisfiable: DoNotSchedule 12 | labelSelector: 13 | matchLabels: 14 | app: convergence-tracker 15 | tolerations: 16 | - key: "node-role.kubernetes.io/master" 17 | operator: "Exists" 18 | volumes: 19 | - name: openvswitch 20 | hostPath: 21 | path: /var/run/openvswitch 22 | - name: ovn 23 | hostPath: 24 | path: /var/run/ovn/ 25 | - name: ovn-ic 26 | hostPath: 27 | path: /var/run/ovn-ic/ 28 | - name: ovn-kubernetes 29 | hostPath: 30 | path: /var/run/ovn-kubernetes 31 | - name: host-var-log-ovs 32 | hostPath: 33 | path: /var/log/openvswitch 34 | - name: pod-logs 35 | hostPath: 36 | path: /var/log/pods 37 | restartPolicy: Never 38 | containers: 39 | - name: tracker 40 | # image built with the ./openflow-tracker/Dockerfile 41 | image: quay.io/npinaeva/netpol-scale:openshift 42 | securityContext: 43 | privileged: true 44 | command: [ "/bin/bash", "-c", "python openflow-tracker.py"] 45 | imagePullPolicy: Always 46 | volumeMounts: 47 | - name: openvswitch 48 | mountPath: /var/run/openvswitch 49 | - name: host-var-log-ovs 50 | mountPath: /var/log/openvswitch 51 | - name: ovn 52 | mountPath: /var/run/ovn 53 | - name: ovn-ic 54 | mountPath: /var/run/ovn-ic 55 | - name: pod-logs 56 | mountPath: /var/log/pods 57 | env: 58 | - name: CONVERGENCE_PERIOD 59 | value: "{{.convergence_period}}" 60 | - name: CONVERGENCE_TIMEOUT 61 | value: "{{.convergence_timeout}}" 62 | - name: ES_SERVER 63 | value: {{.es_server}} 64 | - name: ES_INDEX_NETPOL 65 | value: {{.es_index}} 66 | - name: UUID 67 | value: {{.UUID}} 68 | - name: METADATA 69 | value: "{{.metadata}}" 70 | - name: MY_NODE_NAME 71 | valueFrom: 72 | fieldRef: 73 | fieldPath: 
spec.nodeName 74 | -------------------------------------------------------------------------------- /kube-burner-workload/openshift/env: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -a 3 | ES_SERVER=https://example.com:443 4 | ES_INDEX=ripsaw-kube-burner 5 | JOB_NAMESPACE_LABELS=" 6 | security.openshift.io/scc.podSecurityLabelSync: false 7 | pod-security.kubernetes.io/enforce: privileged 8 | pod-security.kubernetes.io/audit: privileged 9 | pod-security.kubernetes.io/warn: privileged 10 | " 11 | set +a 12 | -------------------------------------------------------------------------------- /kube-burner-workload/openshift/grafana.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/npinaeva/k8s-netpol-scale/3d1aabaf4511f27966b567ba8192f8cce6b52375/kube-burner-workload/openshift/grafana.png -------------------------------------------------------------------------------- /kube-burner-workload/openshift/metrics.yml: -------------------------------------------------------------------------------- 1 | # API server 2 | 3 | - query: irate(apiserver_request_total{verb="POST", resource="pods", subresource="binding",code="201"}[2m]) > 0 4 | metricName: schedulingThroughput 5 | 6 | - query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"LIST|GET", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0 7 | metricName: readOnlyAPICallsLatency 8 | 9 | - query: histogram_quantile(0.99, sum(irate(apiserver_request_duration_seconds_bucket{apiserver="kube-apiserver", verb=~"POST|PUT|DELETE|PATCH", subresource!~"log|exec|portforward|attach|proxy"}[2m])) by (le, resource, verb, scope)) > 0 10 | metricName: mutatingAPICallsLatency 11 | 12 | - query: sum(irate(apiserver_request_total{apiserver="kube-apiserver",verb!="WATCH"}[2m])) by (verb,resource,code) > 0 13 | metricName: APIRequestRate 14 | 15 | # Containers & pod metrics 16 | - query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"openshift-(etcd|oauth-apiserver|sdn|ovn-kubernetes|.*apiserver|authentication|.*controller-manager|.*scheduler|image-registry|operator-lifecycle-manager)"}[2m]) * 100) by (container, pod, namespace, node) and on (node) kube_node_role{role="master"}) > 0 17 | metricName: containerCPU-Masters 18 | 19 | - query: (avg(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|ingress)"}[2m]) * 100 and on (node) kube_node_role{role="worker"}) by (namespace, pod, container, node)) > 0 20 | metricName: containerCPU-AggregatedWorkers 21 | 22 | - query: (sum(irate(container_cpu_usage_seconds_total{name!="",container!="POD",namespace=~"openshift-(monitoring|sdn|ovn-kubernetes|ingress)"}[2m]) * 100) by (container, pod, namespace, node) and on (node) kube_node_role{role="infra"}) > 0 23 | metricName: containerCPU-Infra 24 | 25 | - query: (sum(container_memory_rss{name!="",container!="POD",namespace=~"openshift-(etcd|oauth-apiserver|.*apiserver|ovn-kubernetes|sdn|ingress|authentication|.*controller-manager|.*scheduler|image-registry|operator-lifecycle-manager)"}) by (container, pod, namespace, node) and on (node) kube_node_role{role="master"}) > 0 26 | metricName: containerMemory-Masters 27 | 28 | - query: avg(container_memory_rss{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|ingress)"} and on (node) 
kube_node_role{role="worker"}) by (pod, container, namespace, node) 29 | metricName: containerMemory-AggregatedWorkers 30 | 31 | - query: (sum(container_memory_rss{name!="",container!="POD",namespace=~"openshift-(sdn|ovn-kubernetes|ingress|monitoring|image-registry)"}) by (container, pod, namespace, node) and on (node) kube_node_role{role="infra"}) > 0 32 | metricName: containerMemory-Infra 33 | 34 | # Node metrics: CPU & Memory 35 | 36 | - query: (sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)")) > 0 37 | metricName: nodeCPU-Masters 38 | 39 | - query: (avg((sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="worker"}, "instance", "$1", "node", "(.+)"))) by (mode)) > 0 40 | metricName: nodeCPU-AggregatedWorkers 41 | 42 | - query: (sum(irate(node_cpu_seconds_total[2m])) by (mode,instance) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)")) > 0 43 | metricName: nodeCPU-Infra 44 | 45 | # We compute memory utilization by substrating available memory to the total 46 | 47 | - query: avg((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) and on (instance) label_replace(kube_node_role{role="worker"}, "instance", "$1", "node", "(.+)")) 48 | metricName: nodeMemoryUtilization-AggregatedWorkers 49 | 50 | - query: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) and on (instance) label_replace(kube_node_role{role="master"}, "instance", "$1", "node", "(.+)") 51 | metricName: nodeMemoryUtilization-Masters 52 | 53 | - query: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) and on (instance) label_replace(kube_node_role{role="infra"}, "instance", "$1", "node", "(.+)") 54 | metricName: nodeMemoryUtilization-Infra 55 | 56 | # Kubelet & CRI-O runtime metrics 57 | 58 | - query: irate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[2m]) * 100 and on (node) topk(3,avg_over_time(irate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[2m])[{{ .elapsed }}:]) and on (node) kube_node_role{role="worker"}) 59 | metricName: kubeletCPU 60 | 61 | - query: process_resident_memory_bytes{service="kubelet",job="kubelet"} and on (node) topk(3,max_over_time(irate(process_resident_memory_bytes{service="kubelet",job="kubelet"}[2m])[{{ .elapsed }}:]) and on (node) kube_node_role{role="worker"}) 62 | metricName: kubeletMemory 63 | 64 | - query: irate(process_cpu_seconds_total{service="kubelet",job="crio"}[2m]) * 100 and on (node) topk(3,avg_over_time(irate(process_cpu_seconds_total{service="kubelet",job="crio"}[2m])[{{ .elapsed }}:]) and on (node) kube_node_role{role="worker"}) 65 | metricName: crioCPU 66 | 67 | - query: process_resident_memory_bytes{service="kubelet",job="crio"} and on (node) topk(3,max_over_time(irate(process_resident_memory_bytes{service="kubelet",job="crio"}[2m])[{{ .elapsed }}:]) and on (node) kube_node_role{role="worker"}) 68 | metricName: crioMemory 69 | 70 | # Etcd metrics 71 | 72 | - query: sum(rate(etcd_server_leader_changes_seen_total[2m])) 73 | metricName: etcdLeaderChangesRate 74 | 75 | - query: histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket[2m])) 76 | metricName: 99thEtcdDiskBackendCommitDurationSeconds 77 | 78 | - query: histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket[2m])) 79 | metricName: 99thEtcdDiskWalFsyncDurationSeconds 80 | 81 | - query: histogram_quantile(0.99, 
rate(etcd_network_peer_round_trip_time_seconds_bucket[5m])) 82 | metricName: 99thEtcdRoundTripTimeSeconds 83 | 84 | - query: sum by (cluster_version)(etcd_cluster_version) 85 | metricName: etcdVersion 86 | instant: true 87 | 88 | - query: cluster_version{type="completed"} 89 | metricName: clusterVersion 90 | instant: true 91 | 92 | # Cluster metrics 93 | 94 | - query: max_over_time( count(kube_pod_labels{label_kube_burner_job="network-policy-perf"})[{{ .elapsed }}:] ) 95 | metricName: podCount 96 | 97 | - query: max_over_time( count(kube_namespace_labels{label_kube_burner_job="network-policy-perf"})[{{ .elapsed }}:] ) 98 | metricName: namespaceCount 99 | 100 | - query: max_over_time( count(kube_networkpolicy_labels{networkpolicy=~"ingress.*"})[{{ .elapsed }}:] ) 101 | metricName: netpolIngressCount 102 | 103 | - query: max_over_time( count(kube_networkpolicy_labels{networkpolicy=~"egress.*"})[{{ .elapsed }}:] ) 104 | metricName: netpolEgressCount 105 | 106 | - query: kube_node_role 107 | metricName: nodeRoles 108 | 109 | - query: sum(kube_node_status_condition{status="true"}) by (condition) 110 | metricName: nodeStatus 111 | 112 | - query: kubernetes_build_info 113 | metricName: k8sVersion 114 | instant: true 115 | 116 | # Prometheus metrics 117 | 118 | - query: openshift:prometheus_tsdb_head_series:sum{job="prometheus-k8s"} 119 | metricName: prometheus-timeseriestotal 120 | 121 | - query: openshift:prometheus_tsdb_head_samples_appended_total:sum{job="prometheus-k8s"} 122 | metricName: prometheus-ingestionrate 123 | 124 | # OVS metrics 125 | - query: (sum(irate(container_cpu_usage_seconds_total{id=~"/system.slice/ovs-vswitchd.service"}[2m]) * 100) by (node)) > 0 126 | metricName: ovsVswitchdCPU 127 | 128 | - query: (sum(irate(container_cpu_usage_seconds_total{id=~"/system.slice/ovsdb-server.service"}[2m]) * 100) by (node)) > 0 129 | metricName: ovsdbServerCPU 130 | 131 | - query: ovs_vswitchd_bridge_flows_total 132 | metricName: ovsFlowsCounter 133 | 134 | - query: ovs_vswitchd_rconn_discarded 135 | metricName: ovsVswitchdRconnDiscarded 136 | 137 | - query: ovs_vswitchd_rconn_overflow 138 | metricName: ovsVswitchdRconnOverflow 139 | 140 | - query: ovs_vswitchd_stream_open 141 | metricName: ovsVswitchdStreamOpen 142 | 143 | # OVN metrics 144 | - query: ovn_controller_rconn_discarded 145 | metricName: ovnControllerRconnDiscarded 146 | 147 | - query: ovn_controller_rconn_overflow 148 | metricName: ovnControllerRconnOverflow 149 | 150 | - query: ovn_controller_flow_generation_95th_percentile 151 | metricName: ovnControllerFlowGeneration95Perc 152 | 153 | - query: ovn_controller_flow_installation_95th_percentile 154 | metricName: ovnControllerFlowInstallation95Perc -------------------------------------------------------------------------------- /kube-burner-workload/openshift/openflow-tracker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:slim 2 | RUN apt update && \ 3 | apt install -y curl openvswitch-switch ovn-central 4 | COPY openflow-tracker.py openflow-tracker.py 5 | COPY ./requirements.txt requirements.txt 6 | RUN pip install -r requirements.txt 7 | -------------------------------------------------------------------------------- /kube-burner-workload/openshift/openflow-tracker/openflow-tracker.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import os 4 | import ssl 5 | import sys 6 | import time 7 | import subprocess 8 | 9 | from opensearchpy 
import OpenSearch 10 | 11 | 12 | def index_result(payload, retry_count=3): 13 | print(f"Indexing documents in {es_index}") 14 | while retry_count > 0: 15 | try: 16 | ssl_ctx = ssl.create_default_context() 17 | ssl_ctx.check_hostname = False 18 | ssl_ctx.verify_mode = ssl.CERT_NONE 19 | es = OpenSearch([es_server]) 20 | es.index(index=es_index, body=payload) 21 | retry_count = 0 22 | except Exception as e: 23 | logging.info("Failed Indexing", e) 24 | logging.info("Retrying to index...") 25 | retry_count -= 1 26 | 27 | 28 | def get_number_of_ovs_flows(): 29 | try: 30 | output = subprocess.run( 31 | ["ovs-ofctl", "dump-aggregate", "br-int"], capture_output=True, text=True 32 | ) 33 | result = output.stdout 34 | return int(result.split("flow_count=")[1]) 35 | except Exception as e: 36 | logging.info(f"Failed getting flows count: {e}") 37 | return 0 38 | 39 | 40 | def get_number_of_logical_flows(): 41 | output = subprocess.run( 42 | ["ovn-sbctl", "--no-leader-only", "--columns=_uuid", "list", "logical_flow"], 43 | capture_output=True, 44 | text=True, 45 | ) 46 | if len(output.stderr) != 0: 47 | return 0 48 | output_lines = output.stdout.splitlines() 49 | return len(output_lines) // 2 + 1 50 | 51 | 52 | # poll_interval in seconds, float 53 | # convergence_period in seconds, for how long number of flows shouldn't change to consider it stable 54 | # convergence_timeout in seconds, for how long number to wait for stabilisation before timing out 55 | # timout in seconds 56 | def wait_for_flows_to_stabilize( 57 | poll_interval, convergence_period, convergence_timeout, node_name 58 | ): 59 | timed_out = False 60 | timeout = convergence_timeout + convergence_period 61 | start = time.time() 62 | last_changed = time.time() 63 | ovs_flows_num = get_number_of_ovs_flows() 64 | ovs_flows_converged_num = ovs_flows_num 65 | logical_flows_num = get_number_of_logical_flows() 66 | logical_flows_converged_num = logical_flows_num 67 | while ( 68 | time.time() - last_changed < convergence_period 69 | and time.time() - start < timeout 70 | ): 71 | new_logical_flows_num = get_number_of_logical_flows() 72 | if new_logical_flows_num != logical_flows_num: 73 | if abs(new_logical_flows_num - logical_flows_converged_num) > 50: 74 | # allow minor fluctuations within 50 logical flows range to not interrupt convergence 75 | last_changed = time.time() 76 | logical_flows_converged_num = new_logical_flows_num 77 | logical_flows_num = new_logical_flows_num 78 | logging.info( 79 | f"{node_name}: logical flows={new_logical_flows_num}, " 80 | f"convergence flows={logical_flows_converged_num}" 81 | ) 82 | else: 83 | new_ovs_flows_num = get_number_of_ovs_flows() 84 | if new_ovs_flows_num != ovs_flows_num: 85 | if abs(new_ovs_flows_num - ovs_flows_converged_num) > 100: 86 | # allow minor fluctuations within 100 OVS flows range to not interrupt convergence 87 | last_changed = time.time() 88 | ovs_flows_converged_num = new_ovs_flows_num 89 | ovs_flows_num = new_ovs_flows_num 90 | logging.info( 91 | f"{node_name}: OVS flows={new_ovs_flows_num}, " 92 | f"convergence flows={ovs_flows_converged_num}" 93 | ) 94 | 95 | time.sleep(poll_interval) 96 | if time.time() - start >= timeout: 97 | timed_out = True 98 | logging.info(f"TIMEOUT: {node_name} {timeout} seconds passed") 99 | return last_changed, ovs_flows_num, timed_out 100 | 101 | 102 | def get_db_data(): 103 | results = {} 104 | for table in ["acl", "port_group", "address_set"]: 105 | output = subprocess.run( 106 | ["ovn-nbctl", "--no-leader-only", "--columns=_uuid", "list", table], 107 | 
capture_output=True, 108 | text=True, 109 | ) 110 | if len(output.stderr) != 0: 111 | continue 112 | output_lines = output.stdout.splitlines() 113 | results[table] = len(output_lines) // 2 + 1 114 | for table in ["logical_flow"]: 115 | output = subprocess.run( 116 | ["ovn-sbctl", "--no-leader-only", "--columns=_uuid", "list", table], 117 | capture_output=True, 118 | text=True, 119 | ) 120 | if len(output.stderr) != 0: 121 | continue 122 | output_lines = output.stdout.splitlines() 123 | results[table] = len(output_lines) // 2 + 1 124 | return results 125 | 126 | 127 | def is_ovnic(): 128 | output = subprocess.run(["ls", "/var/run/ovn-ic"], capture_output=True, text=True) 129 | return len(output.stdout.splitlines()) != 0 130 | 131 | 132 | def update_rundir(): 133 | output = subprocess.run( 134 | ["mount", "--bind", "/var/run/ovn-ic", "/var/run/ovn"], 135 | capture_output=True, 136 | text=True, 137 | ) 138 | if output.stderr != "": 139 | print("failed to update /var/run/ovn", output.stderr) 140 | return 1 141 | return 0 142 | 143 | 144 | def check_ovn_health(): 145 | ovn_ic = is_ovnic() 146 | concerning_logs = [] 147 | files = {"vswitchd": "/var/log/openvswitch/ovs-vswitchd.log"} 148 | output = subprocess.run(["ls", "/var/log/pods"], capture_output=True, text=True) 149 | for output_line in output.stdout.splitlines(): 150 | if "ovnkube-master" in output_line: 151 | files["northd"] = f"/var/log/pods/{output_line}/northd/0.log" 152 | if "ovnkube-node" in output_line: 153 | files[ 154 | "ovn-controller" 155 | ] = f"/var/log/pods/{output_line}/ovn-controller/0.log" 156 | if ovn_ic: 157 | files["northd"] = f"/var/log/pods/{output_line}/northd/0.log" 158 | for name, file in files.items(): 159 | output = subprocess.run(["cat", file], capture_output=True, text=True) 160 | if len(output.stderr) != 0: 161 | concerning_logs.append(f"failed to open {file}: {output.stderr}") 162 | else: 163 | output_lines = output.stdout.splitlines() 164 | for log_line in output_lines: 165 | if "no response to inactivity probe" in log_line: 166 | s = log_line.split("stderr F ") 167 | if len(s) > 1: 168 | timestamp = s[1] 169 | else: 170 | timestamp = s[0] 171 | timestamp = timestamp.split("|")[0] 172 | format_string = "%Y-%m-%dT%H:%M:%S.%fZ" 173 | datetime_object = datetime.datetime.strptime( 174 | timestamp, format_string 175 | ) 176 | if start_time < datetime_object: 177 | concerning_logs.append(name + ": " + log_line) 178 | return concerning_logs 179 | 180 | 181 | def main(): 182 | global es_server, es_index, start_time 183 | es_server = os.getenv("ES_SERVER") 184 | es_index = os.getenv("ES_INDEX_NETPOL") 185 | node_name = os.getenv("MY_NODE_NAME") 186 | uuid = os.getenv("UUID") 187 | convergence_period = int(os.getenv("CONVERGENCE_PERIOD")) 188 | convergence_timeout = int(os.getenv("CONVERGENCE_TIMEOUT")) 189 | start_time = datetime.datetime.now() 190 | 191 | logging.basicConfig( 192 | format="%(asctime)s %(levelname)-8s %(message)s", 193 | level=logging.INFO, 194 | datefmt="%Y-%m-%d %H:%M:%S", 195 | ) 196 | doc = { 197 | "metricName": "convergence_tracker_info", 198 | "timestamp": datetime.datetime.now(datetime.UTC), 199 | "workload": "network-policy-perf", 200 | "uuid": uuid, 201 | "source_name": node_name, 202 | "convergence_period": convergence_period, 203 | "convergence_timeout": convergence_timeout, 204 | "test_metadata": os.getenv("METADATA"), 205 | } 206 | index_result(doc) 207 | 208 | logging.info( 209 | f"Start openflow-tracker {node_name}, convergence_period {convergence_period}, convergence timeout 
{convergence_timeout}" 210 | ) 211 | 212 | if is_ovnic(): 213 | if update_rundir() != 0: 214 | sys.exit(1) 215 | stabilize_time, flow_num, timed_out = wait_for_flows_to_stabilize( 216 | 1, convergence_period, convergence_timeout, node_name 217 | ) 218 | stabilize_datetime = datetime.datetime.fromtimestamp(stabilize_time) 219 | nbdb_data = get_db_data() 220 | logging.info( 221 | f"RESULT: time={stabilize_datetime.isoformat(sep=' ', timespec='milliseconds')} {node_name} " 222 | f"finished with {flow_num} flows, nbdb data: {nbdb_data}" 223 | ) 224 | ovn_health_logs = check_ovn_health() 225 | if len(ovn_health_logs) == 0: 226 | logging.info(f"HEALTHCHECK: {node_name} has no problems") 227 | else: 228 | logging.info(f"HEALTHCHECK: {node_name} has concerning logs: {ovn_health_logs}") 229 | 230 | doc = { 231 | "metricName": "convergence_tracker", 232 | "timestamp": datetime.datetime.now(datetime.UTC), 233 | "workload": "network-policy-perf", 234 | "uuid": uuid, 235 | "source_name": node_name, 236 | "convergence_timestamp": stabilize_datetime, 237 | "nbdb": nbdb_data, 238 | "ovs_flows": flow_num, 239 | "unhealthy_logs": ovn_health_logs, 240 | } 241 | index_result(doc) 242 | sys.exit(int(timed_out)) 243 | 244 | 245 | if __name__ == "__main__": 246 | main() 247 | -------------------------------------------------------------------------------- /kube-burner-workload/openshift/openflow-tracker/requirements.txt: -------------------------------------------------------------------------------- 1 | datetime 2 | requests 3 | kubernetes 4 | opensearch-py 5 | -------------------------------------------------------------------------------- /kube-burner-workload/openshift/test_limit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | wait_cleanup () { 4 | IFS=" " read -r -a POD_NAMES <<< "$(oc get pods -n openshift-ovn-kubernetes -l app=ovnkube-node -o jsonpath='{.items[*].metadata.name}')" 5 | # POD_NAMES=($(oc get pods -n openshift-ovn-kubernetes -l app=ovnkube-node -o jsonpath='{.items[*].metadata.name}')) 6 | FLOW_COUNT=0 7 | for POD_NAME in "${POD_NAMES[@]}"; do 8 | POD_FLOW_COUNT=$(oc exec -n openshift-ovn-kubernetes "$POD_NAME" -c ovn-controller -- curl -s "127.0.0.1:29105/metrics"|grep ovs_vswitchd_bridge_flows_total|grep br-int|rev|cut -f1 -d' '|rev) 9 | if [ "$POD_FLOW_COUNT" -gt $FLOW_COUNT ]; then 10 | FLOW_COUNT=$POD_FLOW_COUNT 11 | fi 12 | done 13 | echo "$FLOW_COUNT" 14 | 15 | while [ "$FLOW_COUNT" -ge 10000 ]; do 16 | FLOW_COUNT=0 17 | for POD_NAME in "${POD_NAMES[@]}"; do 18 | POD_FLOW_COUNT=$(oc exec -n openshift-ovn-kubernetes "$POD_NAME" -c ovn-controller -- curl -s "127.0.0.1:29105/metrics"|grep ovs_vswitchd_bridge_flows_total|grep br-int|rev|cut -f1 -d' '|rev) 19 | if [ "$POD_FLOW_COUNT" -gt $FLOW_COUNT ]; then 20 | FLOW_COUNT=$POD_FLOW_COUNT 21 | fi 22 | done 23 | echo "$FLOW_COUNT" 24 | sleep 60 25 | done 26 | echo "shutdown succeeded" 27 | } 28 | 29 | pushd .. 30 | source ./env 31 | NETPOLS_PER_NAMESPACE=50 32 | STEP=50 33 | expectedStatus=0 34 | status=$expectedStatus 35 | while [ $status -eq $expectedStatus ]; do 36 | echo "Network Policies per namespace=$NETPOLS_PER_NAMESPACE" 37 | wait_cleanup 38 | kube-burner init -m ./openshift/metrics.yml -c ./network-policy.yaml -u "https://$(oc get route prometheus-k8s -n openshift-monitoring -o jsonpath="{.spec.host}")" --token="$(oc create token prometheus-k8s -n openshift-monitoring)" 39 | status=$? 
40 | if [ $STEP -eq 0 ]; then 41 | echo "One iteration is finished" 42 | exit 0 43 | fi 44 | NETPOLS_PER_NAMESPACE=$((NETPOLS_PER_NAMESPACE + STEP)) 45 | done 46 | popd || exit -------------------------------------------------------------------------------- /kube-burner-workload/ovn-kubernetes/README.md: -------------------------------------------------------------------------------- 1 | ## Running 2 | 3 | 1. Get ovn-kubernetes code from https://github.com/ovn-org/ovn-kubernetes/tree/master and start a KIND cluster with ./contrib/kind.sh 4 | (more details in https://github.com/ovn-org/ovn-kubernetes/blob/master/docs/kind.md). 5 | This should give you a local kubeconfig that can be used in the scale test. 6 | 7 | 2. Follow [network-policy instructions](../README.md#running) to run the workload 8 | 9 | 2.1 Set env file variable PLATFORM=ovn-kubernetes 10 | 11 | 3. Track convergence with `kubectl logs -l app=convergence-tracker -n convergence-tracker-0 -f` 12 | -------------------------------------------------------------------------------- /kube-burner-workload/ovn-kubernetes/convergence_tracker.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: convergence-tracker-{{.Replica}} 5 | labels: 6 | app: convergence-tracker 7 | spec: 8 | topologySpreadConstraints: 9 | - maxSkew: 1 10 | topologyKey: kubernetes.io/hostname 11 | whenUnsatisfiable: DoNotSchedule 12 | labelSelector: 13 | matchLabels: 14 | app: convergence-tracker 15 | volumes: 16 | - name: openvswitch 17 | hostPath: 18 | path: /var/run/openvswitch 19 | - name: host-var-log-ovs 20 | hostPath: 21 | path: /var/log/openvswitch 22 | restartPolicy: Never 23 | containers: 24 | - name: tracker 25 | # image built with the ./openflow-tracker/Dockerfile 26 | image: quay.io/npinaeva/netpol-scale:ovn-kubernetes 27 | command: [ "/bin/bash", "-c", "python openflow-tracker.py"] 28 | imagePullPolicy: Always 29 | volumeMounts: 30 | - name: openvswitch 31 | mountPath: /var/run/openvswitch 32 | - name: openvswitch 33 | mountPath: /var/run/ovn 34 | - name: host-var-log-ovs 35 | mountPath: /var/log/openvswitch 36 | env: 37 | - name: CONVERGENCE_PERIOD 38 | value: "{{.convergence_period}}" 39 | - name: CONVERGENCE_TIMEOUT 40 | value: "{{.convergence_timeout}}" 41 | - name: ES_SERVER 42 | value: {{.es_server}} 43 | - name: ES_INDEX_NETPOL 44 | value: {{.es_index}} 45 | - name: UUID 46 | value: {{.UUID}} 47 | - name: MY_NODE_NAME 48 | valueFrom: 49 | fieldRef: 50 | fieldPath: spec.nodeName 51 | -------------------------------------------------------------------------------- /kube-burner-workload/ovn-kubernetes/openflow-tracker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:slim 2 | RUN apt update && \ 3 | apt install -y curl openvswitch-switch ovn-central 4 | COPY openflow-tracker.py openflow-tracker.py 5 | COPY ./requirements.txt requirements.txt 6 | RUN pip install -r requirements.txt 7 | -------------------------------------------------------------------------------- /kube-burner-workload/ovn-kubernetes/openflow-tracker/openflow-tracker.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import os 4 | import sys 5 | import time 6 | import subprocess 7 | 8 | 9 | def get_number_of_flows(): 10 | try: 11 | output = subprocess.run( 12 | ["ovs-ofctl", "dump-aggregate", "br-int"], capture_output=True, text=True 13 | ) 14 | result = 
output.stdout 15 | return int(result.split("flow_count=")[1]) 16 | except Exception as e: 17 | logging.info(f"Failed getting flows count: {e}") 18 | return 0 19 | 20 | 21 | # poll_interval in seconds, float 22 | # convergence_period in seconds, for how long number of flows shouldn't change to consider it stable 23 | # convergence_timeout in seconds, for how long number to wait for stabilisation before timing out 24 | def wait_for_flows_to_stabilize( 25 | poll_interval, convergence_period, convergence_timeout, node_name 26 | ): 27 | timed_out = False 28 | timeout = convergence_timeout + convergence_period 29 | start = time.time() 30 | last_changed = time.time() 31 | flows_num = get_number_of_flows() 32 | while ( 33 | time.time() - last_changed < convergence_period 34 | and time.time() - start < timeout 35 | ): 36 | new_flows_num = get_number_of_flows() 37 | if new_flows_num != flows_num: 38 | flows_num = new_flows_num 39 | last_changed = time.time() 40 | logging.info(f"{node_name}: {new_flows_num}") 41 | 42 | time.sleep(poll_interval) 43 | if time.time() - start >= timeout: 44 | timed_out = True 45 | logging.info(f"TIMEOUT: {node_name} {timeout} seconds passed") 46 | return last_changed, flows_num, timed_out 47 | 48 | 49 | def get_db_data(): 50 | results = {} 51 | for table in ["acl", "port_group", "address_set"]: 52 | output = subprocess.run( 53 | ["ovn-nbctl", "--no-leader-only", "--columns=_uuid", "list", table], 54 | capture_output=True, 55 | text=True, 56 | ) 57 | if len(output.stderr) != 0: 58 | continue 59 | output_lines = output.stdout.splitlines() 60 | results[table] = len(output_lines) // 2 + 1 61 | for table in ["logical_flow"]: 62 | output = subprocess.run( 63 | ["ovn-sbctl", "--no-leader-only", "--columns=_uuid", "list", table], 64 | capture_output=True, 65 | text=True, 66 | ) 67 | if len(output.stderr) != 0: 68 | continue 69 | output_lines = output.stdout.splitlines() 70 | results[table] = len(output_lines) // 2 + 1 71 | return results 72 | 73 | 74 | def check_ovn_health(): 75 | concerning_logs = [] 76 | for file in [ 77 | "/var/log/openvswitch/ovn-controller.log", 78 | "/var/log/openvswitch/ovs-vswitchd.log", 79 | "/var/log/openvswitch/ovn-northd.log", 80 | ]: 81 | output = subprocess.run(["cat", file], capture_output=True, text=True) 82 | if len(output.stderr) != 0: 83 | continue 84 | else: 85 | output_lines = output.stdout.splitlines() 86 | for log_line in output_lines: 87 | if "no response to inactivity probe" in log_line: 88 | concerning_logs.append(log_line) 89 | return concerning_logs 90 | 91 | 92 | def main(): 93 | node_name = os.getenv("MY_NODE_NAME") 94 | convergence_period = int(os.getenv("CONVERGENCE_PERIOD")) 95 | convergence_timeout = int(os.getenv("CONVERGENCE_TIMEOUT")) 96 | 97 | logging.basicConfig( 98 | format="%(asctime)s %(levelname)-8s %(message)s", 99 | level=logging.INFO, 100 | datefmt="%Y-%m-%d %H:%M:%S", 101 | ) 102 | 103 | logging.info( 104 | f"Start openflow-tracker {node_name}, convergence_period {convergence_period}, convergence timeout {convergence_timeout}" 105 | ) 106 | stabilize_time, flow_num, timed_out = wait_for_flows_to_stabilize( 107 | 1, convergence_period, convergence_timeout, node_name 108 | ) 109 | stabilize_datetime = datetime.datetime.fromtimestamp(stabilize_time) 110 | nbdb_data = get_db_data() 111 | logging.info( 112 | f"RESULT: time={stabilize_datetime.isoformat(sep=' ', timespec='milliseconds')} {node_name} " 113 | f"finished with {flow_num} flows, nbdb data: {nbdb_data}" 114 | ) 115 | ovn_health_logs = check_ovn_health() 116 | 
if len(ovn_health_logs) == 0: 117 | logging.info(f"HEALTHCHECK: {node_name} has no problems") 118 | else: 119 | logging.info(f"HEALTHCHECK: {node_name} has concerning logs: {ovn_health_logs}") 120 | sys.exit(int(timed_out)) 121 | 122 | 123 | if __name__ == "__main__": 124 | main() 125 | -------------------------------------------------------------------------------- /kube-burner-workload/ovn-kubernetes/openflow-tracker/requirements.txt: -------------------------------------------------------------------------------- 1 | datetime 2 | requests 3 | kubernetes 4 | opensearch-py 5 | -------------------------------------------------------------------------------- /kube-burner-workload/pod.yml: -------------------------------------------------------------------------------- 1 | {{- $myDict := dict "test-pod" "true" "num" (toString .Replica) }} 2 | {{- $replicas := int .Replica }} 3 | {{- range $i, $e := until $replicas }} 4 | {{- $num := add $i 1 }} 5 | {{- $_ := set $myDict (toString $num) "true" }} 6 | {{- end }} 7 | {{- $labels := toJson $myDict }} 8 | apiVersion: v1 9 | kind: Pod 10 | metadata: 11 | name: test-pod-{{.Replica}} 12 | labels: {{$labels}} 13 | spec: 14 | affinity: 15 | podAntiAffinity: 16 | preferredDuringSchedulingIgnoredDuringExecution: 17 | - weight: 10 18 | podAffinityTerm: 19 | labelSelector: 20 | matchLabels: 21 | test-pod: "true" 22 | namespaceSelector: {} 23 | topologyKey: kubernetes.io/hostname 24 | - weight: 10 25 | podAffinityTerm: 26 | labelSelector: 27 | matchLabels: 28 | num: "{{.Replica}}" 29 | namespaceSelector: {} 30 | topologyKey: kubernetes.io/hostname 31 | # nodeAffinity: 32 | # requiredDuringSchedulingIgnoredDuringExecution: 33 | # nodeSelectorTerms: 34 | # - matchExpressions: 35 | # - key: node-role.kubernetes.io/control-plane 36 | # operator: DoesNotExist 37 | containers: 38 | - name: sleeper 39 | args: 40 | - sleep 41 | - infinity 42 | image: registry.k8s.io/pause:3.1 43 | imagePullPolicy: IfNotPresent 44 | ports: 45 | - containerPort: 8080 46 | protocol: TCP 47 | -------------------------------------------------------------------------------- /yaml-analysis/README.md: -------------------------------------------------------------------------------- 1 | ## Build and run 2 | 3 | To run this tool just build a binary with 4 | `go build .` 5 | and you will get `netpol_analysis` binary. 
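For example, a minimal build-and-run sketch (run from the repository root; the binary name assumes the module builds to `netpol_analysis` as stated above):

```shell
cd yaml-analysis
go build .
./netpol_analysis -h
```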
To see existing docs, use `netpol_analysis -h` 6 | 7 | ## Get statistics for given yamls 8 | `-print-graphs` option can display statistics about network policies, given yaml output of 9 | `kubectl get pods,namespace,networkpolicies -A -oyaml` 10 | 11 | ```shell 12 | ./netpol_analysis -print-graphs -yaml="path/to/file" 13 | Found: 5413 Pods, 604 Namespaces, 13678 NetworkPolicies 14 | Empty netpols: 3559, peers: 15423, deny-only netpols 495 15 | Average network policy profile: local pods=13.143703241895262 16 | cidrs=0.5431498411463399, single ports=0.8810941271118262, port ranges=0.0033789219629927593 17 | pod selectors=0.6241327886922129, peer pods=35.43462206776716, single ports=0.3548001737619461, port ranges=0.00021720243266724586 18 | 19 | Median network policy profile: local pods=6 20 | cidrs=1, single ports=1, port ranges=0 21 | pod selectors=1, peer pods=2, single ports=0, port ranges=0 22 | 23 | Local pods distribution 24 | 25 | 1 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 1436.0 26 | 2 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 559.0 27 | 3 pod(s): ▇▇ 54.0 28 | 4 pod(s): ▇▇▇▇▇▇▇▇▇ 243.0 29 | 5 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 2512.0 30 | 6 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 496.0 31 | 7 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 927.0 32 | 8 pod(s): ▇▇▇▇▇▇▇ 196.0 33 | 9 pod(s): ▇▇▇▇▇▇▇▇▇ 240.0 34 | 10 pod(s): ▇▇▇▇▇▇▇▇ 211.0 35 | 11 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 572.0 36 | 12 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 482.0 37 | 13 pod(s): ▇▇ 60.0 38 | 14 pod(s): ▇ 33.0 39 | 15 pod(s): ▇ 47.0 40 | 16 pod(s): ▇▇ 57.0 41 | 17 pod(s): ▇▇▇ 100.0 42 | 18 pod(s): ▇ 39.0 43 | 19 pod(s): ▇▇▇ 84.0 44 | 20 pod(s): ▇▇▇ 99.0 45 | 21 pod(s): ▇▇▇▇ 116.0 46 | 22 pod(s): ▇▇▇▇▇ 136.0 47 | 23 pod(s): ▇ 30.0 48 | 24 pod(s): ▇ 50.0 49 | 25 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇ 339.0 50 | 26 pod(s): ▇ 41.0 51 | 27 pod(s): 9.0 52 | 28 pod(s): 2.0 53 | 33 pod(s): 2.0 54 | 34 pod(s): 2.0 55 | 36 pod(s): 1.0 56 | 38 pod(s): 2.0 57 | 53 pod(s): 1.0 58 | 58 pod(s): 2.0 59 | 80 pod(s): 1.0 60 | 81 pod(s): 1.0 61 | 87 pod(s): 2.0 62 | 127 pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 431.0 63 | 154 pod(s): 9.0 64 | Total: 9624 65 | 66 | CIDR peers distribution 67 | 68 | 0 CIDR(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 9208.0 69 | 1 CIDR(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 5521.0 70 | 2 CIDR(s): 72.0 71 | 3 CIDR(s): ▇▇▇ 346.0 72 | 4 CIDR(s): 7.0 73 | 5 CIDR(s): 1.0 74 | 6 CIDR(s): ▇▇ 263.0 75 | 7 CIDR(s): 2.0 76 | 14 CIDR(s): 2.0 77 | 21 CIDR(s): 1.0 78 | Total: 15423 79 | 80 | Pod selector peers distribution 81 | 82 | 0 pod selector(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 6215.0 83 | 1 pod selector(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 8790.0 84 | 2 pod selector(s): ▇▇▇▇ 418.0 85 | Total: 15423 86 | 87 | Peer pods distribution 88 | 89 | 1 peer pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇ 590.0 90 | 2 peer pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 4621.0 91 | 3 peer pod(s): ▇▇▇▇▇▇▇▇ 393.0 92 | 4 peer pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 649.0 93 | 5 peer pod(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 868.0 94 | 6 peer pod(s): ▇▇▇▇▇▇▇▇▇ 433.0 95 | 7 peer pod(s): ▇▇▇▇▇▇▇▇ 384.0 96 | 8 peer pod(s): ▇ 62.0 97 | 9 peer pod(s): ▇ 63.0 98 | 10 peer pod(s): ▇ 75.0 99 | 11 peer pod(s): 18.0 100 | 12 peer pod(s): ▇ 75.0 101 | 13 peer pod(s): ▇▇▇▇▇▇▇ 346.0 102 | 14 peer pod(s): 10.0 103 | 15 
peer pod(s): 10.0 104 | 16 peer pod(s): 12.0 105 | 17 peer pod(s): 22.0 106 | 18 peer pod(s): 10.0 107 | 19 peer pod(s): 20.0 108 | 20 peer pod(s): 25.0 109 | 21 peer pod(s): 27.0 110 | 22 peer pod(s): 33.0 111 | 23 peer pod(s): 10.0 112 | 24 peer pod(s): 15.0 113 | 25 peer pod(s): 12.0 114 | 26 peer pod(s): 10.0 115 | 27 peer pod(s): 3.0 116 | 28 peer pod(s): 1.0 117 | 34 peer pod(s): 1.0 118 | 36 peer pod(s): 1.0 119 | 42 peer pod(s): 6.0 120 | 58 peer pod(s): 1.0 121 | 80 peer pod(s): 40.0 122 | 94 peer pod(s): 1.0 123 | 127 peer pod(s): ▇▇▇▇▇▇ 288.0 124 | 154 peer pod(s): 3.0 125 | 3578 peer pod(s): ▇ 70.0 126 | Total: 9208 127 | 128 | Single port peers distribution (CIDRs) 129 | 130 | 0 single port(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 1147.0 131 | 1 single port(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 4710.0 132 | 2 single port(s): ▇▇▇▇▇▇▇ 341.0 133 | 4 single port(s): 1.0 134 | 5 single port(s): 16.0 135 | Total: 6215 136 | 137 | Single port peers distribution (pod selectors) 138 | 139 | 0 single port(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 6370.0 140 | 1 single port(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 2417.0 141 | 2 single port(s): ▇▇▇▇▇▇ 416.0 142 | 3 single port(s): 3.0 143 | 4 single port(s): 1.0 144 | 5 single port(s): 1.0 145 | Total: 9208 146 | 147 | Port range peers distribution (CIDRs) 148 | 149 | 0 port ranges(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 6194.0 150 | 1 port ranges(s): 21.0 151 | Total: 6215 152 | 153 | Port range peers distribution (pod selectors) 154 | 155 | 0 port ranges(s): ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 9206.0 156 | 1 port ranges(s): 2.0 157 | Total: 9208 158 | ``` 159 | 160 | To see which NetworkPolicies are empty (don't affect any connections) use `-print-empty-np` flag. 161 | 162 | ## Use scale profile results to predict if a workload can be handled 163 | 164 | ### Minimal profiles to cover all possible configurations 165 | 166 | We will use testing profile notation from the [SCALE_PROFILES](../kube-burner-workload/SCALE_PROFILES.md) 167 | `------`. 168 | 169 | We have 2 peers types: `cidr` and `pod_selector`, they may be joined in one profile, or split into separate profiles, 170 | but we need at least 1 profile that has non-zero value for these fields. 171 | 172 | For every peer type we need at least one profile with 0 single port and 0 port range, and at least one profile 173 | with non-zero single port and non-zero port ranges. 174 | 175 | The smallest profiles set to cover everything is 176 | - (1-1-0-0-1-1-1) - `cidr` + `pod_selector`, no ports 177 | - (1-1-1-1-1-1-1) - `cidr` + `pod_selector` + 1 single port + 1 port range 178 | 179 | ### Generating scale profiles results 180 | 181 | Scale profiles files can be generated using iterative test results tracked by [helper spreadsheet](https://docs.google.com/spreadsheets/d/1Kq1w8c8Z_wlhBOb_EID2nhvmwEi8H6pSxvtpDcbf-1M/edit?usp=sharing). 182 | To generate the file, put the name of a tab which contains test results [here](https://docs.google.com/spreadsheets/d/1Kq1w8c8Z_wlhBOb_EID2nhvmwEi8H6pSxvtpDcbf-1M/edit#gid=285018284&range=B1), 183 | it will populate the sheet with the results marked as ["BEST RESULT"](https://docs.google.com/spreadsheets/d/1Kq1w8c8Z_wlhBOb_EID2nhvmwEi8H6pSxvtpDcbf-1M/edit#gid=16759354&range=X:X)=true from the linked tab. 
184 | To get a file you can use with the `netpol_analysis` script (similar to the example [./profiles_example.csv](./profiles_example.csv)), 185 | go to the [export](https://docs.google.com/spreadsheets/d/1Kq1w8c8Z_wlhBOb_EID2nhvmwEi8H6pSxvtpDcbf-1M/edit#gid=1319766064) tab and save it as `csv`. 186 | You can also fill in a similar document manually. 187 | 188 | With the `-perf-profiles` flag, you will get a **safe** estimation for the set of network policies given via the `-yaml` option, 189 | plus some statistics about the heaviest network policies for the given set of performance profiles. 190 | 191 | The estimation uses the concept of a network policy "weight" to reflect the scale impact of a given policy. A cluster can only 192 | handle a set of network policies whose total weight is <= 1. If a performance profile says the cluster can handle 100 network policies with a 193 | given scale profile, then one such network policy weighs 1/100 = 0.01. 194 | 195 | #### Safe estimation 196 | 197 | The estimation is safe in the sense that if the workload is accepted (weight < 1), it is guaranteed to work 198 | based on the given profiles data. When the weight is greater than 1, the workload won't necessarily fail, 199 | because the approximation adds some overhead when simplifying a generic network policy to the set of given profiles. 200 | 201 | ```shell 202 | ./netpol_analysis -yaml="path/to/file" -perf-profiles=./profiles_example.csv 203 | Found: 5413 Pods, 604 Namespaces, 13678 NetworkPolicies 204 | Empty netpols: 3559, peers: 15423, deny-only netpols 495 205 | Matched 9624 netpols with given profiles 206 | Final Weight=3.639694444444388, if < 1, the workload is accepted 207 | 208 | 5 heaviest netpols are (profile idx start with 1): 209 | namespace-1/netpol-1 210 | config: localpods=127, rules: 211 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 212 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 213 | matched profiles: 214 | {idx:11 copies:936 weight:0.1872} 215 | {idx:5 copies:127 weight:0.0015875000000000002} 216 | weight: 0.1887875 217 | namespace-1/netpol-2 218 | config: localpods=15, rules: 219 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 220 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:2}] 221 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 222 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 223 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 224 | matched profiles: 225 | {idx:11 copies:144 weight:0.028800000000000003} 226 | {idx:5 copies:30 weight:0.000375} 227 | {idx:5 copies:15 weight:0.0001875} 228 | {idx:11 copies:144 weight:0.028800000000000003} 229 | {idx:5 copies:15 weight:0.0001875} 230 | weight: 0.058350000000000006 231 | namespace-2/netpol-1 232 | config: localpods=14, rules: 233 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 234 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:2}] 235 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 236 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 237 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 238 | matched profiles: 239 | {idx:11 copies:144 weight:0.028800000000000003} 240 | {idx:5 copies:28 weight:0.00035} 241 | {idx:5 copies:14 weight:0.000175} 242 | {idx:11 copies:144 weight:0.028800000000000003} 243 | {idx:5
copies:14 weight:0.000175} 244 | weight: 0.05830000000000001 245 | namespace-3/netpol-4 246 | config: localpods=12, rules: 247 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 248 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 249 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 250 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:2}] 251 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 252 | matched profiles: 253 | {idx:7 copies:864 weight:0.028800000000000003} 254 | {idx:5 copies:12 weight:0.00015000000000000001} 255 | {idx:7 copies:864 weight:0.028800000000000003} 256 | {idx:5 copies:24 weight:0.00030000000000000003} 257 | {idx:5 copies:12 weight:0.00015000000000000001} 258 | weight: 0.05820000000000001 259 | namespace-1/netpol-5 260 | config: localpods=33, rules: 261 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 262 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 263 | matched profiles: 264 | {idx:11 copies:288 weight:0.057600000000000005} 265 | {idx:5 copies:33 weight:0.0004125} 266 | weight: 0.05801250000000001 267 | 268 | Initial 15423 peers were split into 174057 profiles. 269 | Used profiles statistics (number of copies) 270 | 271 | 1st profile: 326.0 272 | 2nd profile: ▇▇ 1943.0 273 | 3rd profile: 1.0 274 | 4th profile: 840.0 275 | 5th profile: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 94519.0 276 | 6th profile: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 30789.0 277 | 7th profile: ▇▇▇▇▇▇▇▇▇▇▇▇▇ 12959.0 278 | 8th profile: ▇▇▇▇▇▇▇▇▇▇▇▇ 11663.0 279 | 9th profile: ▇ 1456.0 280 | 10th profile: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 14212.0 281 | 11th profile: ▇▇▇▇ 4373.0 282 | 15th profile: 5.0 283 | 16th profile: ▇ 971.0 284 | 285 | 5th profile (5436 peers) stats: 286 | 1st heaviest weight: 0.00385000 used by 1 peer(s) 287 | localpods=154 288 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:2}] 289 | 2nd heaviest weight: 0.00192500 used by 6 peer(s) 290 | localpods=154 291 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 292 | localpods=154 293 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 294 | localpods=154 295 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:2}] 296 | localpods=154 297 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:2}] 298 | localpods=154 299 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:2}] 300 | 3rd heaviest weight: 0.00158750 used by 418 peer(s) 301 | localpods=127 302 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 303 | localpods=127 304 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:2}] 305 | localpods=127 306 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:2}] 307 | localpods=127 308 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:2}] 309 | localpods=127 310 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:2}] 311 | 4th heaviest weight: 0.00072500 used by 1 peer(s) 312 | localpods=58 313 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:3}] 314 | 5th heaviest weight: 0.00062500 used by 1 peer(s) 315 | localpods=25 316 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:2}] 317 | 6th profile (2118 peers) stats: 318 | 1st 
heaviest weight: 0.00200000 used by 1 peer(s) 319 | localpods=80 320 | ports=[single: 2, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:1}] 321 | 2nd heaviest weight: 0.00065000 used by 8 peer(s) 322 | localpods=26 323 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:6}] 324 | localpods=26 325 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 326 | localpods=26 327 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:5}] 328 | localpods=26 329 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 330 | localpods=26 331 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:6}] 332 | 3rd heaviest weight: 0.00062500 used by 174 peer(s) 333 | localpods=25 334 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:6}] 335 | localpods=25 336 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 337 | localpods=25 338 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:5}] 339 | localpods=25 340 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 341 | localpods=25 342 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:6}] 343 | 4th heaviest weight: 0.00060000 used by 12 peer(s) 344 | localpods=24 345 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:6}] 346 | localpods=24 347 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 348 | localpods=24 349 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:5}] 350 | localpods=24 351 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 352 | localpods=24 353 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:6}] 354 | 5th heaviest weight: 0.00057500 used by 4 peer(s) 355 | localpods=23 356 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:6}] 357 | localpods=23 358 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 359 | localpods=23 360 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:5}] 361 | localpods=23 362 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:4}] 363 | 10th profile (4086 peers) stats: 364 | 1st heaviest weight: 0.00546000 used by 1 peer(s) 365 | localpods=127 366 | ports=[single: 1, ranges: 0], peers=[{cidrs:21 podSelectors:0 peerPods:0}] 367 | 2nd heaviest weight: 0.00384000 used by 1 peer(s) 368 | localpods=154 369 | ports=[single: 4, ranges: 0], peers=[{cidrs:3 podSelectors:0 peerPods:0}] 370 | 3rd heaviest weight: 0.00364000 used by 2 peer(s) 371 | localpods=127 372 | ports=[single: 1, ranges: 0], peers=[{cidrs:14 podSelectors:0 peerPods:0}] 373 | localpods=127 374 | ports=[single: 1, ranges: 0], peers=[{cidrs:14 podSelectors:0 peerPods:0}] 375 | 4th heaviest weight: 0.00192000 used by 1 peer(s) 376 | localpods=154 377 | ports=[single: 2, ranges: 0], peers=[{cidrs:3 podSelectors:0 peerPods:0}] 378 | 5th heaviest weight: 0.00130000 used by 5 peer(s) 379 | localpods=127 380 | ports=[single: 5, ranges: 0], peers=[{cidrs:1 podSelectors:0 peerPods:0}] 381 | localpods=127 382 | ports=[single: 5, ranges: 0], peers=[{cidrs:1 podSelectors:0 peerPods:0}] 383 | localpods=127 384 | ports=[single: 5, ranges: 0], peers=[{cidrs:1 podSelectors:0 peerPods:0}] 385 | localpods=127 386 | ports=[single: 1, ranges: 0], peers=[{cidrs:5 podSelectors:0 peerPods:0}] 387 | localpods=127 388 | ports=[single: 5, ranges: 0], peers=[{cidrs:1 
podSelectors:0 peerPods:0}] 389 | 7th profile (469 peers) stats: 390 | 1st heaviest weight: 0.02880000 used by 2 peer(s) 391 | localpods=12 392 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 393 | localpods=12 394 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 395 | 2nd heaviest weight: 0.01440000 used by 1 peer(s) 396 | localpods=6 397 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 398 | 3rd heaviest weight: 0.01200000 used by 1 peer(s) 399 | localpods=5 400 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 401 | 4th heaviest weight: 0.00960000 used by 3 peer(s) 402 | localpods=4 403 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 404 | localpods=4 405 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 406 | localpods=4 407 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 408 | 5th heaviest weight: 0.00720000 used by 9 peer(s) 409 | localpods=3 410 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 411 | localpods=3 412 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 413 | localpods=3 414 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 415 | localpods=3 416 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 417 | localpods=3 418 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 419 | 8th profile (719 peers) stats: 420 | 1st heaviest weight: 0.04233333 used by 1 peer(s) 421 | localpods=127 422 | ports=[single: 5, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:127}] 423 | 2nd heaviest weight: 0.02540000 used by 1 peer(s) 424 | localpods=127 425 | ports=[single: 3, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:127}] 426 | 3rd heaviest weight: 0.01693333 used by 2 peer(s) 427 | localpods=127 428 | ports=[single: 2, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:127}] 429 | localpods=127 430 | ports=[single: 2, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:127}] 431 | 4th heaviest weight: 0.01270000 used by 2 peer(s) 432 | localpods=127 433 | ports=[single: 3, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:13}] 434 | localpods=127 435 | ports=[single: 3, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:13}] 436 | 5th heaviest weight: 0.00846667 used by 5 peer(s) 437 | localpods=127 438 | ports=[single: 1, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:127}] 439 | localpods=127 440 | ports=[single: 2, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:13}] 441 | localpods=127 442 | ports=[single: 2, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:13}] 443 | localpods=127 444 | ports=[single: 2, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:13}] 445 | localpods=127 446 | ports=[single: 2, ranges: 0], peers=[{cidrs:0 podSelectors:1 peerPods:94}] 447 | 11th profile (464 peers) stats: 448 | 1st heaviest weight: 0.18720000 used by 1 peer(s) 449 | localpods=127 450 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 451 | 2nd heaviest weight: 0.05760000 used by 1 peer(s) 452 | localpods=33 453 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 454 | 3rd heaviest weight: 0.04320000 used by 1 peer(s) 455 | localpods=27 456 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 457 | 4th heaviest weight: 0.02880000 used by 7 
peer(s) 458 | localpods=14 459 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 460 | localpods=14 461 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 462 | localpods=13 463 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 464 | localpods=17 465 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 466 | localpods=17 467 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 468 | 5th heaviest weight: 0.01440000 used by 8 peer(s) 469 | localpods=7 470 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 471 | localpods=7 472 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 473 | localpods=9 474 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 475 | localpods=9 476 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 477 | localpods=10 478 | ports=[single: 0, ranges: 0], peers=[{cidrs:0 podSelectors:2 peerPods:3578}] 479 | 480 | ``` 481 | 482 | You can adjust the number of heaviest network policies to print with the `-print-heavy-np` flag (default 5). 483 | 484 | ### Most common value ranges 485 | 486 | - SINGLE_PORTS = 0-10 487 | - PORT_RANGE = 0-5 488 | - LOCAL_PODS = 1-250 (max pods per namespace) 489 | - CIDRS = 1-10 490 | - POD_SELECTORS = 1-10 491 | - selected pods = PEER_PODS*PEER_NAMESPACES = 1-3500 (all pods in the cluster) 492 | -------------------------------------------------------------------------------- /yaml-analysis/analyze.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | 7 | v1 "k8s.io/api/core/v1" 8 | networkingv1 "k8s.io/api/networking/v1" 9 | ) 10 | 11 | // findClosestProfile returns profilesMatch with minimal weight for a given netpolConfig and a set of profiles. 12 | // It also updates stats for a given netpolConfig.
13 | func findClosestProfile(npConfig *netpolConfig, existingProfiles []*perfProfile, stat *stats) (matchedProfiles profilesMatch, emptyPol bool) { 14 | if npConfig.localPods == 0 || len(npConfig.gressRules) == 0 { 15 | // that policy doesn't do anything 16 | emptyPol = true 17 | return 18 | } 19 | stat.localPods[npConfig.localPods] += 1 20 | // 2 local pods <= 2 netpol * 1 local pod 21 | // 2 pods selectors <= 2 netpol * 1 pods selector (gress rules) 22 | // 2 pods selected for a peer <= 2 peers * 1 pod 23 | // 2 CIDRs <= 2 peers with 1 cidr 24 | // 2 ports <= 2 peers with 1 port 25 | // same for ranges 26 | // CIDRs and pod selectors may be split into different profiles 27 | 28 | for _, peer := range npConfig.gressRules { 29 | stat.peersCounter += 1 30 | stat.singlePorts.Increment(peer.singlePorts, peer.cidrs > 0, peer.podSelectors > 0) 31 | stat.portRanges.Increment(peer.portRanges, peer.cidrs > 0, peer.podSelectors > 0) 32 | stat.cidrs[peer.cidrs] += 1 33 | stat.podSelectors[peer.podSelectors] += 1 34 | if peer.podSelectors != 0 { 35 | stat.peerPods[peer.peerPods] += 1 36 | } 37 | 38 | if len(existingProfiles) > 0 { 39 | // network policy may be split into CIDR-only and pod-selector-only profiles or 40 | // be fully Matched by one profile 41 | fullProfile := &profileMatch{} 42 | cidrProfile := &profileMatch{} 43 | podSelProfile := &profileMatch{} 44 | for idx, profile := range existingProfiles { 45 | // find the number of copies needed to match given peer 46 | copiesFull, copiesCIDR, copiesPodSel := matchProfile(profile, peer) 47 | if peer.cidrs == 0 && copiesPodSel != 0 { 48 | // if peer doesn't have cidrs, then podSelector match is full match 49 | copiesFull = copiesPodSel 50 | } 51 | if peer.podSelectors == 0 && copiesCIDR != 0 { 52 | // if peer doesn't have podSelectors, then CIDR match is full match 53 | copiesFull = copiesCIDR 54 | } 55 | if debug { 56 | fmt.Printf("DEBUG: matchProfile for %+v localpods %v %+v is %v %v %v\n", profile, npConfig.localPods, peer, copiesFull, copiesCIDR, copiesPodSel) 57 | } 58 | // check if current profile match has less weight and update running minimum 59 | updateMinimalMatch(fullProfile, npConfig.localPods, copiesFull, idx, profile) 60 | updateMinimalMatch(cidrProfile, npConfig.localPods, copiesCIDR, idx, profile) 61 | updateMinimalMatch(podSelProfile, npConfig.localPods, copiesPodSel, idx, profile) 62 | } 63 | 64 | // if network policy was split into CIDR-only and pod-selector-only profiles, the final weight 65 | // needs to be summarized 66 | combinedWeight := cidrProfile.weight + podSelProfile.weight 67 | // compare and accumulate, check for no match 68 | if (cidrProfile.copies == 0 || podSelProfile.copies == 0) && fullProfile.copies == 0 { 69 | // no match was found 70 | matchedProfiles = nil 71 | return 72 | } 73 | result := []*profileMatch{} 74 | if fullProfile.copies != 0 && fullProfile.weight <= combinedWeight { 75 | // use full match 76 | result = append(result, fullProfile) 77 | } else { 78 | // use cidr + selector 79 | result = append(result, cidrProfile, podSelProfile) 80 | } 81 | matchedProfiles = append(matchedProfiles, result...) 
82 | 83 | for _, profile := range result { 84 | if _, ok := stat.profilesToNetpols[profile.idx]; !ok { 85 | stat.profilesToNetpols[profile.idx] = map[float64][]*gressWithLocalPods{} 86 | } 87 | stat.profilesToNetpols[profile.idx][profile.weight] = append(stat.profilesToNetpols[profile.idx][profile.weight], 88 | &gressWithLocalPods{peer, npConfig.localPods}) 89 | } 90 | } 91 | } 92 | if debug { 93 | fmt.Printf("matched %v profiles:\n", len(matchedProfiles)) 94 | matchedProfiles.print("") 95 | } 96 | return 97 | } 98 | 99 | // updateMinimalMatch compares current match with minimal weight and updates it is newProfile's weight is less. 100 | func updateMinimalMatch(currentMin *profileMatch, localPods int, newCopies, newIdx int, newProfile *perfProfile) { 101 | localPodsMultiplier := topDiv(localPods, newProfile.localPods) 102 | newCopies = newCopies * localPodsMultiplier 103 | newWeight := float64(newCopies) * newProfile.weight 104 | if newCopies > 0 && (newWeight < currentMin.weight || currentMin.copies == 0) { 105 | currentMin.copies = newCopies 106 | currentMin.weight = newWeight 107 | currentMin.idx = newIdx 108 | } 109 | } 110 | 111 | func matchProfile(profile *perfProfile, peer *gressRule) (copiesFull, copiesCIDR, copiesSel int) { 112 | // check if ports config is correct 113 | // TODO may be improved to split profiles for single ports and port ranges in a similar way as 114 | // cidrs and pod selectors are split 115 | if peer.singlePorts != 0 && profile.singlePorts == 0 || peer.portRanges != 0 && profile.portRanges == 0 || 116 | (peer.singlePorts == 0 && peer.portRanges == 0 && (profile.singlePorts != 0 || profile.portRanges != 0)) { 117 | //fmt.Printf("ports config doesn't match\n") 118 | return 119 | } 120 | 121 | // can do full match 122 | portCopies := maxInt(topDiv(peer.singlePorts, profile.singlePorts), topDiv(peer.portRanges, profile.portRanges)) 123 | selectorMul := 0 124 | cidrMul := 0 125 | if peer.podSelectors > 0 && profile.podSelectors > 0 { 126 | selectorMul = topDiv(peer.podSelectors, profile.podSelectors) 127 | selectorMul *= topDiv(peer.peerPods, profile.peerPods) 128 | } 129 | if peer.cidrs > 0 && profile.CIDRs > 0 { 130 | cidrMul = topDiv(peer.cidrs, profile.CIDRs) 131 | } 132 | copiesFull = portCopies * selectorMul * cidrMul 133 | copiesSel = portCopies * selectorMul 134 | copiesCIDR = portCopies * cidrMul 135 | return 136 | } 137 | 138 | func analyze(netpolList []*networkingv1.NetworkPolicy, existingProfiles []*perfProfile, countSelected podsCounter) *stats { 139 | stat := newStats() 140 | // log every 10% progress 141 | logMul := len(netpolList) / 10 142 | nextLog := logMul 143 | if len(netpolList) < 500 { 144 | // don't log if there are not many netpols 145 | nextLog = -1 146 | } 147 | for i, netpol := range netpolList { 148 | if i == nextLog { 149 | fmt.Printf("INFO: %v Network Policies handled\n", i) 150 | nextLog += logMul 151 | } 152 | npConfig := getNetpolConfig(netpol, countSelected) 153 | matchedProfiles, emtyPol := findClosestProfile(npConfig, existingProfiles, stat) 154 | if emtyPol { 155 | if len(netpol.Spec.Egress) == 0 && len(netpol.Spec.Ingress) == 0 { 156 | stat.noPeersNetpols[netpol.Namespace] = append(stat.noPeersNetpols[netpol.Namespace], netpol.Name) 157 | stat.noPeersCounter += 1 158 | } else { 159 | stat.emptyNetpols[netpol.Namespace] = append(stat.emptyNetpols[netpol.Namespace], netpol.Name) 160 | stat.emptyCounter += 1 161 | } 162 | } else if len(existingProfiles) > 0 { 163 | if len(matchedProfiles) == 0 { 164 | fmt.Printf("ERROR: Closest 
profile for policy %s/%s not found\n", netpol.Namespace, netpol.Name) 165 | npConfig.print("") 166 | } else { 167 | stat.matchedNetpols += 1 168 | stat.weights = append(stat.weights, &netpolWeight{npConfig, matchedProfiles, matchedProfiles.weight(), netpol.Namespace + "/" + netpol.Name}) 169 | } 170 | } 171 | } 172 | return stat 173 | } 174 | 175 | var debug bool 176 | 177 | func main() { 178 | filePath := flag.String("yaml", "", "Required. Path to the yaml output of \"kubectl get pods,namespace,networkpolicies -A -oyaml\"") 179 | printEmptyNetpols := flag.Bool("print-empty-np", false, "Print empty network policies that don't have any effect.\n"+ 180 | "It may be useful to delete them if they are not needed.") 181 | printGraphs := flag.Bool("print-graphs", false, "Print statistics for netpol parameters.\n"+ 182 | "It may help you understand how network policies from a given file are configured, and which performance profiles will "+ 183 | "suit this workload the best.") 184 | profilesPath := flag.String("perf-profiles", "", "Path to the cvs-formatted test results.\n"+ 185 | "Expected data format: local_pods, gress_rules, single_ports, port_ranges, peer_pods, peer_namespaces, CIDRs, result") 186 | printHeavyNetpols := flag.Int("print-heavy-np", 5, "Print a given number of the heaviest network policies.\n"+ 187 | "It may be useful to review which network policies are considered the heaviest for a given set of performance profiles,\n"+ 188 | "and which new performance profiles may help better approximate this workload.\n"+ 189 | "Can only be used with -perf-profiles.") 190 | debugFlag := flag.Bool("debug", false, "Print debug info for profiles matching") 191 | flag.Parse() 192 | debug = *debugFlag 193 | 194 | pods := []*v1.Pod{} 195 | namespaces := []*v1.Namespace{} 196 | netpols := []*networkingv1.NetworkPolicy{} 197 | parseYamls(*filePath, &pods, &namespaces, &netpols) 198 | if len(namespaces) == 0 { 199 | fmt.Printf("WARNING: No namespaces are given\n") 200 | } 201 | fmt.Printf("Found: %v Pods, %v Namespaces, %v NetworkPolicies\n", len(pods), len(namespaces), len(netpols)) 202 | 203 | existingProfiles := []*perfProfile{} 204 | if *profilesPath != "" { 205 | existingProfiles = parseProfiles(*profilesPath) 206 | } 207 | 208 | statistics := analyze(netpols, existingProfiles, getPodsCounter(pods, namespaces)) 209 | statistics.print(*printEmptyNetpols, *printGraphs, *printHeavyNetpols, len(existingProfiles) == 0) 210 | } 211 | -------------------------------------------------------------------------------- /yaml-analysis/go.mod: -------------------------------------------------------------------------------- 1 | module netpol_analysis 2 | 3 | go 1.21 4 | 5 | toolchain go1.21.3 6 | 7 | require ( 8 | github.com/daoleno/tgraph v0.0.2 9 | k8s.io/api v0.29.0 10 | k8s.io/apimachinery v0.29.0 11 | k8s.io/client-go v0.29.0 12 | ) 13 | 14 | require ( 15 | github.com/fatih/color v1.9.0 // indirect 16 | github.com/go-logr/logr v1.3.0 // indirect 17 | github.com/gogo/protobuf v1.3.2 // indirect 18 | github.com/google/gofuzz v1.2.0 // indirect 19 | github.com/json-iterator/go v1.1.12 // indirect 20 | github.com/mattn/go-colorable v0.1.4 // indirect 21 | github.com/mattn/go-isatty v0.0.11 // indirect 22 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 23 | github.com/modern-go/reflect2 v1.0.2 // indirect 24 | golang.org/x/net v0.17.0 // indirect 25 | golang.org/x/sys v0.13.0 // indirect 26 | golang.org/x/text v0.13.0 // indirect 27 | gopkg.in/inf.v0 v0.9.1 // indirect 28 | 
gopkg.in/yaml.v2 v2.4.0 // indirect 29 | k8s.io/klog/v2 v2.110.1 // indirect 30 | k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect 31 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect 32 | sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect 33 | sigs.k8s.io/yaml v1.3.0 // indirect 34 | ) 35 | -------------------------------------------------------------------------------- /yaml-analysis/go.sum: -------------------------------------------------------------------------------- 1 | github.com/daoleno/tgraph v0.0.2 h1:/mhUodtmMfM5Nnc6eOLlwUhXCNP689RgqOkfgOB4uR0= 2 | github.com/daoleno/tgraph v0.0.2/go.mod h1:Me3/RyLAEy5g8q8rKfJGsm04eM007cb3e6kOnCQSFQI= 3 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 5 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 6 | github.com/fatih/color v1.9.0 h1:8xPHl4/q1VyqGIPif1F+1V3Y3lSmrq01EabUW3CoW5s= 7 | github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= 8 | github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= 9 | github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 10 | github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= 11 | github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= 12 | github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 13 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 14 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 15 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 16 | github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= 17 | github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 18 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 19 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 20 | github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= 21 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 22 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 23 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 24 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 25 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 26 | github.com/mattn/go-colorable v0.1.4 h1:snbPLB8fVfU9iwbbo30TPtbLRzwWu6aJS6Xh4eaaviA= 27 | github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= 28 | github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= 29 | github.com/mattn/go-isatty v0.0.11 h1:FxPOTFNqGkuDUGi3H/qkUbQO4ZiBa2brKq5r0l8TGeM= 30 | github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= 31 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 32 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 33 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod 
h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 34 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 35 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 36 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 37 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 38 | github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= 39 | github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= 40 | github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 41 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 42 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 43 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 44 | github.com/stretchr/testify v1.6.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 45 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 46 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 47 | github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 48 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 49 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 50 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 51 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 52 | golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 53 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 54 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 55 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 56 | golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 57 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 58 | golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= 59 | golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= 60 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 61 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 62 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 63 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 64 | golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 65 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 66 | golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 67 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 68 | golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= 69 | 
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 70 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 71 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 72 | golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= 73 | golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= 74 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 75 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 76 | golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= 77 | golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 78 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 79 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 80 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 81 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 82 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 83 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 84 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 85 | gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= 86 | gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= 87 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 88 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 89 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 90 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 91 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 92 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 93 | k8s.io/api v0.29.0 h1:NiCdQMY1QOp1H8lfRyeEf8eOwV6+0xA6XEE44ohDX2A= 94 | k8s.io/api v0.29.0/go.mod h1:sdVmXoz2Bo/cb77Pxi71IPTSErEW32xa4aXwKH7gfBA= 95 | k8s.io/apimachinery v0.29.0 h1:+ACVktwyicPz0oc6MTMLwa2Pw3ouLAfAon1wPLtG48o= 96 | k8s.io/apimachinery v0.29.0/go.mod h1:eVBxQ/cwiJxH58eK/jd/vAk4mrxmVlnpBH5J2GbMeis= 97 | k8s.io/client-go v0.29.0 h1:KmlDtFcrdUzOYrBhXHgKw5ycWzc3ryPX5mQe0SkG3y8= 98 | k8s.io/client-go v0.29.0/go.mod h1:yLkXH4HKMAywcrD82KMSmfYg2DlE8mepPR4JGSo5n38= 99 | k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= 100 | k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= 101 | k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= 102 | k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= 103 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= 104 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= 105 | sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= 106 | 
sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= 107 | sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= 108 | sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= 109 | -------------------------------------------------------------------------------- /yaml-analysis/helpers.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "sort" 7 | 8 | v1 "k8s.io/api/core/v1" 9 | networkingv1 "k8s.io/api/networking/v1" 10 | "k8s.io/client-go/kubernetes/scheme" 11 | ) 12 | 13 | func parseYamls(filename string, pods *[]*v1.Pod, namespaces *[]*v1.Namespace, netpols *[]*networkingv1.NetworkPolicy) { 14 | content, err := os.ReadFile(filename) 15 | if err != nil { 16 | fmt.Printf("ERROR: failed to read file %s: %v\n", filename, err) 17 | return 18 | } 19 | decode := scheme.Codecs.UniversalDeserializer().Decode 20 | obj, _, err := decode(content, nil, nil) 21 | if err != nil { 22 | fmt.Printf("ERROR: failed to decode yaml file %s: %v\n", filename, err) 23 | return 24 | } 25 | for _, rawObj := range obj.(*v1.List).Items { 26 | obj, _, err := decode(rawObj.Raw, nil, nil) 27 | if err != nil { 28 | fmt.Printf("ERROR: failed to decode object %s: %v\n", string(rawObj.Raw), err) 29 | return 30 | } 31 | if pod, ok := obj.(*v1.Pod); ok { 32 | *pods = append(*pods, pod) 33 | } else if namespace, ok := obj.(*v1.Namespace); ok { 34 | *namespaces = append(*namespaces, namespace) 35 | } else if netpol, ok := obj.(*networkingv1.NetworkPolicy); ok { 36 | *netpols = append(*netpols, netpol) 37 | } else { 38 | fmt.Printf("WARN: unexpected type %T\n", obj) 39 | } 40 | } 41 | } 42 | 43 | func maxInt(a, b int) int { 44 | if a > b { 45 | return a 46 | } 47 | return b 48 | } 49 | 50 | type numeric interface { 51 | int | float64 52 | } 53 | 54 | func sortedMap[T1 numeric, T2 any](m map[T1]T2, reverse bool) (keys []T1, values []T2) { 55 | for k := range m { 56 | keys = append(keys, k) 57 | } 58 | sort.Slice(keys, func(i, j int) bool { 59 | if !reverse { 60 | return keys[i] < keys[j] 61 | } else { 62 | return keys[i] > keys[j] 63 | } 64 | }) 65 | for _, k := range keys { 66 | values = append(values, m[k]) 67 | } 68 | return 69 | } 70 | 71 | type pair[T1 comparable, T2 numeric] struct { 72 | key T1 73 | value T2 74 | } 75 | 76 | func sortedMapByValue[T1 comparable, T2 numeric](m map[T1]T2, reverse bool) []pair[T1, T2] { 77 | pairs := []pair[T1, T2]{} 78 | for k, v := range m { 79 | pairs = append(pairs, pair[T1, T2]{k, v}) 80 | } 81 | sort.Slice(pairs, func(i, j int) bool { 82 | if !reverse { 83 | return pairs[i].value < pairs[j].value 84 | } else { 85 | return pairs[i].value > pairs[j].value 86 | } 87 | }) 88 | return pairs 89 | } 90 | 91 | func printMap[T1 comparable, T2 any](m map[T1]T2) string { 92 | s := "" 93 | for k, v := range m { 94 | s += fmt.Sprintf("%v: %v\n", k, v) 95 | } 96 | return s 97 | } 98 | 99 | func topDiv(a, b int) int { 100 | if a == 0 { 101 | return 1 102 | } 103 | res := a / b 104 | if a%b > 0 { 105 | res += 1 106 | } 107 | return res 108 | } 109 | 110 | func ordinalString(i int) string { 111 | switch i { 112 | case 1: 113 | return "1st" 114 | case 2: 115 | return "2nd" 116 | case 3: 117 | return "3rd" 118 | default: 119 | return fmt.Sprintf("%dth", i) 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /yaml-analysis/netpol_config.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | v1 "k8s.io/api/core/v1" 7 | networkingv1 "k8s.io/api/networking/v1" 8 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | "k8s.io/apimachinery/pkg/labels" 10 | "k8s.io/apimachinery/pkg/util/sets" 11 | ) 12 | 13 | type portConfig struct { 14 | singlePorts int 15 | portRanges int 16 | } 17 | 18 | type peerConfig struct { 19 | cidrs int 20 | podSelectors int 21 | peerPods int 22 | } 23 | 24 | type gressRule struct { 25 | portConfig 26 | peerConfig 27 | } 28 | 29 | func (r *gressRule) print(indent string) { 30 | fmt.Printf("%s\tports=[single: %v, ranges: %v], peers=[%+v]\n", indent, r.singlePorts, r.portRanges, r.peerConfig) 31 | } 32 | 33 | func (pc *peerConfig) join(pc2 *peerConfig) *peerConfig { 34 | if pc2 == nil { 35 | return pc 36 | } 37 | pc.cidrs += pc2.cidrs 38 | pc.podSelectors += pc2.podSelectors 39 | pc.peerPods = maxInt(pc.peerPods, pc2.peerPods) 40 | return pc 41 | } 42 | 43 | type netpolConfig struct { 44 | // TODO: differentiate ingress and egress? 45 | localPods int 46 | gressRules []*gressRule 47 | } 48 | 49 | func (c *netpolConfig) print(indent string) { 50 | fmt.Printf("%sconfig: localpods=%v, rules:\n", indent, c.localPods) 51 | for _, peer := range c.gressRules { 52 | peer.print(indent) 53 | } 54 | } 55 | 56 | func getGressRuleConfig(netpolNs string, policyPorts []networkingv1.NetworkPolicyPort, peers []networkingv1.NetworkPolicyPeer, 57 | countSelected podsCounter) (*portConfig, *peerConfig) { 58 | CIDRs := 0 59 | podSelectors := 0 60 | maxSelectedPods := 0 61 | 62 | ports := 0 63 | portRanges := 0 64 | for _, port := range policyPorts { 65 | if port.EndPort != nil { 66 | portRanges += 1 67 | } else { 68 | ports += 1 69 | } 70 | } 71 | for _, peer := range peers { 72 | if peer.IPBlock != nil { 73 | CIDRs += 1 74 | } else { 75 | podSelectors += 1 76 | selectedPods := countSelected(peer.PodSelector, netpolNs, peer.NamespaceSelector) 77 | maxSelectedPods = maxInt(maxSelectedPods, selectedPods) 78 | } 79 | } 80 | if CIDRs == 0 && (podSelectors == 0 || maxSelectedPods == 0) { 81 | return nil, nil 82 | } 83 | return &portConfig{ports, portRanges}, 84 | &peerConfig{CIDRs, 85 | podSelectors, 86 | maxSelectedPods, 87 | } 88 | } 89 | 90 | func getNetpolConfig(netpol *networkingv1.NetworkPolicy, countSelected podsCounter) *netpolConfig { 91 | localPods := countSelected(&netpol.Spec.PodSelector, netpol.Namespace, nil) 92 | portPeers := map[*portConfig]*peerConfig{} 93 | 94 | for _, egress := range netpol.Spec.Egress { 95 | portConf, peerConf := getGressRuleConfig(netpol.Namespace, egress.Ports, egress.To, countSelected) 96 | if portConf != nil { 97 | portPeers[portConf] = peerConf.join(portPeers[portConf]) 98 | } 99 | } 100 | for _, ingress := range netpol.Spec.Ingress { 101 | portConf, peerConf := getGressRuleConfig(netpol.Namespace, ingress.Ports, ingress.From, countSelected) 102 | if portConf != nil { 103 | portPeers[portConf] = peerConf.join(portPeers[portConf]) 104 | } 105 | } 106 | peers := []*gressRule{} 107 | for portConf, peerConf := range portPeers { 108 | peers = append(peers, &gressRule{ 109 | *portConf, *peerConf, 110 | }) 111 | } 112 | 113 | return &netpolConfig{ 114 | localPods: localPods, 115 | gressRules: peers, 116 | } 117 | } 118 | 119 | type podsCounter func(podSelector *metav1.LabelSelector, namespace string, namespaceSelector *metav1.LabelSelector) int 120 | 121 | // returns podsCounter 122 | func getPodsCounter(podsList 
[]*v1.Pod, nsList []*v1.Namespace) func(podSelector *metav1.LabelSelector, namespace string, namespaceSelector *metav1.LabelSelector) int { 123 | selectedCounter := map[string]int{} 124 | return func(podSelector *metav1.LabelSelector, namespace string, namespaceSelector *metav1.LabelSelector) int { 125 | stringSelector := podSelector.String() + namespace + namespaceSelector.String() 126 | if result, ok := selectedCounter[stringSelector]; ok { 127 | return result 128 | } 129 | matchPodSelector := func(pod *v1.Pod) bool { 130 | if podSelector != nil { 131 | sel, err := metav1.LabelSelectorAsSelector(podSelector) 132 | if err != nil { 133 | fmt.Println("ERROR") 134 | return false 135 | } 136 | return sel.Matches(labels.Set(pod.Labels)) 137 | } else { 138 | return true 139 | } 140 | } 141 | matchNamespace := func(ns *v1.Namespace) bool { 142 | if namespaceSelector != nil { 143 | sel, err := metav1.LabelSelectorAsSelector(namespaceSelector) 144 | if err != nil { 145 | fmt.Println("ERROR") 146 | return false 147 | } 148 | return sel.Matches(labels.Set(ns.Labels)) 149 | } else if namespace != "" { 150 | return ns.Name == namespace 151 | } else { 152 | return true 153 | } 154 | } 155 | result := 0 156 | matchedNamespaces := sets.Set[string]{} 157 | for _, ns := range nsList { 158 | if matchNamespace(ns) { 159 | matchedNamespaces.Insert(ns.Name) 160 | } 161 | } 162 | 163 | matchPod := func(pod *v1.Pod) bool { 164 | return matchPodSelector(pod) && matchedNamespaces.Has(pod.Namespace) 165 | } 166 | if len(nsList) == 0 { 167 | matchPod = func(pod *v1.Pod) bool { 168 | return matchPodSelector(pod) && (namespace == "" || pod.Namespace == namespace) 169 | } 170 | } 171 | 172 | for _, pod := range podsList { 173 | if matchPod(pod) { 174 | result += 1 175 | } 176 | } 177 | selectedCounter[stringSelector] = result 178 | return result 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /yaml-analysis/profile.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "os" 7 | "strconv" 8 | ) 9 | 10 | // ------ 11 | type perfProfile struct { 12 | localPods int 13 | singlePorts int 14 | portRanges int 15 | podSelectors int 16 | // peer namespace just affects the number of peer pods in the end 17 | // now used for now 18 | //peerNamespaces int 19 | peerPods int 20 | CIDRs int 21 | // weight = 1/number of policies with this profile 22 | weight float64 23 | } 24 | 25 | func newProfile(localPods, podSelectors, singlePorts, portRanges, peerPods, peerNamespaces, CIDRs, amount int) *perfProfile { 26 | return &perfProfile{ 27 | localPods: localPods, 28 | singlePorts: singlePorts, 29 | portRanges: portRanges, 30 | podSelectors: podSelectors, 31 | peerPods: peerPods * peerNamespaces, 32 | CIDRs: CIDRs, 33 | weight: 1.0 / float64(amount), 34 | } 35 | } 36 | 37 | type profileMatch struct { 38 | // profile index in a given file, indexing starts with 0 39 | idx int 40 | copies int 41 | // summarized weight for copies 42 | weight float64 43 | } 44 | 45 | type profilesMatch []*profileMatch 46 | 47 | func (matches profilesMatch) print(indent string) { 48 | fmt.Printf("%smatched profiles:\n", indent) 49 | for _, match := range matches { 50 | readableMatch := *match 51 | readableMatch.idx += 1 52 | fmt.Printf("%s\t%+v\n", indent, readableMatch) 53 | } 54 | } 55 | 56 | func (matches profilesMatch) weight() float64 { 57 | res := 0.0 58 | for _, match := range matches { 59 | res += 
match.weight 60 | } 61 | return res 62 | } 63 | 64 | func parseProfiles(filename string) []*perfProfile { 65 | f, err := os.Open(filename) 66 | if err != nil { 67 | fmt.Printf("ERROR: failed to read file %s: %v\n", filename, err) 68 | return nil 69 | } 70 | defer f.Close() 71 | csvReader := csv.NewReader(f) 72 | records, err := csvReader.ReadAll() 73 | if err != nil { 74 | fmt.Printf("ERROR: failed parse profiles: %v\n", err) 75 | return nil 76 | } 77 | profiles := []*perfProfile{} 78 | for _, record := range records { 79 | ints := []int{} 80 | for _, strInt := range record { 81 | counter, err := strconv.Atoi(strInt) 82 | if err != nil { 83 | fmt.Printf("ERROR: failed to convert str %s to int: %v\n", strInt, err) 84 | return nil 85 | } 86 | ints = append(ints, counter) 87 | } 88 | if len(ints) != 8 { 89 | fmt.Printf("ERROR: failed to read a profile: expected 8 ints, got %v\n", len(ints)) 90 | return nil 91 | } 92 | profiles = append(profiles, newProfile(ints[0], ints[1], ints[2], ints[3], ints[4], ints[5], ints[6], ints[7])) 93 | } 94 | return profiles 95 | } 96 | -------------------------------------------------------------------------------- /yaml-analysis/profiles_example.csv: -------------------------------------------------------------------------------- 1 | 1,0,0,0,0,0,1,100000 2 | 1,0,1,0,0,0,1,100000 3 | 1,0,0,1,0,0,1,90000 4 | 1,0,1,1,0,0,1,90000 5 | 1,1,0,0,1,3,0,80000 6 | 1,1,1,0,1,3,0,80000 7 | 1,1,0,0,10,10,0,30000 8 | 1,1,1,0,10,10,0,30000 9 | 10,0,0,0,0,0,1,50000 10 | 10,0,1,0,0,0,1,50000 11 | 10,1,0,0,10,10,1,5000 12 | 1,1,0,0,1,3,1,60000 13 | 10,1,0,0,10,10,1,5000 14 | 1,0,0,0,0,0,10,20000 15 | 1,0,1,0,0,0,10,20000 16 | 1,1,1,1,1,1,0,30000 -------------------------------------------------------------------------------- /yaml-analysis/stats.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | 7 | "github.com/daoleno/tgraph" 8 | ) 9 | 10 | type portStats struct { 11 | cidrs map[int]int 12 | podSelectors map[int]int 13 | } 14 | 15 | func newPortStats() portStats { 16 | return portStats{ 17 | cidrs: map[int]int{}, 18 | podSelectors: map[int]int{}, 19 | } 20 | } 21 | 22 | func (s *portStats) Increment(key int, cidrs, podSelectors bool) { 23 | if cidrs { 24 | s.cidrs[key] += 1 25 | } 26 | if podSelectors { 27 | s.podSelectors[key] += 1 28 | } 29 | } 30 | 31 | type netpolWeight struct { 32 | npConfig *netpolConfig 33 | result profilesMatch 34 | weight float64 35 | netpolName string 36 | } 37 | 38 | func (w *netpolWeight) print() { 39 | fmt.Printf("%v\n", w.netpolName) 40 | indent := " " 41 | w.npConfig.print(indent) 42 | w.result.print(indent) 43 | fmt.Printf("%sweight: %v\n", indent, w.weight) 44 | } 45 | 46 | type stats struct { 47 | singlePorts portStats 48 | portRanges portStats 49 | cidrs map[int]int 50 | podSelectors map[int]int 51 | peerPods map[int]int 52 | peersCounter int 53 | localPods map[int]int 54 | matchedNetpols int 55 | // emptyNetpols are netpols that have some peers defined, but it doesn't have real effect. 
56 | // it can happen if either no local pods are selected or all peers don't select any enpdoints 57 | emptyNetpols map[string][]string 58 | emptyCounter int 59 | // noPeersNetpols are netpol that have zero peers defined, they may be used as deny-all policy and are not 60 | // invalid 61 | noPeersNetpols map[string][]string 62 | noPeersCounter int 63 | weights []*netpolWeight 64 | 65 | // [profile idx][match weight][peers with given weight] 66 | profilesToNetpols map[int]map[float64][]*gressWithLocalPods 67 | } 68 | 69 | type gressWithLocalPods struct { 70 | *gressRule 71 | localPods int 72 | } 73 | 74 | func newStats() *stats { 75 | return &stats{ 76 | localPods: map[int]int{}, 77 | singlePorts: newPortStats(), 78 | portRanges: newPortStats(), 79 | cidrs: map[int]int{}, 80 | podSelectors: map[int]int{}, 81 | peerPods: map[int]int{}, 82 | emptyNetpols: map[string][]string{}, 83 | noPeersNetpols: map[string][]string{}, 84 | profilesToNetpols: map[int]map[float64][]*gressWithLocalPods{}, 85 | } 86 | } 87 | 88 | func toTgraphData(input map[int]int, getLabel func(key int) string) ([][]float64, []string) { 89 | data := [][]float64{} 90 | labels := []string{} 91 | sortedKeys, sortedValues := sortedMap[int, int](input, false) 92 | for i, key := range sortedKeys { 93 | data = append(data, []float64{float64(sortedValues[i])}) 94 | labels = append(labels, getLabel(key)) 95 | } 96 | return data, labels 97 | } 98 | 99 | type graphData struct { 100 | input map[int]int 101 | label string 102 | title string 103 | } 104 | 105 | func median(data map[int]int, ignoreZeros bool) int { 106 | inlinedData := []int{} 107 | for value, counter := range data { 108 | if ignoreZeros && value == 0 { 109 | continue 110 | } 111 | for i := 0; i < counter; i++ { 112 | inlinedData = append(inlinedData, value) 113 | } 114 | } 115 | 116 | sort.Ints(inlinedData) 117 | 118 | l := len(inlinedData) 119 | if l == 0 { 120 | return 0 121 | } else { 122 | return inlinedData[l/2] 123 | } 124 | } 125 | 126 | func average(data map[int]int) float64 { 127 | sum := 0 128 | samplesCounter := 0 129 | for value, counter := range data { 130 | sum += value * counter 131 | samplesCounter += counter 132 | } 133 | return float64(sum) / float64(samplesCounter) 134 | } 135 | 136 | func (stat *stats) print(printEmptyNetpols, printGraphs bool, heaviestNetpols int, noProfiles bool) { 137 | fmt.Printf("Empty netpols: %v, peers: %v, deny-only netpols %v\n", stat.emptyCounter, stat.peersCounter, stat.noPeersCounter) 138 | if printEmptyNetpols { 139 | fmt.Printf("\nEmpty netpols (namespace:[netpol names]):\n%s\n", printMap[string, []string](stat.emptyNetpols)) 140 | } 141 | 142 | if printGraphs { 143 | fmt.Printf("Average network policy profile: local pods=%v\n"+ 144 | "\tcidrs=%v, single ports=%v, port ranges=%v\n"+ 145 | "\tpod selectors=%v, peer pods=%v, single ports=%v, port ranges=%v\n\n", 146 | average(stat.localPods), 147 | average(stat.cidrs), average(stat.singlePorts.cidrs), average(stat.portRanges.cidrs), 148 | average(stat.podSelectors), average(stat.peerPods), average(stat.singlePorts.podSelectors), average(stat.portRanges.podSelectors), 149 | ) 150 | 151 | fmt.Printf("Median network policy profile: local pods=%v\n"+ 152 | "\tcidrs=%v, single ports=%v, port ranges=%v\n"+ 153 | "\tpod selectors=%v, peer pods=%v, single ports=%v, port ranges=%v\n\n", 154 | median(stat.localPods, true), 155 | median(stat.cidrs, true), median(stat.singlePorts.cidrs, false), median(stat.portRanges.cidrs, false), 156 | median(stat.podSelectors, true), 
median(stat.peerPods, true), median(stat.singlePorts.podSelectors, false), median(stat.portRanges.podSelectors, false), 157 | ) 158 | 159 | for _, gData := range []graphData{ 160 | {stat.localPods, "pod(s)", "Local pods distribution"}, 161 | {stat.cidrs, "CIDR(s)", "CIDR peers distribution"}, 162 | {stat.podSelectors, "pod selector(s)", "Pod selector peers distribution"}, 163 | {stat.peerPods, "peer pod(s)", "Peer pods distribution"}, 164 | {stat.singlePorts.cidrs, "single port(s)", "Single port peers distribution (CIDRs)"}, 165 | {stat.singlePorts.podSelectors, "single port(s)", "Single port peers distribution (pod selectors)"}, 166 | {stat.portRanges.cidrs, "port ranges(s)", "Port range peers distribution (CIDRs)"}, 167 | {stat.portRanges.podSelectors, "port ranges(s)", "Port range peers distribution (pod selectors)"}, 168 | } { 169 | data, labels := toTgraphData(gData.input, func(key int) string { return fmt.Sprintf("%d %s", key, gData.label) }) 170 | tgraph.Chart(gData.title, labels, data, nil, 171 | nil, 100, false, "▇") 172 | total := 0 173 | for _, i := range gData.input { 174 | total += i 175 | } 176 | fmt.Println("Total: ", total) 177 | fmt.Println() 178 | } 179 | } 180 | 181 | if !noProfiles { 182 | fmt.Printf("Matched %v netpols with given profiles\n", stat.matchedNetpols) 183 | 184 | sumWeight := 0.0 185 | for _, npWeight := range stat.weights { 186 | sumWeight += npWeight.weight 187 | } 188 | fmt.Printf("Final Weight=%v, if < 1, the workload is accepted\n\n", sumWeight) 189 | sort.Slice(stat.weights, func(i, j int) bool { 190 | return stat.weights[i].weight > stat.weights[j].weight 191 | }) 192 | 193 | if heaviestNetpols > 0 { 194 | fmt.Printf("%v heaviest netpols are (profile idx start with 1):\n", heaviestNetpols) 195 | weightsToPrint := heaviestNetpols 196 | if len(stat.weights) < weightsToPrint { 197 | weightsToPrint = len(stat.weights) 198 | } 199 | for _, npWeight := range stat.weights[:weightsToPrint] { 200 | npWeight.print() 201 | } 202 | fmt.Println() 203 | } 204 | 205 | profileCopies := map[int]int{} 206 | totalProfiles := 0 207 | 208 | for _, npWeight := range stat.weights { 209 | for _, result := range npWeight.result { 210 | // use idx + 1 to count profiles from 1, which should be easier to read 211 | profileCopies[result.idx+1] += result.copies 212 | totalProfiles += result.copies 213 | } 214 | } 215 | fmt.Printf("Initial %v peers were split into %v profiles.\n", stat.peersCounter, totalProfiles) 216 | data, labels := toTgraphData(profileCopies, func(key int) string { return fmt.Sprintf("%s profile", ordinalString(key)) }) 217 | tgraph.Chart("Used profiles statistics (number of copies)", labels, data, nil, 218 | nil, 100, false, "▇") 219 | fmt.Println() 220 | 221 | // [pair(key=profile idx, value=number of copies)] 222 | sortedCopies := sortedMapByValue[int, int](profileCopies, true) 223 | totalPeers := 0 224 | for _, profileCopy := range sortedCopies { 225 | profilesToNetpolsIdx := profileCopy.key - 1 226 | weightToPeers := stat.profilesToNetpols[profilesToNetpolsIdx] 227 | 228 | profilePeers := 0 229 | for _, copies := range weightToPeers { 230 | profilePeers += len(copies) 231 | } 232 | totalPeers += profilePeers 233 | fmt.Printf("%s profile (%v peers) stats: \n", ordinalString(profileCopy.key), profilePeers) 234 | 235 | sortedWeights, _ := sortedMap[float64, []*gressWithLocalPods](weightToPeers, true) 236 | 237 | weightsToPrint := 5 238 | if len(sortedWeights) < weightsToPrint { 239 | weightsToPrint = len(sortedWeights) 240 | } 241 | 242 | for i, weight := 
range sortedWeights[:weightsToPrint] { 243 | weightUsages := stat.profilesToNetpols[profilesToNetpolsIdx][weight] 244 | fmt.Printf("%s heaviest weight: %.8f used by %v peer(s)\n", ordinalString(i+1), weight, len(weightUsages)) 245 | for _, rule := range weightUsages[:min(5, len(weightUsages))] { 246 | fmt.Printf("\tlocalpods=%v\n", rule.localPods) 247 | rule.print("") 248 | } 249 | } 250 | } 251 | //fmt.Printf("Total peers: %v", totalPeers) 252 | } 253 | } 254 | --------------------------------------------------------------------------------
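
The weight arithmetic described in the README section above can be sketched in isolation. Below is a minimal, hypothetical Go example (not part of the repository) that mirrors the ceiling-division helper `topDiv` from `yaml-analysis/helpers.go` and the copies-times-profile-weight idea from `analyze.go`; the profile and workload numbers are made up, and ports and CIDRs are omitted for brevity, so treat it as an illustration rather than the tool's exact matching algorithm.

```go
package main

import "fmt"

// topDiv is ceiling division, as in yaml-analysis/helpers.go:
// how many profile "units" are needed to cover n items.
func topDiv(n, per int) int {
	if n == 0 {
		return 1
	}
	res := n / per
	if n%per > 0 {
		res++
	}
	return res
}

func main() {
	// Hypothetical performance profile: 1 local pod, 1 pod selector, 1 peer pod,
	// with a measured limit of 100 such policies per cluster.
	profLocalPods, profPodSelectors, profPeerPods := 1, 1, 1
	profWeight := 1.0 / 100.0 // one policy of this profile weighs 0.01

	// Hypothetical workload peer: a policy selecting 3 local pods, with a rule
	// containing 2 pod selectors that together select 10 peer pods.
	localPods, podSelectors, peerPods := 3, 2, 10

	// Number of profile copies needed to cover the peer (ports/CIDRs omitted).
	copies := topDiv(podSelectors, profPodSelectors) *
		topDiv(peerPods, profPeerPods) *
		topDiv(localPods, profLocalPods)
	weight := float64(copies) * profWeight

	fmt.Printf("copies=%d weight=%.2f\n", copies, weight) // copies=60 weight=0.60
	// The workload as a whole is accepted only if the sum of all such
	// per-policy weights stays below 1.
}
```

In this sketch a single policy already consumes 0.60 of the cluster's capacity for the assumed profile, which is the same kind of per-policy figure that appears in the "heaviest netpols" output above before being summed into the final weight.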