├── .DS_Store
├── .gitignore
├── .idea
    ├── .gitignore
    ├── deployment.xml
    ├── energy-aware-recommender.iml
    ├── inspectionProfiles
    │   ├── Project_Default.xml
    │   └── profiles_settings.xml
    ├── misc.xml
    ├── modules.xml
    ├── remote-mappings.xml
    └── vcs.xml
├── Dockerfile
├── LICENSE
├── PromClient.py
├── README.md
├── dashboards
    ├── .DS_Store
    ├── clever-dashboard-w-IPS.json
    ├── clever-dashboard.json
    └── clever-sysbench-kubecon22.json
├── main.py
├── manifests
    ├── clever.yaml
    ├── random.yaml
    └── sysbench.yaml
├── recommender.py
├── requirements.txt
├── scripts
    ├── set_cpu_freq.sh
    └── watch_vpa.sh
├── testPromClient.py
└── utils.py


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/clever/fc20b3e8947978ec13c744daa4f936d16af4d3fa/.DS_Store


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 


--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 | 


--------------------------------------------------------------------------------
/.idea/deployment.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project version="4">
 3 |   <component name="PublishConfigData" autoUpload="Always" serverName="root@52.117.128.227:22 agent" remoteFilesAllowedToDisappearOnAutoupload="false">
 4 |     <serverData>
 5 |       <paths name="root@169.61.236.35:22 key">
 6 |         <serverdata>
 7 |           <mappings>
 8 |             <mapping deploy="/tmp/pycharm_project_419" local="$PROJECT_DIR$" />
 9 |           </mappings>
10 |         </serverdata>
11 |       </paths>
12 |       <paths name="root@169.61.236.35:22 key (1)">
13 |         <serverdata>
14 |           <mappings>
15 |             <mapping deploy="/tmp/pycharm_project_848" local="$PROJECT_DIR$" />
16 |           </mappings>
17 |         </serverdata>
18 |       </paths>
19 |       <paths name="root@169.61.236.35:22 key (2)">
20 |         <serverdata>
21 |           <mappings>
22 |             <mapping deploy="/root/python/venv/clever" local="$PROJECT_DIR$" />
23 |           </mappings>
24 |         </serverdata>
25 |       </paths>
26 |       <paths name="root@52.117.128.227:22 agent">
27 |         <serverdata>
28 |           <mappings>
29 |             <mapping deploy="/tmp/pycharm_project_431" local="$PROJECT_DIR$" />
30 |           </mappings>
31 |         </serverdata>
32 |       </paths>
33 |     </serverData>
34 |     <option name="myAutoUpload" value="ALWAYS" />
35 |   </component>
36 | </project>


--------------------------------------------------------------------------------
/.idea/energy-aware-recommender.iml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <module type="PYTHON_MODULE" version="4">
3 |   <component name="NewModuleRootManager">
4 |     <content url="file://$MODULE_DIR$" />
5 |     <orderEntry type="jdk" jdkName="Remote Python 3.6.8 (/root/.virtualenvs/clever/bin/python)" jdkType="Python SDK" />
6 |     <orderEntry type="sourceFolder" forTests="false" />
7 |   </component>
8 | </module>


--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
 1 | <component name="InspectionProjectProfileManager">
 2 |   <profile version="1.0">
 3 |     <option name="myName" value="Project Default" />
 4 |     <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
 5 |       <option name="ignoredPackages">
 6 |         <value>
 7 |           <list size="25">
 8 |             <item index="0" class="java.lang.String" itemvalue="numba" />
 9 |             <item index="1" class="java.lang.String" itemvalue="joblib" />
10 |             <item index="2" class="java.lang.String" itemvalue="threadpoolctl" />
11 |             <item index="3" class="java.lang.String" itemvalue="scikit-learn" />
12 |             <item index="4" class="java.lang.String" itemvalue="python-dateutil" />
13 |             <item index="5" class="java.lang.String" itemvalue="sklearn" />
14 |             <item index="6" class="java.lang.String" itemvalue="cycler" />
15 |             <item index="7" class="java.lang.String" itemvalue="llvmlite" />
16 |             <item index="8" class="java.lang.String" itemvalue="numpy" />
17 |             <item index="9" class="java.lang.String" itemvalue="patsy" />
18 |             <item index="10" class="java.lang.String" itemvalue="urllib3" />
19 |             <item index="11" class="java.lang.String" itemvalue="pyparsing" />
20 |             <item index="12" class="java.lang.String" itemvalue="Cython" />
21 |             <item index="13" class="java.lang.String" itemvalue="scipy" />
22 |             <item index="14" class="java.lang.String" itemvalue="six" />
23 |             <item index="15" class="java.lang.String" itemvalue="ruptures" />
24 |             <item index="16" class="java.lang.String" itemvalue="kiwisolver" />
25 |             <item index="17" class="java.lang.String" itemvalue="pymannkendall" />
26 |             <item index="18" class="java.lang.String" itemvalue="pandas" />
27 |             <item index="19" class="java.lang.String" itemvalue="sktime" />
28 |             <item index="20" class="java.lang.String" itemvalue="matplotlib" />
29 |             <item index="21" class="java.lang.String" itemvalue="statsmodels" />
30 |             <item index="22" class="java.lang.String" itemvalue="pytz" />
31 |             <item index="23" class="java.lang.String" itemvalue="pmdarima" />
32 |             <item index="24" class="java.lang.String" itemvalue="Pillow" />
33 |           </list>
34 |         </value>
35 |       </option>
36 |     </inspection_tool>
37 |   </profile>
38 | </component>


--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 | <component name="InspectionProjectProfileManager">
2 |   <settings>
3 |     <option name="USE_PROJECT_PROFILE" value="false" />
4 |     <version value="1.0" />
5 |   </settings>
6 | </component>


--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.6.8 (/root/.virtualenvs/clever/bin/python)" project-jdk-type="Python SDK" />
4 | </project>


--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/energy-aware-recommender.iml" filepath="$PROJECT_DIR$/.idea/energy-aware-recommender.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/.idea/remote-mappings.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project version="4">
 3 |   <component name="RemoteMappingsManager">
 4 |     <list>
 5 |       <list>
 6 |         <remote-mappings server-id="python@sftp://root@150.238.242.198:22/usr/local/bin/python3.9" />
 7 |       </list>
 8 |     </list>
 9 |   </component>
10 | </project>


--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.6.8
 2 | 
 3 | WORKDIR /root/src/clever
 4 | 
 5 | COPY requirements.txt ./
 6 | RUN pip install --no-cache-dir -r requirements.txt
 7 | 
 8 | COPY . .
 9 | 
10 | CMD [ "python", "./main.py" ]


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/PromClient.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | 
 3 | import requests
 4 | import os
 5 | import json
 6 | 
 7 | requests.packages.urllib3.disable_warnings()
 8 | 
 9 | class PromClient:
10 |     now = None
11 |     start = None
12 |     prom_address = "http://127.0.0.1:9090"
13 |     prom_token = None
14 |     step = '15s'
15 |     chunk_sz = 900
16 | 
17 |     def __init__(self, prom_address=None, prom_token=None):
18 |         self.prom_address = prom_address or os.getenv("PROM_HOST")
19 |         self.prom_token = prom_token or os.getenv("PROM_TOKEN")
20 | 
21 |         if not self.prom_address:
22 |             raise ValueError(
23 |                 "Please appropriately configure environment variables $PROM_HOST, $PROM_TOKEN to successfully run the crawler and profiler!")
24 | 
25 |     def get_query(self, my_query):
26 |         try:
27 |             if self.prom_token:
28 |                 headers = {"content-type": "application/json; charset=UTF-8",
29 |                            'Authorization': 'Bearer {}'.format(self.prom_token)}
30 |             else:
31 |                 headers = {"content-type": "application/json; charset=UTF-8"}
32 |             response = requests.get('{0}/api/v1/query'.format(self.prom_address),
33 |                                     params={'query': my_query},
34 |                                     headers=headers, verify=False)
35 | 
36 |         except requests.exceptions.RequestException as e:
37 |             print(e)
38 |             return None
39 | 
40 |         try:
41 |             if response.json()['status'] != "success":
42 |                 print("Error processing the request: " + response.json()['status'])
43 |                 print("The Error is: " + response.json()['error'])
44 |                 return None
45 | 
46 |             results = response.json()['data']['result']
47 | 
48 |             if (results is None):
49 |                 # print("the results[] came back empty!")
50 |                 return None
51 | 
52 |             length = len(results)
53 |             if length > 0:
54 |                 return results
55 |             else:
56 |                 # print("the results[] has no entries!")
57 |                 return None
58 |         except:
59 |             print(response)
60 |             return None


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CLEVER
 2 | Container Level Energy-efficient VPA Recommender for Kubernetes
 3 | 
 4 | ## Pre-requisites
 5 | - Baremetal Node OS - RedHat 8
 6 | - Kubernetes 1.22+
 7 | - Kepler v0.2
 8 | - Prometheus release-0.11
 9 | - Kubernetes Vertical Pod Autoscaler (VPA) 0.11
10 | 
11 | ## Installation
12 | ### Install Kepler
 13 | - Follow the instructions in the [Kepler](https://github.com/sustainable-computing-io/kepler) repository to install Kepler as a DaemonSet on the nodes of the Kubernetes cluster.
14 | 
15 | ### Install Prometheus & Grafana Dashboard
 16 | - Follow the instructions in the [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus) repository to install both Prometheus and Grafana on the Kubernetes cluster.
 17 | - Import the [Grafana Dashboard](https://grafana.com/docs/grafana/v9.0/dashboards/export-import/). The dashboard definition is the file `dashboards/clever-dashboard.json`.
18 | - Access Prometheus UI and Grafana Dashboard via `kubectl port-forward` command following the [Access UIs tutorial](https://github.com/prometheus-operator/kube-prometheus/blob/main/docs/access-ui.md).
19 | 
20 | ### Install VPA
21 | - Follow the instructions [here](https://github.com/kubernetes/autoscaler/blob/master/vertical-pod-autoscaler/README.md) to install the VPA on the Kubernetes Cluster.
22 | 
23 | ### Install CLEVER
24 | - Clone the CLEVER repository
25 | ```bash
26 | git clone https://github.com/sustainable-computing-io/clever.git
27 | ```
28 | 
29 | - Deploy CLEVER Recommender to run as an alternative recommender for VPA.
30 | ```bash
31 | kubectl apply -f manifests/clever.yaml
32 | ```
33 | 
34 | ## Tutorial
35 | - Deploy the example application that selects the CLEVER recommender for VPA.
36 | ```bash
37 | kubectl apply -f manifests/random.yaml
38 | ```
39 | 
40 | - The example application defines a VPA Custom Resource with the following configuration:
41 | ```yaml
42 | apiVersion: "autoscaling.k8s.io/v1"
43 | kind: VerticalPodAutoscaler
44 | metadata:
45 |   name: random-vpa
46 | spec:
47 |   recommenders:
48 |     - name: clever
49 |   targetRef:
50 |     apiVersion: "apps/v1"
51 |     kind: Deployment
52 |     name: random
53 |   resourcePolicy:
54 |     containerPolicies:
55 |       - containerName: '*'
56 |         minAllowed:
57 |           cpu: 100m
58 |         maxAllowed:
59 |           cpu: 16
60 |         controlledResources: ["cpu"]
61 | ```
62 | 
63 | - Monitor the recommended CPU requests for the example application by watching the VPA object.
64 | ```bash
 65 | watch -n 0.1 ./scripts/watch_vpa.sh random-vpa
66 | ```
67 | 
68 | - Change the node CPU frequencies to observe the effect on the recommended CPU requests.
69 | ```bash
70 | ./scripts/set_cpu_freq.sh 1GHz
71 | ```
72 | 


--------------------------------------------------------------------------------
/dashboards/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/clever/fc20b3e8947978ec13c744daa4f936d16af4d3fa/dashboards/.DS_Store


--------------------------------------------------------------------------------
/dashboards/clever-dashboard-w-IPS.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "annotations": {
  3 |     "list": [
  4 |       {
  5 |         "builtIn": 1,
  6 |         "datasource": {
  7 |           "type": "grafana",
  8 |           "uid": "-- Grafana --"
  9 |         },
 10 |         "enable": true,
 11 |         "hide": true,
 12 |         "iconColor": "rgba(0, 211, 255, 1)",
 13 |         "name": "Annotations & Alerts",
 14 |         "target": {
 15 |           "limit": 100,
 16 |           "matchAny": false,
 17 |           "tags": [],
 18 |           "type": "dashboard"
 19 |         },
 20 |         "type": "dashboard"
 21 |       }
 22 |     ]
 23 |   },
 24 |   "editable": true,
 25 |   "fiscalYearStartMonth": 0,
 26 |   "graphTooltip": 0,
 27 |   "id": 27,
 28 |   "links": [],
 29 |   "liveNow": false,
 30 |   "panels": [
 31 |     {
 32 |       "datasource": {
 33 |         "type": "prometheus",
 34 |         "uid": "P1809F7CD0C75ACF3"
 35 |       },
 36 |       "description": "For the node with more than 1 CPU, choose the maximum frequency.",
 37 |       "fieldConfig": {
 38 |         "defaults": {
 39 |           "color": {
 40 |             "mode": "palette-classic"
 41 |           },
 42 |           "custom": {
 43 |             "axisCenteredZero": false,
 44 |             "axisColorMode": "text",
 45 |             "axisLabel": "",
 46 |             "axisPlacement": "auto",
 47 |             "barAlignment": 0,
 48 |             "drawStyle": "line",
 49 |             "fillOpacity": 0,
 50 |             "gradientMode": "none",
 51 |             "hideFrom": {
 52 |               "legend": false,
 53 |               "tooltip": false,
 54 |               "viz": false
 55 |             },
 56 |             "lineInterpolation": "linear",
 57 |             "lineWidth": 1,
 58 |             "pointSize": 5,
 59 |             "scaleDistribution": {
 60 |               "type": "linear"
 61 |             },
 62 |             "showPoints": "auto",
 63 |             "spanNulls": false,
 64 |             "stacking": {
 65 |               "group": "A",
 66 |               "mode": "none"
 67 |             },
 68 |             "thresholdsStyle": {
 69 |               "mode": "off"
 70 |             }
 71 |           },
 72 |           "mappings": [],
 73 |           "thresholds": {
 74 |             "mode": "absolute",
 75 |             "steps": [
 76 |               {
 77 |                 "color": "green",
 78 |                 "value": null
 79 |               },
 80 |               {
 81 |                 "color": "red",
 82 |                 "value": 80
 83 |               }
 84 |             ]
 85 |           },
 86 |           "unit": "hertz"
 87 |         },
 88 |         "overrides": []
 89 |       },
 90 |       "gridPos": {
 91 |         "h": 9,
 92 |         "w": 24,
 93 |         "x": 0,
 94 |         "y": 0
 95 |       },
 96 |       "id": 4,
 97 |       "options": {
 98 |         "legend": {
 99 |           "calcs": [],
100 |           "displayMode": "list",
101 |           "placement": "bottom",
102 |           "showLegend": true
103 |         },
104 |         "tooltip": {
105 |           "mode": "single",
106 |           "sort": "none"
107 |         }
108 |       },
109 |       "targets": [
110 |         {
111 |           "datasource": {
112 |             "type": "prometheus",
113 |             "uid": "P1809F7CD0C75ACF3"
114 |           },
115 |           "editorMode": "builder",
116 |           "expr": "max(node_cpu_scaling_frequency_hertz{instance=\"$node\"})",
117 |           "legendFormat": "Current CPU Frequency ",
118 |           "range": true,
119 |           "refId": "A"
120 |         },
121 |         {
122 |           "datasource": {
123 |             "type": "prometheus",
124 |             "uid": "P1809F7CD0C75ACF3"
125 |           },
126 |           "editorMode": "builder",
127 |           "expr": "max(node_cpu_frequency_max_hertz{instance=\"$node\"})",
128 |           "hide": false,
129 |           "legendFormat": "Max CPU Frequency",
130 |           "range": true,
131 |           "refId": "B"
132 |         },
133 |         {
134 |           "datasource": {
135 |             "type": "prometheus",
136 |             "uid": "P1809F7CD0C75ACF3"
137 |           },
138 |           "editorMode": "builder",
139 |           "expr": "max(node_cpu_frequency_min_hertz{instance=\"$node\"})",
140 |           "hide": false,
141 |           "legendFormat": "Min CPU Frequency",
142 |           "range": true,
143 |           "refId": "C"
144 |         }
145 |       ],
146 |       "title": "Current CPU Frequency (Max of all CPUs) for Node $node",
147 |       "type": "timeseries"
148 |     },
149 |     {
150 |       "datasource": {
151 |         "type": "prometheus",
152 |         "uid": "P1809F7CD0C75ACF3"
153 |       },
154 |       "fieldConfig": {
155 |         "defaults": {
156 |           "color": {
157 |             "mode": "palette-classic"
158 |           },
159 |           "custom": {
160 |             "axisCenteredZero": false,
161 |             "axisColorMode": "text",
162 |             "axisLabel": "",
163 |             "axisPlacement": "auto",
164 |             "barAlignment": 0,
165 |             "drawStyle": "line",
166 |             "fillOpacity": 0,
167 |             "gradientMode": "none",
168 |             "hideFrom": {
169 |               "legend": false,
170 |               "tooltip": false,
171 |               "viz": false
172 |             },
173 |             "lineInterpolation": "linear",
174 |             "lineWidth": 1,
175 |             "pointSize": 5,
176 |             "scaleDistribution": {
177 |               "type": "linear"
178 |             },
179 |             "showPoints": "auto",
180 |             "spanNulls": false,
181 |             "stacking": {
182 |               "group": "A",
183 |               "mode": "none"
184 |             },
185 |             "thresholdsStyle": {
186 |               "mode": "off"
187 |             }
188 |           },
189 |           "mappings": [],
190 |           "max": 4,
191 |           "min": 0,
192 |           "thresholds": {
193 |             "mode": "absolute",
194 |             "steps": [
195 |               {
196 |                 "color": "green",
197 |                 "value": null
198 |               },
199 |               {
200 |                 "color": "red",
201 |                 "value": 80
202 |               }
203 |             ]
204 |           }
205 |         },
206 |         "overrides": []
207 |       },
208 |       "gridPos": {
209 |         "h": 10,
210 |         "w": 24,
211 |         "x": 0,
212 |         "y": 9
213 |       },
214 |       "id": 2,
215 |       "options": {
216 |         "legend": {
217 |           "calcs": [],
218 |           "displayMode": "list",
219 |           "placement": "bottom",
220 |           "showLegend": true
221 |         },
222 |         "tooltip": {
223 |           "mode": "single",
224 |           "sort": "none"
225 |         }
226 |       },
227 |       "targets": [
228 |         {
229 |           "datasource": {
230 |             "type": "prometheus",
231 |             "uid": "P1809F7CD0C75ACF3"
232 |           },
233 |           "editorMode": "builder",
234 |           "expr": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{namespace=\"$namespace\", resource=\"cpu\", container=\"$container\"}",
235 |           "legendFormat": "{{pod}}",
236 |           "range": true,
237 |           "refId": "A"
238 |         }
239 |       ],
240 |       "title": "Managed Container CPU Requests",
241 |       "type": "timeseries"
242 |     },
243 |     {
244 |       "datasource": {
245 |         "type": "prometheus",
246 |         "uid": "P1809F7CD0C75ACF3"
247 |       },
248 |       "description": "",
249 |       "fieldConfig": {
250 |         "defaults": {
251 |           "color": {
252 |             "mode": "palette-classic"
253 |           },
254 |           "custom": {
255 |             "axisCenteredZero": false,
256 |             "axisColorMode": "text",
257 |             "axisLabel": "",
258 |             "axisPlacement": "auto",
259 |             "barAlignment": 0,
260 |             "drawStyle": "line",
261 |             "fillOpacity": 0,
262 |             "gradientMode": "none",
263 |             "hideFrom": {
264 |               "legend": false,
265 |               "tooltip": false,
266 |               "viz": false
267 |             },
268 |             "lineInterpolation": "linear",
269 |             "lineWidth": 1,
270 |             "pointSize": 5,
271 |             "scaleDistribution": {
272 |               "type": "linear"
273 |             },
274 |             "showPoints": "auto",
275 |             "spanNulls": false,
276 |             "stacking": {
277 |               "group": "A",
278 |               "mode": "none"
279 |             },
280 |             "thresholdsStyle": {
281 |               "mode": "off"
282 |             }
283 |           },
284 |           "mappings": [],
285 |           "max": 10000000000,
286 |           "min": 0,
287 |           "thresholds": {
288 |             "mode": "absolute",
289 |             "steps": [
290 |               {
291 |                 "color": "green",
292 |                 "value": null
293 |               },
294 |               {
295 |                 "color": "red",
296 |                 "value": 80
297 |               }
298 |             ]
299 |           }
300 |         },
301 |         "overrides": []
302 |       },
303 |       "gridPos": {
304 |         "h": 9,
305 |         "w": 24,
306 |         "x": 0,
307 |         "y": 19
308 |       },
309 |       "id": 6,
310 |       "options": {
311 |         "legend": {
312 |           "calcs": [],
313 |           "displayMode": "list",
314 |           "placement": "bottom",
315 |           "showLegend": true
316 |         },
317 |         "tooltip": {
318 |           "mode": "single",
319 |           "sort": "none"
320 |         }
321 |       },
322 |       "targets": [
323 |         {
324 |           "datasource": {
325 |             "type": "prometheus",
326 |             "uid": "P1809F7CD0C75ACF3"
327 |           },
328 |           "editorMode": "code",
329 |           "expr": "pod_cpu_instructions{pod_namespace=\"$namespace\"} / 3",
330 |           "legendFormat": "{{pod_name}}",
331 |           "range": true,
332 |           "refId": "A"
333 |         }
334 |       ],
335 |       "title": "Container Actual IPS (Instructions/second)",
336 |       "type": "timeseries"
337 |     }
338 |   ],
339 |   "schemaVersion": 37,
340 |   "style": "dark",
341 |   "tags": [],
342 |   "templating": {
343 |     "list": [
344 |       {
345 |         "current": {
346 |           "selected": false,
347 |           "text": "default",
348 |           "value": "default"
349 |         },
350 |         "datasource": {
351 |           "type": "prometheus",
352 |           "uid": "P1809F7CD0C75ACF3"
353 |         },
354 |         "definition": "label_values(kube_pod_container_resource_requests, namespace)",
355 |         "hide": 0,
356 |         "includeAll": false,
357 |         "label": "Namespace",
358 |         "multi": false,
359 |         "name": "namespace",
360 |         "options": [],
361 |         "query": {
362 |           "query": "label_values(kube_pod_container_resource_requests, namespace)",
363 |           "refId": "StandardVariableQuery"
364 |         },
365 |         "refresh": 1,
366 |         "regex": "",
367 |         "skipUrlSync": false,
368 |         "sort": 0,
369 |         "type": "query"
370 |       },
371 |       {
372 |         "current": {
373 |           "selected": true,
374 |           "text": "sysbench",
375 |           "value": "sysbench"
376 |         },
377 |         "datasource": {
378 |           "type": "prometheus",
379 |           "uid": "P1809F7CD0C75ACF3"
380 |         },
381 |         "definition": "label_values(kube_pod_container_resource_requests{namespace=\"$namespace\"}, container)",
382 |         "hide": 0,
383 |         "includeAll": false,
384 |         "label": "Container",
385 |         "multi": false,
386 |         "name": "container",
387 |         "options": [],
388 |         "query": {
389 |           "query": "label_values(kube_pod_container_resource_requests{namespace=\"$namespace\"}, container)",
390 |           "refId": "StandardVariableQuery"
391 |         },
392 |         "refresh": 1,
393 |         "regex": "",
394 |         "skipUrlSync": false,
395 |         "sort": 0,
396 |         "type": "query"
397 |       },
398 |       {
399 |         "current": {
400 |           "selected": false,
401 |           "text": "clever.ibm.cloud",
402 |           "value": "clever.ibm.cloud"
403 |         },
404 |         "datasource": {
405 |           "type": "prometheus",
406 |           "uid": "P1809F7CD0C75ACF3"
407 |         },
408 |         "definition": "label_values(node_cpu_frequency_max_hertz, instance)",
409 |         "hide": 0,
410 |         "includeAll": false,
411 |         "label": "Node",
412 |         "multi": false,
413 |         "name": "node",
414 |         "options": [],
415 |         "query": {
416 |           "query": "label_values(node_cpu_frequency_max_hertz, instance)",
417 |           "refId": "StandardVariableQuery"
418 |         },
419 |         "refresh": 1,
420 |         "regex": "",
421 |         "skipUrlSync": false,
422 |         "sort": 0,
423 |         "type": "query"
424 |       }
425 |     ]
426 |   },
427 |   "time": {
428 |     "from": "now-5m",
429 |     "to": "now"
430 |   },
431 |   "timepicker": {},
432 |   "timezone": "",
433 |   "title": "Clever VPA Recommender",
434 |   "uid": "ePYMOfnVk",
435 |   "version": 5,
436 |   "weekStart": ""
437 | }


--------------------------------------------------------------------------------
/dashboards/clever-dashboard.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "annotations": {
  3 |     "list": [
  4 |       {
  5 |         "builtIn": 1,
  6 |         "datasource": {
  7 |           "type": "grafana",
  8 |           "uid": "-- Grafana --"
  9 |         },
 10 |         "enable": true,
 11 |         "hide": true,
 12 |         "iconColor": "rgba(0, 211, 255, 1)",
 13 |         "name": "Annotations & Alerts",
 14 |         "target": {
 15 |           "limit": 100,
 16 |           "matchAny": false,
 17 |           "tags": [],
 18 |           "type": "dashboard"
 19 |         },
 20 |         "type": "dashboard"
 21 |       }
 22 |     ]
 23 |   },
 24 |   "editable": true,
 25 |   "fiscalYearStartMonth": 0,
 26 |   "graphTooltip": 0,
 27 |   "id": 29,
 28 |   "links": [],
 29 |   "liveNow": false,
 30 |   "panels": [
 31 |     {
 32 |       "datasource": {
 33 |         "type": "prometheus",
 34 |         "uid": "P1809F7CD0C75ACF3"
 35 |       },
 36 |       "description": "For nodes with more than one CPU, the maximum frequency across all CPUs is shown.",
 37 |       "fieldConfig": {
 38 |         "defaults": {
 39 |           "color": {
 40 |             "mode": "palette-classic"
 41 |           },
 42 |           "custom": {
 43 |             "axisLabel": "",
 44 |             "axisPlacement": "auto",
 45 |             "barAlignment": 0,
 46 |             "drawStyle": "line",
 47 |             "fillOpacity": 0,
 48 |             "gradientMode": "none",
 49 |             "hideFrom": {
 50 |               "legend": false,
 51 |               "tooltip": false,
 52 |               "viz": false
 53 |             },
 54 |             "lineInterpolation": "linear",
 55 |             "lineWidth": 1,
 56 |             "pointSize": 5,
 57 |             "scaleDistribution": {
 58 |               "type": "linear"
 59 |             },
 60 |             "showPoints": "auto",
 61 |             "spanNulls": false,
 62 |             "stacking": {
 63 |               "group": "A",
 64 |               "mode": "none"
 65 |             },
 66 |             "thresholdsStyle": {
 67 |               "mode": "off"
 68 |             }
 69 |           },
 70 |           "mappings": [],
 71 |           "thresholds": {
 72 |             "mode": "absolute",
 73 |             "steps": [
 74 |               {
 75 |                 "color": "green",
 76 |                 "value": null
 77 |               },
 78 |               {
 79 |                 "color": "red",
 80 |                 "value": 80
 81 |               }
 82 |             ]
 83 |           },
 84 |           "unit": "hertz"
 85 |         },
 86 |         "overrides": []
 87 |       },
 88 |       "gridPos": {
 89 |         "h": 9,
 90 |         "w": 24,
 91 |         "x": 0,
 92 |         "y": 0
 93 |       },
 94 |       "id": 4,
 95 |       "options": {
 96 |         "legend": {
 97 |           "calcs": [],
 98 |           "displayMode": "list",
 99 |           "placement": "bottom"
100 |         },
101 |         "tooltip": {
102 |           "mode": "single",
103 |           "sort": "none"
104 |         }
105 |       },
106 |       "targets": [
107 |         {
108 |           "datasource": {
109 |             "type": "prometheus",
110 |             "uid": "P1809F7CD0C75ACF3"
111 |           },
112 |           "editorMode": "builder",
113 |           "expr": "max(node_cpu_scaling_frequency_hertz{instance=\"$node\"})",
114 |           "legendFormat": "Current CPU Frequency",
115 |           "range": true,
116 |           "refId": "A"
117 |         },
118 |         {
119 |           "datasource": {
120 |             "type": "prometheus",
121 |             "uid": "P1809F7CD0C75ACF3"
122 |           },
123 |           "editorMode": "builder",
124 |           "expr": "max(node_cpu_frequency_max_hertz{instance=\"$node\"})",
125 |           "hide": false,
126 |           "legendFormat": "Max CPU Frequency",
127 |           "range": true,
128 |           "refId": "B"
129 |         },
130 |         {
131 |           "datasource": {
132 |             "type": "prometheus",
133 |             "uid": "P1809F7CD0C75ACF3"
134 |           },
135 |           "editorMode": "builder",
136 |           "expr": "max(node_cpu_frequency_min_hertz{instance=\"$node\"})",
137 |           "hide": false,
138 |           "legendFormat": "Min CPU Frequency",
139 |           "range": true,
140 |           "refId": "C"
141 |         }
142 |       ],
143 |       "title": "Current CPU Frequency (Max of all CPUs) for Node $node",
144 |       "type": "timeseries"
145 |     },
146 |     {
147 |       "datasource": {
148 |         "type": "prometheus",
149 |         "uid": "P1809F7CD0C75ACF3"
150 |       },
151 |       "fieldConfig": {
152 |         "defaults": {
153 |           "color": {
154 |             "mode": "palette-classic"
155 |           },
156 |           "custom": {
157 |             "axisLabel": "",
158 |             "axisPlacement": "auto",
159 |             "barAlignment": 0,
160 |             "drawStyle": "line",
161 |             "fillOpacity": 0,
162 |             "gradientMode": "none",
163 |             "hideFrom": {
164 |               "legend": false,
165 |               "tooltip": false,
166 |               "viz": false
167 |             },
168 |             "lineInterpolation": "linear",
169 |             "lineWidth": 1,
170 |             "pointSize": 5,
171 |             "scaleDistribution": {
172 |               "type": "linear"
173 |             },
174 |             "showPoints": "auto",
175 |             "spanNulls": false,
176 |             "stacking": {
177 |               "group": "A",
178 |               "mode": "none"
179 |             },
180 |             "thresholdsStyle": {
181 |               "mode": "off"
182 |             }
183 |           },
184 |           "mappings": [],
185 |           "thresholds": {
186 |             "mode": "absolute",
187 |             "steps": [
188 |               {
189 |                 "color": "green",
190 |                 "value": null
191 |               },
192 |               {
193 |                 "color": "red",
194 |                 "value": 80
195 |               }
196 |             ]
197 |           }
198 |         },
199 |         "overrides": []
200 |       },
201 |       "gridPos": {
202 |         "h": 10,
203 |         "w": 24,
204 |         "x": 0,
205 |         "y": 9
206 |       },
207 |       "id": 2,
208 |       "options": {
209 |         "legend": {
210 |           "calcs": [],
211 |           "displayMode": "list",
212 |           "placement": "bottom"
213 |         },
214 |         "tooltip": {
215 |           "mode": "single",
216 |           "sort": "none"
217 |         }
218 |       },
219 |       "targets": [
220 |         {
221 |           "datasource": {
222 |             "type": "prometheus",
223 |             "uid": "P1809F7CD0C75ACF3"
224 |           },
225 |           "editorMode": "builder",
226 |           "expr": "cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{namespace=\"$namespace\", resource=\"cpu\", container=\"$container\"}",
227 |           "legendFormat": "{{pod}}",
228 |           "range": true,
229 |           "refId": "A"
230 |         }
231 |       ],
232 |       "title": "Managed Container CPU Requests",
233 |       "type": "timeseries"
234 |     }
235 |   ],
236 |   "schemaVersion": 36,
237 |   "style": "dark",
238 |   "tags": [],
239 |   "templating": {
240 |     "list": [
241 |       {
242 |         "current": {
243 |           "selected": true,
244 |           "text": "default",
245 |           "value": "default"
246 |         },
247 |         "datasource": {
248 |           "type": "prometheus",
249 |           "uid": "P1809F7CD0C75ACF3"
250 |         },
251 |         "definition": "label_values(kube_pod_container_resource_requests, namespace)",
252 |         "hide": 0,
253 |         "includeAll": false,
254 |         "label": "Namespace",
255 |         "multi": false,
256 |         "name": "namespace",
257 |         "options": [],
258 |         "query": {
259 |           "query": "label_values(kube_pod_container_resource_requests, namespace)",
260 |           "refId": "StandardVariableQuery"
261 |         },
262 |         "refresh": 1,
263 |         "regex": "",
264 |         "skipUrlSync": false,
265 |         "sort": 0,
266 |         "type": "query"
267 |       },
268 |       {
269 |         "current": {
270 |           "selected": false,
271 |           "text": "hamster",
272 |           "value": "hamster"
273 |         },
274 |         "datasource": {
275 |           "type": "prometheus",
276 |           "uid": "P1809F7CD0C75ACF3"
277 |         },
278 |         "definition": "label_values(kube_pod_container_resource_requests{namespace=\"$namespace\"}, container)",
279 |         "hide": 0,
280 |         "includeAll": false,
281 |         "label": "Container",
282 |         "multi": false,
283 |         "name": "container",
284 |         "options": [],
285 |         "query": {
286 |           "query": "label_values(kube_pod_container_resource_requests{namespace=\"$namespace\"}, container)",
287 |           "refId": "StandardVariableQuery"
288 |         },
289 |         "refresh": 1,
290 |         "regex": "",
291 |         "skipUrlSync": false,
292 |         "sort": 0,
293 |         "type": "query"
294 |       },
295 |       {
296 |         "current": {
297 |           "selected": false,
298 |           "text": "load-test.ibm.cloud",
299 |           "value": "load-test.ibm.cloud"
300 |         },
301 |         "datasource": {
302 |           "type": "prometheus",
303 |           "uid": "P1809F7CD0C75ACF3"
304 |         },
305 |         "definition": "label_values(node_cpu_frequency_max_hertz, instance)",
306 |         "hide": 0,
307 |         "includeAll": false,
308 |         "label": "Node",
309 |         "multi": false,
310 |         "name": "node",
311 |         "options": [],
312 |         "query": {
313 |           "query": "label_values(node_cpu_frequency_max_hertz, instance)",
314 |           "refId": "StandardVariableQuery"
315 |         },
316 |         "refresh": 1,
317 |         "regex": "",
318 |         "skipUrlSync": false,
319 |         "sort": 0,
320 |         "type": "query"
321 |       }
322 |     ]
323 |   },
324 |   "time": {
325 |     "from": "now-15m",
326 |     "to": "now"
327 |   },
328 |   "timepicker": {},
329 |   "timezone": "",
330 |   "title": "Clever VPA Recommender",
331 |   "uid": "ePYMOfnVk",
332 |   "version": 13,
333 |   "weekStart": ""
334 | }


--------------------------------------------------------------------------------
/dashboards/clever-sysbench-kubecon22.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "annotations": {
  3 |     "list": [
  4 |       {
  5 |         "builtIn": 1,
  6 |         "datasource": {
  7 |           "type": "grafana",
  8 |           "uid": "-- Grafana --"
  9 |         },
 10 |         "enable": true,
 11 |         "hide": true,
 12 |         "iconColor": "rgba(0, 211, 255, 1)",
 13 |         "name": "Annotations & Alerts",
 14 |         "target": {
 15 |           "limit": 100,
 16 |           "matchAny": false,
 17 |           "tags": [],
 18 |           "type": "dashboard"
 19 |         },
 20 |         "type": "dashboard"
 21 |       }
 22 |     ]
 23 |   },
 24 |   "editable": true,
 25 |   "fiscalYearStartMonth": 0,
 26 |   "graphTooltip": 0,
 27 |   "id": 28,
 28 |   "links": [],
 29 |   "liveNow": false,
 30 |   "panels": [
 31 |     {
 32 |       "datasource": {
 33 |         "type": "prometheus",
 34 |         "uid": "P1809F7CD0C75ACF3"
 35 |       },
 36 |       "description": "For nodes with more than one CPU, the maximum frequency across all CPUs is shown.",
 37 |       "fieldConfig": {
 38 |         "defaults": {
 39 |           "color": {
 40 |             "mode": "palette-classic"
 41 |           },
 42 |           "custom": {
 43 |             "axisCenteredZero": false,
 44 |             "axisColorMode": "text",
 45 |             "axisLabel": "",
 46 |             "axisPlacement": "auto",
 47 |             "barAlignment": 0,
 48 |             "drawStyle": "line",
 49 |             "fillOpacity": 0,
 50 |             "gradientMode": "none",
 51 |             "hideFrom": {
 52 |               "legend": false,
 53 |               "tooltip": false,
 54 |               "viz": false
 55 |             },
 56 |             "lineInterpolation": "linear",
 57 |             "lineWidth": 1,
 58 |             "pointSize": 5,
 59 |             "scaleDistribution": {
 60 |               "type": "linear"
 61 |             },
 62 |             "showPoints": "auto",
 63 |             "spanNulls": false,
 64 |             "stacking": {
 65 |               "group": "A",
 66 |               "mode": "none"
 67 |             },
 68 |             "thresholdsStyle": {
 69 |               "mode": "off"
 70 |             }
 71 |           },
 72 |           "mappings": [],
 73 |           "thresholds": {
 74 |             "mode": "absolute",
 75 |             "steps": [
 76 |               {
 77 |                 "color": "green",
 78 |                 "value": null
 79 |               },
 80 |               {
 81 |                 "color": "red",
 82 |                 "value": 80
 83 |               }
 84 |             ]
 85 |           },
 86 |           "unit": "hertz"
 87 |         },
 88 |         "overrides": []
 89 |       },
 90 |       "gridPos": {
 91 |         "h": 9,
 92 |         "w": 24,
 93 |         "x": 0,
 94 |         "y": 0
 95 |       },
 96 |       "id": 4,
 97 |       "options": {
 98 |         "legend": {
 99 |           "calcs": [],
100 |           "displayMode": "list",
101 |           "placement": "bottom",
102 |           "showLegend": true
103 |         },
104 |         "tooltip": {
105 |           "mode": "single",
106 |           "sort": "none"
107 |         }
108 |       },
109 |       "targets": [
110 |         {
111 |           "datasource": {
112 |             "type": "prometheus",
113 |             "uid": "P1809F7CD0C75ACF3"
114 |           },
115 |           "editorMode": "builder",
116 |           "expr": "max(node_cpu_scaling_frequency_hertz{instance=\"$node\"})",
117 |           "legendFormat": "Current CPU Frequency ",
118 |           "range": true,
119 |           "refId": "A"
120 |         },
121 |         {
122 |           "datasource": {
123 |             "type": "prometheus",
124 |             "uid": "P1809F7CD0C75ACF3"
125 |           },
126 |           "editorMode": "builder",
127 |           "expr": "max(node_cpu_frequency_max_hertz{instance=\"$node\"})",
128 |           "hide": false,
129 |           "legendFormat": "Max CPU Frequency",
130 |           "range": true,
131 |           "refId": "B"
132 |         },
133 |         {
134 |           "datasource": {
135 |             "type": "prometheus",
136 |             "uid": "P1809F7CD0C75ACF3"
137 |           },
138 |           "editorMode": "builder",
139 |           "expr": "max(node_cpu_frequency_min_hertz{instance=\"$node\"})",
140 |           "hide": false,
141 |           "legendFormat": "Min CPU Frequency",
142 |           "range": true,
143 |           "refId": "C"
144 |         }
145 |       ],
146 |       "title": "Current CPU Frequency (Max of all CPUs) for Node $node",
147 |       "type": "timeseries"
148 |     },
149 |     {
150 |       "datasource": {
151 |         "type": "prometheus",
152 |         "uid": "P1809F7CD0C75ACF3"
153 |       },
154 |       "fieldConfig": {
155 |         "defaults": {
156 |           "color": {
157 |             "mode": "palette-classic"
158 |           },
159 |           "custom": {
160 |             "axisCenteredZero": false,
161 |             "axisColorMode": "text",
162 |             "axisLabel": "",
163 |             "axisPlacement": "auto",
164 |             "barAlignment": 0,
165 |             "drawStyle": "line",
166 |             "fillOpacity": 0,
167 |             "gradientMode": "none",
168 |             "hideFrom": {
169 |               "legend": false,
170 |               "tooltip": false,
171 |               "viz": false
172 |             },
173 |             "lineInterpolation": "linear",
174 |             "lineWidth": 1,
175 |             "pointSize": 5,
176 |             "scaleDistribution": {
177 |               "type": "linear"
178 |             },
179 |             "showPoints": "auto",
180 |             "spanNulls": false,
181 |             "stacking": {
182 |               "group": "A",
183 |               "mode": "none"
184 |             },
185 |             "thresholdsStyle": {
186 |               "mode": "off"
187 |             }
188 |           },
189 |           "mappings": [],
190 |           "max": 1,
191 |           "min": 0,
192 |           "thresholds": {
193 |             "mode": "absolute",
194 |             "steps": [
195 |               {
196 |                 "color": "green",
197 |                 "value": null
198 |               },
199 |               {
200 |                 "color": "red",
201 |                 "value": 80
202 |               }
203 |             ]
204 |           }
205 |         },
206 |         "overrides": []
207 |       },
208 |       "gridPos": {
209 |         "h": 10,
210 |         "w": 24,
211 |         "x": 0,
212 |         "y": 9
213 |       },
214 |       "id": 2,
215 |       "options": {
216 |         "legend": {
217 |           "calcs": [],
218 |           "displayMode": "list",
219 |           "placement": "bottom",
220 |           "showLegend": true
221 |         },
222 |         "tooltip": {
223 |           "mode": "single",
224 |           "sort": "none"
225 |         }
226 |       },
227 |       "targets": [
228 |         {
229 |           "datasource": {
230 |             "type": "prometheus",
231 |             "uid": "P1809F7CD0C75ACF3"
232 |           },
233 |           "editorMode": "builder",
234 |           "expr": "max by(container) (cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{namespace=\"$namespace\", resource=\"cpu\", container=\"$container\"})",
235 |           "legendFormat": "{{container}}",
236 |           "range": true,
237 |           "refId": "A"
238 |         }
239 |       ],
240 |       "title": "Managed Container CPU Requests",
241 |       "type": "timeseries"
242 |     },
243 |     {
244 |       "datasource": {
245 |         "type": "prometheus",
246 |         "uid": "P1809F7CD0C75ACF3"
247 |       },
248 |       "description": "",
249 |       "fieldConfig": {
250 |         "defaults": {
251 |           "color": {
252 |             "mode": "palette-classic"
253 |           },
254 |           "custom": {
255 |             "axisCenteredZero": false,
256 |             "axisColorMode": "text",
257 |             "axisLabel": "",
258 |             "axisPlacement": "auto",
259 |             "barAlignment": 0,
260 |             "drawStyle": "line",
261 |             "fillOpacity": 0,
262 |             "gradientMode": "none",
263 |             "hideFrom": {
264 |               "legend": false,
265 |               "tooltip": false,
266 |               "viz": false
267 |             },
268 |             "lineInterpolation": "linear",
269 |             "lineWidth": 1,
270 |             "pointSize": 5,
271 |             "scaleDistribution": {
272 |               "type": "linear"
273 |             },
274 |             "showPoints": "auto",
275 |             "spanNulls": false,
276 |             "stacking": {
277 |               "group": "A",
278 |               "mode": "none"
279 |             },
280 |             "thresholdsStyle": {
281 |               "mode": "off"
282 |             }
283 |           },
284 |           "mappings": [],
285 |           "max": 1000000000,
286 |           "min": 0,
287 |           "thresholds": {
288 |             "mode": "absolute",
289 |             "steps": [
290 |               {
291 |                 "color": "green",
292 |                 "value": null
293 |               },
294 |               {
295 |                 "color": "red",
296 |                 "value": 80
297 |               }
298 |             ]
299 |           }
300 |         },
301 |         "overrides": []
302 |       },
303 |       "gridPos": {
304 |         "h": 9,
305 |         "w": 24,
306 |         "x": 0,
307 |         "y": 19
308 |       },
309 |       "id": 6,
310 |       "options": {
311 |         "legend": {
312 |           "calcs": [],
313 |           "displayMode": "list",
314 |           "placement": "bottom",
315 |           "showLegend": true
316 |         },
317 |         "tooltip": {
318 |           "mode": "single",
319 |           "sort": "none"
320 |         }
321 |       },
322 |       "targets": [
323 |         {
324 |           "datasource": {
325 |             "type": "prometheus",
326 |             "uid": "P1809F7CD0C75ACF3"
327 |           },
328 |           "editorMode": "builder",
329 |           "expr": "max(pod_cpu_instructions{pod_namespace=\"$namespace\"} / 3)",
330 |           "legendFormat": "sysbench",
331 |           "range": true,
332 |           "refId": "A"
333 |         }
334 |       ],
335 |       "title": "Container Actual IPS (Instructions/second)",
336 |       "type": "timeseries"
337 |     }
338 |   ],
339 |   "schemaVersion": 37,
340 |   "style": "dark",
341 |   "tags": [],
342 |   "templating": {
343 |     "list": [
344 |       {
345 |         "current": {
346 |           "selected": false,
347 |           "text": "default",
348 |           "value": "default"
349 |         },
350 |         "datasource": {
351 |           "type": "prometheus",
352 |           "uid": "P1809F7CD0C75ACF3"
353 |         },
354 |         "definition": "label_values(kube_pod_container_resource_requests, namespace)",
355 |         "hide": 0,
356 |         "includeAll": false,
357 |         "label": "Namespace",
358 |         "multi": false,
359 |         "name": "namespace",
360 |         "options": [],
361 |         "query": {
362 |           "query": "label_values(kube_pod_container_resource_requests, namespace)",
363 |           "refId": "StandardVariableQuery"
364 |         },
365 |         "refresh": 1,
366 |         "regex": "",
367 |         "skipUrlSync": false,
368 |         "sort": 0,
369 |         "type": "query"
370 |       },
371 |       {
372 |         "current": {
373 |           "selected": false,
374 |           "text": "sysbench",
375 |           "value": "sysbench"
376 |         },
377 |         "datasource": {
378 |           "type": "prometheus",
379 |           "uid": "P1809F7CD0C75ACF3"
380 |         },
381 |         "definition": "label_values(kube_pod_container_resource_requests{namespace=\"$namespace\"}, container)",
382 |         "hide": 0,
383 |         "includeAll": false,
384 |         "label": "Container",
385 |         "multi": false,
386 |         "name": "container",
387 |         "options": [],
388 |         "query": {
389 |           "query": "label_values(kube_pod_container_resource_requests{namespace=\"$namespace\"}, container)",
390 |           "refId": "StandardVariableQuery"
391 |         },
392 |         "refresh": 1,
393 |         "regex": "",
394 |         "skipUrlSync": false,
395 |         "sort": 0,
396 |         "type": "query"
397 |       },
398 |       {
399 |         "current": {
400 |           "selected": false,
401 |           "text": "clever.ibm.cloud",
402 |           "value": "clever.ibm.cloud"
403 |         },
404 |         "datasource": {
405 |           "type": "prometheus",
406 |           "uid": "P1809F7CD0C75ACF3"
407 |         },
408 |         "definition": "label_values(node_cpu_frequency_max_hertz, instance)",
409 |         "hide": 0,
410 |         "includeAll": false,
411 |         "label": "Node",
412 |         "multi": false,
413 |         "name": "node",
414 |         "options": [],
415 |         "query": {
416 |           "query": "label_values(node_cpu_frequency_max_hertz, instance)",
417 |           "refId": "StandardVariableQuery"
418 |         },
419 |         "refresh": 1,
420 |         "regex": "",
421 |         "skipUrlSync": false,
422 |         "sort": 0,
423 |         "type": "query"
424 |       }
425 |     ]
426 |   },
427 |   "time": {
428 |     "from": "now-5m",
429 |     "to": "now"
430 |   },
431 |   "timepicker": {},
432 |   "timezone": "",
433 |   "title": "Clever VPA Recommender",
434 |   "uid": "ePYMOfnVk",
435 |   "version": 11,
436 |   "weekStart": ""
437 | }


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from kubernetes import client, config
  3 | from kubernetes.client.rest import ApiException
  4 | 
  5 | from utils import *
  6 | from recommender import *
  7 | 
# Name this recommender answers to; VPAs are selected when an entry of
# spec.recommenders carries this name (see selects_recommender).
RECOMMENDER_NAME = "clever"
# Seconds the main loop sleeps between two iterations.
SLEEP_WINDOW = 60

# VPA custom resource identifiers (API group, kind and plural names).
# NOTE(review): the *_CHECKPOINT_* names are not referenced in the visible part of main.py.
DOMAIN = "autoscaling.k8s.io"
VPA_NAME = "verticalpodautoscaler"
VPA_PLURAL = "verticalpodautoscalers"
VPA_CHECKPOINT_NAME = "verticalpodautoscalercheckpoint"
VPA_CHECKPOINT_PLURAL = "verticalpodautoscalercheckpoints"

# Prometheus metric names queried for per-CPU node frequencies.
# NOTE(review): MIN_CPU_FREQUENCY_QUERY is not referenced in the visible part of main.py.
MAX_CPU_FREQUENCY_QUERY = "node_cpu_frequency_max_hertz"
MIN_CPU_FREQUENCY_QUERY = "node_cpu_frequency_min_hertz"
LATEST_CPU_FREQUENCY_QUERY = "node_cpu_scaling_frequency_hertz"

# Module-level caches filled in by the main loop: node -> maximum frequency,
# node -> last observed frequency, and the default CPU request remembered per VPA.
MAX_NODE_CPU_FREQUENCY = {}
LATEST_NODE_CPU_FREQUENCY = {}
ACTIVE_VPA_DEFAULT_CPU_REQUESTS = {}
 28 | 
 29 | # Press the green button in the gutter to run the script.
 30 | if __name__ == '__main__':
 31 |     if 'KUBERNETES_PORT' in os.environ:
 32 |         config.load_incluster_config()
 33 |     else:
 34 |         config.load_kube_config()
 35 | 
 36 |     # Get the api instance to interact with the cluster
 37 |     api_client = client.api_client.ApiClient()
 38 |     v1 = client.ApiextensionsV1Api(api_client)
 39 |     corev1 = client.CoreV1Api(api_client)
 40 |     crds = client.CustomObjectsApi(api_client)
 41 |     resource_version = ''
 42 | 
 43 |     # Initialize the prometheus client
 44 |     prom_client = PromClient()
 45 | 
 46 |     # Initialize the node CPU frequency cache.
 47 |     MAX_NODE_CPU_FREQUENCY = get_all_node_homogeneous_frequencies(prom_client, MAX_CPU_FREQUENCY_QUERY)
 48 |     if MAX_NODE_CPU_FREQUENCY is None:
 49 |         print("Prometheus Query {} at Endpoint {} failed.".format(MAX_CPU_FREQUENCY_QUERY, prom_client.prom_address))
 50 |         exit(-1)
 51 | 
 52 |     LATEST_NODE_CPU_FREQUENCY = get_all_node_homogeneous_frequencies(prom_client, LATEST_CPU_FREQUENCY_QUERY)
 53 |     if LATEST_NODE_CPU_FREQUENCY is None:
 54 |         print("Prometheus Query {} at Endpoint {} failed.".format(LATEST_CPU_FREQUENCY_QUERY, prom_client.prom_address))
 55 |         exit(-1)
 56 |     print("Initialized the node CPU frequency cache {}".format(LATEST_NODE_CPU_FREQUENCY))
 57 | 
 58 |     # Get the VPA CRD
 59 |     current_crds = [x['spec']['names']['kind'].lower() for x in v1.list_custom_resource_definition().to_dict()['items']]
 60 |     if VPA_NAME not in current_crds:
 61 |         print("VerticalPodAutoscaler CRD is not created!")
 62 |         exit(-1)
 63 | 
 64 |     while True:
 65 |         print("Checking the frequency and the target IPS")
 66 |         # Updating the default VPA CPU cache.
 67 |         vpas = crds.list_cluster_custom_object(group=DOMAIN, version="v1", plural=VPA_PLURAL)
 68 |         selectedVpas = selects_recommender(vpas, RECOMMENDER_NAME)
 69 | 
 70 |         # Update the container default requests for selectedVpas
 71 |         # Keep the mapping between nodes and vpas, which manage pods on those nodes.
 72 |         node_vpas = {}
 73 |         for vpa in selectedVpas:
 74 |             vpa_name = vpa["metadata"]["name"]
 75 |             vpa_namespace = vpa["metadata"]["namespace"]
 76 | 
 77 |             # Get initial container request.
 78 |             if vpa_name not in ACTIVE_VPA_DEFAULT_CPU_REQUESTS.keys():
 79 |                 ACTIVE_VPA_DEFAULT_CPU_REQUESTS[vpa_name], vpa_nodes = get_vpa_detailed_info(corev1, vpa)
 80 |                 print("Updating the default CPU request cache for newly discovered VPA {}".format(vpa_name))
 81 |                 print(ACTIVE_VPA_DEFAULT_CPU_REQUESTS)
 82 |             else:
 83 |                 _, vpa_nodes = get_vpa_detailed_info(corev1, vpa)
 84 | 
 85 |             # Select VPAs per node.
 86 |             for node in list(set(vpa_nodes.values())):
 87 |                 if node not in node_vpas.keys():
 88 |                     node_vpas[node] = [vpa]
 89 |                 else:
 90 |                     node_vpas[node].append(vpa)
 91 |             print("Discovering VPAs running on the following nodes.")
 92 |             print(node_vpas)
 93 | 
 94 |         # Obtain the latest node cpu frequencies
 95 |         CUR_NODE_CPU_FREQUENCY = get_all_node_homogeneous_frequencies(prom_client, LATEST_CPU_FREQUENCY_QUERY)
 96 |         print("Current node CPU frequencies\n {}".format(CUR_NODE_CPU_FREQUENCY))
 97 | 
 98 |         # Check difference between LATEST_NODE_CPU_FREQUENCY and CUR_NODE_CPU_FREQUENCY
 99 |         if CUR_NODE_CPU_FREQUENCY != LATEST_NODE_CPU_FREQUENCY:
100 |             # Select nodes with frequency changes.
101 |             nodes_with_frequency_changes = find_node_with_frequency_changes(CUR_NODE_CPU_FREQUENCY, LATEST_NODE_CPU_FREQUENCY)
102 |             print("Discover nodes with frequency changes {}".format(nodes_with_frequency_changes))
103 | 
104 |             vpas_to_update = {}
105 |             for node in nodes_with_frequency_changes:
106 |                 if node not in node_vpas.keys():
107 |                     print("Frequency changes on node {} does not impact any vpa managed pods!")
108 |                     continue
109 | 
110 |                 cur_node_vpas = node_vpas[node]
111 |                 for vpa in cur_node_vpas:
112 |                     vpa_name = vpa["metadata"]["name"]
113 |                     vpas_to_update[vpa_name] = vpa
114 | 
115 |             print("These VPAs {} are impacted by the following nodes with frequency changes {}.".format(vpas_to_update.keys(), nodes_with_frequency_changes))
116 |             for vpa in vpas_to_update.values():
117 |                 vpa_name = vpa["metadata"]["name"]
118 |                 vpa_namespace = vpa["metadata"]["namespace"]
119 | 
120 |                 print("Recommend sizes according to current frequency for vpas on nodes with frequency changes!")
121 | 
122 |                 recommendations = get_recommendation(vpa, corev1, CUR_NODE_CPU_FREQUENCY, MAX_NODE_CPU_FREQUENCY, ACTIVE_VPA_DEFAULT_CPU_REQUESTS[vpa_name])
123 |                 print("Recommendations for VPA {} are {}".format(vpa_name, recommendations))
124 | 
125 |                 if not recommendations:
126 |                     print("No new recommendations obtained, so skip updating the vpa object {}".format(vpa_name))
127 |                     continue
128 | 
129 |                 # Update the recommendations.
130 |                 patched_vpa = {"recommendation": {"containerRecommendations": recommendations}}
131 |                 body = {"status": patched_vpa}
132 |                 vpa_api = client.CustomObjectsApi()
133 | 
134 |                 # Update the VPA object
135 |                 # API call doc: https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/CustomObjectsApi.md#patch_namespaced_custom_object
136 |                 try:
137 |                     vpa_updated = vpa_api.patch_namespaced_custom_object(group=DOMAIN, version="v1", plural=VPA_PLURAL,
138 |                                                                          namespace=vpa_namespace, name=vpa_name,
139 |                                                                          body=body)
140 |                     print("Successfully patched VPA object with the recommendation: %s" %
141 |                           vpa_updated['status']['recommendation']['containerRecommendations'])
142 |                 except ApiException as e:
143 |                     print("Exception when calling CustomObjectsApi->patch_namespaced_custom_object: %s\n" % e)
144 | 
145 |         print("Sleeping for {} seconds".format(SLEEP_WINDOW))
146 |         print("=====================================================================================================")
147 |         time.sleep(SLEEP_WINDOW)
148 | 
149 | 


--------------------------------------------------------------------------------
/manifests/clever.yaml:
--------------------------------------------------------------------------------
 1 | kind: ClusterRole
 2 | apiVersion: rbac.authorization.k8s.io/v1
 3 | metadata:
 4 |   name: clever-role
 5 | rules:
 6 | - apiGroups:
 7 |   - ""
 8 |   resources:
 9 |   - pods
10 |   - customresourcedefinitions
11 |   verbs:
12 |   - '*'
13 | - apiGroups:
14 |   - apiextensions.k8s.io
15 |   resources:
16 |   - customresourcedefinitions
17 |   verbs:
18 |   - '*'
19 | - apiGroups:
20 |   - autoscaling.k8s.io
21 |   resources:
22 |   - verticalpodautoscalers
23 |   - verticalpodautoscalercheckpoints
24 |   verbs:
25 |   - '*'
26 | - apiGroups:
27 |   - rbac.authorization.k8s.io
28 |   resources:
29 |   - clusterrolebindings
30 |   verbs:
31 |   - '*'
32 | - apiGroups:
33 |   - apps
34 |   resources:
35 |   - deployments
36 |   verbs:
37 |   - "*"
38 | ---
39 | apiVersion: v1
40 | kind: ServiceAccount
41 | metadata:
42 |   name: clever
43 |   namespace: kube-system
44 | ---
45 | kind: ClusterRoleBinding
46 | apiVersion: rbac.authorization.k8s.io/v1
47 | metadata:
48 |   name: clever-role-binding
49 | subjects:
50 | - kind: ServiceAccount
51 |   name: clever
52 |   namespace: kube-system
53 | roleRef:
54 |   apiGroup: rbac.authorization.k8s.io
55 |   kind: ClusterRole
56 |   name: clever-role
57 | ---
58 | apiVersion: apps/v1
59 | kind: Deployment
60 | metadata:
61 |   name: clever
62 |   namespace: kube-system
63 | spec:
64 |   replicas: 1
65 |   selector:
66 |     matchLabels:
67 |       app: clever
68 |   template:
69 |     metadata:
70 |       labels:
71 |         app: clever
72 |     spec:
73 |       containers:
74 |         - name: clever
75 |           image: quay.io/chenw615/clever:latest
76 |           env:
77 |             - name: PROM_HOST
78 |               value: "http://prometheus-k8s.monitoring.svc.cluster.local:9090"
79 |           imagePullPolicy: Always
80 |       serviceAccountName: clever
81 |       serviceAccount: clever


--------------------------------------------------------------------------------
/manifests/random.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: "autoscaling.k8s.io/v1"
 2 | kind: VerticalPodAutoscaler
 3 | metadata:
 4 |   name: random-vpa
 5 | spec:
 6 |   recommenders:
 7 |     - name: clever
 8 |   targetRef:
 9 |     apiVersion: "apps/v1"
10 |     kind: Deployment
11 |     name: random
12 |   resourcePolicy:
13 |     containerPolicies:
14 |       - containerName: '*'
15 |         minAllowed:
16 |           cpu: 100m
17 |         maxAllowed:
18 |           cpu: 16
19 |         controlledResources: ["cpu"]
20 | ---
21 | apiVersion: apps/v1
22 | kind: Deployment
23 | metadata:
24 |   name: random
25 | spec:
26 |   selector:
27 |     matchLabels:
28 |       app: random
29 |   replicas: 2
30 |   template:
31 |     metadata:
32 |       labels:
33 |         app: random
34 |     spec:
35 |       securityContext:
36 |         runAsNonRoot: true
37 |         runAsUser: 65534 # nobody
38 |       containers:
39 |         - name: hamster
40 |           image: k8s.gcr.io/ubuntu-slim:0.1
41 |           resources:
42 |             requests:
43 |               cpu: 1
44 |               memory: 500Mi
45 |           command: ["/bin/sh"]
46 |           args:
47 |             - "-c"
48 |             - "cat /dev/random"


--------------------------------------------------------------------------------
/manifests/sysbench.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: "autoscaling.k8s.io/v1"
 2 | kind: VerticalPodAutoscaler
 3 | metadata:
 4 |   name: sysbench-vpa
 5 | spec:
 6 |   recommenders:
 7 |     - name: clever
 8 |   targetRef:
 9 |     apiVersion: "apps/v1"
10 |     kind: Deployment
11 |     name: sysbench
12 |   resourcePolicy:
13 |     containerPolicies:
14 |       - containerName: '*'
15 |         minAllowed:
16 |           cpu: 100m
17 |         maxAllowed:
18 |           cpu: 16
19 |         controlledResources: ["cpu"]
20 | ---
21 | apiVersion: apps/v1
22 | kind: Deployment
23 | metadata:
24 |   name: sysbench
25 | spec:
26 |   selector:
27 |     matchLabels:
28 |       app: sysbench
29 |   replicas: 2
30 |   template:
31 |     metadata:
32 |       labels:
33 |         app: sysbench
34 |     spec:
35 |       securityContext:
36 |         runAsNonRoot: true
37 |         runAsUser: 65534 # nobody
38 |       containers:
39 |         - name: sysbench
40 |           image: severalnines/sysbench
41 |           resources:
42 |             requests:
43 |               cpu: 250m
44 |               memory: 500Mi
45 |             limits:
46 |               cpu: 250m
47 |               memory: 500Mi
48 |           command:
49 |           - sysbench
50 |           - cpu
51 |           - --threads=1
52 |           - --time=100000
53 |           - run
54 | 


--------------------------------------------------------------------------------
/recommender.py:
--------------------------------------------------------------------------------
  1 | 
# Fallback namespace used when looking up the pods targeted by a VPA.
DEFAULT_NAMESPACE="default"
# Relative margin around the uncapped target: lower bound = target * (1 - DELTA),
# upper bound = target * (1 + DELTA) (see get_recommendation).
DELTA = 0.2
  4 | 
  5 | # Select the VPAs that choose the current clever recommender
  6 | def selects_recommender(vpas, recommender_name):
  7 |     selected_vpas = []
  8 |     for vpa in vpas["items"]:
  9 |         vpa_spec = vpa["spec"]
 10 |         if "recommenders" not in vpa_spec.keys():
 11 |             continue
 12 |         else:
 13 |             print("VPA {} has chosen {} recommenders".format(vpa["metadata"]["name"], len(vpa_spec["recommenders"])))
 14 |             print(vpa_spec)
 15 |             for recommender in vpa_spec["recommenders"]:
 16 |                 if recommender["name"] == recommender_name:
 17 |                     selected_vpas.append(vpa)
 18 | 
 19 |     return selected_vpas
 20 | 
 21 | # Check if all container CPU requests are the same and get the consistent value.
 22 | # If some container requests are larger than others, is_consistent would be False.
 23 | def get_consistent_max_val(request_dict):
 24 |     max_val = -1
 25 |     consistent_cnt = 0
 26 |     for pod in request_dict.keys():
 27 |         for container in request_dict[pod].keys():
 28 |             if request_dict[pod][container] > max_val:
 29 |                 max_val = request_dict[pod][container]
 30 |                 consistent_cnt += 1
 31 | 
 32 |     is_consistent = True
 33 |     if consistent_cnt > 1:
 34 |         is_consistent = False
 35 | 
 36 |     return is_consistent, max_val
 37 | 
 38 | 
 39 | # Only check the default CPU request. If not existed, it will use 1 core by default.
 40 | def get_vpa_detailed_info(corev1, vpa):
 41 |     # Get the VPA spec
 42 |     vpa_spec = vpa["spec"]
 43 | 
 44 |     # example target_ref {'apiVersion': 'apps/v1', 'kind': 'Deployment', 'name': 'hamster'}
 45 |     target_ref = vpa_spec["targetRef"]
 46 |     print(target_ref)
 47 | 
 48 |     # Retrieve the target pods
 49 |     if "namespace" in target_ref.keys():
 50 |         target_namespace = target_ref["namespace"]
 51 |     else:
 52 |         target_namespace = DEFAULT_NAMESPACE
 53 | 
 54 |     # Get the target containers
 55 |     target_pods = corev1.list_namespaced_pod(namespace=target_namespace, label_selector="app=" + target_ref["name"])
 56 | 
 57 |     # Retrieve the target containers
 58 |     vpa_pod_nodes = {}
 59 |     all_container_cpu_requests = {}
 60 |     for pod in target_pods.items:
 61 |         all_container_cpu_requests[pod.metadata.name] = {}
 62 |         vpa_pod_nodes[pod.metadata.name] = pod.spec.node_name
 63 |         for container in pod.spec.containers:
 64 |             # print(container.name)
 65 |             # obtain the CPU request and convert it to int
 66 |             cur_request = str2resource("cpu", container.resources.requests["cpu"])
 67 |             all_container_cpu_requests[pod.metadata.name][container.name] = cur_request
 68 | 
 69 |     # Get the maximum default request if there are many containers.
 70 |     is_consistent, max_cpu_val = get_consistent_max_val(all_container_cpu_requests)
 71 |     vpa_container_cpu_request = max_cpu_val
 72 | 
 73 |     if not is_consistent:
 74 |         print("Warning: the containers managed by {} do not have consistent CPU requests!", vpa["metadata"]["name"])
 75 | 
 76 |     return vpa_container_cpu_request, vpa_pod_nodes
 77 | 
 78 | # resource2str converts a resource (CPU, Memory) value to a string
 79 | def resource2str(resource, value):
 80 |     if resource.lower() == "cpu":
 81 |         if value < 1:
 82 |             return str(int(value * 1000)) + "m"
 83 |         else:
 84 |             return str(value)
 85 |     # Memory is in bytes
 86 |     else:
 87 |         if value < 1024:
 88 |             return str(value) + "B"
 89 |         elif value < 1024 * 1024:
 90 |             return str(int(value / 1024)) + "k"
 91 |         elif value < 1024 * 1024 * 1024:
 92 |             return str(int(value / 1024 / 1024)) + "Mi"
 93 |         else:
 94 |             return str(int(value / 1024 / 1024 / 1024)) + "Gi"
 95 | 
 96 | # Convert a resource (CPU, Memory) string to a float value
 97 | def str2resource(resource, value):
 98 |     if type(value) is str:
 99 |         if resource.lower() == "cpu":
100 |             if value[-1] == "m":
101 |                 return float(value[:-1]) / 1000
102 |             else:
103 |                 return float(value)
104 |         else:
105 |             if value[-1].lower() == "b":
106 |                 return float(value[:-1])
107 |             elif value[-1].lower() == "k":
108 |                 return float(value[:-1]) * 1024
109 |             elif value[-2:].lower() == "mi":
110 |                 return float(value[:-2]) * 1024 * 1024
111 |             elif value[-2:].lower() == "gi":
112 |                 return float(value[:-2]) * 1024 * 1024 * 1024
113 |             else:
114 |                 return float(value)
115 |     else:
116 |         return value
117 | 
def bound_var(var, min_value, max_value):
    """Clamp ``var`` to the range [min_value, max_value].

    The lower bound is checked first; a value inside the range is returned unchanged.
    """
    if var < min_value:
        return min_value
    return max_value if var > max_value else var
125 | 
126 | # Find the nodes with frequency changes in the last iteration
def find_node_with_frequency_changes(cur_node_frequencies, prev_node_frequencies):
    """List the nodes whose frequency is new or differs from the previous snapshot.

    Both arguments map node -> frequency. Nodes are reported in the iteration order
    of ``cur_node_frequencies``; nodes that only exist in the previous snapshot are
    not reported.
    """
    return [
        node
        for node in cur_node_frequencies
        if node not in prev_node_frequencies
        or not (cur_node_frequencies[node] == prev_node_frequencies[node])
    ]
139 | 
def get_recommendation(vpa, corev1, node_frequencies, max_node_frequencies, vpa_default_request):
    """Compute the CPU recommendations for the containers targeted by a VPA.

    For every target pod the default request is scaled by
    ``max node frequency / current node frequency``; per container name the largest
    scaled value over all pods is the uncapped target. Lower and upper bounds are
    the uncapped target -/+ DELTA, and target and bounds are clamped to the
    minAllowed/maxAllowed of the container's policy.

    Args:
        vpa: the VPA object as a dict.
        corev1: CoreV1Api-like client providing ``list_namespaced_pod``.
        node_frequencies: node name -> current CPU frequency.
        max_node_frequencies: node name -> maximum CPU frequency.
        vpa_default_request: default CPU request (in cores) of the VPA's containers.

    Returns:
        A list of containerRecommendation dicts (possibly empty). Pods that are not
        scheduled yet, run on a node without frequency data, or run on a node whose
        reported frequency is not positive are skipped.
    """
    # Get the VPA spec
    vpa_spec = vpa["spec"]

    # example target_ref {'apiVersion': 'apps/v1', 'kind': 'Deployment', 'name': 'hamster'}
    target_ref = vpa_spec["targetRef"]
    print(target_ref)

    # Retrieve the namespace of the target pods: an explicit targetRef namespace wins,
    # then the VPA's own namespace (a VPA targets a controller in its own namespace).
    target_namespace = (target_ref.get("namespace")
                        or vpa.get("metadata", {}).get("namespace")
                        or DEFAULT_NAMESPACE)

    # Get the target pods
    target_pods = corev1.list_namespaced_pod(namespace=target_namespace, label_selector="app=" + target_ref["name"])

    # Get uncapped target per container name
    uncapped_targets = {}
    for pod in target_pods.items:
        pod_node = pod.spec.node_name
        if pod_node not in node_frequencies or pod_node not in max_node_frequencies:
            print("Skipping pod {}: no CPU frequency data for node {}".format(pod.metadata.name, pod_node))
            continue

        node_frequency = float(node_frequencies[pod_node])
        max_node_frequency = float(max_node_frequencies[pod_node])
        if node_frequency <= 0:
            # Frequencies are rounded upstream, so a low reading can end up as 0.
            print("Skipping pod {}: node {} reports a non-positive CPU frequency".format(pod.metadata.name, pod_node))
            continue

        uncapped_target = vpa_default_request * max_node_frequency / node_frequency
        for container in pod.spec.containers:
            container_name = container.name
            uncapped_targets[container_name] = max(uncapped_target,
                                                   uncapped_targets.get(container_name, uncapped_target))

    # Index the container policies: named policies apply to that container only,
    # "*" (or a missing containerName) is the default for all other containers.
    resource_policy = vpa_spec.get("resourcePolicy") or {}
    named_policies = {}
    default_policy = None
    for container_policy in resource_policy.get("containerPolicies") or []:
        policy_target = container_policy.get("containerName", "*")
        if policy_target == "*":
            if default_policy is None:
                default_policy = container_policy
        else:
            named_policies.setdefault(policy_target, container_policy)

    resource = "cpu"
    recommendations = []
    for container_name, uncapped_target in uncapped_targets.items():
        # Containers without any matching policy are left unbounded.
        container_policy = named_policies.get(container_name) or default_policy or {}

        # The VPA API controls both cpu and memory when controlledResources is omitted.
        controlled_resources = container_policy.get("controlledResources") or ["cpu", "memory"]
        if resource not in controlled_resources:
            continue

        min_allowed = container_policy.get("minAllowed") or {}
        max_allowed = container_policy.get("maxAllowed") or {}
        min_allowed_value = str2resource(resource, min_allowed.get(resource, 0))
        max_allowed_value = str2resource(resource, max_allowed.get(resource, float("inf")))

        lower_bound = uncapped_target * (1 - DELTA)
        upper_bound = uncapped_target * (1 + DELTA)

        # Clamp the target and both bounds to the allowed range
        target = bound_var(uncapped_target, min_allowed_value, max_allowed_value)
        lower_bound = bound_var(lower_bound, min_allowed_value, max_allowed_value)
        upper_bound = bound_var(upper_bound, min_allowed_value, max_allowed_value)

        # Convert CPU values to their string form
        recommendations.append({
            "containerName": container_name,
            "lowerBound": {resource: resource2str(resource, lower_bound)},
            "target": {resource: resource2str(resource, target)},
            "uncappedTarget": {resource: resource2str(resource, uncapped_target)},
            "upperBound": {resource: resource2str(resource, upper_bound)},
        })
    return recommendations
209 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | cachetools==4.2.4
 2 | certifi==2022.9.14
 3 | charset-normalizer==2.0.12
 4 | google-auth==2.11.1
 5 | idna==3.4
 6 | kubernetes==24.2.0
 7 | oauthlib==3.2.1
 8 | pyasn1==0.4.8
 9 | pyasn1-modules==0.2.8
10 | python-dateutil==2.8.2
11 | PyYAML==6.0
12 | requests==2.27.1
13 | requests-oauthlib==1.3.1
14 | rsa==4.9
15 | six==1.16.0
16 | urllib3==1.26.12
17 | websocket-client==1.3.1
18 | 


--------------------------------------------------------------------------------
/scripts/set_cpu_freq.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Pin the CPU frequency by setting both the minimum (-d) and the maximum (-u)
# scaling frequency to the same value.
# Usage: set_cpu_freq.sh <frequency>   (any value accepted by cpupower)

if [ -z "${1}" ]; then
    echo "Usage: ${0} <frequency>" >&2
    exit 1
fi

FREQ="${1}"
cpupower frequency-set -d "${FREQ}"
cpupower frequency-set -u "${FREQ}"


--------------------------------------------------------------------------------
/scripts/watch_vpa.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Print the CPU target of the first container recommendation of a VPA.
# Usage: watch_vpa.sh <vpa-name>

if [ -z "${1}" ]; then
    echo "Usage: ${0} <vpa-name>" >&2
    exit 1
fi

# Echo the command first so the output below it is self-explanatory.
echo "\$kubectl get vpa ${1} --no-headers -o \"custom-columns=:status.recommendation.containerRecommendations[0].target.cpu\""
kubectl get vpa "${1}" --no-headers -o "custom-columns=:status.recommendation.containerRecommendations[0].target.cpu"
echo -e "\n"


--------------------------------------------------------------------------------
/testPromClient.py:
--------------------------------------------------------------------------------
 1 | from utils import *
 2 | 
 3 | if __name__ == '__main__':
 4 |     prom_address = "http://127.0.0.1:39090"
 5 |     prom_client = PromClient(prom_address)
 6 | 
 7 |     max_cpu_frequency_query = "node_cpu_frequency_max_hertz"
 8 |     min_cpu_frequency_query = "node_cpu_frequency_min_hertz"
 9 |     latest_cpu_frequency_query = "node_cpu_scaling_frequency_hertz"
10 | 
11 |     pod_ips_query = "pod_energy_stat"
12 | 
13 |     all_node_homogeneous_max_frequencies = get_all_node_homogeneous_frequencies(prom_client, max_cpu_frequency_query)
14 |     print(all_node_homogeneous_max_frequencies)
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | from PromClient import *
 2 | 
 3 | def parse_frequency_dict(cpu_frequency_data):
 4 |     all_node_frequencies = {}
 5 |     for cur_element in cpu_frequency_data:
 6 |         node_name = cur_element["metric"]["instance"]
 7 |         cpu_idx = cur_element["metric"]["cpu"]
 8 |         if node_name not in all_node_frequencies.keys():
 9 |             all_node_frequencies[node_name] = {}
10 |         cur_val = cur_element["value"][1]
11 |         all_node_frequencies[node_name][cpu_idx] = cur_val
12 | 
13 |     return all_node_frequencies
14 | 
def get_homogeneous_value(node_frequencies):
    """Reduce the per-CPU frequencies of one node to a single rounded value.

    Args:
        node_frequencies: mapping ``cpu -> frequency``; values may be numbers or
            numeric strings (including decimal notation).

    Returns:
        The highest per-CPU frequency rounded by get_rounded_frequency, or -1 when
        the mapping is empty so that callers can detect the missing data.
    """
    if not node_frequencies:
        # Return the sentinel untouched; rounding it would turn it into 0.
        return -1

    # float() first so readings such as "2399999000.5" are accepted as well.
    highest = max(int(float(raw)) for raw in node_frequencies.values())
    return get_rounded_frequency(highest)

def get_rounded_frequency(frequency):
    """Round a frequency in Hz to a whole multiple of 1e9 (1 GHz).

    Uses Python's round(), so exact halves go to the even multiple, and anything
    below half a GHz becomes 0.
    """
    return round(frequency / 1000000000) * 1000000000
31 | 
def get_all_node_homogeneous_frequencies(prom_cient, prometheus_query):
    """Run a frequency query and return one summarized frequency per node.

    Returns ``{node: frequency}``, or None when the query yields None or when
    get_homogeneous_value reports -1 for any node.
    """
    raw_samples = prom_cient.get_query(prometheus_query)
    if raw_samples is None:
        return None

    summarized = {}
    for node_name, cpu_frequencies in parse_frequency_dict(raw_samples).items():
        node_frequency = get_homogeneous_value(cpu_frequencies)
        if node_frequency == -1:
            return None
        summarized[node_name] = node_frequency
    return summarized


--------------------------------------------------------------------------------