├── .dockerignore ├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── build-on-release.yml │ ├── docker-build-on-tag.yml │ └── pytest-on-push.yml ├── .gitignore ├── .pre-commit-config.yaml ├── ADOPTERS.md ├── Dockerfile ├── LICENSE ├── README.md ├── build_linux.sh ├── build_local.sh ├── build_release.sh ├── docker ├── README.md └── aws.Dockerfile ├── docs ├── google-cloud-managed-service-for-prometheus.md └── krr-in-cluster │ └── krr-in-cluster-job.yaml ├── enforcer ├── Dockerfile ├── README.md ├── dal │ ├── robusta_config.py │ └── supabase_dal.py ├── enforcer_main.py ├── env_vars.py ├── metrics.py ├── model.py ├── params_utils.py ├── patch_manager.py ├── requirements.txt ├── resources │ ├── kubernetes_resource_loader.py │ ├── owner_store.py │ └── recommendation_store.py └── utils.py ├── examples ├── custom_formatter.py ├── custom_severity_calculator.py └── custom_strategy.py ├── helm ├── krr-enforcer │ ├── .helmignore │ ├── Chart.yaml │ ├── templates │ │ ├── enforcer-cert-job.yaml │ │ ├── enforcer-service-account.yaml │ │ ├── enforcer.yaml │ │ └── service-monitor.yaml │ └── values.yaml └── upload_chart.sh ├── images ├── krr-datasources.png ├── krr-datasources.svg ├── krr-other-integrations.png ├── krr-other-integrations.svg ├── krr_slack_example.png ├── logo.png ├── screenshot.jpeg ├── ui_recommendation.png ├── ui_screenshot_new.png └── ui_video.gif ├── intro.txt ├── krr.py ├── poetry.lock ├── pyproject.toml ├── requirements.txt ├── robusta_krr ├── __init__.py ├── api │ ├── formatters.py │ ├── models.py │ └── strategies.py ├── common │ └── ssl_utils.py ├── core │ ├── __init__.py │ ├── abstract │ │ ├── formatters.py │ │ ├── metrics.py │ │ └── strategies.py │ ├── integrations │ │ ├── kubernetes │ │ │ ├── __init__.py │ │ │ └── config_patch.py │ │ ├── openshift │ │ │ ├── __init__.py │ │ │ └── token.py │ │ └── prometheus │ │ │ ├── __init__.py │ │ │ ├── loader.py │ │ │ ├── metrics │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── cpu.py │ │ │ └── memory.py │ │ │ ├── metrics_service │ │ │ ├── base_metric_service.py │ │ │ ├── mimir_metrics_service.py │ │ │ ├── prometheus_metrics_service.py │ │ │ ├── thanos_metrics_service.py │ │ │ └── victoria_metrics_service.py │ │ │ └── prometheus_utils.py │ ├── models │ │ ├── allocations.py │ │ ├── config.py │ │ ├── objects.py │ │ ├── result.py │ │ └── severity.py │ └── runner.py ├── formatters │ ├── __init__.py │ ├── csv.py │ ├── csv_raw.py │ ├── html.py │ ├── json.py │ ├── pprint.py │ ├── table.py │ └── yaml.py ├── main.py ├── strategies │ ├── __init__.py │ ├── simple.py │ └── simple_limit.py └── utils │ ├── batched.py │ ├── intro.py │ ├── object_like_dict.py │ ├── patch.py │ ├── progress_bar.py │ ├── resource_units.py │ ├── service_discovery.py │ └── version.py └── tests ├── conftest.py ├── formatters └── test_csv_formatter.py ├── models └── test_resource_allocations.py ├── single_namespace_as_group.yaml ├── single_namespace_permissions.yaml ├── test_krr.py └── test_runner.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # .dockerignore 2 | __pycache__ 3 | *.pyc 4 | *.pyo 5 | *.pyd 6 | 7 | # Exclude development files 8 | .git 9 | .gitignore 10 | Dockerfile 11 | *.md 12 | .vscode 13 | 14 | # Exclude logs and cache 15 | logs/ 16 | cache/ 17 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 
3 | exclude = .git, 4 | __pycache__, 5 | old, 6 | build, 7 | dist, 8 | .venv, 9 | .vscode, 10 | .pytest_cache, 11 | __init__.py, 12 | .mypy_cache, 13 | src/robusta/integrations/kubernetes/autogenerated, 14 | src/robusta/integrations/kubernetes/custom_models.py 15 | ignore = E501, W503, E203 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Are you interested in contributing a fix for this?** 27 | Yes/no. If yes, we will provide guidance what parts of the code to modify and help you. 28 | 29 | **Desktop (please complete the following information):** 30 | - OS: [e.g. iOS] 31 | - Browser [e.g. chrome, safari] 32 | - Version [e.g. 22] 33 | 34 | **Smartphone (please complete the following information):** 35 | - Device: [e.g. iPhone6] 36 | - OS: [e.g. iOS8.1] 37 | - Browser [e.g. stock browser, safari] 38 | - Version [e.g. 22] 39 | 40 | **Additional context** 41 | Add any other context about the problem here. 42 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Are you interested in contributing a PR for this?** 20 | Yes/no. If yes, we will provide guidance what parts of the code to modify and help you. 21 | 22 | **Additional context** 23 | Add any other context or screenshots about the feature request here. 
24 | -------------------------------------------------------------------------------- /.github/workflows/build-on-release.yml: -------------------------------------------------------------------------------- 1 | name: Build and Release 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | build: 9 | strategy: 10 | matrix: 11 | # we build on macos-13 for x86 builds 12 | os: [ubuntu-latest, windows-latest, macos-latest, macos-13] 13 | 14 | runs-on: ${{ matrix.os }} 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | 19 | - name: Set up Python 20 | uses: actions/setup-python@v2 21 | with: 22 | python-version: '3.11' 23 | 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install -r requirements.txt 28 | pip install pyinstaller 29 | 30 | - name: Install dependancies (Linux) 31 | if: matrix.os == 'ubuntu-latest' 32 | run: | 33 | sudo apt-get install -y binutils 34 | 35 | - name: Install the Apple certificate and provisioning profile 36 | if: matrix.os == 'macos-latest' || matrix.os == 'macos-13' 37 | env: 38 | BUILD_CERTIFICATE_BASE64: ${{ secrets.BUILD_CERTIFICATE_BASE64 }} 39 | P12_PASSWORD: ${{ secrets.P12_PASSWORD }} 40 | BUILD_PROVISION_PROFILE_BASE64: ${{ secrets.BUILD_PROVISION_PROFILE_BASE64 }} 41 | KEYCHAIN_PASSWORD: ${{ secrets.KEYCHAIN_PASSWORD }} 42 | run: | 43 | # create variables 44 | CERTIFICATE_PATH=$RUNNER_TEMP/build_certificate.p12 45 | PP_PATH=$RUNNER_TEMP/build_pp.mobileprovision 46 | KEYCHAIN_PATH=$RUNNER_TEMP/app-signing.keychain-db 47 | 48 | # import certificate and provisioning profile from secrets 49 | echo -n "$BUILD_CERTIFICATE_BASE64" | base64 --decode -o $CERTIFICATE_PATH 50 | echo -n "$BUILD_PROVISION_PROFILE_BASE64" | base64 --decode -o $PP_PATH 51 | 52 | # create temporary keychain 53 | security create-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH 54 | security set-keychain-settings -lut 21600 $KEYCHAIN_PATH 55 | security unlock-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH 56 | 57 | # import certificate to keychain 58 | security import $CERTIFICATE_PATH -P "$P12_PASSWORD" -A -t cert -f pkcs12 -k $KEYCHAIN_PATH 59 | security list-keychain -d user -s $KEYCHAIN_PATH 60 | 61 | # apply provisioning profile 62 | mkdir -p ~/Library/MobileDevice/Provisioning\ Profiles 63 | cp $PP_PATH ~/Library/MobileDevice/Provisioning\ Profiles 64 | 65 | - name: Set version in code (Unix) 66 | if: matrix.os == 'macos-latest' || matrix.os == 'ubuntu-latest' || matrix.os == 'macos-13' 67 | run: | 68 | awk 'NR==3{$0="__version__ = \"'${{ github.ref_name }}'\""}1' ./robusta_krr/__init__.py > temp && mv temp ./robusta_krr/__init__.py 69 | cat ./robusta_krr/__init__.py 70 | 71 | - name: Set version in code (Windows) 72 | if: matrix.os == 'windows-latest' 73 | run: | 74 | $content = Get-Content -Path .\robusta_krr\__init__.py 75 | $content[2] = "__version__=`"$($env:GITHUB_REF_NAME)`"" 76 | $content | Out-File -FilePath .\robusta_krr\__init__.py -Encoding ascii 77 | Get-Content .\robusta_krr\__init__.py 78 | shell: pwsh 79 | env: 80 | GITHUB_REF_NAME: ${{ github.ref_name }} 81 | 82 | - name: Build with PyInstaller 83 | if: matrix.os == 'macos-latest' 84 | shell: bash 85 | run: | 86 | pyinstaller --target-architecture arm64 krr.py 87 | mkdir -p ./dist/krr/grapheme/data 88 | cp $(python -c "import grapheme; print(grapheme.__path__[0] + '/data/grapheme_break_property.json')") ./dist/krr/grapheme/data/grapheme_break_property.json 89 | cp ./intro.txt ./dist/krr/intro.txt 90 | 91 | - name: Build with PyInstaller 92 | if: matrix.os != 
'macos-latest' 93 | shell: bash 94 | run: | 95 | pyinstaller krr.py 96 | mkdir -p ./dist/krr/grapheme/data 97 | cp $(python -c "import grapheme; print(grapheme.__path__[0] + '/data/grapheme_break_property.json')") ./dist/krr/grapheme/data/grapheme_break_property.json 98 | cp ./intro.txt ./dist/krr/intro.txt 99 | 100 | - name: Zip the application (Unix) 101 | if: matrix.os == 'macos-latest' || matrix.os == 'ubuntu-latest' || matrix.os == 'macos-13' 102 | run: | 103 | cd dist 104 | zip -r krr-${{ matrix.os }}-${{ github.ref_name }}.zip krr 105 | mv krr-${{ matrix.os }}-${{ github.ref_name }}.zip ../ 106 | cd .. 107 | 108 | - name: Zip the application (Windows) 109 | if: matrix.os == 'windows-latest' 110 | run: | 111 | Set-Location -Path dist 112 | Compress-Archive -Path krr -DestinationPath krr-${{ matrix.os }}-${{ github.ref_name }}.zip -Force 113 | Move-Item -Path krr-${{ matrix.os }}-${{ github.ref_name }}.zip -Destination ..\ 114 | Set-Location -Path .. 115 | 116 | - name: Upload Release Asset 117 | uses: actions/upload-release-asset@v1.0.2 118 | env: 119 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 120 | with: 121 | upload_url: ${{ github.event.release.upload_url }} 122 | asset_path: ./krr-${{ matrix.os }}-${{ github.ref_name }}.zip 123 | asset_name: krr-${{ matrix.os }}-${{ github.ref_name }}.zip 124 | asset_content_type: application/octet-stream 125 | 126 | - name: Upload build as artifact 127 | uses: actions/upload-artifact@v4 128 | with: 129 | name: krr-${{ matrix.os }}-${{ github.ref_name }} 130 | path: ./krr-${{ matrix.os }}-${{ github.ref_name }}.zip 131 | 132 | - name: Clean up keychain and provisioning profile 133 | if: (matrix.os == 'macos-latest' || matrix.os == 'macos-13') && always() 134 | run: | 135 | security delete-keychain $RUNNER_TEMP/app-signing.keychain-db 136 | rm ~/Library/MobileDevice/Provisioning\ Profiles/build_pp.mobileprovision 137 | 138 | check-latest: 139 | needs: build 140 | runs-on: ubuntu-latest 141 | outputs: 142 | IS_LATEST: ${{ steps.check-latest.outputs.release == github.ref_name }} 143 | steps: 144 | - id: check-latest 145 | uses: pozetroninc/github-action-get-latest-release@v0.7.0 146 | with: 147 | token: ${{ secrets.GITHUB_TOKEN }} 148 | repository: ${{ github.repository }} 149 | excludes: prerelease, draft 150 | 151 | # Define MacOS hash job 152 | mac-hash: 153 | needs: check-latest 154 | runs-on: ubuntu-latest 155 | if: needs.check-latest.outputs.IS_LATEST 156 | outputs: 157 | MAC_BUILD_HASH: ${{ steps.calc-hash.outputs.MAC_BUILD_HASH }} 158 | steps: 159 | - name: Checkout Repository 160 | uses: actions/checkout@v2 161 | - name: Download MacOS artifact 162 | uses: actions/download-artifact@v4 163 | with: 164 | name: krr-macos-latest-${{ github.ref_name }} 165 | - name: Calculate hash 166 | id: calc-hash 167 | run: echo "::set-output name=MAC_BUILD_HASH::$(sha256sum krr-macos-latest-${{ github.ref_name }}.zip | awk '{print $1}')" 168 | 169 | # Define Linux hash job 170 | linux-hash: 171 | needs: check-latest 172 | runs-on: ubuntu-latest 173 | if: needs.check-latest.outputs.IS_LATEST 174 | outputs: 175 | LINUX_BUILD_HASH: ${{ steps.calc-hash.outputs.LINUX_BUILD_HASH }} 176 | steps: 177 | - name: Checkout Repository 178 | uses: actions/checkout@v2 179 | - name: Download Linux artifact 180 | uses: actions/download-artifact@v4 181 | with: 182 | name: krr-ubuntu-latest-${{ github.ref_name }} 183 | - name: Calculate hash 184 | id: calc-hash 185 | run: echo "::set-output name=LINUX_BUILD_HASH::$(sha256sum krr-ubuntu-latest-${{ github.ref_name }}.zip | awk 
'{print $1}')" 186 | 187 | # Define job to update homebrew formula 188 | update-formula: 189 | needs: [mac-hash, linux-hash] 190 | runs-on: ubuntu-latest 191 | steps: 192 | - name: Checkout homebrew-krr repository 193 | uses: actions/checkout@v2 194 | with: 195 | repository: robusta-dev/homebrew-krr 196 | token: ${{ secrets.MULTIREPO_GITHUB_TOKEN }} 197 | - name: Update krr.rb formula 198 | run: | 199 | MAC_BUILD_HASH=${{ needs.mac-hash.outputs.MAC_BUILD_HASH }} 200 | LINUX_BUILD_HASH=${{ needs.linux-hash.outputs.LINUX_BUILD_HASH }} 201 | TAG_NAME=${{ github.ref_name }} 202 | awk 'NR==6{$0=" url \"https://github.com/robusta-dev/krr/releases/download/'"$TAG_NAME"'/krr-macos-latest-'"$TAG_NAME"'.zip\""}1' ./Formula/krr.rb > temp && mv temp ./Formula/krr.rb 203 | awk 'NR==7{$0=" sha256 \"'$MAC_BUILD_HASH'\""}1' ./Formula/krr.rb > temp && mv temp ./Formula/krr.rb 204 | awk 'NR==9{$0=" url \"https://github.com/robusta-dev/krr/releases/download/'"$TAG_NAME"'/krr-ubuntu-latest-'"$TAG_NAME"'.zip\""}1' ./Formula/krr.rb > temp && mv temp ./Formula/krr.rb 205 | awk 'NR==10{$0=" sha256 \"'$LINUX_BUILD_HASH'\""}1' ./Formula/krr.rb > temp && mv temp ./Formula/krr.rb 206 | - name: Commit and push changes 207 | run: | 208 | git config --local user.email "action@github.com" 209 | git config --local user.name "GitHub Action" 210 | git commit -am "Update formula for release ${TAG_NAME}" 211 | git push 212 | -------------------------------------------------------------------------------- /.github/workflows/docker-build-on-tag.yml: -------------------------------------------------------------------------------- 1 | name: Docker Build and Push 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | permissions: 14 | contents: 'read' 15 | id-token: 'write' 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - uses: 'google-github-actions/auth@v2' 21 | with: 22 | project_id: 'genuine-flight-317411' 23 | workload_identity_provider: 'projects/429189597230/locations/global/workloadIdentityPools/github/providers/robusta-repos' 24 | 25 | - name: Set up gcloud CLI 26 | uses: google-github-actions/setup-gcloud@v2 27 | with: 28 | project_id: genuine-flight-317411 29 | 30 | - name: Configure Docker Registry 31 | run: gcloud auth configure-docker us-central1-docker.pkg.dev 32 | 33 | - name: Login to Docker Hub 34 | uses: docker/login-action@v1 35 | with: 36 | username: ${{ secrets.DOCKER_USERNAME }} 37 | password: ${{ secrets.DOCKER_PASSWORD }} 38 | 39 | - name: Set up Docker Buildx 40 | uses: docker/setup-buildx-action@v1 41 | 42 | - name: Build and push Docker images 43 | uses: docker/build-push-action@v2 44 | with: 45 | context: . 46 | platforms: linux/arm64,linux/amd64 47 | push: true 48 | tags: | 49 | robustadev/krr:${{ github.ref_name }} 50 | us-central1-docker.pkg.dev/genuine-flight-317411/devel/krr:${{ github.ref_name }} 51 | build-args: | 52 | BUILDKIT_INLINE_CACHE=1 -------------------------------------------------------------------------------- /.github/workflows/pytest-on-push.yml: -------------------------------------------------------------------------------- 1 | name: Pytest 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v2 12 | 13 | - name: Set up Python 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: '3.9' 17 | 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install -r requirements.txt 22 | pip install -e . 
23 | pip install pytest 24 | 25 | - name: Test with pytest 26 | run: | 27 | pytest 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | .idea/ 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | 133 | .DS_Store 134 | robusta_lib 135 | .idea 136 | .vscode 137 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/ambv/black 3 | rev: 23.1.0 4 | hooks: 5 | - id: black 6 | language_version: python3 7 | args: [--config=pyproject.toml] 8 | 9 | - repo: https://github.com/pre-commit/pre-commit-hooks 10 | rev: v3.3.0 11 | hooks: 12 | - id: trailing-whitespace 13 | - id: end-of-file-fixer 14 | 15 | - repo: https://github.com/pycqa/flake8 16 | rev: 6.0.0 17 | hooks: 18 | - id: flake8 19 | args: [--config=.flake8] 20 | 21 | - repo: https://github.com/pycqa/isort 22 | rev: 5.12.0 23 | hooks: 24 | - id: isort 25 | args: [--settings-path=pyproject.toml] 26 | 27 | - repo: https://github.com/pre-commit/mirrors-mypy 28 | rev: v1.0.1 29 | hooks: 30 | - id: mypy 31 | language: system 32 | -------------------------------------------------------------------------------- /ADOPTERS.md: -------------------------------------------------------------------------------- 1 | # KRR Adopters 2 | 3 | This is a list of adopters of Robusta KRR operator: 4 | 5 | Everton Arakaki - WAES Platform Consultant for ASML (Semiconductor Industry) 6 | 7 | > I used Robusta KRR in my production clusters, and it took me less than 5 minutes to get very well detailed cpu/memory recommendations. Our applications and platform tooling were discovered automatically; our kubecontext was discovered automatically; and our kube-prometheus-stack was discovered automatically. -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official Python 3.9 slim image as the base image 2 | FROM python:3.12-slim as builder 3 | ENV LANG=C.UTF-8 4 | ENV PYTHONDONTWRITEBYTECODE=1 5 | ENV PYTHONUNBUFFERED=1 6 | ENV PATH="/app/venv/bin:$PATH" 7 | 8 | # Install system dependencies required for Poetry 9 | RUN apt-get update && \ 10 | dpkg --add-architecture arm64 11 | 12 | # We're installing here libexpat1, to upgrade the package to include a fix to 3 high CVEs. 
CVE-2024-45491,CVE-2024-45490,CVE-2024-45492 13 | RUN apt-get update \ 14 | && apt-get install -y --no-install-recommends libexpat1 \ 15 | && rm -rf /var/lib/apt/lists/* 16 | 17 | # Set the working directory 18 | WORKDIR /app 19 | 20 | COPY ./requirements.txt requirements.txt 21 | 22 | RUN pip install --no-cache-dir --upgrade pip 23 | # Install the project dependencies 24 | RUN python -m ensurepip --upgrade 25 | RUN pip install --no-cache-dir -r requirements.txt 26 | 27 | # Copy the rest of the application code 28 | COPY ./krr.py krr.py 29 | COPY ./robusta_krr/ robusta_krr/ 30 | COPY ./intro.txt intro.txt 31 | 32 | # Run the application using 'poetry run krr simple' 33 | CMD ["python", "krr.py", "simple"] 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Robusta 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /build_linux.sh: -------------------------------------------------------------------------------- 1 | # Remove old build 2 | rm -rf build 3 | rm -rf dist 4 | 5 | # MacOS Build first 6 | 7 | # Active venv 8 | # python -m pip install -r requirements.txt 9 | pip install pyinstaller 10 | apt-get install binutils 11 | 12 | # source .venv/bin/activate 13 | 14 | # Build 15 | pyinstaller krr.py 16 | cd dist 17 | # zip -r "krr-linux-v1.1.0.zip" krr 18 | 19 | # Deactivate venv 20 | # deactivate -------------------------------------------------------------------------------- /build_local.sh: -------------------------------------------------------------------------------- 1 | # Remove old build 2 | rm -rf build 3 | rm -rf dist 4 | 5 | # Active venv 6 | source .venv/bin/activate 7 | pip install -r requirements.txt 8 | pip install pyinstaller 9 | 10 | # Build 11 | pyinstaller krr.py 12 | cd dist 13 | zip -r "krr-macos-v1.1.0.zip" krr -------------------------------------------------------------------------------- /build_release.sh: -------------------------------------------------------------------------------- 1 | docker buildx build \ 2 | --build-arg BUILDKIT_INLINE_CACHE=1 \ 3 | --platform linux/arm64,linux/amd64 \ 4 | --tag us-central1-docker.pkg.dev/genuine-flight-317411/devel/krr:${TAG} \ 5 | --push \ 6 | . 
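# Usage sketch (assumption: TAG is supplied by the caller and matches the release tag), e.g.
#   TAG=v1.2.3 ./build_release.sh
# Requires a docker buildx builder with linux/arm64 and linux/amd64 support and push
# access to the target registry.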
--------------------------------------------------------------------------------
/docker/README.md:
--------------------------------------------------------------------------------
 1 | # Dockerfiles for specific clouds
 2 |
 3 | This directory contains Dockerfiles for various cloud providers.
 4 |
 5 | ## AWS
 6 |
 7 | To use the `krr` container on AWS, the image needs the `awscli` installed in it.
 8 | The `aws.Dockerfile` is a modified `krr` Dockerfile that includes:
 9 | - installation of curl & unzip
10 | - installation of awscli
11 |
12 |
13 |
--------------------------------------------------------------------------------
/docker/aws.Dockerfile:
--------------------------------------------------------------------------------
 1 | # Use the official Python 3.9 slim image as the base image
 2 | FROM python:3.9-slim as builder
 3 |
 4 | # Set the working directory
 5 | WORKDIR /app
 6 |
 7 | # Update package lists and add arm64 architecture support
 8 | RUN apt-get update && \
 9 |     dpkg --add-architecture arm64
10 |
11 | COPY ./requirements.txt requirements.txt
12 |
13 | # Install the project dependencies
14 | RUN pip install --no-cache-dir -r requirements.txt
15 |
16 | # Install curl and unzip for awscli
17 | RUN apt-get -y update; apt-get -y install curl; apt-get -y install unzip
18 |
19 | # Download awscli and unzip it
20 | RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
21 |     unzip awscliv2.zip && \
22 |     ./aws/install
23 |
24 | # Copy the rest of the application code
25 | COPY . .
26 |
27 | # Run the application with the 'simple' strategy
28 | ENTRYPOINT ["python", "krr.py", "simple"]
29 |
--------------------------------------------------------------------------------
/docs/google-cloud-managed-service-for-prometheus.md:
--------------------------------------------------------------------------------
 1 | ## Installation instructions for [Google Managed Service for Prometheus](https://cloud.google.com/stackdriver/docs/managed-prometheus)
 2 |
 3 | The following instructions assume that you are running [Google Managed Service for Prometheus (GMP)](https://cloud.google.com/stackdriver/docs/managed-prometheus) in its [managed collection](https://cloud.google.com/stackdriver/docs/managed-prometheus/setup-managed) mode and that you have installed krr.
 4 |
 5 | krr depends upon 2 [cAdvisor](https://github.com/google/cadvisor) [metrics](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md#prometheus-container-metrics):
 6 |
 7 | 1. `container_cpu_usage_seconds_total`
 8 | 1. `container_memory_working_set_bytes`
 9 |
10 |
11 | In order for krr to work with GMP, we need to ensure that cAdvisor is enabled and that the GMP Operator is configured to collect these 2 metrics. This can be combined into a single step that involves revising the GMP Operator configuration file `operatorconfig/config` in Namespace `gmp-public`.
12 |
13 | Google provides instructions for enabling [Kubelet/cAdvisor](https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/kubelet-cadvisor). This requires adding a `kubeletScraping` section to the configuration file.
14 |
15 | We must also add a `filter` section to the configuration file. The `filter` matches the 2 metrics that krr uses.
16 | 17 | `operatorconfig.krr.patch.yaml`: 18 | ```YAML 19 | collection: 20 | filter: 21 | matchOneOf: 22 | - '{__name__="container_cpu_usage_seconds_total"}' 23 | - '{__name__="container_memory_working_set_bytes"}' 24 | kubeletScraping: 25 | interval: 30s 26 | ``` 27 | 28 | There are various ways to make this Resource change to the cluster. 29 | 30 | You can `kubectl edit` the file and manually add the changes: 31 | 32 | ```bash 33 | KUBE_EDITOR="nano" \ 34 | kubectl edit operatorconfig/config \ 35 | --namespace=gmp-public 36 | ``` 37 | 38 | Or you can `kubectl patch` the file: 39 | 40 | ```bash 41 | kubectl patch operatorconfig/config \ 42 | --namespace=gmp-public \ 43 | --type=merge \ 44 | --patch-file=/path/to/operatorconfig.krr.patch.yaml 45 | ``` 46 | 47 | ### Test 48 | 49 | There are multiple ways to confirm that GMP is collecting the metrics needed by krr. 50 | 51 | The simplest is to access Google Cloud Console "Metric Diagnostics" and confirm that the "Metrics" section includes the 2 metrics with (recent) "Metric Data Ingested": 52 | 53 | `https://console.cloud.google.com/monitoring/metrics-diagnostics?project={project}` 54 | 55 | > **NOTE** Replace `{project}` with your Google Cloud Project ID. 56 | 57 | Another way is to deploy the [Frontend UI for GMP](https://cloud.google.com/stackdriver/docs/managed-prometheus/query#promui-deploy) and use the UI to browse the metrics. 58 | 59 | GMP implements the [Prometheus HTTP API](https://prometheus.io/docs/prometheus/latest/querying/api/) and, like krr, we can use this to query the metrics: 60 | 61 | ```bash 62 | PROJECT="..." # Google Cloud Project ID 63 | MONITORING="https://monitoring.googleapis.com/v1" 64 | ENDPOINT="${MONITORING}/projects/${PROJECT}/location/global/prometheus" 65 | 66 | TOKEN=$(gcloud auth print-access-token) 67 | 68 | # Either 69 | QUERY="count({__name__=\"container_cpu_usage_seconds_total\"})" 70 | # Or 71 | QUERY="count({__name__=\"container_memory_working_set_bytes\"})" 72 | 73 | curl \ 74 | --silent \ 75 | --get \ 76 | --header "Authorization: Bearer ${TOKEN}" \ 77 | --data-urlencode "query=${QUERY}" \ 78 | ${ENDPOINT}/api/v1/query 79 | ``` 80 | If you have [jq]() installed, you can filter the results to output only the latest value: 81 | ```bash 82 | | jq -r .data.result[0].value[1] 83 | ``` 84 | 85 | ### Run krr 86 | 87 | krr leverages Google [Application Default Credentials (ADC)](https://cloud.google.com/docs/authentication/application-default-credentials). Ensure that ADC credentials are accessible (per Google's documentation) before running krr so that krr can authenticate to GMP. 88 | 89 | ```bash 90 | PROJECT="..." 
# Google Cloud Project ID 91 | MONITORING="https://monitoring.googleapis.com/v1" 92 | ENDPOINT="${MONITORING}/projects/${PROJECT}/location/global/prometheus" 93 | 94 | python krr.py simple \ 95 | --prometheus-url=${ENDPOINT} 96 | ``` 97 | -------------------------------------------------------------------------------- /docs/krr-in-cluster/krr-in-cluster-job.yaml: -------------------------------------------------------------------------------- 1 | kind: ClusterRole 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | metadata: 4 | name: krr-cluster-role 5 | namespace: default 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - configmaps 11 | - daemonsets 12 | - deployments 13 | - namespaces 14 | - pods 15 | - replicasets 16 | - replicationcontrollers 17 | - services 18 | verbs: 19 | - get 20 | - list 21 | - watch 22 | 23 | - apiGroups: 24 | - "" 25 | resources: 26 | - nodes 27 | verbs: 28 | - get 29 | - list 30 | - watch 31 | 32 | - apiGroups: 33 | - apps 34 | resources: 35 | - daemonsets 36 | - deployments 37 | - deployments/scale 38 | - replicasets 39 | - replicasets/scale 40 | - statefulsets 41 | verbs: 42 | - get 43 | - list 44 | - watch 45 | 46 | - apiGroups: 47 | - extensions 48 | resources: 49 | - daemonsets 50 | - deployments 51 | - deployments/scale 52 | - ingresses 53 | - replicasets 54 | - replicasets/scale 55 | - replicationcontrollers/scale 56 | verbs: 57 | - get 58 | - list 59 | - watch 60 | - apiGroups: 61 | - batch 62 | resources: 63 | - cronjobs 64 | - jobs 65 | verbs: 66 | - get 67 | - list 68 | - watch 69 | - apiGroups: 70 | - "autoscaling" 71 | resources: 72 | - horizontalpodautoscalers 73 | verbs: 74 | - get 75 | - list 76 | - watch 77 | 78 | --- 79 | apiVersion: v1 80 | kind: ServiceAccount 81 | metadata: 82 | name: krr-service-account 83 | namespace: default 84 | --- 85 | apiVersion: rbac.authorization.k8s.io/v1 86 | kind: ClusterRoleBinding 87 | metadata: 88 | name: krr-cluster-role-binding 89 | roleRef: 90 | apiGroup: rbac.authorization.k8s.io 91 | kind: ClusterRole 92 | name: krr-cluster-role 93 | subjects: 94 | - kind: ServiceAccount 95 | name: krr-service-account 96 | namespace: default 97 | 98 | --- 99 | apiVersion: batch/v1 100 | kind: Job 101 | metadata: 102 | name: krr 103 | namespace: default 104 | spec: 105 | template: 106 | spec: 107 | containers: 108 | - command: 109 | - /bin/sh 110 | - -c 111 | - "python krr.py simple --max-workers 3 --width 2048 " 112 | image: robustadev/krr:v1.17.0 113 | imagePullPolicy: Always 114 | name: krr 115 | resources: 116 | limits: 117 | memory: 2Gi 118 | requests: 119 | memory: 1Gi 120 | restartPolicy: Never 121 | serviceAccount: krr-service-account 122 | serviceAccountName: krr-service-account 123 | -------------------------------------------------------------------------------- /enforcer/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the official Python 3.9 slim image as the base image 2 | FROM python:3.12-slim 3 | ENV LANG=C.UTF-8 4 | ENV PYTHONDONTWRITEBYTECODE=1 5 | ENV PYTHONUNBUFFERED=1 6 | ENV PATH="/app/venv/bin:$PATH" 7 | 8 | # We're installing here libexpat1, to upgrade the package to include a fix to 3 high CVEs. CVE-2024-45491,CVE-2024-45490,CVE-2024-45492 9 | RUN apt-get update \ 10 | && apt-get install -y --no-install-recommends libexpat1 \ 11 | && rm -rf /var/lib/apt/lists/* 12 | 13 | # Set the working directory 14 | WORKDIR /app/enforcer 15 | 16 | COPY ./*.py . 
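# dal/ provides the Supabase data-access layer; resources/ provides the ReplicaSet owner and recommendation stores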
17 | COPY ./dal/ dal/ 18 | COPY ./resources/ resources/ 19 | COPY ./requirements.txt requirements.txt 20 | 21 | 22 | RUN pip install --no-cache-dir --upgrade pip 23 | # Install the project dependencies 24 | RUN python -m ensurepip --upgrade 25 | RUN pip install --no-cache-dir -r requirements.txt 26 | 27 | CMD ["python", "enforcer_main.py"] 28 | -------------------------------------------------------------------------------- /enforcer/README.md: -------------------------------------------------------------------------------- 1 | # KRR Enforcer - Kubernetes Resource Recommendation Mutation Webhook 2 | 3 | A mutating webhook server that automatically enforces [KRR (Kubernetes Resource Recommender)](https://github.com/robusta-dev/krr) recommendations by patching pod resource requests and limits in real-time. 4 | 5 | ## Features 6 | 7 | - **Automatic Resource Enforcement**: Applies KRR recommendations to pods during pod creation 8 | - **Flexible Enforcement Modes**: Support for enforce/ignore modes per workload 9 | - **REST API**: Query recommendations via HTTP endpoints 10 | 11 | ## Enforcement Modes 12 | 13 | Enforcement can be configured globally or on a per-workload basis. 14 | 15 | ### Global Enforcement Mode 16 | The global default mode is configured via the `KRR_MUTATION_MODE_DEFAULT` environment variable: 17 | - `enforce` - Apply recommendations to all pods by default 18 | - `ignore` - Skip enforcement for all pods by default 19 | 20 | ### Per-Workload Mode 21 | You can override the default mode for specific workloads using the annotation: 22 | 23 | ```yaml 24 | apiVersion: apps/v1 25 | kind: Deployment 26 | metadata: 27 | name: my-app 28 | spec: 29 | template: 30 | metadata: 31 | annotations: 32 | admission.robusta.dev/krr-mutation-mode: enforce # or "ignore" 33 | ``` 34 | 35 | **Mode Priority**: Pod annotation > Global default 36 | 37 | ## Webhook Failure Mode 38 | 39 | The webhook uses `failurePolicy: Ignore` by default, meaning if the webhook fails, pods are created without resource optimization rather than being blocked. 40 | 41 | 42 | ## Installation with Helm 43 | 44 | ### Prerequisites 45 | - Helm 3.x 46 | - Prometheus Operator (optional, for metrics collection) 47 | - Robusta UI account - used to store KRR scan results 48 | 49 | ### Certificate 50 | 51 | - Each helm install/upgrade, a new certificate is created and deployed for the admission webhook. 52 | - The certificate is set to expire after 1 year. 53 | - In order to avoid certificate expiration, you must upgrade the enforcer helm release, at least once a year. 54 | 55 | ### Quick Start 56 | 57 | 1. **Add the helm repository** (if available): 58 | ```bash 59 | helm repo add robusta https://robusta-charts.storage.googleapis.com && helm repo update 60 | ``` 61 | 62 | 2. **Add cluster configuration**: 63 | 64 | If the enforcer is installed in the same namespace as Robusta, it will automatically detect the Robusta account settings. 65 | 66 | If your Robusta UI sink token, is pulled from a secret (as described [here](https://docs.robusta.dev/master/setup-robusta/configuration-secrets.html#pulling-values-from-kubernetes-secrets)), you should add the same environement variable to the `Enforcer` pod as well. 
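For example, a minimal sketch of wiring a secret-backed token into the enforcer (the secret name and key below are placeholders; point them at whatever secret your Robusta installation uses):

```yaml
additionalEnvVars:
  - name: ROBUSTA_UI_TOKEN
    valueFrom:
      secretKeyRef:
        name: robusta-secrets    # placeholder secret name
        key: robustaSinkToken    # placeholder key
```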
67 | 68 | If the `Enforcer` is installed on a different namespace, you can provide your Robusta account credentials using env variables: 69 | 70 | Add your robusta credentials and cluster name: (`enforcer-values.yaml`) 71 | 72 | ```yaml 73 | additionalEnvVars: 74 | - name: CLUSTER_NAME 75 | value: my-cluster-name # should be the same as the robusta installation on this cluster 76 | - name: ROBUSTA_UI_TOKEN 77 | value: "MY ROBUSTA UI TOKEN" 78 | # - name: ROBUSTA_UI_TOKEN # or pulled from a secret 79 | # valueFrom: 80 | # secretKeyRef: 81 | # name: robusta-secrets 82 | # key: robustaSinkToken 83 | ``` 84 | 85 | 2. **Install with default settings**: 86 | ```bash 87 | helm install krr-enforcer robusta/krr-enforcer -f enforcer-values.yaml 88 | ``` 89 | 90 | ### Helm values 91 | 92 | | Parameter | Description | Default | 93 | |-----------|---------------------------------------------------------------------|---------| 94 | | `logLevel` | Log level (DEBUG, INFO, WARN, ERROR) | `INFO` | 95 | | `certificate` | Base64-encoded custom CA certificate - for self signed certificates | `""` | 96 | | `serviceMonitor.enabled` | Enable Prometheus ServiceMonitor | `true` | 97 | | `resources.requests.cpu` | CPU request for the enforcer pod | `100m` | 98 | | `resources.requests.memory` | Memory request for the enforcer pod | `256Mi` | 99 | 100 | 101 | ## Running Locally 102 | 103 | ### Prerequisites 104 | - Python 3.9+ 105 | - Access to a Kubernetes cluster 106 | - KRR recommendations data from Robusta UI 107 | 108 | ### Setup 109 | 110 | 1. **Install dependencies**: 111 | ```bash 112 | pip install -r requirements.txt 113 | ``` 114 | 115 | 2. **Set environment variables**: 116 | ```bash 117 | export ENFORCER_SSL_KEY_FILE="path/to/tls.key" 118 | export ENFORCER_SSL_CERT_FILE="path/to/tls.crt" 119 | export LOG_LEVEL="DEBUG" 120 | export KRR_MUTATION_MODE_DEFAULT="enforce" 121 | ``` 122 | 123 | 3. **Generate TLS certificates**: 124 | ```bash 125 | # Generate private key 126 | openssl genrsa -out tls.key 2048 127 | 128 | # Generate certificate signing request 129 | openssl req -new -key tls.key -out tls.csr \ 130 | -subj "/CN=krr-enforcer.krr-system.svc" 131 | 132 | # Generate self-signed certificate 133 | openssl x509 -req -in tls.csr -signkey tls.key -out tls.crt -days 365 134 | ``` 135 | 136 | 4. 
**Run the server**: 137 | ```bash 138 | python enforcer_main.py 139 | ``` 140 | 141 | The server will start on `https://localhost:8443` with the following endpoints: 142 | 143 | - `POST /mutate` - Webhook endpoint for Kubernetes admission control 144 | - `GET /health` - Health check endpoint 145 | - `GET /metrics` - Prometheus metrics 146 | - `GET /recommendations/{namespace}/{kind}/{name}` - Query recommendations 147 | 148 | ### Local Development Tips 149 | 150 | - Use `LOG_LEVEL=DEBUG` for detailed request/response logging 151 | - Test webhook locally using tools like `curl` or `httpie` 152 | - Monitor metrics at `https://localhost:8443/metrics` 153 | - Query recommendations: `GET https://localhost:8443/recommendations/default/Deployment/my-app` 154 | 155 | ### Testing the Webhook 156 | 157 | ```bash 158 | # Test health endpoint 159 | curl -k https://localhost:8443/health 160 | 161 | # Test metrics endpoint 162 | curl -k https://localhost:8443/metrics 163 | 164 | # Test recommendations endpoint 165 | curl -k https://localhost:8443/recommendations/default/Deployment/my-app 166 | ``` 167 | 168 | ## Metrics 169 | 170 | The enforcer exposes Prometheus metrics at `/metrics`: 171 | 172 | - `krr_pod_admission_mutations_total` - Total pod mutations (with `mutated` label) 173 | - `krr_replicaset_admissions_total` - Total ReplicaSet admissions (with `operation` label) 174 | - `krr_rs_owners_map_size` - Current size of the ReplicaSet owners map 175 | - `krr_admission_duration_seconds` - Duration of admission operations (with `kind` label) 176 | 177 | ## API Endpoints 178 | 179 | ### GET /recommendations/{namespace}/{kind}/{name} 180 | 181 | Retrieve recommendations for a specific workload: 182 | 183 | ```bash 184 | curl -k https://krr-enforcer.krr-system.svc.cluster.local/recommendations/default/Deployment/my-app 185 | ``` 186 | 187 | Response: 188 | ```json 189 | { 190 | "namespace": "default", 191 | "kind": "Deployment", 192 | "name": "my-app", 193 | "containers": { 194 | "web": { 195 | "cpu": { 196 | "request": "100m", 197 | "limit": "200m" 198 | }, 199 | "memory": { 200 | "request": "128Mi", 201 | "limit": "256Mi" 202 | } 203 | } 204 | } 205 | } 206 | ``` 207 | 208 | ## Troubleshooting 209 | 210 | ### Common Issues 211 | 212 | 1. **Certificate Errors**: Ensure TLS certificates are properly configured and valid 213 | 2. **Permission Denied**: Verify the ServiceAccount has proper RBAC permissions 214 | 3. **No Recommendations**: Check that KRR has generated recommendations and they're accessible 215 | 4. 
**Webhook Timeout**: Increase `timeoutSeconds` in MutatingWebhookConfiguration 216 | 217 | ### Debug Mode 218 | 219 | Enable debug logging to troubleshoot issues: 220 | 221 | ```bash 222 | helm upgrade krr-enforcer ./helm/krr-enforcer --set logLevel=DEBUG 223 | ``` 224 | 225 | ### Logs 226 | 227 | Check enforcer logs: 228 | ```bash 229 | kubectl logs -n krr-system deployment/krr-enforcer-krr-enforcer -f 230 | ``` -------------------------------------------------------------------------------- /enforcer/dal/robusta_config.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict 2 | from pydantic import BaseModel 3 | 4 | 5 | class RobustaConfig(BaseModel): 6 | sinks_config: List[Dict[str, Dict]] 7 | global_config: dict 8 | 9 | class RobustaToken(BaseModel): 10 | store_url: str 11 | api_key: str 12 | account_id: str 13 | email: str 14 | password: str -------------------------------------------------------------------------------- /enforcer/env_vars.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | ROBUSTA_CONFIG_PATH = os.environ.get( 4 | "ROBUSTA_CONFIG_PATH", "/etc/robusta/config/active_playbooks.yaml" 5 | ) 6 | ROBUSTA_ACCOUNT_ID = os.environ.get("ROBUSTA_ACCOUNT_ID", "") 7 | STORE_URL = os.environ.get("STORE_URL", "") 8 | STORE_API_KEY = os.environ.get("STORE_API_KEY", "") 9 | STORE_EMAIL = os.environ.get("STORE_EMAIL", "") 10 | STORE_PASSWORD = os.environ.get("STORE_PASSWORD", "") 11 | 12 | DISCOVERY_MAX_BATCHES = int(os.environ.get("DISCOVERY_MAX_BATCHES", 50)) 13 | DISCOVERY_BATCH_SIZE = int(os.environ.get("DISCOVERY_BATCH_SIZE", 30000)) 14 | 15 | UPDATE_THRESHOLD = float(os.environ.get("UPDATE_THRESHOLD", 20.0)) 16 | 17 | SCAN_RELOAD_INTERVAL = int(os.environ.get("SCAN_RELOAD_INTERVAL", 3600)) 18 | KRR_MUTATION_MODE_DEFAULT = os.environ.get("KRR_MUTATION_MODE_DEFAULT", "enforce") 19 | REPLICA_SET_CLEANUP_INTERVAL = int(os.environ.get("REPLICA_SET_CLEANUP_INTERVAL", 600)) 20 | REPLICA_SET_DELETION_WAIT = int(os.environ.get("REPLICA_SET_DELETION_WAIT", 600)) 21 | SCAN_AGE_HOURS_THRESHOLD = int(os.environ.get("SCAN_AGE_HOURS_THRESHOLD", 360)) # 15 days 22 | 23 | ENFORCER_SSL_KEY_FILE = os.environ.get("ENFORCER_SSL_KEY_FILE", "") 24 | ENFORCER_SSL_CERT_FILE = os.environ.get("ENFORCER_SSL_CERT_FILE", "") -------------------------------------------------------------------------------- /enforcer/metrics.py: -------------------------------------------------------------------------------- 1 | from prometheus_client import Counter, Histogram, Gauge 2 | 3 | # Prometheus metrics 4 | pod_admission_mutations = Counter( 5 | 'krr_pod_admission_mutations_total', 6 | 'Total pod admission mutations', 7 | ['mutated', 'reason'] # labels: 'true' or 'false', reason for success/failure 8 | ) 9 | 10 | replicaset_admissions = Counter( 11 | 'krr_replicaset_admissions_total', 12 | 'Total replicaset admissions', 13 | ['operation'] # labels: CREATE, DELETE, etc. 
14 | ) 15 | 16 | rs_owners_size = Gauge( 17 | 'krr_rs_owners_map_size', 18 | 'Current size of the rs_owners map' 19 | ) 20 | 21 | admission_duration = Histogram( 22 | 'krr_admission_duration_seconds', 23 | 'Duration of admission operations', 24 | ['kind'] # labels: Pod, ReplicaSet 25 | ) -------------------------------------------------------------------------------- /enforcer/model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Optional, Dict, Any, List 3 | 4 | from pydantic import BaseModel 5 | 6 | 7 | class PodOwner(BaseModel): 8 | kind: str 9 | name: str 10 | namespace: str 11 | 12 | class RsOwner(BaseModel): 13 | rs_name: str 14 | namespace: str 15 | owner_name: str 16 | owner_kind: str 17 | deletion_ts: Optional[float] = None 18 | 19 | class Resources(BaseModel): 20 | request: float 21 | limit: Optional[float] 22 | 23 | 24 | class ContainerRecommendation(BaseModel): 25 | cpu: Optional[Resources] = None 26 | memory: Optional[Resources] = None 27 | 28 | @staticmethod 29 | def build(recommendation: Dict[str, Any]) -> Optional["ContainerRecommendation"]: 30 | resource_recommendation = ContainerRecommendation() 31 | content: List[Dict] = recommendation["content"] 32 | for container_resource in content: 33 | resource = container_resource["resource"] 34 | if resource not in ["memory", "cpu"]: 35 | continue 36 | 37 | recommended: Dict[str, Any] = container_resource["recommended"] 38 | request = recommended.get("request", 0.0) 39 | limit = recommended.get("limit", None) 40 | 41 | if request == 0.0: 42 | logging.debug("skipping container recommendations without request, %s", recommendation) 43 | return None 44 | 45 | if request == "?" or limit == "?": 46 | logging.debug("skipping container recommendations with '?', %s", recommendation) 47 | return None 48 | 49 | resources = Resources(request=request, limit=limit) 50 | if resource == "memory": 51 | resource_recommendation.memory = resources 52 | elif resource == "cpu": 53 | resource_recommendation.cpu = resources 54 | 55 | return resource_recommendation 56 | 57 | 58 | class WorkloadRecommendation(BaseModel): 59 | workload_key: str 60 | container_recommendations: Dict[str, ContainerRecommendation] = {} 61 | 62 | def get(self, container: str) -> Optional[ContainerRecommendation]: 63 | return self.container_recommendations.get(container, None) 64 | 65 | 66 | def add(self, container: str, recommendation: ContainerRecommendation): 67 | self.container_recommendations[container] = recommendation -------------------------------------------------------------------------------- /enforcer/params_utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import re 4 | from typing import Dict, Optional 5 | 6 | from pydantic.types import SecretStr 7 | 8 | def get_env_replacement(value: str) -> Optional[str]: 9 | env_values = re.findall(r"{{[ ]*env\.(.*)[ ]*}}", value) 10 | if env_values: 11 | env_var_value = os.environ.get(env_values[0].strip(), None) 12 | if not env_var_value: 13 | msg = f"ENV var replacement {env_values[0]} does not exist for param: {value}" 14 | logging.error(msg) 15 | raise Exception(msg) 16 | return env_var_value 17 | return None 18 | 19 | 20 | def replace_env_vars_values(values: Dict) -> Dict: 21 | for key, value in values.items(): 22 | if isinstance(value, str): 23 | env_var_value = get_env_replacement(value) 24 | if env_var_value: 25 | values[key] = env_var_value 26 | elif 
isinstance(value, SecretStr): 27 | env_var_value = get_env_replacement(value.get_secret_value()) 28 | if env_var_value: 29 | values[key] = SecretStr(env_var_value) 30 | elif isinstance(value, dict): 31 | env_var_value = replace_env_vars_values(value) 32 | if env_var_value: 33 | values[key] = env_var_value 34 | 35 | return values 36 | -------------------------------------------------------------------------------- /enforcer/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi==0.109.2 2 | uvicorn==0.27.1 3 | pydantic==2.6.1 4 | supabase==2.5 5 | PyYAML==6.0.1 6 | cachetools==5.3.3 7 | prometheus-client==0.20.0 8 | kubernetes==26.1.0 9 | -------------------------------------------------------------------------------- /enforcer/resources/kubernetes_resource_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from typing import List 4 | 5 | from enforcer.env_vars import DISCOVERY_MAX_BATCHES, DISCOVERY_BATCH_SIZE 6 | from kubernetes import client 7 | from kubernetes.client import V1ReplicaSetList 8 | from kubernetes import config 9 | 10 | from enforcer.model import RsOwner 11 | 12 | if os.getenv("KUBERNETES_SERVICE_HOST"): 13 | config.load_incluster_config() 14 | else: 15 | config.load_kube_config() 16 | 17 | 18 | class KubernetesResourceLoader: 19 | 20 | @staticmethod 21 | def load_replicasets() -> List[RsOwner]: 22 | cluster_rs: List[RsOwner] = [] 23 | continue_ref = None 24 | for batch_num in range(DISCOVERY_MAX_BATCHES): 25 | replicasets: V1ReplicaSetList = client.AppsV1Api().list_replica_set_for_all_namespaces( 26 | limit=DISCOVERY_BATCH_SIZE, _continue=continue_ref 27 | ) 28 | 29 | for replicaset in replicasets.items: 30 | owner_references = replicaset.metadata.owner_references 31 | if owner_references: 32 | rs_owner = owner_references[0] 33 | if len(owner_references) > 1: 34 | logging.warning(f"ReplicasSet with multiple owner_references: {owner_references}") 35 | controllers = [owner for owner in owner_references if owner.get("controller", False)] 36 | if controllers: 37 | rs_owner = controllers[0] 38 | 39 | cluster_rs.append(RsOwner( 40 | rs_name=replicaset.metadata.name, 41 | namespace=replicaset.metadata.namespace, 42 | owner_name=rs_owner.name, 43 | owner_kind=rs_owner.kind, 44 | )) 45 | 46 | continue_ref = replicasets.metadata._continue 47 | if not continue_ref: 48 | break 49 | 50 | if batch_num == DISCOVERY_MAX_BATCHES - 1: 51 | replicas_limit = DISCOVERY_MAX_BATCHES * DISCOVERY_BATCH_SIZE 52 | logging.warning(f"Reached replicas loading limit: {replicas_limit}.") 53 | 54 | return cluster_rs 55 | -------------------------------------------------------------------------------- /enforcer/resources/owner_store.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import threading 3 | import time 4 | from typing import Dict, Any, Optional, List 5 | 6 | from enforcer.env_vars import REPLICA_SET_CLEANUP_INTERVAL, REPLICA_SET_DELETION_WAIT 7 | from enforcer.metrics import rs_owners_size 8 | from enforcer.model import PodOwner, RsOwner 9 | from enforcer.resources.kubernetes_resource_loader import KubernetesResourceLoader 10 | 11 | 12 | class OwnerStore: 13 | 14 | def __init__(self): 15 | self.rs_owners: Dict[str, RsOwner] = {} 16 | self._rs_owners_lock = threading.Lock() 17 | self._owners_loaded = threading.Event() 18 | self._loading_in_progress = threading.Lock() 19 | self.cleanup_interval = 
REPLICA_SET_CLEANUP_INTERVAL 20 | self._stop_event = threading.Event() 21 | self._cleanup_thread = threading.Thread(target=self._periodic_cleanup, daemon=True) 22 | self._cleanup_thread.start() 23 | 24 | def _rs_key(self, rs_name: str, namespace: str) -> str: 25 | return f"{namespace}/{rs_name}" 26 | 27 | def finalize_owner_initialization(self): 28 | """Initialize rs_owners on-demand, thread-safe, only once.""" 29 | if self._owners_loaded.is_set(): 30 | return # Already loaded 31 | 32 | # Try to acquire the loading lock without blocking 33 | if not self._loading_in_progress.acquire(blocking=False): 34 | # Another thread is loading, just return 35 | return 36 | 37 | try: 38 | if self._owners_loaded.is_set(): 39 | return 40 | 41 | replica_sets_owners: List[RsOwner] = KubernetesResourceLoader.load_replicasets() 42 | loaded_owners: Dict[str, RsOwner] = {} 43 | for owner in replica_sets_owners: 44 | loaded_owners[self._rs_key(owner.rs_name, owner.namespace)] = owner 45 | 46 | with self._rs_owners_lock: 47 | self.rs_owners.update(loaded_owners) 48 | rs_owners_size.set(len(self.rs_owners)) 49 | 50 | self._owners_loaded.set() 51 | logging.info(f"Loaded {len(loaded_owners)} ReplicaSet owners") 52 | 53 | except Exception: 54 | logging.exception(f"Failed to load ReplicaSet owners") 55 | finally: 56 | self._loading_in_progress.release() 57 | 58 | @staticmethod 59 | def get_pod_name(metadata: Dict[str, Any]) -> str: 60 | # if the pod's name is randomized, the name is under generateName 61 | return metadata.get("name") or metadata.get("generateName") 62 | 63 | def get_pod_owner(self, pod: Dict[str, Any]) -> Optional[PodOwner]: 64 | metadata = pod.get("metadata", {}) 65 | owner_references = metadata.get("ownerReferences", []) 66 | namespace: str = metadata.get("namespace") 67 | 68 | try: 69 | if not owner_references: # pod has no owner, standalone pod. 
Return the pod 70 | return PodOwner( 71 | kind="Pod", namespace=namespace, name=self.get_pod_name(pod) 72 | ) 73 | 74 | # get only owners with controller == true 75 | controllers = [owner for owner in owner_references if owner.get("controller", False)] 76 | if controllers: 77 | if len(controllers) > 1: 78 | logging.warning(f"Multiple controllers found for {pod}") 79 | 80 | controller = controllers[0] 81 | controller_kind: str = controller.get("kind") 82 | if controller_kind == "ReplicaSet": 83 | with self._rs_owners_lock: 84 | rs_owner = self.rs_owners.get(self._rs_key(controller.get("name"), namespace), None) 85 | return PodOwner( 86 | name=rs_owner.owner_name, 87 | namespace=rs_owner.namespace, 88 | kind=rs_owner.owner_kind, 89 | ) if rs_owner else None 90 | else: # Pod owner is a k8s workload: Job, StatefulSet, DaemonSet 91 | return PodOwner(kind=controller_kind, name=controller.get("name"), namespace=namespace) 92 | except Exception: 93 | logging.exception(f"Failed to get pod owner for {pod}") 94 | 95 | return None 96 | 97 | def handle_rs_admission(self, request: Dict[str, Any]): 98 | logging.debug(f"handle_rs_admission %s", request) 99 | operation = request.get("operation") 100 | if operation == "DELETE": 101 | old_object = request.get("oldObject") or {} # delete has old object 102 | metadata = old_object.get("metadata", {}) 103 | rs_name = metadata.get("name") 104 | namespace = metadata.get("namespace") 105 | if rs_name and namespace: 106 | with self._rs_owners_lock: 107 | rs_owner = self.rs_owners.get(self._rs_key(rs_name, namespace), None) 108 | if rs_owner: 109 | rs_owner.deletion_ts = time.time() 110 | elif operation == "CREATE": 111 | self._add_rs_owner(request) 112 | 113 | def _add_rs_owner(self, rs_create_request: Dict[str, Any]): 114 | metadata = rs_create_request.get("object", {}).get("metadata", {}) 115 | owner_references = metadata.get("ownerReferences", []) 116 | if len(owner_references): 117 | rs_owner = RsOwner( 118 | rs_name=metadata.get("name"), 119 | namespace=metadata.get("namespace"), 120 | owner_name=owner_references[0].get("name"), 121 | owner_kind=owner_references[0].get("kind"), 122 | ) 123 | with self._rs_owners_lock: 124 | self.rs_owners[self._rs_key(rs_owner.rs_name, rs_owner.namespace)] = rs_owner 125 | else: 126 | logging.warning(f"No owner references for {rs_create_request}") 127 | 128 | 129 | def _cleanup_deleted_replica_sets(self): 130 | current_time = time.time() 131 | 132 | with self._rs_owners_lock: 133 | # Delete rs owners that were deleted more than REPLICA_SET_DELETION_WAIT seconds ago 134 | keys_to_delete = [ 135 | key for key, rs_owner in self.rs_owners.items() 136 | if rs_owner.deletion_ts is not None and (current_time - rs_owner.deletion_ts) >= REPLICA_SET_DELETION_WAIT 137 | ] 138 | 139 | for key in keys_to_delete: 140 | del self.rs_owners[key] 141 | 142 | def _periodic_cleanup(self): 143 | while not self._stop_event.wait(self.cleanup_interval): 144 | try: 145 | self._cleanup_deleted_replica_sets() 146 | logging.debug("Deleted replicasets cleanup completed") 147 | except Exception as e: 148 | logging.exception(f"Failed to cleanup deleted replicasets") 149 | 150 | def get_rs_owners_count(self) -> int: 151 | with self._rs_owners_lock: 152 | return len(self.rs_owners) 153 | 154 | def stop(self): 155 | self._stop_event.set() 156 | self._cleanup_thread.join() -------------------------------------------------------------------------------- /enforcer/resources/recommendation_store.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | import threading 3 | from typing import Dict, Optional, Tuple 4 | 5 | from enforcer.dal.supabase_dal import SupabaseDal 6 | from enforcer.env_vars import SCAN_RELOAD_INTERVAL 7 | from enforcer.model import WorkloadRecommendation, ContainerRecommendation 8 | 9 | 10 | class RecommendationStore: 11 | 12 | def __init__(self, dal: SupabaseDal): 13 | self.dal = dal 14 | self.recommendations: Dict[str, WorkloadRecommendation] = {} 15 | self.scan_id: Optional[str] = None 16 | self._recommendations_lock = threading.Lock() 17 | self._reload_recommendations() 18 | 19 | self.reload_interval = SCAN_RELOAD_INTERVAL 20 | self._stop_event = threading.Event() 21 | self._reload_thread = threading.Thread(target=self._periodic_reload, daemon=True) 22 | self._reload_thread.start() 23 | 24 | 25 | def _load_recommendations(self, current_stored_scan: Optional[str]) -> Tuple[Optional[str], Optional[Dict[str, WorkloadRecommendation]]]: 26 | latest_scan_id, latest_scan = self.dal.get_latest_krr_scan(current_stored_scan) 27 | 28 | if not latest_scan: 29 | return None, None 30 | 31 | # group workload containers recommendations, into WorkloadRecommendation object 32 | scan_recommendations: Dict[str, WorkloadRecommendation] = {} 33 | for container_recommendation in latest_scan: 34 | try: 35 | store_key = self._store_key( 36 | name=container_recommendation["name"], 37 | namespace=container_recommendation["namespace"], 38 | kind=container_recommendation["kind"], 39 | ) 40 | 41 | recommendation = ContainerRecommendation.build(container_recommendation) 42 | if recommendation: # if a valid recommendation was created, connect it to the workload 43 | workload_recommendation: WorkloadRecommendation = scan_recommendations.get(store_key, None) 44 | if not workload_recommendation: 45 | workload_recommendation = WorkloadRecommendation(workload_key=store_key) 46 | scan_recommendations[store_key] = workload_recommendation 47 | 48 | workload_recommendation.add(container_recommendation["container"], recommendation) 49 | except Exception: 50 | logging.exception(f"Failed to load container recommendation: {container_recommendation}") 51 | 52 | return latest_scan_id, scan_recommendations 53 | 54 | def _store_key(self, name: str, namespace: str, kind: str) -> str: 55 | return f"{namespace}/{name}/{kind}" 56 | 57 | def _reload_recommendations(self): 58 | scan_id, new_recommendations = self._load_recommendations(self.scan_id) 59 | if new_recommendations is not None: 60 | with self._recommendations_lock: 61 | self.recommendations = new_recommendations 62 | self.scan_id = scan_id 63 | logging.info("Recommendations reloaded successfully") 64 | logging.debug("Loaded recommendations: %s", new_recommendations) 65 | 66 | def _periodic_reload(self): 67 | while not self._stop_event.wait(self.reload_interval): 68 | try: 69 | self._reload_recommendations() 70 | except Exception as e: 71 | logging.error(f"Failed to reload recommendations: {e}") 72 | 73 | def stop(self): 74 | self._stop_event.set() 75 | self._reload_thread.join() 76 | 77 | def get_recommendations(self, name: str, namespace: str, kind: str) -> Optional[WorkloadRecommendation]: 78 | with self._recommendations_lock: 79 | return self.recommendations.get(self._store_key(name, namespace, kind)) 80 | 81 | -------------------------------------------------------------------------------- /enforcer/utils.py: -------------------------------------------------------------------------------- 1 | 
import base64 2 | import os 3 | 4 | import certifi 5 | 6 | CUSTOM_CERTIFICATE_PATH = "/tmp/custom_ca.pem" 7 | 8 | 9 | def append_custom_certificate(custom_ca: str) -> None: 10 | with open(certifi.where(), "ab") as outfile: 11 | outfile.write(base64.b64decode(custom_ca)) 12 | 13 | os.environ["WEBSOCKET_CLIENT_CA_BUNDLE"] = certifi.where() 14 | 15 | 16 | def create_temporary_certificate(custom_ca: str) -> None: 17 | with open(certifi.where(), "rb") as base_cert: 18 | base_cert_content = base_cert.read() 19 | 20 | with open(CUSTOM_CERTIFICATE_PATH, "wb") as outfile: 21 | outfile.write(base_cert_content) 22 | outfile.write(base64.b64decode(custom_ca)) 23 | 24 | os.environ["REQUESTS_CA_BUNDLE"] = CUSTOM_CERTIFICATE_PATH 25 | os.environ["WEBSOCKET_CLIENT_CA_BUNDLE"] = CUSTOM_CERTIFICATE_PATH 26 | certifi.where = lambda: CUSTOM_CERTIFICATE_PATH 27 | 28 | 29 | def add_custom_certificate(custom_ca: str) -> bool: 30 | if not custom_ca: 31 | return False 32 | 33 | # NOTE: Sometimes (Openshift) the certifi.where() is not writable, so we need to 34 | # use a temporary file in case of PermissionError. 35 | try: 36 | append_custom_certificate(custom_ca) 37 | except PermissionError: 38 | create_temporary_certificate(custom_ca) 39 | 40 | return True 41 | -------------------------------------------------------------------------------- /examples/custom_formatter.py: -------------------------------------------------------------------------------- 1 | # This is an example of how to create your own custom formatter 2 | 3 | from __future__ import annotations 4 | 5 | import robusta_krr 6 | from robusta_krr.api import formatters 7 | from robusta_krr.api.models import Result 8 | 9 | 10 | # This is a custom formatter 11 | # It will be available to the CLI as `my_formatter` 12 | # Rich console will be enabled in this case, so the output will be colored and formatted 13 | @formatters.register(rich_console=True) 14 | def my_formatter(result: Result) -> str: 15 | # Return the custom-formatted output 16 | return "Custom formatter" 17 | 18 | 19 | # Running this file will register the formatter and make it available to the CLI 20 | # Run it as `python ./custom_formatter.py simple --formatter my_formatter` 21 | if __name__ == "__main__": 22 | robusta_krr.run() 23 | -------------------------------------------------------------------------------- /examples/custom_severity_calculator.py: -------------------------------------------------------------------------------- 1 | # This is an example of how to create your own custom severity calculator 2 | 3 | from __future__ import annotations 4 | 5 | from typing import Optional 6 | 7 | import robusta_krr 8 | from robusta_krr.api.models import ResourceType, Severity, register_severity_calculator 9 | 10 | 11 | @register_severity_calculator(ResourceType.CPU) 12 | def percentage_severity_calculator( 13 | current: Optional[float], recommended: Optional[float], resource_type: ResourceType 14 | ) -> Severity: 15 | """ 16 | This is an example of how to create your own custom severity calculator. 17 | You can use this decorator to bind a severity calculator function to a resource type. 18 | The function will be called with the current value, the recommended value and the resource type. 19 | The function should return a Severity enum value. 20 | 21 | If you have the same calculation for multiple resource types, you can use the `register_severity_calculator` decorator multiple times. 22 | Then, the function will be called for each resource type and you can use the resource type to distinguish between them.
23 | 24 | Keep in mind that you cannot choose the severity calculator for a resource type from the CLI - the last one registered for the resource type will be used. 25 | """ 26 | 27 | if current is None and recommended is None: 28 | return Severity.GOOD 29 | if current is None or recommended is None: 30 | return Severity.WARNING 31 | 32 | diff = abs(current - recommended) / current 33 | if diff >= 0.5: 34 | return Severity.CRITICAL 35 | elif diff >= 0.25: 36 | return Severity.WARNING 37 | elif diff >= 0.1: 38 | return Severity.OK 39 | else: 40 | return Severity.GOOD 41 | 42 | 43 | # Running this file will register the severity calculator and make it available to the CLI 44 | # Run it as `python ./custom_severity_calculator.py simple` 45 | if __name__ == "__main__": 46 | robusta_krr.run() 47 | -------------------------------------------------------------------------------- /examples/custom_strategy.py: -------------------------------------------------------------------------------- 1 | # This is an example of how to create your own custom strategy 2 | 3 | import pydantic as pd 4 | 5 | import robusta_krr 6 | from robusta_krr.api.models import K8sObjectData, MetricsPodData, ResourceRecommendation, ResourceType, RunResult 7 | from robusta_krr.api.strategies import BaseStrategy, StrategySettings 8 | from robusta_krr.core.integrations.prometheus.metrics import MaxMemoryLoader, PercentileCPULoader 9 | 10 | 11 | # Providing description to the settings will make it available in the CLI help 12 | class CustomStrategySettings(StrategySettings): 13 | param_1: float = pd.Field(99, gt=0, description="First example parameter") 14 | param_2: float = pd.Field(105_000, gt=0, description="Second example parameter") 15 | 16 | 17 | class CustomStrategy(BaseStrategy[CustomStrategySettings]): 18 | """ 19 | A custom strategy that uses the provided parameters for CPU and memory. 20 | Made only to demonstrate how to create a custom strategy. 21 | """ 22 | 23 | display_name = "custom" # The name of the strategy 24 | rich_console = True # Whether to use rich console for the CLI 25 | metrics = [PercentileCPULoader(90), MaxMemoryLoader] # The metrics to use for the strategy 26 | 27 | def run(self, history_data: MetricsPodData, object_data: K8sObjectData) -> RunResult: 28 | return { 29 | ResourceType.CPU: ResourceRecommendation(request=self.settings.param_1, limit=None), 30 | ResourceType.Memory: ResourceRecommendation(request=self.settings.param_2, limit=self.settings.param_2), 31 | } 32 | 33 | 34 | # Running this file will register the strategy and make it available to the CLI 35 | # Run it as `python ./custom_strategy.py custom` (the command name comes from `display_name`) 36 | if __name__ == "__main__": 37 | robusta_krr.run() 38 | -------------------------------------------------------------------------------- /helm/krr-enforcer/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line.
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /helm/krr-enforcer/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: krr-enforcer 3 | description: KRR enforcer - auto apply KRR recommendations 4 | type: application 5 | 6 | version: 0.3.1 7 | appVersion: 0.3.1 8 | -------------------------------------------------------------------------------- /helm/krr-enforcer/templates/enforcer-cert-job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: {{ .Release.Name }}-krr-enforcer-cert-job 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | app.kubernetes.io/component: krr-enforcer-cert-job 8 | annotations: 9 | helm.sh/hook: pre-install,pre-upgrade 10 | helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded 11 | helm.sh/hook-weight: "-5" 12 | spec: 13 | backoffLimit: 3 14 | template: 15 | metadata: 16 | labels: 17 | app.kubernetes.io/name: krr-enforcer 18 | app.kubernetes.io/instance: {{ .Release.Name }} 19 | app.kubernetes.io/component: krr-enforcer-cert-job 20 | spec: 21 | serviceAccountName: {{ .Release.Name }}-krr-enforcer-cert-job 22 | restartPolicy: OnFailure 23 | volumes: 24 | - name: workdir 25 | emptyDir: {} 26 | containers: 27 | - name: cert-job 28 | image: "bitnami/kubectl:1.30" 29 | workingDir: /tmp/certs 30 | volumeMounts: 31 | - name: workdir 32 | mountPath: /tmp/certs 33 | command: 34 | - /bin/bash 35 | - -c 36 | - | 37 | set -e 38 | 39 | # Generate a CA key and certificate 40 | echo "Generating CA certificate..." 41 | openssl genrsa -out ca.key 2048 42 | openssl req -x509 -new -nodes -key ca.key -subj "/CN=robusta-krr-enforcer-ca" -days 365 -out ca.crt 43 | 44 | # Generate a server key and certificate signing request (CSR) 45 | echo "Generating server certificate..." 46 | SERVICE_NAME={{ .Release.Name }}-krr-enforcer 47 | NAMESPACE={{ .Release.Namespace }} 48 | DNS_NAME=${SERVICE_NAME}.${NAMESPACE}.svc 49 | 50 | openssl genrsa -out server.key 2048 51 | cat > server.conf < server-ext.conf </dev/null 2>&1; then 145 | echo "Restarting enforcer deployment..." 146 | kubectl rollout restart deployment ${SERVICE_NAME} -n ${NAMESPACE} 147 | else 148 | echo "Deployment ${SERVICE_NAME} does not exist yet, skipping restart" 149 | fi 150 | 151 | echo "Job completed successfully!" 
152 | --- 153 | apiVersion: v1 154 | kind: ServiceAccount 155 | metadata: 156 | name: {{ .Release.Name }}-krr-enforcer-cert-job 157 | namespace: {{ .Release.Namespace }} 158 | labels: 159 | app.kubernetes.io/component: krr-enforcer-cert-job 160 | annotations: 161 | helm.sh/hook: pre-install,pre-upgrade 162 | helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded 163 | helm.sh/hook-weight: "-6" 164 | --- 165 | apiVersion: rbac.authorization.k8s.io/v1 166 | kind: ClusterRole 167 | metadata: 168 | name: {{ .Release.Name }}-krr-enforcer-cert-job 169 | labels: 170 | app.kubernetes.io/component: krr-enforcer-cert-job 171 | annotations: 172 | helm.sh/hook: pre-install,pre-upgrade 173 | helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded 174 | helm.sh/hook-weight: "-6" 175 | rules: 176 | - apiGroups: [""] 177 | resources: ["secrets"] 178 | verbs: ["create", "get", "update", "patch"] 179 | - apiGroups: ["admissionregistration.k8s.io"] 180 | resources: ["mutatingwebhookconfigurations"] 181 | verbs: ["create", "get", "update", "patch"] 182 | - apiGroups: ["apps"] 183 | resources: ["deployments"] 184 | verbs: ["get", "patch"] 185 | --- 186 | apiVersion: rbac.authorization.k8s.io/v1 187 | kind: ClusterRoleBinding 188 | metadata: 189 | name: {{ .Release.Name }}-krr-enforcer-cert-job 190 | labels: 191 | app.kubernetes.io/component: krr-enforcer-cert-job 192 | annotations: 193 | helm.sh/hook: pre-install,pre-upgrade 194 | helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded 195 | helm.sh/hook-weight: "-6" 196 | roleRef: 197 | apiGroup: rbac.authorization.k8s.io 198 | kind: ClusterRole 199 | name: {{ .Release.Name }}-krr-enforcer-cert-job 200 | subjects: 201 | - kind: ServiceAccount 202 | name: {{ .Release.Name }}-krr-enforcer-cert-job 203 | namespace: {{ .Release.Namespace }} 204 | -------------------------------------------------------------------------------- /helm/krr-enforcer/templates/enforcer-service-account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: {{ .Release.Name }}-krr-enforcer 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | app.kubernetes.io/component: krr-enforcer 8 | {{- with .Values.serviceAccount.annotations }} 9 | annotations: 10 | {{- toYaml . 
| nindent 4 }} 11 | {{- end }} 12 | --- 13 | apiVersion: rbac.authorization.k8s.io/v1 14 | kind: ClusterRole 15 | metadata: 16 | name: {{ .Release.Name }}-krr-enforcer 17 | labels: 18 | app.kubernetes.io/component: krr-enforcer 19 | rules: 20 | - apiGroups: [""] 21 | resources: ["pods"] 22 | verbs: ["get", "list", "watch"] 23 | - apiGroups: ["apps"] 24 | resources: ["replicasets"] 25 | verbs: ["get", "list", "watch"] 26 | --- 27 | apiVersion: rbac.authorization.k8s.io/v1 28 | kind: ClusterRoleBinding 29 | metadata: 30 | name: {{ .Release.Name }}-krr-enforcer 31 | labels: 32 | app.kubernetes.io/component: krr-enforcer 33 | roleRef: 34 | apiGroup: rbac.authorization.k8s.io 35 | kind: ClusterRole 36 | name: {{ .Release.Name }}-krr-enforcer 37 | subjects: 38 | - kind: ServiceAccount 39 | name: {{ .Release.Name }}-krr-enforcer 40 | namespace: {{ .Release.Namespace }} 41 | -------------------------------------------------------------------------------- /helm/krr-enforcer/templates/enforcer.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ .Release.Name }}-krr-enforcer 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | app.kubernetes.io/component: krr-enforcer 8 | {{- with .Values.annotations }} 9 | annotations: 10 | {{- toYaml . | nindent 4 }} 11 | {{- end }} 12 | spec: 13 | replicas: 1 14 | selector: 15 | matchLabels: 16 | app.kubernetes.io/name: krr-enforcer 17 | app.kubernetes.io/instance: {{ .Release.Name }} 18 | app.kubernetes.io/component: krr-enforcer 19 | template: 20 | metadata: 21 | labels: 22 | app.kubernetes.io/name: krr-enforcer 23 | app.kubernetes.io/instance: {{ .Release.Name }} 24 | app.kubernetes.io/component: krr-enforcer 25 | annotations: 26 | admission.robusta.dev/krr-mutation-mode: ignore 27 | {{- with .Values.annotations }} 28 | {{- toYaml . | nindent 8 }} 29 | {{- end }} 30 | spec: 31 | serviceAccountName: {{ .Release.Name }}-krr-enforcer 32 | {{- with .Values.imagePullSecrets }} 33 | imagePullSecrets: 34 | {{- toYaml . | nindent 8 }} 35 | {{- end }} 36 | {{- with .Values.nodeSelector }} 37 | nodeSelector: 38 | {{- toYaml . | nindent 8 }} 39 | {{- end }} 40 | {{- with .Values.tolerations }} 41 | tolerations: 42 | {{- toYaml . | nindent 8 }} 43 | {{- end }} 44 | {{- with .Values.priorityClassName }} 45 | priorityClassName: {{ . }} 46 | {{- end }} 47 | {{- with .Values.securityContext.pod }} 48 | securityContext: 49 | {{- toYaml . | nindent 8 }} 50 | {{- end }} 51 | volumes: 52 | - name: playbooks-config-secret 53 | secret: 54 | secretName: robusta-playbooks-config-secret 55 | optional: true 56 | - name: certs 57 | secret: 58 | secretName: {{ .Release.Name }}-krr-enforcer-certs 59 | containers: 60 | - name: enforcer 61 | {{- if .Values.fullImage }} 62 | image: "{{ .Values.fullImage }}" 63 | {{- else }} 64 | image: "{{ .Values.image.repository }}/{{ .Values.image.name }}:{{ .Values.image.tag }}" 65 | {{- end }} 66 | imagePullPolicy: {{ .Values.imagePullPolicy }} 67 | {{- with .Values.securityContext.container }} 68 | securityContext: 69 | {{- toYaml . 
| nindent 12 }} 70 | {{- end }} 71 | ports: 72 | - name: https 73 | containerPort: 8443 74 | protocol: TCP 75 | volumeMounts: 76 | - name: certs 77 | mountPath: /etc/webhook/certs 78 | readOnly: true 79 | - name: playbooks-config-secret 80 | mountPath: /etc/robusta/config 81 | livenessProbe: 82 | httpGet: 83 | path: /health 84 | port: https 85 | scheme: HTTPS 86 | initialDelaySeconds: 30 87 | periodSeconds: 10 88 | readinessProbe: 89 | httpGet: 90 | path: /health 91 | port: https 92 | scheme: HTTPS 93 | initialDelaySeconds: 5 94 | periodSeconds: 5 95 | resources: 96 | {{- if .Values.resources.requests }} 97 | requests: 98 | {{- if .Values.resources.requests.cpu }} 99 | cpu: {{ .Values.resources.requests.cpu }} 100 | {{- end }} 101 | {{- if .Values.resources.requests.memory }} 102 | memory: {{ .Values.resources.requests.memory }} 103 | {{- end }} 104 | {{- end }} 105 | {{- if .Values.resources.limits }} 106 | limits: 107 | {{- if .Values.resources.limits.cpu }} 108 | cpu: {{ .Values.resources.limits.cpu }} 109 | {{- end }} 110 | {{- if .Values.resources.limits.memory }} 111 | memory: {{ .Values.resources.limits.memory }} 112 | {{- end }} 113 | {{- end }} 114 | env: 115 | - name: ENFORCER_SSL_KEY_FILE 116 | value: "/etc/webhook/certs/tls.key" 117 | - name: ENFORCER_SSL_CERT_FILE 118 | value: "/etc/webhook/certs/tls.crt" 119 | - name: LOG_LEVEL 120 | value: {{ .Values.logLevel | quote }} 121 | {{- if .Values.certificate }} 122 | - name: CERTIFICATE 123 | value: {{ .Values.certificate | quote }} 124 | {{- end }} 125 | {{- if .Values.additionalEnvVars }} 126 | {{- toYaml .Values.additionalEnvVars | nindent 12 }} 127 | {{- end }} 128 | 129 | --- 130 | apiVersion: v1 131 | kind: Service 132 | metadata: 133 | name: {{ .Release.Name }}-krr-enforcer 134 | namespace: {{ .Release.Namespace }} 135 | labels: 136 | app.kubernetes.io/name: krr-enforcer 137 | app.kubernetes.io/instance: {{ .Release.Name }} 138 | app.kubernetes.io/component: krr-enforcer 139 | {{- with .Values.service.annotations }} 140 | annotations: 141 | {{- toYaml . | nindent 4 }} 142 | {{- end }} 143 | spec: 144 | type: ClusterIP 145 | ports: 146 | - port: 443 147 | targetPort: https 148 | protocol: TCP 149 | name: https 150 | selector: 151 | app.kubernetes.io/name: krr-enforcer 152 | app.kubernetes.io/instance: {{ .Release.Name }} 153 | app.kubernetes.io/component: krr-enforcer 154 | -------------------------------------------------------------------------------- /helm/krr-enforcer/templates/service-monitor.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.serviceMonitor.enabled }} 2 | apiVersion: monitoring.coreos.com/v1 3 | kind: ServiceMonitor 4 | metadata: 5 | name: {{ .Release.Name }}-krr-enforcer 6 | namespace: {{ .Release.Namespace }} 7 | labels: 8 | app.kubernetes.io/name: krr-enforcer 9 | app.kubernetes.io/instance: {{ .Release.Name }} 10 | app.kubernetes.io/component: krr-enforcer 11 | {{- with .Values.serviceMonitor.labels }} 12 | {{- toYaml . | nindent 4 }} 13 | {{- end }} 14 | {{- with .Values.serviceMonitor.annotations }} 15 | annotations: 16 | {{- toYaml . | nindent 4 }} 17 | {{- end }} 18 | spec: 19 | selector: 20 | matchLabels: 21 | app.kubernetes.io/name: krr-enforcer 22 | app.kubernetes.io/instance: {{ .Release.Name }} 23 | app.kubernetes.io/component: krr-enforcer 24 | endpoints: 25 | - port: https 26 | path: /metrics 27 | scheme: https 28 | tlsConfig: 29 | insecureSkipVerify: true 30 | {{- with .Values.serviceMonitor.interval }} 31 | interval: {{ . 
}} 32 | {{- end }} 33 | {{- with .Values.serviceMonitor.scrapeTimeout }} 34 | scrapeTimeout: {{ . }} 35 | {{- end }} 36 | {{- end }} -------------------------------------------------------------------------------- /helm/krr-enforcer/values.yaml: -------------------------------------------------------------------------------- 1 | certificate: "" # base64 encoded 2 | logLevel: INFO 3 | 4 | # fullImage: ~ # full image path can be used to override image.repository/image.name:image.tag 5 | 6 | image: 7 | repository: us-central1-docker.pkg.dev/genuine-flight-317411/devel 8 | name: krr-enforcer 9 | tag: 0.3.1 10 | imagePullPolicy: IfNotPresent 11 | resources: 12 | requests: 13 | cpu: 100m 14 | memory: 256Mi 15 | limits: 16 | cpu: ~ 17 | additionalEnvVars: [] 18 | priorityClassName: "" 19 | tolerations: [] 20 | annotations: {} 21 | nodeSelector: ~ 22 | imagePullSecrets: [] 23 | securityContext: 24 | container: 25 | allowPrivilegeEscalation: false 26 | capabilities: {} 27 | privileged: false 28 | readOnlyRootFilesystem: false 29 | runAsUser: 1000 30 | pod: {} 31 | service: 32 | annotations: {} 33 | serviceAccount: 34 | annotations: {} 35 | serviceMonitor: 36 | enabled: true 37 | interval: 30s 38 | scrapeTimeout: 10s 39 | labels: {} 40 | annotations: {} 41 | -------------------------------------------------------------------------------- /helm/upload_chart.sh: -------------------------------------------------------------------------------- 1 | rm -rf ./tmp 2 | mkdir ./tmp 3 | cd ./tmp 4 | helm package ../krr-enforcer 5 | mkdir krr-enforcer 6 | mv *.tgz ./krr-enforcer 7 | curl https://robusta-charts.storage.googleapis.com/index.yaml > index.yaml 8 | helm repo index --merge index.yaml --url https://robusta-charts.storage.googleapis.com ./krr-enforcer 9 | gsutil rsync -r krr-enforcer gs://robusta-charts 10 | gsutil setmeta -h "Cache-Control:max-age=0" gs://robusta-charts/index.yaml 11 | cd ../ 12 | rm -rf ./tmp 13 | -------------------------------------------------------------------------------- /images/krr-datasources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robusta-dev/krr/f220c3156fd3d71ab3bfa7b75b9c31338cc21ce2/images/krr-datasources.png -------------------------------------------------------------------------------- /images/krr-other-integrations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robusta-dev/krr/f220c3156fd3d71ab3bfa7b75b9c31338cc21ce2/images/krr-other-integrations.png -------------------------------------------------------------------------------- /images/krr_slack_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robusta-dev/krr/f220c3156fd3d71ab3bfa7b75b9c31338cc21ce2/images/krr_slack_example.png -------------------------------------------------------------------------------- /images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robusta-dev/krr/f220c3156fd3d71ab3bfa7b75b9c31338cc21ce2/images/logo.png -------------------------------------------------------------------------------- /images/screenshot.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robusta-dev/krr/f220c3156fd3d71ab3bfa7b75b9c31338cc21ce2/images/screenshot.jpeg -------------------------------------------------------------------------------- 
/images/ui_recommendation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robusta-dev/krr/f220c3156fd3d71ab3bfa7b75b9c31338cc21ce2/images/ui_recommendation.png -------------------------------------------------------------------------------- /images/ui_screenshot_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robusta-dev/krr/f220c3156fd3d71ab3bfa7b75b9c31338cc21ce2/images/ui_screenshot_new.png -------------------------------------------------------------------------------- /images/ui_video.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robusta-dev/krr/f220c3156fd3d71ab3bfa7b75b9c31338cc21ce2/images/ui_video.gif -------------------------------------------------------------------------------- /intro.txt: -------------------------------------------------------------------------------- 1 | [bold magenta] 2 | _____ _ _ _ _______ _____ 3 | | __ \ | | | | | |/ / __ \| __ \ 4 | | |__) |___ | |__ _ _ ___| |_ __ _ | ' /| |__) | |__) | 5 | | _ // _ \| '_ \| | | / __| __/ _` | | < | _ /| _ / 6 | | | \ \ (_) | |_) | |_| \__ \ || (_| | | . \| | \ \| | \ \ 7 | |_| \_\___/|_.__/ \__,_|___/\__\__,_| |_|\_\_| \_\_| \_\ 8 | 9 | 10 | Thanks for using Robusta KRR. If you have any questions or feedback, please feel free to reach out to us at 11 | https://github.com/robusta-dev/krr/issues 12 | 13 | Watch our latest video to optimize your workloads and save costs: https://www.youtube.com/watch?v=TYRA2QcDIuI 14 | 15 | [/bold magenta] -------------------------------------------------------------------------------- /krr.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from robusta_krr.common.ssl_utils import add_custom_certificate 4 | 5 | ADDITIONAL_CERTIFICATE: str = os.environ.get("CERTIFICATE", "") 6 | 7 | if add_custom_certificate(ADDITIONAL_CERTIFICATE): 8 | print("added custom certificate") 9 | 10 | # DO NOT ADD ANY CODE ABOVE THIS 11 | # ADDING IMPORTS BEFORE ADDING THE CUSTOM CERTS MIGHT INIT HTTP CLIENTS THAT DOESN'T RESPECT THE CUSTOM CERT 12 | 13 | from robusta_krr import run 14 | 15 | if __name__ == "__main__": 16 | run() 17 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "robusta-krr" 3 | version = "1.8.2-dev" 4 | description = "Robusta's Resource Recommendation engine for Kubernetes" 5 | authors = ["Pavel Zhukov <33721692+LeaveMyYard@users.noreply.github.com>"] 6 | license = "MIT" 7 | readme = "README.md" 8 | packages = [{ include = "robusta_krr" }] 9 | 10 | [tool.black] 11 | line-length = 120 12 | target-version = ['py39'] 13 | 14 | [tool.isort] 15 | line_length = 120 16 | multi_line_output = 3 17 | include_trailing_comma = true 18 | 19 | [tool.mypy] 20 | plugins = "numpy.typing.mypy_plugin,pydantic.mypy" 21 | 22 | [tool.poetry.scripts] 23 | krr = "robusta_krr.main:run" 24 | 25 | [tool.poetry.dependencies] 26 | python = ">=3.9,<=3.12.3" 27 | typer = { extras = ["all"], version = "^0.7.0" } 28 | pydantic = "^1.10.7" 29 | kubernetes = "^26.1.0" 30 | prometheus-api-client = "0.5.3" 31 | numpy = ">=1.26.4,<1.27.0" 32 | alive-progress = "^3.1.2" 33 | prometrix = "0.2.0" 34 | slack-sdk = "^3.21.3" 35 | pandas = "2.2.2" 36 | requests = "2.32.0" 37 | pyyaml = "6.0.1" 38 | 
typing-extensions = "4.6.0" 39 | idna = "3.7" 40 | urllib3 = "^1.26.20" 41 | setuptools = "^70.0.0" 42 | zipp = "^3.19.1" 43 | tenacity = "^9.0.0" 44 | 45 | 46 | 47 | [tool.poetry.group.dev.dependencies] 48 | mypy = "^1.0.1" 49 | black = "^23.1.0" 50 | isort = "^5.12.0" 51 | flake8 = "^6.0.0" 52 | types-pyyaml = "^6.0.12.8" 53 | types-cachetools = "^5.3.0.4" 54 | types-requests = "^2.28.11.15" 55 | pyinstaller = "^5.9.0" 56 | pytest = "^7.2.2" 57 | 58 | [build-system] 59 | requires = ["poetry-core"] 60 | build-backend = "poetry.core.masonry.api" 61 | 62 | 63 | [project] 64 | name = "robusta_krr" 65 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | about-time==4.2.1 ; python_version >= "3.9" and python_full_version < "3.13" 2 | alive-progress==3.1.5 ; python_version >= "3.9" and python_full_version < "3.13" 3 | boto3==1.34.62 ; python_version >= "3.9" and python_full_version < "3.13" 4 | botocore==1.34.62 ; python_version >= "3.9" and python_full_version < "3.13" 5 | cachetools==5.3.3 ; python_version >= "3.9" and python_full_version < "3.13" 6 | certifi==2024.2.2 ; python_version >= "3.9" and python_full_version < "3.13" 7 | charset-normalizer==3.3.2 ; python_version >= "3.9" and python_full_version < "3.13" 8 | click==8.1.7 ; python_version >= "3.9" and python_full_version < "3.13" 9 | colorama==0.4.6 ; python_version >= "3.9" and python_full_version < "3.13" 10 | commonmark==0.9.1 ; python_version >= "3.9" and python_full_version < "3.13" 11 | contourpy==1.2.0 ; python_version >= "3.9" and python_full_version < "3.13" 12 | cycler==0.12.1 ; python_version >= "3.9" and python_full_version < "3.13" 13 | dateparser==1.2.0 ; python_version >= "3.9" and python_full_version < "3.13" 14 | fonttools==4.49.0 ; python_version >= "3.9" and python_full_version < "3.13" 15 | google-auth==2.28.2 ; python_version >= "3.9" and python_full_version < "3.13" 16 | grapheme==0.6.0 ; python_version >= "3.9" and python_full_version < "3.13" 17 | httmock==1.4.0 ; python_version >= "3.9" and python_full_version < "3.13" 18 | idna==3.7 ; python_version >= "3.9" and python_full_version < "3.13" 19 | importlib-resources==6.3.0 ; python_version >= "3.9" and python_version < "3.10" 20 | jmespath==1.0.1 ; python_version >= "3.9" and python_full_version < "3.13" 21 | kiwisolver==1.4.5 ; python_version >= "3.9" and python_full_version < "3.13" 22 | kubernetes==26.1.0 ; python_version >= "3.9" and python_full_version < "3.13" 23 | matplotlib==3.8.3 ; python_version >= "3.9" and python_full_version < "3.13" 24 | numpy==1.26.4 ; python_version >= "3.9" and python_full_version < "3.13" 25 | oauthlib==3.2.2 ; python_version >= "3.9" and python_full_version < "3.13" 26 | packaging==24.0 ; python_version >= "3.9" and python_full_version < "3.13" 27 | pandas==2.2.2 ; python_version >= "3.9" and python_full_version < "3.13" 28 | pillow==10.3.0 ; python_version >= "3.9" and python_full_version < "3.13" 29 | prometheus-api-client==0.5.3 ; python_version >= "3.9" and python_full_version < "3.13" 30 | prometrix==0.1.17 ; python_version >= "3.9" and python_full_version < "3.13" 31 | pyasn1-modules==0.3.0 ; python_version >= "3.9" and python_full_version < "3.13" 32 | pyasn1==0.5.1 ; python_version >= "3.9" and python_full_version < "3.13" 33 | pydantic==1.10.15 ; python_version >= "3.9" and python_full_version < "3.13" 34 | pygments==2.17.2 ; python_version >= "3.9" and python_full_version < "3.13" 
35 | pyparsing==3.1.2 ; python_version >= "3.9" and python_full_version < "3.13" 36 | python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_full_version < "3.13" 37 | pytz==2024.1 ; python_version >= "3.9" and python_full_version < "3.13" 38 | pyyaml==6.0.1 ; python_version >= "3.9" and python_full_version < "3.13" 39 | regex==2023.12.25 ; python_version >= "3.9" and python_full_version < "3.13" 40 | requests-oauthlib==1.4.0 ; python_version >= "3.9" and python_full_version < "3.13" 41 | requests==2.32.0 ; python_version >= "3.9" and python_full_version < "3.13" 42 | rich==12.6.0 ; python_version >= "3.9" and python_full_version < "3.13" 43 | rsa==4.9 ; python_version >= "3.9" and python_full_version < "3.13" 44 | s3transfer==0.10.0 ; python_version >= "3.9" and python_full_version < "3.13" 45 | setuptools==70.3.0 ; python_version >= "3.9" and python_full_version < "3.13" 46 | shellingham==1.5.4 ; python_version >= "3.9" and python_full_version < "3.13" 47 | six==1.16.0 ; python_version >= "3.9" and python_full_version < "3.13" 48 | slack-sdk==3.27.1 ; python_version >= "3.9" and python_full_version < "3.13" 49 | typer[all]==0.7.0 ; python_version >= "3.9" and python_full_version < "3.13" 50 | typing-extensions==4.6.0 ; python_version >= "3.9" and python_full_version < "3.13" 51 | tzdata==2024.1 ; python_version >= "3.9" and python_full_version < "3.13" 52 | tzlocal==5.2 ; python_version >= "3.9" and python_full_version < "3.13" 53 | urllib3==1.26.19 ; python_version >= "3.9" and python_full_version < "3.13" 54 | websocket-client==1.7.0 ; python_version >= "3.9" and python_full_version < "3.13" 55 | zipp==3.19.2 ; python_version >= "3.9" and python_version < "3.13" 56 | tenacity==9.0.0 ; python_version >= "3.9" and python_version < "3.13" -------------------------------------------------------------------------------- /robusta_krr/__init__.py: -------------------------------------------------------------------------------- 1 | from .main import run 2 | 3 | __version__ = "dev" 4 | __all__ = ["run", "__version__"] 5 | -------------------------------------------------------------------------------- /robusta_krr/api/formatters.py: -------------------------------------------------------------------------------- 1 | from robusta_krr.core.abstract.formatters import find, list_available, register 2 | 3 | __all__ = ["register", "find", "list_available"] 4 | -------------------------------------------------------------------------------- /robusta_krr/api/models.py: -------------------------------------------------------------------------------- 1 | from robusta_krr.core.abstract.strategies import MetricsPodData, PodsTimeData, ResourceRecommendation, RunResult 2 | from robusta_krr.core.models.allocations import RecommendationValue, ResourceAllocations, ResourceType 3 | from robusta_krr.core.models.objects import K8sObjectData, PodData 4 | from robusta_krr.core.models.result import ResourceScan, Result 5 | from robusta_krr.core.models.severity import Severity, register_severity_calculator 6 | 7 | __all__ = [ 8 | "ResourceType", 9 | "ResourceAllocations", 10 | "RecommendationValue", 11 | "K8sObjectData", 12 | "PodData", 13 | "Result", 14 | "Severity", 15 | "register_severity_calculator", 16 | "ResourceScan", 17 | "ResourceRecommendation", 18 | "PodsTimeData", 19 | "MetricsPodData", 20 | "RunResult", 21 | ] 22 | -------------------------------------------------------------------------------- /robusta_krr/api/strategies.py: 
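The `register`/`find`/`list_available` trio re-exported in `robusta_krr/api/formatters.py` above is the whole public surface of the formatter registry. A small hedged sketch of how they fit together follows; the formatter body and name are made up, and constructing a `Result` is omitted because its schema is not shown here.

from robusta_krr.api import formatters
from robusta_krr.api.models import Result


@formatters.register(rich_console=False)
def one_liner(result: Result) -> str:
    # A made-up formatter: a real one would walk the fields of the Result model
    return "krr finished"


print(formatters.list_available())  # includes 'one_liner' (registered under the function name) plus any built-ins already imported
fmt = formatters.find("one_liner")  # lookup by the same name; raises ValueError if missing
# fmt(result) is what the runner calls with the computed Result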
-------------------------------------------------------------------------------- 1 | from robusta_krr.core.abstract.strategies import BaseStrategy, StrategySettings 2 | 3 | __all__ = ["BaseStrategy", "StrategySettings"] 4 | -------------------------------------------------------------------------------- /robusta_krr/common/ssl_utils.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import os 3 | 4 | import certifi 5 | 6 | CUSTOM_CERTIFICATE_PATH = "/tmp/custom_ca.pem" 7 | 8 | 9 | def append_custom_certificate(custom_ca: str) -> None: 10 | with open(certifi.where(), "ab") as outfile: 11 | outfile.write(base64.b64decode(custom_ca)) 12 | 13 | os.environ["WEBSOCKET_CLIENT_CA_BUNDLE"] = certifi.where() 14 | 15 | 16 | def create_temporary_certificate(custom_ca: str) -> None: 17 | with open(certifi.where(), "rb") as base_cert: 18 | base_cert_content = base_cert.read() 19 | 20 | with open(CUSTOM_CERTIFICATE_PATH, "wb") as outfile: 21 | outfile.write(base_cert_content) 22 | outfile.write(base64.b64decode(custom_ca)) 23 | 24 | os.environ["REQUESTS_CA_BUNDLE"] = CUSTOM_CERTIFICATE_PATH 25 | os.environ["WEBSOCKET_CLIENT_CA_BUNDLE"] = CUSTOM_CERTIFICATE_PATH 26 | certifi.where = lambda: CUSTOM_CERTIFICATE_PATH 27 | 28 | 29 | def add_custom_certificate(custom_ca: str) -> bool: 30 | if not custom_ca: 31 | return False 32 | 33 | # NOTE: Sometimes (Openshift) the certifi.where() is not writable, so we need to 34 | # use a temporary file in case of PermissionError. 35 | try: 36 | append_custom_certificate(custom_ca) 37 | except PermissionError: 38 | create_temporary_certificate(custom_ca) 39 | 40 | return True 41 | -------------------------------------------------------------------------------- /robusta_krr/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robusta-dev/krr/f220c3156fd3d71ab3bfa7b75b9c31338cc21ce2/robusta_krr/core/__init__.py -------------------------------------------------------------------------------- /robusta_krr/core/abstract/formatters.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any, Callable, Optional 4 | 5 | from robusta_krr.core.models.result import Result 6 | 7 | FormatterFunc = Callable[[Result], Any] 8 | 9 | FORMATTERS_REGISTRY: dict[str, FormatterFunc] = {} 10 | 11 | 12 | # NOTE: Here asterisk is used to make the argument `rich_console` keyword-only 13 | # This is done to avoid the following usage, where it is unclear what the boolean value is for: 14 | # @register("My Formatter", True) 15 | # def my_formatter(result: Result) -> str: 16 | # return "My formatter" 17 | # 18 | # Instead, the following usage is enforced: 19 | # @register("My Formatter", rich_console=True) 20 | # def my_formatter(result: Result) -> str: 21 | # return "My formatter" 22 | 23 | 24 | def register( 25 | display_name: Optional[str] = None, *, rich_console: bool = False 26 | ) -> Callable[[FormatterFunc], FormatterFunc]: 27 | """ 28 | A decorator to register a formatter function. 29 | 30 | Args: 31 | display_name (str, optional): The name to use for the formatter in the registry. 32 | rich_console (bool): Whether or not the formatter is for a rich console. Defaults to False. 33 | 34 | Returns: 35 | Callable[[FormatterFunc], FormatterFunc]: The decorator function. 
36 | """ 37 | 38 | def decorator(func: FormatterFunc) -> FormatterFunc: 39 | name = display_name or func.__name__ 40 | 41 | FORMATTERS_REGISTRY[name] = func 42 | 43 | func.__display_name__ = name # type: ignore 44 | func.__rich_console__ = rich_console # type: ignore 45 | 46 | return func 47 | 48 | return decorator 49 | 50 | 51 | def find(name: str) -> FormatterFunc: 52 | """ 53 | Find a formatter by name in the registry. 54 | 55 | Args: 56 | name (str): The name of the formatter. 57 | 58 | Returns: 59 | FormatterFunc: The formatter function. 60 | 61 | Raises: 62 | ValueError: If a formatter with the given name does not exist. 63 | """ 64 | 65 | try: 66 | return FORMATTERS_REGISTRY[name] 67 | except KeyError as e: 68 | raise ValueError(f"Formatter '{name}' not found") from e 69 | 70 | 71 | def list_available() -> list[str]: 72 | """ 73 | List available formatters in the registry. 74 | 75 | Returns: 76 | list[str]: A list of the names of the available formatters. 77 | """ 78 | 79 | return list(FORMATTERS_REGISTRY) 80 | 81 | 82 | __all__ = ["register", "find"] 83 | -------------------------------------------------------------------------------- /robusta_krr/core/abstract/metrics.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from abc import ABC, abstractmethod 3 | 4 | from robusta_krr.core.abstract.strategies import PodsTimeData 5 | from robusta_krr.core.models.objects import K8sObjectData 6 | 7 | 8 | class BaseMetric(ABC): 9 | """ 10 | This abstraction is done for a future use. 11 | Currently we only scrape metrics from Prometheus, 12 | but in the future we may want to support other metric sources like Datadog, etc. 13 | 14 | TODO: When we want to support other metric sources, we should maybe rethink an interface here. 15 | """ 16 | 17 | @abstractmethod 18 | async def load_data( 19 | self, object: K8sObjectData, period: datetime.timedelta, step: datetime.timedelta 20 | ) -> PodsTimeData: 21 | ... 22 | -------------------------------------------------------------------------------- /robusta_krr/core/abstract/strategies.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import abc 4 | import datetime 5 | from textwrap import dedent 6 | from typing import TYPE_CHECKING, Annotated, Generic, Literal, Optional, Sequence, TypeVar, get_args 7 | 8 | import numpy as np 9 | import pydantic as pd 10 | from numpy.typing import NDArray 11 | 12 | from robusta_krr.core.models.result import K8sObjectData, ResourceType 13 | 14 | if TYPE_CHECKING: 15 | from robusta_krr.core.abstract.metrics import BaseMetric # noqa: F401 16 | from robusta_krr.core.integrations.prometheus.metrics import PrometheusMetric 17 | 18 | SelfRR = TypeVar("SelfRR", bound="ResourceRecommendation") 19 | 20 | 21 | class ResourceRecommendation(pd.BaseModel): 22 | """A class to represent resource recommendation with optional request and limit values. 23 | 24 | The NaN values are used to represent undefined values: the strategy did not provide a recommendation for the resource. 25 | None values are used to represent the strategy says that value should not be set. 26 | """ 27 | 28 | request: Optional[float] 29 | limit: Optional[float] 30 | info: Optional[str] = pd.Field( 31 | None, description="Additional information about the recommendation." 
32 | ) 33 | 34 | @classmethod 35 | def undefined(cls: type[SelfRR], info: Optional[str] = None) -> SelfRR: 36 | return cls(request=float("NaN"), limit=float("NaN"), info=info) 37 | 38 | 39 | class StrategySettings(pd.BaseModel): 40 | """A class to represent strategy settings with configurable history and timeframe duration. 41 | 42 | It is used in CLI to generate the help, parameters and validate values. 43 | Description is used to generate the help. 44 | Other pydantic features can be used to validate the values. 45 | 46 | Nested classes are not supported here. 47 | """ 48 | 49 | history_duration: float = pd.Field( 50 | 24 * 7 * 2, ge=1, description="The duration of the history data to use (in hours)." 51 | ) 52 | timeframe_duration: float = pd.Field(1.25, gt=0, description="The step for the history data (in minutes).") 53 | 54 | @property 55 | def history_timedelta(self) -> datetime.timedelta: 56 | return datetime.timedelta(hours=self.history_duration) 57 | 58 | @property 59 | def timeframe_timedelta(self) -> datetime.timedelta: 60 | return datetime.timedelta(minutes=self.timeframe_duration) 61 | 62 | def history_range_enough(self, history_range: tuple[datetime.timedelta, datetime.timedelta]) -> bool: 63 | """Override this function to check if the history range is enough for the strategy.""" 64 | 65 | return True 66 | 67 | 68 | # A type alias for a numpy array of shape (N, 2). 69 | ArrayNx2 = Annotated[NDArray[np.float64], Literal["N", 2]] 70 | 71 | 72 | PodsTimeData = dict[str, ArrayNx2] # Mapping: pod -> [(time, value)] 73 | MetricsPodData = dict[str, PodsTimeData] 74 | 75 | RunResult = dict[ResourceType, ResourceRecommendation] 76 | 77 | SelfBS = TypeVar("SelfBS", bound="BaseStrategy") 78 | _StrategySettings = TypeVar("_StrategySettings", bound=StrategySettings) 79 | 80 | 81 | # An abstract base class for strategy implementation. 82 | # This class requires implementation of a 'run' method for calculating recommendation. 83 | # Make a subclass if you want to create a concrete strategy. 84 | class BaseStrategy(abc.ABC, Generic[_StrategySettings]): 85 | """An abstract base class for strategy implementation. 86 | 87 | This class is generic, and requires a type for the settings. 88 | This settings type will be used for the settings property of the strategy. 89 | It will be used to generate CLI parameters for this strategy, validated by pydantic. 90 | 91 | This class requires implementation of a 'run' method for calculating recommendation. 92 | Additionally, it provides a 'description' property for generating a description for the strategy. 93 | Description property uses the docstring of the strategy class and the settings of the strategy. 94 | 95 | The name of the strategy is the name of the class in lowercase, without the 'Strategy' suffix, if exists. 96 | If you want to change the name of the strategy, you can change the display_name class attribute. 97 | 98 | The strategy will automatically be registered in the strategy registry using __subclasses__ mechanism. 
99 | """ 100 | 101 | display_name: str 102 | rich_console: bool = False 103 | 104 | # TODO: this should be BaseMetric, but currently we only support Prometheus 105 | @property 106 | @abc.abstractmethod 107 | def metrics(self) -> Sequence[type[PrometheusMetric]]: 108 | pass 109 | 110 | def __init__(self, settings: _StrategySettings): 111 | self.settings = settings 112 | 113 | def __str__(self) -> str: 114 | return self.display_name.title() 115 | 116 | @property 117 | def description(self) -> Optional[str]: 118 | """ 119 | Generate a description for the strategy. 120 | You can use Rich's markdown syntax to format the description. 121 | """ 122 | raise NotImplementedError() 123 | 124 | # Abstract method that needs to be implemented by subclass. 125 | # This method is intended to calculate resource recommendation based on history data and kubernetes object data. 126 | @abc.abstractmethod 127 | def run(self, history_data: MetricsPodData, object_data: K8sObjectData) -> RunResult: 128 | pass 129 | 130 | # This method is intended to return a strategy by its name. 131 | @classmethod 132 | def find(cls: type[SelfBS], name: str) -> type[SelfBS]: 133 | strategies = cls.get_all() 134 | if name.lower() in strategies: 135 | return strategies[name.lower()] 136 | 137 | raise ValueError(f"Unknown strategy name: {name}. Available strategies: {', '.join(strategies)}") 138 | 139 | # This method is intended to return all the available strategies. 140 | @classmethod 141 | def get_all(cls: type[SelfBS]) -> dict[str, type[SelfBS]]: 142 | from robusta_krr import strategies as _ # noqa: F401 143 | 144 | return {sub_cls.display_name.lower(): sub_cls for sub_cls in cls.__subclasses__()} 145 | 146 | # This method is intended to return the type of settings used in strategy. 147 | @classmethod 148 | def get_settings_type(cls) -> type[StrategySettings]: 149 | return get_args(cls.__orig_bases__[0])[0] # type: ignore 150 | 151 | 152 | AnyStrategy = BaseStrategy[StrategySettings] 153 | 154 | 155 | __all__ = [ 156 | "AnyStrategy", 157 | "BaseStrategy", 158 | "StrategySettings", 159 | "PodsTimeData", 160 | "MetricsPodData", 161 | "K8sObjectData", 162 | "ResourceType", 163 | ] 164 | -------------------------------------------------------------------------------- /robusta_krr/core/integrations/kubernetes/config_patch.py: -------------------------------------------------------------------------------- 1 | # NOTE: This is a workaround for the issue described here: 2 | # https://github.com/kubernetes-client/python/pull/1863 3 | 4 | from __future__ import annotations 5 | 6 | from typing import Optional 7 | 8 | from kubernetes.client import configuration 9 | from kubernetes.config import kube_config 10 | 11 | 12 | class KubeConfigLoader(kube_config.KubeConfigLoader): 13 | def _load_cluster_info(self): 14 | super()._load_cluster_info() 15 | 16 | if "proxy-url" in self._cluster: 17 | self.proxy = self._cluster["proxy-url"] 18 | 19 | def _set_config(self, client_configuration: Configuration): 20 | super()._set_config(client_configuration) 21 | 22 | key = "proxy" 23 | if key in self.__dict__: 24 | setattr(client_configuration, key, getattr(self, key)) 25 | 26 | 27 | class Configuration(configuration.Configuration): 28 | def __init__( 29 | self, 30 | proxy: Optional[str] = None, 31 | **kwargs, 32 | ): 33 | super().__init__(**kwargs) 34 | 35 | self.proxy = proxy 36 | 37 | 38 | configuration.Configuration = Configuration 39 | kube_config.KubeConfigLoader = KubeConfigLoader 40 | 
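The effect of the `config_patch` workaround just shown, sketched under the assumption that the active kubeconfig's cluster entry carries a `proxy-url` key (the proxy address below is made up): importing the module swaps in the patched loader classes, so the proxy value ends up on the client `Configuration` object that `load_kube_config` fills in.

import robusta_krr.core.integrations.kubernetes.config_patch  # noqa: F401 (importing applies the monkey-patch)

from kubernetes import config as k8s_config
from kubernetes.client import Configuration

cfg = Configuration()
# Assumes the cluster entry of the default kubeconfig contains e.g. `proxy-url: http://proxy.internal:3128`
k8s_config.load_kube_config(client_configuration=cfg)
print(getattr(cfg, "proxy", None))  # "http://proxy.internal:3128" if proxy-url was set, otherwise None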
-------------------------------------------------------------------------------- /robusta_krr/core/integrations/openshift/__init__.py: -------------------------------------------------------------------------------- 1 | from .token import TOKEN_LOCATION, load_token 2 | 3 | __all__ = ["TOKEN_LOCATION", "load_token"] 4 | -------------------------------------------------------------------------------- /robusta_krr/core/integrations/openshift/token.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from robusta_krr.core.models.config import settings 4 | 5 | # NOTE: This one should be mounted if openshift is enabled (done by Robusta Runner) 6 | TOKEN_LOCATION = '/var/run/secrets/kubernetes.io/serviceaccount/token' 7 | 8 | 9 | def load_token() -> Optional[str]: 10 | if not settings.openshift: 11 | return None 12 | 13 | try: 14 | with open(TOKEN_LOCATION, 'r') as file: 15 | return file.read() 16 | except FileNotFoundError: 17 | return None 18 | -------------------------------------------------------------------------------- /robusta_krr/core/integrations/prometheus/__init__.py: -------------------------------------------------------------------------------- 1 | from .loader import PrometheusMetricsLoader 2 | from .metrics_service.prometheus_metrics_service import PrometheusDiscovery, PrometheusNotFound 3 | from .prometheus_utils import ClusterNotSpecifiedException 4 | -------------------------------------------------------------------------------- /robusta_krr/core/integrations/prometheus/loader.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import datetime 4 | import logging 5 | from concurrent.futures import ThreadPoolExecutor 6 | from typing import TYPE_CHECKING, Optional, Dict, Any 7 | 8 | from kubernetes import config as k8s_config 9 | from kubernetes.client.api_client import ApiClient 10 | from kubernetes.client.exceptions import ApiException 11 | from prometrix import MetricsNotFound, PrometheusNotFound 12 | 13 | from robusta_krr.core.models.config import settings 14 | from robusta_krr.core.models.objects import K8sObjectData, PodData 15 | 16 | from .metrics_service.prometheus_metrics_service import PrometheusMetricsService 17 | from .metrics_service.thanos_metrics_service import ThanosMetricsService 18 | from .metrics_service.victoria_metrics_service import VictoriaMetricsService 19 | from .metrics_service.mimir_metrics_service import MimirMetricsService 20 | 21 | if TYPE_CHECKING: 22 | from robusta_krr.core.abstract.strategies import BaseStrategy, MetricsPodData 23 | 24 | logger = logging.getLogger("krr") 25 | 26 | class PrometheusMetricsLoader: 27 | def __init__(self, *, cluster: Optional[str] = None) -> None: 28 | """ 29 | Initializes the Prometheus Loader. 30 | 31 | Args: 32 | cluster (Optional[str]): The name of the cluster. Defaults to None. 
33 | """ 34 | 35 | self.executor = ThreadPoolExecutor(settings.max_workers) 36 | self.api_client = settings.get_kube_client(context=cluster) 37 | loader = self.get_metrics_service(api_client=self.api_client, cluster=cluster) 38 | if loader is None: 39 | raise PrometheusNotFound( 40 | f"Wasn't able to connect to any Prometheus service in {cluster or 'inner'} cluster\n" 41 | "Try using port-forwarding and/or setting the url manually (using the -p flag.).\n" 42 | "For more information, see 'Giving the Explicit Prometheus URL' at https://github.com/robusta-dev/krr?tab=readme-ov-file#usage" 43 | ) 44 | 45 | self.loader = loader 46 | 47 | logger.info(f"{self.loader.name()} connected successfully for {cluster or 'default'} cluster") 48 | 49 | def get_metrics_service( 50 | self, 51 | api_client: Optional[ApiClient] = None, 52 | cluster: Optional[str] = None, 53 | ) -> Optional[PrometheusMetricsService]: 54 | if settings.prometheus_url is not None: 55 | logger.info("Prometheus URL is specified, will not auto-detect a metrics service") 56 | metrics_to_check = [PrometheusMetricsService] 57 | else: 58 | logger.info("No Prometheus URL is specified, trying to auto-detect a metrics service") 59 | metrics_to_check = [VictoriaMetricsService, ThanosMetricsService, MimirMetricsService, PrometheusMetricsService] 60 | 61 | for metric_service_class in metrics_to_check: 62 | service_name = metric_service_class.name() 63 | try: 64 | loader = metric_service_class(api_client=api_client, cluster=cluster, executor=self.executor) 65 | loader.check_connection() 66 | except MetricsNotFound as e: 67 | logger.info(f"{service_name} not found: {e}") 68 | except ApiException as e: 69 | logger.warning( 70 | f"Unable to automatically discover a {service_name} in the cluster ({e}). " 71 | "Try specifying how to connect to Prometheus via cli options" 72 | ) 73 | else: 74 | logger.info(f"{service_name} found") 75 | loader.validate_cluster_name() 76 | return loader 77 | 78 | return None 79 | 80 | async def get_history_range( 81 | self, history_duration: datetime.timedelta 82 | ) -> Optional[tuple[datetime.datetime, datetime.datetime]]: 83 | return await self.loader.get_history_range(history_duration) 84 | 85 | async def load_pods(self, object: K8sObjectData, period: datetime.timedelta) -> list[PodData]: 86 | try: 87 | return await self.loader.load_pods(object, period) 88 | except Exception as e: 89 | logger.exception(f"Failed to load pods for {object}: {e}") 90 | return [] 91 | 92 | async def get_cluster_summary(self) -> Dict[str, Any]: 93 | try: 94 | return await self.loader.get_cluster_summary() 95 | except Exception as e: 96 | logger.exception(f"Failed to get cluster summary: {e}") 97 | return {} 98 | 99 | async def gather_data( 100 | self, 101 | object: K8sObjectData, 102 | strategy: BaseStrategy, 103 | period: datetime.timedelta, 104 | *, 105 | step: datetime.timedelta = datetime.timedelta(minutes=30), 106 | ) -> MetricsPodData: 107 | """ 108 | Gathers data from Prometheus for a specified object and resource. 109 | 110 | Args: 111 | object (K8sObjectData): The Kubernetes object. 112 | resource (ResourceType): The resource type. 113 | period (datetime.timedelta): The time period for which to gather data. 114 | step (datetime.timedelta, optional): The time step between data points. Defaults to 30 minutes. 115 | 116 | Returns: 117 | ResourceHistoryData: The gathered resource history data. 
118 | """ 119 | 120 | return { 121 | MetricLoader.__name__: await self.loader.gather_data(object, MetricLoader, period, step) 122 | for MetricLoader in strategy.metrics 123 | } 124 | -------------------------------------------------------------------------------- /robusta_krr/core/integrations/prometheus/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import PrometheusMetric 2 | from .cpu import CPUAmountLoader, CPULoader, PercentileCPULoader 3 | from .memory import MaxMemoryLoader, MemoryAmountLoader, MemoryLoader, MaxOOMKilledMemoryLoader 4 | -------------------------------------------------------------------------------- /robusta_krr/core/integrations/prometheus/metrics/cpu.py: -------------------------------------------------------------------------------- 1 | from robusta_krr.core.models.objects import K8sObjectData 2 | 3 | from .base import PrometheusMetric, QueryType 4 | 5 | 6 | class CPULoader(PrometheusMetric): 7 | """ 8 | A metric loader for loading CPU usage metrics. 9 | """ 10 | 11 | query_type: QueryType = QueryType.QueryRange 12 | 13 | def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: 14 | pods_selector = "|".join(pod.name for pod in object.pods) 15 | cluster_label = self.get_prometheus_cluster_label() 16 | return f""" 17 | max( 18 | rate( 19 | container_cpu_usage_seconds_total{{ 20 | namespace="{object.namespace}", 21 | pod=~"{pods_selector}", 22 | container="{object.container}" 23 | {cluster_label} 24 | }}[{step}] 25 | ) 26 | ) by (container, pod, job) 27 | """ 28 | 29 | 30 | def PercentileCPULoader(percentile: float) -> type[PrometheusMetric]: 31 | """ 32 | A factory for creating percentile CPU usage metric loaders. 33 | """ 34 | 35 | if not 0 <= percentile <= 100: 36 | raise ValueError("percentile must be between 0 and 100") 37 | 38 | class PercentileCPULoader(PrometheusMetric): 39 | def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: 40 | pods_selector = "|".join(pod.name for pod in object.pods) 41 | cluster_label = self.get_prometheus_cluster_label() 42 | return f""" 43 | quantile_over_time( 44 | {round(percentile / 100, 2)}, 45 | max( 46 | rate( 47 | container_cpu_usage_seconds_total{{ 48 | namespace="{object.namespace}", 49 | pod=~"{pods_selector}", 50 | container="{object.container}" 51 | {cluster_label} 52 | }}[{step}] 53 | ) 54 | ) by (container, pod, job) 55 | [{duration}:{step}] 56 | ) 57 | """ 58 | 59 | return PercentileCPULoader 60 | 61 | 62 | class CPUAmountLoader(PrometheusMetric): 63 | """ 64 | A metric loader for loading CPU points count. 
65 | """ 66 | 67 | def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: 68 | pods_selector = "|".join(pod.name for pod in object.pods) 69 | cluster_label = self.get_prometheus_cluster_label() 70 | return f""" 71 | count_over_time( 72 | max( 73 | container_cpu_usage_seconds_total{{ 74 | namespace="{object.namespace}", 75 | pod=~"{pods_selector}", 76 | container="{object.container}" 77 | {cluster_label} 78 | }} 79 | ) by (container, pod, job) 80 | [{duration}:{step}] 81 | ) 82 | """ 83 | -------------------------------------------------------------------------------- /robusta_krr/core/integrations/prometheus/metrics/memory.py: -------------------------------------------------------------------------------- 1 | from robusta_krr.core.models.objects import K8sObjectData 2 | 3 | from .base import PrometheusMetric, QueryType 4 | 5 | 6 | class MemoryLoader(PrometheusMetric): 7 | """ 8 | A metric loader for loading memory usage metrics. 9 | """ 10 | 11 | query_type: QueryType = QueryType.QueryRange 12 | 13 | def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: 14 | pods_selector = "|".join(pod.name for pod in object.pods) 15 | cluster_label = self.get_prometheus_cluster_label() 16 | return f""" 17 | max( 18 | container_memory_working_set_bytes{{ 19 | namespace="{object.namespace}", 20 | pod=~"{pods_selector}", 21 | container="{object.container}" 22 | {cluster_label} 23 | }} 24 | ) by (container, pod, job) 25 | """ 26 | 27 | 28 | class MaxMemoryLoader(PrometheusMetric): 29 | """ 30 | A metric loader for loading max memory usage metrics. 31 | """ 32 | 33 | def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: 34 | pods_selector = "|".join(pod.name for pod in object.pods) 35 | cluster_label = self.get_prometheus_cluster_label() 36 | return f""" 37 | max_over_time( 38 | max( 39 | container_memory_working_set_bytes{{ 40 | namespace="{object.namespace}", 41 | pod=~"{pods_selector}", 42 | container="{object.container}" 43 | {cluster_label} 44 | }} 45 | ) by (container, pod, job) 46 | [{duration}:{step}] 47 | ) 48 | """ 49 | 50 | 51 | class MemoryAmountLoader(PrometheusMetric): 52 | """ 53 | A metric loader for loading memory points count. 54 | """ 55 | 56 | def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: 57 | pods_selector = "|".join(pod.name for pod in object.pods) 58 | cluster_label = self.get_prometheus_cluster_label() 59 | return f""" 60 | count_over_time( 61 | max( 62 | container_memory_working_set_bytes{{ 63 | namespace="{object.namespace}", 64 | pod=~"{pods_selector}", 65 | container="{object.container}" 66 | {cluster_label} 67 | }} 68 | ) by (container, pod, job) 69 | [{duration}:{step}] 70 | ) 71 | """ 72 | 73 | # TODO: Need to battle test if this one is correct. 74 | class MaxOOMKilledMemoryLoader(PrometheusMetric): 75 | """ 76 | A metric loader for loading the maximum memory limits that were surpassed by the OOMKilled event. 
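    The query joins each container's configured memory limit with containers whose last
    termination reason was OOMKilled, so the reported value is (approximately) the memory
    limit that was in effect when the OOMKill happened. The simple strategy can then raise its
    memory recommendation above this value by `oom_memory_buffer_percentage`.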
77 | """ 78 | 79 | warning_on_no_data = False 80 | 81 | def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: 82 | pods_selector = "|".join(pod.name for pod in object.pods) 83 | cluster_label = self.get_prometheus_cluster_label() 84 | return f""" 85 | max_over_time( 86 | max( 87 | max( 88 | kube_pod_container_resource_limits{{ 89 | resource="memory", 90 | namespace="{object.namespace}", 91 | pod=~"{pods_selector}", 92 | container="{object.container}" 93 | {cluster_label} 94 | }} 95 | ) by (pod, container, job) 96 | * on(pod, container, job) group_left(reason) 97 | max( 98 | kube_pod_container_status_last_terminated_reason{{ 99 | reason="OOMKilled", 100 | namespace="{object.namespace}", 101 | pod=~"{pods_selector}", 102 | container="{object.container}" 103 | {cluster_label} 104 | }} 105 | ) by (pod, container, job, reason) 106 | ) by (container, pod, job) 107 | [{duration}:{step}] 108 | ) 109 | """ 110 | -------------------------------------------------------------------------------- /robusta_krr/core/integrations/prometheus/metrics_service/base_metric_service.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import datetime 3 | from concurrent.futures import ThreadPoolExecutor 4 | from typing import List, Optional, Dict, Any 5 | 6 | from kubernetes.client.api_client import ApiClient 7 | 8 | from robusta_krr.core.abstract.strategies import PodsTimeData 9 | from robusta_krr.core.models.config import settings 10 | from robusta_krr.core.models.objects import K8sObjectData 11 | 12 | from ..metrics import PrometheusMetric 13 | 14 | 15 | class MetricsService(abc.ABC): 16 | def __init__( 17 | self, 18 | api_client: Optional[ApiClient] = None, 19 | cluster: Optional[str] = None, 20 | executor: Optional[ThreadPoolExecutor] = None, 21 | ) -> None: 22 | self.api_client = api_client 23 | self.cluster = cluster or "default" 24 | self.executor = executor 25 | 26 | @abc.abstractmethod 27 | def check_connection(self): 28 | ... 29 | 30 | @classmethod 31 | def name(cls) -> str: 32 | classname = cls.__name__ 33 | return classname.replace("MetricsService", "") if classname != MetricsService.__name__ else classname 34 | 35 | @abc.abstractmethod 36 | def get_cluster_names(self) -> Optional[List[str]]: 37 | ... 38 | 39 | @abc.abstractmethod 40 | async def get_cluster_summary(self) -> Dict[str, Any]: 41 | ... 42 | 43 | @abc.abstractmethod 44 | async def gather_data( 45 | self, 46 | object: K8sObjectData, 47 | LoaderClass: type[PrometheusMetric], 48 | period: datetime.timedelta, 49 | step: datetime.timedelta = datetime.timedelta(minutes=30), 50 | ) -> PodsTimeData: 51 | ... 52 | 53 | def get_prometheus_cluster_label(self) -> str: 54 | """ 55 | Generates the cluster label for querying a centralized Prometheus 56 | 57 | Returns: 58 | str: a promql safe label string for querying the cluster. 
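        Example (illustrative): with settings.prometheus_label set to "cluster" and
        settings.prometheus_cluster_label set to "prod", the returned string is ', cluster="prod"',
        ready to be appended inside a metric's label selector. When prometheus_cluster_label
        is unset, an empty string is returned.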
59 | """ 60 | if settings.prometheus_cluster_label is None: 61 | return "" 62 | return f', {settings.prometheus_label}="{settings.prometheus_cluster_label}"' 63 | -------------------------------------------------------------------------------- /robusta_krr/core/integrations/prometheus/metrics_service/mimir_metrics_service.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from kubernetes.client import ApiClient 4 | from prometrix import MetricsNotFound 5 | 6 | from robusta_krr.utils.service_discovery import MetricsServiceDiscovery 7 | 8 | from .prometheus_metrics_service import PrometheusMetricsService 9 | 10 | class MimirMetricsDiscovery(MetricsServiceDiscovery): 11 | def find_metrics_url(self, *, api_client: Optional[ApiClient] = None) -> Optional[str]: 12 | """ 13 | Finds the Mimir Metrics URL using selectors. 14 | Args: 15 | api_client (Optional[ApiClient]): A Kubernetes API client. Defaults to None. 16 | Returns: 17 | Optional[str]: The discovered Mimir Metrics URL, or None if not found. 18 | """ 19 | return super().find_url( 20 | selectors=[ 21 | "app.kubernetes.io/name=mimir,app.kubernetes.io/component=query-frontend", 22 | ] 23 | ) 24 | 25 | 26 | class MimirMetricsService(PrometheusMetricsService): 27 | """ 28 | A class for fetching metrics from Mimir Metrics. 29 | """ 30 | 31 | service_discovery = MimirMetricsDiscovery 32 | url_postfix = "/prometheus" 33 | additional_headers = {"X-Scope-OrgID": "anonymous"} 34 | 35 | def check_connection(self): 36 | """ 37 | Checks the connection to Prometheus. 38 | Raises: 39 | MimirMetricsNotFound: If the connection to Mimir Metrics cannot be established. 40 | """ 41 | try: 42 | super().check_connection() 43 | except MetricsNotFound as e: 44 | # This is to clarify which metrics service had the issue and not say its a prometheus issue 45 | raise MetricsNotFound( 46 | f"Couldn't connect to Mimir Metrics found under {self.prometheus.url}\nCaused by {e.__class__.__name__}: {e})" 47 | ) from e 48 | -------------------------------------------------------------------------------- /robusta_krr/core/integrations/prometheus/metrics_service/thanos_metrics_service.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from kubernetes.client import ApiClient 4 | from prometrix import MetricsNotFound, ThanosMetricsNotFound 5 | 6 | from robusta_krr.utils.service_discovery import MetricsServiceDiscovery 7 | 8 | from .prometheus_metrics_service import PrometheusMetricsService 9 | 10 | 11 | class ThanosMetricsDiscovery(MetricsServiceDiscovery): 12 | def find_metrics_url(self, *, api_client: Optional[ApiClient] = None) -> Optional[str]: 13 | """ 14 | Finds the Thanos URL using selectors. 15 | Args: 16 | api_client (Optional[ApiClient]): A Kubernetes API client. Defaults to None. 17 | Returns: 18 | Optional[str]: The discovered Thanos URL, or None if not found. 19 | """ 20 | 21 | return super().find_url( 22 | selectors=[ 23 | "app.kubernetes.io/component=query,app.kubernetes.io/name=thanos", 24 | "app.kubernetes.io/name=thanos-query", 25 | "app=thanos-query", 26 | "app=thanos-querier", 27 | ] 28 | ) 29 | 30 | 31 | class ThanosMetricsService(PrometheusMetricsService): 32 | """ 33 | A class for fetching metrics from Thanos. 34 | """ 35 | 36 | service_discovery = ThanosMetricsDiscovery 37 | 38 | def check_connection(self): 39 | """ 40 | Checks the connection to Prometheus. 
41 | Raises: 42 | ThanosMetricsNotFound: If the connection to Thanos cannot be established. 43 | """ 44 | try: 45 | super().check_connection() 46 | except MetricsNotFound as e: 47 | # This is to clarify which metrics service had the issue and not say its a prometheus issue 48 | raise ThanosMetricsNotFound( 49 | f"Couldn't connect to Thanos found under {self.prometheus.url}\nCaused by {e.__class__.__name__}: {e})" 50 | ) from e 51 | -------------------------------------------------------------------------------- /robusta_krr/core/integrations/prometheus/metrics_service/victoria_metrics_service.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from kubernetes.client import ApiClient 4 | from prometrix import MetricsNotFound, VictoriaMetricsNotFound 5 | 6 | from robusta_krr.utils.service_discovery import MetricsServiceDiscovery 7 | 8 | from .prometheus_metrics_service import PrometheusMetricsService 9 | 10 | 11 | class VictoriaMetricsDiscovery(MetricsServiceDiscovery): 12 | def find_metrics_url(self, *, api_client: Optional[ApiClient] = None) -> Optional[str]: 13 | """ 14 | Finds the Victoria Metrics URL using selectors. 15 | Args: 16 | api_client (Optional[ApiClient]): A Kubernetes API client. Defaults to None. 17 | Returns: 18 | Optional[str]: The discovered Victoria Metrics URL, or None if not found. 19 | """ 20 | url = super().find_url( 21 | selectors=[ 22 | "app.kubernetes.io/name=vmsingle", 23 | "app.kubernetes.io/name=victoria-metrics-single", 24 | ] 25 | ) 26 | if url is None: 27 | url = super().find_url( 28 | selectors=[ 29 | "app.kubernetes.io/name=vmselect", 30 | "app=vmselect", 31 | ] 32 | ) 33 | if url is not None: 34 | url = f"{url}/select/0/prometheus/" 35 | return url 36 | 37 | 38 | class VictoriaMetricsService(PrometheusMetricsService): 39 | """ 40 | A class for fetching metrics from Victoria Metrics. 41 | """ 42 | 43 | service_discovery = VictoriaMetricsDiscovery 44 | 45 | @classmethod 46 | def name(cls) -> str: 47 | return "Victoria Metrics" 48 | 49 | def check_connection(self): 50 | """ 51 | Checks the connection to Prometheus. 52 | Raises: 53 | VictoriaMetricsNotFound: If the connection to Victoria Metrics cannot be established. 54 | """ 55 | try: 56 | super().check_connection() 57 | except MetricsNotFound as e: 58 | # This is to clarify which metrics service had the issue and not say its a prometheus issue 59 | raise VictoriaMetricsNotFound( 60 | f"Couldn't connect to Victoria Metrics found under {self.prometheus.url}\nCaused by {e.__class__.__name__}: {e})" 61 | ) from e 62 | -------------------------------------------------------------------------------- /robusta_krr/core/integrations/prometheus/prometheus_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import TYPE_CHECKING 4 | 5 | import boto3 6 | from prometrix import AWSPrometheusConfig, CoralogixPrometheusConfig, PrometheusConfig, VictoriaMetricsPrometheusConfig 7 | 8 | from robusta_krr.core.models.config import settings 9 | 10 | if TYPE_CHECKING: 11 | from robusta_krr.core.integrations.prometheus.metrics_service.prometheus_metrics_service import ( 12 | PrometheusMetricsService, 13 | ) 14 | 15 | 16 | class ClusterNotSpecifiedException(Exception): 17 | """ 18 | An exception raised when a prometheus requires a cluster label but an invalid one is provided. 
19 | """ 20 | 21 | pass 22 | 23 | 24 | def generate_prometheus_config( 25 | url: str, headers: dict[str, str], metrics_service: PrometheusMetricsService 26 | ) -> PrometheusConfig: 27 | from .metrics_service.victoria_metrics_service import VictoriaMetricsService 28 | 29 | baseconfig = { 30 | "url": url, 31 | "disable_ssl": not settings.prometheus_ssl_enabled, 32 | "headers": headers, 33 | } 34 | 35 | # aws config 36 | if settings.eks_managed_prom: 37 | session = boto3.Session(profile_name=settings.eks_managed_prom_profile_name) 38 | credentials = session.get_credentials() 39 | region = settings.eks_managed_prom_region if settings.eks_managed_prom_region else session.region_name 40 | 41 | if settings.eks_access_key and settings.eks_secret_key: 42 | # when we have both access key and secret key, don't try to read credentials which can fail 43 | access_key = settings.eks_access_key 44 | secret_key = settings.eks_secret_key.get_secret_value() 45 | else: 46 | # we need at least one parameter from credentials, but we should use whatever we can from settings (this has higher precedence) 47 | credentials = credentials.get_frozen_credentials() 48 | access_key = settings.eks_access_key if settings.eks_access_key else credentials.access_key 49 | secret_key = settings.eks_secret_key.get_secret_value() if settings.eks_secret_key else credentials.secret_key 50 | 51 | service_name = settings.eks_service_name if settings.eks_secret_key else "aps" 52 | if not region: 53 | raise Exception("No eks region specified") 54 | 55 | return AWSPrometheusConfig( 56 | access_key=access_key, 57 | secret_access_key=secret_key, 58 | aws_region=region, 59 | service_name=service_name, 60 | **baseconfig, 61 | ) 62 | # coralogix config 63 | if settings.coralogix_token: 64 | return CoralogixPrometheusConfig(**baseconfig, prometheus_token=settings.coralogix_token.get_secret_value()) 65 | if isinstance(metrics_service, VictoriaMetricsService): 66 | return VictoriaMetricsPrometheusConfig(**baseconfig) 67 | return PrometheusConfig(**baseconfig) 68 | -------------------------------------------------------------------------------- /robusta_krr/core/models/allocations.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import enum 4 | import math 5 | from typing import Literal, Optional, TypeVar, Union 6 | 7 | import pydantic as pd 8 | from kubernetes.client.models import V1Container 9 | 10 | from robusta_krr.utils import resource_units 11 | 12 | 13 | class ResourceType(str, enum.Enum): 14 | """The type of resource. 15 | 16 | Just add new types here and they will be automatically supported. 17 | """ 18 | 19 | CPU = "cpu" 20 | Memory = "memory" 21 | 22 | 23 | RecommendationValue = Union[float, Literal["?"], None] 24 | RecommendationValueRaw = Union[float, str, None] 25 | 26 | Self = TypeVar("Self", bound="ResourceAllocations") 27 | 28 | NONE_LITERAL = "unset" 29 | NAN_LITERAL = "?" 
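# Rendering conventions (see format_recommendation_value below): None is shown as NONE_LITERAL
# ("unset"), the NaN marker is shown as NAN_LITERAL ("?"), and numeric values are formatted
# via robusta_krr.utils.resource_units.format.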
30 | 31 | def format_recommendation_value(value: RecommendationValue) -> str: 32 | if value is None: 33 | return NONE_LITERAL 34 | elif isinstance(value, str): 35 | return NAN_LITERAL 36 | else: 37 | return resource_units.format(value) 38 | 39 | def format_diff(allocated, recommended, selector, multiplier=1, colored=False) -> str: 40 | if recommended is None or isinstance(recommended.value, str) or selector != "requests": 41 | return "" 42 | else: 43 | reccomended_val = recommended.value if isinstance(recommended.value, (int, float)) else 0 44 | allocated_val = allocated if isinstance(allocated, (int, float)) else 0 45 | diff_val = reccomended_val - allocated_val 46 | if colored: 47 | diff_sign = "[green]+[/green]" if diff_val >= 0 else "[red]-[/red]" 48 | else: 49 | diff_sign = "+" if diff_val >= 0 else "-" 50 | return f"{diff_sign}{format_recommendation_value(abs(diff_val) * multiplier)}" 51 | 52 | class ResourceAllocations(pd.BaseModel): 53 | requests: dict[ResourceType, RecommendationValue] 54 | limits: dict[ResourceType, RecommendationValue] 55 | info: dict[ResourceType, Optional[str]] = {} 56 | 57 | @staticmethod 58 | def __parse_resource_value(value: RecommendationValueRaw) -> RecommendationValue: 59 | if value is None: 60 | return None 61 | 62 | if isinstance(value, str): 63 | return float(resource_units.parse(value)) 64 | 65 | if math.isnan(value): 66 | return "?" 67 | 68 | return float(value) 69 | 70 | @pd.validator("requests", "limits", pre=True) 71 | def validate_requests( 72 | cls, value: dict[ResourceType, RecommendationValueRaw] 73 | ) -> dict[ResourceType, RecommendationValue]: 74 | return { 75 | resource_type: cls.__parse_resource_value(resource_value) for resource_type, resource_value in value.items() 76 | } 77 | 78 | @classmethod 79 | def from_container(cls: type[Self], container: V1Container) -> Self: 80 | """Get the resource allocations from a Kubernetes container. 81 | 82 | Args: 83 | container: The Kubernetes container. 84 | 85 | Returns: 86 | The resource allocations. 
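        Example (illustrative sketch; `container` would typically come from a workload's pod
        template, e.g. `deployment.spec.template.spec.containers[0]` in the Kubernetes client models):

            allocations = ResourceAllocations.from_container(container)
            cpu_request = allocations.requests[ResourceType.CPU]  # parsed numeric value or None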
87 | """ 88 | 89 | return cls( 90 | requests={ 91 | ResourceType.CPU: container.resources.requests.get("cpu") 92 | if container.resources and container.resources.requests 93 | else None, 94 | ResourceType.Memory: container.resources.requests.get("memory") 95 | if container.resources and container.resources.requests 96 | else None, 97 | }, 98 | limits={ 99 | ResourceType.CPU: container.resources.limits.get("cpu") 100 | if container.resources and container.resources.limits 101 | else None, 102 | ResourceType.Memory: container.resources.limits.get("memory") 103 | if container.resources and container.resources.limits 104 | else None, 105 | }, 106 | ) 107 | -------------------------------------------------------------------------------- /robusta_krr/core/models/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import logging 4 | import sys 5 | from typing import Any, Literal, Optional, Union 6 | 7 | import pydantic as pd 8 | from kubernetes import config 9 | from kubernetes.config.config_exception import ConfigException 10 | from rich.console import Console 11 | from rich.logging import RichHandler 12 | 13 | from robusta_krr.core.abstract import formatters 14 | from robusta_krr.core.abstract.strategies import AnyStrategy, BaseStrategy 15 | from robusta_krr.core.models.objects import KindLiteral 16 | 17 | logger = logging.getLogger("krr") 18 | 19 | 20 | class Config(pd.BaseSettings): 21 | quiet: bool = pd.Field(False) 22 | verbose: bool = pd.Field(False) 23 | 24 | clusters: Union[list[str], Literal["*"], None] = None 25 | kubeconfig: Optional[str] = None 26 | impersonate_user: Optional[str] = None 27 | impersonate_group: Optional[str] = None 28 | namespaces: Union[list[str], Literal["*"]] = pd.Field("*") 29 | resources: Union[list[KindLiteral], Literal["*"]] = pd.Field("*") 30 | selector: Optional[str] = None 31 | 32 | # Value settings 33 | cpu_min_value: int = pd.Field(10, ge=0) # in millicores 34 | memory_min_value: int = pd.Field(100, ge=0) # in megabytes 35 | 36 | # Prometheus Settings 37 | prometheus_url: Optional[str] = pd.Field(None) 38 | prometheus_auth_header: Optional[pd.SecretStr] = pd.Field(None) 39 | prometheus_other_headers: dict[str, pd.SecretStr] = pd.Field(default_factory=dict) 40 | prometheus_ssl_enabled: bool = pd.Field(False) 41 | prometheus_cluster_label: Optional[str] = pd.Field(None) 42 | prometheus_label: Optional[str] = pd.Field(None) 43 | eks_managed_prom: bool = pd.Field(False) 44 | eks_managed_prom_profile_name: Optional[str] = pd.Field(None) 45 | eks_access_key: Optional[str] = pd.Field(None) 46 | eks_secret_key: Optional[pd.SecretStr] = pd.Field(None) 47 | eks_service_name: Optional[str] = pd.Field(None) 48 | eks_managed_prom_region: Optional[str] = pd.Field(None) 49 | coralogix_token: Optional[pd.SecretStr] = pd.Field(None) 50 | openshift: bool = pd.Field(False) 51 | 52 | # Threading settings 53 | max_workers: int = pd.Field(6, ge=1) 54 | 55 | # Logging Settings 56 | format: str 57 | show_cluster_name: bool 58 | strategy: str 59 | log_to_stderr: bool 60 | width: Optional[int] = pd.Field(None, ge=1) 61 | show_severity: bool = True 62 | 63 | # Output Settings 64 | file_output: Optional[str] = pd.Field(None) 65 | file_output_dynamic: bool = pd.Field(False) 66 | slack_output: Optional[str] = pd.Field(None) 67 | 68 | other_args: dict[str, Any] 69 | 70 | # Internal 71 | inside_cluster: bool = False 72 | _logging_console: Optional[Console] = pd.PrivateAttr(None) 73 | 74 | def 
__init__(self, **kwargs: Any) -> None: 75 | super().__init__(**kwargs) 76 | 77 | @property 78 | def Formatter(self) -> formatters.FormatterFunc: 79 | return formatters.find(self.format) 80 | 81 | @pd.validator("prometheus_url") 82 | def validate_prometheus_url(cls, v: Optional[str]): 83 | if v is None: 84 | return None 85 | 86 | if not v.startswith("https://") and not v.startswith("http://"): 87 | raise Exception("--prometheus-url must start with https:// or http://") 88 | 89 | v = v.removesuffix("/") 90 | 91 | return v 92 | 93 | @pd.validator("prometheus_other_headers", pre=True) 94 | def validate_prometheus_other_headers(cls, headers: Union[list[str], dict[str, str]]) -> dict[str, str]: 95 | if isinstance(headers, dict): 96 | return headers 97 | 98 | return {k.strip().lower(): v.strip() for k, v in [header.split(":") for header in headers]} 99 | 100 | @pd.validator("namespaces") 101 | def validate_namespaces(cls, v: Union[list[str], Literal["*"]]) -> Union[list[str], Literal["*"]]: 102 | if v == []: 103 | return "*" 104 | 105 | if isinstance(v, list): 106 | for val in v: 107 | if val.startswith("*"): 108 | raise ValueError("Namespace's values cannot start with an asterisk (*)") 109 | 110 | return [val.lower() for val in v] 111 | 112 | @pd.validator("resources", pre=True) 113 | def validate_resources(cls, v: Union[list[str], Literal["*"]]) -> Union[list[str], Literal["*"]]: 114 | if v == []: 115 | return "*" 116 | 117 | # NOTE: KindLiteral.__args__ is a tuple of all possible values of KindLiteral 118 | # So this will preserve the big and small letters of the resource 119 | return [next(r for r in KindLiteral.__args__ if r.lower() == val.lower()) for val in v] 120 | 121 | def create_strategy(self) -> AnyStrategy: 122 | StrategyType = AnyStrategy.find(self.strategy) 123 | StrategySettingsType = StrategyType.get_settings_type() 124 | return StrategyType(StrategySettingsType(**self.other_args)) # type: ignore 125 | 126 | @pd.validator("strategy") 127 | def validate_strategy(cls, v: str) -> str: 128 | BaseStrategy.find(v) # NOTE: raises if strategy is not found 129 | return v 130 | 131 | @pd.validator("format") 132 | def validate_format(cls, v: str) -> str: 133 | formatters.find(v) # NOTE: raises if strategy is not found 134 | return v 135 | 136 | @property 137 | def context(self) -> Optional[str]: 138 | return self.clusters[0] if self.clusters != "*" and self.clusters else None 139 | 140 | @property 141 | def logging_console(self) -> Console: 142 | if getattr(self, "_logging_console") is None: 143 | self._logging_console = Console(file=sys.stderr if self.log_to_stderr else sys.stdout, width=self.width) 144 | return self._logging_console 145 | 146 | def load_kubeconfig(self) -> None: 147 | try: 148 | config.load_kube_config(config_file=self.kubeconfig, context=self.context) 149 | self.inside_cluster = False 150 | except ConfigException: 151 | config.load_incluster_config() 152 | self.inside_cluster = True 153 | 154 | def get_kube_client(self, context: Optional[str] = None): 155 | if context is None: 156 | return None 157 | 158 | api_client = config.new_client_from_config(context=context, config_file=self.kubeconfig) 159 | if self.impersonate_user is not None: 160 | # trick copied from https://github.com/kubernetes-client/python/issues/362 161 | api_client.set_default_header("Impersonate-User", self.impersonate_user) 162 | if self.impersonate_group is not None: 163 | api_client.set_default_header("Impersonate-Group", self.impersonate_group) 164 | return api_client 165 | 166 | @staticmethod 167 
| def set_config(config: Config) -> None: 168 | global _config 169 | 170 | _config = config 171 | logging.basicConfig( 172 | level="NOTSET", 173 | format="%(message)s", 174 | datefmt="[%X]", 175 | handlers=[RichHandler(console=config.logging_console)], 176 | ) 177 | logging.getLogger("").setLevel(logging.CRITICAL) 178 | logger.setLevel(logging.DEBUG if config.verbose else logging.CRITICAL if config.quiet else logging.INFO) 179 | 180 | @staticmethod 181 | def get_config() -> Optional[Config]: 182 | return _config 183 | 184 | 185 | # NOTE: This class is just a proxy for _config. 186 | # Import settings from this module and use it like it is just a config object. 187 | class _Settings(Config): # Config here is used for type checking 188 | def __init__(self) -> None: 189 | pass 190 | 191 | def __getattr__(self, name: str): 192 | if _config is None: 193 | raise AttributeError("Config is not set") 194 | 195 | return getattr(_config, name) 196 | 197 | 198 | _config: Optional[Config] = None 199 | settings = _Settings() 200 | -------------------------------------------------------------------------------- /robusta_krr/core/models/objects.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Literal, Optional 4 | 5 | import pydantic as pd 6 | 7 | from robusta_krr.core.models.allocations import ResourceAllocations 8 | from robusta_krr.utils.batched import batched 9 | from kubernetes.client.models import V1LabelSelector 10 | 11 | KindLiteral = Literal["Deployment", "DaemonSet", "StatefulSet", "Job", "CronJob", "Rollout", "DeploymentConfig", "StrimziPodSet"] 12 | 13 | 14 | class PodData(pd.BaseModel): 15 | name: str 16 | deleted: bool 17 | 18 | def __hash__(self) -> int: 19 | return hash(self.name) 20 | 21 | 22 | class HPAData(pd.BaseModel): 23 | min_replicas: Optional[int] 24 | max_replicas: int 25 | current_replicas: Optional[int] 26 | desired_replicas: int 27 | target_cpu_utilization_percentage: Optional[float] 28 | target_memory_utilization_percentage: Optional[float] 29 | 30 | 31 | PodWarning = Literal[ 32 | "NoPrometheusPods", 33 | "NoPrometheusCPUMetrics", 34 | "NoPrometheusMemoryMetrics", 35 | ] 36 | 37 | 38 | class K8sObjectData(pd.BaseModel): 39 | # NOTE: Here None means that we are running inside the cluster 40 | cluster: Optional[str] 41 | name: str 42 | container: str 43 | pods: list[PodData] = [] 44 | hpa: Optional[HPAData] 45 | namespace: str 46 | kind: KindLiteral 47 | allocations: ResourceAllocations 48 | warnings: set[PodWarning] = set() 49 | labels: Optional[dict[str, str]] 50 | annotations: Optional[dict[str, str]] 51 | 52 | _api_resource = pd.PrivateAttr(None) 53 | 54 | def __str__(self) -> str: 55 | return f"{self.kind} {self.namespace}/{self.name}/{self.container}" 56 | 57 | def __hash__(self) -> int: 58 | return hash(str(self)) 59 | 60 | def add_warning(self, warning: PodWarning) -> None: 61 | self.warnings.add(warning) 62 | 63 | @property 64 | def current_pods_count(self) -> int: 65 | return len([pod for pod in self.pods if not pod.deleted]) 66 | 67 | @property 68 | def deleted_pods_count(self) -> int: 69 | return len([pod for pod in self.pods if pod.deleted]) 70 | 71 | @property 72 | def pods_count(self) -> int: 73 | return len(self.pods) 74 | 75 | @property 76 | def selector(self) -> V1LabelSelector: 77 | if self._api_resource is None: 78 | raise ValueError("api_resource is not set") 79 | 80 | if self.kind == 'CronJob': 81 | return 
self._api_resource.spec.job_template.spec.selector 82 | else: 83 | return self._api_resource.spec.selector 84 | 85 | def split_into_batches(self, n: int) -> list[K8sObjectData]: 86 | """ 87 | Batch this object into n objects, splitting the pods into batches of size n. 88 | """ 89 | 90 | if self.pods_count <= n: 91 | return [self] 92 | 93 | return [ 94 | K8sObjectData( 95 | cluster=self.cluster, 96 | name=self.name, 97 | container=self.container, 98 | pods=batch, 99 | hpa=self.hpa, 100 | namespace=self.namespace, 101 | kind=self.kind, 102 | allocations=self.allocations, 103 | labels=self.labels, 104 | annotations=self.annotations, 105 | ) 106 | for batch in batched(self.pods, n) 107 | ] 108 | -------------------------------------------------------------------------------- /robusta_krr/core/models/result.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any, Optional, Union 4 | 5 | import pydantic as pd 6 | 7 | from robusta_krr.core.abstract import formatters 8 | from robusta_krr.core.models.allocations import RecommendationValue, ResourceAllocations, ResourceType 9 | from robusta_krr.core.models.objects import K8sObjectData 10 | from robusta_krr.core.models.severity import Severity 11 | from robusta_krr.core.models.config import Config 12 | 13 | 14 | class Recommendation(pd.BaseModel): 15 | value: RecommendationValue 16 | severity: Severity 17 | 18 | 19 | class ResourceRecommendation(pd.BaseModel): 20 | requests: dict[ResourceType, Union[RecommendationValue, Recommendation]] 21 | limits: dict[ResourceType, Union[RecommendationValue, Recommendation]] 22 | info: dict[ResourceType, Optional[str]] 23 | 24 | 25 | class ResourceScan(pd.BaseModel): 26 | object: K8sObjectData 27 | recommended: ResourceRecommendation 28 | severity: Severity 29 | 30 | @classmethod 31 | def calculate(cls, object: K8sObjectData, recommendation: ResourceAllocations) -> ResourceScan: 32 | recommendation_processed = ResourceRecommendation(requests={}, limits={}, info={}) 33 | 34 | for resource_type in ResourceType: 35 | recommendation_processed.info[resource_type] = recommendation.info.get(resource_type) 36 | 37 | for selector in ["requests", "limits"]: 38 | current = getattr(object.allocations, selector).get(resource_type) 39 | recommended = getattr(recommendation, selector).get(resource_type) 40 | 41 | current_severity = Severity.calculate(current, recommended, resource_type) 42 | 43 | #TODO: consider... changing field after model created doesn't validate it. 
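                # (With pydantic v1, mutating a dict field on an already-created model bypasses
                # validation: validators only re-run on attribute assignment when
                # `validate_assignment` is enabled, and never for in-place dict mutation.)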
44 | getattr(recommendation_processed, selector)[resource_type] = Recommendation( 45 | value=recommended, severity=current_severity 46 | ) 47 | 48 | for severity in [Severity.CRITICAL, Severity.WARNING, Severity.OK, Severity.GOOD, Severity.UNKNOWN]: 49 | for selector in ["requests", "limits"]: 50 | for recommendation_request in getattr(recommendation_processed, selector).values(): 51 | if recommendation_request.severity == severity: 52 | return cls(object=object, recommended=recommendation_processed, severity=severity) 53 | 54 | return cls(object=object, recommended=recommendation_processed, severity=Severity.UNKNOWN) 55 | 56 | 57 | class StrategyData(pd.BaseModel): 58 | name: str 59 | settings: dict[str, Any] 60 | 61 | 62 | class Result(pd.BaseModel): 63 | scans: list[ResourceScan] 64 | score: int = 0 65 | resources: list[str] = ["cpu", "memory"] 66 | description: Optional[str] = None 67 | strategy: StrategyData 68 | errors: list[dict[str, Any]] = pd.Field(default_factory=list) 69 | clusterSummary: dict[str, Any] = {} 70 | config: Optional[Config] = pd.Field(default_factory=Config.get_config) 71 | 72 | def __init__(self, *args, **kwargs) -> None: 73 | super().__init__(*args, **kwargs) 74 | self.score = self.__calculate_score() 75 | 76 | def format(self, formatter: Union[formatters.FormatterFunc, str]) -> Any: 77 | """Format the result. 78 | 79 | Args: 80 | formatter: The formatter to use. 81 | 82 | Returns: 83 | The formatted result. 84 | """ 85 | 86 | formatter = formatters.find(formatter) if isinstance(formatter, str) else formatter 87 | return formatter(self) 88 | 89 | @staticmethod 90 | def __scan_cost(scan: ResourceScan) -> float: 91 | return 0.7 if scan.severity == Severity.WARNING else 1 if scan.severity == Severity.CRITICAL else 0 92 | 93 | def __calculate_score(self) -> int: 94 | """Get the score of the result. 95 | 96 | Returns: 97 | The score of the result. 98 | """ 99 | 100 | score = sum(self.__scan_cost(scan) for scan in self.scans) 101 | return int((len(self.scans) - score) / len(self.scans) * 100) if self.scans else 0 102 | 103 | @property 104 | def score_letter(self) -> str: 105 | return ( 106 | "F" 107 | if self.score < 30 108 | else "D" 109 | if self.score < 55 110 | else "C" 111 | if self.score < 70 112 | else "B" 113 | if self.score < 90 114 | else "A" 115 | ) 116 | -------------------------------------------------------------------------------- /robusta_krr/core/models/severity.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import enum 4 | from typing import Callable, Optional 5 | 6 | from robusta_krr.core.models.allocations import RecommendationValue, ResourceType 7 | 8 | 9 | class Severity(str, enum.Enum): 10 | """ 11 | The severity of the scan. 12 | 13 | The severity is calculated based on the difference between the current value and the recommended value. 14 | You can override the severity calculation function by using the `bind_calculator` decorator from the same module. 
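    (In this module the decorator is defined below as `register_severity_calculator`.)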
15 | """ 16 | 17 | UNKNOWN = "UNKNOWN" 18 | GOOD = "GOOD" 19 | OK = "OK" 20 | WARNING = "WARNING" 21 | CRITICAL = "CRITICAL" 22 | 23 | @property 24 | def color(self) -> str: 25 | return { 26 | self.UNKNOWN: "dim", 27 | self.GOOD: "green", 28 | self.OK: "gray", 29 | self.WARNING: "yellow", 30 | self.CRITICAL: "red", 31 | }[self] 32 | 33 | @classmethod 34 | def calculate( 35 | cls, current: RecommendationValue, recommended: RecommendationValue, resource_type: ResourceType 36 | ) -> Severity: 37 | if isinstance(recommended, str) or isinstance(current, str): 38 | return cls.UNKNOWN 39 | 40 | return calculate_severity(current, recommended, resource_type) 41 | 42 | 43 | def register_severity_calculator(resource_type: ResourceType) -> Callable[[SeverityCalculator], SeverityCalculator]: 44 | """ 45 | Bind a severity calculator function to a resource type. 46 | The bound function overrides how severity is computed for that resource type. 47 | 48 | Example: 49 | >>> @register_severity_calculator(ResourceType.CPU) 50 | >>> def cpu_severity_calculator(current: Optional[float], recommended: Optional[float], resource_type: ResourceType) -> Severity: 51 | >>> if current is None and recommended is None: 52 | >>> return Severity.GOOD 53 | >>> if current is None or recommended is None: 54 | >>> return Severity.WARNING 55 | >>> 56 | >>> return Severity.CRITICAL if abs(current - recommended) >= 0.5 else Severity.GOOD 57 | """ 58 | 59 | def decorator(func: SeverityCalculator) -> SeverityCalculator: 60 | SEVERITY_CALCULATORS_REGISTRY[resource_type] = func 61 | return func 62 | 63 | return decorator 64 | 65 | 66 | SeverityCalculator = Callable[[Optional[float], Optional[float], ResourceType], Severity] 67 | SEVERITY_CALCULATORS_REGISTRY: dict[ResourceType, SeverityCalculator] = {} 68 | 69 | 70 | def calculate_severity(current: Optional[float], recommended: Optional[float], resource_type: ResourceType) -> Severity: 71 | """ 72 | Calculate the severity of the scan based on the current value and the recommended value. 73 | 74 | This function will use the severity calculator function that is bound to the resource type. 75 | If no calculator is bound to the resource type, the default severity calculator is used.
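    Example (illustrative; CPU values are in cores):
        calculate_severity(1.0, 0.2, ResourceType.CPU) returns Severity.CRITICAL, since the
        CPU calculator below treats an absolute difference of 0.5 cores or more as critical.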
76 | """ 77 | 78 | return SEVERITY_CALCULATORS_REGISTRY.get(resource_type, default_severity_calculator)( 79 | current, recommended, resource_type 80 | ) 81 | 82 | 83 | def default_severity_calculator( 84 | current: Optional[float], recommended: Optional[float], resource_type: ResourceType 85 | ) -> Severity: 86 | return Severity.UNKNOWN 87 | 88 | 89 | @register_severity_calculator(ResourceType.CPU) 90 | def cpu_severity_calculator( 91 | current: Optional[float], recommended: Optional[float], resource_type: ResourceType 92 | ) -> Severity: 93 | if current is None and recommended is None: 94 | return Severity.GOOD 95 | if current is None or recommended is None: 96 | return Severity.WARNING 97 | 98 | diff = abs(current - recommended) 99 | 100 | if diff >= 0.5: 101 | return Severity.CRITICAL 102 | elif diff >= 0.25: 103 | return Severity.WARNING 104 | elif diff >= 0.1: 105 | return Severity.OK 106 | else: 107 | return Severity.GOOD 108 | 109 | 110 | @register_severity_calculator(ResourceType.Memory) 111 | def memory_severity_calculator( 112 | current: Optional[float], recommended: Optional[float], resource_type: ResourceType 113 | ) -> Severity: 114 | if current is None and recommended is None: 115 | return Severity.GOOD 116 | if current is None or recommended is None: 117 | return Severity.WARNING 118 | 119 | diff = abs(current - recommended) / 1024 / 1024 120 | 121 | if diff >= 500: 122 | return Severity.CRITICAL 123 | elif diff >= 250: 124 | return Severity.WARNING 125 | elif diff >= 100: 126 | return Severity.OK 127 | else: 128 | return Severity.GOOD 129 | -------------------------------------------------------------------------------- /robusta_krr/formatters/__init__.py: -------------------------------------------------------------------------------- 1 | from .json import json 2 | from .pprint import pprint 3 | from .table import table 4 | from .yaml import yaml 5 | from .csv import csv 6 | from .csv_raw import csv_raw 7 | from .html import html 8 | -------------------------------------------------------------------------------- /robusta_krr/formatters/csv.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import io 3 | import itertools 4 | import logging 5 | from typing import Any 6 | 7 | from robusta_krr.core.abstract import formatters 8 | from robusta_krr.core.models.allocations import NONE_LITERAL, format_diff, format_recommendation_value 9 | from robusta_krr.core.models.config import settings 10 | from robusta_krr.core.models.result import ResourceScan, ResourceType, Result 11 | 12 | logger = logging.getLogger("krr") 13 | 14 | 15 | NAMESPACE_HEADER = "Namespace" 16 | NAME_HEADER = "Name" 17 | PODS_HEADER = "Pods" 18 | OLD_PODS_HEADER = "Old Pods" 19 | TYPE_HEADER = "Type" 20 | CONTAINER_HEADER = "Container" 21 | CLUSTER_HEADER = "Cluster" 22 | SEVERITY_HEADER = "Severity" 23 | 24 | RESOURCE_DIFF_HEADER = "{resource_name} Diff" 25 | RESOURCE_REQUESTS_HEADER = "{resource_name} Requests" 26 | RESOURCE_LIMITS_HEADER = "{resource_name} Limits" 27 | 28 | 29 | def _format_request_str(item: ResourceScan, resource: ResourceType, selector: str) -> str: 30 | allocated = getattr(item.object.allocations, selector)[resource] 31 | recommended = getattr(item.recommended, selector)[resource] 32 | 33 | if allocated is None and recommended.value is None: 34 | return f"{NONE_LITERAL}" 35 | 36 | diff = format_diff(allocated, recommended, selector) 37 | if diff != "": 38 | diff = f"({diff}) " 39 | 40 | return diff + 
format_recommendation_value(allocated) + " -> " + format_recommendation_value(recommended.value) 41 | 42 | 43 | def _format_total_diff(item: ResourceScan, resource: ResourceType, pods_current: int) -> str: 44 | selector = "requests" 45 | allocated = getattr(item.object.allocations, selector)[resource] 46 | recommended = getattr(item.recommended, selector)[resource] 47 | 48 | return format_diff(allocated, recommended, selector, pods_current) 49 | 50 | 51 | @formatters.register("csv") 52 | def csv_exporter(result: Result) -> str: 53 | # We need to order the resource columns so that they are in the format of Namespace,Name,Pods,Old Pods,Type,Container,CPU Diff,CPU Requests,CPU Limits,Memory Diff,Memory Requests,Memory Limits 54 | csv_columns = ["Namespace", "Name", "Pods", "Old Pods", "Type", "Container"] 55 | 56 | if settings.show_cluster_name: 57 | csv_columns.insert(0, "Cluster") 58 | 59 | if settings.show_severity: 60 | csv_columns.append("Severity") 61 | 62 | for resource in ResourceType: 63 | csv_columns.append(RESOURCE_DIFF_HEADER.format(resource_name=resource.name)) 64 | csv_columns.append(RESOURCE_REQUESTS_HEADER.format(resource_name=resource.name)) 65 | csv_columns.append(RESOURCE_LIMITS_HEADER.format(resource_name=resource.name)) 66 | 67 | output = io.StringIO() 68 | csv_writer = csv.DictWriter(output, csv_columns, extrasaction="ignore") 69 | csv_writer.writeheader() 70 | 71 | for _, group in itertools.groupby( 72 | enumerate(result.scans), key=lambda x: (x[1].object.cluster, x[1].object.namespace, x[1].object.name) 73 | ): 74 | group_items = list(group) 75 | 76 | for j, (_, item) in enumerate(group_items): 77 | full_info_row = j == 0 78 | 79 | row: dict[str, Any] = { 80 | NAMESPACE_HEADER: item.object.namespace if full_info_row else "", 81 | NAME_HEADER: item.object.name if full_info_row else "", 82 | PODS_HEADER: f"{item.object.current_pods_count}" if full_info_row else "", 83 | OLD_PODS_HEADER: f"{item.object.deleted_pods_count}" if full_info_row else "", 84 | TYPE_HEADER: item.object.kind if full_info_row else "", 85 | CONTAINER_HEADER: item.object.container, 86 | SEVERITY_HEADER: item.severity, 87 | CLUSTER_HEADER: item.object.cluster, 88 | } 89 | 90 | for resource in ResourceType: 91 | row[RESOURCE_DIFF_HEADER.format(resource_name=resource.name)] = _format_total_diff( 92 | item, resource, item.object.current_pods_count 93 | ) 94 | row[RESOURCE_REQUESTS_HEADER.format(resource_name=resource.name)] = _format_request_str( 95 | item, resource, "requests" 96 | ) 97 | row[RESOURCE_LIMITS_HEADER.format(resource_name=resource.name)] = _format_request_str( 98 | item, resource, "limits" 99 | ) 100 | 101 | csv_writer.writerow(row) 102 | 103 | return output.getvalue() 104 | -------------------------------------------------------------------------------- /robusta_krr/formatters/csv_raw.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import io 3 | import logging 4 | from typing import Any, Union 5 | 6 | from robusta_krr.core.abstract import formatters 7 | from robusta_krr.core.models.allocations import NAN_LITERAL, NONE_LITERAL 8 | from robusta_krr.core.models.config import settings 9 | from robusta_krr.core.models.result import ResourceScan, ResourceType, Result 10 | 11 | logger = logging.getLogger("krr") 12 | 13 | 14 | NAMESPACE_HEADER = "Namespace" 15 | NAME_HEADER = "Name" 16 | PODS_HEADER = "Pods" 17 | OLD_PODS_HEADER = "Old Pods" 18 | TYPE_HEADER = "Type" 19 | CONTAINER_HEADER = "Container" 20 | CLUSTER_HEADER = "Cluster" 21 | 
SEVERITY_HEADER = "Severity" 22 | 23 | RESOURCE_REQUESTS_CURRENT_HEADER = "{resource_name} Requests Current" 24 | RESOURCE_REQUESTS_RECOMMENDED_HEADER = '{resource_name} Requests Recommended' 25 | 26 | RESOURCE_LIMITS_CURRENT_HEADER = "{resource_name} Limits Current" 27 | RESOURCE_LIMITS_RECOMMENDED_HEADER = '{resource_name} Limits Recommended' 28 | 29 | 30 | def _format_value(val: Union[float, int]) -> str: 31 | if isinstance(val, int): 32 | return str(val) 33 | elif isinstance(val, float): 34 | return str(int(val)) if val.is_integer() else str(val) 35 | elif val is None: 36 | return NONE_LITERAL 37 | elif isinstance(val, str): 38 | return NAN_LITERAL 39 | else: 40 | raise ValueError(f'unknown value: {val}') 41 | 42 | 43 | def _format_request_current(item: ResourceScan, resource: ResourceType, selector: str) -> str: 44 | allocated = getattr(item.object.allocations, selector)[resource] 45 | if allocated is None: 46 | return NONE_LITERAL 47 | return _format_value(allocated) 48 | 49 | 50 | def _format_request_recommend(item: ResourceScan, resource: ResourceType, selector: str) -> str: 51 | recommended = getattr(item.recommended, selector)[resource] 52 | if recommended is None: 53 | return NONE_LITERAL 54 | return _format_value(recommended.value) 55 | 56 | 57 | @formatters.register("csv-raw") 58 | def csv_raw(result: Result) -> str: 59 | # We need to order the resource columns so that they are in the format of 60 | # Namespace, Name, Pods, Old Pods, Type, Container, 61 | # CPU Requests Current, CPU Requests Recommend, CPU Limits Current, CPU Limits Recommend, 62 | # Memory Requests Current, Memory Requests Recommend, Memory Limits Current, Memory Limits Recommend, 63 | csv_columns = ["Namespace", "Name", "Pods", "Old Pods", "Type", "Container"] 64 | 65 | if settings.show_cluster_name: 66 | csv_columns.insert(0, "Cluster") 67 | 68 | if settings.show_severity: 69 | csv_columns.append("Severity") 70 | 71 | for resource in ResourceType: 72 | csv_columns.append(RESOURCE_REQUESTS_CURRENT_HEADER.format(resource_name=resource.name)) 73 | csv_columns.append(RESOURCE_REQUESTS_RECOMMENDED_HEADER.format(resource_name=resource.name)) 74 | csv_columns.append(RESOURCE_LIMITS_CURRENT_HEADER.format(resource_name=resource.name)) 75 | csv_columns.append(RESOURCE_LIMITS_RECOMMENDED_HEADER.format(resource_name=resource.name)) 76 | 77 | output = io.StringIO() 78 | csv_writer = csv.DictWriter(output, csv_columns, extrasaction="ignore") 79 | csv_writer.writeheader() 80 | 81 | for item in result.scans: 82 | row: dict[str, Any] = { 83 | NAMESPACE_HEADER: item.object.namespace, 84 | NAME_HEADER: item.object.name, 85 | PODS_HEADER: f"{item.object.current_pods_count}", 86 | OLD_PODS_HEADER: f"{item.object.deleted_pods_count}", 87 | TYPE_HEADER: item.object.kind, 88 | CONTAINER_HEADER: item.object.container, 89 | SEVERITY_HEADER: item.severity, 90 | CLUSTER_HEADER: item.object.cluster, 91 | } 92 | 93 | for resource in ResourceType: 94 | resource: ResourceType 95 | row[RESOURCE_REQUESTS_CURRENT_HEADER.format(resource_name=resource.name)] = _format_request_current( 96 | item, resource, "requests" 97 | ) 98 | row[RESOURCE_REQUESTS_RECOMMENDED_HEADER.format(resource_name=resource.name)] = _format_request_recommend( 99 | item, resource, "requests" 100 | ) 101 | row[RESOURCE_LIMITS_CURRENT_HEADER.format(resource_name=resource.name)] = _format_request_current( 102 | item, resource, "limits" 103 | ) 104 | row[RESOURCE_LIMITS_RECOMMENDED_HEADER.format(resource_name=resource.name)] = _format_request_recommend( 105 | item, resource, 
"limits" 106 | ) 107 | 108 | csv_writer.writerow(row) 109 | 110 | return output.getvalue() 111 | -------------------------------------------------------------------------------- /robusta_krr/formatters/html.py: -------------------------------------------------------------------------------- 1 | from rich.console import Console 2 | 3 | from robusta_krr.core.abstract import formatters 4 | from robusta_krr.core.models.result import Result 5 | from .table import table 6 | 7 | @formatters.register("html") 8 | def html(result: Result) -> str: 9 | console = Console(record=True) 10 | table_output = table(result) 11 | console.print(table_output) 12 | return console.export_html(inline_styles=True) 13 | -------------------------------------------------------------------------------- /robusta_krr/formatters/json.py: -------------------------------------------------------------------------------- 1 | from robusta_krr.core.abstract import formatters 2 | from robusta_krr.core.models.result import Result 3 | 4 | 5 | @formatters.register() 6 | def json(result: Result) -> str: 7 | return result.json(indent=2) 8 | -------------------------------------------------------------------------------- /robusta_krr/formatters/pprint.py: -------------------------------------------------------------------------------- 1 | from pprint import pformat 2 | 3 | from robusta_krr.core.abstract import formatters 4 | from robusta_krr.core.models.result import Result 5 | 6 | 7 | @formatters.register() 8 | def pprint(result: Result) -> str: 9 | return pformat(result.dict()) 10 | -------------------------------------------------------------------------------- /robusta_krr/formatters/table.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from typing import Any 3 | 4 | from rich.table import Table 5 | 6 | from robusta_krr.core.abstract import formatters 7 | from robusta_krr.core.models.allocations import RecommendationValue, format_recommendation_value, format_diff, NONE_LITERAL, NAN_LITERAL 8 | from robusta_krr.core.models.result import ResourceScan, ResourceType, Result 9 | from robusta_krr.core.models.config import settings 10 | from robusta_krr.utils import resource_units 11 | 12 | 13 | DEFAULT_INFO_COLOR = "grey27" 14 | INFO_COLORS: dict[str, str] = { 15 | "OOMKill detected": "dark_red", 16 | } 17 | 18 | 19 | def _format_request_str(item: ResourceScan, resource: ResourceType, selector: str) -> str: 20 | allocated = getattr(item.object.allocations, selector)[resource] 21 | info = item.recommended.info.get(resource) 22 | recommended = getattr(item.recommended, selector)[resource] 23 | severity = recommended.severity 24 | 25 | if allocated is None and recommended.value is None: 26 | return f"[{severity.color}]{NONE_LITERAL}[/{severity.color}]" 27 | 28 | diff = format_diff(allocated, recommended, selector, colored=True) 29 | if diff != "": 30 | diff = f"({diff}) " 31 | 32 | if info is None: 33 | info_formatted = "" 34 | else: 35 | color = INFO_COLORS.get(info, DEFAULT_INFO_COLOR) 36 | info_formatted = f"\n[{color}]({info})[/{color}]" 37 | 38 | return ( 39 | diff 40 | + f"[{severity.color}]" 41 | + format_recommendation_value(allocated) 42 | + " -> " 43 | + format_recommendation_value(recommended.value) 44 | + f"[/{severity.color}]" 45 | + info_formatted 46 | ) 47 | 48 | 49 | def _format_total_diff(item: ResourceScan, resource: ResourceType, pods_current: int) -> str: 50 | selector = "requests" 51 | allocated = getattr(item.object.allocations, selector)[resource] 52 | 
recommended = getattr(item.recommended, selector)[resource] 53 | 54 | # if we have more than one pod, say so (this explains to the user why the total is different than the recommendation) 55 | if pods_current == 1: 56 | pods_info = "" 57 | else: 58 | pods_info = f"\n({pods_current} pods)" 59 | 60 | return f"{format_diff(allocated, recommended, selector, pods_current, colored=True)}{pods_info}" 61 | 62 | 63 | @formatters.register(rich_console=True) 64 | def table(result: Result) -> Table: 65 | """Format the result as text. 66 | 67 | :param result: The result to format. 68 | :type result: :class:`core.result.Result` 69 | :returns: The formatted results. 70 | :rtype: str 71 | """ 72 | 73 | table = Table( 74 | show_header=True, 75 | header_style="bold magenta", 76 | title=f"\n{result.description}\n" if result.description else None, 77 | title_justify="left", 78 | title_style="", 79 | caption=f"{result.score} points - {result.score_letter}", 80 | ) 81 | 82 | cluster_count = len(set(item.object.cluster for item in result.scans)) 83 | 84 | table.add_column("Number", justify="right", no_wrap=True) 85 | if cluster_count > 1 or settings.show_cluster_name: 86 | table.add_column("Cluster", style="cyan") 87 | table.add_column("Namespace", style="cyan") 88 | table.add_column("Name", style="cyan") 89 | table.add_column("Pods", style="cyan") 90 | table.add_column("Old Pods", style="cyan") 91 | table.add_column("Type", style="cyan") 92 | table.add_column("Container", style="cyan") 93 | for resource in ResourceType: 94 | table.add_column(f"{resource.name} Diff") 95 | table.add_column(f"{resource.name} Requests") 96 | table.add_column(f"{resource.name} Limits") 97 | 98 | for _, group in itertools.groupby( 99 | enumerate(result.scans), key=lambda x: (x[1].object.cluster, x[1].object.namespace, x[1].object.name) 100 | ): 101 | group_items = list(group) 102 | 103 | for j, (i, item) in enumerate(group_items): 104 | last_row = j == len(group_items) - 1 105 | full_info_row = j == 0 106 | 107 | cells: list[Any] = [f"[{item.severity.color}]{i + 1}.[/{item.severity.color}]"] 108 | if cluster_count > 1 or settings.show_cluster_name: 109 | cells.append(item.object.cluster if full_info_row else "") 110 | cells += [ 111 | item.object.namespace if full_info_row else "", 112 | item.object.name if full_info_row else "", 113 | f"{item.object.current_pods_count}" if full_info_row else "", 114 | f"{item.object.deleted_pods_count}" if full_info_row else "", 115 | item.object.kind if full_info_row else "", 116 | item.object.container, 117 | ] 118 | 119 | for resource in ResourceType: 120 | cells.append(_format_total_diff(item, resource, item.object.current_pods_count)) 121 | cells += [_format_request_str(item, resource, selector) for selector in ["requests", "limits"]] 122 | 123 | table.add_row(*cells, end_section=last_row) 124 | 125 | return table 126 | -------------------------------------------------------------------------------- /robusta_krr/formatters/yaml.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import yaml as yaml_module 4 | 5 | from robusta_krr.core.abstract import formatters 6 | from robusta_krr.core.models.result import Result 7 | 8 | 9 | @formatters.register() 10 | def yaml(result: Result) -> str: 11 | return yaml_module.dump(json.loads(result.json()), sort_keys=False) 12 | -------------------------------------------------------------------------------- /robusta_krr/strategies/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .simple import SimpleStrategy 2 | from .simple_limit import SimpleLimitStrategy -------------------------------------------------------------------------------- /robusta_krr/strategies/simple.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | from datetime import timedelta 3 | 4 | import numpy as np 5 | import pydantic as pd 6 | 7 | from robusta_krr.core.abstract.strategies import ( 8 | BaseStrategy, 9 | K8sObjectData, 10 | MetricsPodData, 11 | PodsTimeData, 12 | ResourceRecommendation, 13 | ResourceType, 14 | RunResult, 15 | StrategySettings, 16 | ) 17 | from robusta_krr.core.integrations.prometheus.metrics import ( 18 | CPUAmountLoader, 19 | MaxMemoryLoader, 20 | MemoryAmountLoader, 21 | PercentileCPULoader, 22 | PrometheusMetric, 23 | MaxOOMKilledMemoryLoader, 24 | ) 25 | 26 | 27 | class SimpleStrategySettings(StrategySettings): 28 | cpu_percentile: float = pd.Field(95, gt=0, le=100, description="The percentile to use for the CPU recommendation.") 29 | memory_buffer_percentage: float = pd.Field( 30 | 15, gt=0, description="The percentage of added buffer to the peak memory usage for memory recommendation." 31 | ) 32 | points_required: int = pd.Field( 33 | 100, ge=1, description="The number of data points required to make a recommendation for a resource." 34 | ) 35 | allow_hpa: bool = pd.Field( 36 | False, 37 | description="Whether to calculate recommendations even when there is an HPA scaler defined on that resource.", 38 | ) 39 | use_oomkill_data: bool = pd.Field( 40 | False, 41 | description="Whether to bump the memory when OOMKills are detected (experimental).", 42 | ) 43 | oom_memory_buffer_percentage: float = pd.Field( 44 | 25, ge=0, description="What percentage to increase the memory when there are OOMKill events." 
45 | ) 46 | 47 | def calculate_memory_proposal(self, data: PodsTimeData, max_oomkill: float = 0) -> float: 48 | data_ = [np.max(values[:, 1]) for values in data.values()] 49 | if len(data_) == 0: 50 | return float("NaN") 51 | 52 | return max( 53 | np.max(data_) * (1 + self.memory_buffer_percentage / 100), 54 | max_oomkill * (1 + self.oom_memory_buffer_percentage / 100), 55 | ) 56 | 57 | def calculate_cpu_proposal(self, data: PodsTimeData) -> float: 58 | if len(data) == 0: 59 | return float("NaN") 60 | 61 | if len(data) > 1: 62 | data_ = np.concatenate([values[:, 1] for values in data.values()]) 63 | else: 64 | data_ = list(data.values())[0][:, 1] 65 | 66 | return np.max(data_) 67 | 68 | def history_range_enough(self, history_range: tuple[timedelta, timedelta]) -> bool: 69 | start, end = history_range 70 | return (end - start) >= timedelta(hours=3) 71 | 72 | 73 | class SimpleStrategy(BaseStrategy[SimpleStrategySettings]): 74 | 75 | display_name = "simple" 76 | rich_console = True 77 | 78 | @property 79 | def metrics(self) -> list[type[PrometheusMetric]]: 80 | metrics = [ 81 | PercentileCPULoader(self.settings.cpu_percentile), 82 | MaxMemoryLoader, 83 | CPUAmountLoader, 84 | MemoryAmountLoader, 85 | ] 86 | 87 | if self.settings.use_oomkill_data: 88 | metrics.append(MaxOOMKilledMemoryLoader) 89 | 90 | return metrics 91 | 92 | @property 93 | def description(self): 94 | s = textwrap.dedent(f"""\ 95 | CPU request: {self.settings.cpu_percentile}% percentile, limit: unset 96 | Memory request: max + {self.settings.memory_buffer_percentage}%, limit: max + {self.settings.memory_buffer_percentage}% 97 | History: {self.settings.history_duration} hours 98 | Step: {self.settings.timeframe_duration} minutes 99 | 100 | All parameters can be customized. For example: `krr simple --cpu_percentile=90 --memory_buffer_percentage=15 --history_duration=24 --timeframe_duration=0.5` 101 | """) 102 | 103 | if not self.settings.allow_hpa: 104 | s += "\n" + textwrap.dedent(f"""\ 105 | This strategy does not work with objects with HPA defined (Horizontal Pod Autoscaler). 106 | If HPA is defined for CPU or Memory, the strategy will return "?" for that resource. 
107 | You can override this behaviour by passing the --allow-hpa flag 108 | """) 109 | 110 | s += "\nLearn more: [underline]https://github.com/robusta-dev/krr#algorithm[/underline]" 111 | return s 112 | 113 | def __calculate_cpu_proposal( 114 | self, history_data: MetricsPodData, object_data: K8sObjectData 115 | ) -> ResourceRecommendation: 116 | data = history_data["PercentileCPULoader"] 117 | 118 | if len(data) == 0: 119 | return ResourceRecommendation.undefined(info="No data") 120 | 121 | # NOTE: metrics for each pod are returned as list[values] where values is [timestamp, value] 122 | # As CPUAmountLoader returns only the last value (1 point), [0, 1] is used to get the value 123 | # So each pod is string with pod name, and values is numpy array of shape (N, 2) 124 | data_count = {pod: values[0, 1] for pod, values in history_data["CPUAmountLoader"].items()} 125 | total_points_count = sum(data_count.values()) 126 | 127 | if total_points_count < self.settings.points_required: 128 | return ResourceRecommendation.undefined(info="Not enough data") 129 | 130 | if ( 131 | object_data.hpa is not None 132 | and object_data.hpa.target_cpu_utilization_percentage is not None 133 | and not self.settings.allow_hpa 134 | ): 135 | return ResourceRecommendation.undefined(info="HPA detected") 136 | 137 | cpu_usage = self.settings.calculate_cpu_proposal(data) 138 | return ResourceRecommendation(request=cpu_usage, limit=None) 139 | 140 | def __calculate_memory_proposal( 141 | self, history_data: MetricsPodData, object_data: K8sObjectData 142 | ) -> ResourceRecommendation: 143 | data = history_data["MaxMemoryLoader"] 144 | 145 | oomkill_detected = False 146 | 147 | if self.settings.use_oomkill_data: 148 | max_oomkill_data = history_data["MaxOOMKilledMemoryLoader"] 149 | # NOTE: metrics for each pod are returned as list[values] where values is [timestamp, value] 150 | # As MaxOOMKilledMemoryLoader returns only the last value (1 point), [0, 1] is used to get the value 151 | # So each value is numpy array of shape (N, 2) 152 | max_oomkill_value = ( 153 | np.max([values[0, 1] for values in max_oomkill_data.values()]) if len(max_oomkill_data) > 0 else 0 154 | ) 155 | if max_oomkill_value != 0: 156 | oomkill_detected = True 157 | else: 158 | max_oomkill_value = 0 159 | 160 | if len(data) == 0: 161 | return ResourceRecommendation.undefined(info="No data") 162 | 163 | # NOTE: metrics for each pod are returned as list[values] where values is [timestamp, value] 164 | # As MemoryAmountLoader returns only the last value (1 point), [0, 1] is used to get the value 165 | # So each pod is string with pod name, and values is numpy array of shape (N, 2) 166 | data_count = {pod: values[0, 1] for pod, values in history_data["MemoryAmountLoader"].items()} 167 | total_points_count = sum(data_count.values()) 168 | 169 | if total_points_count < self.settings.points_required: 170 | return ResourceRecommendation.undefined(info="Not enough data") 171 | 172 | if ( 173 | object_data.hpa is not None 174 | and object_data.hpa.target_memory_utilization_percentage is not None 175 | and not self.settings.allow_hpa 176 | ): 177 | return ResourceRecommendation.undefined(info="HPA detected") 178 | 179 | memory_usage = self.settings.calculate_memory_proposal(data, max_oomkill_value) 180 | return ResourceRecommendation( 181 | request=memory_usage, limit=memory_usage, info="OOMKill detected" if oomkill_detected else None 182 | ) 183 | 184 | def run(self, history_data: MetricsPodData, object_data: K8sObjectData) -> RunResult: 185 | return { 186 | 
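            # Editorial comment (not upstream code): the returned RunResult maps each ResourceType
            # (CPU, Memory) to the ResourceRecommendation computed above, i.e. a request/limit pair
            # plus an optional info string such as "Not enough data" or "HPA detected".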
ResourceType.CPU: self.__calculate_cpu_proposal(history_data, object_data), 187 | ResourceType.Memory: self.__calculate_memory_proposal(history_data, object_data), 188 | } 189 | -------------------------------------------------------------------------------- /robusta_krr/strategies/simple_limit.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | from datetime import timedelta 3 | 4 | import numpy as np 5 | import pydantic as pd 6 | 7 | from robusta_krr.core.abstract.strategies import ( 8 | BaseStrategy, 9 | K8sObjectData, 10 | MetricsPodData, 11 | PodsTimeData, 12 | ResourceRecommendation, 13 | ResourceType, 14 | RunResult, 15 | StrategySettings, 16 | ) 17 | from robusta_krr.core.integrations.prometheus.metrics import ( 18 | CPUAmountLoader, 19 | MaxMemoryLoader, 20 | MemoryAmountLoader, 21 | CPULoader, 22 | PrometheusMetric, 23 | MaxOOMKilledMemoryLoader, 24 | ) 25 | 26 | 27 | class SimpleLimitStrategySettings(StrategySettings): 28 | cpu_request: float = pd.Field(66, gt=0, le=100, description="The percentile to use for the CPU request.") 29 | cpu_limit: float = pd.Field(96, gt=0, le=100, description="The percentile to use for the CPU limit.") 30 | memory_buffer_percentage: float = pd.Field( 31 | 15, gt=0, description="The percentage of added buffer to the peak memory usage for memory recommendation." 32 | ) 33 | points_required: int = pd.Field( 34 | 100, ge=1, description="The number of data points required to make a recommendation for a resource." 35 | ) 36 | allow_hpa: bool = pd.Field( 37 | False, 38 | description="Whether to calculate recommendations even when there is an HPA scaler defined on that resource.", 39 | ) 40 | use_oomkill_data: bool = pd.Field( 41 | False, 42 | description="Whether to bump the memory when OOMKills are detected (experimental).", 43 | ) 44 | oom_memory_buffer_percentage: float = pd.Field( 45 | 25, ge=0, description="What percentage to increase the memory when there are OOMKill events." 
46 | ) 47 | 48 | def calculate_memory_proposal(self, data: PodsTimeData, max_oomkill: float = 0) -> float: 49 | data_ = [np.max(values[:, 1]) for values in data.values()] 50 | if len(data_) == 0: 51 | return float("NaN") 52 | 53 | return max( 54 | np.max(data_) * (1 + self.memory_buffer_percentage / 100), 55 | max_oomkill * (1 + self.oom_memory_buffer_percentage / 100), 56 | ) 57 | 58 | def calculate_cpu_percentile(self, data: PodsTimeData, percentile: float) -> float: 59 | if len(data) == 0: 60 | return float("NaN") 61 | 62 | if len(data) > 1: 63 | data_ = np.concatenate([values[:, 1] for values in data.values()]) 64 | else: 65 | data_ = list(data.values())[0][:, 1] 66 | 67 | return np.percentile(data_, percentile) 68 | 69 | def history_range_enough(self, history_range: tuple[timedelta, timedelta]) -> bool: 70 | start, end = history_range 71 | return (end - start) >= timedelta(hours=3) 72 | 73 | 74 | class SimpleLimitStrategy(BaseStrategy[SimpleLimitStrategySettings]): 75 | 76 | display_name = "simple_limit" 77 | rich_console = True 78 | 79 | @property 80 | def metrics(self) -> list[type[PrometheusMetric]]: 81 | metrics = [ 82 | CPULoader, 83 | MaxMemoryLoader, 84 | CPUAmountLoader, 85 | MemoryAmountLoader, 86 | ] 87 | 88 | if self.settings.use_oomkill_data: 89 | metrics.append(MaxOOMKilledMemoryLoader) 90 | 91 | return metrics 92 | 93 | @property 94 | def description(self): 95 | s = textwrap.dedent(f"""\ 96 | CPU request: {self.settings.cpu_request}% percentile, limit: {self.settings.cpu_limit}% percentile 97 | Memory request: max + {self.settings.memory_buffer_percentage}%, limit: max + {self.settings.memory_buffer_percentage}% 98 | History: {self.settings.history_duration} hours 99 | Step: {self.settings.timeframe_duration} minutes 100 | 101 | All parameters can be customized. For example: `krr simple_limit --cpu_request=66 --cpu_limit=96 --memory_buffer_percentage=15 --history_duration=24 --timeframe_duration=0.5` 102 | """) 103 | 104 | if not self.settings.allow_hpa: 105 | s += "\n" + textwrap.dedent(f"""\ 106 | This strategy does not work with objects with HPA defined (Horizontal Pod Autoscaler). 107 | If HPA is defined for CPU or Memory, the strategy will return "?" for that resource. 
108 | You can override this behaviour by passing the --allow-hpa flag 109 | """) 110 | 111 | s += "\nLearn more: [underline]https://github.com/robusta-dev/krr#algorithm[/underline]" 112 | return s 113 | 114 | def __calculate_cpu_proposal( 115 | self, history_data: MetricsPodData, object_data: K8sObjectData 116 | ) -> ResourceRecommendation: 117 | data = history_data["CPULoader"] 118 | 119 | if len(data) == 0: 120 | return ResourceRecommendation.undefined(info="No data") 121 | 122 | # NOTE: metrics for each pod are returned as list[values] where values is [timestamp, value] 123 | # As CPUAmountLoader returns only the last value (1 point), [0, 1] is used to get the value 124 | # So each pod is string with pod name, and values is numpy array of shape (N, 2) 125 | data_count = {pod: values[0, 1] for pod, values in history_data["CPUAmountLoader"].items()} 126 | total_points_count = sum(data_count.values()) 127 | 128 | if total_points_count < self.settings.points_required: 129 | return ResourceRecommendation.undefined(info="Not enough data") 130 | 131 | if ( 132 | object_data.hpa is not None 133 | and object_data.hpa.target_cpu_utilization_percentage is not None 134 | and not self.settings.allow_hpa 135 | ): 136 | return ResourceRecommendation.undefined(info="HPA detected") 137 | 138 | cpu_request = self.settings.calculate_cpu_percentile(data, self.settings.cpu_request) 139 | cpu_limit = self.settings.calculate_cpu_percentile(data, self.settings.cpu_limit) 140 | return ResourceRecommendation(request=cpu_request, limit=cpu_limit) 141 | 142 | def __calculate_memory_proposal( 143 | self, history_data: MetricsPodData, object_data: K8sObjectData 144 | ) -> ResourceRecommendation: 145 | data = history_data["MaxMemoryLoader"] 146 | 147 | oomkill_detected = False 148 | 149 | if self.settings.use_oomkill_data: 150 | max_oomkill_data = history_data["MaxOOMKilledMemoryLoader"] 151 | # NOTE: metrics for each pod are returned as list[values] where values is [timestamp, value] 152 | # As MaxOOMKilledMemoryLoader returns only the last value (1 point), [0, 1] is used to get the value 153 | # So each value is numpy array of shape (N, 2) 154 | max_oomkill_value = ( 155 | np.max([values[0, 1] for values in max_oomkill_data.values()]) if len(max_oomkill_data) > 0 else 0 156 | ) 157 | if max_oomkill_value != 0: 158 | oomkill_detected = True 159 | else: 160 | max_oomkill_value = 0 161 | 162 | if len(data) == 0: 163 | return ResourceRecommendation.undefined(info="No data") 164 | 165 | # NOTE: metrics for each pod are returned as list[values] where values is [timestamp, value] 166 | # As MemoryAmountLoader returns only the last value (1 point), [0, 1] is used to get the value 167 | # So each pod is string with pod name, and values is numpy array of shape (N, 2) 168 | data_count = {pod: values[0, 1] for pod, values in history_data["MemoryAmountLoader"].items()} 169 | total_points_count = sum(data_count.values()) 170 | 171 | if total_points_count < self.settings.points_required: 172 | return ResourceRecommendation.undefined(info="Not enough data") 173 | 174 | if ( 175 | object_data.hpa is not None 176 | and object_data.hpa.target_memory_utilization_percentage is not None 177 | and not self.settings.allow_hpa 178 | ): 179 | return ResourceRecommendation.undefined(info="HPA detected") 180 | 181 | memory_usage = self.settings.calculate_memory_proposal(data, max_oomkill_value) 182 | return ResourceRecommendation( 183 | request=memory_usage, limit=memory_usage, info="OOMKill detected" if oomkill_detected else None 184 | ) 185 
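    # Editorial illustration (not upstream code): with the defaults above, the CPU request is the
    # 66th percentile and the CPU limit the 96th percentile of all pods' CPU samples combined.
    # For example, if the concatenated samples were [0.1, 0.2, ..., 1.0] cores, then
    # np.percentile(data, 66) ~= 0.69 cores would become the request and
    # np.percentile(data, 96) ~= 0.96 cores the limit, while memory is still recommended as the
    # observed peak plus memory_buffer_percentage (bumped further if OOMKill data is enabled).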
| 186 | def run(self, history_data: MetricsPodData, object_data: K8sObjectData) -> RunResult: 187 | return { 188 | ResourceType.CPU: self.__calculate_cpu_proposal(history_data, object_data), 189 | ResourceType.Memory: self.__calculate_memory_proposal(history_data, object_data), 190 | } 191 | -------------------------------------------------------------------------------- /robusta_krr/utils/batched.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from typing import Iterable, TypeVar 3 | 4 | _T = TypeVar("_T") 5 | 6 | 7 | def batched(iterable: Iterable[_T], n: int) -> Iterable[list[_T]]: 8 | "Batch data into tuples of length n. The last batch may be shorter." 9 | # batched('ABCDEFG', 3) --> ABC DEF G 10 | if n < 1: 11 | raise ValueError("n must be at least one") 12 | it = iter(iterable) 13 | while batch := list(itertools.islice(it, n)): 14 | yield batch 15 | -------------------------------------------------------------------------------- /robusta_krr/utils/intro.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import asyncio 3 | from concurrent.futures import ThreadPoolExecutor 4 | 5 | from .version import get_version 6 | 7 | 8 | ONLINE_LINK = 'https://api.robusta.dev/krr/intro' 9 | LOCAL_LINK = './intro.txt' 10 | TIMEOUT = 0.5 11 | 12 | 13 | # Synchronous function to fetch intro message 14 | def fetch_intro_message() -> str: 15 | try: 16 | # Attempt to get the message from the URL 17 | response = requests.get(ONLINE_LINK, params={"version": get_version()}, timeout=TIMEOUT) 18 | response.raise_for_status() # Raises an error for bad responses 19 | result = response.json() 20 | return result['message'] 21 | except Exception as e1: 22 | # If there's any error, fallback to local file 23 | try: 24 | with open(LOCAL_LINK, 'r') as file: 25 | return file.read() 26 | except Exception as e2: 27 | return ( 28 | "[red]Failed to load the intro message.\n" 29 | f"Both from the URL: {e1.__class__.__name__} {e1}\n" 30 | f"and the local file: {e2.__class__.__name__} {e2}\n" 31 | "But as that is not critical, KRR will continue to run without the intro message.[/red]" 32 | ) 33 | 34 | 35 | async def load_intro_message() -> str: 36 | loop = asyncio.get_running_loop() 37 | # Use a ThreadPoolExecutor to run the synchronous function in a separate thread 38 | with ThreadPoolExecutor() as pool: 39 | return await loop.run_in_executor(pool, fetch_intro_message) 40 | 41 | 42 | __all__ = ['load_intro_message'] 43 | -------------------------------------------------------------------------------- /robusta_krr/utils/object_like_dict.py: -------------------------------------------------------------------------------- 1 | class ObjectLikeDict: 2 | def __init__(self, dictionary): 3 | for key, value in dictionary.items(): 4 | if isinstance(value, dict): 5 | value = ObjectLikeDict(value) # Convert inner dict 6 | if isinstance(value, list): 7 | value = [ObjectLikeDict(item) if isinstance(item, dict) else item for item in value] 8 | self.__dict__[key] = value 9 | 10 | def __getattr__(self, name): 11 | return self.__dict__.get(name) 12 | 13 | def __setattr__(self, name, value): 14 | self.__dict__[name] = value 15 | 16 | def __str__(self): 17 | return str(self.__dict__) 18 | 19 | def __repr__(self): 20 | return repr(self.__dict__) 21 | 22 | def __len__(self): 23 | return len(self.__dict__) 24 | 25 | def get(self, key, default=None): 26 | return self.__dict__.get(key, default) 27 | 28 | def items(self): 29 | 
return self.__dict__.items() 30 | -------------------------------------------------------------------------------- /robusta_krr/utils/patch.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from kubernetes.client.models.v1_pod_failure_policy_rule import V1PodFailurePolicyRule 4 | 5 | def create_monkey_patches(): 6 | """ 7 | The python kubernetes client will throw exceptions for specific fields that were not allowed to be None on older versions of kubernetes. 8 | """ 9 | logger = logging.getLogger("krr") 10 | logger.debug("Creating kubernetes python cli monkey patches") 11 | 12 | def patched_setter_pod_failure_policy(self, on_pod_conditions): 13 | self._on_pod_conditions = on_pod_conditions 14 | 15 | V1PodFailurePolicyRule.on_pod_conditions = V1PodFailurePolicyRule.on_pod_conditions.setter(patched_setter_pod_failure_policy) 16 | -------------------------------------------------------------------------------- /robusta_krr/utils/progress_bar.py: -------------------------------------------------------------------------------- 1 | from alive_progress import alive_bar 2 | 3 | # from robusta_krr.core.models.config import settings 4 | 5 | 6 | class ProgressBar: 7 | """ 8 | Progress bar for displaying progress of gathering recommendations. 9 | 10 | Use `ProgressBar` as a context manager to automatically handle the progress bar. 11 | Use `progress` method to step the progress bar. 12 | """ 13 | 14 | def __init__(self, **kwargs) -> None: 15 | # self.show_bar = not settings.quiet and not settings.log_to_stderr 16 | self.show_bar = False # FIXME: Progress bar is not working good with other logs 17 | if self.show_bar: 18 | self.alive_bar = alive_bar(**kwargs, enrich_print=False) 19 | 20 | def __enter__(self): 21 | if self.show_bar: 22 | self.bar = self.alive_bar.__enter__() 23 | return self 24 | 25 | def progress(self): 26 | if self.show_bar: 27 | self.bar() 28 | 29 | def __exit__(self, *args): 30 | if self.show_bar: 31 | self.alive_bar.__exit__(*args) 32 | -------------------------------------------------------------------------------- /robusta_krr/utils/resource_units.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Union 2 | 3 | UNITS: dict[str, float] = { 4 | "m": 0.001, 5 | "Ki": 1024, 6 | "Mi": 1024**2, 7 | "Gi": 1024**3, 8 | "Ti": 1024**4, 9 | "Pi": 1024**5, 10 | "Ei": 1024**6, 11 | "k": 1e3, 12 | "M": 1e6, 13 | "G": 1e9, 14 | "T": 1e12, 15 | "P": 1e15, 16 | "E": 1e18, 17 | } 18 | 19 | 20 | def parse(x: str, /) -> Union[float, int]: 21 | """Converts a string to an integer with respect of units.""" 22 | 23 | for unit, multiplier in UNITS.items(): 24 | if x.endswith(unit): 25 | return float(x[: -len(unit)]) * multiplier 26 | 27 | return float(x) 28 | 29 | 30 | def get_base(x: str, /) -> Literal[1024, 1000]: 31 | """Returns the base of the unit.""" 32 | 33 | for unit, _ in UNITS.items(): 34 | if x.endswith(unit): 35 | return 1024 if unit in ["Ki", "Mi", "Gi", "Ti", "Pi", "Ei"] else 1000 36 | return 1000 if "." 
in x else 1024 37 | 38 | 39 | def format(x: Union[float, int], /, *, base: Literal[1024, 1000] = 1024) -> str: 40 | """Converts an integer to a string with respect of units.""" 41 | 42 | if x < 1: 43 | return f"{int(x*1000)}m" 44 | if x < base: 45 | return str(x) 46 | 47 | units = ["", "K", "M", "G", "T", "P", "E"] 48 | binary_units = ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei"] 49 | 50 | x = int(x) 51 | for i, unit in enumerate(binary_units if base == 1024 else units): 52 | if x < base ** (i + 1) or i == len(units) - 1 or x / base ** (i + 1) < 10: 53 | return f"{x/base**i:.0f}{unit}" 54 | return f"{x/base**i:.0f}{unit}" 55 | -------------------------------------------------------------------------------- /robusta_krr/utils/service_discovery.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from abc import ABC, abstractmethod 3 | from typing import Optional 4 | 5 | from cachetools import TTLCache 6 | from kubernetes import client 7 | from kubernetes.client import V1IngressList, V1ServiceList 8 | from kubernetes.client.api_client import ApiClient 9 | from kubernetes.client.models.v1_ingress import V1Ingress 10 | from kubernetes.client.models.v1_service import V1Service 11 | 12 | from robusta_krr.core.models.config import settings 13 | 14 | logger = logging.getLogger("krr") 15 | 16 | 17 | class ServiceDiscovery: 18 | SERVICE_CACHE_TTL_SEC = 900 19 | cache: TTLCache = TTLCache(maxsize=1, ttl=SERVICE_CACHE_TTL_SEC) 20 | 21 | def __init__(self, api_client: Optional[ApiClient] = None) -> None: 22 | self.api_client = api_client 23 | 24 | def find_service_url(self, label_selector: str) -> Optional[str]: 25 | """ 26 | Get the url of an in-cluster service with a specific label 27 | """ 28 | # we do it this way because there is a weird issue with hikaru's ServiceList.listServiceForAllNamespaces() 29 | v1 = client.CoreV1Api(api_client=self.api_client) 30 | svc_list: V1ServiceList = v1.list_service_for_all_namespaces(label_selector=label_selector) 31 | if not svc_list.items: 32 | return None 33 | 34 | svc: V1Service = svc_list.items[0] 35 | name = svc.metadata.name 36 | namespace = svc.metadata.namespace 37 | port = svc.spec.ports[0].port 38 | 39 | if settings.inside_cluster: 40 | return f"http://{name}.{namespace}.svc.cluster.local:{port}" 41 | 42 | elif self.api_client is not None: 43 | return f"{self.api_client.configuration.host}/api/v1/namespaces/{namespace}/services/{name}:{port}/proxy" 44 | 45 | return None 46 | 47 | def find_ingress_host(self, label_selector: str) -> Optional[str]: 48 | """ 49 | Discover the ingress host of the Prometheus if krr is not running in cluster 50 | """ 51 | if settings.inside_cluster: 52 | return None 53 | 54 | v1 = client.NetworkingV1Api(api_client=self.api_client) 55 | ingress_list: V1IngressList = v1.list_ingress_for_all_namespaces(label_selector=label_selector) 56 | if not ingress_list.items: 57 | return None 58 | 59 | ingress: V1Ingress = ingress_list.items[0] 60 | prometheus_host = ingress.spec.rules[0].host 61 | return f"http://{prometheus_host}" 62 | 63 | def find_url(self, selectors: list[str]) -> Optional[str]: 64 | """ 65 | Try to autodiscover the url of an in-cluster service 66 | """ 67 | cache_key = ",".join(selectors + [self.api_client.configuration.host if self.api_client else ""]) 68 | cached_value = self.cache.get(cache_key) 69 | if cached_value: 70 | return cached_value 71 | 72 | for label_selector in selectors: 73 | logger.debug(f"Trying to find service with label selector {label_selector}") 74 | 
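            # Editorial comment (not upstream code): for each selector the in-cluster Service is
            # tried first; a hit is cached under cache_key for SERVICE_CACHE_TTL_SEC (900 s).
            # The Ingress lookup below is only a fallback for out-of-cluster runs and its result
            # is returned without being cached.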
service_url = self.find_service_url(label_selector) 75 | if service_url: 76 | logger.debug(f"Found service with label selector {label_selector}") 77 | self.cache[cache_key] = service_url 78 | return service_url 79 | 80 | logger.debug(f"Trying to find ingress with label selector {label_selector}") 81 | self.find_ingress_host(label_selector) 82 | ingress_url = self.find_ingress_host(label_selector) 83 | if ingress_url: 84 | return ingress_url 85 | 86 | return None 87 | 88 | 89 | class MetricsServiceDiscovery(ServiceDiscovery, ABC): 90 | @abstractmethod 91 | def find_metrics_url(self, *, api_client: Optional[ApiClient] = None) -> Optional[str]: 92 | pass 93 | -------------------------------------------------------------------------------- /robusta_krr/utils/version.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import subprocess 4 | import sys 5 | from concurrent.futures import ThreadPoolExecutor 6 | from typing import Optional 7 | 8 | import requests 9 | 10 | import robusta_krr 11 | 12 | 13 | def get_version() -> str: 14 | # the version string was patched by a release - return __version__ which will be correct 15 | if robusta_krr.__version__ != "dev": 16 | return robusta_krr.__version__ 17 | 18 | # we are running from an unreleased dev version 19 | try: 20 | # Get the latest git tag 21 | tag = subprocess.check_output(["git", "describe", "--tags"]).decode().strip() 22 | 23 | # Get the current branch name 24 | branch = subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"]).decode().strip() 25 | 26 | # Check if there are uncommitted changes 27 | status = subprocess.check_output(["git", "status", "--porcelain"]).decode().strip() 28 | dirty = "-dirty" if status else "" 29 | 30 | return f"{tag}-{branch}{dirty}" 31 | 32 | except Exception: 33 | return robusta_krr.__version__ 34 | 35 | 36 | # Synchronous function to fetch the latest release version from GitHub API 37 | def fetch_latest_version() -> Optional[str]: 38 | url = "https://api.github.com/repos/robusta-dev/krr/releases/latest" 39 | try: 40 | response = requests.get(url, timeout=0.5) # 0.5 seconds timeout 41 | response.raise_for_status() # Raises an error for bad responses 42 | data = response.json() 43 | return data.get("tag_name") # Returns the tag name of the latest release 44 | except Exception: 45 | return None 46 | 47 | 48 | async def load_latest_version() -> Optional[str]: 49 | loop = asyncio.get_running_loop() 50 | # Run the synchronous function in a separate thread 51 | with ThreadPoolExecutor() as pool: 52 | return await loop.run_in_executor(pool, fetch_latest_version) 53 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import random 2 | from datetime import datetime, timedelta 3 | from unittest.mock import AsyncMock, patch 4 | 5 | import numpy as np 6 | import pytest 7 | 8 | from robusta_krr.api.models import K8sObjectData, PodData, ResourceAllocations 9 | from robusta_krr.strategies.simple import SimpleStrategy, SimpleStrategySettings 10 | 11 | TEST_OBJECT = K8sObjectData( 12 | cluster="mock-cluster", 13 | name="mock-object-1", 14 | container="mock-container-1", 15 | pods=[ 16 | PodData(name="mock-pod-1", deleted=False), 17 | PodData(name="mock-pod-2", deleted=False), 18 | PodData(name="mock-pod-3", deleted=True), 19 | ], 20 | namespace="default", 21 | kind="Deployment", 22 | 
allocations=ResourceAllocations( 23 | requests={"cpu": 1, "memory": 1}, # type: ignore 24 | limits={"cpu": 2, "memory": 2}, # type: ignore 25 | ), 26 | ) 27 | 28 | 29 | @pytest.fixture(autouse=True, scope="session") 30 | def mock_list_clusters(): 31 | with patch( 32 | "robusta_krr.core.integrations.kubernetes.KubernetesLoader.list_clusters", 33 | new=AsyncMock(return_value=[TEST_OBJECT.cluster]), 34 | ): 35 | yield 36 | 37 | 38 | @pytest.fixture(autouse=True, scope="session") 39 | def mock_list_scannable_objects(): 40 | with patch( 41 | "robusta_krr.core.integrations.kubernetes.KubernetesLoader.list_scannable_objects", 42 | new=AsyncMock(return_value=[TEST_OBJECT]), 43 | ): 44 | yield 45 | 46 | 47 | @pytest.fixture(autouse=True, scope="session") 48 | def mock_load_kubeconfig(): 49 | with patch("robusta_krr.core.models.config.Config.load_kubeconfig", return_value=None): 50 | yield 51 | 52 | 53 | @pytest.fixture(autouse=True, scope="session") 54 | def mock_prometheus_loader(): 55 | now = datetime.now() 56 | start = now - timedelta(hours=1) 57 | now_ts, start_ts = now.timestamp(), start.timestamp() 58 | metric_points_data = np.array([(t, random.randrange(0, 100)) for t in np.linspace(start_ts, now_ts, 3600)]) 59 | 60 | settings = SimpleStrategySettings() 61 | strategy = SimpleStrategy(settings) 62 | 63 | with patch( 64 | "robusta_krr.core.integrations.prometheus.loader.PrometheusMetricsLoader.gather_data", 65 | new=AsyncMock( 66 | return_value={ 67 | metric.__name__: {pod.name: metric_points_data for pod in TEST_OBJECT.pods} 68 | for metric in strategy.metrics 69 | }, 70 | ), 71 | ) as mock_prometheus_loader: 72 | mock_prometheus_loader 73 | yield 74 | 75 | 76 | @pytest.fixture(autouse=True, scope="session") 77 | def mock_prometheus_load_pods(): 78 | with patch( 79 | "robusta_krr.core.integrations.prometheus.loader.PrometheusMetricsLoader.load_pods", 80 | new=AsyncMock( 81 | return_value=TEST_OBJECT.pods, 82 | ), 83 | ) as mock_prometheus_loader: 84 | mock_prometheus_loader 85 | yield 86 | 87 | 88 | @pytest.fixture(autouse=True, scope="session") 89 | def mock_prometheus_get_history_range(): 90 | async def get_history_range(self, history_duration: timedelta) -> tuple[datetime, datetime]: 91 | now = datetime.now() 92 | start = now - history_duration 93 | return start, now 94 | 95 | with patch( 96 | "robusta_krr.core.integrations.prometheus.loader.PrometheusMetricsLoader.get_history_range", get_history_range 97 | ): 98 | yield 99 | 100 | 101 | @pytest.fixture(autouse=True, scope="session") 102 | def mock_prometheus_init(): 103 | with patch("robusta_krr.core.integrations.prometheus.loader.PrometheusMetricsLoader.__init__", return_value=None): 104 | yield 105 | -------------------------------------------------------------------------------- /tests/models/test_resource_allocations.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import pytest 4 | 5 | from robusta_krr.core.models.allocations import ResourceAllocations, ResourceType 6 | 7 | 8 | @pytest.mark.parametrize( 9 | "cpu", 10 | [ 11 | {"request": "5m", "limit": None}, 12 | {"request": 0.005, "limit": None}, 13 | ], 14 | ) 15 | @pytest.mark.parametrize( 16 | "memory", 17 | [ 18 | {"request": 128974848, "limit": 128974848}, 19 | {"request": 128.974848e6, "limit": 128.974848e6}, 20 | {"request": "128.9748480M", "limit": "128.9748480M"}, 21 | {"request": "128974848000m", "limit": "128974848000m"}, 22 | {"request": "123Mi", "limit": "123Mi"}, 23 | {"request": "128974848e0", 
"limit": "128974848e0"}, 24 | ], 25 | ) 26 | def test_resource_allocation_supported_formats( 27 | cpu: dict[str, Union[str, int, float, None]], memory: dict[str, Union[str, int, float, None]] 28 | ): 29 | allocations = ResourceAllocations( 30 | requests={ResourceType.CPU: cpu["request"], ResourceType.Memory: memory["request"]}, 31 | limits={ResourceType.CPU: cpu["limit"], ResourceType.Memory: memory["limit"]}, 32 | ) 33 | assert allocations.requests[ResourceType.CPU] == 0.005 34 | assert allocations.limits[ResourceType.CPU] == None 35 | assert (allocations.requests[ResourceType.Memory] // 1) == 128974848.0 36 | assert (allocations.limits[ResourceType.Memory] // 1) == 128974848.0 37 | -------------------------------------------------------------------------------- /tests/single_namespace_as_group.yaml: -------------------------------------------------------------------------------- 1 | # Test environment for per-namespace scans using a group object ID (for e.g. Microsoft Entra) 2 | # The purpose of this setup is to verify that per-namespace features work without cluster level permissions 3 | # You can test this Group and KRR using: 4 | # A user named aksdev that's part of the appdev group. 5 | # krr simple --as aksdev --as-group -n kube-system 6 | apiVersion: rbac.authorization.k8s.io/v1 7 | kind: Role 8 | metadata: 9 | namespace: kube-system 10 | name: krr-role 11 | rules: 12 | - apiGroups: [""] 13 | resources: ["pods", "services"] 14 | verbs: ["get", "watch", "list"] 15 | - apiGroups: ["batch"] 16 | resources: ["jobs"] 17 | verbs: ["get", "watch", "list"] 18 | - apiGroups: ["apps"] 19 | resources: ["deployments", "replicasets", "daemonsets", "statefulsets"] 20 | verbs: ["get", "list", "watch"] 21 | - apiGroups: ["autoscaling"] 22 | resources: ["horizontalpodautoscalers"] 23 | verbs: ["get", "list", "watch"] 24 | --- 25 | apiVersion: rbac.authorization.k8s.io/v1 26 | kind: RoleBinding 27 | metadata: 28 | name: krr-role-binding 29 | namespace: kube-system 30 | subjects: 31 | - kind: Group 32 | # Replace with the actual Group Object ID 33 | name: 34 | apiGroup: rbac.authorization.k8s.io 35 | roleRef: 36 | kind: Role 37 | name: krr-role 38 | apiGroup: rbac.authorization.k8s.io 39 | -------------------------------------------------------------------------------- /tests/single_namespace_permissions.yaml: -------------------------------------------------------------------------------- 1 | # Test environment for per-namespace scans 2 | # The purpose of this setup is to verify that per-namespace features work without cluster level permissions 3 | # You can test this ServiceAccount and KRR using: 4 | # krr simple --as system:serviceaccount:kube-system:krr-account -n kube-system 5 | apiVersion: v1 6 | kind: ServiceAccount 7 | metadata: 8 | name: krr-account 9 | namespace: kube-system 10 | --- 11 | apiVersion: rbac.authorization.k8s.io/v1 12 | kind: Role 13 | metadata: 14 | namespace: kube-system 15 | name: krr-role 16 | rules: 17 | - apiGroups: [""] 18 | resources: ["pods", "services"] 19 | verbs: ["get", "watch", "list"] 20 | - apiGroups: ["batch"] 21 | resources: ["jobs"] 22 | verbs: ["get", "watch", "list"] 23 | - apiGroups: ["apps"] 24 | resources: ["deployments", "replicasets", "daemonsets", "statefulsets"] 25 | verbs: ["get", "list", "watch"] 26 | - apiGroups: ["autoscaling"] 27 | resources: ["horizontalpodautoscalers"] 28 | verbs: ["get", "list", "watch"] 29 | --- 30 | apiVersion: rbac.authorization.k8s.io/v1 31 | kind: RoleBinding 32 | metadata: 33 | name: krr-role-binding 34 | namespace: 
kube-system 35 | subjects: 36 | - kind: ServiceAccount 37 | name: krr-account 38 | namespace: kube-system 39 | roleRef: 40 | kind: Role 41 | name: krr-role 42 | apiGroup: rbac.authorization.k8s.io 43 | -------------------------------------------------------------------------------- /tests/test_krr.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from typing import Literal, Union 3 | from unittest.mock import patch, Mock, MagicMock 4 | from typer.testing import CliRunner 5 | 6 | from robusta_krr.main import app, load_commands 7 | from robusta_krr.core.integrations.kubernetes import ClusterLoader 8 | from robusta_krr.core.models.config import settings 9 | 10 | runner = CliRunner(mix_stderr=False) 11 | load_commands() 12 | 13 | STRATEGY_NAME = "simple" 14 | 15 | 16 | def test_help(): 17 | result = runner.invoke(app, [STRATEGY_NAME, "--help"]) 18 | try: 19 | assert result.exit_code == 0 20 | except AssertionError as e: 21 | raise e from result.exception 22 | 23 | 24 | @pytest.mark.parametrize("log_flag", ["-v", "-q"]) 25 | def test_run(log_flag: str): 26 | result = runner.invoke(app, [STRATEGY_NAME, log_flag, "--namespace", "default"]) 27 | try: 28 | assert result.exit_code == 0, result.stdout 29 | except AssertionError as e: 30 | raise e from result.exception 31 | 32 | 33 | @pytest.mark.parametrize("format", ["json", "yaml", "table", "pprint", "csv"]) 34 | @pytest.mark.parametrize("output", ["--logtostderr", "-q"]) 35 | def test_output_formats(format: str, output: str): 36 | result = runner.invoke(app, [STRATEGY_NAME, output, "-f", format]) 37 | try: 38 | assert result.exit_code == 0, result.exc_info 39 | except AssertionError as e: 40 | raise e from result.exception 41 | 42 | @pytest.mark.parametrize( 43 | "setting_namespaces,cluster_all_ns,expected",[ 44 | ( 45 | # default settings 46 | "*", 47 | ["kube-system", "robusta-frontend", "robusta-backend", "infra-grafana"], 48 | "*" 49 | ), 50 | ( 51 | # list of namespace provided from arguments without regex pattern 52 | ["robusta-krr", "kube-system"], 53 | ["kube-system", "robusta-frontend", "robusta-backend", "robusta-krr"], 54 | ["robusta-krr", "kube-system"] 55 | ), 56 | ( 57 | # list of namespace provided from arguments with regex pattern and will not duplicating in final result 58 | ["robusta-.*", "robusta-frontend"], 59 | ["kube-system", "robusta-frontend", "robusta-backend", "robusta-krr"], 60 | ["robusta-frontend", "robusta-backend", "robusta-krr"] 61 | ), 62 | ( 63 | # namespace provided with regex pattern and will match for some namespaces 64 | [".*end$"], 65 | ["kube-system", "robusta-frontend", "robusta-backend", "robusta-krr"], 66 | ["robusta-frontend", "robusta-backend"] 67 | ) 68 | ] 69 | ) 70 | def test_cluster_namespace_list( 71 | setting_namespaces: Union[Literal["*"], list[str]], 72 | cluster_all_ns: list[str], 73 | expected: Union[Literal["*"], list[str]], 74 | ): 75 | cluster = ClusterLoader() 76 | with patch("robusta_krr.core.models.config.settings.namespaces", setting_namespaces): 77 | with patch.object(cluster.core, "list_namespace", return_value=MagicMock( 78 | items=[MagicMock(**{"metadata.name": m}) for m in cluster_all_ns])): 79 | assert sorted(cluster.namespaces) == sorted(expected) 80 | -------------------------------------------------------------------------------- /tests/test_runner.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from click.testing import Result 3 | from typer.testing import 
CliRunner 4 | 5 | from robusta_krr.main import app, load_commands 6 | 7 | runner = CliRunner(mix_stderr=False) 8 | load_commands() 9 | 10 | 11 | @pytest.mark.parametrize( 12 | "args, expected_exit_code", 13 | [ 14 | (["--exclude-severity", "-f", "csv"], 0), 15 | (["--exclude-severity", "-f", "table"], 2), 16 | (["--exclude-severity"], 2), 17 | ], 18 | ) 19 | def test_exclude_severity_option(args: list[str], expected_exit_code: int) -> None: 20 | result: Result = runner.invoke(app, ["simple", *args]) 21 | assert result.exit_code == expected_exit_code 22 | --------------------------------------------------------------------------------
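Editorial addition (illustration only, not a file in the repository): to show how the strategy settings above turn per-pod samples into a recommendation, the sketch below feeds two fabricated pods' memory series into SimpleStrategySettings.calculate_memory_proposal. The import path and method names come from the sources above; the pod names, timestamps and byte values are invented, and the snippet assumes the robusta_krr package and numpy are importable.

import numpy as np

from robusta_krr.strategies.simple import SimpleStrategySettings

# Each pod maps to an array of (timestamp, value) rows, the same shape the Prometheus loaders return.
pods = {
    "pod-a": np.array([[0.0, 100e6], [60.0, 180e6]]),  # peak ~180 MB
    "pod-b": np.array([[0.0, 150e6], [60.0, 120e6]]),  # peak ~150 MB
}

settings = SimpleStrategySettings()  # memory_buffer_percentage defaults to 15
proposal = settings.calculate_memory_proposal(pods)
print(proposal)  # max(180e6, 150e6) * 1.15 ~= 2.07e8 bytes, used as both request and limit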