├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.yaml
│ ├── document.yaml
│ └── feature_request.yaml
├── dependabot.yml
├── pull_request_template.md
└── workflows
│ ├── build-push.yml
│ ├── collect-data-self-hosted.yml
│ ├── collect-train.yml
│ ├── commit-msg.yml
│ ├── integration-test.yml
│ ├── lint.yml
│ ├── pr.yml
│ ├── push-to-main.yml
│ ├── release.yml
│ ├── tekton-test.yml
│ ├── train-model.yml
│ ├── train.yml
│ └── unit-test.yml
├── .gitignore
├── .vscode
└── settings.json
├── .yamllint.yaml
├── LICENSE
├── Makefile
├── README.md
├── VERSION
├── cmd
└── main.py
├── contributing.md
├── dockerfiles
├── Dockerfile
├── Dockerfile.base
├── Dockerfile.dockerignore
├── Dockerfile.test
├── Dockerfile.test-nobase
├── Dockerfile.test-nobase.dockerignore
└── Dockerfile.test.dockerignore
├── docs
└── developer
│ ├── README.md
│ ├── estimate
│ ├── classes.plantuml
│ ├── classes.svg
│ ├── packages.plantuml
│ └── packages.svg
│ ├── server
│ ├── classes.plantuml
│ ├── classes.svg
│ ├── packages.plantuml
│ └── packages.svg
│ └── train
│ ├── classes.plantuml
│ ├── classes.svg
│ ├── packages.plantuml
│ ├── packages.svg
│ └── trainer
│ ├── classes.plantuml
│ ├── classes.svg
│ ├── packages.plantuml
│ └── packages.svg
├── fig
├── comm_diagram.png
├── model-server-components-simplified.png
├── tekton-complete-train.png
├── tekton-kepler-default.png
└── tekton-single-train.png
├── hack
├── aws_helper.sh
├── k8s_helper.sh
└── utils.bash
├── manifests
├── base
│ ├── estimate-only
│ │ └── kustomization.yaml
│ ├── estimate-with-server
│ │ └── kustomization.yaml
│ ├── kustomization.yaml
│ ├── openshift
│ │ ├── estimate-only
│ │ │ └── kustomization.yaml
│ │ ├── estimate-with-server
│ │ │ └── kustomization.yaml
│ │ ├── scc.yaml
│ │ └── serve-only
│ │ │ └── kustomization.yaml
│ ├── patch
│ │ ├── patch-estimator-sidecar.yaml
│ │ ├── patch-model-server.yaml
│ │ ├── patch-openshift.yaml
│ │ └── patch-server-only.yaml
│ └── serve-only
│ │ └── kustomization.yaml
├── compose
│ ├── dev
│ │ ├── compose.yaml
│ │ ├── grafana
│ │ │ └── dashboards
│ │ │ │ └── dev
│ │ │ │ └── dashboard.json
│ │ ├── kepler
│ │ │ ├── common
│ │ │ │ └── var
│ │ │ │ │ └── lib
│ │ │ │ │ └── kepler
│ │ │ │ │ └── data
│ │ │ │ │ ├── cpus.yaml
│ │ │ │ │ └── model_weight
│ │ │ │ │ ├── acpi_AbsPowerModel.json
│ │ │ │ │ ├── acpi_DynPowerModel.json
│ │ │ │ │ ├── intel_rapl_AbsPowerModel.json
│ │ │ │ │ └── intel_rapl_DynPowerModel.json
│ │ │ ├── metal
│ │ │ │ └── etc
│ │ │ │ │ └── kepler
│ │ │ │ │ └── kepler.config
│ │ │ │ │ ├── ENABLE_PROCESS_METRICS
│ │ │ │ │ ├── EXPOSE_ESTIMATED_IDLE_POWER_METRICS
│ │ │ │ │ └── EXPOSE_VM_METRICS
│ │ │ └── models
│ │ │ │ └── etc
│ │ │ │ └── kepler
│ │ │ │ └── kepler.config
│ │ │ │ ├── ENABLE_PROCESS_METRICS
│ │ │ │ ├── EXPOSE_ESTIMATED_IDLE_POWER_METRICS
│ │ │ │ ├── MODEL_CONFIG
│ │ │ │ ├── MODEL_SERVER_ENABLE
│ │ │ │ └── MODEL_SERVER_URL
│ │ ├── overrides.yaml
│ │ └── prometheus
│ │ │ └── scrape-configs
│ │ │ └── dev.yaml
│ └── monitoring
│ │ ├── compose.yaml
│ │ ├── grafana
│ │ ├── Dockerfile
│ │ ├── dashboards.yml
│ │ └── datasource.yml
│ │ └── prometheus
│ │ ├── Dockerfile
│ │ ├── prometheus.yml
│ │ └── rules
│ │ └── kepler.rule
├── kepler
│ ├── kustomization.yaml
│ └── patch
│ │ └── patch-ci.yaml
├── offline-trainer
│ ├── kustomization.yaml
│ └── offline-trainer.yaml
├── server
│ ├── base
│ │ └── kustomization.yaml
│ ├── kustomization.yaml
│ ├── kustomizeconfig.yaml
│ ├── online-train
│ │ ├── kustomization.yaml
│ │ └── patch-trainer.yaml
│ ├── openshift
│ │ ├── online-train
│ │ │ ├── kustomization.yaml
│ │ │ └── patch-trainer.yaml
│ │ ├── patch-openshift.yaml
│ │ └── serve-only
│ │ │ └── kustomization.yaml
│ └── server.yaml
├── set.sh
└── test
│ ├── file-server.yaml
│ ├── model-request-client.yaml
│ ├── patch-estimator-sidecar.yaml
│ └── power-request-client.yaml
├── model_training
├── README.md
├── cmd_instruction.md
├── deployment
│ ├── kepler.yaml
│ ├── prom-kepler-rbac.yaml
│ └── prom-np.yaml
├── s3
│ ├── Dockerfile
│ ├── LICENSE.txt
│ ├── README.md
│ ├── pyproject.toml
│ ├── src
│ │ └── s3
│ │ │ ├── __about__.py
│ │ │ ├── __init__.py
│ │ │ ├── loader.py
│ │ │ ├── pusher.py
│ │ │ └── util.py
│ └── tests
│ │ └── __init__.py
├── script.sh
└── tekton
│ ├── README.md
│ ├── examples
│ ├── complete-pipelinerun.yaml
│ ├── single-train
│ │ ├── abs-power.yaml
│ │ ├── aws-push.yaml
│ │ ├── default.yaml
│ │ ├── dyn-power.yaml
│ │ └── ibmcloud-push.yaml
│ ├── test-collect.yaml
│ └── test-retrain.yaml
│ ├── pipelines
│ ├── collect.yaml
│ ├── complete-retrain.yaml
│ ├── complete-train.yaml
│ ├── single-retrain.yaml
│ └── single-train.yaml
│ ├── pvc
│ └── hostpath.yaml
│ └── tasks
│ ├── extract-task.yaml
│ ├── isolate-task.yaml
│ ├── original-pipeline-task.yaml
│ ├── s3
│ ├── aws-s3-load.yaml
│ ├── aws-s3-push.yaml
│ ├── ibmcloud-s3-load.yaml
│ └── ibmcloud-s3-push.yaml
│ ├── stressng-task.yaml
│ └── train-task.yaml
├── pyproject.toml
├── src
└── kepler_model
│ ├── __about__.py
│ ├── __init__.py
│ ├── abs-train-pipelinerun.yaml
│ ├── cmd
│ ├── README.md
│ ├── __init__.py
│ ├── cmd_plot.py
│ ├── cmd_util.py
│ └── main.py
│ ├── estimate
│ ├── __init__.py
│ ├── archived_model.py
│ ├── estimator.py
│ ├── model
│ │ ├── __init__.py
│ │ ├── curvefit_model.py
│ │ ├── estimate_common.py
│ │ ├── keras_model.py
│ │ ├── model.py
│ │ ├── scikit_model.py
│ │ └── xgboost_model.py
│ └── model_server_connector.py
│ ├── server
│ └── model_server.py
│ ├── train
│ ├── __init__.py
│ ├── ec2_pipeline.py
│ ├── exporter
│ │ ├── __init__.py
│ │ ├── exporter.py
│ │ ├── validator.py
│ │ └── writer.py
│ ├── extractor
│ │ ├── __init__.py
│ │ ├── extractor.py
│ │ ├── preprocess.py
│ │ └── smooth_extractor.py
│ ├── isolator
│ │ ├── __init__.py
│ │ ├── isolator.py
│ │ └── train_isolator.py
│ ├── offline_trainer.py
│ ├── online_trainer.py
│ ├── pipeline.py
│ ├── profiler
│ │ ├── __init__.py
│ │ ├── generate_scaler.py
│ │ ├── node_type_index.py
│ │ └── profiler.py
│ ├── prom
│ │ ├── __init__.py
│ │ └── prom_query.py
│ ├── specpower_pipeline.py
│ └── trainer
│ │ ├── ExponentialRegressionTrainer
│ │ ├── __init__.py
│ │ └── main.py
│ │ ├── GradientBoostingRegressorTrainer
│ │ ├── __init__.py
│ │ └── main.py
│ │ ├── KNeighborsRegressorTrainer
│ │ ├── __init__.py
│ │ └── main.py
│ │ ├── LinearRegressionTrainer
│ │ ├── __init__.py
│ │ └── main.py
│ │ ├── LogarithmicRegressionTrainer
│ │ ├── __init__.py
│ │ └── main.py
│ │ ├── LogisticRegressionTrainer
│ │ ├── __init__.py
│ │ └── main.py
│ │ ├── PolynomialRegressionTrainer
│ │ ├── __init__.py
│ │ └── main.py
│ │ ├── SGDRegressorTrainer
│ │ ├── __init__.py
│ │ └── main.py
│ │ ├── SVRRegressorTrainer
│ │ ├── __init__.py
│ │ └── main.py
│ │ ├── XGBoostTrainer
│ │ ├── __init__.py
│ │ └── main.py
│ │ ├── XgboostFitTrainer
│ │ ├── __init__.py
│ │ └── main.py
│ │ ├── __init__.py
│ │ ├── curvefit.py
│ │ ├── scikit.py
│ │ └── xgboost_interface.py
│ └── util
│ ├── __init__.py
│ ├── config.py
│ ├── extract_types.py
│ ├── format.py
│ ├── loader.py
│ ├── prom_types.py
│ ├── saver.py
│ ├── similarity.py
│ └── train_types.py
└── tests
├── README.md
├── __init__.py
├── client_load_tester.py
├── common_plot.py
├── data
├── machine
│ └── spec.json
├── node_type_index.json
└── prom_output
│ ├── idle.json
│ └── prom_response.json
├── e2e_test.sh
├── estimator_model_request_test.py
├── estimator_model_test.py
├── estimator_power_request_test.py
├── extractor_test.py
├── http_server.py
├── isolator_test.py
├── minimal_trainer.py
├── model_select_test.py
├── model_server_test.py
├── model_tester.py
├── offline_trainer_test.py
├── pipeline_test.py
├── prom_test.py
├── trainer_test.py
├── weight_model_request_test.py
└── xgboost_test.py
/.github/ISSUE_TEMPLATE/document.yaml:
--------------------------------------------------------------------------------
1 | name: Documentation Issue
2 | description: Provide supporting details for documentation issue
3 | labels: kind/documentation
4 | body:
5 | - type: textarea
6 | id: document
7 | attributes:
8 | label: Which document would you like to address?
9 | description: Include the link to the document if applicable
10 | validations:
11 | required: true
12 |
13 | - type: textarea
14 | id: documentFixDetail
15 | attributes:
16 | label: What is the issue?
17 | validations:
18 | required: true
19 |
20 | - type: textarea
21 | id: documentFixSuggestion
22 | attributes:
23 | label: How do you suggest this be fixed?
24 | validations:
25 | required: false
26 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yaml:
--------------------------------------------------------------------------------
1 | name: Enhancement Tracking Issue
2 | description: Provide supporting details for a feature in development
3 | labels: kind/feature
4 | body:
5 | - type: textarea
6 | id: feature
7 | attributes:
8 | label: What would you like to be added?
9 | description: |
10 | Feature requests are unlikely to make progress as issues. Please consider engaging with SIGs on slack and mailing lists, instead.
11 | A proposal that works through the design along with the implications of the change can be opened as a KEP.
12 | See https://git.k8s.io/enhancements/keps#kubernetes-enhancement-proposals-keps
13 | validations:
14 | required: true
15 |
16 | - type: textarea
17 | id: rationale
18 | attributes:
19 | label: Why is this needed?
20 | validations:
21 | required: true
22 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: pip
4 | directory: /
5 | schedule:
6 | day: monday
7 | interval: weekly
8 | groups:
9 | pip:
10 | patterns:
11 | - "*"
12 | - package-ecosystem: github-actions
13 | directory: /
14 | schedule:
15 | day: monday
16 | interval: weekly
17 | groups:
18 | github-actions:
19 | patterns:
20 | - "*"
21 | - package-ecosystem: docker
22 | directory: /
23 | schedule:
24 | day: monday
25 | interval: weekly
26 | groups:
27 | docker:
28 | patterns:
29 | - "*"
30 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | # Checklist for PR Author
2 |
3 | ---
4 |
5 | In addition to approval, the author must confirm the following checklist:
6 |
7 | - [ ] Run the following command to format your code:
8 |
9 | ```bash
10 | make fmt
11 | ```
12 |
13 | - [ ] Create issues for unresolved comments and link them to this PR. Use one of the following labels:
14 | - `must-fix`: The logic appears incorrect and must be addressed.
15 | - `minor`: Typos, minor issues, or potential refactoring for better readability.
16 | - `nit`: Trivial issues like extra spaces, commas, etc.
17 |
--------------------------------------------------------------------------------
/.github/workflows/collect-train.yml:
--------------------------------------------------------------------------------
1 | # run manually when data collection is needed
2 | on: # yamllint disable-line rule:truthy
3 | workflow_dispatch:
4 |
5 | jobs:
6 | collect-data:
7 | uses: ./.github/workflows/collect-data-self-hosted.yml
8 | strategy:
9 | matrix:
10 | instance_type: [i3.metal]
11 | max-parallel: 1
12 | with:
13 | instance_type: ${{ matrix.instance_type }}
14 | ami_id: ami-0e4d0bb9670ea8db0
15 | github_repo: ${{ github.repository }}
16 | model_server_image: ${{ vars.IMAGE_REPO }}/kepler_model_server:latest
17 | secrets:
18 | self_hosted_github_token: ${{ secrets.GH_SELF_HOSTED_RUNNER_TOKEN }}
19 | aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
20 | aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
21 | security_group_id: ${{ secrets.AWS_SECURITY_GROUP_ID }}
22 | aws_region: ${{ secrets.AWS_REGION }}
23 |
24 | train-model:
25 | needs: [collect-data]
26 | strategy:
27 | matrix:
28 | instance_type: [i3.metal]
29 | uses: ./.github/workflows/train-model.yml
30 | with:
31 | pipeline_name: std_v0.7.11
32 | instance_type: ${{ matrix.instance_type }}
33 | ami_id: ami-0e4d0bb9670ea8db0
34 | github_repo: ${{ github.repository }}
35 | model_server_image: ${{ vars.IMAGE_REPO }}/kepler_model_server:latest
36 | trainers: LogisticRegressionTrainer,ExponentialRegressionTrainer,SGDRegressorTrainer,GradientBoostingRegressorTrainer,XgboostFitTrainer
37 | secrets:
38 | self_hosted_github_token: ${{ secrets.GH_SELF_HOSTED_RUNNER_TOKEN }}
39 | aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
40 | aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
41 | aws_region: ${{ secrets.AWS_REGION }}
42 |
--------------------------------------------------------------------------------
/.github/workflows/commit-msg.yml:
--------------------------------------------------------------------------------
1 | name: Commit message check
2 |
3 | on: # yamllint disable-line rule:truthy
4 | pull_request:
5 |
6 | jobs:
7 | check-commit-message:
8 | runs-on: ubuntu-latest
9 | steps:
10 | - name: Checkout code
11 | uses: actions/checkout@v4
12 |
13 | - name: Check commit message
14 | uses: webiny/action-conventional-commits@v1.3.0
15 | with:
16 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
17 |
--------------------------------------------------------------------------------
/.github/workflows/integration-test.yml:
--------------------------------------------------------------------------------
1 | name: Integration Test
2 | on: # yamllint disable-line rule:truthy
3 | workflow_call:
4 | inputs:
5 | base_change:
6 | description: Change flag on base image
7 | required: true
8 | type: string
9 | docker_secret:
10 | description: Secret check
11 | required: true
12 | type: string
13 | image_repo:
14 | description: The image repo to use
15 | required: true
16 | type: string
17 | image_tag:
18 | description: The image tag to use
19 | required: true
20 | type: string
21 | kepler_tag:
22 | description: Kepler image tag
23 | required: true
24 | type: string
25 | additional_opts:
26 | description: additional deployment opts
27 | required: true
28 | type: string
29 |
30 | env:
31 | BASE_IMAGE: ${{ inputs.image_repo }}/kepler_model_server_base:${{ inputs.image_tag }}
32 | IMAGE: localhost:5001/kepler_model_server:devel
33 | KEPLER_IMAGE: quay.io/sustainable_computing_io/kepler:${{ inputs.kepler_tag }}
34 | DEFAULT_MODEL_SERVER_BASE_IMAGE: quay.io/sustainable_computing_io/kepler_model_server_base:latest
35 |
36 | jobs:
37 | run-integration:
38 | runs-on: ubuntu-20.04
39 | steps:
40 | - name: use Kepler action to deploy cluster
41 | uses: sustainable-computing-io/kepler-action@v0.0.9
42 | with:
43 | runningBranch: kind
44 | cluster_provider: kind
45 | - name: load kepler image
46 | run: |
47 | docker pull ${{ env.KEPLER_IMAGE }}
48 | kind load docker-image ${{ env.KEPLER_IMAGE }}
49 | - name: checkout
50 | uses: actions/checkout@v4
51 | - name: set up QEMU
52 | uses: docker/setup-qemu-action@v3
53 | - name: set up Docker Buildx
54 | uses: docker/setup-buildx-action@v3
55 | - name: Replace value in Dockerfile if base changes
56 | if: ${{ (inputs.base_change == 'true') && (inputs.docker_secret == 'true') }}
57 | run: |
58 | sed -i "s|${{ env.DEFAULT_MODEL_SERVER_BASE_IMAGE }}|${{ env.BASE_IMAGE }}|" dockerfiles/Dockerfile
59 | - name: Replace value in Dockerfile.test if base changes
60 | if: ${{ (inputs.base_change == 'true') && (inputs.docker_secret == 'true') }}
61 | run: |
62 | sed -i "s|${{ env.DEFAULT_MODEL_SERVER_BASE_IMAGE }}|${{ env.BASE_IMAGE }}|" dockerfiles/Dockerfile.test
63 | - name: build Kepler model server and test image and push to local registry
64 | run: make build build-test push push-test
65 | - name: set up Kustomize
66 | run: |
67 | curl -o install_kustomize.sh https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh
68 | chmod +x install_kustomize.sh
69 | ./install_kustomize.sh 5.3.0
70 | chmod +x kustomize
71 | mv kustomize /usr/local/bin/
72 | - name: test deploying with only estimator
73 | run: |
74 | make deploy
75 | ./tests/e2e_test.sh --estimator ${{ inputs.additional_opts }}
76 | make cleanup
77 | env:
78 | OPTS: ESTIMATOR
79 | KEPLER_IMAGE_VERSION: ${{ inputs.kepler_tag }}
80 | - name: test deploying with only server
81 | run: |
82 | make deploy
83 | ./tests/e2e_test.sh --server ${{ inputs.additional_opts }}
84 | make cleanup
85 | env:
86 | OPTS: SERVER
87 | KEPLER_IMAGE_VERSION: ${{ inputs.kepler_tag }}
88 | - name: test deploying with estimator and model server
89 | run: |
90 | make deploy
91 | ./tests/e2e_test.sh --estimator --server ${{ inputs.additional_opts }}
92 | make cleanup
93 | env:
94 | OPTS: ESTIMATOR SERVER
95 | KEPLER_IMAGE_VERSION: ${{ inputs.kepler_tag }}
96 |
97 | - name: upload artifacts on failure
98 | if: ${{ failure() }}
99 | uses: actions/upload-artifact@v4
100 | with:
101 | name: integration-test-artifacts
102 | path: tmp/e2e
103 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
1 | name: Run linters and formatters
2 |
3 | on: # yamllint disable-line rule:truthy
4 | pull_request:
5 |
6 | jobs:
7 | markdown-lint:
8 | runs-on: ubuntu-latest
9 | steps:
10 | # checkout source code
11 | - name: Checkout code
12 | uses: actions/checkout@v4
13 |
14 | # setup Python environment
15 | - name: Set up Python
16 | uses: actions/setup-python@v5
17 | with:
18 | python-version: "3.10"
19 |
20 | # install hatch
21 | - name: Install hatch
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install hatch
25 |
26 | # scan for markdown linting errors
27 | - name: Run pymarkdownlnt on markdown files
28 | shell: bash
29 | run: |
30 | make lint
31 |
32 | # run hatch fmt
33 | - name: Run formatter using hatch
34 | shell: bash
35 | run: |
36 | make fmt
37 | git diff --exit-code
38 |
--------------------------------------------------------------------------------
/.github/workflows/push-to-main.yml:
--------------------------------------------------------------------------------
1 | on: # yamllint disable-line rule:truthy
2 | push:
3 | branches:
4 | - main
5 |
6 | env:
7 | TAG: latest
8 |
9 | jobs:
10 | check-branch:
11 | runs-on: ubuntu-latest
12 |
13 | outputs:
14 | tag: ${{ steps.image-tag.outputs.tag }}
15 |
16 | steps:
17 | - uses: actions/checkout@v4
18 | - name: Find Image Tag
19 | id: image-tag
20 | env:
21 | BRANCH: ${{ github.ref_name }}
22 | COMMIT: ${{ github.sha }}
23 | run: |
24 | if [ "${{ github.event_name }}" == 'pull_request' ]; then
25 | echo "tag=pr-${{ github.event.number }}" >> "$GITHUB_OUTPUT"
26 | else
27 | if [ "$BRANCH" == "main" ]; then
28 | echo "tag=${{ env.TAG }}" >> "$GITHUB_OUTPUT"
29 | else
30 | echo "tag=$COMMIT" >> "$GITHUB_OUTPUT"
31 | fi
32 | fi
33 |
34 | check-change:
35 | runs-on: ubuntu-latest
36 |
37 | outputs:
38 | base: ${{ steps.filter.outputs.base }}
39 | modeling: ${{ steps.filter.outputs.modeling }}
40 | s3: ${{ steps.filter.outputs.s3 }}
41 |
42 | steps:
43 | - uses: actions/checkout@v4
44 | - uses: dorny/paths-filter@v3
45 | id: filter
46 | with:
47 | filters: |
48 | base:
49 | - 'pyproject.toml'
50 | - 'dockerfiles/Dockerfile.base'
51 | - '.github/workflows/build-push.yml'
52 | modeling:
53 | - 'src/**'
54 | - 'model_training/**'
55 | - 'hack/**'
56 | - '.github/workflows/train-model.yml'
57 | s3:
58 | - 'model_training/s3/**'
59 |
60 | build-push:
61 | needs: [check-change, check-branch]
62 | uses: ./.github/workflows/build-push.yml
63 | with:
64 | base_change: ${{ needs.check-change.outputs.base }}
65 | s3_change: ${{ needs.check-change.outputs.s3 }}
66 | image_repo: ${{ vars.IMAGE_REPO }}
67 | image_tag: ${{ needs.check-branch.outputs.tag }}
68 | push: true
69 | secrets:
70 | docker_username: ${{ secrets.BOT_NAME }}
71 | docker_password: ${{ secrets.BOT_TOKEN }}
72 |
73 | train-model:
74 | needs: [check-change, check-branch, build-push]
75 | if: ${{ needs.check-change.outputs.modeling == 'true' }}
76 | strategy:
77 | matrix:
78 | instance_type: [i3.metal]
79 | uses: ./.github/workflows/train-model.yml
80 | with:
81 | pipeline_name: std_v0.7.11
82 | instance_type: ${{ matrix.instance_type }}
83 | ami_id: ami-0e4d0bb9670ea8db0
84 | github_repo: ${{ github.repository }}
85 | model_server_image: ${{ vars.IMAGE_REPO }}/kepler_model_server:${{ needs.check-branch.outputs.tag }}
86 | trainers: LogisticRegressionTrainer,ExponentialRegressionTrainer,SGDRegressorTrainer,GradientBoostingRegressorTrainer,XgboostFitTrainer
87 | secrets:
88 | self_hosted_github_token: ${{ secrets.GH_SELF_HOSTED_RUNNER_TOKEN }}
89 | aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
90 | aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
91 | aws_region: ${{ secrets.AWS_REGION }}
92 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 | on: # yamllint disable-line rule:truthy
3 | workflow_dispatch:
4 | inputs:
5 | tag:
6 | description: Tag name, e.g. 0.7.11
7 | default: ""
8 | required: true
9 |
10 | jobs:
11 | build:
12 | name: Upload Release Asset
13 | permissions:
14 | contents: write
15 | runs-on: ubuntu-latest
16 | steps:
17 | - name: Checkout code
18 | uses: actions/checkout@v4
19 |
20 | - name: Login to Quay.io
21 | uses: docker/login-action@v3
22 | with:
23 | registry: ${{ vars.IMAGE_REGISTRY }}
24 | username: ${{ secrets.BOT_NAME }}
25 | password: ${{ secrets.BOT_TOKEN }}
26 |
27 | - name: Git set user
28 | shell: bash
29 | run: |
30 | git config user.name "$USERNAME"
31 | git config user.email "$USERNAME-bot@users.noreply.github.com"
32 | env:
33 | USERNAME: ${{ github.actor }}
34 |
35 | - name: Update the VERSION
36 | run: |
37 | echo "$VERSION" > VERSION
38 | env:
39 | VERSION: ${{ github.event.inputs.tag }}
40 |
41 | - name: Build model-server-base
42 | run: |
43 | make build-base
44 | env:
45 | IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY }}
46 |
47 | - name: Push model-server-base
48 | run: |
49 | make push-base
50 | env:
51 | IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY }}
52 |
53 | - name: Update base in model-server dockerfile
54 | run: |
55 | sed -i "s/model_server_base:.*/model_server_base:v$VERSION/g" ./dockerfiles/Dockerfile
56 | env:
57 | VERSION: ${{ github.event.inputs.tag }}
58 |
59 | - name: Build model-server
60 | run: |
61 | make build
62 | env:
63 | IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY }}
64 |
65 | - name: Create tag
66 | run: |
67 | git add VERSION ./dockerfiles/Dockerfile
68 | git commit -m "ci: update VERSION to $VERSION"
69 | git tag -a "v$VERSION" -m "$VERSION"
70 | git show --stat
71 | env:
72 | VERSION: ${{ github.event.inputs.tag }}
73 |
74 | - name: Push Images
75 | run: |
76 | make push
77 | env:
78 | IMAGE_REGISTRY: ${{ vars.IMAGE_REGISTRY }}
79 |
80 | - name: Push Release tag
81 | run: |
82 | git push --follow-tags
83 |
84 | - name: Create Release
85 | id: create_release
86 | uses: actions/create-release@v1
87 | env:
88 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
89 | with:
90 | tag_name: v${{ github.event.inputs.tag }}
91 | release_name: v${{ github.event.inputs.tag }}-release
92 | draft: false
93 | prerelease: false
94 |
95 | create-release-branch:
96 | name: Create Release Branch
97 | permissions:
98 | contents: write
99 | needs: build
100 | runs-on: ubuntu-latest
101 | steps:
102 | - name: Create release branch
103 | uses: peterjgrainger/action-create-branch@v3.0.0
104 | env:
105 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
106 | with:
107 | branch: v${{ github.event.inputs.tag }}-release
108 | sha: ${{ github.event.pull_request.head.sha }}
109 |
--------------------------------------------------------------------------------
/.github/workflows/train.yml:
--------------------------------------------------------------------------------
1 | # run manually when retraining is needed
2 | name: Retrain All Machines
3 | on: # yamllint disable-line rule:truthy
4 | workflow_dispatch:
5 |
6 | jobs:
7 |
8 | check-change:
9 | runs-on: ubuntu-latest
10 |
11 | outputs:
12 | modeling: ${{ steps.filter.outputs.modeling }}
13 |
14 | steps:
15 | - uses: actions/checkout@v4
16 | - uses: dorny/paths-filter@v3
17 | id: filter
18 | with:
19 | filters: |
20 | modeling:
21 | - 'src/**'
22 | - 'model_training/**'
23 | - 'hack/**'
24 | - '.github/workflows/train-model.yml'
25 |
26 | train-model:
27 | needs: [check-change]
28 | if: ${{ needs.check-change.outputs.modeling == 'true' }}
29 | strategy:
30 | matrix:
31 | instance_type: [i3.metal]
32 | uses: ./.github/workflows/train-model.yml
33 | with:
34 | pipeline_name: std_v0.7.11
35 | instance_type: ${{ matrix.instance_type }}
36 | ami_id: ami-0e4d0bb9670ea8db0
37 | github_repo: ${{ github.repository }}
38 | model_server_image: ${{ vars.IMAGE_REPO }}/kepler_model_server:latest
39 | trainers: LogisticRegressionTrainer,ExponentialRegressionTrainer,SGDRegressorTrainer,GradientBoostingRegressorTrainer,XgboostFitTrainer
40 | secrets:
41 | self_hosted_github_token: ${{ secrets.GH_SELF_HOSTED_RUNNER_TOKEN }}
42 | aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
43 | aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
44 | aws_region: ${{ secrets.AWS_REGION }}
45 |
--------------------------------------------------------------------------------
/.github/workflows/unit-test.yml:
--------------------------------------------------------------------------------
1 | name: Unit Test
2 |
3 | on: # yamllint disable-line rule:truthy
4 | workflow_call:
5 | secrets:
6 | docker_username:
7 | description: Docker username
8 | required: false
9 | docker_password:
10 | description: Docker password
11 | required: false
12 | inputs:
13 | base_change:
14 | description: Change flag on base image
15 | required: true
16 | type: string
17 |
18 | jobs:
19 | unit-test:
20 | runs-on: ubuntu-latest
21 | steps:
22 | - uses: actions/checkout@v4
23 | - name: Set up Docker
24 | uses: docker/setup-buildx-action@v3
25 | - name: Build test with base image
26 | if: ${{ inputs.base_change != 'true' }}
27 | run: make build-test
28 | - name: Build test without base image
29 | if: ${{ inputs.base_change == 'true' }}
30 | run: make build-test-nobase
31 | - name: Test pipeline # need to run first to build the models
32 | run: make test-pipeline
33 | - name: Test model server
34 | run: make test-model-server
35 | timeout-minutes: 5
36 | - name: Test estimator
37 | run: make test-estimator
38 | timeout-minutes: 5
39 | - name: Test offline trainer
40 | run: make test-offline-trainer
41 | - name: Test model server select
42 | run: make test-model-server-select
43 | timeout-minutes: 5
44 | - name: Test model server select via estimator
45 | run: make test-model-server-estimator-select
46 | timeout-minutes: 5
47 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # models
2 | server/train/local/
3 | server/models
4 | */*/download
5 |
6 | # Byte-compiled / optimized / DLL files
7 | __pycache__/
8 | *.py[cod]
9 | *$py.class
10 |
11 | # C extensions
12 | *.so
13 |
14 | # Distribution / packaging
15 | .Python
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | wheels/
28 | pip-wheel-metadata/
29 | share/python-wheels/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 | MANIFEST
34 |
35 | # PyInstaller
36 | # Usually these files are written by a python script from a template
37 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
38 | *.manifest
39 | *.spec
40 |
41 | # Installer logs
42 | pip-log.txt
43 | pip-delete-this-directory.txt
44 |
45 | # Unit test / coverage reports
46 | htmlcov/
47 | .tox/
48 | .nox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | *.py,cover
56 | .hypothesis/
57 | .pytest_cache/
58 |
59 | # Translations
60 | *.mo
61 | *.pot
62 |
63 | # Django stuff:
64 | *.log
65 | local_settings.py
66 | db.sqlite3
67 | db.sqlite3-journal
68 |
69 | # Flask stuff:
70 | instance/
71 | .webassets-cache
72 |
73 | # Scrapy stuff:
74 | .scrapy
75 |
76 | # Sphinx documentation
77 | docs/_build/
78 |
79 | # PyBuilder
80 | target/
81 |
82 | # Jupyter Notebook
83 | .ipynb_checkpoints
84 |
85 | # IPython
86 | profile_default/
87 | ipython_config.py
88 |
89 | # pyenv
90 | .python-version
91 |
92 | # pipenv
93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
96 | # install all needed dependencies.
97 | #Pipfile.lock
98 |
99 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
100 | __pypackages__/
101 |
102 | # Celery stuff
103 | celerybeat-schedule
104 | celerybeat.pid
105 |
106 | # SageMath parsed files
107 | *.sage.py
108 |
109 | # Environments
110 | .env
111 | .venv
112 | env/
113 | venv/
114 | ENV/
115 | env.bak/
116 | venv.bak/
117 |
118 | # Spyder project settings
119 | .spyderproject
120 | .spyproject
121 |
122 | # Rope project settings
123 | .ropeproject
124 |
125 | # mkdocs documentation
126 | /site
127 |
128 | # mypy
129 | .mypy_cache/
130 | .dmypy.json
131 | dmypy.json
132 |
133 | # Pyre type checker
134 | .pyre/
135 |
136 | tests/download/*
137 | .DS_Store
138 | */.DS_Store
139 | */*/.DS_Store
140 | */*/*/.DS_Store
141 |
142 | /src/kepler_model/models/
143 | /tests/models/
144 | /src/resource/
145 | tests/data/extractor_output
146 | tests/data/isolator_output
147 | tests/data/offline_trainer_output
148 | tests/data/plot_output
149 | model_training/*data*
150 | model_training/tekton/secret
151 | local-dev-cluster
152 | tmp
153 | tests/db-models
154 | db-models
155 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.analysis.extraPaths": [
3 | "./src/util"
4 | ]
5 | }
--------------------------------------------------------------------------------
/.yamllint.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | extends: default
3 | rules:
4 | line-length: disable
5 | document-start: disable
6 | comments:
7 | min-spaces-from-content: 1
8 | quoted-strings:
9 | required: only-when-needed
10 | extra-required:
11 | - ^.*:\s.*$
12 | - ^.*:$
13 | quote-type: double
14 | ignore:
15 | - model_training/deployment/cpe-operator.yaml
16 | - tmp/
17 |
--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | 0.7.12
2 |
--------------------------------------------------------------------------------
/cmd/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # -*- coding: utf-8 -*-
4 | import re
5 | import sys
6 |
7 | from kepler_model.cmd.main import run
8 |
9 | if __name__ == "__main__":
10 | sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0])
11 | sys.exit(run())
12 |
--------------------------------------------------------------------------------
/contributing.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | [Get started with Kepler Model Server.](https://sustainable-computing.io/kepler_model_server/get_started/)
4 |
5 | - The main source code is in the [src directory](./src/).
6 |
7 | ## PR Hands-on
8 |
9 | - Create a related [issue](https://github.com/sustainable-computing-io/kepler-model-server/issues) with your name assigned first (if one does not already exist).
10 |
11 | - Set the required secrets and environment variables for local repository tests if needed. See the table below.
12 |
13 | | Objective | Required Secret | Required Environment |
14 | | --------- | --------------- |----------------------|
15 | | Push to private repo |BOT_NAME, BOT_TOKEN | IMAGE_REPO |
16 | | Change on base image | BOT_NAME, BOT_TOKEN | IMAGE_REPO |
17 | | Save data/models to AWS COS | AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY,AWS_REGION | |
18 |
19 | ## Improve components in training pipelines
20 |
21 | Learn more about the [Training Pipeline](https://sustainable-computing.io/kepler_model_server/pipeline/)
22 |
23 | ### Introduce new feature group
24 |
25 | - Define a new feature group name in `FeatureGroup` and update the metric list map `FeatureGroups` in [train types](./src/kepler_model/util/train_types.py), as sketched below
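
A minimal sketch of where such a group would be registered (this only illustrates the shape of `train_types.py`; the metric names below are placeholders, not real Kepler metrics):

```python
# Sketch only: shows where a new feature group is registered.
# FeatureGroup is the enum of group names and FeatureGroups maps each
# group to its metric list; the metric names here are placeholders.
from enum import Enum, auto


class FeatureGroup(Enum):
    BPFOnly = auto()
    MyNewFeatures = auto()  # 1. add the new group name


FeatureGroups = {
    FeatureGroup.BPFOnly: ["bpf_cpu_time_placeholder"],
    FeatureGroup.MyNewFeatures: ["my_metric_a", "my_metric_b"],  # 2. map it to its metrics
}
```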
26 |
27 | ### Introduce new energy sources
28 |
29 | - Define a new energy source and its power components in the map `PowerSourceMap` in [train types](./src/kepler_model/util/train_types.py), as sketched below
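
A sketch of the corresponding entry (the existing source names `rapl-sysfs` and `acpi` follow the model URLs used elsewhere in this repository; the new source name and the component lists are illustrative):

```python
# Sketch only: PowerSourceMap associates an energy source with the list of
# power components it reports; "my_meter" and the component lists are
# illustrative and should be checked against util/train_types.py.
PowerSourceMap = {
    "rapl-sysfs": ["package", "core", "uncore", "dram"],
    "acpi": ["platform"],
    "my_meter": ["platform"],  # new energy source and its components
}
```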
30 |
31 | ### Improve preprocessing method
32 |
33 | - [extractor](./src/kepler_model/train/extractor/): convert numerically aggregated metrics to per-second values
34 | - [isolator](./src/kepler_model/train/isolator/): isolate background (idle) power from the collected power
35 |
36 | ### Introduce new learning method
37 |
38 | - [trainer](./src/kepler_model/train/trainer/): apply a learning method to build a model from the extracted and isolated data (see the sketch below)
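
For a scikit-learn based method, a new trainer is usually a small subclass that supplies the underlying estimator. The following is a hypothetical sketch modeled on the existing trainers under `train/trainer/`; the `ScikitTrainer` base class, the `init_model` hook, and the constructor arguments should be verified against `src/kepler_model/train/trainer/scikit.py` before use:

```python
# Hypothetical sketch of a new scikit-learn-based trainer. The base class,
# hook name, and constructor signature mirror the pattern of the existing
# trainers but must be checked against the actual code before use.
from sklearn.linear_model import Ridge

from kepler_model.train.trainer.scikit import ScikitTrainer


class RidgeRegressionTrainer(ScikitTrainer):
    def __init__(self, energy_components, feature_group, energy_source, node_level, pipeline_name):
        super().__init__(energy_components, feature_group, energy_source, node_level, pipeline_name=pipeline_name)
        self.fe_files = []

    def init_model(self):
        # return the scikit-learn estimator this trainer fits
        return Ridge()
```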
39 |
40 | ## Model training
41 |
42 | Learn more about [model training](./model_training/)
43 |
44 | ### Introduce new benchmarks
45 |
46 | ### Tekton
47 |
48 | Create a workload `Task` and provide an example `Pipeline` to run it.
49 |
50 | ### Add new trained models
51 |
52 | TBD
53 |
54 | ## Source improvement
55 |
56 | Any improvement in `src` and `cmd`.
57 |
58 | ## Test and CI improvement
59 |
60 | Any improvement in `tests`, `dockerfiles`, `manifests` and `.github/workflows`
61 |
62 | ## Documentation
63 |
64 | Detailed documentation should be posted to [kepler-doc](https://github.com/sustainable-computing-io/kepler-doc) repository.
65 |
--------------------------------------------------------------------------------
/dockerfiles/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM quay.io/sustainable_computing_io/kepler_model_server_base:v0.7.12
2 |
3 | WORKDIR /kepler_model
4 | ENV PYTHONPATH=/kepler_model
5 |
6 | COPY pyproject.toml .
7 | COPY README.md .
8 | COPY cmd/ cmd/
9 | COPY src/ src/
10 |
11 | RUN pip install --no-cache-dir .
12 |
13 | # port for Model Server
14 | EXPOSE 8100
15 | # port for Online Trainer (TODO: reserved for event-based online training)
16 | EXPOSE 8101
17 | # port for Offline Trainer
18 | EXPOSE 8102
19 |
20 | ENTRYPOINT ["bash", "-c"]
21 | CMD ["kepler-model"]
22 |
--------------------------------------------------------------------------------
/dockerfiles/Dockerfile.base:
--------------------------------------------------------------------------------
1 | FROM python:3.10-slim
2 | #
3 | # NOTE: This file contains all tools and dependencies needed for
4 | # setting up the development and testing environment
5 |
6 | # Prevents Python from writing pyc files.
7 | ENV PYTHONDONTWRITEBYTECODE=1
8 |
9 | # Keeps Python from buffering stdout and stderr to avoid situations where
10 | # the application crashes without emitting any logs due to buffering.
11 | ENV PYTHONUNBUFFERED=1
12 |
13 | RUN pip install --no-cache-dir --upgrade pip && \
14 | python -m pip install --no-cache-dir hatch && \
15 | pip cache purge
16 |
17 | WORKDIR /kepler_model
18 | ENV PYTHONPATH=/kepler_model
19 |
20 | COPY pyproject.toml .
21 |
22 | # NOTE: README.md and __about__.py are referenced in pyproject.toml
23 | # so they are copied into the image for pip install to succeed
24 | COPY README.md .
25 |
26 | RUN mkdir -p src/kepler_model
27 | COPY src/kepler_model/__init__.py src/kepler_model/
28 | COPY src/kepler_model/__about__.py src/kepler_model/
29 |
30 | RUN pip install --no-cache-dir . && \
31 | pip cache purge
32 |
--------------------------------------------------------------------------------
/dockerfiles/Dockerfile.dockerignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | src/resource/
3 | src/kepler_model/models/
4 | tests/models/
5 |
--------------------------------------------------------------------------------
/dockerfiles/Dockerfile.test:
--------------------------------------------------------------------------------
1 | FROM quay.io/sustainable_computing_io/kepler_model_server_base:latest
2 |
3 | # Prevents Python from writing pyc files.
4 | ENV PYTHONDONTWRITEBYTECODE=1
5 |
6 | # Keeps Python from buffering stdout and stderr to avoid situations where
7 | # the application crashes without emitting any logs due to buffering.
8 | ENV PYTHONUNBUFFERED=1
9 |
10 |
11 | WORKDIR /kepler_model
12 | ENV PYTHONPATH=/kepler_model
13 |
14 | COPY pyproject.toml .
15 | COPY README.md .
16 | COPY cmd/ cmd/
17 | COPY src/ src/
18 | COPY tests/ tests/
19 |
20 | RUN pip install --no-cache-dir . && \
21 | pip cache purge
22 |
23 | RUN mkdir -p /mnt/models
24 |
25 | # port for Model Server
26 | EXPOSE 8100
27 | # port for Online Trainer (TODO: reserved for event-based online training)
28 | EXPOSE 8101
29 | # port for Offline Trainer
30 | EXPOSE 8102
31 |
32 | CMD ["model-server"]
33 |
--------------------------------------------------------------------------------
/dockerfiles/Dockerfile.test-nobase:
--------------------------------------------------------------------------------
1 | FROM python:3.10-slim
2 |
3 | # NOTE: This file contains all tools and dependencies needed for
4 | # setting up the development and testing environment
5 |
6 | # Prevents Python from writing pyc files.
7 | ENV PYTHONDONTWRITEBYTECODE=1
8 |
9 | # Keeps Python from buffering stdout and stderr to avoid situations where
10 | # the application crashes without emitting any logs due to buffering.
11 | ENV PYTHONUNBUFFERED=1
12 |
13 | RUN python -m pip install --no-cache-dir hatch && \
14 | pip cache purge
15 |
16 | WORKDIR /kepler_model
17 | ENV PYTHONPATH=/kepler_model
18 |
19 |
20 | COPY pyproject.toml .
21 |
22 | # NOTE: README.md and src/../__about__.py are referenced in pyproject.toml
23 | # so that they are copied into the image for pip install to succeed
24 | COPY README.md .
25 | COPY cmd/ cmd/
26 | COPY src/ src/
27 | COPY tests/ tests/
28 |
29 | RUN pip install --no-cache-dir . && \
30 | pip cache purge
31 |
32 | RUN hatch env create
33 |
34 |
35 | RUN mkdir -p /mnt/models
36 | # port for Model Server
37 | EXPOSE 8100
38 | # port for Online Trainer (TODO: reserved for event-based online training)
39 | EXPOSE 8101
40 | # port for Offline Trainer
41 | EXPOSE 8102
42 |
43 | CMD ["model-server"]
44 |
--------------------------------------------------------------------------------
/dockerfiles/Dockerfile.test-nobase.dockerignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | src/resource/
3 | src/kepler_model/models/
4 | tests/models/
5 |
--------------------------------------------------------------------------------
/dockerfiles/Dockerfile.test.dockerignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | src/resource/
3 | src/kepler_model/models/
4 | tests/models/
5 |
--------------------------------------------------------------------------------
/docs/developer/README.md:
--------------------------------------------------------------------------------
1 | # Developer Guide
2 |
3 | - Temporarily add `__init__.py` to all directories
4 |
5 | ```bash
6 | find ./src -type d -exec touch {}/__init__.py \;
7 | ```
8 |
9 | - Generate `classes.plantuml` and `packages.plantuml` using the following commands
10 |
11 | ```bash
12 | pyreverse --colorized --output plantuml --module-names y --show-stdlib --show-associated 2 --show-ancestors 1 --verbose -d umls/server/ --source-roots ./src/ ./src/server/
13 | pyreverse --colorized --output plantuml --module-names y --show-stdlib --show-associated 2 --show-ancestors 1 --verbose -d umls/estimate/ --source-roots ./src/ ./src/estimate/
14 | pyreverse --colorized --output plantuml --module-names y --show-stdlib --show-associated 2 --show-ancestors 1 --verbose -d umls/train/ --source-roots ./src/ ./src/train/
15 | pyreverse --colorized --output plantuml --module-names y --show-stdlib --show-associated 2 --show-ancestors 1 --verbose -d umls/train/trainer/ --source-roots ./src/ ./src/train/trainer/
16 | ```
17 |
18 | - Use [plantuml](https://plantuml.com/download) to convert the plantuml files to `svg` files.
19 | The NeoVim plugin `neovim-soil` was used to generate the svg files from the plantuml files.
20 |
--------------------------------------------------------------------------------
/docs/developer/estimate/packages.plantuml:
--------------------------------------------------------------------------------
1 | @startuml packages
2 | set namespaceSeparator none
3 | package "estimate" as estimate #77AADD {
4 | }
5 | package "estimate.archived_model" as estimate.archived_model #77AADD {
6 | }
7 | package "estimate.estimator" as estimate.estimator #77AADD {
8 | }
9 | package "estimate.model" as estimate.model #99DDFF {
10 | }
11 | package "estimate.model.curvefit_model" as estimate.model.curvefit_model #99DDFF {
12 | }
13 | package "estimate.model.estimate_common" as estimate.model.estimate_common #99DDFF {
14 | }
15 | package "estimate.model.keras_model" as estimate.model.keras_model #99DDFF {
16 | }
17 | package "estimate.model.model" as estimate.model.model #99DDFF {
18 | }
19 | package "estimate.model.scikit_model" as estimate.model.scikit_model #99DDFF {
20 | }
21 | package "estimate.model.xgboost_model" as estimate.model.xgboost_model #99DDFF {
22 | }
23 | package "estimate.model_server_connector" as estimate.model_server_connector #77AADD {
24 | }
25 | estimate --> estimate.model
26 | estimate.archived_model --> estimate.model_server_connector
27 | estimate.estimator --> estimate.archived_model
28 | estimate.estimator --> estimate.model
29 | estimate.estimator --> estimate.model_server_connector
30 | estimate.model.curvefit_model --> estimate.model.estimate_common
31 | estimate.model.keras_model --> estimate.model.estimate_common
32 | estimate.model.model --> estimate.model.curvefit_model
33 | estimate.model.model --> estimate.model.scikit_model
34 | estimate.model.model --> estimate.model.xgboost_model
35 | estimate.model.scikit_model --> estimate.model.estimate_common
36 | estimate.model.xgboost_model --> estimate.model.estimate_common
37 | @enduml
38 |
--------------------------------------------------------------------------------
/docs/developer/server/classes.plantuml:
--------------------------------------------------------------------------------
1 | @startuml classes
2 | set namespaceSeparator none
3 | class "server.model_server.ModelRequest" as server.model_server.ModelRequest #77AADD {
4 | filter : str
5 | metrics
6 | node_type : int
7 | output_type
8 | source : str
9 | trainer_name : str
10 | weight : bool
11 | }
12 | @enduml
13 |
--------------------------------------------------------------------------------
/docs/developer/server/classes.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/developer/server/packages.plantuml:
--------------------------------------------------------------------------------
1 | @startuml packages
2 | set namespaceSeparator none
3 | package "server" as server #77AADD {
4 | }
5 | package "server.model_server" as server.model_server #77AADD {
6 | }
7 | @enduml
8 |
--------------------------------------------------------------------------------
/docs/developer/server/packages.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/developer/train/trainer/packages.plantuml:
--------------------------------------------------------------------------------
1 | @startuml packages
2 | set namespaceSeparator none
3 | package "train.trainer" as train.trainer #77AADD {
4 | }
5 | package "train.trainer.ExponentialRegressionTrainer" as train.trainer.ExponentialRegressionTrainer #77AADD {
6 | }
7 | package "train.trainer.ExponentialRegressionTrainer.main" as train.trainer.ExponentialRegressionTrainer.main #77AADD {
8 | }
9 | package "train.trainer.GradientBoostingRegressorTrainer" as train.trainer.GradientBoostingRegressorTrainer #77AADD {
10 | }
11 | package "train.trainer.GradientBoostingRegressorTrainer.main" as train.trainer.GradientBoostingRegressorTrainer.main #77AADD {
12 | }
13 | package "train.trainer.KNeighborsRegressorTrainer" as train.trainer.KNeighborsRegressorTrainer #77AADD {
14 | }
15 | package "train.trainer.KNeighborsRegressorTrainer.main" as train.trainer.KNeighborsRegressorTrainer.main #77AADD {
16 | }
17 | package "train.trainer.LinearRegressionTrainer" as train.trainer.LinearRegressionTrainer #77AADD {
18 | }
19 | package "train.trainer.LinearRegressionTrainer.main" as train.trainer.LinearRegressionTrainer.main #77AADD {
20 | }
21 | package "train.trainer.LogarithmicRegressionTrainer" as train.trainer.LogarithmicRegressionTrainer #77AADD {
22 | }
23 | package "train.trainer.LogarithmicRegressionTrainer.main" as train.trainer.LogarithmicRegressionTrainer.main #77AADD {
24 | }
25 | package "train.trainer.LogisticRegressionTrainer" as train.trainer.LogisticRegressionTrainer #77AADD {
26 | }
27 | package "train.trainer.LogisticRegressionTrainer.main" as train.trainer.LogisticRegressionTrainer.main #77AADD {
28 | }
29 | package "train.trainer.PolynomialRegressionTrainer" as train.trainer.PolynomialRegressionTrainer #77AADD {
30 | }
31 | package "train.trainer.PolynomialRegressionTrainer.main" as train.trainer.PolynomialRegressionTrainer.main #77AADD {
32 | }
33 | package "train.trainer.SGDRegressorTrainer" as train.trainer.SGDRegressorTrainer #77AADD {
34 | }
35 | package "train.trainer.SGDRegressorTrainer.main" as train.trainer.SGDRegressorTrainer.main #77AADD {
36 | }
37 | package "train.trainer.SVRRegressorTrainer" as train.trainer.SVRRegressorTrainer #77AADD {
38 | }
39 | package "train.trainer.SVRRegressorTrainer.main" as train.trainer.SVRRegressorTrainer.main #77AADD {
40 | }
41 | package "train.trainer.XGBoostTrainer" as train.trainer.XGBoostTrainer #77AADD {
42 | }
43 | package "train.trainer.XGBoostTrainer.main" as train.trainer.XGBoostTrainer.main #77AADD {
44 | }
45 | package "train.trainer.XgboostFitTrainer" as train.trainer.XgboostFitTrainer #77AADD {
46 | }
47 | package "train.trainer.XgboostFitTrainer.main" as train.trainer.XgboostFitTrainer.main #77AADD {
48 | }
49 | package "train.trainer.curvefit" as train.trainer.curvefit #77AADD {
50 | }
51 | package "train.trainer.scikit" as train.trainer.scikit #77AADD {
52 | }
53 | package "train.trainer.xgboost_interface" as train.trainer.xgboost_interface #77AADD {
54 | }
55 | train.trainer.XgboostFitTrainer.main --> train.trainer.xgboost_interface
56 | @enduml
57 |
--------------------------------------------------------------------------------
/fig/comm_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/fig/comm_diagram.png
--------------------------------------------------------------------------------
/fig/model-server-components-simplified.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/fig/model-server-components-simplified.png
--------------------------------------------------------------------------------
/fig/tekton-complete-train.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/fig/tekton-complete-train.png
--------------------------------------------------------------------------------
/fig/tekton-kepler-default.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/fig/tekton-kepler-default.png
--------------------------------------------------------------------------------
/fig/tekton-single-train.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/fig/tekton-single-train.png
--------------------------------------------------------------------------------
/hack/k8s_helper.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # This file is part of the Kepler project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 |
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 |
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | # Copyright 2023 The Kepler Contributors
18 | #
19 |
20 | set -e
21 |
22 | rollout_ns_status() {
23 | local resources
24 | resources=$(kubectl get deployments,statefulsets,daemonsets -n=$1 -o name)
25 | for res in $resources; do
26 | kubectl rollout status $res --namespace $1 --timeout=10m || die "failed to check status of ${res} inside namespace ${1}"
27 | done
28 | }
29 |
30 | _get_value() {
31 | res=$1
32 | namespace=$2
33 | location=$3
34 | kubectl get $res -n $namespace -ojson|jq -r $location
35 | }
36 |
37 | _get_succeed_condition() {
38 | resource=$1
39 | name=$2
40 | namespace=$3
41 | if [ "$(kubectl get $resource $name -n $namespace -ojson|jq '.status.conditions | length')" == 0 ]; then
42 | echo Unknown
43 | else
44 | location='.status.conditions|map(select(.type=="Succeeded"))[0].status'
45 | _get_value $resource/$name $namespace $location
46 | fi
47 | }
48 |
49 | _log_completed_pod() {
50 | local resources
51 | name=$1
52 | namespace=$2
53 | location=".status.phase"
54 | resources=$(kubectl get pods -n=$namespace -o name)
55 | for res in $resources; do
56 | if [ "$res" == "pod/${name}-run-stressng-pod" ]; then
57 | # get parameters and estimation time
58 | kubectl logs $res -n $namespace|head
59 | fi
60 | echo $res
61 | if [ "$res" == "pod/${name}-presteps-pod" ]; then
62 | # get parameters and estimation time
63 | kubectl logs $res -n $namespace -c step-collect-idle|tail
64 | else
65 | kubectl logs $res -n $namespace|tail
66 | fi
67 | done
68 | }
69 |
70 | wait_for_pipelinerun() {
71 | resource=pipelinerun
72 | name=$1
73 | namespace=default
74 |
75 | if kubectl get taskruns|grep ${name}-run-stressng; then
76 | value=$(_get_succeed_condition $resource $name $namespace)
77 | while [ "$value" == "Unknown" ] ;
78 | do
79 | echo "Wait for pipeline $name to run workload"
80 | kubectl get pods
81 | value=$(_get_succeed_condition $resource $name $namespace)
82 | if kubectl get pod/${name}-run-stressng-pod |grep Running ; then
83 | estimate_time_line=$(kubectl logs pod/${name}-run-stressng-pod -c step-run-stressng -n $namespace|grep "Estimation Time (s):")
84 | estimate_time=$(echo ${estimate_time_line}|awk '{print $4}')
85 | echo "${estimate_time_line}, sleep"
86 | sleep ${estimate_time}
87 | break
88 | fi
89 | sleep 60
90 | done
91 | fi
92 |
93 | value=$(_get_succeed_condition $resource $name $namespace)
94 | while [ "$value" == "Unknown" ] ;
95 | do
96 | echo "Wait for pipeline $name to be succeeded"
97 | kubectl get pods
98 | sleep 60
99 | value=$(_get_succeed_condition $resource $name $namespace)
100 | done
101 |
102 | kubectl get taskrun
103 | _log_completed_pod $name $namespace
104 | if [ "$value" == "False" ]; then
105 | exit 1
106 | fi
107 | }
108 |
109 | "$@"
110 |
--------------------------------------------------------------------------------
/hack/utils.bash:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2024.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | #
16 |
17 | is_fn() {
18 | [[ $(type -t "$1") == "function" ]]
19 | return $?
20 | }
21 |
22 | header() {
23 | local title=" 🔆🔆🔆 $* 🔆🔆🔆 "
24 |
25 | local len=40
26 | if [[ ${#title} -gt $len ]]; then
27 | len=${#title}
28 | fi
29 |
30 | echo -e "\n\n \033[1m${title}\033[0m"
31 | echo -n "━━━━━"
32 | printf '━%.0s' $(seq "$len")
33 | echo "━━━━━━━"
34 |
35 | }
36 |
37 | info() {
38 | echo -e " 🔔 $*" >&2
39 | }
40 |
41 | err() {
42 | echo -e " 😱 $*" >&2
43 | }
44 |
45 | warn() {
46 | echo -e " $*" >&2
47 | }
48 |
49 | ok() {
50 | echo -e " ✅ $*" >&2
51 | }
52 |
53 | skip() {
54 | echo -e " 🙈 SKIP: $*" >&2
55 | }
56 |
57 | fail() {
58 | echo -e " ❌ FAIL: $*" >&2
59 | }
60 |
61 | info_run() {
62 | echo -e " $*\n" >&2
63 | }
64 |
65 | run() {
66 | echo -e " ❯ $*\n" >&2
67 | "$@"
68 | }
69 |
70 | die() {
71 | echo -e "\n ✋ $* "
72 | echo -e "──────────────────── ⛔️⛔️⛔️ ────────────────────────\n"
73 | exit 1
74 | }
75 |
76 | line() {
77 | local len="$1"
78 | local style="${2:-thin}"
79 | shift
80 |
81 | local ch='─'
82 | [[ "$style" == 'heavy' ]] && ch="━"
83 |
84 | printf "$ch%.0s" $(seq "$len") >&2
85 | echo
86 | }
87 |
--------------------------------------------------------------------------------
/manifests/base/estimate-only/kustomization.yaml:
--------------------------------------------------------------------------------
1 | namespace: kepler
2 |
3 | apiVersion: kustomize.config.k8s.io/v1beta1
4 | kind: Kustomization
5 | images:
6 | - name: kepler_model_server
7 | newName: localhost:5001/kepler_model_server
8 | newTag: devel
9 |
10 | patchesStrategicMerge:
11 | - ./patch/patch-estimator-sidecar.yaml
12 |
13 | resources:
14 | - ../kepler
15 |
--------------------------------------------------------------------------------
/manifests/base/estimate-with-server/kustomization.yaml:
--------------------------------------------------------------------------------
1 | namespace: kepler
2 |
3 | apiVersion: kustomize.config.k8s.io/v1beta1
4 | kind: Kustomization
5 | images:
6 | - name: kepler_model_server
7 | newName: localhost:5001/kepler_model_server
8 | newTag: devel
9 |
10 | patchesStrategicMerge:
11 | - ./patch/patch-estimator-sidecar.yaml
12 | - ./patch/patch-model-server.yaml
13 |
14 | resources:
15 | - ../kepler
16 | - ../server
17 |
--------------------------------------------------------------------------------
/manifests/base/kustomization.yaml:
--------------------------------------------------------------------------------
1 | namespace: kepler
2 |
3 | apiVersion: kustomize.config.k8s.io/v1beta1
4 | kind: Kustomization
5 | images:
6 | - name: kepler_model_server
7 | newName: quay.io/sustainable_computing_io/kepler_model_server
8 | newTag: latest
9 |
10 | patchesStrategicMerge:
11 | - ./patch/patch-estimator-sidecar.yaml
12 | - ./patch/patch-model-server.yaml
13 |
14 | resources:
15 | - ../kepler
16 | - ../server
17 |
--------------------------------------------------------------------------------
/manifests/base/openshift/estimate-only/kustomization.yaml:
--------------------------------------------------------------------------------
1 | namespace: kepler
2 |
3 | patchesStrategicMerge:
4 | - ./patch/patch-estimator-sidecar.yaml
5 | - ./patch/patch-openshift.yaml
6 |
7 | resources:
8 | - ../kepler
9 | - ./openshift/scc.yaml
10 |
--------------------------------------------------------------------------------
/manifests/base/openshift/estimate-with-server/kustomization.yaml:
--------------------------------------------------------------------------------
1 | namespace: kepler
2 |
3 | patchesStrategicMerge:
4 | - ./patch/patch-estimator-sidecar.yaml
5 | - ./patch/patch-model-server.yaml
6 | - ./patch/patch-openshift.yaml
7 |
8 | resources:
9 | - ../kepler
10 | - ../server
11 | - ./openshift/scc.yaml
12 |
--------------------------------------------------------------------------------
/manifests/base/openshift/scc.yaml:
--------------------------------------------------------------------------------
1 | # SCC for Kepler
2 | kind: SecurityContextConstraints
3 | apiVersion: security.openshift.io/v1
4 | metadata:
5 | name: kepler-scc
6 | # To allow running privilegedContainers
7 | allowPrivilegedContainer: true
8 | allowHostDirVolumePlugin: true
9 | allowHostNetwork: false
10 | allowHostPorts: false
11 | allowHostIPC: false
12 | allowHostPID: true
13 | readOnlyRootFilesystem: true
14 | defaultAddCapabilities:
15 | - SYS_ADMIN
16 | runAsUser:
17 | type: RunAsAny
18 | seLinuxContext:
19 | type: RunAsAny
20 | fsGroup:
21 | type: RunAsAny
22 | volumes:
23 | - configMap
24 | - projected
25 | - emptyDir
26 | - hostPath
27 | - secret
28 | users:
29 | - kepler
30 | - system:serviceaccount:kepler:kepler-sa
31 |
--------------------------------------------------------------------------------
/manifests/base/openshift/serve-only/kustomization.yaml:
--------------------------------------------------------------------------------
1 | namespace: kepler
2 |
3 | patchesStrategicMerge:
4 | - ./patch/patch-model-server.yaml
5 | - ./patch/patch-openshift.yaml
6 |
7 | resources:
8 | - ../kepler
9 | - ../server
10 | - ./openshift/scc.yaml
11 |
--------------------------------------------------------------------------------
/manifests/base/patch/patch-estimator-sidecar.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: kepler-cfm
5 | namespace: kepler
6 | data:
7 | MODEL_CONFIG: |
8 | NODE_COMPONENTS_ESTIMATOR=true
9 | NODE_COMPONENTS_INIT_URL=https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v0.7/ec2-0.7.11/rapl-sysfs/AbsPower/BPFOnly/SGDRegressorTrainer_0.zip
10 | NODE_TOTAL_ESTIMATOR=true
11 | NODE_TOTAL_INIT_URL=https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v0.7/specpower-0.7.11/acpi/AbsPower/BPFOnly/SGDRegressorTrainer_0.zip
12 | ---
13 | apiVersion: apps/v1
14 | kind: DaemonSet
15 | metadata:
16 | name: kepler-exporter
17 | namespace: kepler
18 | spec:
19 | template:
20 | spec:
21 | containers:
22 | # kepler: wait for estimator socket
23 | - command:
24 | - /bin/sh
25 | - -c
26 | args:
27 | - until [ -e /tmp/estimator.sock ]; do sleep 1; done && /usr/bin/kepler -v=$(KEPLER_LOG_LEVEL)
28 | volumeMounts:
29 | - mountPath: /tmp
30 | name: tmp
31 | name: kepler-exporter
32 | # estimator container
33 | - image: kepler_model_server
34 | imagePullPolicy: IfNotPresent
35 | args: [estimator]
36 | name: estimator
37 | volumeMounts:
38 | - name: cfm
39 | mountPath: /etc/kepler/kepler.config
40 | readOnly: true
41 | - mountPath: /tmp
42 | name: tmp
43 | - mountPath: /mnt
44 | name: mnt
45 | volumes:
46 | - emptyDir: {}
47 | name: tmp
48 | - emptyDir: {}
49 | name: mnt
50 |
--------------------------------------------------------------------------------
/manifests/base/patch/patch-model-server.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: kepler-cfm
5 | namespace: kepler
6 | data:
7 | MODEL_SERVER_ENABLE: "true"
8 | MODEL_SERVER_ENDPOINT: http://kepler-model-server.$(MODEL_SERVER_NAMESPACE).svc.cluster.local:$(MODEL_SERVER_PORT)/model
9 | MODEL_SERVER_PORT: |
10 | $(MODEL_SERVER_PORT)
11 | MODEL_SERVER_URL: http://kepler-model-server.$(MODEL_SERVER_NAMESPACE).svc.cluster.local:$(MODEL_SERVER_PORT)
12 | MODEL_SERVER_MODEL_REQ_PATH: /model
13 | MODEL_SERVER_MODEL_LIST_PATH: /best-models
14 |
--------------------------------------------------------------------------------
/manifests/base/patch/patch-openshift.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Namespace
3 | metadata:
4 | annotations:
5 | openshift.io/description: Kepler exporter
6 | openshift.io/display-name: ""
7 | name: kepler
8 | ---
9 | apiVersion: apps/v1
10 | kind: DaemonSet
11 | metadata:
12 | name: kepler-exporter
13 | namespace: kepler
14 | spec:
15 | template:
16 | spec:
17 | containers:
18 | - name: kepler-exporter
19 | volumeMounts:
20 | - name: kernel-src
21 | mountPath: /usr/src/kernels
22 | securityContext:
23 | privileged: true
24 | volumes:
25 | - name: kernel-src
26 | hostPath:
27 | path: /usr/src/kernels
28 | type: Directory
29 |
--------------------------------------------------------------------------------
/manifests/base/patch/patch-server-only.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: kepler-cfm
5 | namespace: kepler
6 | data:
7 | MODEL_CONFIG: |
8 | NODE_COMPONENTS_TRAINER=SGDRegressorTrainer
9 | NODE_TOTAL_TRAINER=SGDRegressorTrainer
10 |
--------------------------------------------------------------------------------
/manifests/base/serve-only/kustomization.yaml:
--------------------------------------------------------------------------------
1 | namespace: kepler
2 |
3 | apiVersion: kustomize.config.k8s.io/v1beta1
4 | kind: Kustomization
5 | images:
6 | - name: kepler_model_server
7 | newName: localhost:5001/kepler-model-server
8 | newTag: devel
9 |
10 | patchesStrategicMerge:
11 | - ./patch/patch-model-server.yaml
12 | - ./patch/patch-server-only.yaml
13 | resources:
14 | - ../kepler
15 | - ../server
16 |
--------------------------------------------------------------------------------
/manifests/compose/dev/kepler/common/var/lib/kepler/data/cpus.yaml:
--------------------------------------------------------------------------------
1 | ##########
2 | # CPUS - used to look up uarch and channels by family, model, and stepping
3 | # The model and stepping fields will be interpreted as regular expressions
4 | # An empty stepping field means 'any' stepping
5 |
6 | ##########
7 | # Intel Core CPUs
8 | ##########
9 | # Haswell
10 | - core: HSW
11 | uarch: Haswell
12 | family: 6
13 | model: (50|69|70)
14 | stepping:
15 |
16 | # Broadwell
17 | - core: BDW
18 | uarch: Broadwell
19 | family: 6
20 | model: (61|71)
21 | stepping:
22 |
23 | # Skylake
24 | - core: SKL
25 | uarch: Skylake
26 | family: 6
27 | model: (78|94)
28 | stepping:
29 |
30 | # Kabylake
31 | - core: KBL
32 | uarch: Kaby Lake
33 | family: 6
34 | model: (142|158)
35 | stepping: 9
36 |
37 | # Coffee Lake
38 | - core: CFL
39 | uarch: Coffee Lake
40 | family: 6
41 | model: (142|158)
42 | stepping: (10|11|12|13)
43 |
44 | # Rocket Lake
45 | - core: RKL
46 | uarch: Cypress Cove
47 | family: 6
48 | model: 167
49 | stepping:
50 |
51 | # Tiger Lake
52 | - core: TGL
53 | uarch: Willow Cove
54 | family: 6
55 | model: (140|141)
56 | stepping:
57 |
58 | # Alder Lake
59 | - core: ADL
60 | uarch: Golden Cove
61 | family: 6
62 | model: (151|154)
63 | stepping:
64 |
65 | # Raptor Lake
66 | - core: RTL
67 | uarch: Raptor Cove
68 | family: 6
69 | model: 183
70 | stepping:
71 |
72 | ##########
73 | # Intel Xeon CPUs
74 | ##########
75 | # Haswell
76 | - core: HSX
77 | uarch: Haswell
78 | family: 6
79 | model: 63
80 | stepping:
81 |
82 | # Broadwell
83 | - core: BDX
84 | uarch: Broadwell
85 | family: 6
86 | model: (79|86)
87 | stepping:
88 |
89 | # Skylake
90 | - core: SKX
91 | uarch: Skylake
92 | family: 6
93 | model: 85
94 | stepping: (0|1|2|3|4)
95 |
96 | # Cascadelake
97 | - core: CLX
98 | uarch: Cascade Lake
99 | family: 6
100 | model: 85
101 | stepping: (5|6|7)
102 |
103 | # Cooperlake
104 | - core: CPX
105 | uarch: Cooper Lake
106 | family: 6
107 | model: 85
108 | stepping: 11
109 |
110 | # Icelake
111 | - core: ICX
112 | uarch: Sunny Cove
113 | family: 6
114 | model: (106|108)
115 | stepping:
116 |
117 | # Sapphire Rapids
118 | - core: SPR
119 | uarch: Sapphire Rapids
120 | family: 6
121 | model: 143
122 | stepping:
123 |
124 | # Emerald Rapids
125 | - core: EMR
126 | uarch: Emerald Rapids
127 | family: 6
128 | model: 207
129 | stepping:
130 |
131 | # Granite Rapids
132 | - core: GNR
133 | uarch: Granite Rapids
134 | family: 6
135 | model: 173
136 | stepping:
137 |
138 | # Sierra Forest
139 | - core: SRF
140 | uarch: Sierra Forest
141 | family: 6
142 | model: 175
143 | stepping:
144 |
145 | ##########
146 | # AMD CPUs
147 | ##########
148 | # Naples
149 | - core: Naples
150 | uarch: Zen
151 | family: 23
152 | model: 1
153 | stepping:
154 |
155 | # Rome
156 | - core: Rome
157 | uarch: Zen 2
158 | family: 23
159 | model: 49
160 | stepping:
161 |
162 | # Milan
163 | - core: Milan
164 | uarch: Zen 3
165 | family: 25
166 | model: 1
167 | stepping:
168 |
169 | # Genoa
170 | - core: Genoa
171 | uarch: Zen 4
172 | family: 25
173 | model: 17
174 | stepping:
175 |
176 | # Siena
177 | - core: Siena
178 | uarch: Zen 4c
179 | family: 25
180 | model: 160
181 | stepping:
182 |
183 | ##########
184 | # ARM CPUs
185 | ##########
186 | # AWS Graviton 2
187 | - core: Ares
188 | uarch: neoverse_n1
189 | family:
190 | model: 1
191 | stepping: r3p1
192 |
193 | # AWS Graviton 3
194 | - core: Zeus
195 | uarch: neoverse_v1
196 | family:
197 | model: 1
198 | stepping: r1p1
199 |
--------------------------------------------------------------------------------
/manifests/compose/dev/kepler/common/var/lib/kepler/data/model_weight/acpi_AbsPowerModel.json:
--------------------------------------------------------------------------------
1 | {"platform": {"All_Weights": {"Bias_Weight": 220.9079278650894, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 29.028228361462897}}}}}
2 |
--------------------------------------------------------------------------------
/manifests/compose/dev/kepler/common/var/lib/kepler/data/model_weight/acpi_DynPowerModel.json:
--------------------------------------------------------------------------------
1 | {"platform": {"All_Weights": {"Bias_Weight": 49.56491877218095, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 28.501356366108837}}}}}
2 |
--------------------------------------------------------------------------------
/manifests/compose/dev/kepler/common/var/lib/kepler/data/model_weight/intel_rapl_AbsPowerModel.json:
--------------------------------------------------------------------------------
1 | {"package": {"All_Weights": {"Bias_Weight": 69.91739430907396, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 22.16772409328642}}}}, "core": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "uncore": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "dram": {"All_Weights": {"Bias_Weight": 47.142633336743344, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 3.57348245077466}}}}}
2 |
--------------------------------------------------------------------------------
/manifests/compose/dev/kepler/common/var/lib/kepler/data/model_weight/intel_rapl_DynPowerModel.json:
--------------------------------------------------------------------------------
1 | {"package": {"All_Weights": {"Bias_Weight": 38.856412561925055, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 22.258830113477515}}}}, "core": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "uncore": {"All_Weights": {"Bias_Weight": 0.0, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 0.0}}}}, "dram": {"All_Weights": {"Bias_Weight": 9.080889901856153, "Categorical_Variables": {}, "Numerical_Variables": {"bpf_cpu_time_ms": {"scale": 5911.969193263386, "mean": 0, "variance": 0, "weight": 3.0358946796490924}}}}}
2 |
--------------------------------------------------------------------------------
/manifests/compose/dev/kepler/metal/etc/kepler/kepler.config/ENABLE_PROCESS_METRICS:
--------------------------------------------------------------------------------
1 | true
2 |
--------------------------------------------------------------------------------
/manifests/compose/dev/kepler/metal/etc/kepler/kepler.config/EXPOSE_ESTIMATED_IDLE_POWER_METRICS:
--------------------------------------------------------------------------------
1 | false
2 |
--------------------------------------------------------------------------------
/manifests/compose/dev/kepler/metal/etc/kepler/kepler.config/EXPOSE_VM_METRICS:
--------------------------------------------------------------------------------
1 | true
2 |
--------------------------------------------------------------------------------
/manifests/compose/dev/kepler/models/etc/kepler/kepler.config/ENABLE_PROCESS_METRICS:
--------------------------------------------------------------------------------
1 | true
2 |
--------------------------------------------------------------------------------
/manifests/compose/dev/kepler/models/etc/kepler/kepler.config/EXPOSE_ESTIMATED_IDLE_POWER_METRICS:
--------------------------------------------------------------------------------
1 | false
2 |
--------------------------------------------------------------------------------
/manifests/compose/dev/kepler/models/etc/kepler/kepler.config/MODEL_CONFIG:
--------------------------------------------------------------------------------
1 | NODE_TOTAL_ESTIMATOR=true
2 | NODE_TOTAL_INIT_URL=https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v0.7/specpower-0.7.11/acpi/AbsPower/BPFOnly/GradientBoostingRegressorTrainer_0.zip
3 | NODE_COMPONENTS_ESTIMATOR=true
4 | NODE_COMPONENTS_INIT_URL=https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models/v0.7/ec2-0.7.11/rapl-sysfs/AbsPower/BPFOnly/GradientBoostingRegressorTrainer_0.zip
5 |
--------------------------------------------------------------------------------
/manifests/compose/dev/kepler/models/etc/kepler/kepler.config/MODEL_SERVER_ENABLE:
--------------------------------------------------------------------------------
1 | false
2 |
--------------------------------------------------------------------------------
/manifests/compose/dev/kepler/models/etc/kepler/kepler.config/MODEL_SERVER_URL:
--------------------------------------------------------------------------------
1 | http://model-server:8100
2 |
--------------------------------------------------------------------------------
/manifests/compose/dev/overrides.yaml:
--------------------------------------------------------------------------------
1 | services:
2 | prometheus:
3 | networks:
4 | - kepler-models-network
5 | - kepler-metal-network
6 | - model-server-network
7 |
8 | volumes:
9 | - type: bind
10 | source: ../dev/prometheus/scrape-configs/dev.yaml
11 | target: /etc/prometheus/scrape-configs/dev.yaml
12 |
13 | grafana:
14 | environment:
15 | GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH: /var/lib/grafana/dashboards/dev/dashboard.json
16 | volumes:
17 | - type: bind
18 | source: ../dev/grafana/dashboards/dev/
19 | target: /var/lib/grafana/dashboards/dev
20 |
--------------------------------------------------------------------------------
/manifests/compose/dev/prometheus/scrape-configs/dev.yaml:
--------------------------------------------------------------------------------
1 | scrape_configs:
2 | - job_name: models
3 | static_configs:
4 | - targets: [kepler-models:9100]
5 |
6 | - job_name: metal
7 | static_configs:
8 | - targets: [kepler-metal:9100]
9 |
--------------------------------------------------------------------------------
/manifests/compose/monitoring/compose.yaml:
--------------------------------------------------------------------------------
1 | name: monitoring
2 |
3 | services:
4 | prometheus:
5 | build:
6 | context: ./prometheus
7 | ports:
8 | - 19090:9090
9 | volumes:
10 | - prom-data:/prometheus
11 | - type: bind
12 | source: ./prometheus/prometheus.yml
13 | target: /etc/prometheus/prometheus.yml
14 | networks:
15 | - monitoring
16 |
17 | healthcheck:
18 | test: wget -q --spider http://localhost:9090/ -O /dev/null || exit 1
19 | interval: ${HEALTHCHECK_INTERVAL:-50s}
20 | timeout: ${HEALTHCHECK_TIMEOUT:-30s}
21 | retries: ${HEALTHCHECK_RETRIES:-3}
22 | start_period: ${HEALTHCHECK_START_PERIOD:-1m}
23 |
24 | grafana:
25 | build:
26 | context: ./grafana
27 | environment:
28 | GF_AUTH_ANONYMOUS_ENABLED: "true"
29 | GF_SECURITY_ADMIN_PASSWORD: admin
30 | GF_AUTH_ANONYMOUS_ORG_ROLE: Admin
31 |
32 | user: "1000" # NOTE: change this to your `id -u`
33 | depends_on:
34 | - prometheus
35 | ports:
36 | - 13000:3000
37 | networks:
38 | - monitoring
39 |
40 | healthcheck:
41 | test: curl -f http://localhost:3000/ || exit 1
42 | interval: ${HEALTHCHECK_INTERVAL:-50s}
43 | timeout: ${HEALTHCHECK_TIMEOUT:-30s}
44 | retries: ${HEALTHCHECK_RETRIES:-3}
45 | start_period: ${HEALTHCHECK_START_PERIOD:-1m}
46 |
47 | volumes:
48 | # volume for holding prometheus (ts)db
49 | prom-data:
50 |
51 | networks:
52 | monitoring:
53 |
--------------------------------------------------------------------------------
/manifests/compose/monitoring/grafana/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM quay.io/ceph/grafana:10.4.2
2 |
3 | COPY /datasource.yml /etc/grafana/provisioning/datasources/
4 | COPY /dashboards.yml /etc/grafana/provisioning/dashboards/
5 |
--------------------------------------------------------------------------------
/manifests/compose/monitoring/grafana/dashboards.yml:
--------------------------------------------------------------------------------
1 | apiVersion: 1
2 |
3 | providers:
4 |   # a unique provider name. Required
5 | - name: kepler
6 | # Org id. Default to 1
7 | orgId: 1
8 | # name of the dashboard folder.
9 | folder: kepler
10 | # provider type. Default to 'file'
11 | type: file
12 | # disable dashboard deletion
13 | disableDeletion: true
14 | # allow updating provisioned dashboards from the UI
15 | allowUiUpdates: true
16 | options:
17 | # path to dashboard files on disk. Required when using the 'file' type
18 | path: /var/lib/grafana/dashboards
19 | # use folder names from filesystem to create folders in Grafana
20 | foldersFromFilesStructure: true
21 |
--------------------------------------------------------------------------------
/manifests/compose/monitoring/grafana/datasource.yml:
--------------------------------------------------------------------------------
1 | # config file version
2 | apiVersion: 1
3 |
4 | datasources:
5 | # name of the datasource. Required
6 | - name: kepler-prometheus
7 | # datasource type. Required
8 | type: prometheus
9 | # access mode. direct or proxy. Required
10 | access: proxy
11 | # org id. will default to orgId 1 if not specified
12 | orgId: 1
13 | # url
14 | url: http://prometheus:9090
15 | isDefault: true
16 | version: 1
17 | # allow users to edit datasources from the UI.
18 | editable: true
19 |
--------------------------------------------------------------------------------
/manifests/compose/monitoring/prometheus/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM quay.io/prometheus/prometheus:main
2 |
3 | COPY /prometheus.yml /etc/prometheus/prometheus.yml
4 |
5 | CMD [\
6 | "--config.file=/etc/prometheus/prometheus.yml",\
7 | "--storage.tsdb.path=/prometheus", \
8 | "--web.enable-admin-api" \
9 | ]
10 |
--------------------------------------------------------------------------------
/manifests/compose/monitoring/prometheus/prometheus.yml:
--------------------------------------------------------------------------------
1 | global:
2 | scrape_interval: 5s # Set the scrape interval to every 5 seconds. Default is every 1 minute.
3 | evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
4 | # scrape_timeout is set to the global default (10s).
5 |
6 | # Attach these labels to any time series or alerts when communicating with
7 | # external systems (federation, remote storage, Alertmanager).
8 | external_labels:
9 | monitor: kepler
10 |
11 | # A scrape configuration containing exactly one endpoint to scrape:
12 | # Here it's Prometheus itself.
13 | scrape_configs:
14 | # The job name is added as a label `job=` to any timeseries scraped from this config.
15 | - job_name: prometheus
16 | # metrics_path defaults to '/metrics'
17 | # scheme defaults to 'http'.
18 | static_configs:
19 | - targets: [localhost:9090]
20 |
21 | # Load rules once and periodically evaluate them according to
22 | # the global 'evaluation_interval'.
23 | rule_files:
24 | - /etc/prometheus/rules/*.yaml
25 | - /etc/prometheus/rules/*.yml
26 |
27 | # additional scrape configs
28 | scrape_config_files:
29 | - /etc/prometheus/scrape-configs/*.yaml
30 | - /etc/prometheus/scrape-configs/*.yml
31 |
32 | # NOTE: e.g. to add more jobs to scrape a
33 | # VM with IP 192.168.122.100 on port 8888,
34 | # - job_name: 'vm'
35 | # static_configs:
36 | # - targets: ['192.168.122.100:8888']
37 |
--------------------------------------------------------------------------------
/manifests/compose/monitoring/prometheus/rules/kepler.rule:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/manifests/compose/monitoring/prometheus/rules/kepler.rule
--------------------------------------------------------------------------------
/manifests/kepler/kustomization.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: kustomize.config.k8s.io/v1beta1
2 | kind: Kustomization
3 | resources:
4 | - github.com/sustainable-computing-io/kepler/manifests/k8s/config/base
5 |
6 | patchesStrategicMerge:
7 | - ./patch/patch-ci.yaml
8 | images:
9 | - name: kepler
10 | newName: quay.io/sustainable_computing_io/kepler
11 | newTag: release-0.7.11
12 |
--------------------------------------------------------------------------------
/manifests/kepler/patch/patch-ci.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: kepler-cfm
5 | namespace: system
6 | data:
7 |   KEPLER_LOG_LEVEL: "4"
8 | ---
9 | apiVersion: apps/v1
10 | kind: DaemonSet
11 | metadata:
12 | name: kepler-exporter
13 | namespace: system
14 | spec:
15 | template:
16 | spec:
17 | containers:
18 | - name: kepler-exporter
19 | imagePullPolicy: IfNotPresent
20 | image: kepler:latest
21 |
--------------------------------------------------------------------------------
/manifests/offline-trainer/kustomization.yaml:
--------------------------------------------------------------------------------
1 | namespace: kepler
2 |
3 | resources:
4 | - offline-trainer.yaml
5 |
6 | images:
7 | - name: kepler_model_server
8 | newName: quay.io/sustainable_computing_io/kepler_model_server
9 | newTag: latest
10 |
--------------------------------------------------------------------------------
/manifests/offline-trainer/offline-trainer.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: kepler-offline-trainer
5 | namespace: system
6 | labels:
7 |     app.kubernetes.io/component: offline-trainer
8 |     app.kubernetes.io/name: kepler-offline-trainer
9 | spec:
10 | replicas: 1
11 | selector:
12 | matchLabels:
13 | app.kubernetes.io/component: offline-trainer
14 | app.kubernetes.io/name: kepler-offline-trainer
15 | template:
16 | metadata:
17 | labels:
18 | app.kubernetes.io/component: offline-trainer
19 | app.kubernetes.io/name: kepler-offline-trainer
20 | spec:
21 | volumes:
22 | - name: cfm
23 | configMap:
24 | name: kepler-model-server-cfm
25 | - emptyDir: {}
26 | name: mnt
27 | containers:
28 | - name: offline-trainer
29 | image: kepler_model_server
30 | imagePullPolicy: Always
31 | ports:
32 | - containerPort: 8102
33 | name: http
34 | volumeMounts:
35 | - name: cfm
36 | mountPath: /etc/kepler/kepler.config
37 | readOnly: true
38 | - name: mnt
39 | mountPath: /mnt
40 | readOnly: false
41 | args: [offline-trainer]
42 | ---
43 | kind: Service
44 | apiVersion: v1
45 | metadata:
46 | name: kepler-offline-trainer
47 | namespace: system
48 | labels:
49 | app.kubernetes.io/component: offline-trainer
50 | app.kubernetes.io/name: kepler-offline-trainer
51 | spec:
52 | clusterIP: None
53 | selector:
54 | app.kubernetes.io/component: offline-trainer
55 | app.kubernetes.io/name: kepler-offline-trainer
56 | ports:
57 | - name: http
58 | port: 8102
59 | targetPort: http
60 |
--------------------------------------------------------------------------------
/manifests/server/base/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 | - server.yaml
3 |
4 | apiVersion: kustomize.config.k8s.io/v1beta1
5 | kind: Kustomization
6 | vars:
7 | - name: MODEL_SERVER_NAMESPACE
8 | objref:
9 | kind: Deployment
10 | group: apps
11 | version: v1
12 | name: kepler-model-server
13 | fieldref:
14 | fieldpath: metadata.namespace
15 | - name: MODEL_SERVER_PORT
16 | objref:
17 | kind: Deployment
18 | group: apps
19 | version: v1
20 | name: kepler-model-server
21 | fieldref:
22 | fieldpath: spec.template.spec.containers[0].ports[0].containerPort
23 |
24 | configurations:
25 | - kustomizeconfig.yaml
26 |
--------------------------------------------------------------------------------
/manifests/server/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 | - server.yaml
3 |
4 | apiVersion: kustomize.config.k8s.io/v1beta1
5 | kind: Kustomization
6 | vars:
7 | - fieldref:
8 | fieldPath: metadata.namespace
9 | name: MODEL_SERVER_NAMESPACE
10 | objref:
11 | group: apps
12 | kind: Deployment
13 | name: kepler-model-server
14 | version: v1
15 | - fieldref:
16 | fieldPath: spec.template.spec.containers[0].ports[0].containerPort
17 | name: MODEL_SERVER_PORT
18 | objref:
19 | group: apps
20 | kind: Deployment
21 | name: kepler-model-server
22 | version: v1
23 |
24 | configurations:
25 | - kustomizeconfig.yaml
26 | images:
27 | - name: kepler_model_server
28 | newName: quay.io/sustainable_computing_io/kepler_model_server
29 | newTag: latest
30 |
--------------------------------------------------------------------------------
/manifests/server/kustomizeconfig.yaml:
--------------------------------------------------------------------------------
1 | varReference:
2 | - kind: ConfigMap
3 | group: ""
4 | version: v1
5 | name: kepler-cfm
6 | path: data/MODEL_SERVER_ENDPOINT
7 | - kind: ConfigMap
8 | group: ""
9 | version: v1
10 | name: kepler-cfm
11 | path: data/MODEL_SERVER_URL
12 | - kind: ConfigMap
13 | group: ""
14 | version: v1
15 | name: kepler-cfm
16 | path: data/MODEL_SERVER_PORT
17 |
--------------------------------------------------------------------------------
/manifests/server/online-train/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 | - server.yaml
3 |
4 | patchesStrategicMerge:
5 | - ./online-train/patch-trainer.yaml
6 |
7 | apiVersion: kustomize.config.k8s.io/v1beta1
8 | kind: Kustomization
9 | vars:
10 | - name: MODEL_SERVER_NAMESPACE
11 | objref:
12 | kind: Deployment
13 | group: apps
14 | version: v1
15 | name: kepler-model-server
16 | fieldref:
17 | fieldpath: metadata.namespace
18 | - name: MODEL_SERVER_PORT
19 | objref:
20 | kind: Deployment
21 | group: apps
22 | version: v1
23 | name: kepler-model-server
24 | fieldref:
25 | fieldpath: spec.template.spec.containers[0].ports[0].containerPort
26 |
27 | configurations:
28 | - kustomizeconfig.yaml
29 |
--------------------------------------------------------------------------------
/manifests/server/online-train/patch-trainer.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: kepler-model-server-cfm
5 | namespace: kepler
6 | data:
7 | PROM_SERVER: http://prometheus-k8s.monitoring.svc.cluster.local:9090
8 |   PROM_QUERY_INTERVAL: "20"
9 |   PROM_QUERY_STEP: "3"
10 |   PROM_SSL_DISABLE: "true"
11 | ---
12 | apiVersion: apps/v1
13 | kind: Deployment
14 | metadata:
15 | name: kepler-model-server
16 | namespace: kepler
17 | spec:
18 | template:
19 | spec:
20 | containers:
21 | - name: server-api
22 | - name: online-trainer
23 | image: kepler_model_server
24 | imagePullPolicy: IfNotPresent
25 | volumeMounts:
26 | - name: cfm
27 | mountPath: /etc/kepler/kepler.config
28 | readOnly: true
29 | - name: mnt
30 | mountPath: /mnt
31 | readOnly: false
32 | args: [online-trainer]
33 |
--------------------------------------------------------------------------------
/manifests/server/openshift/online-train/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 | - server.yaml
3 |
4 | patchesStrategicMerge:
5 | - ./openshift/patch-openshift.yaml
6 | - ./online-train/patch-trainer.yaml
7 | - ./openshift/online-train/patch-trainer.yaml
8 |
9 | apiVersion: kustomize.config.k8s.io/v1beta1
10 | kind: Kustomization
11 | vars:
12 | - name: MODEL_SERVER_NAMESPACE
13 | objref:
14 | kind: Deployment
15 | group: apps
16 | version: v1
17 | name: kepler-model-server
18 | fieldref:
19 | fieldpath: metadata.namespace
20 | - name: MODEL_SERVER_PORT
21 | objref:
22 | kind: Deployment
23 | group: apps
24 | version: v1
25 | name: kepler-model-server
26 | fieldref:
27 | fieldpath: spec.template.spec.containers[0].ports[0].containerPort
28 |
29 | configurations:
30 | - kustomizeconfig.yaml
31 |
--------------------------------------------------------------------------------
/manifests/server/openshift/online-train/patch-trainer.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: kepler-model-server-cfm
5 | namespace: system
6 | data:
7 | PROM_SERVER: http://prometheus-operated.openshift-monitoring.svc.cluster.local:9090
8 |   PROM_QUERY_INTERVAL: "20"
9 |   PROM_QUERY_STEP: "3"
10 |   PROM_SSL_DISABLE: "true"
11 | ---
12 | apiVersion: apps/v1
13 | kind: Deployment
14 | metadata:
15 | name: kepler-model-server
16 | namespace: system
17 | spec:
18 | template:
19 | spec:
20 | containers:
21 | - name: server-api
22 | - name: online-trainer
23 | securityContext:
24 | privileged: true
25 |
--------------------------------------------------------------------------------
/manifests/server/openshift/patch-openshift.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: kepler-model-server
5 | namespace: system
6 | spec:
7 | template:
8 | spec:
9 | serviceAccountName: kepler-sa
10 | containers:
11 | - name: server-api
12 | securityContext:
13 | privileged: true
14 |
--------------------------------------------------------------------------------
/manifests/server/openshift/serve-only/kustomization.yaml:
--------------------------------------------------------------------------------
1 | resources:
2 | - server.yaml
3 |
4 | patchesStrategicMerge:
5 | - ./openshift/patch-openshift.yaml
6 |
7 | apiVersion: kustomize.config.k8s.io/v1beta1
8 | kind: Kustomization
9 | vars:
10 | - name: MODEL_SERVER_NAMESPACE
11 | objref:
12 | kind: Deployment
13 | group: apps
14 | version: v1
15 | name: kepler-model-server
16 | fieldref:
17 | fieldpath: metadata.namespace
18 | - name: MODEL_SERVER_PORT
19 | objref:
20 | kind: Deployment
21 | group: apps
22 | version: v1
23 | name: kepler-model-server
24 | fieldref:
25 | fieldpath: spec.template.spec.containers[0].ports[0].containerPort
26 |
27 | configurations:
28 | - kustomizeconfig.yaml
29 |
--------------------------------------------------------------------------------
/manifests/server/server.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ConfigMap
3 | metadata:
4 | name: kepler-model-server-cfm
5 | namespace: system
6 | ---
7 | apiVersion: apps/v1
8 | kind: Deployment
9 | metadata:
10 | name: kepler-model-server
11 | namespace: system
12 | labels:
13 | app.kubernetes.io/component: model-server
14 | app.kubernetes.io/name: kepler-model-server
15 | spec:
16 | replicas: 1
17 | selector:
18 | matchLabels:
19 | app.kubernetes.io/component: model-server
20 | app.kubernetes.io/name: kepler-model-server
21 | template:
22 | metadata:
23 | labels:
24 | app.kubernetes.io/component: model-server
25 | app.kubernetes.io/name: kepler-model-server
26 | spec:
27 | volumes:
28 | - name: cfm
29 | configMap:
30 | name: kepler-model-server-cfm
31 | - emptyDir: {}
32 | name: mnt
33 | containers:
34 | - name: server-api
35 | image: kepler_model_server
36 | imagePullPolicy: IfNotPresent
37 | ports:
38 | - containerPort: 8100
39 | name: http
40 | volumeMounts:
41 | - name: cfm
42 | mountPath: /etc/kepler/kepler.config
43 | readOnly: true
44 | - name: mnt
45 | mountPath: /mnt
46 | readOnly: false
47 | args: [model-server]
48 | ---
49 | kind: Service
50 | apiVersion: v1
51 | metadata:
52 | name: kepler-model-server
53 | namespace: system
54 | labels:
55 | app.kubernetes.io/component: model-server
56 | app.kubernetes.io/name: kepler-model-server
57 | spec:
58 | clusterIP: None
59 | selector:
60 | app.kubernetes.io/component: model-server
61 | app.kubernetes.io/name: kepler-model-server
62 | ports:
63 | - name: http
64 | port: 8100
65 | targetPort: http
66 |
--------------------------------------------------------------------------------
/manifests/set.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # This file is part of the Kepler project
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 |
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 |
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | # Copyright 2022 The Kepler Contributors
18 | #
19 |
20 | # set options
21 | # for example: ./set.sh "ESTIMATOR SERVER"
22 | unset SERVER
23 | unset ONLINE_TRAINER
24 | unset ESTIMATOR
25 | unset OPENSHIFT_DEPLOY
26 |
27 | DEPLOY_OPTIONS=$1
28 | for opt in ${DEPLOY_OPTIONS}; do export $opt=true; done;
29 |
30 | echo DEPLOY_OPTIONS=${DEPLOY_OPTIONS}
31 |
32 | version=$(kubectl version| grep 'Client Version' | sed 's/.*v//g' | cut -b -4)
33 | if [ 1 -eq "$(echo "${version} < 1.21" | bc)" ]
34 | then
35 | echo "You need to update your kubectl version to 1.21+ to support kustomize"
36 | exit 1
37 | fi
38 |
39 | echo "Preparing manifests..."
40 |
41 | if [ ! -z ${SERVER} ]; then
42 | echo "deploy model server"
43 | if [ ! -z ${ESTIMATOR} ]; then
44 | echo "add estimator-sidecar"
45 | # OPTS="ESTIMATOR SERVER" --> base
46 | cp ./manifests/base/estimate-with-server/kustomization.yaml ./manifests/base/kustomization.yaml
47 | if [ ! -z ${OPENSHIFT_DEPLOY} ]; then
48 | echo "patch openshift deployment for exporter (estimator-with-server)"
49 | # OPTS="ESTIMATOR SERVER OPENSHIFT_DEPLOY" --> base
50 | cp ./manifests/base/openshift/estimate-with-server/kustomization.yaml ./manifests/base/kustomization.yaml
51 | fi
52 | else
53 | # OPTS="SERVER" --> base
54 | cp ./manifests/base/serve-only/kustomization.yaml ./manifests/base/kustomization.yaml
55 | if [ ! -z ${OPENSHIFT_DEPLOY} ]; then
56 | echo "patch openshift deployment for exporter (serve-only)"
57 | # OPTS="SERVER OPENSHIFT_DEPLOY" --> base
58 | cp ./manifests/base/openshift/serve-only/kustomization.yaml ./manifests/base/kustomization.yaml
59 | fi
60 | fi
61 |
62 | if [ ! -z ${ONLINE_TRAINER} ]; then
63 | echo "add online trainer"
64 | # OPTS="... SERVER ONLINE_TRAINER" --> server
65 | cp ./manifests/server/online-train/kustomization.yaml ./manifests/server/kustomization.yaml
66 | if [ ! -z ${OPENSHIFT_DEPLOY} ]; then
67 | echo "patch openshift deployment for server (with online trainer)"
68 | # OPTS="... SERVER ONLINE_TRAINER OPENSHIFT_DEPLOY" --> server
69 | cp ./manifests/server/openshift/online-train/kustomization.yaml ./manifests/server/kustomization.yaml
70 | fi
71 | else
72 | # OPTS="... SERVER" --> server
73 | cp ./manifests/server/base/kustomization.yaml ./manifests/server/kustomization.yaml
74 | if [ ! -z ${OPENSHIFT_DEPLOY} ]; then
75 | echo "patch openshift deployment for server"
76 | # OPTS="... SERVER OPENSHIFT_DEPLOY" --> server
77 | cp ./manifests/server/openshift/serve-only/kustomization.yaml ./manifests/server/kustomization.yaml
78 | fi
79 | fi
80 | elif [ ! -z ${ESTIMATOR} ]; then
81 | echo "add estimator-sidecar"
82 | # OPTS="ESTIMATOR" --> base
83 | cp ./manifests/base/estimate-only/kustomization.yaml ./manifests/base/kustomization.yaml
84 | if [ ! -z ${OPENSHIFT_DEPLOY} ]; then
85 | echo "patch openshift deployment for exporter (estimator-only)"
86 | # OPTS="ESTIMATOR OPENSHIFT_DEPLOY" --> base
87 | cp ./manifests/base/openshift/estimate-only/kustomization.yaml ./manifests/base/kustomization.yaml
88 | fi
89 | fi
90 |
91 | for opt in ${DEPLOY_OPTIONS}; do unset $opt; done;
92 |
93 | echo "Done $0"
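# Example usage (a sketch, assuming the script is run from the repository root and
# that the generated kustomization is then applied with kubectl's built-in kustomize):
#   ./manifests/set.sh "ESTIMATOR SERVER"
#   kubectl apply -k ./manifests/base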
--------------------------------------------------------------------------------
/manifests/test/file-server.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Pod
3 | metadata:
4 | name: model-db
5 | namespace: kepler
6 | labels:
7 | app.kubernetes.io/component: model-db
8 | spec:
9 | containers:
10 | - name: file-server
11 | image: localhost:5001/kepler_model_server:devel-test
12 | imagePullPolicy: IfNotPresent
13 | args: [python3, tests/http_server.py]
14 | ports:
15 | - containerPort: 8110
16 | name: http
17 | volumeMounts:
18 | - name: mnt
19 | mountPath: /mnt
20 | initContainers:
21 | - name: trainer
22 | image: localhost:5001/kepler_model_server:devel-test
23 | imagePullPolicy: IfNotPresent
24 | args: [python3, tests/minimal_trainer.py]
25 | volumeMounts:
26 | - name: mnt
27 | mountPath: /mnt
28 | # Add other init container configurations here
29 | volumes:
30 | - name: mnt
31 | emptyDir: {}
32 | ---
33 | kind: Service
34 | apiVersion: v1
35 | metadata:
36 | name: model-db
37 | namespace: kepler
38 | labels:
39 | app.kubernetes.io/component: model-db
40 | spec:
41 | clusterIP: None
42 | selector:
43 | app.kubernetes.io/component: model-db
44 | ports:
45 | - name: http
46 | port: 8110
47 | targetPort: http
48 |
--------------------------------------------------------------------------------
/manifests/test/model-request-client.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: DaemonSet
3 | metadata:
4 | name: kepler-exporter
5 | namespace: kepler
6 | spec:
7 | template:
8 | spec:
9 | containers:
10 | - name: kepler-exporter
11 | image: localhost:5001/kepler_model_server:devel-test
12 | imagePullPolicy: IfNotPresent
13 | command: [/bin/bash, -c]
14 | args: [python3 tests/weight_model_request_test.py && echo Done && sleep infinity]
15 | volumeMounts:
16 | - name: cfm
17 | mountPath: /etc/kepler/kepler.config
18 | readOnly: true
19 | - mountPath: /tmp
20 | name: tmp
21 | volumes:
22 | - emptyDir: {}
23 | name: tmp
24 |
--------------------------------------------------------------------------------
/manifests/test/patch-estimator-sidecar.yaml:
--------------------------------------------------------------------------------
1 | data:
2 | MODEL_CONFIG: |
3 | NODE_COMPONENTS_ESTIMATOR=true
4 | NODE_COMPONENTS_INIT_URL=http://model-db.kepler.svc.cluster.local:8110/std_v0.7.11/rapl-sysfs/AbsPower/BPFOnly/GradientBoostingRegressorTrainer_0.zip
5 | NODE_TOTAL_ESTIMATOR=true
6 | NODE_TOTAL_INIT_URL=http://model-db.kepler.svc.cluster.local:8110/std_v0.7.11/acpi/AbsPower/BPFOnly/GradientBoostingRegressorTrainer_0.zip
7 |
--------------------------------------------------------------------------------
/manifests/test/power-request-client.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: DaemonSet
3 | metadata:
4 | name: kepler-exporter
5 | namespace: kepler
6 | spec:
7 | template:
8 | spec:
9 | containers:
10 | - name: kepler-exporter
11 | image: localhost:5001/kepler_model_server:devel-test
12 | imagePullPolicy: IfNotPresent
13 | command: [/bin/bash, -c]
14 | args: ["until [ -e /tmp/estimator.sock ]; do sleep 1; done && python3 -u tests/estimator_power_request_test.py && echo Done && sleep infinity"]
15 | volumeMounts:
16 | - name: cfm
17 | mountPath: /etc/kepler/kepler.config
18 | readOnly: true
19 | - mountPath: /tmp
20 | name: tmp
21 | - name: estimator
22 | volumes:
23 | - emptyDir: {}
24 | name: tmp
25 |
--------------------------------------------------------------------------------
/model_training/README.md:
--------------------------------------------------------------------------------
1 | # Contribute to power profiling and model training
2 |
3 |
4 |
5 | - [Contribute to power profiling and model training](#contribute-to-power-profiling-and-model-training)
6 | - [Requirements](#requirements)
7 | - [Pre-step](#pre-step)
8 | - [Setup](#setup)
9 | - [Prepare cluster](#prepare-cluster)
10 | - [From scratch (no target kubernetes cluster)](#from-scratch-no-target-kubernetes-cluster)
11 | - [For managed cluster](#for-managed-cluster)
12 | - [Run benchmark and collect metrics](#run-benchmark-and-collect-metrics)
13 | - [With manual execution](#with-manual-execution)
14 | - [Clean up](#clean-up)
15 |
16 |
17 |
18 | ## Requirements
19 |
20 | - git > 2.22
21 | - hatch
22 | - kubectl
23 | - yq, jq
24 | - power meter if available
25 |
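A quick way to sanity-check that the tools are installed (this only prints versions; it does not enforce the minimum versions above):

```bash
git --version && kubectl version --client && hatch --version && yq --version && jq --version
```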
26 | ## Pre-step
27 |
28 | - Fork and clone this repository, then move to the `model_training` folder
29 |
30 | ```bash
31 | git clone https://github.com/<your-fork>/kepler-model-server.git
32 | cd kepler-model-server/model_training
33 | ```
34 |
35 | ## Setup
36 |
37 | ### Prepare cluster
38 |
39 | ### From scratch (no target kubernetes cluster)
40 |
41 | > Note: ports 9090 and 5101 must not already be in use; they are used to port-forward Prometheus and the kind registry, respectively.
42 |
43 | ```bash
44 | ./script.sh prepare_cluster
45 | ```
46 |
47 | The script will:
48 |
49 | - create a kind cluster `kind-for-training` with registry at port `5101`.
50 | - deploy Prometheus.
51 | - deploy Prometheus RBAC rules and a NodePort service on port `30090` of the kind node, which is forwarded to port `9090` on the host.
52 | - deploy a service monitor for Kepler and reload the Prometheus server configuration.
53 |
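Once the script finishes, you can verify the setup (assuming the default forward above) by checking that Prometheus answers on the forwarded port:

```bash
# expect an HTTP 200 response once Prometheus is ready
curl -sf http://localhost:9090/-/ready
```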
54 | ### For managed cluster
55 |
56 | Please confirm the following requirements:
57 |
58 | - Kepler installation
59 | - Prometheus installation
60 | - Kepler metrics are exported to the Prometheus server
61 | - The Prometheus server is reachable at `http://localhost:9090`; otherwise, set the `PROM_SERVER` environment variable.
62 |
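For example, if Prometheus is not reachable on `localhost:9090`, point the tooling at it via `PROM_SERVER` (the in-cluster address below is the one used elsewhere in these manifests; adjust it to your environment):

```bash
export PROM_SERVER=http://prometheus-k8s.monitoring.svc.cluster.local:9090
```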
63 | ### Run benchmark and collect metrics
64 |
65 | - [Tekton Pipeline Instruction](./tekton/README.md)
66 |
67 | ### With manual execution
68 |
69 | In addition to the approach above, you can run your own benchmarks manually, then collect the metrics, train, and export the models via the entrypoint:
70 |
71 | [Manual Metric Collection and Training with Entrypoint](./cmd_instruction.md)
72 |
73 | ## Clean up
74 |
75 | For kind-for-training cluster:
76 |
77 | ```bash
78 | ./script.sh cleanup
79 | ```
80 |
--------------------------------------------------------------------------------
/model_training/deployment/prom-kepler-rbac.yaml:
--------------------------------------------------------------------------------
1 | kind: Role
2 | apiVersion: rbac.authorization.k8s.io/v1
3 | metadata:
4 | name: prometheus-k8s
5 | namespace: kepler
6 | labels:
7 | app.kubernetes.io/component: prometheus
8 | app.kubernetes.io/instance: k8s
9 | app.kubernetes.io/name: prometheus
10 | rules:
11 | - verbs:
12 | - get
13 | - list
14 | - watch
15 | apiGroups:
16 | - "" # yamllint disable-line rule:quoted-strings
17 | resources:
18 | - services
19 | - endpoints
20 | - pods
21 | - verbs:
22 | - get
23 | - list
24 | - watch
25 | apiGroups:
26 | - extensions
27 | resources:
28 | - ingresses
29 | - verbs:
30 | - get
31 | - list
32 | - watch
33 | apiGroups:
34 | - networking.k8s.io
35 | resources:
36 | - ingresses
37 | ---
38 | kind: RoleBinding
39 | apiVersion: rbac.authorization.k8s.io/v1
40 | metadata:
41 | name: prometheus-k8s
42 | namespace: kepler
43 | labels:
44 | app.kubernetes.io/component: prometheus
45 | app.kubernetes.io/instance: k8s
46 | app.kubernetes.io/name: prometheus
47 | subjects:
48 | - kind: ServiceAccount
49 | name: prometheus-k8s
50 | namespace: monitoring
51 | roleRef:
52 | apiGroup: rbac.authorization.k8s.io
53 | kind: Role
54 | name: prometheus-k8s
55 |
--------------------------------------------------------------------------------
/model_training/deployment/prom-np.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Service
3 | metadata:
4 | labels:
5 | app.kubernetes.io/component: prometheus
6 | app.kubernetes.io/instance: k8s
7 | app.kubernetes.io/name: prometheus
8 | app.kubernetes.io/part-of: kube-prometheus
9 | name: prometheus-k8s-np
10 | namespace: monitoring
11 | spec:
12 | ports:
13 | - name: web
14 | port: 9090
15 | protocol: TCP
16 | targetPort: web
17 | nodePort: 30090
18 | selector:
19 | app.kubernetes.io/component: prometheus
20 | app.kubernetes.io/instance: k8s
21 | app.kubernetes.io/name: prometheus
22 | app.kubernetes.io/part-of: kube-prometheus
23 | type: NodePort
24 |
--------------------------------------------------------------------------------
/model_training/s3/Dockerfile:
--------------------------------------------------------------------------------
1 | # NOTE: Dockerfile for generating quay.io/kepler_model_server/s3 images
2 |
3 | FROM python:3.10-slim
4 |
5 | WORKDIR /usr/local
6 |
7 | COPY . /usr/local
8 | RUN pip install --no-cache-dir . && \
9 | pip cache purge
10 |
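# Example build (a sketch; the tag is an assumption based on the image name noted above):
#   docker build -t quay.io/kepler_model_server/s3:latest .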
--------------------------------------------------------------------------------
/model_training/s3/README.md:
--------------------------------------------------------------------------------
1 | # S3-Pusher
2 |
3 | Simple scripts and a Dockerfile to push the `model_training` data and trained models to an S3-compatible bucket, and to load them back.
4 |
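A minimal usage sketch (assuming the console scripts from `pyproject.toml` are installed, e.g. via `pip install .`; the bucket name, region, machine ID, and pipeline name below are placeholders):

```bash
# push collected data and trained models to an AWS S3 bucket
s3-pusher aws \
  --aws-access-key-id "$AWS_ACCESS_KEY_ID" \
  --aws-secret-access-key "$AWS_SECRET_ACCESS_KEY" \
  --region-name us-east-1 \
  --bucket-name my-kepler-models \
  --mnt-path /mnt \
  --machine-id my-machine

# load data and models for a given pipeline back from the bucket
s3-loader aws \
  --aws-access-key-id "$AWS_ACCESS_KEY_ID" \
  --aws-secret-access-key "$AWS_SECRET_ACCESS_KEY" \
  --region-name us-east-1 \
  --bucket-name my-kepler-models \
  --mnt-path /mnt \
  --pipeline-name my-pipeline \
  --machine-id my-machine
```

The `ibmcloud` subcommand takes `--api-key`, `--service-instance-id`, and `--service-endpoint` in place of the AWS credentials.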
--------------------------------------------------------------------------------
/model_training/s3/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["hatchling"]
3 | build-backend = "hatchling.build"
4 |
5 | [project]
6 | name = "s3"
7 | dynamic = ["version"]
8 | description = ''
9 | readme = "README.md"
10 | requires-python = ">=3.10"
11 | license = "Apache-2.0"
12 | keywords = []
13 | authors = [
14 | { name = "Sunyanan Choochotkaew", email = "sunyanan.choochotkaew1@ibm.com" },
15 | ]
16 | classifiers = [
17 | "Programming Language :: Python",
18 | "Programming Language :: Python :: 3.10",
19 | "Programming Language :: Python :: Implementation :: CPython",
20 | "Programming Language :: Python :: Implementation :: PyPy",
21 | ]
22 | dependencies = [
23 | "boto3",
24 | "ibm-cos-sdk",
25 | ]
26 |
27 | [project.urls]
28 | Documentation = "https://github.com/sustainable-computing-io/kepler-model-server#readme"
29 | Issues = "https://github.com/sustainable-computing-io/kepler-model-server/issues"
30 | Source = "https://github.com/sustainable-computing-io/kepler-model-server"
31 |
32 | [project.scripts]
33 | s3-loader = "s3.loader:run"
34 | s3-pusher = "s3.pusher:run"
35 |
36 | [tool.hatch.version]
37 | path = "src/s3/__about__.py"
38 |
39 | [tool.hatch.envs.default]
40 | python = "3.10"
41 |
42 | [tool.hatch.envs.types]
43 | extra-dependencies = [
44 | "mypy>=1.0.0",
45 | ]
46 | [tool.hatch.envs.types.scripts]
47 | check = "mypy --install-types --non-interactive {args:src/s3 tests}"
48 |
49 | [tool.coverage.run]
50 | source_pkgs = ["s3", "tests"]
51 | branch = true
52 | parallel = true
53 | omit = [
54 | "src/s3/__about__.py",
55 | ]
56 |
57 | [tool.coverage.paths]
58 | s3 = ["src/s3", "*/s3/src/s3"]
59 | tests = ["tests", "*/s3/tests"]
60 |
61 | [tool.coverage.report]
62 | exclude_lines = [
63 | "no cov",
64 | "if __name__ == .__main__.:",
65 | "if TYPE_CHECKING:",
66 | ]
67 |
--------------------------------------------------------------------------------
/model_training/s3/src/s3/__about__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2024-present Sunil Thaha
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | __version__ = "0.7.11"
5 |
--------------------------------------------------------------------------------
/model_training/s3/src/s3/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2024-present Sunil Thaha
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 |
--------------------------------------------------------------------------------
/model_training/s3/src/s3/loader.py:
--------------------------------------------------------------------------------
1 | ## get client
2 | # client = new_client(args)
3 | ## download data and model files from the bucket into the mnt path
4 | # _download(client, ...)
5 | import argparse
6 | import os
7 |
8 | from . import util
9 |
10 | model_dir = "models"
11 | data_dir = "data"
12 | machine_spec_dir = "machine_spec"
13 |
14 |
15 | def aws_list_keys(client, bucket_name, prefix):
16 | response = client.list_objects_v2(Bucket=bucket_name, Prefix=prefix)
17 | return [obj["Key"] for obj in response.get("Contents", [])]
18 |
19 |
20 | def ibmcloud_list_keys(client, bucket_name, prefix):
21 | bucket_obj = client.Bucket(bucket_name)
22 | data_response = bucket_obj.objects.filter(Prefix=prefix)
23 | return [obj.key for obj in data_response]
24 |
25 |
26 | def get_bucket_file_map(client, bucket_name, machine_id, mnt_path, pipeline_name, list_func):
27 | bucket_file_map = dict()
28 | top_key_path = ""
29 | if machine_id is not None and machine_id != "":
30 | top_key_path = "/" + machine_id
31 | # add data key map
32 | data_path = os.path.join(mnt_path, data_dir)
33 | datapath_prefix = top_key_path + "/data"
34 | keys = list_func(client, bucket_name, datapath_prefix)
35 | for key in keys:
36 | filepath = key.replace(datapath_prefix, data_path)
37 | bucket_file_map[key] = filepath
38 | # add model key map
39 | model_path = os.path.join(mnt_path, model_dir, pipeline_name)
40 |     model_prefix = "/models/" + pipeline_name
41 |     keys = list_func(client, bucket_name, model_prefix)
42 |     for key in keys:
43 |         filepath = key.replace(model_prefix, model_path)
44 | bucket_file_map[key] = filepath
45 | return bucket_file_map
46 |
47 |
48 | def aws_download(client, bucket_name, machine_id, mnt_path, pipeline_name):
49 | print("AWS Download")
50 | bucket_file_map = get_bucket_file_map(client, bucket_name, machine_id=machine_id, mnt_path=mnt_path, pipeline_name=pipeline_name, list_func=aws_list_keys)
51 | for key, filepath in bucket_file_map.items():
52 | print(key, filepath)
53 | dir = os.path.dirname(filepath)
54 | if not os.path.exists(dir):
55 | os.makedirs(dir)
56 | client.download_file(bucket_name, key, filepath)
57 |
58 |
59 | def ibm_download(client, bucket_name, machine_id, mnt_path, pipeline_name):
60 | print("IBM Download")
61 | bucket_file_map = get_bucket_file_map(
62 | client, bucket_name, machine_id=machine_id, mnt_path=mnt_path, pipeline_name=pipeline_name, list_func=ibmcloud_list_keys
63 | )
64 | for key, filepath in bucket_file_map.items():
65 | print(key, filepath)
66 | dir = os.path.dirname(filepath)
67 | if not os.path.exists(dir):
68 | os.makedirs(dir)
69 | client.Bucket(bucket_name).download_file(key, filepath)
70 |
71 |
72 | def add_common_args(subparser):
73 | subparser.add_argument("--bucket-name", help="Bucket name", required=True)
74 | subparser.add_argument("--mnt-path", help="Mount path", required=True)
75 | subparser.add_argument("--pipeline-name", help="Pipeline name")
76 | subparser.add_argument("--machine-id", help="Machine ID")
77 |
78 |
79 | def run():
80 |     parser = argparse.ArgumentParser(description="S3 Loader")
81 | args = util.get_command(parser, add_common_args, ibm_download, aws_download)
82 | if hasattr(args, "new_client_func") and hasattr(args, "func"):
83 | client = args.new_client_func(args)
84 | args.func(client, args.bucket_name, args.machine_id, args.mnt_path, args.pipeline_name)
85 | else:
86 | parser.print_help()
87 |
88 |
89 | if __name__ == "__main__":
90 | run()
91 |
--------------------------------------------------------------------------------
/model_training/s3/src/s3/pusher.py:
--------------------------------------------------------------------------------
1 | ## get client
2 | # client = new_client(args)
3 | ## upload all files in mnt path
4 | # _upload(client, mnt_path)
5 | import argparse
6 | import os
7 |
8 | from . import util
9 |
10 | model_dir = "models"
11 | data_dir = "data"
12 | machine_spec_dir = "machine_spec"
13 |
14 |
15 | def get_bucket_file_map(machine_id, mnt_path, query_data, idle_data):
16 | model_path = os.path.join(mnt_path, model_dir)
17 | bucket_file_map = dict()
18 | top_key_path = ""
19 | if machine_id is not None and machine_id != "":
20 | top_key_path = "/" + machine_id
21 | if os.path.exists(model_path):
22 | for root, _, files in os.walk(model_path):
23 | for file in files:
24 | filepath = os.path.join(root, file)
25 | key = filepath.replace(model_path, "/models")
26 | bucket_file_map[key] = filepath
27 | data_path = os.path.join(mnt_path, data_dir)
28 | for data_filename in [query_data, idle_data]:
29 | if data_filename is not None:
30 | filepath = os.path.join(data_path, data_filename + ".json")
31 | if os.path.exists(filepath):
32 | key = filepath.replace(data_path, top_key_path + "/data")
33 | bucket_file_map[key] = filepath
34 |     filepath = os.path.join(data_path, machine_spec_dir, machine_id + ".json") if machine_id else ""  # skip the machine spec file when --machine-id is not set
35 | if os.path.exists(filepath):
36 | key = filepath.replace(data_path, top_key_path + "/data")
37 | bucket_file_map[key] = filepath
38 | return bucket_file_map
39 |
40 |
41 | def aws_upload(client, bucket_name, machine_id, mnt_path, query_data, idle_data):
42 | print("AWS Upload")
43 | bucket_file_map = get_bucket_file_map(machine_id=machine_id, mnt_path=mnt_path, query_data=query_data, idle_data=idle_data)
44 | for key, filepath in bucket_file_map.items():
45 | print(key, filepath)
46 | client.upload_file(filepath, bucket_name, key)
47 |
48 |
49 | def ibm_upload(client, bucket_name, machine_id, mnt_path, query_data, idle_data):
50 | print("IBM Upload")
51 | bucket_file_map = get_bucket_file_map(machine_id=machine_id, mnt_path=mnt_path, query_data=query_data, idle_data=idle_data)
52 | for key, filepath in bucket_file_map.items():
53 | print(key, filepath)
54 | client.Object(bucket_name, key).upload_file(filepath)
55 |
56 |
57 | def add_common_args(subparser):
58 | subparser.add_argument("--bucket-name", help="Bucket name", required=True)
59 | subparser.add_argument("--mnt-path", help="Mount path", required=True)
60 | subparser.add_argument("--query-data", help="Query data filename")
61 | subparser.add_argument("--idle-data", help="Idle data filename")
62 | subparser.add_argument("--machine-id", help="Machine ID")
63 |
64 |
65 | def run():
66 | parser = argparse.ArgumentParser(description="S3 Pusher")
67 | args = util.get_command(parser, add_common_args, ibm_upload, aws_upload)
68 | if hasattr(args, "new_client_func") and hasattr(args, "func"):
69 | client = args.new_client_func(args)
70 | args.func(client, args.bucket_name, args.machine_id, args.mnt_path, args.query_data, args.idle_data)
71 | else:
72 | parser.print_help()
73 |
74 |
75 | if __name__ == "__main__":
76 | run()
77 |
--------------------------------------------------------------------------------
/model_training/s3/src/s3/util.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import s3.__about__ as about
4 |
5 |
6 | def new_ibm_client(args):
7 | import ibm_boto3
8 | from ibm_botocore.client import Config
9 |
10 | cos = ibm_boto3.resource(
11 | "s3",
12 | ibm_api_key_id=args.api_key,
13 | ibm_service_instance_id=args.service_instance_id,
14 | config=Config(signature_version="oauth"),
15 | endpoint_url=args.service_endpoint,
16 | )
17 | return cos
18 |
19 |
20 | def new_aws_client(args):
21 | import boto3 as aws_boto3
22 |
23 | s3 = aws_boto3.client("s3", aws_access_key_id=args.aws_access_key_id, aws_secret_access_key=args.aws_secret_access_key, region_name=args.region_name)
24 | return s3
25 |
26 |
27 | def get_command(parser: argparse.ArgumentParser, add_common_args, ibm_func, aws_func):
28 | parser.add_argument("--version", action="version", version=about.__version__)
29 |
30 | subparsers = parser.add_subparsers(title="S3 provider", dest="provider")
31 | ibm_parser = subparsers.add_parser("ibmcloud", help="IBM Cloud")
32 | ibm_parser.add_argument("--api-key", type=str, help="API key", required=True)
33 | ibm_parser.add_argument("--service-instance-id", type=str, help="Service instance ID", required=True)
34 | ibm_parser.add_argument("--service-endpoint", type=str, help="Service endpoint", required=True)
35 | add_common_args(ibm_parser)
36 | ibm_parser.set_defaults(new_client_func=new_ibm_client, func=ibm_func)
37 |
38 | aws_parser = subparsers.add_parser("aws", help="AWS")
39 | aws_parser.add_argument("--aws-access-key-id", type=str, help="Access key ID", required=True)
40 | aws_parser.add_argument("--aws-secret-access-key", type=str, help="Secret key", required=True)
41 | aws_parser.add_argument("--region-name", type=str, help="Region name", required=True)
42 | add_common_args(aws_parser)
43 | aws_parser.set_defaults(new_client_func=new_aws_client, func=aws_func)
44 |
45 | args = parser.parse_args()
46 |
47 | return args
48 |
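
A minimal sketch (not part of the repository; credentials, bucket, and paths are placeholders): get_command wires one subparser per provider and stores both a client factory (new_client_func) and the provider handler (func) on the parsed namespace.

    import argparse
    import sys

    from s3 import util

    def add_common_args(subparser):
        subparser.add_argument("--bucket-name", required=True)
        subparser.add_argument("--mnt-path", required=True)

    def on_aws(client, args):
        print("would upload to", args.bucket_name)

    sys.argv = [
        "s3-tool", "aws",
        "--aws-access-key-id", "EXAMPLE", "--aws-secret-access-key", "EXAMPLE",
        "--region-name", "us-east-1", "--bucket-name", "example-bucket", "--mnt-path", "/output",
    ]
    args = util.get_command(argparse.ArgumentParser(), add_common_args, ibm_func=None, aws_func=on_aws)
    client = args.new_client_func(args)  # boto3 S3 client; nothing is sent yet
    args.func(client, args)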
--------------------------------------------------------------------------------
/model_training/s3/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2024-present Sunil Thaha
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 |
--------------------------------------------------------------------------------
/model_training/tekton/examples/complete-pipelinerun.yaml:
--------------------------------------------------------------------------------
1 | # example-complete-train-pipeline
2 | # run the pipeline with all default values to train AbsPower/DynPower models for every energy source and feature group
3 | apiVersion: tekton.dev/v1
4 | kind: PipelineRun
5 | metadata:
6 | name: example-complete-train-pipeline
7 | spec:
8 | timeouts:
9 | pipeline: 6h
10 | tasks: 5h50m
11 | workspaces:
12 | - name: mnt
13 | persistentVolumeClaim:
14 | claimName: task-pvc
15 | params:
16 | - name: PIPELINE_NAME
17 | value: CompleteTrainPipelineExample
18 | # the parameters below are for a short test run
19 | - name: STRESS_ARGS
20 | value:
21 | - cpu;none;none
22 | - name: STRESS_TIMEOUT
23 | value: 20
24 | - name: STRESS_BREAK_INTERVAL
25 | value: 1
26 | - name: IDLE_COLLECT_INTERVAL
27 | value: 100
28 | - name: CPU_FREQUENCY_ENABLED
29 | value: false
30 | pipelineRef:
31 | name: complete-train-pipeline
32 |
--------------------------------------------------------------------------------
/model_training/tekton/examples/single-train/abs-power.yaml:
--------------------------------------------------------------------------------
1 | # example-abs-train-pipeline:
2 | # run the pipeline with all default values to train an AbsPower model (rapl-sysfs, BPFOnly)
3 | apiVersion: tekton.dev/v1
4 | kind: PipelineRun
5 | metadata:
6 | name: example-abs-train-pipeline
7 | spec:
8 | timeouts:
9 | pipeline: 6h
10 | tasks: 5h50m
11 | workspaces:
12 | - name: mnt
13 | persistentVolumeClaim:
14 | claimName: task-pvc
15 | params:
16 | - name: PIPELINE_NAME
17 | value: AbsPowerTrainPipelineExample
18 | - name: OUTPUT_TYPE
19 | value: AbsPower
20 | # the parameters below are for a short test run
21 | - name: STRESS_ARGS
22 | value:
23 | - cpu;none;none
24 | - name: STRESS_TIMEOUT
25 | value: 20
26 | - name: STRESS_BREAK_INTERVAL
27 | value: 1
28 | - name: IDLE_COLLECT_INTERVAL
29 | value: 100
30 | - name: CPU_FREQUENCY_ENABLED
31 | value: false
32 | pipelineRef:
33 | name: single-train-pipeline
34 |
--------------------------------------------------------------------------------
/model_training/tekton/examples/single-train/aws-push.yaml:
--------------------------------------------------------------------------------
1 | # test-pipeline-aws
2 | # a short pipeline run to test end-to-end from data collection to training with AWS COS
3 | apiVersion: tekton.dev/v1
4 | kind: PipelineRun
5 | metadata:
6 | name: test-pipeline-aws
7 | spec:
8 | timeouts:
9 | pipeline: 6h
10 | tasks: 5h50m
11 | workspaces:
12 | - name: mnt
13 | persistentVolumeClaim:
14 | claimName: task-pvc
15 | params:
16 | - name: PIPELINE_NAME
17 | value: AbsPowerTrainPipelineExample
18 | - name: OUTPUT_TYPE
19 | value: AbsPower
20 | - name: MACHINE_ID
21 | value: test
22 | - name: COS_PROVIDER
23 | value: aws
24 | - name: COS_SECRET_NAME
25 | value: aws-cos-secret
26 | # the parameters below are for a short test run
27 | - name: STRESS_ARGS
28 | value:
29 | - cpu;none;none
30 | - name: STRESS_TIMEOUT
31 | value: 20
32 | - name: STRESS_BREAK_INTERVAL
33 | value: 1
34 | - name: IDLE_COLLECT_INTERVAL
35 | value: 100
36 | - name: CPU_FREQUENCY_ENABLED
37 | value: false
38 | pipelineRef:
39 | name: single-train-pipeline
40 |
--------------------------------------------------------------------------------
/model_training/tekton/examples/single-train/default.yaml:
--------------------------------------------------------------------------------
1 | # kepler-default
2 | # run the pipeline with all default values to train an AbsPower model (rapl-sysfs, BPFOnly) with COS
3 | apiVersion: tekton.dev/v1
4 | kind: PipelineRun
5 | metadata:
6 | name: default
7 | spec:
8 | timeouts:
9 | pipeline: 6h
10 | tasks: 5h50m
11 | workspaces:
12 | - name: mnt
13 | persistentVolumeClaim:
14 | claimName: task-pvc
15 | params:
16 | - name: PIPELINE_NAME
17 | value: AbsPowerTrainPipelineExample
18 | - name: OUTPUT_TYPE
19 | value: AbsPower
20 | # Uncomment the following lines for IBM Cloud COS
21 | # - name: COS_PROVIDER
22 | # value: ibmcloud
23 | # - name: COS_SECRET_NAME
24 | # value: ibm-cos-secret
25 | # Uncomment the following lines for AWS COS
26 | # - name: COS_PROVIDER
27 | # value: aws
28 | # - name: COS_SECRET_NAME
29 | # value: aws-cos-secret
30 | pipelineRef:
31 | name: single-train-pipeline
32 |
--------------------------------------------------------------------------------
/model_training/tekton/examples/single-train/dyn-power.yaml:
--------------------------------------------------------------------------------
1 | # example-dyn-train-pipeline:
2 | # run the pipeline with all default values to train a DynPower model (rapl-sysfs, BPFOnly)
3 | apiVersion: tekton.dev/v1
4 | kind: PipelineRun
5 | metadata:
6 | name: example-dyn-train-pipeline
7 | spec:
8 | timeouts:
9 | pipeline: 6h
10 | tasks: 5h50m
11 | workspaces:
12 | - name: mnt
13 | persistentVolumeClaim:
14 | claimName: task-pvc
15 | params:
16 | - name: PIPELINE_NAME
17 | value: DynPowerTrainPipelineExample
18 | - name: OUTPUT_TYPE
19 | value: DynPower
20 | # the parameters below are for a short test run
21 | - name: STRESS_ARGS
22 | value:
23 | - cpu;none;none
24 | - name: STRESS_TIMEOUT
25 | value: 20
26 | - name: STRESS_BREAK_INTERVAL
27 | value: 1
28 | - name: IDLE_COLLECT_INTERVAL
29 | value: 100
30 | - name: CPU_FREQUENCY_ENABLED
31 | value: false
32 | pipelineRef:
33 | name: single-train-pipeline
34 |
--------------------------------------------------------------------------------
/model_training/tekton/examples/single-train/ibmcloud-push.yaml:
--------------------------------------------------------------------------------
1 | # test-pipeline-ibmcloud
2 | # a short pipeline run to test end-to-end from data collection to training with IBM Cloud COS
3 | apiVersion: tekton.dev/v1
4 | kind: PipelineRun
5 | metadata:
6 | name: test-pipeline-ibmcloud
7 | spec:
8 | timeouts:
9 | pipeline: 6h
10 | tasks: 5h50m
11 | workspaces:
12 | - name: mnt
13 | persistentVolumeClaim:
14 | claimName: task-pvc
15 | params:
16 | - name: PIPELINE_NAME
17 | value: AbsPowerTrainPipelineExample
18 | - name: OUTPUT_TYPE
19 | value: AbsPower
20 | - name: MACHINE_ID
21 | value: test
22 | - name: COS_PROVIDER
23 | value: ibmcloud
24 | - name: COS_SECRET_NAME
25 | value: ibm-cos-secret
26 | # the parameters below are for a short test run
27 | - name: STRESS_ARGS
28 | value:
29 | - cpu;none;none
30 | - name: STRESS_TIMEOUT
31 | value: 20
32 | - name: STRESS_BREAK_INTERVAL
33 | value: 1
34 | - name: IDLE_COLLECT_INTERVAL
35 | value: 100
36 | - name: CPU_FREQUENCY_ENABLED
37 | value: false
38 | pipelineRef:
39 | name: single-train-pipeline
40 |
--------------------------------------------------------------------------------
/model_training/tekton/examples/test-collect.yaml:
--------------------------------------------------------------------------------
1 | # test-collect
2 | # a short pipeline run to test data collection
3 | apiVersion: tekton.dev/v1
4 | kind: PipelineRun
5 | metadata:
6 | name: test-collect
7 | spec:
8 | timeouts:
9 | pipeline: 6h
10 | tasks: 5h50m
11 | workspaces:
12 | - name: mnt
13 | persistentVolumeClaim:
14 | claimName: task-pvc
15 | params:
16 | - name: MACHINE_ID
17 | value: test
18 | - name: STRESS_ARGS
19 | value:
20 | - cpu;none;none
21 | - name: STRESS_TIMEOUT
22 | value: 20
23 | - name: STRESS_BREAK_INTERVAL
24 | value: 1
25 | - name: IDLE_COLLECT_INTERVAL
26 | value: 100
27 | - name: CPU_FREQUENCY_ENABLED
28 | value: false
29 | pipelineRef:
30 | name: collect-data-pipeline
31 |
--------------------------------------------------------------------------------
/model_training/tekton/examples/test-retrain.yaml:
--------------------------------------------------------------------------------
1 | # test-retrain-ibmcloud:
2 | # retrain the AbsPower model (rapl-sysfs, BPFOnly) from previously collected data via the single-retrain pipeline
3 | apiVersion: tekton.dev/v1
4 | kind: PipelineRun
5 | metadata:
6 | name: test-retrain-ibmcloud
7 | spec:
8 | timeouts:
9 | pipeline: 6h
10 | tasks: 5h50m
11 | workspaces:
12 | - name: mnt
13 | persistentVolumeClaim:
14 | claimName: task-pvc
15 | params:
16 | - name: PIPELINE_NAME
17 | value: AbsPowerTrainPipelineExample
18 | - name: OUTPUT_TYPE
19 | value: AbsPower
20 | pipelineRef:
21 | name: single-retrain-pipeline
22 |
--------------------------------------------------------------------------------
/model_training/tekton/pvc/hostpath.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: PersistentVolume
3 | metadata:
4 | name: task-pv-volume
5 | labels:
6 | type: local
7 | spec:
8 | storageClassName: manual
9 | capacity:
10 | storage: 5Gi
11 | accessModes:
12 | - ReadWriteMany
13 | hostPath:
14 | path: /mnt
15 | ---
16 | apiVersion: v1
17 | kind: PersistentVolumeClaim
18 | metadata:
19 | name: task-pvc
20 | namespace: default
21 | spec:
22 | storageClassName: manual
23 | volumeName: task-pv-volume
24 | accessModes:
25 | - ReadWriteMany
26 | resources:
27 | requests:
28 | storage: 3Gi
29 |
--------------------------------------------------------------------------------
/model_training/tekton/tasks/extract-task.yaml:
--------------------------------------------------------------------------------
1 | ######################################
2 | ##
3 | ## extract-from-metric:
4 | ##
5 | ## load kepler_query.json and extract data to extracted_data.csv
6 | ##
7 | ######################################
8 | apiVersion: tekton.dev/v1
9 | kind: Task
10 | metadata:
11 | name: extract-from-metric
12 | spec:
13 | params:
14 | - name: MODEL_SERVER_IMAGE
15 | description: Specify model server image
16 | default: quay.io/sustainable_computing_io/kepler_model_server:latest
17 | - name: PIPELINE_NAME
18 | description: Specify pipeline name (output prefix/folder)
19 | default: default
20 | - name: OUTPUT_TYPE
21 | description: Specify target output type (check https://sustainable-computing.io/kepler_model_server/pipeline/#power-isolation)
22 | - name: ENERGY_SOURCE
23 | description: Specify target energy source (check https://sustainable-computing.io/kepler_model_server/pipeline/#energy-source)
24 | default: rapl-sysfs
25 | - name: FEATURE_GROUP
26 | description: Specify target feature group (check https://sustainable-computing.io/kepler_model_server/pipeline/#feature-group)
27 | default: BPFOnly
28 | - name: EXTRACTOR
29 | description: Specify extractor class (default or smooth)
30 | default: default
31 | - name: THIRDPARTY_METRICS
32 | description: Specify list of third party metric to export (required only for ThirdParty feature group)
33 | default: ""
34 | workspaces:
35 | - name: mnt
36 | optional: true
37 | steps:
38 | - name: extract
39 | image: $(params.MODEL_SERVER_IMAGE)
40 | command: [kepler-model]
41 | args:
42 | - extract
43 | - --data-path=$(workspaces.mnt.path)/data
44 | - --input=kepler_query
45 | - --output=$(params.PIPELINE_NAME)_$(params.ENERGY_SOURCE)_$(params.FEATURE_GROUP)_data
46 | - --extractor=$(params.EXTRACTOR)
47 | - --feature-group=$(params.FEATURE_GROUP)
48 | - --energy-source=$(params.ENERGY_SOURCE)
49 | - --output-type=$(params.OUTPUT_TYPE)
50 | - --thirdparty-metrics="$(params.THIRDPARTY_METRICS)"
51 |
--------------------------------------------------------------------------------
/model_training/tekton/tasks/isolate-task.yaml:
--------------------------------------------------------------------------------
1 | ######################################
2 | ##
3 | ## isolate-from-metric:
4 | ##
5 | ## load kepler_query.json and isolate data to isolated_data.csv
6 | ##
7 | ######################################
8 | apiVersion: tekton.dev/v1
9 | kind: Task
10 | metadata:
11 | name: isolate-from-metric
12 | spec:
13 | params:
14 | - name: MODEL_SERVER_IMAGE
15 | description: Specify model server image
16 | default: quay.io/sustainable_computing_io/kepler_model_server:latest
17 | - name: PIPELINE_NAME
18 | description: Specify pipeline name (output prefix/folder)
19 | default: default
20 | - name: ENERGY_SOURCE
21 | description: Specify target energy source (check https://sustainable-computing.io/kepler_model_server/pipeline/#energy-source)
22 | default: rapl-sysfs
23 | - name: FEATURE_GROUP
24 | description: Specify target feature group (check https://sustainable-computing.io/kepler_model_server/pipeline/#feature-group)
25 | default: BPFOnly
26 | - name: EXTRACTOR
27 | description: Specify extractor class (default or smooth)
28 | default: default
29 | - name: ISOLATOR
30 |       description: Specify isolator class (none, min, profile, or trainer (if ABS_PIPELINE_NAME is set))
31 | default: min
32 | - name: THIRDPARTY_METRICS
33 | description: Specify list of third party metric to export (required only for ThirdParty feature group)
34 | default: ""
35 | - name: TARGET_HINTS
36 | description: Specify target process keywords to keep in DynPower model training
37 | default: stress
38 | - name: BG_HINTS
39 | description: Specify background process keywords to remove from DynPower model training
40 | default: ""
41 | - name: ABS_PIPELINE_NAME
42 | description: Specify pipeline name to be used for initializing trainer isolator
43 | default: ""
44 | workspaces:
45 | - name: mnt
46 | optional: true
47 | steps:
48 | - name: isolate
49 | image: $(params.MODEL_SERVER_IMAGE)
50 | command: [kepler-model]
51 | args:
52 | - isolate
53 | - --data-path=$(workspaces.mnt.path)/data
54 | - --input=kepler_query
55 | - --output=$(params.PIPELINE_NAME)_$(params.ENERGY_SOURCE)_$(params.FEATURE_GROUP)_data
56 | - --pipeline-name=$(params.PIPELINE_NAME)
57 | - --extractor=$(params.EXTRACTOR)
58 | - --isolator=$(params.ISOLATOR)
59 | - --feature-group=$(params.FEATURE_GROUP)
60 | - --energy-source=$(params.ENERGY_SOURCE)
61 | - --output-type=DynPower
62 | - --thirdparty-metrics="$(params.THIRDPARTY_METRICS)"
63 | - --abs-pipeline-name=$(params.ABS_PIPELINE_NAME)
64 | - --profile=idle
65 | - --target-hints="$(params.TARGET_HINTS)"
66 | - --bg-hints="$(params.BG_HINTS)"
67 |
--------------------------------------------------------------------------------
/model_training/tekton/tasks/original-pipeline-task.yaml:
--------------------------------------------------------------------------------
1 | ######################################
2 | ##
3 | ## train-pipeline:
4 | ##
5 | ## load kepler_query.json and run training pipeline
6 | ##
7 | ######################################
8 | apiVersion: tekton.dev/v1
9 | kind: Task
10 | metadata:
11 | name: original-pipeline-task
12 | spec:
13 | params:
14 | - name: MODEL_SERVER_IMAGE
15 | description: Specify model server image
16 | default: quay.io/sustainable_computing_io/kepler_model_server:latest
17 | - name: PIPELINE_NAME
18 | description: Specify output pipeline name
19 | default: default
20 | - name: EXTRACTOR
21 | description: Specify extractor class (default or smooth)
22 | default: default
23 | - name: ISOLATOR
24 |       description: Specify isolator class (none, min, profile, or trainer (if ABS_PIPELINE_NAME is set))
25 | default: min
26 | - name: ABS_TRAINERS
27 | description: Specify a list of trainers for training AbsPower models
28 | default: default
29 | - name: DYN_TRAINERS
30 | description: Specify a list of trainers for training DynPower models
31 | default: default
32 | - name: ENERGY_SOURCE
33 | description: Specify target energy source (check https://sustainable-computing.io/kepler_model_server/pipeline/#energy-source)
34 | default: acpi,rapl-sysfs
35 | - name: TARGET_HINTS
36 | description: Specify target process keywords to keep in DynPower model training
37 | default: stress
38 | - name: BG_HINTS
39 | description: Specify background process keywords to remove from DynPower model training
40 | default: ""
41 | - name: THIRDPARTY_METRICS
42 | description: Specify list of third party metric to export (required only for ThirdParty feature group)
43 | default: ""
44 | - name: MACHINE_ID
45 | description: Specify machine id to identify node_type
46 | workspaces:
47 | - name: mnt
48 | optional: true
49 | steps:
50 | - name: pipeline-train
51 | image: $(params.MODEL_SERVER_IMAGE)
52 | command: [kepler-model]
53 | env:
54 | - name: MODEL_PATH
55 | value: $(workspaces.mnt.path)/models
56 | args:
57 | - train
58 | - --data-path=$(workspaces.mnt.path)/data
59 | - --input=kepler_query
60 | - --pipeline-name=$(params.PIPELINE_NAME)
61 | - --extractor=$(params.EXTRACTOR)
62 | - --isolator=$(params.ISOLATOR)
63 | - --profile=idle
64 | - --target-hints="$(params.TARGET_HINTS)"
65 | - --bg-hints="$(params.BG_HINTS)"
66 | - --abs-trainers=$(params.ABS_TRAINERS)
67 | - --dyn-trainers=$(params.DYN_TRAINERS)
68 | - --energy-source=$(params.ENERGY_SOURCE)
69 | - --thirdparty-metrics="$(params.THIRDPARTY_METRICS)"
70 | - --id=$(params.MACHINE_ID)
71 |
--------------------------------------------------------------------------------
/model_training/tekton/tasks/s3/aws-s3-load.yaml:
--------------------------------------------------------------------------------
1 | ######################################
2 | ##
3 | ## s3-load task for AWS
4 | ##
5 | ######################################
6 | apiVersion: tekton.dev/v1
7 | kind: Task
8 | metadata:
9 | name: aws-s3-load
10 | spec:
11 | params:
12 | - name: COS_SECRET_NAME
13 | description: Specify cos secret name
14 | default: ""
15 | - name: MACHINE_ID
16 | description: Specify machine id to group model result in bucket
17 | default: ""
18 | - name: PIPELINE_NAME
19 | description: Specify pipeline name (output prefix/folder)
20 | default: default
21 | workspaces:
22 | - name: mnt
23 | optional: true
24 | steps:
25 | - name: load
26 | image: quay.io/sustainable_computing_io/kepler_model_server/s3:latest
27 | env:
28 | - name: ACCESS_KEY_ID
29 | valueFrom:
30 | secretKeyRef:
31 | name: $(params.COS_SECRET_NAME)
32 | key: accessKeyID
33 | - name: ACCESS_SECRET
34 | valueFrom:
35 | secretKeyRef:
36 | name: $(params.COS_SECRET_NAME)
37 | key: accessSecret
38 | - name: REGION_NAME
39 | valueFrom:
40 | secretKeyRef:
41 | name: $(params.COS_SECRET_NAME)
42 | key: regionName
43 | - name: BUCKET_NAME
44 | valueFrom:
45 | secretKeyRef:
46 | name: $(params.COS_SECRET_NAME)
47 | key: bucketName
48 | command: [s3-loader]
49 | args:
50 | - aws
51 | - --aws-access-key-id=$(ACCESS_KEY_ID)
52 | - --aws-secret-access-key=$(ACCESS_SECRET)
53 | - --region-name=$(REGION_NAME)
54 | - --bucket-name=$(BUCKET_NAME)
55 | - --mnt-path=$(workspaces.mnt.path)
56 | - --pipeline-name=$(params.PIPELINE_NAME)
57 | - --machine-id=$(params.MACHINE_ID)
58 |
--------------------------------------------------------------------------------
/model_training/tekton/tasks/s3/aws-s3-push.yaml:
--------------------------------------------------------------------------------
1 | ######################################
2 | ##
3 | ## s3-push task for AWS
4 | ##
5 | ######################################
6 | apiVersion: tekton.dev/v1
7 | kind: Task
8 | metadata:
9 | name: aws-s3-push
10 | spec:
11 | params:
12 | - name: COS_SECRET_NAME
13 | description: Specify cos secret name
14 | default: ""
15 | - name: MACHINE_ID
16 | description: Specify machine id to group model result in bucket
17 | default: ""
18 | workspaces:
19 | - name: mnt
20 | optional: true
21 | steps:
22 | - name: push
23 | image: quay.io/sustainable_computing_io/kepler_model_server/s3:latest
24 | env:
25 | - name: ACCESS_KEY_ID
26 | valueFrom:
27 | secretKeyRef:
28 | name: $(params.COS_SECRET_NAME)
29 | key: accessKeyID
30 | - name: ACCESS_SECRET
31 | valueFrom:
32 | secretKeyRef:
33 | name: $(params.COS_SECRET_NAME)
34 | key: accessSecret
35 | - name: REGION_NAME
36 | valueFrom:
37 | secretKeyRef:
38 | name: $(params.COS_SECRET_NAME)
39 | key: regionName
40 | - name: BUCKET_NAME
41 | valueFrom:
42 | secretKeyRef:
43 | name: $(params.COS_SECRET_NAME)
44 | key: bucketName
45 | command: [s3-pusher]
46 | args:
47 | - aws
48 | - --aws-access-key-id=$(ACCESS_KEY_ID)
49 | - --aws-secret-access-key=$(ACCESS_SECRET)
50 | - --region-name=$(REGION_NAME)
51 | - --bucket-name=$(BUCKET_NAME)
52 | - --mnt-path=$(workspaces.mnt.path)
53 | - --query-data=kepler_query
54 | - --idle-data=idle
55 | - --machine-id=$(params.MACHINE_ID)
56 |
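
The task reads its credentials from a Kubernetes Secret named by COS_SECRET_NAME, with the keys accessKeyID, accessSecret, regionName, and bucketName. A hedged sketch of creating such a secret with the Kubernetes Python client (assuming that client is installed; the secret name matches the aws-push.yaml example and the values are placeholders):

    from kubernetes import client, config

    config.load_kube_config()
    secret = client.V1Secret(
        metadata=client.V1ObjectMeta(name="aws-cos-secret"),
        string_data={
            "accessKeyID": "EXAMPLE",
            "accessSecret": "EXAMPLE",
            "regionName": "us-east-1",
            "bucketName": "example-bucket",
        },
    )
    client.CoreV1Api().create_namespaced_secret(namespace="default", body=secret)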
--------------------------------------------------------------------------------
/model_training/tekton/tasks/s3/ibmcloud-s3-load.yaml:
--------------------------------------------------------------------------------
1 | ######################################
2 | ##
3 | ## s3-load task for IBM Cloud
4 | ##
5 | ######################################
6 | apiVersion: tekton.dev/v1
7 | kind: Task
8 | metadata:
9 | name: ibmcloud-s3-load
10 | spec:
11 | params:
12 | - name: COS_SECRET_NAME
13 | description: Specify cos secret name
14 | default: ""
15 | - name: MACHINE_ID
16 | description: Specify machine id to group model result in bucket
17 | default: ""
18 | - name: PIPELINE_NAME
19 | description: Specify pipeline name (output prefix/folder)
20 | default: default
21 | workspaces:
22 | - name: mnt
23 | optional: true
24 | steps:
25 | - name: load
26 | image: quay.io/sustainable_computing_io/kepler_model_server/s3:latest
27 | env:
28 | - name: SERVICE_ENDPOINT
29 | valueFrom:
30 | secretKeyRef:
31 | name: $(params.COS_SECRET_NAME)
32 | key: serviceEndpoint
33 | - name: API_KEY
34 | valueFrom:
35 | secretKeyRef:
36 | name: $(params.COS_SECRET_NAME)
37 | key: apiKey
38 | - name: SERVICE_INSTANCE_ID
39 | valueFrom:
40 | secretKeyRef:
41 | name: $(params.COS_SECRET_NAME)
42 | key: serviceInstanceID
43 | - name: BUCKET_NAME
44 | valueFrom:
45 | secretKeyRef:
46 | name: $(params.COS_SECRET_NAME)
47 | key: bucketName
48 | command: [s3-loader]
49 | args:
50 | - ibmcloud
51 | - --service-endpoint=$(SERVICE_ENDPOINT)
52 | - --api-key=$(API_KEY)
53 | - --service-instance-id=$(SERVICE_INSTANCE_ID)
54 | - --bucket-name=$(BUCKET_NAME)
55 | - --mnt-path=$(workspaces.mnt.path)
56 | - --pipeline-name=$(params.PIPELINE_NAME)
57 | - --machine-id=$(params.MACHINE_ID)
58 |
--------------------------------------------------------------------------------
/model_training/tekton/tasks/s3/ibmcloud-s3-push.yaml:
--------------------------------------------------------------------------------
1 | ######################################
2 | ##
3 | ## s3-push task for IBM Cloud
4 | ##
5 | ######################################
6 | apiVersion: tekton.dev/v1
7 | kind: Task
8 | metadata:
9 | name: ibmcloud-s3-push
10 | spec:
11 | params:
12 | - name: COS_SECRET_NAME
13 | description: Specify cos secret name
14 | default: ""
15 | - name: MACHINE_ID
16 | description: Specify machine id to group model result in bucket
17 | default: ""
18 | workspaces:
19 | - name: mnt
20 | optional: true
21 | steps:
22 | - name: push
23 | image: quay.io/sustainable_computing_io/kepler_model_server/s3:latest
24 | env:
25 | - name: SERVICE_ENDPOINT
26 | valueFrom:
27 | secretKeyRef:
28 | name: $(params.COS_SECRET_NAME)
29 | key: serviceEndpoint
30 | - name: API_KEY
31 | valueFrom:
32 | secretKeyRef:
33 | name: $(params.COS_SECRET_NAME)
34 | key: apiKey
35 | - name: SERVICE_INSTANCE_ID
36 | valueFrom:
37 | secretKeyRef:
38 | name: $(params.COS_SECRET_NAME)
39 | key: serviceInstanceID
40 | - name: BUCKET_NAME
41 | valueFrom:
42 | secretKeyRef:
43 | name: $(params.COS_SECRET_NAME)
44 | key: bucketName
45 | command: [s3-pusher]
46 | args:
47 | - ibmcloud
48 | - --service-endpoint=$(SERVICE_ENDPOINT)
49 | - --api-key=$(API_KEY)
50 | - --service-instance-id=$(SERVICE_INSTANCE_ID)
51 | - --bucket-name=$(BUCKET_NAME)
52 | - --mnt-path=$(workspaces.mnt.path)
53 | - --query-data=kepler_query
54 | - --idle-data=idle
55 | - --machine-id=$(params.MACHINE_ID)
56 |
--------------------------------------------------------------------------------
/model_training/tekton/tasks/train-task.yaml:
--------------------------------------------------------------------------------
1 | ######################################
2 | ##
3 | ## train-model:
4 | ##
5 | ## train model from extracted data/isolated data
6 | ##
7 | ######################################
8 | apiVersion: tekton.dev/v1
9 | kind: Task
10 | metadata:
11 | name: train-model
12 | spec:
13 | params:
14 | - name: MODEL_SERVER_IMAGE
15 | description: Specify model server image
16 | default: quay.io/sustainable_computing_io/kepler_model_server:latest
17 | - name: INPUT_DATA
18 | description: Specify input data file name (extracted_data or isolated_data)
19 | - name: PIPELINE_NAME
20 | description: Specify pipeline name (output prefix/folder)
21 | default: default
22 | - name: OUTPUT_TYPE
23 | description: Specify target output type (check https://sustainable-computing.io/kepler_model_server/pipeline/#power-isolation)
24 | default: AbsPower
25 | - name: ENERGY_SOURCE
26 | description: Specify target energy source (check https://sustainable-computing.io/kepler_model_server/pipeline/#energy-source)
27 | default: rapl-sysfs
28 | - name: FEATURE_GROUP
29 | description: Specify target feature group (check https://sustainable-computing.io/kepler_model_server/pipeline/#feature-group)
30 | default: BPFOnly
31 | - name: TRAINERS
32 | description: Specify trainer names (use comma(,) as delimiter)
33 | default: XgboostFitTrainer
34 | - name: THIRDPARTY_METRICS
35 | description: Specify list of third party metric to export (required only for ThirdParty feature group)
36 | default: ""
37 | - name: MACHINE_ID
38 | description: Specify machine id to identify node_type
39 | default: ""
40 | workspaces:
41 | - name: mnt
42 | optional: true
43 | steps:
44 | - name: train-from-data
45 | image: $(params.MODEL_SERVER_IMAGE)
46 | command: [kepler-model]
47 | env:
48 | - name: MODEL_PATH
49 | value: $(workspaces.mnt.path)/models
50 | args:
51 | - train_from_data
52 | - --data-path=$(workspaces.mnt.path)/data
53 | - --input=$(params.INPUT_DATA)
54 | - --pipeline-name=$(params.PIPELINE_NAME)
55 | - --feature-group=$(params.FEATURE_GROUP)
56 | - --energy-source=$(params.ENERGY_SOURCE)
57 | - --output-type=$(params.OUTPUT_TYPE)
58 | - --trainers=$(params.TRAINERS)
59 | - --thirdparty-metrics="$(params.THIRDPARTY_METRICS)"
60 | - --id=$(params.MACHINE_ID)
61 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["hatchling"]
3 | build-backend = "hatchling.build"
4 |
5 | [project]
6 | name = "kepler_model"
7 | dynamic = ["version"]
8 | description = "kepler model server for serving kepler models"
9 | readme = "README.md"
10 | requires-python = ">= 3.10"
11 | license = "Apache-2.0"
12 | keywords = [
13 | "kepler", "models",
14 | "model-server", "estimator"
15 | ]
16 |
17 | authors = [
18 | { name = "Sunyanan Choochotkaew", email = "sunyanan.choochotkaew1@ibm.com" },
19 | { name = "Sunil Thaha", email = "sthaha@redhat.com" },
20 | ]
21 |
22 | classifiers = [
23 | "Programming Language :: Python",
24 | "Programming Language :: Python :: 3",
25 | "Programming Language :: Python :: 3.10",
26 | ]
27 | dependencies = [
28 | "flask==3.0.3",
29 | "joblib==1.4.2",
30 | "numpy==2.1.2",
31 | "pandas==2.2.3",
32 | "prometheus-api-client==0.5.5",
33 | "prometheus-client==0.21.0",
34 | "protobuf==5.28.2",
35 | "psutil==6.1.0",
36 | "py-cpuinfo==9.0.0",
37 | "pyudev==0.24.3",
38 | "pyyaml_env_tag==0.1",
39 | "scikit-learn==1.5.2",
40 | "scipy==1.14.1",
41 | "seaborn==0.13.2",
42 | "Werkzeug==3.0.4",
43 | "xgboost==2.1.2",
44 | "boto3==1.35.43",
45 | "pymarkdownlnt==0.9.22",
46 | "yamllint==1.35.1",
47 | "requests-file==2.1.0",
48 | ]
49 |
50 | [project.scripts]
51 | model-server = "kepler_model.server.model_server:run"
52 | estimator = "kepler_model.estimate.estimator:run"
53 | kepler-model = "kepler_model.cmd.main:run"
54 | offline-trainer = "kepler_model.train.offline_trainer:run"
55 | online-trainer = "kepler_model.train.online_trainer:run"
56 |
57 | [project.urls]
58 | Documentation = "https://github.com/sustainable-computing-io/kepler-model-server#readme"
59 | Issues = "https://github.com/sustainable-computing-io/kepler-model-server/issues"
60 | Source = "https://github.com/sustainable-computing-io/kepler-model-server"
61 |
62 | [tool.hatch.version]
63 | path = "src/kepler_model/__about__.py"
64 |
65 | [tool.hatch.envs.default]
66 | python = "3.10"
67 | extra-dependencies = [
68 | "coverage[toml]>=6.5",
69 | "ipdb",
70 | "ipython",
71 | "pytest",
72 | ]
73 |
74 | [tool.hatch.envs.default.scripts]
75 | test = "pytest {args:tests}"
76 | test-cov = "coverage run -m pytest {args:tests}"
77 | cov-report = [
78 | "- coverage combine",
79 | "coverage report",
80 | ]
81 | cov = [
82 | "test-cov",
83 | "cov-report",
84 | ]
85 |
86 | [tool.hatch.envs.lab]
87 | extra-dependencies = [
88 | "jupyterlab",
89 | "notebook",
90 | "voila",
91 | "ipywidgets",
92 | # vim please
93 | "jupyterlab-vim",
94 |
95 | "beautifulsoup4",
96 | # read parquet files
97 | # "pyarrow",
98 |
99 | # graphing
100 | "matplotlib",
101 | "graphviz",
102 | ]
103 |
104 | [tool.hatch.envs.lab.scripts]
105 | note = "jupyter lab --NotebookApp.token='' --allow-root"
106 |
107 | [tool.hatch.envs.types]
108 | extra-dependencies = [
109 | "mypy>=1.0.0",
110 | ]
111 | [tool.hatch.envs.types.scripts]
112 | check = "mypy --install-types --non-interactive {args:src/kepler_model tests}"
113 |
114 | [tool.coverage.run]
115 | source_pkgs = ["kepler_model", "tests"]
116 | branch = true
117 | parallel = true
118 | omit = [
119 | "src/kepler_model/__about__.py",
120 | ]
121 |
122 | [tool.coverage.paths]
123 | kepler_model = ["src/kepler_model", "*/kepler_model/src/kepler_model"]
124 | tests = ["tests", "*/kepler_model/tests"]
125 |
126 | [tool.coverage.report]
127 | exclude_lines = [
128 | "no cov",
129 | "if __name__ == .__main__.:",
130 | "if TYPE_CHECKING:",
131 | ]
132 |
133 | [tool.ruff]
134 | line-length = 160
135 |
136 | [tool.pytest.ini_options]
137 | markers = [
138 | "focus", # used in development to mark focused tests
139 | ]
140 |
141 | [tool.pymarkdown]
142 | plugins.md013.enabled = false
143 |
--------------------------------------------------------------------------------
/src/kepler_model/__about__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: 2024-present
2 | #
3 | # SPDX-License-Identifier: Apache-2.0
4 | __version__ = "0.7.11"
5 |
--------------------------------------------------------------------------------
/src/kepler_model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/abs-train-pipelinerun.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: tekton.dev/v1
2 | kind: PipelineRun
3 | metadata:
4 | name: example-abs-train-pipeline
5 | spec:
6 | timeouts:
7 | pipeline: 6h
8 | tasks: 5h50m
9 | workspaces:
10 | - name: mnt
11 | persistentVolumeClaim:
12 | claimName: task-pvc
13 | params:
14 | - name: PIPELINE_NAME
15 | value: AbsPowerTrainPipelineExample
16 | - name: OUTPUT_TYPE
17 | value: AbsPower
18 | pipelineRef:
19 | name: single-train-pipeline
20 |
--------------------------------------------------------------------------------
/src/kepler_model/cmd/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/cmd/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/estimate/__init__.py:
--------------------------------------------------------------------------------
1 | from .model.estimate_common import compute_error
2 | from .model.model import (
3 | default_idle_predicted_col_func,
4 | default_predicted_col_func,
5 | get_background_containers,
6 | get_dynamic_power_colname,
7 | get_label_power_colname,
8 | get_predicted_background_power_colname,
9 | get_predicted_dynamic_background_power_colname,
10 | get_predicted_dynamic_power_colname,
11 | get_predicted_power_colname,
12 | get_reconstructed_power_colname,
13 | load_model,
14 | )
15 |
16 | __all__ = [
17 | "compute_error",
18 | "load_model",
19 | "get_background_containers",
20 | "default_predicted_col_func",
21 | "get_predicted_power_colname",
22 | "get_predicted_background_power_colname",
23 | "get_dynamic_power_colname",
24 | "get_predicted_dynamic_power_colname",
25 | "get_predicted_dynamic_background_power_colname",
26 | "get_label_power_colname",
27 | "get_reconstructed_power_colname",
28 | "default_idle_predicted_col_func",
29 | ]
30 |
--------------------------------------------------------------------------------
/src/kepler_model/estimate/archived_model.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import requests
4 | from requests_file import FileAdapter
5 |
6 | from kepler_model.estimate.model_server_connector import unpack
7 | from kepler_model.util.config import get_init_model_url
8 | from kepler_model.util.loader import load_metadata
9 | from kepler_model.util.train_types import ModelOutputType
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 | failed_list = []
14 |
15 | FILTER_ITEM_DELIMIT = ";"
16 | VALUE_DELIMIT = ":"
17 | ARRAY_DELIMIT = ","
18 |
19 |
20 | def parse_filters(filter):
21 | filter_list = filter.split(FILTER_ITEM_DELIMIT)
22 | filters = dict()
23 | for filter_item in filter_list:
24 | splits = filter_item.split(VALUE_DELIMIT)
25 | if len(splits) != 2:
26 | continue
27 | key = splits[0]
28 | if key == "features":
29 | value = splits[1].split(ARRAY_DELIMIT)
30 | else:
31 | value = splits[1]
32 | filters[key] = value
33 | return filters
34 |
35 |
36 | def valid_metrics(metrics, features):
37 | for feature in features:
38 | if feature not in metrics:
39 | return False
40 | return True
41 |
42 |
43 | def is_valid_model(metrics, metadata, filters):
44 | if not valid_metrics(metrics, metadata["features"]):
45 | return False
46 |
47 | for attrb, val in filters.items():
48 |         if attrb not in metadata or metadata[attrb] is None:
49 |             logger.warning(f"{metadata['model_name']} has no {attrb}")
50 |             return False
51 | 
52 |         cmp_val = metadata[attrb]
53 | val = float(val)
54 | if attrb == "abs_max_corr": # higher is better
55 | valid = cmp_val >= val
56 | else: # lower is better
57 | valid = cmp_val <= val
58 | if not valid:
59 | return False
60 |
61 | return True
62 |
63 |
64 | def reset_failed_list():
65 | global failed_list
66 | failed_list = []
67 |
68 |
69 | def get_achived_model(power_request):
70 | global failed_list
71 | output_type_name = power_request.output_type
72 | if output_type_name in failed_list:
73 | return None
74 | output_type = ModelOutputType[power_request.output_type]
75 | url = get_init_model_url(power_request.energy_source, output_type_name)
76 | if url == "":
77 | logger.warning(f"no URL set for {output_type_name}, {power_request.energy_source}")
78 | return None
79 |     logger.info(f"trying to get archived model from URL: {url} for {output_type_name}")
80 |
81 | s = requests.Session()
82 | s.mount("file://", FileAdapter())
83 | response = s.get(url)
84 | logger.debug(f"response: {response}")
85 |
86 | if response.status_code != 200:
87 | return None
88 |
89 | output_path = unpack(power_request.energy_source, output_type, response, replace=False)
90 | if output_path is not None:
91 | metadata = load_metadata(output_path)
92 | filters = parse_filters(power_request.filter)
93 | try:
94 | if not is_valid_model(power_request.metrics, metadata, filters):
95 | failed_list += [output_type_name]
96 | return None
97 | except Exception as e:
98 | logger.warning(f"cannot validate the archived model: {e}")
99 | return None
100 |
101 | return output_path
102 |
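
A minimal sketch (not part of the repository; the filter keys and feature names are illustrative) of how parse_filters turns a power-request filter string into the dict consumed by is_valid_model:

    from kepler_model.estimate.archived_model import parse_filters

    filters = parse_filters("abs_max_corr:0.7;mae:10;features:cpu_time,cache_miss")
    # -> {'abs_max_corr': '0.7', 'mae': '10', 'features': ['cpu_time', 'cache_miss']}
    print(filters)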
--------------------------------------------------------------------------------
/src/kepler_model/estimate/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/estimate/model/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/estimate/model/curvefit_model.py:
--------------------------------------------------------------------------------
1 | import collections.abc
2 |
3 | from kepler_model.estimate.model.estimate_common import (
4 | is_component_model,
5 | load_model_by_json,
6 | load_model_by_pickle,
7 | transform_and_predict,
8 | )
9 | from kepler_model.util import ModelOutputType
10 | from kepler_model.util.train_types import get_valid_feature_groups, main_feature
11 |
12 |
13 | class CurveFitModelEstimator:
14 | def __init__(self, model_path, model_name, output_type, model_file, features, fe_files, component_init=False, feature_group=None):
15 | self.name = model_name
16 | self.features = features
17 | if feature_group is None:
18 | self.feauture_group = get_valid_feature_groups(features)[0]
19 | else:
20 | self.feauture_group = feature_group
21 | self.output_type = ModelOutputType[output_type]
22 |
23 | self.comp_type = not component_init and is_component_model(model_file)
24 | if self.comp_type:
25 | self.models = dict()
26 | model_info = load_model_by_json(model_path, model_file)
27 | for comp, model_metadata in model_info.items():
28 | model = CurveFitModelEstimator(
29 | model_path,
30 | self.name,
31 | self.output_type.name,
32 | model_metadata["model_file"],
33 | model_metadata["features"],
34 | model_metadata["fe_files"],
35 | component_init=True,
36 | )
37 | feature_index = main_feature(self.feauture_group.name, comp)
38 | if model.model is not None:
39 | model.model.set_feature_index(feature_index)
40 | self.models[comp] = model
41 | else:
42 | self.model = load_model_by_pickle(model_path, model_file)
43 | self.fe_list = []
44 | for fe_filename in fe_files:
45 | self.fe_list += [load_model_by_pickle(model_path, fe_filename)]
46 |
47 | def get_power(self, request):
48 | if self.comp_type:
49 | results = dict()
50 | for comp, model in self.models.items():
51 | y, msg = transform_and_predict(model, request)
52 | if msg != "":
53 | return [], msg
54 | if not isinstance(y, collections.abc.Sequence):
55 | y = [y]
56 | results[comp] = y
57 | return results, msg
58 | else:
59 | return transform_and_predict(self, request)
60 |
--------------------------------------------------------------------------------
/src/kepler_model/estimate/model/estimate_common.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import cpuinfo
4 | import numpy as np
5 | from sklearn.metrics import mean_absolute_error, mean_squared_error
6 |
7 | from kepler_model.util.loader import load_json, load_pkl
8 |
9 | cpu_info = cpuinfo.get_cpu_info()
10 | 
11 | # keras-based models are optional; enable them only when keras can be imported (it may be unusable, e.g., without AVX support)
12 | try:
13 |     import keras
14 |     from keras import backend as K
15 |     keras_enabled = True
16 | except ImportError:
17 |     keras_enabled = False
18 |
19 |
20 | def is_component_model(model_file):
21 | return ".json" in model_file
22 |
23 |
24 | def transform_and_predict(model, datapoint):
25 | msg = ""
26 | try:
27 | x_values = datapoint[model.features].values
28 | for fe in model.fe_list:
29 | if fe is None:
30 | continue
31 | x_values = fe.transform(x_values)
32 | y = model.model.predict(x_values).squeeze()
33 | y[y < 0] = 0
34 | y = y.tolist()
35 | except Exception as e:
36 | msg = f"{e}\n"
37 | y = []
38 | return y, msg
39 |
40 |
41 | def load_model_by_pickle(model_path, model_filename):
42 | return load_pkl(model_path, model_filename)
43 |
44 |
45 | def coeff_determination(y_true, y_pred):
46 | if not keras_enabled:
47 | return None
48 | SS_res = K.sum(K.square(y_true - y_pred))
49 | SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
50 | return 1 - SS_res / (SS_tot + K.epsilon())
51 |
52 |
53 | def load_model_by_keras(model_path, model_filename):
54 | model_file = os.path.join(model_path, model_filename)
55 | try:
56 | model = keras.models.load_model(model_file, custom_objects={"coeff_determination": coeff_determination})
57 | except Exception as e:
58 | print(e)
59 | return None
60 | return model
61 |
62 |
63 | def load_model_by_json(model_path, model_filename):
64 | return load_json(model_path, model_filename)
65 |
66 |
67 | # return mae, mse, mape
68 | def compute_error(predicted_power, actual_powers):
69 | mse = mean_squared_error(actual_powers, predicted_power)
70 | mae = mean_absolute_error(actual_powers, predicted_power)
71 | actual_power_values = list(actual_powers)
72 | predicted_power_values = list(predicted_power)
73 | if len(actual_powers) == 0:
74 | mape = -1
75 | else:
76 | non_zero_predicted_powers = np.array([predicted_power_values[i] for i in range(len(predicted_power_values)) if actual_power_values[i] > 0])
77 | if len(non_zero_predicted_powers) == 0:
78 | mape = -1
79 | else:
80 | non_zero_y_test = np.array([y for y in actual_powers if y > 0])
81 | absolute_percentage_errors = np.abs((non_zero_y_test - non_zero_predicted_powers) / non_zero_y_test) * 100
82 | mape = np.mean(absolute_percentage_errors)
83 | return mae, mse, mape
84 |
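
A minimal sketch (not part of the repository) of compute_error on toy data; rows whose actual power is zero are excluded from the MAPE, and the MAPE is -1 when no non-zero rows remain:

    from kepler_model.estimate.model.estimate_common import compute_error

    mae, mse, mape = compute_error(predicted_power=[10.0, 0.0, 30.0], actual_powers=[12.0, 0.0, 28.0])
    print(mae, mse, mape)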
--------------------------------------------------------------------------------
/src/kepler_model/estimate/model/keras_model.py:
--------------------------------------------------------------------------------
1 | from kepler_model.estimate.model.estimate_common import (
2 | is_component_model,
3 | load_model_by_json,
4 | load_model_by_keras,
5 | load_model_by_pickle,
6 | transform_and_predict,
7 | )
8 | from kepler_model.estimate.model_server_connector import ModelOutputType
9 |
10 |
11 | class KerasModelEstimator:
12 | def __init__(self, model_path, model_name, output_type, model_file, features, fe_files, component_init=False):
13 | self.name = model_name
14 | self.features = features
15 | self.output_type = ModelOutputType[output_type]
16 |         self.comp_type = not component_init and is_component_model(model_file)
17 | if self.comp_type:
18 | self.models = dict()
19 | model_info = load_model_by_json(model_path, model_file)
20 | for comp, model_metadata in model_info.items():
21 | model = KerasModelEstimator(
22 | model_path,
23 | self.name,
24 | self.output_type.name,
25 | model_metadata["model_file"],
26 | model_metadata["features"],
27 | model_metadata["fe_files"],
28 | component_init=True,
29 | )
30 | self.models[comp] = model
31 | else:
32 | self.model = load_model_by_keras(model_path, model_file)
33 | self.fe_list = []
34 | for fe_filename in fe_files:
35 | self.fe_list += [load_model_by_pickle(model_path, fe_filename)]
36 |
37 | def get_power(self, request):
38 | if self.comp_type:
39 | results = dict()
40 | for comp, model in self.models.items():
41 | y, msg = transform_and_predict(model, request)
42 | if msg != "":
43 | return [], msg
44 | results[comp] = y
45 | return results, msg
46 | else:
47 | return transform_and_predict(self, request)
48 |
--------------------------------------------------------------------------------
/src/kepler_model/estimate/model/scikit_model.py:
--------------------------------------------------------------------------------
1 | import collections.abc
2 |
3 | from kepler_model.estimate.model.estimate_common import (
4 | is_component_model,
5 | load_model_by_json,
6 | load_model_by_pickle,
7 | transform_and_predict,
8 | )
9 | from kepler_model.util import ModelOutputType
10 |
11 |
12 | class ScikitModelEstimator:
13 | def __init__(self, model_path, model_name, output_type, model_file, features, fe_files, component_init=False):
14 | self.name = model_name
15 | self.features = features
16 | self.output_type = ModelOutputType[output_type]
17 |
18 | self.comp_type = not component_init and is_component_model(model_file)
19 | if self.comp_type:
20 | self.models = dict()
21 | model_info = load_model_by_json(model_path, model_file)
22 | for comp, model_metadata in model_info.items():
23 | model = ScikitModelEstimator(
24 | model_path,
25 | self.name,
26 | self.output_type.name,
27 | model_metadata["model_file"],
28 | model_metadata["features"],
29 | model_metadata["fe_files"],
30 | component_init=True,
31 | )
32 | self.models[comp] = model
33 | else:
34 | self.model = load_model_by_pickle(model_path, model_file)
35 | self.fe_list = []
36 | for fe_filename in fe_files:
37 | self.fe_list += [load_model_by_pickle(model_path, fe_filename)]
38 |
39 | def get_power(self, request):
40 | if self.comp_type:
41 | results = dict()
42 | for comp, model in self.models.items():
43 | y, msg = transform_and_predict(model, request)
44 | if msg != "":
45 | return [], msg
46 | if not isinstance(y, collections.abc.Sequence):
47 | y = [y]
48 | results[comp] = y
49 | return results, msg
50 | else:
51 | return transform_and_predict(self, request)
52 |
--------------------------------------------------------------------------------
/src/kepler_model/estimate/model/xgboost_model.py:
--------------------------------------------------------------------------------
1 | import collections.abc
2 | import os
3 |
4 | import xgboost as xgb
5 |
6 | from kepler_model.estimate.model.estimate_common import (
7 | is_component_model,
8 | load_model_by_json,
9 | load_model_by_pickle,
10 | transform_and_predict,
11 | )
12 | from kepler_model.util import ModelOutputType
13 |
14 |
15 | class XgboostModelEstimator:
16 | def __init__(self, model_path, model_name, output_type, model_file, features, fe_files, component_init=False):
17 | self.name = model_name
18 | self.features = features
19 | self.output_type = ModelOutputType[output_type]
20 |
21 | self.comp_type = not component_init and is_component_model(model_file)
22 | if self.comp_type:
23 | self.models = dict()
24 | model_info = load_model_by_json(model_path, model_file)
25 | for comp, model_metadata in model_info.items():
26 | model = XgboostModelEstimator(
27 | model_path,
28 | self.name,
29 | self.output_type.name,
30 | model_metadata["model_file"],
31 | model_metadata["features"],
32 | model_metadata["fe_files"],
33 | component_init=True,
34 | )
35 | self.models[comp] = model
36 | else:
37 | filepath = os.path.join(model_path, model_file)
38 | self.model = xgb.XGBRegressor(n_estimators=1000, learning_rate=0.1)
39 | self.model.load_model(filepath)
40 | self.fe_list = []
41 | for fe_filename in fe_files:
42 | self.fe_list += [load_model_by_pickle(model_path, fe_filename)]
43 |
44 | def get_power(self, request):
45 | if self.comp_type:
46 | results = dict()
47 | for comp, model in self.models.items():
48 | y, msg = transform_and_predict(model, request)
49 | if msg != "":
50 | return [], msg
51 | if not isinstance(y, collections.abc.Sequence):
52 | y = [y]
53 | results[comp] = y
54 | return results, msg
55 | else:
56 | return transform_and_predict(self, request)
57 |
--------------------------------------------------------------------------------
/src/kepler_model/estimate/model_server_connector.py:
--------------------------------------------------------------------------------
1 | import codecs
2 | import json
3 | import os
4 | import shutil
5 |
6 | import requests
7 |
8 | from kepler_model.server.model_server import ModelListParam
9 | from kepler_model.util.config import (
10 | download_path,
11 | get_model_server_list_endpoint,
12 | get_model_server_req_endpoint,
13 | is_model_server_enabled,
14 | )
15 | from kepler_model.util.loader import get_download_output_path
16 | from kepler_model.util.train_types import ModelOutputType
17 |
18 |
19 | def make_model_request(power_request, machine_spec=None):
20 | model_request = {
21 | "metrics": power_request.metrics + power_request.system_features,
22 | "output_type": power_request.output_type,
23 | "source": power_request.energy_source,
24 | "filter": power_request.filter,
25 | "trainer_name": power_request.trainer_name,
26 | }
27 | if machine_spec is not None:
28 | model_request["machine_spec"] = machine_spec
29 | return model_request
30 |
31 |
32 | TMP_FILE = "tmp.zip"
33 |
34 |
35 | def unpack(energy_source, output_type, response, replace=True):
36 | output_path = get_download_output_path(download_path, energy_source, output_type)
37 | tmp_filepath = os.path.join(download_path, TMP_FILE)
38 | if os.path.exists(output_path):
39 | if not replace:
40 | if os.path.exists(tmp_filepath):
41 | # delete downloaded file
42 | os.remove(tmp_filepath)
43 | return output_path
44 | # delete existing model
45 | shutil.rmtree(output_path)
46 | with codecs.open(tmp_filepath, "wb") as f:
47 | f.write(response.content)
48 | shutil.unpack_archive(tmp_filepath, output_path)
49 | os.remove(tmp_filepath)
50 | return output_path
51 |
52 |
53 | def make_request(power_request, machine_spec):
54 | if not is_model_server_enabled():
55 | return None
56 | model_request = make_model_request(power_request, machine_spec)
57 | output_type = ModelOutputType[power_request.output_type]
58 | try:
59 | response = requests.post(get_model_server_req_endpoint(), json=model_request)
60 | except Exception as err:
61 | print(f"cannot make request to {get_model_server_req_endpoint()}: {err}")
62 | return None
63 | if response.status_code != 200:
64 | return None
65 | return unpack(power_request.energy_source, output_type, response)
66 |
67 |
68 | def list_all_models(energy_source=None, output_type=None, feature_group=None, node_type=None, filter=None):
69 | if not is_model_server_enabled():
70 | return dict()
71 | try:
72 | endpoint = get_model_server_list_endpoint()
73 | params = {}
74 | if energy_source:
75 | params[ModelListParam.EnergySource.value] = energy_source
76 | if output_type:
77 | params[ModelListParam.OutputType.value] = output_type
78 | if feature_group:
79 | params[ModelListParam.FeatureGroup.value] = feature_group
80 | if node_type:
81 | params[ModelListParam.NodeType.value] = node_type
82 | if filter:
83 | params[ModelListParam.Filter.value] = filter
84 |
85 | response = requests.get(endpoint, params=params)
86 | except Exception as err:
87 | print(f"cannot list model: {err}")
88 | return dict()
89 | if response.status_code != 200:
90 | return dict()
91 | model_names = json.loads(response.content.decode("utf-8"))
92 | return model_names
93 |
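
A minimal sketch (not part of the repository) of the payload make_model_request builds for a typical request; PowerRequest here is a stand-in namedtuple and the metric names are illustrative, not the real request class used by the estimator:

    from collections import namedtuple

    from kepler_model.estimate.model_server_connector import make_model_request

    PowerRequest = namedtuple("PowerRequest", "metrics system_features output_type energy_source filter trainer_name")
    req = PowerRequest(
        metrics=["bpf_cpu_time_ms"],
        system_features=["cpu_frequency"],
        output_type="AbsPower",
        energy_source="rapl-sysfs",
        filter="",
        trainer_name="",
    )
    # keys: metrics (workload + system features), output_type, source, filter, trainer_name, machine_spec
    print(make_model_request(req, machine_spec={"processor": "example"}))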
--------------------------------------------------------------------------------
/src/kepler_model/train/__init__.py:
--------------------------------------------------------------------------------
1 | # commonly used within the train module
2 |
3 | from .extractor.extractor import DefaultExtractor
4 | from .extractor.smooth_extractor import SmoothExtractor
5 | from .isolator.isolator import MinIdleIsolator, NoneIsolator, ProfileBackgroundIsolator
6 | from .isolator.train_isolator import TrainIsolator
7 | from .pipeline import NewPipeline, load_class
8 | from .profiler.node_type_index import NodeTypeIndexCollection, NodeTypeSpec
9 | from .profiler.profiler import Profiler, generate_profiles
10 |
11 | DefaultProfiler = Profiler(extractor=DefaultExtractor())
12 |
13 | __all__ = [
14 | "DefaultExtractor",
15 | "SmoothExtractor",
16 | "Profiler",
17 | "generate_profiles",
18 | "NodeTypeIndexCollection",
19 | "NodeTypeSpec",
20 | "MinIdleIsolator",
21 | "ProfileBackgroundIsolator",
22 | "NoneIsolator",
23 | "TrainIsolator",
24 | "NewPipeline",
25 | "load_class",
26 | ]
27 |
--------------------------------------------------------------------------------
/src/kepler_model/train/exporter/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/train/exporter/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/train/exporter/exporter.py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 | from kepler_model.train.exporter.validator import BestModelCollection, get_validated_export_items
4 | from kepler_model.train.exporter.writer import (
5 | append_version_readme,
6 | generate_pipeline_page,
7 | generate_pipeline_readme,
8 | generate_report_results,
9 | get_workload_content,
10 | )
11 | from kepler_model.util.config import ERROR_KEY
12 | from kepler_model.util.format import time_to_str
13 | from kepler_model.util.loader import get_export_path, get_version_path, load_metadata, load_node_type_index
14 | from kepler_model.util.saver import save_node_type_index, save_pipeline_metadata
15 |
16 | repo_url = "https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-db/main/models"
17 |
18 |
19 | def export(data_path, pipeline_path, db_path, publisher, collect_date, inputs):
20 | # load pipeline metadata
21 | pipeline_metadata = load_metadata(pipeline_path)
22 | if pipeline_metadata is None:
23 | print("no pipeline metadata")
24 | return
25 | # add publish information to pipeline metadata
26 | pipeline_metadata["publisher"] = publisher
27 | pipeline_metadata["collect_time"] = time_to_str(collect_date)
28 | pipeline_metadata["export_time"] = time_to_str(datetime.datetime.utcnow())
29 |
30 | node_type_index_json = load_node_type_index(pipeline_path)
31 | if node_type_index_json is None:
32 | print("no node type index")
33 | return
34 | node_types = node_type_index_json.keys()
35 | best_model_collections = dict()
36 | for node_type in node_types:
37 | best_model_collections[int(node_type)] = BestModelCollection(ERROR_KEY)
38 |
39 | # get path
40 | pipeline_name = pipeline_metadata["name"]
41 | local_export_path = get_export_path(db_path, pipeline_name)
42 | local_version_path = get_version_path(db_path)
43 | remote_version_path = get_version_path(repo_url, assure=False)
44 |
45 | # get validated export items (models)
46 | export_items, valid_metadata_df = get_validated_export_items(pipeline_path, pipeline_name)
47 | # save pipeline metadata
48 | for energy_source, ot_metadata_df in valid_metadata_df.items():
49 | for model_type, metadata_df in ot_metadata_df.items():
50 | metadata_df = metadata_df.sort_values(by=["feature_group", ERROR_KEY])
51 | save_pipeline_metadata(local_export_path, pipeline_metadata, energy_source, model_type, metadata_df)
52 | # save node_type_index.json
53 | save_node_type_index(local_export_path, node_type_index_json)
54 |
55 | for export_item in export_items:
56 | # export models
57 | export_item.export(local_version_path)
58 | # update best model
59 | best_model_collections[export_item.node_type].compare_new_item(export_item)
60 |
61 | # generate pipeline page
62 | workload_content = get_workload_content(data_path, inputs)
63 | generate_pipeline_page(local_version_path, pipeline_metadata, workload_content)
64 | # generate error report page
65 | generate_report_results(local_export_path, best_model_collections, node_type_index_json, remote_version_path)
66 | # generate validation result page
67 | generate_pipeline_readme(pipeline_name, local_export_path, node_type_index_json, best_model_collections)
68 | # add new pipeline item to version path
69 | append_version_readme(local_version_path, pipeline_metadata)
70 |
71 | return local_export_path
72 |
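A minimal sketch of how export() above might be invoked; all paths, the publisher name, and the inputs list are made-up placeholders (the real CLI is expected to wire these from its command-line options):

    import datetime

    from kepler_model.train.exporter.exporter import export

    exported_path = export(
        data_path="/tmp/kepler-data",               # hypothetical collected-data directory
        pipeline_path="/tmp/pipelines/std_v0.7",    # hypothetical trained pipeline directory
        db_path="/tmp/kepler-model-db",             # hypothetical local model-db checkout
        publisher="example-user",
        collect_date=datetime.datetime.utcnow(),
        inputs=["kepler_query"],                    # hypothetical workload input names
    )
    print(exported_path)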
--------------------------------------------------------------------------------
/src/kepler_model/train/extractor/__init__.py:
--------------------------------------------------------------------------------
1 | from .extractor import DefaultExtractor
2 |
3 | __all__ = ["DefaultExtractor"]
4 |
--------------------------------------------------------------------------------
/src/kepler_model/train/extractor/preprocess.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from kepler_model.estimate.model.model import get_label_power_colname
4 | from kepler_model.util.extract_types import col_to_component
5 | from kepler_model.util.prom_types import TIMESTAMP_COL
6 | from kepler_model.util.train_types import PowerSourceMap
7 |
8 |
9 | def drop_zero_column(data, cols):
10 | sum_col = "sum_val"
11 | data[sum_col] = data[cols].sum(axis=1)
12 | data = data.drop(data[data[sum_col] == 0].index)
13 | data = data.drop(columns=[sum_col])
14 | return data
15 |
16 |
17 | def remove_outlier(df, workload_features, threshold=1):
18 | # Calculate the Z-score for each column
19 | z_scores = np.abs((df[workload_features] - df[workload_features].mean()) / df[workload_features].std())
20 | # Remove rows with outliers
21 | df_no_outliers = df[(z_scores < threshold).all(axis=1)]
22 | return df_no_outliers
23 |
24 |
25 | def time_filter(data, min_time, max_time):
26 | _data = data.reset_index()
27 | start_time = _data[TIMESTAMP_COL].min()
28 | _data = _data[(_data[TIMESTAMP_COL] >= start_time + min_time) & (_data[TIMESTAMP_COL] <= start_time + max_time)]
29 | return _data
30 |
31 |
32 | def get_extracted_power_labels(extracted_data, energy_components, label_cols):
33 | # mean over the same value across container-level
34 | extracted_power_labels = extracted_data[[TIMESTAMP_COL] + label_cols].groupby([TIMESTAMP_COL]).mean().sort_index()
35 | for energy_component in energy_components:
36 | target_cols = [col for col in label_cols if col_to_component(col) == energy_component]
37 | component_label_col = get_label_power_colname(energy_component)
38 | extracted_power_labels[component_label_col] = extracted_power_labels[target_cols].sum(axis=1)
39 | return extracted_power_labels
40 |
41 |
42 | def find_correlations(energy_source, feature_power_data, power_columns, workload_features):
43 | power_data = feature_power_data.reset_index().groupby([TIMESTAMP_COL])[power_columns].mean()
44 | feature_data = feature_power_data.reset_index().groupby([TIMESTAMP_COL])[workload_features].sum()
45 | energy_components = PowerSourceMap[energy_source]
46 | target_cols = [col for col in power_columns if col_to_component(col) == energy_components[0]]
47 | process_power_data = power_data.copy()
48 | # mean over the same value across container-level
49 | process_power_over_ts = process_power_data[target_cols].reset_index().groupby([TIMESTAMP_COL]).sum()
50 | process_power_data[energy_source] = process_power_over_ts.sum(axis=1)
51 | # sum usage all container-level
52 | join_data = feature_data.join(process_power_data[energy_source]).dropna()
53 | corr = join_data.corr()[[energy_source]]
54 | return corr.drop(index=energy_source)
55 |
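A short sketch exercising drop_zero_column and remove_outlier above on synthetic data; the column names are made up and only illustrate the expected DataFrame shape:

    import pandas as pd

    from kepler_model.train.extractor.preprocess import drop_zero_column, remove_outlier

    df = pd.DataFrame(
        {
            "cpu_time": [0.0, 5.0, 6.0, 5.5, 50.0],
            "cache_miss": [0.0, 1.0, 1.2, 1.1, 0.9],
        }
    )
    # rows whose selected columns sum to zero are dropped
    df = drop_zero_column(df, ["cpu_time", "cache_miss"])
    # rows with a Z-score above the threshold in any workload feature are dropped
    df = remove_outlier(df, ["cpu_time", "cache_miss"], threshold=1)
    print(df)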
--------------------------------------------------------------------------------
/src/kepler_model/train/extractor/smooth_extractor.py:
--------------------------------------------------------------------------------
1 | from kepler_model.util.train_types import SYSTEM_FEATURES, FeatureGroup, FeatureGroups
2 |
3 | from .extractor import DefaultExtractor, find_correlations
4 |
5 |
6 | class SmoothExtractor(DefaultExtractor):
7 | def __init__(self, smooth_window=30):
8 | self.smooth_window = smooth_window
9 |
10 | def get_name(self):
11 | return "smooth"
12 |
13 | # implement extract function
14 | def extract(self, query_results, energy_components, feature_group, energy_source, node_level, aggr=True, use_vm_metrics=False):
15 | feature_power_data, power_columns, _, features = super().extract(
16 | query_results, energy_components, feature_group, energy_source, node_level, aggr, use_vm_metrics=use_vm_metrics
17 | )
18 |
19 | features = FeatureGroups[FeatureGroup[feature_group]]
20 | smoothed_data = feature_power_data.copy()
21 | workload_features = [feature for feature in features if feature not in SYSTEM_FEATURES]
22 |
23 | for col in list(workload_features) + list(power_columns):
24 | smoothed_data[col] = feature_power_data[col].rolling(window=self.smooth_window).mean()
25 | smoothed_data = smoothed_data.dropna()
26 |
27 | corr = find_correlations(energy_source, feature_power_data, power_columns, workload_features)
28 |
29 | return smoothed_data, power_columns, corr, features
30 |
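A sketch of plugging SmoothExtractor into a training pipeline, mirroring the NewPipeline arguments used in online_trainer.py; the pipeline name and trainer selection below are placeholders:

    from kepler_model.train import MinIdleIsolator, NewPipeline, SmoothExtractor
    from kepler_model.util.train_types import FeatureGroups, PowerSourceMap

    pipeline = NewPipeline(
        "smooth_std_v0.7",                       # hypothetical pipeline name
        ["GradientBoostingRegressorTrainer"],    # abs trainers
        ["GradientBoostingRegressorTrainer"],    # dyn trainers
        extractor=SmoothExtractor(smooth_window=30),
        isolator=MinIdleIsolator(),
        target_energy_sources=PowerSourceMap.keys(),
        valid_feature_groups=FeatureGroups.keys(),
    )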
--------------------------------------------------------------------------------
/src/kepler_model/train/isolator/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/train/isolator/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/train/online_trainer.py:
--------------------------------------------------------------------------------
1 | # TODO: test
2 | import time
3 |
4 | from kepler_model.train.extractor import DefaultExtractor
5 | from kepler_model.train.isolator.isolator import MinIdleIsolator, ProfileBackgroundIsolator
6 | from kepler_model.train.pipeline import NewPipeline
7 | from kepler_model.train.profiler.profiler import load_all_profiles
8 | from kepler_model.train.prom.prom_query import PrometheusClient
9 | from kepler_model.util.config import get_config
10 | from kepler_model.util.loader import default_train_output_pipeline
11 | from kepler_model.util.prom_types import PROM_QUERY_INTERVAL, get_valid_feature_group_from_queries
12 | from kepler_model.util.train_types import FeatureGroups, PowerSourceMap
13 |
14 | SAMPLING_INTERVAL = get_config("SAMPLING_INTERVAL", PROM_QUERY_INTERVAL)
15 |
16 |
17 | default_trainers = ["GradientBoostingRegressorTrainer"]
18 | abs_trainer_names = default_trainers + []
19 | dyn_trainer_names = default_trainers + []
20 |
21 |
22 | def initial_pipelines():
23 | target_energy_sources = PowerSourceMap.keys()
24 | valid_feature_groups = FeatureGroups.keys()
25 | profiles = load_all_profiles()
26 | profile_pipeline = NewPipeline(
27 | default_train_output_pipeline,
28 | abs_trainer_names,
29 | dyn_trainer_names,
30 | extractor=DefaultExtractor(),
31 | isolator=ProfileBackgroundIsolator(profiles),
32 | target_energy_sources=target_energy_sources,
33 | valid_feature_groups=valid_feature_groups,
34 | )
35 | non_profile_pipeline = NewPipeline(
36 | default_train_output_pipeline,
37 | abs_trainer_names,
38 | dyn_trainer_names,
39 | extractor=DefaultExtractor(),
40 | isolator=MinIdleIsolator(),
41 | target_energy_sources=target_energy_sources,
42 | valid_feature_groups=valid_feature_groups,
43 | )
44 | return profile_pipeline, non_profile_pipeline
45 |
46 |
47 | def run():
48 | profile_pipeline, non_profile_pipeline = initial_pipelines()
49 | prom_client = PrometheusClient()
50 | while True:
51 | prom_client.query()
52 | query_results = prom_client.snapshot_query_result()
53 | valid_feature_groups = get_valid_feature_group_from_queries(query_results.keys())
54 | for energy_source, energy_components in PowerSourceMap.items():
55 | for feature_group in valid_feature_groups:
56 | success, _, _ = profile_pipeline.process(query_results, energy_components, energy_source, feature_group=feature_group)
57 | if not success:
58 | # failed to process with profile, try non_profile pipeline
59 | success, _, _ = non_profile_pipeline.process(query_results, energy_components, energy_source, feature_group=feature_group)
60 | if success:
61 | non_profile_pipeline.save_metadata()
62 | else:
63 | profile_pipeline.save_metadata()
64 | time.sleep(SAMPLING_INTERVAL)
65 |
66 |
67 | if __name__ == "__main__":
68 | run()
69 |
--------------------------------------------------------------------------------
/src/kepler_model/train/profiler/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/train/profiler/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/train/profiler/generate_scaler.py:
--------------------------------------------------------------------------------
1 | ############################################################
2 | ##
3 | ## generate_scaler
4 | ## generate a scaler for each node type from prom query
5 | ##
6 | ## python generate_scaler.py query_output_folder
7 | ## e.g., python generate_scaler.py ../tests/data/prom_output
8 | ##
9 | ## input must be a query output of loaded state
10 | ##
11 | ############################################################
12 |
13 | # WARN: is this file used ?
14 |
15 | import os
16 | import pickle
17 |
18 | import pandas as pd
19 | from sklearn.preprocessing import MaxAbsScaler
20 |
21 | from kepler_model.train import DefaultExtractor
22 | from kepler_model.util.prom_types import TIMESTAMP_COL, node_info_column
23 | from kepler_model.util.train_types import SYSTEM_FEATURES, FeatureGroup, FeatureGroups
24 |
25 | # WARN: unable to find this anymore
26 | # from profile_background import profile_path
27 |
28 |
29 | # HACK:
30 | extractor = DefaultExtractor()
31 | profile_path = "profile/path"
32 | max_scaler_top_path = os.path.join(profile_path, "..", "max_scaler")
33 |
34 | if not os.path.exists(max_scaler_top_path):
35 | os.mkdir(max_scaler_top_path)
36 |
37 |
38 | def read_query_results(query_path):
39 | results = dict()
40 |     metric_filenames = os.listdir(query_path)
41 | for metric_filename in metric_filenames:
42 | metric = metric_filename.replace(".csv", "")
43 | filepath = os.path.join(query_path, metric_filename)
44 | results[metric] = pd.read_csv(filepath)
45 | return results
46 |
47 |
48 | def save_scaler(scaler, node_type, feature_group, scaler_top_path):
49 | node_type_path = os.path.join(scaler_top_path, str(node_type))
50 | if not os.path.exists(node_type_path):
51 | os.mkdir(node_type_path)
52 | filename = os.path.join(node_type_path, feature_group + ".pkl")
53 | with open(filename, "wb") as f:
54 | pickle.dump(scaler, f)
55 |
56 |
57 | def process(query_results):
58 | node_info_data = extractor.get_system_category(query_results)
59 | if node_info_data is None:
60 | print("No Node Info")
61 | return None
62 | node_types = pd.unique(node_info_data[node_info_column])
63 | for node_type in node_types:
64 | for feature_group in FeatureGroups:
65 | feature_group_name = feature_group.name
66 | features = FeatureGroups[FeatureGroup[feature_group_name]]
67 | workload_features = [feature for feature in features if feature not in SYSTEM_FEATURES]
68 | system_features = [feature for feature in features if feature in SYSTEM_FEATURES]
69 | feature_data = extractor.get_workload_feature_data(query_results, workload_features)
70 | if feature_data is None:
71 | print("cannot process ", feature_group_name)
72 | continue
73 | workload_feature_data = feature_data.groupby([TIMESTAMP_COL]).sum()[workload_features]
74 | if len(system_features) > 0:
75 | system_feature_data = extractor.get_system_feature_data(query_results, system_features)
76 | feature_data = workload_feature_data.join(system_feature_data).sort_index().dropna()
77 | else:
78 | feature_data = workload_feature_data
79 |
80 | feature_data = feature_data.join(node_info_data)
81 | node_types = pd.unique(feature_data[node_info_column])
82 | # filter and extract features
83 | x_values = feature_data[feature_data[node_info_column] == node_type][features].values
84 | max_scaler = MaxAbsScaler()
85 | max_scaler.fit(x_values)
86 | save_scaler(max_scaler, node_type, feature_group_name, max_scaler_top_path)
87 |
--------------------------------------------------------------------------------
/src/kepler_model/train/prom/__init__.py:
--------------------------------------------------------------------------------
1 | from .prom_query import PrometheusClient
2 |
3 | __all__ = ["PrometheusClient"]
4 |
--------------------------------------------------------------------------------
/src/kepler_model/train/prom/prom_query.py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 | from prometheus_api_client import PrometheusConnect
4 |
5 | from kepler_model.util.prom_types import (
6 | PROM_QUERY_INTERVAL,
7 | PROM_QUERY_STEP,
8 | PROM_SERVER,
9 | PROM_SSL_DISABLE,
10 | generate_dataframe_from_response,
11 | metric_prefix,
12 | )
13 |
14 | UTC_OFFSET_TIMEDELTA = datetime.datetime.utcnow() - datetime.datetime.now()
15 |
16 |
17 | def _range_queries(prom, metric_list, start, end, step, params=None):
18 | response = dict()
19 | for metric in metric_list:
20 | response[metric] = prom.custom_query_range(metric, start, end, step, params)
21 | return response
22 |
23 |
24 | class PrometheusClient:
25 | def __init__(self):
26 | self.prom = PrometheusConnect(url=PROM_SERVER, disable_ssl=PROM_SSL_DISABLE)
27 | self.interval = PROM_QUERY_INTERVAL
28 | self.step = PROM_QUERY_STEP
29 | self.latest_query_result = dict()
30 |
31 | def query(self):
32 | available_metrics = self.prom.all_metrics()
33 | queries = [m for m in available_metrics if metric_prefix in m]
34 | end = datetime.datetime.now()
35 | start = end - datetime.timedelta(seconds=self.interval)
36 | self.latest_query_result = dict()
37 | response_dict = _range_queries(self.prom, queries, start, end, self.step, None)
38 | for query_metric, prom_response in response_dict.items():
39 | self.latest_query_result[query_metric] = generate_dataframe_from_response(query_metric, prom_response)
40 | return response_dict
41 |
42 | def snapshot_query_result(self):
43 | return {metric: data for metric, data in self.latest_query_result.items() if len(data) > 0}
44 |
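A short sketch of a one-shot poll with the client above, assuming a Prometheus instance is reachable via the PROM_SERVER setting:

    from kepler_model.train.prom import PrometheusClient

    client = PrometheusClient()
    client.query()                            # range-query every metric carrying the kepler prefix
    results = client.snapshot_query_result()  # metric name -> non-empty DataFrame
    for metric, df in results.items():
        print(metric, len(df))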
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/ExponentialRegressionTrainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/train/trainer/ExponentialRegressionTrainer/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/ExponentialRegressionTrainer/main.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import numpy as np
4 |
5 | from kepler_model.train.trainer.curvefit import CurveFitModel, CurveFitTrainer
6 |
7 |
8 | def p0_func(x, y):
9 | a = (y.max() - y.min()) // math.e # scale value
10 | b = 1 # start from linear
11 | c = y.min() - a # initial offset
12 | return [a, b, c]
13 |
14 |
15 | def expo_func(x, a, b, c):
16 | y = a * np.exp(b * x) + c
17 | return y
18 |
19 |
20 | class ExponentialRegressionTrainer(CurveFitTrainer):
21 | def __init__(self, energy_components, feature_group, energy_source, node_level, pipeline_name):
22 | super(ExponentialRegressionTrainer, self).__init__(energy_components, feature_group, energy_source, node_level, pipeline_name=pipeline_name)
23 | self.fe_files = []
24 |
25 | def init_model(self):
26 | return CurveFitModel(expo_func, p0_func=p0_func)
27 |
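A quick numeric sketch of the initial-guess helper and the exponential curve above on synthetic values, assuming the module is importable as laid out here; the arrays stand in for scaled feature and power data:

    import numpy as np

    from kepler_model.train.trainer.ExponentialRegressionTrainer.main import expo_func, p0_func

    x = np.linspace(0, 1, 5)
    y = np.array([10.0, 12.0, 15.0, 20.0, 30.0])
    a, b, c = p0_func(x, y)        # rough starting point for the curve-fitting routine
    print(a, b, c)
    print(expo_func(x, a, b, c))   # curve evaluated at the initial guess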
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/GradientBoostingRegressorTrainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/train/trainer/GradientBoostingRegressorTrainer/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/GradientBoostingRegressorTrainer/main.py:
--------------------------------------------------------------------------------
1 | from sklearn.ensemble import GradientBoostingRegressor
2 |
3 | from kepler_model.train.trainer.scikit import ScikitTrainer
4 |
5 | model_class = "scikit"
6 |
7 |
8 | class GradientBoostingRegressorTrainer(ScikitTrainer):
9 | def __init__(self, energy_components, feature_group, energy_source, node_level, pipeline_name):
10 | super(GradientBoostingRegressorTrainer, self).__init__(energy_components, feature_group, energy_source, node_level, pipeline_name=pipeline_name)
11 | self.fe_files = []
12 |
13 | def init_model(self):
14 | return GradientBoostingRegressor(n_estimators=100, max_depth=3, learning_rate=0.1)
15 |
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/KNeighborsRegressorTrainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/train/trainer/KNeighborsRegressorTrainer/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/KNeighborsRegressorTrainer/main.py:
--------------------------------------------------------------------------------
1 | from sklearn.neighbors import KNeighborsRegressor
2 |
3 | from kepler_model.train.trainer.scikit import ScikitTrainer
4 |
5 | model_class = "scikit"
6 |
7 |
8 | class KNeighborsRegressorTrainer(ScikitTrainer):
9 | def __init__(self, energy_components, feature_group, energy_source, node_level, pipeline_name):
10 | super(KNeighborsRegressorTrainer, self).__init__(energy_components, feature_group, energy_source, node_level, pipeline_name=pipeline_name)
11 | self.fe_files = []
12 |
13 | def init_model(self):
14 | return KNeighborsRegressor(n_neighbors=6)
15 |
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/LinearRegressionTrainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/train/trainer/LinearRegressionTrainer/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/LinearRegressionTrainer/main.py:
--------------------------------------------------------------------------------
1 | from sklearn.linear_model import LinearRegression
2 |
3 | from kepler_model.train.trainer.scikit import ScikitTrainer
4 |
5 | model_class = "scikit"
6 |
7 |
8 | class LinearRegressionTrainer(ScikitTrainer):
9 | def __init__(self, energy_components, feature_group, energy_source, node_level, pipeline_name):
10 | super(LinearRegressionTrainer, self).__init__(energy_components, feature_group, energy_source, node_level, pipeline_name=pipeline_name)
11 | self.fe_files = []
12 |
13 | def init_model(self):
14 | return LinearRegression(positive=True)
15 |
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/LogarithmicRegressionTrainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/train/trainer/LogarithmicRegressionTrainer/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/LogarithmicRegressionTrainer/main.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from kepler_model.train.trainer.curvefit import CurveFitModel, CurveFitTrainer
4 |
5 |
6 | def p0_func(x, y):
7 | a = y.max() - y.min()
8 | b = 1
9 | c = y.min()
10 | return [a, b, c]
11 |
12 |
13 | def log_func(x, a, b, c):
14 | y = a * np.log(b * x + 1) + c
15 | return y
16 |
17 |
18 | class LogarithmicRegressionTrainer(CurveFitTrainer):
19 | def __init__(self, energy_components, feature_group, energy_source, node_level, pipeline_name):
20 | super(LogarithmicRegressionTrainer, self).__init__(energy_components, feature_group, energy_source, node_level, pipeline_name=pipeline_name)
21 | self.fe_files = []
22 |
23 | def init_model(self):
24 | return CurveFitModel(log_func, p0_func=p0_func)
25 |
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/LogisticRegressionTrainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/train/trainer/LogisticRegressionTrainer/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/LogisticRegressionTrainer/main.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from kepler_model.train.trainer.curvefit import CurveFitModel, CurveFitTrainer
4 |
5 |
6 | def p0_func(x, y):
7 | A = y.max() - y.min() # value range
8 | x0 = 0.5 # sigmoid mid point (as normalized value is in 0 to 1, start mid point = 0.5)
9 | k = A // np.std(y) # growth rate (larger std, lower growth)
10 | off = y.min() # initial offset
11 | return [A, x0, k, off]
12 |
13 |
14 | def logi_func(x, A, x0, k, off):
15 | return A / (1 + np.exp(-k * (x - x0))) + off
16 |
17 |
18 | class LogisticRegressionTrainer(CurveFitTrainer):
19 | def __init__(self, energy_components, feature_group, energy_source, node_level, pipeline_name):
20 | super(LogisticRegressionTrainer, self).__init__(energy_components, feature_group, energy_source, node_level, pipeline_name=pipeline_name)
21 | self.fe_files = []
22 |
23 | def init_model(self):
24 | return CurveFitModel(logi_func, p0_func=p0_func)
25 |
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/PolynomialRegressionTrainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/train/trainer/PolynomialRegressionTrainer/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/PolynomialRegressionTrainer/main.py:
--------------------------------------------------------------------------------
1 | from sklearn.linear_model import LinearRegression
2 | from sklearn.preprocessing import PolynomialFeatures
3 |
4 | from kepler_model.train.trainer.scikit import ScikitTrainer
5 |
6 | poly_scaler_filename = "poly_scaler.pkl"
7 |
8 |
9 | class PolynomialRegressionTrainer(ScikitTrainer):
10 | def __init__(self, energy_components, feature_group, energy_source, node_level, pipeline_name):
11 | super(PolynomialRegressionTrainer, self).__init__(energy_components, feature_group, energy_source, node_level, pipeline_name=pipeline_name)
12 | self.poly_scaler = PolynomialFeatures(degree=2)
13 | self.fe_files = [poly_scaler_filename]
14 | self.fe = [PolynomialFeatures(degree=2)]
15 |
16 | def init_model(self):
17 | return LinearRegression()
18 |
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/SGDRegressorTrainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/train/trainer/SGDRegressorTrainer/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/SGDRegressorTrainer/main.py:
--------------------------------------------------------------------------------
1 | from sklearn.linear_model import SGDRegressor
2 |
3 | from kepler_model.train.trainer.scikit import ScikitTrainer
4 |
5 |
6 | class SGDRegressorTrainer(ScikitTrainer):
7 | def __init__(self, energy_components, feature_group, energy_source, node_level, pipeline_name):
8 | super(SGDRegressorTrainer, self).__init__(energy_components, feature_group, energy_source, node_level, pipeline_name=pipeline_name)
9 | self.fe_files = []
10 |
11 | def init_model(self):
12 | return SGDRegressor(max_iter=1000)
13 |
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/SVRRegressorTrainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/train/trainer/SVRRegressorTrainer/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/SVRRegressorTrainer/main.py:
--------------------------------------------------------------------------------
1 | from sklearn.svm import SVR
2 |
3 | from kepler_model.train.trainer.scikit import ScikitTrainer
4 |
5 | common_node_type = 1
6 |
7 |
8 | class SVRRegressorTrainer(ScikitTrainer):
9 | def __init__(self, energy_components, feature_group, energy_source, node_level, pipeline_name):
10 | super(SVRRegressorTrainer, self).__init__(energy_components, feature_group, energy_source, node_level, pipeline_name=pipeline_name)
11 | self.fe_files = []
12 |
13 | def init_model(self):
14 | return SVR(C=1.0, epsilon=0.2)
15 |
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/XGBoostTrainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/train/trainer/XGBoostTrainer/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/XgboostFitTrainer/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/src/kepler_model/train/trainer/XgboostFitTrainer/__init__.py
--------------------------------------------------------------------------------
/src/kepler_model/train/trainer/XgboostFitTrainer/main.py:
--------------------------------------------------------------------------------
1 | from kepler_model.train.trainer.xgboost_interface import XgboostTrainer
2 |
3 |
4 | class XgboostFitTrainer(XgboostTrainer):
5 | def __init__(self, energy_components, feature_group, energy_source, node_level, pipeline_name):
6 | super(XgboostFitTrainer, self).__init__(energy_components, feature_group, energy_source, node_level, pipeline_name=pipeline_name)
7 | self.fe_files = []
8 |
9 | def _train(self, node_type, component, X_values, y_values):
10 | model = self.node_models[node_type][component]
11 | if model.__sklearn_is_fitted__():
12 | self.node_models[node_type][component].fit(X_values, y_values, xgb_model=model)
13 | else:
14 | self.node_models[node_type][component].fit(X_values, y_values)
15 |
--------------------------------------------------------------------------------
/src/kepler_model/util/__init__.py:
--------------------------------------------------------------------------------
1 | # commonly-used definitions
2 | from .config import get_config, model_toppath
3 | from .loader import (
4 | class_to_json,
5 | default_train_output_pipeline,
6 | list_model_names,
7 | load_csv,
8 | load_json,
9 | load_metadata,
10 | load_pkl,
11 | load_remote_pkl,
12 | load_scaler,
13 | load_weight,
14 | version,
15 | )
16 | from .prom_types import get_valid_feature_group_from_queries
17 | from .saver import assure_path, save_csv, save_json, save_metadata, save_pkl, save_scaler, save_weight
18 | from .train_types import (
19 | BPF_FEATURES,
20 | COUNTER_FEAUTRES,
21 | IRQ_FEATURES,
22 | SYSTEM_FEATURES,
23 | WORKLOAD_FEATURES,
24 | FeatureGroup,
25 | FeatureGroups,
26 | ModelOutputType,
27 | PowerSourceMap,
28 | get_feature_group,
29 | )
30 |
31 | __all__ = [
32 | "load_json",
33 | "load_csv",
34 | "load_pkl",
35 | "load_metadata",
36 | "load_scaler",
37 | "load_weight",
38 | "load_remote_pkl",
39 | "list_model_names",
40 | "default_train_output_pipeline",
41 | "class_to_json",
42 | "version",
43 | "assure_path",
44 | "save_csv",
45 | "save_json",
46 | "save_pkl",
47 | "save_metadata",
48 | "save_scaler",
49 | "save_weight",
50 | "get_config",
51 | "model_toppath",
52 | "SYSTEM_FEATURES",
53 | "COUNTER_FEAUTRES",
54 | "BPF_FEATURES",
55 | "IRQ_FEATURES",
56 | "WORKLOAD_FEATURES",
57 | "PowerSourceMap",
58 | "FeatureGroup",
59 | "FeatureGroups",
60 | "ModelOutputType",
61 | "get_feature_group",
62 | "get_valid_feature_group_from_queries",
63 | ]
64 |
--------------------------------------------------------------------------------
/src/kepler_model/util/extract_types.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from .prom_types import TIMESTAMP_COL, pkg_id_column
4 | from .train_types import PowerSourceMap
5 |
6 | container_id_colname = "id"
7 | all_container_key = "all containers"
8 | accelerator_type_colname = "type"
9 |
10 | node_level_index = [TIMESTAMP_COL]
11 | pkg_level_index = [TIMESTAMP_COL, pkg_id_column]
12 | container_level_index = [TIMESTAMP_COL, container_id_colname]
13 |
14 |
15 | def component_to_col(component, unit_col=None, unit_val=None):
16 | power_colname = f"{component}_power"
17 | if unit_col is None:
18 | return power_colname
19 | return f"{unit_col}_{unit_val}_{power_colname}"
20 |
21 |
22 | def col_to_component(component_col):
23 | splits = component_col.split("_")
24 | component = splits[-2:][0]
25 | if component == "dynamic" or component == "background":
26 | return splits[-3:][0]
27 | return component
28 |
29 |
30 | def col_to_unit_val(component_col):
31 | return component_col.split("_")[-3:][0]
32 |
33 |
34 | def ratio_to_col(unit_val):
35 | return f"packge_ratio_{unit_val}"
36 |
37 |
38 | def get_unit_vals(power_columns):
39 | return np.unique([col_to_unit_val(col) for col in power_columns if "package" in col])
40 |
41 |
42 | def get_num_of_unit(energy_source, label_cols):
43 |     energy_components = PowerSourceMap[energy_source]
44 | num_of_unit = len(label_cols) / len(energy_components)
45 | return num_of_unit
46 |
47 |
48 | def get_expected_power_columns(energy_components, num_of_unit=1):
49 | # TODO: if ratio applied,
50 | # return [component_to_col(component, "package", unit_val) for component in energy_components for unit_val in range(0,num_of_unit)]
51 | return [component_to_col(component) for component in energy_components]
52 |
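A small sketch of round-tripping power column names with the helpers above; the component and unit values are illustrative:

    from kepler_model.util.extract_types import col_to_component, component_to_col

    col = component_to_col("package")                   # "package_power"
    unit_col = component_to_col("dram", "package", 0)   # "package_0_dram_power"
    print(col_to_component(col))        # "package"
    print(col_to_component(unit_col))   # "dram"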
--------------------------------------------------------------------------------
/src/kepler_model/util/format.py:
--------------------------------------------------------------------------------
1 | def print_bounded_multiline_message(input_lines, maxlength=200):
2 | lines = []
3 | for line in input_lines:
4 |         # split long lines into chunks of at most maxlength characters
5 |         while len(line) > maxlength:
6 |             lines += [line[0:maxlength]]
7 |             line = line[maxlength:]
8 |         # any remaining characters form the final chunk below
9 | if len(line) > 0:
10 | lines += [line]
11 |
12 | max_line_length = max(len(line) for line in lines)
13 | border = "#" * (max_line_length + 4)
14 | print(border)
15 |
16 | for line in lines:
17 | formatted_line = f"# {line.ljust(max_line_length)} #"
18 | print(formatted_line)
19 |
20 | print(border)
21 |
22 |
23 | from datetime import datetime
24 |
25 |
26 | def time_to_str(time):
27 | if isinstance(time, datetime):
28 | return time.strftime("%Y-%m-%d %H:%M:%S")
29 | return time
30 |
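A short sketch of the two helpers above:

    from datetime import datetime

    from kepler_model.util.format import print_bounded_multiline_message, time_to_str

    print_bounded_multiline_message(["model exported", "path: /tmp/models"], maxlength=40)
    print(time_to_str(datetime(2024, 1, 1, 12, 0, 0)))  # "2024-01-01 12:00:00"
    print(time_to_str("already-a-string"))              # non-datetime values pass through unchanged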
--------------------------------------------------------------------------------
/src/kepler_model/util/saver.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 |
4 | import joblib
5 |
6 | METADATA_FILENAME = "metadata"
7 | SCALER_FILENAME = "scaler"
8 | WEIGHT_FILENAME = "weight"
9 | TRAIN_ARGS_FILENAME = "train_arguments"
10 | NODE_TYPE_INDEX_FILENAME = "node_type_index"
11 |
12 | MACHINE_SPEC_PATH = "machine_spec"
13 |
14 |
15 | def _pipeline_model_metadata_filename(energy_source, model_type):
16 | return f"{energy_source}_{model_type}_model_metadata"
17 |
18 |
19 | def _power_curve_filename(energy_source, model_type):
20 | return f"{energy_source}_{model_type}_power_curve"
21 |
22 |
23 | def assure_path(path):
24 | if path == "":
25 | return ""
26 | if not os.path.exists(path):
27 | os.makedirs(path, exist_ok=True)
28 | return path
29 |
30 |
31 | def save_json(path, name, data):
32 |     if not name.endswith(".json"):
33 | name = name + ".json"
34 |
35 | assure_path(path)
36 | filename = os.path.join(path, name)
37 | with open(filename, "w") as f:
38 | json.dump(data, f)
39 | return name
40 |
41 |
42 | def save_pkl(path, name, data):
43 | if ".pkl" not in name:
44 | name = name + ".pkl"
45 | assure_path(path)
46 | filename = os.path.join(path, name)
47 | joblib.dump(data, filename)
48 | return name
49 |
50 |
51 | def save_csv(path, name, data):
52 | if ".csv" not in name:
53 | name = name + ".csv"
54 | assure_path(path)
55 | filename = os.path.join(path, name)
56 | data.to_csv(filename)
57 | return name
58 |
59 |
60 | def save_machine_spec(data_path, machine_id, spec):
61 | machine_spec_path = os.path.join(data_path, MACHINE_SPEC_PATH)
62 | assure_path(machine_spec_path)
63 | save_json(machine_spec_path, machine_id, spec.get_json())
64 |
65 |
66 | def save_node_type_index(pipeline_path, node_type_index):
67 | return save_json(pipeline_path, NODE_TYPE_INDEX_FILENAME, node_type_index)
68 |
69 |
70 | def save_metadata(model_path, metadata):
71 | return save_json(model_path, METADATA_FILENAME, metadata)
72 |
73 |
74 | def save_train_args(pipeline_path, args):
75 | return save_json(pipeline_path, TRAIN_ARGS_FILENAME, args)
76 |
77 |
78 | def save_scaler(model_path, scaler):
79 | return save_pkl(model_path, SCALER_FILENAME, scaler)
80 |
81 |
82 | def save_weight(model_path, weight):
83 | return save_json(model_path, WEIGHT_FILENAME, weight)
84 |
85 |
86 | def save_pipeline_metadata(pipeline_path, pipeline_metadata, energy_source, model_type, metadata_df):
87 | save_metadata(pipeline_path, pipeline_metadata)
88 | pipeline_model_metadata_filename = _pipeline_model_metadata_filename(energy_source, model_type)
89 | return save_csv(pipeline_path, pipeline_model_metadata_filename, metadata_df)
90 |
91 |
92 | def save_profile(profile_path, source, profile):
93 | profile_filename = os.path.join(profile_path, source + ".json")
94 | with open(profile_filename, "w") as f:
95 | json.dump(profile, f)
96 |
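A minimal sketch of the saver helpers writing into a temporary directory; the file names and values are made up:

    import tempfile

    import pandas as pd

    from kepler_model.util.saver import save_csv, save_json, save_metadata

    tmpdir = tempfile.mkdtemp()
    save_json(tmpdir, "node_type_index", {"0": {"cores": 96}})           # -> node_type_index.json
    save_metadata(tmpdir, {"model_name": "demo", "mae": 1.2})            # -> metadata.json
    save_csv(tmpdir, "results", pd.DataFrame({"power": [10.0, 12.5]}))   # -> results.csv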
--------------------------------------------------------------------------------
/src/kepler_model/util/similarity.py:
--------------------------------------------------------------------------------
1 | from .train_types import NodeAttribute
2 |
3 | # simplified weights
4 | # TODO: experimental support for deciding the weight
5 | similarity_reference = {
6 | NodeAttribute.PROCESSOR: 5,
7 | NodeAttribute.CORES: 1,
8 | NodeAttribute.CHIPS: 1,
9 | NodeAttribute.MEMORY: 0.5,
10 | NodeAttribute.FREQ: 0.5,
11 | }
12 |
13 | similarity_total_weight = sum(similarity_reference.values())
14 |
15 |
16 | def get_similarity_weight(attr):
17 | return similarity_reference[attr] / similarity_total_weight
18 |
19 |
20 | def compute_jaccard_similarity(str1: str, str2: str) -> float:
21 |     if str1.lower() == str2.lower():  # including the case where both strings are empty
22 | return 1
23 | if len(str1) == 0 or len(str2) == 0:
24 | return 0
25 | set1 = set(str1.lower()) # Convert to lowercase for case-insensitive comparison
26 | set2 = set(str2.lower())
27 |
28 | intersection = len(set1.intersection(set2))
29 | union = len(set1.union(set2))
30 |
31 | similarity = intersection / union * 0.5
32 | return similarity
33 |
34 |
35 | def compute_similarity(base: float, cmp: float) -> float:
36 | base = float(base)
37 | cmp = float(cmp)
38 | diff_ratio = 0
39 | if base > 0 or cmp > 0:
40 | diff_ratio = abs(cmp - base) / ((base + cmp) / 2)
41 | if diff_ratio >= 1:
42 | return 0
43 | else:
44 | return 1 - diff_ratio
45 |
46 |
47 | def compute_looseness(similarity):
48 | return 1 - similarity
49 |
50 |
51 | # get_candidate_score returns certainty
52 | def get_candidate_score(candidate_uncertain_attribute_freq, candidate_uncertain_attribute_total):
53 | candidate_score = dict()
54 | for attr, candidates in candidate_uncertain_attribute_freq.items():
55 | total = candidate_uncertain_attribute_total[attr]
56 | if total == 0:
57 | # no uncertainty
58 | continue
59 | for candidate in candidates:
60 | candidate_index = candidate[0]
61 | candidate_freq = candidate[1]
62 | if candidate_index not in candidate_score:
63 | candidate_score[candidate_index] = 0
64 | candidate_score[candidate_index] += float(candidate_freq) / total
65 | return candidate_score
66 |
67 |
68 | def find_best_candidate(candidate_score):
69 | max_score = 0
70 | best_candidate_index = -1
71 | for index, score in candidate_score.items():
72 | if score > max_score:
73 | best_candidate_index = index
74 | max_score = score
75 | return best_candidate_index, max_score
76 |
77 |
78 | def compute_uncertainty(max_score, num_of_none):
79 | if num_of_none == 0:
80 | return 0 # covered
81 | uncertainty = 1 - max_score / num_of_none
82 | return uncertainty
83 |
84 |
85 | def get_num_of_none(in_spec):
86 | num_of_none = 0
87 | for attr in NodeAttribute:
88 | if in_spec.attrs[attr] is None:
89 | num_of_none += 1
90 | return num_of_none
91 |
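A quick sanity-check sketch of the string and numeric similarity helpers above; the attribute values are made up:

    from kepler_model.util.similarity import compute_jaccard_similarity, compute_similarity

    print(compute_jaccard_similarity("Intel_Xeon", "intel_xeon"))  # 1 (case-insensitive match)
    print(compute_jaccard_similarity("intel_xeon", "amd_epyc"))    # small character-set overlap, < 0.5
    print(compute_similarity(96, 96))  # identical values -> 1.0
    print(compute_similarity(96, 48))  # 1 - |48 - 96| / ((96 + 48) / 2) ≈ 0.33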
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/bd4c15cc1d66c8d5fa72e08c80041429ccc41dce/tests/__init__.py
--------------------------------------------------------------------------------
/tests/client_load_tester.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | from kepler_model.util.config import SERVE_SOCKET
4 | from estimator_model_test import generate_request, model_names
5 | from estimator_power_request_test import Client
6 |
7 | loads = range(10, 11, 10)
8 | duration = 120
9 |
10 | if __name__ == "__main__":
11 | client = Client(SERVE_SOCKET)
12 | for model_name in model_names:
13 | for load in loads:
14 | request_json = generate_request(model_name, load)
15 | start_time = time.time()
16 | client.make_request(request_json)
17 | elapsed_time = time.time() - start_time
18 | output = f"{model_name},{load},{elapsed_time}"
19 | print(output)
20 | time.sleep(1)
21 |
--------------------------------------------------------------------------------
/tests/data/machine/spec.json:
--------------------------------------------------------------------------------
1 | {"processor": "intel_xeon_platinum_8259cl", "cores": 96, "chips": 2, "memory": 377, "frequency": 3500}
2 |
--------------------------------------------------------------------------------
/tests/data/node_type_index.json:
--------------------------------------------------------------------------------
1 | {"0": {"attrs": {"processor": "intel_xeon_platinum_8259cl", "cores": 96, "chips": 2, "memory": 377, "frequency": 3500}, "members": ["m5.metal-ami-0e4d0bb9670ea8db0"]}, "1": {"attrs": {"processor": "intel_xeon_e5_2686v4", "cores": 72, "chips": 2, "memory": 503, "frequency": 3000}, "members": ["i3.metal-ami-0e4d0bb9670ea8db0"]}, "2": {"attrs": {"processor": "intel_xeon_platinum_8275cl", "cores": 96, "chips": 2, "memory": 188, "frequency": 3900}, "members": ["c5.metal-ami-0e4d0bb9670ea8db0"]}, "3": {"attrs": {"processor": "intel_xeon_platinum_8259cl", "cores": 96, "chips": 2, "memory": 755, "frequency": 3500}, "members": ["r5.metal-ami-0e4d0bb9670ea8db0"]}, "4": {"attrs": {"processor": "intel_xeon_platinum_8252c", "cores": 48, "chips": 2, "memory": 188, "frequency": 4500}, "members": ["m5zn.metal-ami-0e4d0bb9670ea8db0"]}, "5": {"attrs": {"processor": "intel_xeon_platinum_8488c", "cores": 96, "chips": 1, "memory": 377, "frequency": 3800}, "members": ["m7i.metal-24xl-ami-0e4d0bb9670ea8db0"]}}
2 |
--------------------------------------------------------------------------------
/tests/estimator_power_request_test.py:
--------------------------------------------------------------------------------
1 | import json
2 | import socket
3 |
4 | from kepler_model.util.config import SERVE_SOCKET
5 | from kepler_model.util.train_types import (
6 | CATEGORICAL_LABEL_TO_VOCAB,
7 | SYSTEM_FEATURES,
8 | WORKLOAD_FEATURES,
9 | ModelOutputType,
10 | )
11 | from tests.extractor_test import test_energy_source
12 |
13 | trainer_names = ["SGDRegressorTrainer"]
14 | test_energy_sources = ["acpi", "rapl-sysfs"]
15 |
16 |
17 | def generate_request(
18 | train_name, n=1, metrics=WORKLOAD_FEATURES, system_features=SYSTEM_FEATURES, output_type=ModelOutputType.DynPower.name, energy_source=test_energy_source
19 | ):
20 | request_json = dict()
21 | if train_name is not None:
22 | request_json["trainer_name"] = train_name
23 | request_json["metrics"] = metrics
24 | request_json["system_features"] = system_features
25 | request_json["system_values"] = []
26 | for m in system_features:
27 | request_json["system_values"] += [CATEGORICAL_LABEL_TO_VOCAB[m][0]]
28 | request_json["values"] = [[1.0] * len(metrics)] * n
29 | request_json["output_type"] = output_type
30 | request_json["source"] = energy_source
31 | return request_json
32 |
33 |
34 | def process(client, energy_source):
35 | request_json = generate_request(trainer_names[0], 2, output_type="AbsPower", energy_source=energy_source)
36 | res = client.make_request(request_json)
37 | res_json = json.loads(res)
38 | print(res_json)
39 | assert res_json["msg"] == "", "response error: {}".format(res_json["msg"])
40 | assert len(res_json["powers"]) > 0, "zero powers"
41 |
42 |
43 | class Client:
44 | def __init__(self, socket_path):
45 | self.socket_path = socket_path
46 |
47 | def make_request(self, request_json):
48 | s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
49 | s.connect(self.socket_path)
50 | data = json.dumps(request_json)
51 | print(data)
52 | s.send(data.encode())
53 | data = b""
54 | while True:
55 |                 chunk = s.recv(1024).strip()
56 |                 data += chunk
57 |                 if len(chunk) == 0 or chunk.decode()[-1] == "}":
58 | break
59 | decoded_data = data.decode()
60 | s.close()
61 | return decoded_data
62 |
63 |
64 | def test_estimator_power_request():
65 | client = Client(SERVE_SOCKET)
66 | for energy_source in test_energy_sources:
67 | process(client, energy_source)
68 |
69 |
70 | if __name__ == "__main__":
71 | test_estimator_power_request()
72 |
--------------------------------------------------------------------------------
/tests/http_server.py:
--------------------------------------------------------------------------------
1 | import atexit
2 | import http.server
3 | import os
4 | import socketserver
5 | import threading
6 |
7 | from kepler_model.util.config import model_toppath
8 |
9 |
10 | def cleanup_task(server):
11 |     print("Shutting down server...")
12 | server.shutdown()
13 |
14 |
15 | def get_server(file_server_port):
16 | Handler = http.server.SimpleHTTPRequestHandler
17 | httpd = socketserver.TCPServer(("", file_server_port), Handler)
18 |
19 | # Register the cleanup task to be executed on program exit
20 | atexit.register(cleanup_task, httpd)
21 |
22 |     print("HTTP file server serving at port", file_server_port, "for", model_toppath)
23 | return httpd
24 |
25 |
26 | def http_file_server(file_server_port):
27 | try:
28 | httpd = get_server(file_server_port)
29 | # Start the server in a separate thread
30 | server_thread = threading.Thread(target=httpd.serve_forever)
31 | server_thread.daemon = True
32 | server_thread.start()
33 | except Exception as err:
34 |         print(f"File server may already be running: {err}")
35 |
36 |
37 | def run():
38 | os.chdir(model_toppath)
39 | httpd = get_server(8110)
40 | httpd.serve_forever()
41 |
42 |
43 | if __name__ == "__main__":
44 | run()
45 |
--------------------------------------------------------------------------------
/tests/minimal_trainer.py:
--------------------------------------------------------------------------------
1 | from pipeline_test import process
2 |
3 | from kepler_model.util import FeatureGroup
4 |
5 | trainer_names = ["GradientBoostingRegressorTrainer", "SGDRegressorTrainer", "XgboostFitTrainer"]
6 | valid_feature_groups = [FeatureGroup.BPFOnly]
7 |
8 | if __name__ == "__main__":
9 | process(
10 | target_energy_sources=["acpi", "rapl-sysfs"],
11 | abs_trainer_names=trainer_names,
12 | dyn_trainer_names=trainer_names,
13 | valid_feature_groups=valid_feature_groups,
14 | )
15 |
--------------------------------------------------------------------------------
/tests/pipeline_test.py:
--------------------------------------------------------------------------------
1 | from kepler_model.train import NewPipeline, NodeTypeSpec
2 | from kepler_model.util import PowerSourceMap, get_valid_feature_group_from_queries
3 | from kepler_model.util.loader import default_node_type, default_train_output_pipeline
4 | from tests.extractor_test import test_energy_source, test_extractors
5 | from tests.isolator_test import test_isolators
6 | from tests.prom_test import get_query_results, prom_output_filename, prom_output_path
7 | from tests.trainer_test import assert_train, test_trainer_names
8 |
9 | # fake spec value
10 | spec_values = {"processor": "test", "cores": 1, "chips": 1, "memory": -1, "frequency": -1}
11 | spec = NodeTypeSpec(**spec_values)
12 |
13 | test_energy_sources = ["acpi", "rapl-sysfs"]
14 |
15 |
16 | def assert_pipeline(pipeline, query_results, feature_group, energy_source, energy_components):
17 | success, abs_data, dyn_data = pipeline.process(
18 | query_results, energy_components, energy_source, feature_group=feature_group.name, replace_node_type=default_node_type
19 | )
20 | assert success, f"failed to process pipeline {pipeline.name}"
21 | for trainer in pipeline.trainers:
22 | if trainer.feature_group == feature_group and trainer.energy_source == energy_source:
23 | if trainer.node_level:
24 | assert_train(trainer, abs_data, energy_components)
25 | else:
26 | assert_train(trainer, dyn_data, energy_components)
27 |
28 |
29 | def process(
30 | save_pipeline_name=default_train_output_pipeline,
31 | prom_save_path=prom_output_path,
32 | prom_save_name=prom_output_filename,
33 | abs_trainer_names=test_trainer_names,
34 | dyn_trainer_names=test_trainer_names,
35 | extractors=test_extractors,
36 | isolators=test_isolators,
37 | target_energy_sources=[test_energy_source],
38 | valid_feature_groups=None,
39 | ):
40 | query_results = get_query_results(save_path=prom_save_path, save_name=prom_save_name)
41 | if valid_feature_groups is None:
42 | valid_feature_groups = get_valid_feature_group_from_queries(query_results.keys())
43 | for extractor in extractors:
44 | for isolator in isolators:
45 | pipeline = NewPipeline(
46 | save_pipeline_name,
47 | abs_trainer_names,
48 | dyn_trainer_names,
49 | extractor=extractor,
50 | isolator=isolator,
51 | target_energy_sources=target_energy_sources,
52 | valid_feature_groups=valid_feature_groups,
53 | )
54 | global spec
55 | pipeline.node_collection.index_train_machine("test", spec)
56 | for energy_source in target_energy_sources:
57 | energy_components = PowerSourceMap[energy_source]
58 | for feature_group in valid_feature_groups:
59 | assert_pipeline(pipeline, query_results, feature_group, energy_source, energy_components)
60 | # save metadata
61 | pipeline.save_metadata()
62 | # save node collection
63 | pipeline.node_collection.save()
64 | # save pipeline
65 | pipeline.archive_pipeline()
66 |
67 |
68 | def test_process():
69 | process(target_energy_sources=test_energy_sources)
70 |
--------------------------------------------------------------------------------
/tests/prom_test.py:
--------------------------------------------------------------------------------
1 | # prom_test.py
2 | # - prom_client.query
3 | # - prom_client.snapshot_query_result
4 | #
5 | # save response to prom_output_path/prom_output_filename.json
6 | #
7 | # To use output:
8 | #  from prom_test import get_prom_response
9 | # response = get_prom_response()
10 | # or
11 | # query_result = get_query_results()
12 |
13 | import os
14 |
15 | from kepler_model.train.prom import PrometheusClient
16 | from kepler_model.util import load_json, save_json
17 | from kepler_model.util.prom_types import prom_responses_to_results
18 |
19 | prom_output_path = os.path.join(os.path.dirname(__file__), "data", "prom_output")
20 | prom_output_filename = "prom_response"
21 |
22 |
23 | def get_prom_response(save_path=prom_output_path, save_name=prom_output_filename):
24 | return load_json(save_path, save_name)
25 |
26 |
27 | def get_query_results(save_path=prom_output_path, save_name=prom_output_filename):
28 | response = get_prom_response(save_path=save_path, save_name=save_name)
29 | return prom_responses_to_results(response)
30 |
31 |
32 | def process(save_path=prom_output_path, save_name=prom_output_filename, server=None, interval=None, step=None):
33 | if server is not None:
34 | os.environ["PROM_SERVER"] = server
35 | if interval is not None:
36 | os.environ["PROM_QUERY_INTERVAL"] = interval
37 | if step is not None:
38 | os.environ["PROM_QUERY_STEP"] = step
39 | prom_client = PrometheusClient()
40 | response_dict = prom_client.query()
41 | results = prom_client.snapshot_query_result()
42 | print("Available metrics: ", results.keys())
43 | # print query data in csv
44 | for metric, data in results.items():
45 | print(metric)
46 | print(data.head())
47 | save_json(save_path, save_name, response_dict)
48 |
49 |
50 | def test_prom_process():
51 | process()
52 |
--------------------------------------------------------------------------------
/tests/weight_model_request_test.py:
--------------------------------------------------------------------------------
1 | #########################
2 | # weight_model_request_test.py
3 | #
4 | # This file covers the following cases.
5 | # - getting weight from model server based on available features
6 | #
7 | #########################
8 |
9 | import json
10 | import os
11 | import sys
12 | import time
13 |
14 | import requests
15 |
16 | from kepler_model.estimate.model_server_connector import list_all_models
17 | from kepler_model.util.config import download_path, get_model_server_req_endpoint
18 | from kepler_model.util.loader import get_download_output_path
19 | from kepler_model.util.train_types import FeatureGroup, FeatureGroups, ModelOutputType
20 | from tests.estimator_power_request_test import generate_request
21 | from tests.extractor_test import test_energy_source
22 |
23 | os.environ["MODEL_SERVER_URL"] = "http://localhost:8100"
24 |
25 | weight_available_trainers = ["SGDRegressorTrainer"]
26 |
27 | if __name__ == "__main__":
28 | # test getting model from server
29 | os.environ["MODEL_SERVER_ENABLE"] = "true"
30 | energy_source = test_energy_source
31 |
32 | available_models = list_all_models(energy_source=energy_source)
33 | while len(available_models) == 0:
34 | time.sleep(1)
35 | print("wait for kepler model server response")
36 | available_models = list_all_models(energy_source=energy_source)
37 |
38 | for output_type_name, valid_fgs in available_models.items():
39 | output_type = ModelOutputType[output_type_name]
40 | output_path = get_download_output_path(download_path, energy_source, output_type)
41 | for fg_name, best_model in valid_fgs.items():
42 | for trainer in weight_available_trainers:
43 | print("feature group: ", fg_name)
44 | metrics = FeatureGroups[FeatureGroup[fg_name]]
45 | request_json = generate_request(trainer, n=10, metrics=metrics, output_type=output_type_name)
46 | request_json["metrics"] += request_json["system_features"]
47 | request_json["weight"] = "true"
48 | del request_json["system_features"]
49 | del request_json["values"]
50 | del request_json["system_values"]
51 | try:
52 | response = requests.post(get_model_server_req_endpoint(), json=request_json)
53 | except Exception as err:
54 | print(f"cannot get response from model server: {err}")
55 | sys.exit(1)
56 | assert response.status_code == 200, f"response {request_json} not OK"
57 | loaded_weight = json.loads(response.content)
58 | print(loaded_weight)
59 |
--------------------------------------------------------------------------------
/tests/xgboost_test.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 |
4 | from kepler_model.train import DefaultExtractor
5 | from kepler_model.train.profiler.profiler import response_to_result
6 | from kepler_model.train.trainer.XGBoostTrainer.main import XGBoostRegressionStandalonePipeline
7 | from kepler_model.util.train_types import FeatureGroup, XGBoostRegressionTrainType
8 |
9 | energy_components = ["package", "core", "uncore", "dram"]
10 | feature_group = FeatureGroup.BPFIRQ.name
11 | energy_source = "rapl-sysfs"
12 |
13 | prom_response_file = os.path.join(os.path.dirname(__file__), "data", "prom_output", "prom_response.json")
14 |
15 |
16 | def read_sample_query_results():
17 | with open(prom_response_file) as f:
18 | response = json.load(f)
19 | return response_to_result(response)
20 |
21 |
22 | if __name__ == "__main__":
23 | # Note that extractor mutates the query results
24 | query_results = read_sample_query_results()
25 | assert len(query_results) > 0, "cannot read_sample_query_results"
26 | instance = DefaultExtractor()
27 | extracted_data, power_columns, _, _ = instance.extract(query_results, energy_components, feature_group, energy_source, node_level=True)
28 | xgb_container_level_pipeline_kfold = XGBoostRegressionStandalonePipeline(
29 |     XGBoostRegressionTrainType.KFoldCrossValidation, "test_models/XGBoost/", node_level=False
30 | )
31 | xgb_node_pipeline_kfold = XGBoostRegressionStandalonePipeline(XGBoostRegressionTrainType.KFoldCrossValidation, "test_models/XGBoost/", node_level=True)
32 | xgb_container_level_pipeline_tts = XGBoostRegressionStandalonePipeline(
33 | XGBoostRegressionTrainType.TrainTestSplitFit, "test_models/XGBoost/", node_level=False
34 | )
35 | xgb_node_pipeline_tts = XGBoostRegressionStandalonePipeline(XGBoostRegressionTrainType.TrainTestSplitFit, "test_models/XGBoost/", node_level=True)
36 | xgb_node_pipeline_kfold.train(None, query_results)
37 | xgb_container_level_pipeline_tts.train(None, query_results)
38 | xgb_node_pipeline_tts.train(None, query_results)
39 | xgb_container_level_pipeline_kfold.train(None, query_results)
40 |
--------------------------------------------------------------------------------