├── .ansible-lint-ignore ├── .github └── workflows │ └── main.yml ├── .gitignore ├── .gitleaks.toml ├── .osdk-scorecard.yaml ├── .yamllint ├── .zuul.yaml ├── LICENSE ├── Makefile ├── README.md ├── build ├── Dockerfile ├── build.sh ├── buildConfig.yaml.template ├── create_standard_pvs.sh ├── generate_bundle.sh ├── get_go.sh ├── get_operator_sdk.sh ├── metadata.sh ├── new_csv_version.sh ├── ocp_metadata.sh ├── push_container2ocp.sh ├── run-ci.yaml ├── stf-collect-logs │ ├── README.md │ ├── defaults │ │ └── main.yml │ ├── meta │ │ └── main.yml │ ├── tasks │ │ └── main.yml │ └── vars │ │ └── main.yml ├── stf-run-ci │ ├── .ansible-lint │ ├── README.md │ ├── defaults │ │ └── main.yml │ ├── filter_plugins │ │ └── parser.py │ ├── handlers │ │ └── main.yml │ ├── meta │ │ └── main.yml │ ├── requirements.txt │ ├── tasks │ │ ├── clone_repos.yml │ │ ├── create_builds.yml │ │ ├── create_catalog.yml │ │ ├── deploy_stf.yml │ │ ├── main.yml │ │ ├── pre-clean.yml │ │ ├── preflight_checks.yml │ │ ├── setup_base.yml │ │ ├── setup_elasticsearch.yml │ │ ├── setup_registry_auth.yml │ │ ├── setup_stf.yml │ │ ├── setup_stf_from_bundles.yml │ │ └── setup_stf_local_build.yml │ ├── templates │ │ ├── config-json.j2 │ │ ├── index-yaml.j2 │ │ └── manifest_elasticsearch.j2 │ ├── tests │ │ ├── inventory │ │ └── test.yml │ └── vars │ │ └── main.yml ├── test-framework │ ├── Dockerfile │ └── ansible-test.sh ├── update_csv.sh └── validate_deployment.sh ├── ci ├── README.md ├── common-tasks.yml ├── deploy_stf.yml ├── post-collect_logs.yml ├── pre-2node.yml ├── prepare.yml ├── test_stf.yml ├── vars-local_build-index_deploy.yml ├── vars-local_build.yml ├── vars-nightly_bundles-index_deploy.yml ├── vars-nightly_bundles.yml └── vars-zuul-common.yml ├── deploy ├── alerts │ ├── README.md │ └── alerts.yaml ├── containerlogs.sh ├── crds │ ├── infra.watch_servicetelemetrys_crd.yaml │ └── infra.watch_v1beta1_servicetelemetry_cr.yaml ├── olm-catalog │ └── service-telemetry-operator │ │ ├── Dockerfile.in │ │ ├── manifests │ │ ├── infra.watch_servicetelemetrys_crd.yaml │ │ └── service-telemetry-operator.clusterserviceversion.yaml │ │ ├── metadata │ │ ├── annotations.yaml │ │ └── properties.yaml │ │ └── tests │ │ └── scorecard │ │ └── config.yaml ├── operator.yaml ├── operator_group.yaml ├── quickstart.sh ├── remove_stf.sh ├── role.yaml ├── role_binding.yaml └── service_account.yaml ├── docs └── development.md ├── molecule ├── default │ ├── asserts.yml │ ├── molecule.yml │ ├── playbook.yml │ └── prepare.yml ├── test-cluster │ ├── molecule.yml │ └── playbook.yml └── test-local │ ├── molecule.yml │ ├── playbook.yml │ └── prepare.yml ├── requirements.yml ├── roles └── servicetelemetry │ ├── README.md │ ├── defaults │ └── main.yml │ ├── files │ ├── memcached-dashboard.json │ ├── rhos-cloud-dashboard.json │ ├── rhos-dashboard.json │ └── virtual-machine-view.json │ ├── filter_plugins │ └── stripnone.py │ ├── handlers │ └── main.yml │ ├── meta │ └── main.yml │ ├── tasks │ ├── base_smartgateway.yml │ ├── component_alertmanager.yml │ ├── component_certificates.yml │ ├── component_clouds.yml │ ├── component_elasticsearch.yml │ ├── component_es_certificates.yml │ ├── component_grafana.yml │ ├── component_prometheus.yml │ ├── component_prometheus_reader.yml │ ├── component_qdr.yml │ ├── component_scrapeconfig.yml │ ├── component_snmp_traps.yml │ ├── main.yml │ ├── post.yml │ └── pre.yml │ ├── templates │ ├── manifest_alertmanager.j2 │ ├── manifest_alertmanager_config.j2 │ ├── manifest_alertmanager_route.j2 │ ├── manifest_alertmanager_service.j2 │ ├── 
manifest_elasticsearch.j2 │ ├── manifest_grafana.j2 │ ├── manifest_grafana_ds.j2 │ ├── manifest_grafana_ds_prometheus.j2 │ ├── manifest_grafana_v5.j2 │ ├── manifest_prometheus.j2 │ ├── manifest_prometheus_route.j2 │ ├── manifest_prometheus_service.j2 │ ├── manifest_smartgateway_events.j2 │ ├── manifest_smartgateway_metrics.j2 │ ├── manifest_snmp_traps.j2 │ └── manifest_snmp_traps_service.j2 │ └── vars │ ├── dummy_user_certs.yml │ └── main.yml ├── tests ├── infrared │ ├── 13 │ │ ├── .gitignore │ │ ├── baremetal-scripts │ │ │ └── install-and-run-minishift.sh │ │ ├── enable-stf.yaml.template │ │ ├── infrared-openstack.sh │ │ ├── minishift-stf.sh │ │ ├── outputs │ │ │ └── .KEEPIT │ │ └── stf-connectors.yaml.template │ ├── .gitignore │ ├── 16.1 │ │ ├── .gitignore │ │ ├── enable-stf.yaml.template │ │ ├── gnocchi-connectors.yaml.template │ │ ├── infrared-openstack.sh │ │ ├── outputs │ │ │ └── .KEEPIT │ │ └── stf-connectors.yaml.template │ ├── 16.2 │ │ ├── .gitignore │ │ ├── enable-stf.yaml.template │ │ ├── gnocchi-connectors.yaml.template │ │ ├── infrared-openstack.sh │ │ ├── outputs │ │ │ └── .KEEPIT │ │ └── stf-connectors.yaml.template │ ├── 17.0 │ │ ├── .gitignore │ │ ├── enable-stf.yaml.template │ │ ├── gnocchi-connectors.yaml.template │ │ ├── infrared-openstack.sh │ │ ├── outputs │ │ │ └── .KEEPIT │ │ └── stf-connectors.yaml.template │ ├── 17.1 │ │ ├── .gitignore │ │ ├── README.md │ │ ├── enable-stf.yaml.template │ │ ├── extra-hosts.yaml.template │ │ ├── gnocchi-connectors.yaml.template │ │ ├── infrared-openstack.sh │ │ ├── outputs │ │ │ └── .KEEPIT │ │ └── stf-connectors.yaml.template │ ├── README.md │ └── crc-stf.sh ├── performance-test │ ├── README.md │ ├── dashboards │ │ ├── perftest-dashboard.yaml │ │ └── prom2-dashboard.yaml │ ├── deploy │ │ ├── datasources.yaml │ │ └── qdr-servicemonitor.yml │ ├── images │ │ └── dashboard.png │ ├── job.yaml │ ├── legacy │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── dashboards │ │ │ ├── legacy-perftest-dashboard.yaml │ │ │ └── prom2-dashboard.yaml │ │ ├── deploy │ │ │ ├── config │ │ │ │ ├── minimal-collectd.conf │ │ │ │ └── test-configs.yml │ │ │ ├── entrypoint.sh │ │ │ ├── performance-test-job-events.yml.template │ │ │ ├── performance-test-job-tb.yml.template │ │ │ ├── performance-test-tb.sh │ │ │ ├── prom-servicemonitor.yml │ │ │ ├── qdr-servicemonitor.yml │ │ │ └── qdrouterd.yaml │ │ ├── docker-push.sh │ │ ├── grafana │ │ │ ├── datasource.yaml │ │ │ ├── grafana-deploy.yml │ │ │ ├── grafana-launcher.sh │ │ │ ├── grafana-route.yml │ │ │ ├── grafana-service.yml │ │ │ ├── grafana.ini │ │ │ ├── perftest-dashboard.json │ │ │ └── prom2-dashboard.json │ │ ├── parser.go │ │ └── performance-test.sh │ └── run.sh ├── promxy │ ├── README.md │ ├── promxy-launcher.sh │ └── promxy-manifests.yaml └── smoketest │ ├── README.md │ ├── ceilometer_publish.py │ ├── collectd-sensubility.conf │ ├── healthcheck.log │ ├── minimal-collectd.conf.template │ ├── qdr-test.conf.yaml.template │ ├── qdr-test.yaml │ ├── smoketest.sh │ ├── smoketest_ceilometer_entrypoint.sh │ ├── smoketest_collectd_entrypoint.sh │ └── smoketest_job.yaml.template └── watches.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | watches.local 2 | .venv 3 | build/working/ 4 | bundle.Dockerfile 5 | build/stf-run-ci/filter_plugins/__pycache__/ 6 | roles/servicetelemetry/filter_plugins/__pycache__/ 7 | -------------------------------------------------------------------------------- /.gitleaks.toml: 
-------------------------------------------------------------------------------- 1 | [allowlist] 2 | description = "Global Allowlist" 3 | 4 | # Ignore based on any subset of the file path 5 | paths = [ 6 | # Ignore all example certs 7 | '''roles\/servicetelemetry\/vars\/dummy_user_certs\.yml''' 8 | ] 9 | -------------------------------------------------------------------------------- /.osdk-scorecard.yaml: -------------------------------------------------------------------------------- 1 | scorecard: 2 | version: v1alpha2 3 | output: text 4 | bundle: deploy/olm-catalog/service-telemetry-operator 5 | plugins: 6 | - basic: 7 | cr-manifest: 8 | - "deploy/crds/infra.watch_v1beta1_servicetelemetry_cr.yaml" 9 | - olm: 10 | cr-manifest: 11 | - "deploy/crds/infra.watch_v1beta1_servicetelemetry_cr.yaml" 12 | csv-path: "deploy/olm-catalog/service-telemetry-operator/manifests/service-telemetry-operator.clusterserviceversion.yaml" 13 | -------------------------------------------------------------------------------- /.yamllint: -------------------------------------------------------------------------------- 1 | --- 2 | extends: default 3 | 4 | rules: 5 | document-start: disable 6 | line-length: disable 7 | indentation: disable 8 | comments: 9 | min-spaces-from-content: 1 10 | comments-indentation: disable 11 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all oc-create-build oc-start-build 2 | BUILD_DIR = ./build 3 | ROOTDIR = $(realpath .) 4 | NAME = $(notdir $(ROOTDIR)) 5 | 6 | all: oc-create-build oc-start-build 7 | 8 | oc-create-build: 9 | @oc new-build --name service-telemetry-operator --dockerfile - < $(BUILD_DIR)/Dockerfile 10 | 11 | oc-start-build: 12 | @oc start-build service-telemetry-operator --wait --from-dir . 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # service-telemetry-operator 2 | 3 | Umbrella Operator to instantiate all required components for Service Telemetry 4 | Framework. 5 | 6 | ## Getting Started 7 | 8 | You'll need to do the following steps in order to load the prerequisites for 9 | deploying to an OpenShift 4.10 environment: 10 | 11 | * import catalog containing Service Telemetry and Smart Gateway Operators via 12 | OperatorSource file 13 | * install the Certificate Manager for OpenShift before installing Service 14 | Telemetry Operator 15 | * install the Service Telemetry Operator 16 | 17 | ## Starting up Service Telemetry 18 | 19 | In the OperatorHub, select "Service Telemetry Operator" and install it. You can 20 | use the defaults. 21 | 22 | Once the Service Telemetry Operator is available in the _Installed Operators_ 23 | page, select _Service Telemetry Operator_ and select _Create instance_ within 24 | the _STF Cluster_ box under _Provided APIs_. Then press _Create_. 25 | 26 | ## Overriding Default Manifests 27 | 28 | The following variables can be passed to a new instance of STF Cluster (`kind: 29 | ServiceTelemetry`) via the YAML configuration to override the default manifests 30 | loaded for you. 
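For example, a `ServiceTelemetry` object that supplies its own Prometheus manifest might look like the following. This is a minimal sketch: the exact placement of the override under `spec`, and the embedded manifest content (including its `apiVersion`), are illustrative assumptions rather than the defaults shipped with the operator.

```yaml
apiVersion: infra.watch/v1beta1
kind: ServiceTelemetry
metadata:
  name: default
  namespace: service-telemetry
spec:
  observabilityStrategy: use_redhat
  prometheusManifest: |
    apiVersion: monitoring.rhobs/v1
    kind: Prometheus
    metadata:
      name: default
      namespace: service-telemetry
    spec:
      replicas: 1
      retention: 24h
```

The available override variables are: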
31 | 32 | * prometheusManifest 33 | * alertmanagerConfigManifest 34 | * alertmanagerManifest 35 | * elasticsearchSecretManifest 36 | * interconnectManifest 37 | * elasticsearchManifest 38 | * grafanaManifest 39 | * smartgatewayCollectdMetricsManifest 40 | * smartgatewayCollectdEventsManifest 41 | * smartgatewayCeilometerEventsManifest 42 | * servicemonitorManifest 43 | * scrapeconfigManifest 44 | 45 | ## Development 46 | 47 | The quickest way to start up Service Telemetry Framework for development is to 48 | run the `quickstart.sh` script located in the `deploy/` directory after starting 49 | up a [CodeReady Containers](https://github.com/code-ready/crc) environment. 50 | 51 | ```shell 52 | crc setup 53 | crc config set memory 16384 54 | crc config set enable-cluster-monitoring true 55 | crc start 56 | crc console --credentials 57 | oc login -u kubeadmin https://api.crc.testing:6443 58 | ``` 59 | 60 | To deploy a local build of the Service Telemetry Operator itself, start by 61 | running `ansible-playbook build/run-ci.yaml`. If you have code to coordinate 62 | across the supporting InfraWatch repositories, you can pass the 63 | `working_branch` parameter to the `--extra-vars` flag like so: 64 | 65 | ```shell 66 | ansible-playbook \ 67 | --extra-vars working_branch="username-new_feature" \ 68 | build/run-ci.yaml 69 | ``` 70 | 71 | Additional flags for overriding various branch and path names are documented in 72 | `build/stf-run-ci/README.md`. 73 | -------------------------------------------------------------------------------- /build/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM quay.io/operator-framework/ansible-operator:v1.36.1 2 | 3 | # temporarily switch to root user to adjust image layers 4 | USER 0 5 | # Upstream CI builds need the additional EPEL sources for python3-passlib and python3-bcrypt but have no working repos to install epel-release 6 | # NO_PROXY is undefined in upstream CI builds, but defined (usually blank) during openshift builds (a possibly brittle hack) 7 | RUN bash -c -- 'if [ "${NO_PROXY:-__ZZZZZ}" == "__ZZZZZ" ]; then echo "Applying upstream EPEL hacks" && echo -e "-----BEGIN PGP PUBLIC KEY
BLOCK-----\nmQINBGE3mOsBEACsU+XwJWDJVkItBaugXhXIIkb9oe+7aadELuVo0kBmc3HXt/Yp\nCJW9hHEiGZ6z2jwgPqyJjZhCvcAWvgzKcvqE+9i0NItV1rzfxrBe2BtUtZmVcuE6\n2b+SPfxQ2Hr8llaawRjt8BCFX/ZzM4/1Qk+EzlfTcEcpkMf6wdO7kD6ulBk/tbsW\nDHX2lNcxszTf+XP9HXHWJlA2xBfP+Dk4gl4DnO2Y1xR0OSywE/QtvEbN5cY94ieu\nn7CBy29AleMhmbnx9pw3NyxcFIAsEZHJoU4ZW9ulAJ/ogttSyAWeacW7eJGW31/Z\n39cS+I4KXJgeGRI20RmpqfH0tuT+X5Da59YpjYxkbhSK3HYBVnNPhoJFUc2j5iKy\nXLgkapu1xRnEJhw05kr4LCbud0NTvfecqSqa+59kuVc+zWmfTnGTYc0PXZ6Oa3rK\n44UOmE6eAT5zd/ToleDO0VesN+EO7CXfRsm7HWGpABF5wNK3vIEF2uRr2VJMvgqS\n9eNwhJyOzoca4xFSwCkc6dACGGkV+CqhufdFBhmcAsUotSxe3zmrBjqA0B/nxIvH\nDVgOAMnVCe+Lmv8T0mFgqZSJdIUdKjnOLu/GRFhjDKIak4jeMBMTYpVnU+HhMHLq\nuDiZkNEvEEGhBQmZuI8J55F/a6UURnxUwT3piyi3Pmr2IFD7ahBxPzOBCQARAQAB\ntCdGZWRvcmEgKGVwZWw5KSA8ZXBlbEBmZWRvcmFwcm9qZWN0Lm9yZz6JAk4EEwEI\nADgWIQT/itE0RZcQbs6BO5GKOHK/MihGfAUCYTeY6wIbDwULCQgHAgYVCgkICwIE\nFgIDAQIeAQIXgAAKCRCKOHK/MihGfFX/EACBPWv20+ttYu1A5WvtHJPzwbj0U4yF\n3zTQpBglQ2UfkRpYdipTlT3Ih6j5h2VmgRPtINCc/ZE28adrWpBoeFIS2YAKOCLC\nnZYtHl2nCoLq1U7FSttUGsZ/t8uGCBgnugTfnIYcmlP1jKKA6RJAclK89evDQX5n\nR9ZD+Cq3CBMlttvSTCht0qQVlwycedH8iWyYgP/mF0W35BIn7NuuZwWhgR00n/VG\n4nbKPOzTWbsP45awcmivdrS74P6mL84WfkghipdmcoyVb1B8ZP4Y/Ke0RXOnLhNe\nCfrXXvuW+Pvg2RTfwRDtehGQPAgXbmLmz2ZkV69RGIr54HJv84NDbqZovRTMr7gL\n9k3ciCzXCiYQgM8yAyGHV0KEhFSQ1HV7gMnt9UmxbxBE2pGU7vu3CwjYga5DpwU7\nw5wu1TmM5KgZtZvuWOTDnqDLf0cKoIbW8FeeCOn24elcj32bnQDuF9DPey1mqcvT\n/yEo/Ushyz6CVYxN8DGgcy2M9JOsnmjDx02h6qgWGWDuKgb9jZrvRedpAQCeemEd\nfhEs6ihqVxRFl16HxC4EVijybhAL76SsM2nbtIqW1apBQJQpXWtQwwdvgTVpdEtE\nr4ArVJYX5LrswnWEQMOelugUG6S3ZjMfcyOa/O0364iY73vyVgaYK+2XtT2usMux\nVL469Kj5m13T6w==\n=Mjs/\n-----END PGP PUBLIC KEY BLOCK-----" > /etc/pki/rpm-gpg/RPM-GPG-KEY-EPEL-9 && echo -e "[epel]\nname=Extra Packages for Enterprise Linux 9 - \$basearch\nmetalink=https://mirrors.fedoraproject.org/metalink?repo=epel-9&arch=\$basearch&infra=\$infra&content=\$contentdir\nenabled=1\ngpgcheck=1\ngpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-EPEL-9" > /etc/yum.repos.d/epel.repo; fi' 8 | 9 | # update the base image to allow forward-looking optimistic updates during the testing phase, with the added benefit of helping move closer to passing security scans. 10 | # -- excludes ansible so it remains at 2.9 tag as shipped with the base image 11 | # -- installs python3-passlib and python3-bcrypt for oauth-proxy interface 12 | # -- cleans up the cached data from dnf to keep the image as small as possible 13 | RUN dnf update -y --exclude=ansible* && dnf install -y python3-passlib python3-bcrypt && dnf clean all && rm -rf /var/cache/dnf 14 | 15 | COPY requirements.yml ${HOME}/requirements.yml 16 | RUN ansible-galaxy collection install -r ${HOME}/requirements.yml \ 17 | && chmod -R ug+rwx ${HOME}/.ansible 18 | 19 | # switch back to user 1001 when running the base image (non-root) 20 | USER 1001 21 | 22 | # copy in required artifacts for the operator 23 | COPY watches.yaml ${HOME}/watches.yaml 24 | COPY roles/ ${HOME}/roles/ 25 | -------------------------------------------------------------------------------- /build/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | REL=$(dirname "$0"); source "${REL}/metadata.sh"; source "${REL}/ocp_metadata.sh" 4 | 5 | oc create imagestream "${OPERATOR_NAME}" || true 6 | 7 | oc apply -f <(sed " 8 | s|<>|${OPERATOR_NAME}|g; 9 | s|<>|${OCP_TAG}|g"\ 10 | "${REL}/buildConfig.yaml.template") 11 | 12 | oc start-build "${OPERATOR_NAME}" --wait --follow --from-dir "${REL}/.." 
13 | -------------------------------------------------------------------------------- /build/buildConfig.yaml.template: -------------------------------------------------------------------------------- 1 | kind: BuildConfig 2 | apiVersion: build.openshift.io/v1 3 | metadata: 4 | name: "<>" 5 | spec: 6 | runPolicy: "Serial" 7 | source: 8 | git: 9 | uri: https://github.com/infrawatch/service-telemetry-operator.git 10 | strategy: 11 | dockerStrategy: 12 | dockerfilePath: build/Dockerfile 13 | output: 14 | to: 15 | kind: "ImageStreamTag" 16 | name: "<>:<>" 17 | -------------------------------------------------------------------------------- /build/create_standard_pvs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the standard storage class 4 | if ! oc describe sc standard; then 5 | oc create -f - < "$log_file" 2>&1 7 | oc -n {{ namespace }} get "$resource" >> "$log_file" 2>&1 8 | echo "[INFO] oc get '$resource' -oyaml" >> "$log_file" 2>&1 9 | oc -n {{ namespace }} get "$resource" -oyaml >> "$log_file" 2>&1 10 | done 11 | delay: 10 12 | retries: 3 13 | ignore_errors: true 14 | changed_when: false 15 | 16 | - name: "Get Additional Information details" 17 | ansible.builtin.shell: 18 | cmd: | 19 | oc -n {{ namespace }} describe subscription service-telemetry-operator >> {{ logfile_dir }}/post_oc_describe_subscriptions_STO.log 2>&1 20 | ignore_errors: true 21 | 22 | - name: "Get STO info" 23 | ansible.builtin.shell: 24 | cmd: | 25 | oc -n {{ namespace }} describe pod $(oc -n {{ namespace }} get pod -l name=service-telemetry-operator -ojsonpath='{ .items[].metadata.name }') >> {{ logfile_dir }}/describe_sto.log 2>&1 26 | ignore_errors: true 27 | retries: 3 28 | delay: 10 29 | 30 | - name: "Question the deployment" 31 | ansible.builtin.shell: 32 | cmd: | 33 | echo "What images were created in the internal registry?" > {{ logfile_dir }}/post_question_deployment.log 2>&1 34 | oc -n {{ namespace }} get images | grep $(oc -n {{ namespace }} registry info --internal) >> {{ logfile_dir }}/post_question_deployment.log 2>&1 35 | echo "What state is the STO csv in?" 
>> {{ logfile_dir }}/post_question_deployment.log 2>&1 36 | oc -n {{ namespace }} get csv | grep service-telemetry-operator >> {{ logfile_dir }}/post_question_deployment.log 2>&1 37 | oc -n {{ namespace }} get csv $(oc -n {{ namespace }} get csv | grep "service-telemetry-operator" | awk '{ print $1}') -oyaml >> {{ logfile_dir }}/post_question_deployment.log 2>&1 38 | register: output 39 | ignore_errors: true 40 | retries: 3 41 | delay: 10 42 | 43 | - name: "Describe non-completed, non-running pods" 44 | ansible.builtin.shell: 45 | cmd: | 46 | for pod in $(oc get pods | grep -v NAME | grep -v Running | awk '{ print $1 }'); 47 | do 48 | oc -n {{ namespace }} describe pod $pod > {{ logfile_dir }}/post_oc_describe_pod_${pod}.log 2>&1 49 | done 50 | ignore_errors: true 51 | retries: 3 52 | delay: 10 53 | 54 | - name: "Describe builds" 55 | ansible.builtin.shell: 56 | cmd: | 57 | for build in $(oc -n {{ namespace }} get builds -o json | jq -r '.items[].metadata.name'); 58 | do 59 | oc -n {{ namespace }} describe build $build > {{ logfile_dir }}/post_oc_describe_build_${build}.log 2>&1 60 | done 61 | ignore_errors: true 62 | retries: 3 63 | delay: 10 64 | 65 | - name: "Get PV and PVC information details" 66 | ansible.builtin.shell: 67 | cmd: | 68 | oc -n {{ namespace }} get pv >> {{ logfile_dir }}/post_pv.log 2>&1 69 | oc -n {{ namespace }} get pvc >> {{ logfile_dir }}/post_pvc.log 2>&1 70 | ignore_errors: true 71 | 72 | - name: "Get SGO,STO and QDR logs" 73 | ansible.builtin.shell: 74 | cmd: | 75 | oc -n {{ namespace }} logs $(oc -n {{ namespace }} get pod -l name=service-telemetry-operator -o jsonpath='{.items[].metadata.name}') >> {{ logfile_dir }}/logs_sto.log 2>&1 76 | oc -n {{ namespace }} logs $(oc -n {{ namespace }} get pod -l app=smart-gateway-operator -o jsonpath='{.items[].metadata.name}') >> {{ logfile_dir }}/logs_sgo.log 2>&1 77 | oc -n {{ namespace }} logs $(oc -n {{ namespace }} get pod -l qdr -o jsonpath='{.items[].metadata.name}') >> {{ logfile_dir }}/logs_qdr.log 2>&1 78 | ignore_errors: true 79 | retries: 3 80 | delay: 10 -------------------------------------------------------------------------------- /build/stf-collect-logs/vars/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # vars file for stf-collect-logs 3 | resource_types: 4 | - builds 5 | - subscriptions 6 | - images 7 | - imagestream 8 | - pods -------------------------------------------------------------------------------- /build/stf-run-ci/.ansible-lint: -------------------------------------------------------------------------------- 1 | --- 2 | profile: null 3 | skip_list: 4 | - args 5 | - avoid-implicit 6 | - command-instead-of-module 7 | - command-instead-of-shell 8 | - complexity 9 | - deprecated-bare-vars 10 | - deprecated-local-action 11 | - deprecated-module 12 | - empty-string-compare 13 | - galaxy 14 | - ignore-errors 15 | - inline-env-var 16 | - internal-error 17 | - jinja 18 | - key-order 19 | - latest 20 | - literal-compare 21 | - loop-var-prefix 22 | - meta-incorrect 23 | - meta-no-tags 24 | - meta-runtime 25 | - meta-video-links 26 | - name 27 | - no-changed-when 28 | - no-free-form 29 | - no-handler 30 | - no-jinja-when 31 | - no-log-password 32 | - no-prompting 33 | - no-relative-paths 34 | - no-same-owner 35 | - no-tabs 36 | - only-builtins 37 | - package-latest 38 | - parser-error 39 | - partial-become 40 | - playbook-extension 41 | - risky-file-permissions 42 | - risky-octal 43 | - risky-shell-pipe 44 | - role-name 45 | - run-once 46 | - sanity 
47 | - schema 48 | - var-naming 49 | - warning 50 | - yaml 51 | 52 | # vimrc: ft=yaml 53 | -------------------------------------------------------------------------------- /build/stf-run-ci/defaults/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # defaults file for stf-run-ci 3 | list_of_stf_objects: 4 | - service-telemetry-operator 5 | - smart-gateway-operator 6 | - smart-gateway 7 | 8 | __local_build_enabled: true 9 | __deploy_from_bundles_enabled: false 10 | __deploy_from_index_enabled: false 11 | __deploy_stf: true 12 | 13 | __disconnected_deploy: false 14 | 15 | __service_telemetry_events_certificates_endpoint_cert_duration: 70080h 16 | __service_telemetry_events_certificates_ca_cert_duration: 70080h 17 | __service_telemetry_events_enabled: true 18 | __service_telemetry_high_availability_enabled: false 19 | __service_telemetry_metrics_enabled: true 20 | __service_telemetry_storage_ephemeral_enabled: false 21 | __service_telemetry_snmptraps_enabled: true 22 | __service_telemetry_snmptraps_target: "192.168.24.254" 23 | __service_telemetry_snmptraps_community: "public" 24 | __service_telemetry_snmptraps_retries: 5 25 | __service_telemetry_snmptraps_timeout: 1 26 | __service_telemetry_snmptraps_port: 162 27 | __service_telemetry_snmptraps_alert_oid_label: "oid" 28 | __service_telemetry_snmptraps_trap_oid_prefix: "1.3.6.1.4.1.50495.15" 29 | __service_telemetry_snmptraps_trap_default_oid: "1.3.6.1.4.1.50495.15.1.2.1" 30 | __service_telemetry_snmptraps_trap_default_severity: "" 31 | __service_telemetry_observability_strategy: use_redhat 32 | __service_telemetry_transports_certificates_endpoint_cert_duration: 70080h 33 | __service_telemetry_transports_certificates_ca_cert_duration: 70080h 34 | __internal_registry_path: image-registry.openshift-image-registry.svc:5000 35 | __service_telemetry_bundle_image_path: "quay.io/infrawatch-operators/service-telemetry-operator-bundle:nightly-head" 36 | __smart_gateway_bundle_image_path: "quay.io/infrawatch-operators/smart-gateway-operator-bundle:nightly-head" 37 | 38 | default_operator_registry_image_base: registry.redhat.io/openshift4/ose-operator-registry 39 | default_operator_registry_image_tag: v4.13 40 | 41 | elasticsearch_version: 7.17.20 42 | elasticsearch_image: registry.connect.redhat.com/elastic/elasticsearch 43 | 44 | sgo_image_tag: latest 45 | sto_image_tag: latest 46 | sg_core_image_tag: latest 47 | sg_bridge_image_tag: latest 48 | prometheus_webhook_snmp_image_tag: latest 49 | sgo_bundle_image_tag: latest 50 | sto_bundle_image_tag: latest 51 | stf_index_image_tag: latest 52 | operator_sdk: v1.39.2 53 | namespace: service-telemetry 54 | pull_secret_registry: 55 | pull_secret_user: 56 | pull_secret_pass: 57 | 58 | redhat_operators: redhat-operators 59 | community_operators: community-operators 60 | certified_operators: certified-operators 61 | infrawatch_operators: infrawatch-operators 62 | 63 | stf_channel: unstable 64 | 65 | # used when building images to default to correct version branch for STF subcomponents per STF version 66 | version_branches: 67 | sgo: master 68 | sg_core: master 69 | sg_bridge: master 70 | prometheus_webhook_snmp: master 71 | 72 | sgo_repository: https://github.com/infrawatch/smart-gateway-operator 73 | sg_core_repository: https://github.com/infrawatch/sg-core 74 | sg_bridge_repository: https://github.com/infrawatch/sg-bridge 75 | prometheus_webhook_snmp_repository: https://github.com/infrawatch/prometheus-webhook-snmp 76 | 77 | base_dir: '' 78 | 79 | 
clone_repos: true 80 | setup_bundle_registry_auth: true 81 | setup_bundle_registry_tls_ca: true 82 | -------------------------------------------------------------------------------- /build/stf-run-ci/filter_plugins/parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | class FilterModule(object): 3 | def filters(self): 4 | return { 5 | 'parse_image': self.parse_image, 6 | 'parse_tag': self.parse_tag 7 | } 8 | 9 | def parse_image(self, value): 10 | this_split = value.split(':')[:-1] 11 | return ':'.join(this_split) 12 | 13 | def parse_tag(self, value): 14 | return value.split(':')[-1] 15 | -------------------------------------------------------------------------------- /build/stf-run-ci/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # handlers file for stf-run-ci -------------------------------------------------------------------------------- /build/stf-run-ci/meta/main.yml: -------------------------------------------------------------------------------- 1 | galaxy_info: 2 | role_name: stf_run_ci # if absent directory name hosting role is used instead 3 | namespace: infrawatch 4 | 5 | author: InfraWatch 6 | description: Helper CI role for Service Telemetry Framework 7 | company: Red Hat 8 | 9 | # If the issue tracker for your role is not on github, uncomment the 10 | # next line and provide a value 11 | # issue_tracker_url: http://example.com/issue/tracker 12 | 13 | # Choose a valid license ID from https://spdx.org - some suggested licenses: 14 | # - BSD-3-Clause (default) 15 | # - MIT 16 | # - GPL-2.0-or-later 17 | # - GPL-3.0-only 18 | # - Apache-2.0 19 | # - CC-BY-4.0 20 | license: Apache-2.0 21 | 22 | min_ansible_version: '2.14' 23 | 24 | galaxy_tags: [] 25 | # List tags for your role here, one per line. A tag is a keyword that describes 26 | # and categorizes the role. Users find roles by searching for tags. Be sure to 27 | # remove the '[]' above, if you add tags to this list. 28 | # 29 | # NOTE: A tag is limited to a single word comprised of alphanumeric characters. 30 | # Maximum 20 tags per role. 31 | 32 | dependencies: [] 33 | # List your role dependencies here, one per line. Be sure to remove the '[]' above, 34 | # if you add dependencies to this list. 
35 | -------------------------------------------------------------------------------- /build/stf-run-ci/requirements.txt: -------------------------------------------------------------------------------- 1 | # https://stackoverflow.com/questions/64073422/importerror-cannot-import-name-oauth1session-from-requests-oauthlib 2 | requests==2.32.0 3 | requests_oauthlib==1.3.0 4 | # https://github.com/domainaware/parsedmarc/issues/318 5 | oauthlib==3.2.2 6 | kubernetes==24.2.0 7 | openshift==0.13.1 8 | ansible-core==2.17.8 9 | -------------------------------------------------------------------------------- /build/stf-run-ci/tasks/create_builds.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Get current BuildConfig for artifact to check if it exists 3 | kubernetes.core.k8s_info: 4 | api_version: build.openshift.io/v1 5 | kind: BuildConfig 6 | namespace: "{{ namespace }}" 7 | name: "{{ artifact.name }}" 8 | register: build_config_lookup 9 | 10 | - name: Get current Builds for artifact to check if it exists 11 | kubernetes.core.k8s_info: 12 | api_version: build.openshift.io/v1 13 | kind: Build 14 | namespace: "{{ namespace }}" 15 | label_selectors: 16 | - "build={{ artifact.name }}" 17 | register: build_lookup 18 | 19 | - when: build_config_lookup.resources | length == 0 20 | block: 21 | - name: Create BuildConfig and ImageStream 22 | ansible.builtin.shell: oc new-build -n "{{ namespace }}" --name {{ artifact.name }} --dockerfile - < {{ artifact.working_build_dir }}/{{ artifact.dockerfile_path }} 23 | 24 | - name: Kill first build since it will always fail (triggered on BuildConfig creation) 25 | ansible.builtin.shell: sleep 10 ; oc delete build {{ artifact.name }}-1 -n "{{ namespace }}" 26 | retries: 3 27 | delay: 10 28 | register: kill_build 29 | until: kill_build.rc == 0 30 | 31 | - block: 32 | - name: Start local image build 33 | ansible.builtin.command: oc start-build {{ artifact.name }} -n "{{ namespace }}" --follow --wait --from-dir "{{ artifact.working_build_dir }}" 34 | register: build_results 35 | when: build_lookup.resources | length == 0 36 | retries: 3 37 | delay: 10 38 | until: build_results.rc == 0 39 | always: 40 | - name: "Show build results" 41 | ansible.builtin.debug: 42 | var: build_results 43 | 44 | - name: Get latest build information for artifact 45 | ansible.builtin.command: oc get build --selector build={{ artifact.name }} -n "{{ namespace }}" -ojsonpath='{.items[-1:]}' 46 | register: build_describe_results 47 | 48 | - name: Set build_describe from json results 49 | ansible.builtin.set_fact: 50 | build_describe: "{{ build_describe_results.stdout | from_json }}" 51 | 52 | - name: Get the build results 53 | ansible.builtin.debug: 54 | var: build_describe 55 | 56 | - name: Show the outputDockerImageReference, which will be used for the image reference name 57 | ansible.builtin.debug: 58 | var: build_describe.status.outputDockerImageReference 59 | 60 | - name: Set unique image reference for this artifact 61 | ansible.builtin.set_fact: 62 | "{{ artifact.image_reference_name }}": "{{ build_describe.status.outputDockerImageReference }}" 63 | 64 | - name: Show the image reference name for the build 65 | ansible.builtin.debug: 66 | var: "{{ artifact.image_reference_name }}" 67 | -------------------------------------------------------------------------------- /build/stf-run-ci/tasks/preflight_checks.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Try for 10 minutes to get an 
output 3 | - block: 4 | - name: "Wait for up to 10 minutes for Service Telemetry Operator to be Succeeded" 5 | ansible.builtin.shell: | 6 | oc get csv -n "{{ namespace }}" | grep service-telemetry-operator | grep Succeeded 7 | register: output 8 | retries: 60 9 | delay: 10 10 | until: output.stdout | length != 0 11 | ignore_errors: true 12 | rescue: 13 | - name: "Show CSV statuses" 14 | ansible.builtin.command: 15 | cmd: | 16 | oc get csv -n "{{ namespace }}" 17 | 18 | - name: "Get service-telemetry-operator CSV information" 19 | ansible.builtin.command: 20 | cmd: | 21 | oc describe csv $(oc get csv | grep "service-telemetry-operator" | awk '{print $1}') > {{ logfile_dir }}/oc_get_csv_sto.log 2>&1 22 | cat {{ logfile_dir }}/oc_get_csv_sto.log 23 | register: csv_sto 24 | 25 | - name: "Show service-telemetry-operator CSV information" 26 | ansible.builtin.debug: 27 | var: csv_sto.stdout 28 | 29 | - name: "Show fail message if CSV isn't Succeeded after the alotted time" 30 | ansible.builtin.fail: 31 | msg: "Service Telemetry Operator CSV not Succeeded after 10 minutes. Check {{ logfile_dir }}/oc_get_csv_sto.log for more information" 32 | when: output.rc != 0 33 | -------------------------------------------------------------------------------- /build/stf-run-ci/tasks/setup_elasticsearch.yml: -------------------------------------------------------------------------------- 1 | - name: Set default ElasticSearch manifest 2 | ansible.builtin.set_fact: 3 | elasticsearch_manifest: "{{ lookup('template', './manifest_elasticsearch.j2') | from_yaml }}" 4 | when: elasticsearch_manifest is not defined 5 | 6 | - name: Create an instance of Elasticsearch 7 | kubernetes.core.k8s: 8 | state: present 9 | definition: 10 | '{{ elasticsearch_manifest }}' 11 | 12 | - name: Look up the newly generated ES Certs 13 | kubernetes.core.k8s_info: 14 | api_version: v1 15 | kind: Secret 16 | name: elasticsearch-es-http-certs-public 17 | namespace: '{{ namespace }}' 18 | register: elasticsearch_certs 19 | until: elasticsearch_certs.resources[0].data["ca.crt"] is defined 20 | retries: 5 21 | delay: 30 22 | 23 | - name: Copy the ES CA cert to our TLS secret 24 | kubernetes.core.k8s: 25 | definition: 26 | apiVersion: v1 27 | kind: Secret 28 | metadata: 29 | name: elasticsearch-es-cert 30 | namespace: '{{ namespace }}' 31 | data: 32 | ca.crt: '{{ elasticsearch_certs.resources[0].data["ca.crt"] }}' -------------------------------------------------------------------------------- /build/stf-run-ci/tasks/setup_stf.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set default InfraWatch OperatorSource manifest 3 | when: 4 | - __deploy_from_index_enabled | bool 5 | - infrawatch_catalog_source_manifest is not defined 6 | ansible.builtin.set_fact: 7 | infrawatch_catalog_source_manifest: | 8 | apiVersion: operators.coreos.com/v1alpha1 9 | kind: CatalogSource 10 | metadata: 11 | name: infrawatch-operators 12 | namespace: openshift-marketplace 13 | spec: 14 | displayName: InfraWatch Operators 15 | image: quay.io/infrawatch-operators/infrawatch-catalog:nightly 16 | publisher: InfraWatch 17 | sourceType: grpc 18 | updateStrategy: 19 | registryPoll: 20 | interval: 30m 21 | 22 | - name: Set default Smart Gateway Operator Subscription manifest 23 | when: smart_gateway_operator_subscription_manifest is not defined 24 | ansible.builtin.set_fact: 25 | smart_gateway_operator_subscription_manifest: | 26 | apiVersion: operators.coreos.com/v1alpha1 27 | kind: Subscription 28 | metadata: 29 | 
name: smart-gateway-operator 30 | namespace: "{{ namespace }}" 31 | spec: 32 | channel: "{{ stf_channel }}" 33 | installPlanApproval: Automatic 34 | name: smart-gateway-operator 35 | source: "{{ infrawatch_operators }}" 36 | sourceNamespace: openshift-marketplace 37 | 38 | - name: Set default Service Telemetry Operator Subscription manifest 39 | when: service_telemetry_operator_subscription_manifest is not defined 40 | ansible.builtin.set_fact: 41 | service_telemetry_operator_subscription_manifest: | 42 | apiVersion: operators.coreos.com/v1alpha1 43 | kind: Subscription 44 | metadata: 45 | name: service-telemetry-operator 46 | namespace: "{{ namespace }}" 47 | spec: 48 | channel: "{{ stf_channel }}" 49 | installPlanApproval: Automatic 50 | name: service-telemetry-operator 51 | source: "{{ infrawatch_operators }}" 52 | sourceNamespace: openshift-marketplace 53 | 54 | # enable catalogsource 55 | - name: Enable InfraWatch Catalog Source 56 | when: __deploy_from_index_enabled | bool 57 | kubernetes.core.k8s: 58 | definition: 59 | '{{ infrawatch_catalog_source_manifest }}' 60 | 61 | # subscribe to the Operators from the defined CatalogSource sources. 62 | # STO will automatically install SGO via dependencies but pre-subscribe in case deployment from different CatalogSources is specified in an override (for testing purposes). 63 | - name: Subscribe to Smart Gateway Operator 64 | kubernetes.core.k8s: 65 | definition: 66 | '{{ smart_gateway_operator_subscription_manifest }}' 67 | 68 | - name: Subscribe to Service Telemetry Operator 69 | kubernetes.core.k8s: 70 | definition: 71 | '{{ service_telemetry_operator_subscription_manifest }}' 72 | -------------------------------------------------------------------------------- /build/stf-run-ci/tasks/setup_stf_from_bundles.yml: -------------------------------------------------------------------------------- 1 | # When the task is skipped, pull_secret is still defined. It is set to the task output i.e. 
2 | # "pull_secret": { 3 | # "changed": false, 4 | # "skip_reason": "Conditional result was False", 5 | # "skipped": true 6 | # } 7 | - name: "Set pull_secret to a zero-length string, if setup_bundle_registry_auth is false" 8 | when: not (setup_bundle_registry_auth | bool) 9 | ansible.builtin.set_fact: 10 | pull_secret: '' 11 | 12 | - name: Deploy SGO via OLM bundle 13 | ansible.builtin.shell: 14 | cmd: "{{ base_dir }}/working/operator-sdk-{{ operator_sdk }} --verbose run bundle {{ __smart_gateway_bundle_image_path }} {% if pull_secret | length > 0 %} --pull-secret-name=pull-secret --ca-secret-name=registry-tls-ca {% endif %} --namespace={{ namespace }} --timeout 600s" 15 | 16 | - name: Deploy STO via OLM bundle 17 | ansible.builtin.shell: 18 | cmd: "{{ base_dir }}/working/operator-sdk-{{ operator_sdk }} --verbose run bundle {{ __service_telemetry_bundle_image_path }} {% if pull_secret | length > 0 %} --pull-secret-name=pull-secret --ca-secret-name=registry-tls-ca {% endif %} --namespace={{ namespace }} --timeout 600s" 19 | -------------------------------------------------------------------------------- /build/stf-run-ci/templates/config-json.j2: -------------------------------------------------------------------------------- 1 | {"auths":{"image-registry.openshift-image-registry.svc:5000":<< internal_registry >>}} 2 | -------------------------------------------------------------------------------- /build/stf-run-ci/templates/index-yaml.j2: -------------------------------------------------------------------------------- 1 | --- 2 | defaultChannel: {{ sto_bundle_info.bundle_default_channel }} 3 | name: service-telemetry-operator 4 | schema: olm.package 5 | --- 6 | schema: olm.channel 7 | package: service-telemetry-operator 8 | name: {{ sto_bundle_info.bundle_channels }} 9 | entries: 10 | - name: {{ sto_bundle_info.package_name }} 11 | --- 12 | defaultChannel: {{ sgo_bundle_info.bundle_default_channel }} 13 | name: smart-gateway-operator 14 | schema: olm.package 15 | --- 16 | schema: olm.channel 17 | package: smart-gateway-operator 18 | name: {{ sgo_bundle_info.bundle_channels }} 19 | entries: 20 | - name: {{ sgo_bundle_info.package_name }} 21 | -------------------------------------------------------------------------------- /build/stf-run-ci/templates/manifest_elasticsearch.j2: -------------------------------------------------------------------------------- 1 | apiVersion: elasticsearch.k8s.elastic.co/v1 2 | kind: Elasticsearch 3 | metadata: 4 | name: elasticsearch 5 | namespace: {{ namespace }} 6 | spec: 7 | auth: {} 8 | http: 9 | service: 10 | metadata: {} 11 | spec: {} 12 | tls: 13 | certificate: {} 14 | monitoring: 15 | logs: {} 16 | metrics: {} 17 | nodeSets: 18 | - count: 1 19 | name: default 20 | config: 21 | node.roles: 22 | - master 23 | - data 24 | - ingest 25 | node.store.allow_mmap: true 26 | podTemplate: 27 | metadata: 28 | labels: 29 | tuned.openshift.io/elasticsearch: elasticsearch 30 | spec: 31 | containers: 32 | - name: elasticsearch 33 | resources: 34 | limits: 35 | cpu: "2" 36 | memory: 2Gi 37 | requests: 38 | cpu: "1" 39 | memory: 2Gi 40 | volumes: 41 | - emptyDir: {} 42 | name: elasticsearch-data 43 | transport: 44 | service: 45 | metadata: {} 46 | spec: {} 47 | tls: 48 | certificate: {} 49 | certificateAuthorities: {} 50 | updateStrategy: 51 | changeBudget: {} 52 | version: {{ elasticsearch_version }} 53 | image: {{ elasticsearch_image }}:{{ elasticsearch_version }} 54 | -------------------------------------------------------------------------------- 
/build/stf-run-ci/tests/inventory: -------------------------------------------------------------------------------- 1 | localhost 2 | 3 | -------------------------------------------------------------------------------- /build/stf-run-ci/tests/test.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - hosts: localhost 3 | remote_user: root 4 | roles: 5 | - stf-run-ci -------------------------------------------------------------------------------- /build/stf-run-ci/vars/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # vars file for stf-run-ci -------------------------------------------------------------------------------- /build/test-framework/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASEIMAGE 2 | FROM ${BASEIMAGE} 3 | USER 0 4 | 5 | # Ensure fresh metadata rather than cached metadata in the base by running 6 | # yum clean all && rm -rf /var/cache/yum/* first 7 | RUN yum clean all && rm -rf /var/cache/yum/* \ 8 | && yum install -y python-devel gcc libffi-devel 9 | RUN pip install molecule==2.20.1 10 | 11 | ARG NAMESPACEDMAN 12 | ADD $NAMESPACEDMAN /namespaced.yaml 13 | ADD build/test-framework/ansible-test.sh /ansible-test.sh 14 | RUN chmod +x /ansible-test.sh 15 | USER 1001 16 | ADD . /opt/ansible/project -------------------------------------------------------------------------------- /build/test-framework/ansible-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export WATCH_NAMESPACE=${TEST_NAMESPACE} 3 | (/usr/local/bin/entrypoint)& 4 | trap "kill $!" SIGINT SIGTERM EXIT 5 | 6 | cd ${HOME}/project 7 | exec molecule test -s test-cluster 8 | -------------------------------------------------------------------------------- /build/update_csv.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Run this script from the root directory to update the CSV whenever changes 4 | # are made to /deploy/crds/. Changes are written to 5 | # /deploy/olm-catalog/service-telemetry-operator/. 6 | operator-sdk generate bundle --channels unstable --default-channel unstable 7 | -------------------------------------------------------------------------------- /build/validate_deployment.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | if [ -n "${OCP_PROJECT+x}" ]; then 5 | oc project "$OCP_PROJECT" 6 | fi 7 | 8 | VALIDATION_SCOPE="${VALIDATION_SCOPE:-use_community}" 9 | 10 | # Play the (automated!)
waiting game 11 | echo -e "\n* [info] Waiting for QDR deployment to complete\n" 12 | until timeout 300 oc rollout status deployment.apps/default-interconnect; do sleep 3; done 13 | 14 | case "${VALIDATION_SCOPE}" in 15 | "use_community" | "use_hybrid") 16 | echo -e "\n* [info] Waiting for prometheus deployment to complete\n" 17 | until timeout 300 oc rollout status statefulset.apps/prometheus-default; do sleep 3; done 18 | echo -e "\n* [info] Waiting for elasticsearch deployment to complete \n" 19 | while true; do 20 | sleep 3 21 | ES_READY=$(oc get statefulsets elasticsearch-es-default -ogo-template='{{ .status.readyReplicas }}') || continue 22 | if [ "${ES_READY}" == "1" ]; then 23 | break 24 | fi 25 | done 26 | echo -e "\n* [info] Waiting for alertmanager deployment to complete\n" 27 | until timeout 300 oc rollout status statefulset.apps/alertmanager-default; do sleep 3; done 28 | echo -e "\n* [info] Waiting for smart-gateway deployment to complete\n" 29 | until timeout 300 oc rollout status deployment.apps/default-cloud1-coll-meter-smartgateway; do sleep 3; done 30 | until timeout 300 oc rollout status deployment.apps/default-cloud1-coll-event-smartgateway; do sleep 3; done 31 | until timeout 300 oc rollout status deployment.apps/default-cloud1-ceil-event-smartgateway; do sleep 3; done 32 | until timeout 300 oc rollout status deployment.apps/default-cloud1-ceil-meter-smartgateway; do sleep 3; done 33 | until timeout 300 oc rollout status deployment.apps/default-cloud1-sens-meter-smartgateway; do sleep 3; done 34 | ;; 35 | 36 | "use_redhat") 37 | echo -e "\n* [info] Waiting for prometheus deployment to complete\n" 38 | until timeout 300 oc rollout status statefulset.apps/prometheus-default; do sleep 3; done 39 | echo -e "\n* [info] Waiting for alertmanager deployment to complete\n" 40 | until timeout 300 oc rollout status statefulset.apps/alertmanager-default; do sleep 3; done 41 | echo -e "\n* [info] Waiting for smart-gateway deployment to complete\n" 42 | until timeout 300 oc rollout status deployment.apps/default-cloud1-coll-meter-smartgateway; do sleep 3; done 43 | until timeout 300 oc rollout status deployment.apps/default-cloud1-ceil-meter-smartgateway; do sleep 3; done 44 | until timeout 300 oc rollout status deployment.apps/default-cloud1-sens-meter-smartgateway; do sleep 3; done 45 | ;; 46 | 47 | "none") 48 | echo -e "\n* [info] Waiting for smart-gateway deployment to complete\n" 49 | until timeout 300 oc rollout status deployment.apps/default-cloud1-coll-meter-smartgateway; do sleep 3; done 50 | until timeout 300 oc rollout status deployment.apps/default-cloud1-ceil-meter-smartgateway; do sleep 3; done 51 | until timeout 300 oc rollout status deployment.apps/default-cloud1-sens-meter-smartgateway; do sleep 3; done 52 | ;; 53 | esac 54 | 55 | echo -e "\n* [info] Waiting for all pods to show Ready/Complete\n" 56 | while oc get pods --selector '!openshift.io/build.name' | tail -n +2 | grep -v -E 'Running|Completed'; do 57 | sleep 3 58 | done 59 | 60 | echo -e "\n* [info] CI Build complete. 
You can now run tests.\n" 61 | -------------------------------------------------------------------------------- /ci/common-tasks.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: "Set the sto_dir if it isn't already set" 3 | ansible.builtin.set_fact: 4 | sto_dir: '{{ ansible_env.HOME }}/{{ zuul.projects["github.com/infrawatch/service-telemetry-operator"].src_dir }}' 5 | when: sto_dir | default('') | length == 0 6 | 7 | - name: "Get vars common to all jobs" 8 | ansible.builtin.include_vars: 9 | file: "vars-zuul-common.yml" 10 | 11 | - name: "Get scenario-specific vars" 12 | ansible.builtin.include_vars: 13 | file: "vars-{{ scenario }}.yml" 14 | -------------------------------------------------------------------------------- /ci/deploy_stf.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: "Deploy STF" 3 | hosts: controller 4 | tasks: 5 | - name: "Setup play vars" 6 | ansible.builtin.include_tasks: "common-tasks.yml" 7 | 8 | - name: "Log into the cluster" 9 | ansible.builtin.import_role: 10 | name: rhol_crc 11 | tasks_from: add_crc_creds.yml 12 | 13 | - name: "Deploy STF using stf-run-ci" 14 | ansible.builtin.import_role: 15 | name: '../build/stf-run-ci' 16 | -------------------------------------------------------------------------------- /ci/post-collect_logs.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Based on https://raw.githubusercontent.com/openstack-k8s-operators/nova-operator/bc10c4f579f8538899ac7bc5f87bfdb62d7042a4/ci/nova-operator-base/playbooks/collect-logs.yaml 3 | - hosts: all 4 | name: Create zuul-output log dir 5 | gather_facts: false 6 | tasks: 7 | - name: Create log dir 8 | ansible.builtin.file: 9 | path: "{{ ansible_user_dir }}/zuul-output/logs" 10 | state: directory 11 | mode: "0755" 12 | 13 | - hosts: controller 14 | name: Collect logs on the controller 15 | gather_facts: false 16 | tasks: 17 | - name: "Setup play vars" 18 | ansible.builtin.include_tasks: "common-tasks.yml" 19 | 20 | - name: "Create log dir" 21 | ansible.builtin.file: 22 | path: "{{ logfile_dir }}" 23 | state: directory 24 | mode: "0755" 25 | 26 | - name: "Log into the cluster" 27 | ansible.builtin.import_role: 28 | name: rhol_crc 29 | tasks_from: add_crc_creds.yml 30 | 31 | - name: "Gather logs from stf deployment" 32 | ansible.builtin.import_role: 33 | name: '../build/stf-collect-logs' 34 | 35 | - name: "Copy generated logs" 36 | ansible.builtin.shell: | 37 | cp {{ ansible_env.HOME }}/*.log . 
38 | args: 39 | chdir: "{{ logfile_dir }}" 40 | changed_when: true 41 | ignore_errors: true 42 | 43 | - hosts: all 44 | name: Copy files from controller on node 45 | gather_facts: false 46 | tasks: 47 | - name: Copy files from controller on node 48 | ansible.builtin.include_role: 49 | name: fetch-output 50 | -------------------------------------------------------------------------------- /ci/pre-2node.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: "Do pre-work to get kubeconfig" 3 | hosts: controller 4 | vars: 5 | ci_framework_dir: "{{ ansible_user_dir }}/{{ zuul.projects['github.com/openstack-k8s-operators/ci-framework'].src_dir }}" 6 | environment: 7 | PATH: "~/.crc/bin:~/.crc/bin/oc:~/bin:{{ ansible_env.PATH }}" 8 | tasks: 9 | - name: "Set the sto_dir if it isn't already set" 10 | ansible.builtin.set_fact: 11 | sto_dir: '{{ ansible_env.HOME }}/{{ zuul.projects["github.com/infrawatch/service-telemetry-operator"].src_dir }}' 12 | when: sto_dir | default('') | length == 0 13 | 14 | - name: "Run bootstrap playbook" 15 | ansible.builtin.shell: 16 | cmd: | 17 | ansible-playbook -e@{{ ansible_user_dir }}/ci-framework-data/artifacts/parameters/zuul-params.yml {{ ci_framework_dir }}/playbooks/01-bootstrap.yml 18 | chdir: "{{ ci_framework_dir }}" 19 | 20 | - name: Run ci_framework infra playbook 21 | ansible.builtin.shell: 22 | cmd: | 23 | ansible-playbook -e cifmw_use_opn=false -e cifmw_use_devscripts=false -e cifmw_basedir={{ ansible_user_dir }}/ci-framework-data/ -e cifmw_openshift_setup_skip_internal_registry_tls_verify=true playbooks/02-infra.yml 24 | chdir: "{{ ci_framework_dir }}" 25 | 26 | - name: Run make targets for setup 27 | community.general.make: 28 | chdir: '{{ ansible_env.HOME }}/{{ zuul.projects["github.com/openstack-k8s-operators/ci-framework"].src_dir }}' 29 | target: "{{ item }}" 30 | with_items: 31 | - setup_tests 32 | - setup_molecule 33 | 34 | -------------------------------------------------------------------------------- /ci/prepare.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: "Prepare the environment for running stf" 3 | hosts: controller 4 | tasks: 5 | - name: "Setup play vars" 6 | ansible.builtin.include_tasks: "common-tasks.yml" 7 | 8 | - name: "Install Python 3.12" 9 | ansible.builtin.package: 10 | name: 11 | - python3.12 12 | - python3.12-pip 13 | state: latest 14 | become: true 15 | 16 | - name: "Update pip" 17 | ansible.builtin.pip: 18 | name: pip 19 | state: latest 20 | extra_args: "-U" 21 | executable: pip3.12 22 | 23 | - name: "Install pre-reqs from pip" 24 | ansible.builtin.pip: 25 | requirements: "{{ sto_dir }}/build/stf-run-ci/requirements.txt" 26 | chdir: "{{ sto_dir }}" 27 | state: present 28 | executable: pip3.12 29 | 30 | - name: "Install ansible collections" 31 | community.general.ansible_galaxy_install: 32 | type: collection 33 | name: "{{ item }}" 34 | with_items: 35 | - "kubernetes.core:5.0.0" 36 | - "community.general" 37 | 38 | - name: "Log into the cluster" 39 | ansible.builtin.import_role: 40 | name: rhol_crc 41 | tasks_from: add_crc_creds.yml 42 | 43 | - name: "Create the service-telemetry project" 44 | kubernetes.core.k8s: 45 | api_version: v1 46 | kind: Namespace 47 | name: "{{ namespace }}" 48 | state: present 49 | retries: 3 50 | delay: 30 51 | -------------------------------------------------------------------------------- /ci/test_stf.yml: -------------------------------------------------------------------------------- 1 
| --- 2 | - name: "Run tests to verify that STF runs as expected" 3 | hosts: controller 4 | tasks: 5 | - name: "Setup play vars" 6 | ansible.builtin.include_tasks: "common-tasks.yml" 7 | 8 | - name: "Log into the cluster" 9 | ansible.builtin.import_role: 10 | name: rhol_crc 11 | tasks_from: add_crc_creds.yml 12 | 13 | - name: "Run STF smoketests" 14 | ansible.builtin.shell: 15 | cmd: | 16 | OCP_PROJECT={{ namespace }} CLEANUP=false ./tests/smoketest/smoketest.sh > {{ logfile_dir }}/smoketest.log 2>&1 17 | chdir: "{{ sto_dir }}" 18 | changed_when: false 19 | -------------------------------------------------------------------------------- /ci/vars-local_build-index_deploy.yml: -------------------------------------------------------------------------------- 1 | --- 2 | #ansible-playbook --extra-vars __local_build_enabled=true -e __deploy_from_index_enabled=true --extra-vars working_branch="$(git rev-parse --abbrev-ref HEAD)" --extra-vars __service_telemetry_observability_strategy=use_redhat ./run-ci.yaml 3 | __local_build_enabled: true 4 | __deploy_from_index_enabled: true 5 | __service_telemetry_observability_strategy: use_redhat 6 | -------------------------------------------------------------------------------- /ci/vars-local_build.yml: -------------------------------------------------------------------------------- 1 | --- 2 | __deploy_stf: true 3 | __local_build_enabled: true 4 | __service_telemetry_snmptraps_enabled: true 5 | -------------------------------------------------------------------------------- /ci/vars-nightly_bundles-index_deploy.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # ansible-playbook -e __local_build_enabled=false -e __deploy_from_index_enabled=true -e __deploy_from_bundles_enabled=true -e __service_telemetry_bundle_image_path=quay.io/infrawatch-operators/service-telemetry-operator-bundle:nightly-head -e __smart_gateway_bundle_image_path=quay.io/infrawatch-operators/smart-gateway-operator-bundle:nightly-head --skip-tags bundle_registry_tls_ca --skip-tags bundle_registry_auth build/run-ci.yaml 3 | __local_build_enabled: false 4 | __deploy_from_bundles_enabled: true 5 | __deploy_from_index_enabled: true 6 | -------------------------------------------------------------------------------- /ci/vars-nightly_bundles.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # from: https://github.com/infrawatch/service-telemetry-operator/pull/437 3 | # ansible-playbook -e __service_telemetry_storage_ephemeral_enabled=true -e __local_build_enabled=false -e __deploy_from_bundles_enabled=true -e __service_telemetry_bundle_image_path=quay.io/infrawatch-operators/service-telemetry-operator-bundle:nightly-head -e __smart_gateway_bundle_image_path=quay.io/infrawatch-operators/smart-gateway-operator-bundle:nightly-head --skip-tags bundle_registry_tls_ca --skip-tags bundle_registry_auth build/run-ci.yaml 4 | 5 | __local_build_enabled: false 6 | __deploy_from_bundles_enabled: true 7 | -------------------------------------------------------------------------------- /ci/vars-zuul-common.yml: -------------------------------------------------------------------------------- 1 | --- 2 | namespace: "service-telemetry" 3 | setup_bundle_registry_tls_ca: false 4 | setup_bundle_registry_auth: false 5 | base_dir: "{{ sto_dir }}/build" 6 | logfile_dir: "{{ ansible_user_dir }}/zuul-output/logs/controller" 7 | clone_repos: false 8 | sgo_dir: "{{ ansible_env.HOME }}/{{ 
zuul.projects['github.com/infrawatch/smart-gateway-operator'].src_dir }}" 9 | sg_core_dir: "{{ ansible_env.HOME }}/{{ zuul.projects['github.com/infrawatch/sg-core'].src_dir }}" 10 | sg_bridge_dir: "{{ ansible_env.HOME }}/{{ zuul.projects['github.com/infrawatch/sg-bridge'].src_dir }}" 11 | prometheus_webhook_snmp_dir: "{{ ansible_env.HOME }}/{{ zuul.projects['github.com/infrawatch/prometheus-webhook-snmp'].src_dir }}" 12 | __service_telemetry_storage_persistent_storage_class: "crc-csi-hostpath-provisioner" -------------------------------------------------------------------------------- /deploy/alerts/README.md: -------------------------------------------------------------------------------- 1 | # Predefined alerts 2 | 3 | ## alerts.yaml 4 | These alarms are triggered by a substantial change (over a certain 5 | amount of standard deviation). They provide a default and may 6 | require adjustment for specific deployments. 7 | 8 | ## Installation 9 | 10 | Log into OpenShift and 11 | 12 | ``` 13 | oc apply -f alerts.yaml 14 | ``` 15 | -------------------------------------------------------------------------------- /deploy/containerlogs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PODS=$(oc get pods -o jsonpath="{.items[*].metadata.name}") 4 | podArr=($PODS) 5 | 6 | echo "[INFO]******* Last 15 lines of POD:CONTAINER logs" 7 | for pod in "${podArr[@]}" 8 | do 9 | containers=$(oc get pod "$pod" -ojsonpath="{.spec.containers[*].name}") 10 | containerArr=($containers) 11 | 12 | for container in "${containerArr[@]}" 13 | do 14 | echo 15 | echo 16 | echo 17 | echo "[Container Logs]********$pod:$container" 18 | oc logs "$pod" -c $container | tail -n15 19 | 20 | done 21 | 22 | echo 23 | echo 24 | done 25 | exit 0 26 | ~ 27 | -------------------------------------------------------------------------------- /deploy/crds/infra.watch_v1beta1_servicetelemetry_cr.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: infra.watch/v1beta1 2 | kind: ServiceTelemetry 3 | metadata: 4 | name: default 5 | spec: 6 | observabilityStrategy: use_redhat 7 | alerting: 8 | enabled: true 9 | alertmanager: 10 | receivers: 11 | snmpTraps: 12 | enabled: false 13 | community: public 14 | target: 192.168.24.254 15 | retries: 5 16 | port: 162 17 | timeout: 1 18 | alertOidLabel: oid 19 | trapOidPrefix: "1.3.6.1.4.1.50495.15" 20 | trapDefaultOid: "1.3.6.1.4.1.50495.15.1.2.1" 21 | trapDefaultSeverity: "" 22 | storage: 23 | strategy: persistent 24 | persistent: 25 | pvcStorageRequest: 20G 26 | backends: 27 | metrics: 28 | prometheus: 29 | enabled: true 30 | scrapeInterval: 30s 31 | storage: 32 | strategy: persistent 33 | retention: 24h 34 | persistent: 35 | pvcStorageRequest: 20G 36 | events: 37 | elasticsearch: 38 | enabled: false 39 | forwarding: 40 | hostUrl: https://elasticsearch-es-http:9200 41 | tlsServerName: "" 42 | tlsSecretName: elasticsearch-es-cert 43 | userSecretName: elasticsearch-es-elastic-user 44 | useBasicAuth: true 45 | useTls: true 46 | version: 7.16.1 47 | storage: 48 | strategy: persistent 49 | persistent: 50 | pvcStorageRequest: 20Gi 51 | certificates: 52 | endpointCertDuration: 70080h 53 | caCertDuration: 70080h 54 | clouds: 55 | - name: cloud1 56 | metrics: 57 | collectors: 58 | - collectorType: collectd 59 | subscriptionAddress: collectd/cloud1-telemetry 60 | debugEnabled: false 61 | bridge: 62 | ringBufferSize: 16384 63 | ringBufferCount: 15000 64 | verbose: false 65 | - collectorType: ceilometer 66 
| subscriptionAddress: anycast/ceilometer/cloud1-metering.sample 67 | debugEnabled: false 68 | bridge: 69 | ringBufferSize: 16384 70 | ringBufferCount: 15000 71 | verbose: false 72 | - collectorType: sensubility 73 | subscriptionAddress: sensubility/cloud1-telemetry 74 | debugEnabled: false 75 | bridge: 76 | ringBufferSize: 65535 77 | ringBufferCount: 15000 78 | verbose: false 79 | graphing: 80 | enabled: false 81 | grafana: 82 | ingressEnabled: true 83 | disableSignoutMenu: false 84 | baseImage: registry.redhat.io/rhel8/grafana:9 85 | dashboards: 86 | enabled: true 87 | transports: 88 | qdr: 89 | enabled: true 90 | auth: basic 91 | web: 92 | enabled: false 93 | certificates: 94 | endpointCertDuration: 70080h 95 | caCertDuration: 70080h 96 | highAvailability: 97 | enabled: false 98 | # vim: set ft=yaml shiftwidth=2 tabstop=2 expandtab: 99 | -------------------------------------------------------------------------------- /deploy/olm-catalog/service-telemetry-operator/Dockerfile.in: -------------------------------------------------------------------------------- 1 | FROM scratch 2 | 3 | COPY ./manifests /manifests/ 4 | COPY ./metadata /metadata/ 5 | 6 | LABEL operators.operatorframework.io.bundle.mediatype.v1=registry+v1 7 | LABEL operators.operatorframework.io.bundle.manifests.v1=manifests/ 8 | LABEL operators.operatorframework.io.bundle.metadata.v1=metadata/ 9 | LABEL operators.operatorframework.io.bundle.package.v1=service-telemetry-operator 10 | LABEL operators.operatorframework.io.bundle.channels.v1=<> 11 | LABEL operators.operatorframework.io.bundle.channel.default.v1=<> 12 | LABEL operators.operatorframework.io.metrics.mediatype.v1=metrics+v1 13 | LABEL operators.operatorframework.io.metrics.builder=operator-sdk-v0.19.4 14 | LABEL operators.operatorframework.io.metrics.project_layout=ansible 15 | LABEL com.redhat.delivery.operator.bundle=true 16 | LABEL com.redhat.openshift.versions="v4.16-v4.18" 17 | LABEL com.redhat.delivery.backport=false 18 | 19 | LABEL com.redhat.component="service-telemetry-operator-bundle-container" \ 20 | name="stf/service-telemetry-operator-bundle" \ 21 | version="<>" \ 22 | summary="service-telemetry-operator-bundle" \ 23 | License="ASL 2.0" \ 24 | io.openshift.expose-services="" \ 25 | io.openshift.tags="monitoring,telemetry,faulting,serviceassurance" \ 26 | io.k8s.display-name="service-telemetry-operator-bundle" \ 27 | maintainer="['leif+service-telemetry-operator@redhat.com']" \ 28 | description="service-telemetry-operator-bundle" 29 | -------------------------------------------------------------------------------- /deploy/olm-catalog/service-telemetry-operator/metadata/annotations.yaml: -------------------------------------------------------------------------------- 1 | annotations: 2 | operators.operatorframework.io.bundle.channel.default.v1: unstable 3 | operators.operatorframework.io.bundle.channels.v1: unstable 4 | operators.operatorframework.io.bundle.manifests.v1: manifests/ 5 | operators.operatorframework.io.bundle.mediatype.v1: registry+v1 6 | operators.operatorframework.io.bundle.metadata.v1: metadata/ 7 | operators.operatorframework.io.bundle.package.v1: service-telemetry-operator 8 | operators.operatorframework.io.metrics.builder: operator-sdk-v0.19.4 9 | operators.operatorframework.io.metrics.mediatype.v1: metrics+v1 10 | operators.operatorframework.io.metrics.project_layout: ansible 11 | -------------------------------------------------------------------------------- /deploy/olm-catalog/service-telemetry-operator/metadata/properties.yaml: 
-------------------------------------------------------------------------------- 1 | properties: 2 | - type: olm.maxOpenShiftVersion 3 | value: "4.16" 4 | - type: olm.constraint 5 | value: 6 | failureMessage: Require Smart Gateway for Service Telemetry Framework 7 | all: 8 | constraints: 9 | - failureMessage: Package smart-gateway-operator is needed for Service Telemetry Framework 10 | package: 11 | packageName: smart-gateway-operator 12 | versionRange: '>=5.0.0' 13 | - type: olm.constraint 14 | value: 15 | failureMessage: Require data transport for Service Telemetry Framework 16 | all: 17 | constraints: 18 | - failureMessage: Package amq7-interconnect-operator is needed for data transport with STF 19 | package: 20 | packageName: amq7-interconnect-operator 21 | versionRange: '>=1.10.0' 22 | -------------------------------------------------------------------------------- /deploy/olm-catalog/service-telemetry-operator/tests/scorecard/config.yaml: -------------------------------------------------------------------------------- 1 | kind: Configuration 2 | apiversion: scorecard.operatorframework.io/v1alpha3 3 | metadata: 4 | name: config 5 | stages: 6 | - parallel: true 7 | tests: 8 | - image: quay.io/operator-framework/scorecard-test:latest 9 | entrypoint: 10 | - scorecard-test 11 | - basic-check-spec 12 | labels: 13 | suite: basic 14 | test: basic-check-spec-test 15 | - image: quay.io/operator-framework/scorecard-test:latest 16 | entrypoint: 17 | - scorecard-test 18 | - olm-bundle-validation 19 | labels: 20 | suite: olm 21 | test: olm-bundle-validation-test 22 | -------------------------------------------------------------------------------- /deploy/operator.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: service-telemetry-operator 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | name: service-telemetry-operator 10 | template: 11 | metadata: 12 | labels: 13 | name: service-telemetry-operator 14 | spec: 15 | serviceAccountName: service-telemetry-operator 16 | containers: 17 | - name: operator 18 | image: <> 19 | imagePullPolicy: Always 20 | volumeMounts: 21 | - mountPath: /tmp/ansible-operator/runner 22 | name: runner 23 | env: 24 | - name: WATCH_NAMESPACE 25 | valueFrom: 26 | fieldRef: 27 | fieldPath: metadata.namespace 28 | - name: POD_NAME 29 | valueFrom: 30 | fieldRef: 31 | fieldPath: metadata.name 32 | - name: OPERATOR_NAME 33 | value: "service-telemetry-operator" 34 | - name: ANSIBLE_GATHERING 35 | value: explicit 36 | - name: RELATED_IMAGE_PROMETHEUS_WEBHOOK_SNMP_IMAGE 37 | value: <> 38 | - name: RELATED_IMAGE_OAUTH_PROXY_IMAGE 39 | value: <> 40 | - name: RELATED_IMAGE_PROMETHEUS_IMAGE 41 | value: <> 42 | - name: RELATED_IMAGE_ALERTMANAGER_IMAGE 43 | value: <> 44 | volumes: 45 | - name: runner 46 | emptyDir: {} 47 | -------------------------------------------------------------------------------- /deploy/operator_group.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: operators.coreos.com/v1 2 | kind: OperatorGroup 3 | metadata: 4 | name: service-telemetry-operator-group 5 | namespace: service-telemetry 6 | spec: 7 | targetNamespaces: 8 | - service-telemetry 9 | -------------------------------------------------------------------------------- /deploy/quickstart.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | REL=$(dirname "$0"); source 
"${REL}/../build/metadata.sh" 3 | EPHEMERAL_STORAGE="${EPHEMERAL_STORAGE:-false}" 4 | 5 | oc new-project "${OCP_PROJECT}" 6 | ansible-playbook \ 7 | --extra-vars namespace="${OCP_PROJECT}" \ 8 | --extra-vars __local_build_enabled=true \ 9 | --extra-vars __service_telemetry_snmptraps_enabled=true \ 10 | --extra-vars __service_telemetry_storage_ephemeral_enabled=${EPHEMERAL_STORAGE} \ 11 | ${REL}/../build/run-ci.yaml 12 | -------------------------------------------------------------------------------- /deploy/remove_stf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Removes STF and (optionally) Cert-Manager For OpenShift from your cluster 4 | # 5 | REL=$(dirname "$0"); . "${REL}/../build/metadata.sh" 6 | REMOVE_CERTMANAGER=${REMOVE_CERTMANAGER:-true} 7 | REMOVE_OBO=${REMOVE_OBO:-true} 8 | 9 | # The whole STF project (start this first since it's slow) 10 | oc delete project "${OCP_PROJECT}" 11 | 12 | # Our custom OperatorSource 13 | oc delete OperatorSource infrawatch-operators -n openshift-marketplace 14 | 15 | # Revert our OperatorHub.io catalog for default built-in Community Operators 16 | oc delete CatalogSource operatorhubio-operators -n openshift-marketplace 17 | 18 | oc apply -f - < /dev/null; do echo "Waiting for ${OCP_PROJECT} to disappear"; sleep 5; done 63 | 64 | for i in $(oc get images | grep "${OCP_PROJECT}" | cut -f1 -d' '); do oc delete image $i; done 65 | -------------------------------------------------------------------------------- /deploy/role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | creationTimestamp: null 5 | name: service-telemetry-operator 6 | rules: 7 | - apiGroups: 8 | - authentication.k8s.io 9 | resources: 10 | - tokenreviews 11 | verbs: 12 | - create 13 | - apiGroups: 14 | - rbac.authorization.k8s.io 15 | resources: 16 | - clusterroles 17 | - clusterrolebindings 18 | verbs: 19 | - create 20 | - get 21 | - list 22 | - watch 23 | - update 24 | - patch 25 | - delete 26 | - apiGroups: 27 | - authorization.k8s.io 28 | resources: 29 | - subjectaccessreviews 30 | verbs: 31 | - create 32 | - apiGroups: 33 | - security.openshift.io 34 | resourceNames: 35 | - nonroot 36 | - nonroot-v2 37 | resources: 38 | - securitycontextconstraints 39 | verbs: 40 | - use 41 | - apiGroups: 42 | - "" 43 | resources: 44 | - nodes/metrics 45 | verbs: 46 | - get 47 | - apiGroups: 48 | - "" 49 | resources: 50 | - namespaces 51 | verbs: 52 | - get 53 | - nonResourceURLs: 54 | - /metrics 55 | verbs: 56 | - get 57 | 58 | --- 59 | 60 | apiVersion: rbac.authorization.k8s.io/v1 61 | kind: Role 62 | metadata: 63 | creationTimestamp: null 64 | name: service-telemetry-operator 65 | rules: 66 | - apiGroups: 67 | - "" 68 | resources: 69 | - pods 70 | - services 71 | - services/finalizers 72 | - endpoints 73 | - persistentvolumeclaims 74 | - events 75 | - configmaps 76 | - secrets 77 | verbs: 78 | - '*' 79 | - apiGroups: 80 | - route.openshift.io 81 | resources: 82 | - routes 83 | verbs: 84 | - create 85 | - get 86 | - list 87 | - watch 88 | - update 89 | - patch 90 | - apiGroups: 91 | - "" 92 | resources: 93 | - serviceaccounts 94 | verbs: 95 | - create 96 | - get 97 | - list 98 | - watch 99 | - update 100 | - patch 101 | - apiGroups: 102 | - apps 103 | resources: 104 | - deployments 105 | - daemonsets 106 | - replicasets 107 | - statefulsets 108 | verbs: 109 | - '*' 110 | - apiGroups: 111 | - cert-manager.io 112 
| resources: 113 | - issuers 114 | - certificates 115 | verbs: 116 | - '*' 117 | - apiGroups: 118 | - interconnectedcloud.github.io 119 | - smartgateway.infra.watch 120 | - monitoring.coreos.com 121 | - monitoring.rhobs 122 | - elasticsearch.k8s.elastic.co 123 | - grafana.integreatly.org 124 | - integreatly.org 125 | resources: 126 | - '*' 127 | verbs: 128 | - '*' 129 | - apiGroups: 130 | - monitoring.coreos.com 131 | resources: 132 | - scrapeconfigs 133 | - servicemonitors 134 | verbs: 135 | - get 136 | - create 137 | - delete 138 | - apiGroups: 139 | - monitoring.rhobs 140 | resources: 141 | - scrapeconfigs 142 | - servicemonitors 143 | verbs: 144 | - get 145 | - create 146 | - delete 147 | - apiGroups: 148 | - apps 149 | resourceNames: 150 | - service-telemetry-operator 151 | resources: 152 | - deployments/finalizers 153 | verbs: 154 | - update 155 | - apiGroups: 156 | - "" 157 | resources: 158 | - pods 159 | verbs: 160 | - get 161 | - apiGroups: 162 | - apps 163 | resources: 164 | - replicasets 165 | verbs: 166 | - get 167 | - apiGroups: 168 | - infra.watch 169 | resources: 170 | - '*' 171 | verbs: 172 | - '*' 173 | - apiGroups: 174 | - rbac.authorization.k8s.io 175 | resources: 176 | - roles 177 | - rolebindings 178 | verbs: 179 | - create 180 | - get 181 | - list 182 | - watch 183 | - update 184 | - patch 185 | - apiGroups: 186 | - extensions 187 | - networking.k8s.io 188 | resources: 189 | - ingresses 190 | verbs: 191 | - get 192 | - list 193 | - watch 194 | -------------------------------------------------------------------------------- /deploy/role_binding.yaml: -------------------------------------------------------------------------------- 1 | kind: ClusterRoleBinding 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | metadata: 4 | name: service-telemetry-operator 5 | subjects: 6 | - kind: ServiceAccount 7 | name: service-telemetry-operator 8 | namespace: placeholder 9 | roleRef: 10 | kind: ClusterRole 11 | name: service-telemetry-operator 12 | apiGroup: rbac.authorization.k8s.io 13 | 14 | --- 15 | 16 | kind: RoleBinding 17 | apiVersion: rbac.authorization.k8s.io/v1 18 | metadata: 19 | name: service-telemetry-operator 20 | subjects: 21 | - kind: ServiceAccount 22 | name: service-telemetry-operator 23 | roleRef: 24 | kind: Role 25 | name: service-telemetry-operator 26 | apiGroup: rbac.authorization.k8s.io 27 | -------------------------------------------------------------------------------- /deploy/service_account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: service-telemetry-operator 5 | -------------------------------------------------------------------------------- /molecule/default/asserts.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - name: Verify 4 | hosts: localhost 5 | connection: local 6 | vars: 7 | ansible_python_interpreter: '{{ ansible_playbook_python }}' 8 | tasks: 9 | - name: Get all pods in {{ namespace }} 10 | k8s_facts: 11 | api_version: v1 12 | kind: Pod 13 | namespace: '{{ namespace }}' 14 | register: pods 15 | 16 | - name: Output pods 17 | debug: var=pods 18 | -------------------------------------------------------------------------------- /molecule/default/molecule.yml: -------------------------------------------------------------------------------- 1 | --- 2 | dependency: 3 | name: galaxy 4 | driver: 5 | name: docker 6 | lint: 7 | name: yamllint 8 | enabled: False 9 | platforms: 10 | - name: 
kind-default 11 | groups: 12 | - k8s 13 | image: bsycorp/kind:latest-1.14 14 | privileged: True 15 | override_command: no 16 | exposed_ports: 17 | - 8443/tcp 18 | - 10080/tcp 19 | published_ports: 20 | - 0.0.0.0:${TEST_CLUSTER_PORT:-9443}:8443/tcp 21 | pre_build_image: yes 22 | provisioner: 23 | name: ansible 24 | log: True 25 | lint: 26 | name: ansible-lint 27 | enabled: False 28 | inventory: 29 | group_vars: 30 | all: 31 | namespace: ${TEST_NAMESPACE:-osdk-test} 32 | env: 33 | K8S_AUTH_KUBECONFIG: /tmp/molecule/kind-default/kubeconfig 34 | KUBECONFIG: /tmp/molecule/kind-default/kubeconfig 35 | ANSIBLE_ROLES_PATH: ${MOLECULE_PROJECT_DIRECTORY}/roles 36 | KIND_PORT: '${TEST_CLUSTER_PORT:-9443}' 37 | scenario: 38 | name: default 39 | verifier: 40 | name: testinfra 41 | lint: 42 | name: flake8 43 | -------------------------------------------------------------------------------- /molecule/default/playbook.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Converge 3 | hosts: localhost 4 | connection: local 5 | vars: 6 | ansible_python_interpreter: '{{ ansible_playbook_python }}' 7 | roles: 8 | - servicetelemetry 9 | 10 | - import_playbook: '{{ playbook_dir }}/asserts.yml' 11 | -------------------------------------------------------------------------------- /molecule/default/prepare.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Prepare 3 | hosts: k8s 4 | gather_facts: no 5 | vars: 6 | kubeconfig: "{{ lookup('env', 'KUBECONFIG') }}" 7 | tasks: 8 | - name: delete the kubeconfig if present 9 | file: 10 | path: '{{ kubeconfig }}' 11 | state: absent 12 | delegate_to: localhost 13 | 14 | - name: Fetch the kubeconfig 15 | fetch: 16 | dest: '{{ kubeconfig }}' 17 | flat: yes 18 | src: /root/.kube/config 19 | 20 | - name: Change the kubeconfig port to the proper value 21 | replace: 22 | regexp: 8443 23 | replace: "{{ lookup('env', 'KIND_PORT') }}" 24 | path: '{{ kubeconfig }}' 25 | delegate_to: localhost 26 | 27 | - name: Wait for the Kubernetes API to become available (this could take a minute) 28 | uri: 29 | url: "http://localhost:10080/kubernetes-ready" 30 | status_code: 200 31 | validate_certs: no 32 | register: result 33 | until: (result.status|default(-1)) == 200 34 | retries: 60 35 | delay: 5 36 | -------------------------------------------------------------------------------- /molecule/test-cluster/molecule.yml: -------------------------------------------------------------------------------- 1 | --- 2 | dependency: 3 | name: galaxy 4 | driver: 5 | name: delegated 6 | options: 7 | managed: False 8 | ansible_connection_options: {} 9 | lint: 10 | name: yamllint 11 | enabled: False 12 | platforms: 13 | - name: test-cluster 14 | groups: 15 | - k8s 16 | provisioner: 17 | name: ansible 18 | inventory: 19 | group_vars: 20 | all: 21 | namespace: ${TEST_NAMESPACE:-osdk-test} 22 | lint: 23 | name: ansible-lint 24 | enabled: False 25 | env: 26 | ANSIBLE_ROLES_PATH: ${MOLECULE_PROJECT_DIRECTORY}/roles 27 | scenario: 28 | name: test-cluster 29 | test_sequence: 30 | - lint 31 | - destroy 32 | - dependency 33 | - syntax 34 | - create 35 | - prepare 36 | - converge 37 | - side_effect 38 | - verify 39 | - destroy 40 | verifier: 41 | name: testinfra 42 | lint: 43 | name: flake8 44 | -------------------------------------------------------------------------------- /molecule/test-cluster/playbook.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - name: 
Converge 4 | hosts: localhost 5 | connection: local 6 | vars: 7 | ansible_python_interpreter: '{{ ansible_playbook_python }}' 8 | deploy_dir: "{{ lookup('env', 'MOLECULE_PROJECT_DIRECTORY') }}/deploy" 9 | image_name: infra.watch/service-telemetry-operator:testing 10 | custom_resource: "{{ lookup('file', '/'.join([deploy_dir, 'crds/infra.watch_v1alpha1_servicetelemetry_cr.yaml'])) | from_yaml }}" 11 | tasks: 12 | - name: Create the infra.watch/v1alpha1.ServiceTelemetry 13 | k8s: 14 | namespace: '{{ namespace }}' 15 | definition: "{{ lookup('file', '/'.join([deploy_dir, 'crds/infra.watch_v1alpha1_servicetelemetry_cr.yaml'])) }}" 16 | 17 | - name: Get the newly created Custom Resource 18 | debug: 19 | msg: "{{ lookup('k8s', group='infra.watch', api_version='v1alpha1', kind='ServiceTelemetry', namespace=namespace, resource_name=custom_resource.metadata.name) }}" 20 | 21 | - name: Wait 60s for reconciliation to run 22 | k8s_facts: 23 | api_version: 'v1alpha1' 24 | kind: 'ServiceTelemetry' 25 | namespace: '{{ namespace }}' 26 | name: '{{ custom_resource.metadata.name }}' 27 | register: reconcile_cr 28 | until: 29 | - "'Successful' in (reconcile_cr | json_query('resources[].status.conditions[].reason'))" 30 | delay: 6 31 | retries: 10 32 | 33 | - import_playbook: '{{ playbook_dir }}/../default/asserts.yml' 34 | -------------------------------------------------------------------------------- /molecule/test-local/molecule.yml: -------------------------------------------------------------------------------- 1 | --- 2 | dependency: 3 | name: galaxy 4 | driver: 5 | name: docker 6 | lint: 7 | name: yamllint 8 | enabled: False 9 | platforms: 10 | - name: kind-test-local 11 | groups: 12 | - k8s 13 | image: bsycorp/kind:latest-1.14 14 | privileged: True 15 | override_command: no 16 | exposed_ports: 17 | - 8443/tcp 18 | - 10080/tcp 19 | published_ports: 20 | - 0.0.0.0:${TEST_CLUSTER_PORT:-10443}:8443/tcp 21 | pre_build_image: yes 22 | volumes: 23 | - ${MOLECULE_PROJECT_DIRECTORY}:/build:Z 24 | provisioner: 25 | name: ansible 26 | log: True 27 | lint: 28 | name: ansible-lint 29 | enabled: False 30 | inventory: 31 | group_vars: 32 | all: 33 | namespace: ${TEST_NAMESPACE:-osdk-test} 34 | env: 35 | K8S_AUTH_KUBECONFIG: /tmp/molecule/kind-test-local/kubeconfig 36 | KUBECONFIG: /tmp/molecule/kind-test-local/kubeconfig 37 | ANSIBLE_ROLES_PATH: ${MOLECULE_PROJECT_DIRECTORY}/roles 38 | KIND_PORT: '${TEST_CLUSTER_PORT:-10443}' 39 | scenario: 40 | name: test-local 41 | test_sequence: 42 | - lint 43 | - destroy 44 | - dependency 45 | - syntax 46 | - create 47 | - prepare 48 | - converge 49 | - side_effect 50 | - verify 51 | - destroy 52 | verifier: 53 | name: testinfra 54 | lint: 55 | name: flake8 56 | -------------------------------------------------------------------------------- /molecule/test-local/prepare.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - import_playbook: ../default/prepare.yml 3 | 4 | - name: Prepare operator resources 5 | hosts: localhost 6 | connection: local 7 | vars: 8 | ansible_python_interpreter: '{{ ansible_playbook_python }}' 9 | deploy_dir: "{{ lookup('env', 'MOLECULE_PROJECT_DIRECTORY') }}/deploy" 10 | tasks: 11 | - name: Create Custom Resource Definition 12 | k8s: 13 | definition: "{{ lookup('file', '/'.join([deploy_dir, 'crds/service-telemetry.infra.watch_servicetelemetrys_crd.yaml'])) }}" 14 | 15 | - name: Ensure specified namespace is present 16 | k8s: 17 | api_version: v1 18 | kind: Namespace 19 | name: '{{ namespace }}' 20 | 21 | - 
name: Create RBAC resources 22 | k8s: 23 | definition: "{{ lookup('template', '/'.join([deploy_dir, item])) }}" 24 | namespace: '{{ namespace }}' 25 | with_items: 26 | - role.yaml 27 | - role_binding.yaml 28 | - service_account.yaml 29 | -------------------------------------------------------------------------------- /requirements.yml: -------------------------------------------------------------------------------- 1 | # NOTE(vkmc) Great docs from https://github.com/jmazzitelli/kiali-operator/blob/master/requirements.yml 2 | # 3 | # This is the Ansible Galaxy requirements that need to be installed locally to be able to run 4 | # the operator Ansible playbook locally. 5 | # 6 | # To install these into your local Ansible environment: 7 | # ansible-galaxy collection install -r requirements.yml --force-with-deps 8 | # 9 | # Make sure these collections match that which is inside the Ansible Operator SDK base image. 10 | # You can determine what collections are installed by looking in the base image like this: 11 | # 12 | # podman run \ 13 | # -it --rm --entrypoint '' \ 14 | # quay.io/openshift/origin-ansible-operator:4.13 \ 15 | # ls /opt/ansible/.ansible/collections/ansible_collections 16 | # 17 | # To determine the version of a specific collection, look at the MANIFEST.json: 18 | # 19 | # podman run \ 20 | # -it --rm --entrypoint '' \ 21 | # quay.io/openshift/origin-ansible-operator:4.13 \ 22 | # cat /opt/ansible/.ansible/collections/ansible_collections/kubernetes/core/MANIFEST.json | grep version 23 | # 24 | # It is best if you have the same version of Ansible installed locally as found in the base image. You can determine 25 | # the version of Ansible in the base image via: 26 | # 27 | # podman run \ 28 | # -it --rm --entrypoint '' \ 29 | # quay.io/openshift/origin-ansible-operator:4.13 \ 30 | # ansible --version 31 | # 32 | # To install that version locally, you can git clone the source via: 33 | # git clone -b v --depth 1 https://github.com/ansible/ansible.git 34 | # and then set up your environment via: 35 | # source ./ansible/hacking/env-setup -q 36 | 37 | collections: 38 | - name: kubernetes.core 39 | version: 5.0.0 40 | - name: operator_sdk.util 41 | version: 0.4.0 42 | -------------------------------------------------------------------------------- /roles/servicetelemetry/README.md: -------------------------------------------------------------------------------- 1 | Role Name 2 | ========= 3 | 4 | A brief description of the role goes here. 5 | 6 | Requirements 7 | ------------ 8 | 9 | Any pre-requisites that may not be covered by Ansible itself or the role should be mentioned here. For instance, if the role uses the EC2 module, it may be a good idea to mention in this section that the boto package is required. 10 | 11 | Role Variables 12 | -------------- 13 | 14 | A description of the settable variables for this role should go here, including any variables that are in defaults/main.yml, vars/main.yml, and any variables that can/should be set via parameters to the role. Any variables that are read from other roles and/or the global scope (ie. hostvars, group vars, etc.) should be mentioned here as well. 15 | 16 | Dependencies 17 | ------------ 18 | 19 | A list of other roles hosted on Galaxy should go here, plus any details in regards to parameters that may need to be set for other roles, or variables that are used from other roles. 
20 | 21 | Example Playbook 22 | ---------------- 23 | 24 | Including an example of how to use your role (for instance, with variables passed in as parameters) is always nice for users too: 25 | 26 | - hosts: servers 27 | roles: 28 | - { role: username.rolename, x: 42 } 29 | 30 | License 31 | ------- 32 | 33 | BSD 34 | 35 | Author Information 36 | ------------------ 37 | 38 | An optional section for the role authors to include contact information, or a website (HTML is not allowed). 39 | -------------------------------------------------------------------------------- /roles/servicetelemetry/filter_plugins/stripnone.py: -------------------------------------------------------------------------------- 1 | from __future__ import (absolute_import, division, print_function) 2 | __metaclass__ = type 3 | 4 | ANSIBLE_METADATA = { 5 | 'metadata_version': '1.1', 6 | 'status': ['preview'], 7 | 'supported_by': 'community' 8 | } 9 | 10 | # Process recursively the given value if it is a dict and remove all keys that have a None value 11 | def strip_none(value): 12 | if isinstance(value, dict): 13 | dicts = {} 14 | for k,v in value.items(): 15 | if isinstance(v, dict): 16 | dicts[k] = strip_none(v) 17 | elif v is not None: 18 | dicts[k] = v 19 | return dicts 20 | else: 21 | return value 22 | 23 | # ---- Ansible filters ---- 24 | class FilterModule(object): 25 | def filters(self): 26 | return { 27 | 'stripnone': strip_none 28 | } 29 | -------------------------------------------------------------------------------- /roles/servicetelemetry/handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # handlers file for servicetelemetry 3 | -------------------------------------------------------------------------------- /roles/servicetelemetry/meta/main.yml: -------------------------------------------------------------------------------- 1 | galaxy_info: 2 | role_name: servicetelemetry 3 | author: Leif Madsen 4 | description: Service Telemetry Framework Umbrella Operator 5 | company: Red Hat, Inc. 
6 | license: Apache 7 | min_ansible_version: 2.9 8 | 9 | platforms: 10 | - name: RHEL 11 | versions: 12 | - 8 13 | - name: CentOS 14 | versions: 15 | - 8 16 | 17 | galaxy_tags: 18 | - monitoring 19 | - servicetelemetry 20 | 21 | dependencies: [] 22 | 23 | collections: 24 | - operator_sdk.util 25 | -------------------------------------------------------------------------------- /roles/servicetelemetry/tasks/base_smartgateway.yml: -------------------------------------------------------------------------------- 1 | - name: Append Smart Gateway to list of SGs 2 | set_fact: 3 | smartgateway_list: "{{ smartgateway_list }} + [ '{{ this_smartgateway }}' ]" 4 | 5 | - name: Deploy instance of Smart Gateway 6 | k8s: 7 | definition: "{{ lookup('template', manifest) | from_yaml }}" 8 | 9 | - name: Deploy SG-specific ScrapeConfig for metrics SGs 10 | include_tasks: component_scrapeconfig.yml 11 | when: 12 | - data_type == 'metrics' 13 | - has_monitoring_api | bool 14 | - observability_strategy != 'none' 15 | -------------------------------------------------------------------------------- /roles/servicetelemetry/tasks/component_certificates.yml: -------------------------------------------------------------------------------- 1 | - name: Create configmap for OAUTH CA certs 2 | k8s: 3 | definition: 4 | api_version: v1 5 | kind: ConfigMap 6 | metadata: 7 | name: serving-certs-ca-bundle 8 | namespace: '{{ ansible_operator_meta.namespace }}' 9 | annotations: 10 | service.beta.openshift.io/inject-cabundle: 'true' 11 | 12 | - name: Check for existing cookie secret 13 | k8s_info: 14 | api_version: v1 15 | kind: Secret 16 | namespace: '{{ ansible_operator_meta.namespace }}' 17 | name: '{{ ansible_operator_meta.name }}-session-secret' 18 | register: session_secret 19 | 20 | - name: Create cookie secret 21 | no_log: true 22 | k8s: 23 | definition: 24 | api_version: v1 25 | kind: Secret 26 | metadata: 27 | name: '{{ ansible_operator_meta.name }}-session-secret' 28 | namespace: '{{ ansible_operator_meta.namespace }}' 29 | stringData: 30 | session_secret: "{{ lookup('password', '/dev/null') }}" 31 | when: session_secret.resources|length == 0 32 | -------------------------------------------------------------------------------- /roles/servicetelemetry/tasks/component_elasticsearch.yml: -------------------------------------------------------------------------------- 1 | # DEPRECATED 2 | # 3 | # This code in the servicetelemetry role is deprecated as of STF 1.5.3, after 4 | # which only forwarding to an external elasticsearch is supported. 5 | # 6 | # The code lives on in the stf-run-ci role for CI testing of the forwarding 7 | # feature. 
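For reference, the external forwarding mentioned in the deprecation note above is configured through the `backends.events.elasticsearch` section of the ServiceTelemetry CR; a minimal sketch of the relevant fragment, taken from the sample CR in deploy/crds/infra.watch_v1beta1_servicetelemetry_cr.yaml shown earlier in this tree:

```
backends:
  events:
    elasticsearch:
      enabled: false
      forwarding:
        hostUrl: https://elasticsearch-es-http:9200
        useBasicAuth: true
        useTls: true
        userSecretName: elasticsearch-es-elastic-user
        tlsSecretName: elasticsearch-es-cert
```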
8 | - name: Lookup template 9 | debug: 10 | msg: "{{ lookup('template', './manifest_elasticsearch.j2') | from_yaml }}" 11 | 12 | - name: Set default ElasticSearch manifest 13 | set_fact: 14 | elasticsearch_manifest: "{{ lookup('template', './manifest_elasticsearch.j2') | from_yaml }}" 15 | when: elasticsearch_manifest is not defined 16 | 17 | - name: Create an instance of ElasticSearch 18 | k8s: 19 | state: '{{ "present" if servicetelemetry_vars.backends.events.elasticsearch.enabled else "absent" }}' 20 | definition: 21 | '{{ elasticsearch_manifest }}' 22 | -------------------------------------------------------------------------------- /roles/servicetelemetry/tasks/component_es_certificates.yml: -------------------------------------------------------------------------------- 1 | # Build out initial selfsigning issuer 2 | - name: Create selfsigned Issuer 3 | k8s: 4 | definition: 5 | apiVersion: cert-manager.io/v1 6 | kind: Issuer 7 | metadata: 8 | name: '{{ ansible_operator_meta.namespace }}-selfsigned' 9 | namespace: '{{ ansible_operator_meta.namespace }}' 10 | spec: 11 | selfSigned: {} 12 | 13 | - name: Create CA certificate 14 | k8s: 15 | definition: 16 | apiVersion: cert-manager.io/v1 17 | kind: Certificate 18 | metadata: 19 | name: '{{ ansible_operator_meta.namespace }}-ca' 20 | namespace: '{{ ansible_operator_meta.namespace }}' 21 | spec: 22 | duration: '{{ servicetelemetry_vars.backends.events.elasticsearch.certificates.ca_cert_duration }}' 23 | secretName: '{{ ansible_operator_meta.namespace }}-ca' 24 | commonName: '{{ ansible_operator_meta.namespace }}-ca' 25 | isCA: true 26 | issuerRef: 27 | name: '{{ ansible_operator_meta.namespace }}-selfsigned' 28 | 29 | - name: Create namespace CA Issuer 30 | k8s: 31 | definition: 32 | apiVersion: cert-manager.io/v1 33 | kind: Issuer 34 | metadata: 35 | name: '{{ ansible_operator_meta.namespace }}-ca' 36 | namespace: '{{ ansible_operator_meta.namespace }}' 37 | spec: 38 | ca: 39 | secretName: '{{ ansible_operator_meta.namespace }}-ca' 40 | 41 | # Create the ElasticSearch certificate using our new Issuer 42 | - name: Create certificate for elasticsearch 43 | set_fact: 44 | elasticsearch_certificate_manifest: 45 | apiVersion: cert-manager.io/v1 46 | kind: Certificate 47 | metadata: 48 | name: elasticsearch-es-http 49 | namespace: '{{ ansible_operator_meta.namespace }}' 50 | spec: 51 | duration: '{{ servicetelemetry_vars.backends.events.elasticsearch.certificates.endpoint_cert_duration }}' 52 | commonName: elasticsearch-es-http 53 | secretName: 'elasticsearch-es-cert' 54 | dnsNames: 55 | - elasticsearch-es-http 56 | - elasticsearch-es-http.{{ ansible_operator_meta.namespace }}.svc 57 | - elasticsearch-es-http.{{ ansible_operator_meta.namespace }}.svc.cluster.local 58 | issuerRef: 59 | kind: Issuer 60 | name: '{{ ansible_operator_meta.namespace }}-ca' 61 | when: elasticsearch_certificate_manifest is not defined 62 | 63 | - name: Create certificate for Elasticsearch 64 | k8s: 65 | definition: 66 | '{{ elasticsearch_certificate_manifest }}' 67 | -------------------------------------------------------------------------------- /roles/servicetelemetry/tasks/component_prometheus_reader.yml: -------------------------------------------------------------------------------- 1 | - name: Create ServiceAccount/stf-prometheus-reader 2 | k8s: 3 | state: '{{ "present" if servicetelemetry_vars.backends.metrics.prometheus.enabled else "absent" }}' 4 | definition: 5 | apiVersion: v1 6 | kind: ServiceAccount 7 | metadata: 8 | name: stf-prometheus-reader 9 | 
namespace: '{{ ansible_operator_meta.namespace }}' 10 | 11 | - name: Create prometheus-reader Role 12 | k8s: 13 | state: '{{ "present" if servicetelemetry_vars.backends.metrics.prometheus.enabled else "absent" }}' 14 | definition: 15 | apiVersion: rbac.authorization.k8s.io/v1 16 | kind: Role 17 | metadata: 18 | name: prometheus-reader 19 | namespace: '{{ ansible_operator_meta.namespace }}' 20 | rules: 21 | - apiGroups: 22 | - '{{ prometheus_operator_api_string | replace("/v1","") }}' 23 | resources: 24 | - prometheuses 25 | verbs: 26 | - get 27 | namespaces: 28 | - '{{ ansible_operator_meta.namespace }}' 29 | 30 | - name: Create prometheus-reader RoleBinding for stf-prometheus-reader 31 | k8s: 32 | state: '{{ "present" if servicetelemetry_vars.backends.metrics.prometheus.enabled else "absent" }}' 33 | definition: 34 | apiVersion: rbac.authorization.k8s.io/v1 35 | kind: RoleBinding 36 | metadata: 37 | name: stf-prometheus-reader 38 | namespace: '{{ ansible_operator_meta.namespace }}' 39 | roleRef: 40 | apiGroup: rbac.authorization.k8s.io 41 | kind: Role 42 | name: prometheus-reader 43 | subjects: 44 | - kind: ServiceAccount 45 | name: stf-prometheus-reader 46 | 47 | - name: Create an access token for stf-prometheus-reader 48 | k8s: 49 | state: '{{ "present" if servicetelemetry_vars.backends.metrics.prometheus.enabled else "absent" }}' 50 | definition: 51 | apiVersion: v1 52 | kind: Secret 53 | metadata: 54 | name: stf-prometheus-reader-token 55 | namespace: '{{ ansible_operator_meta.namespace }}' 56 | annotations: 57 | kubernetes.io/service-account.name: stf-prometheus-reader 58 | type: kubernetes.io/service-account-token 59 | -------------------------------------------------------------------------------- /roles/servicetelemetry/tasks/component_scrapeconfig.yml: -------------------------------------------------------------------------------- 1 | - name: Create an access token for prometheus-stf to use in scrapeconfigs 2 | k8s: 3 | state: '{{ "present" if servicetelemetry_vars.backends.metrics.prometheus.enabled else "absent" }}' 4 | definition: 5 | apiVersion: v1 6 | kind: Secret 7 | metadata: 8 | name: prometheus-stf-token 9 | namespace: '{{ ansible_operator_meta.namespace }}' 10 | annotations: 11 | kubernetes.io/service-account.name: prometheus-stf 12 | type: kubernetes.io/service-account-token 13 | 14 | - name: Create SG-specific Scrape Config manifest 15 | set_fact: 16 | sg_specific_scrapeconfig_manifest: | 17 | apiVersion: {{ prometheus_operator_api_string | replace("/v1","/v1alpha1") }} 18 | kind: ScrapeConfig 19 | metadata: 20 | labels: 21 | app: smart-gateway 22 | name: '{{ this_smartgateway }}' 23 | namespace: '{{ ansible_operator_meta.namespace }}' 24 | spec: 25 | authorization: 26 | type: bearer 27 | credentials: 28 | name: prometheus-stf-token 29 | key: token 30 | metricRelabelings: 31 | - action: labeldrop 32 | regex: pod 33 | - action: labeldrop 34 | regex: namespace 35 | - action: labeldrop 36 | regex: instance 37 | - action: replace 38 | regex: '.*/(.*)$' 39 | replacement: $1 40 | sourceLabels: [job] 41 | targetLabel: service 42 | - action: labeldrop 43 | regex: job 44 | - action: labeldrop 45 | regex: publisher 46 | - action: replace 47 | targetLabel: container 48 | replacement: sg-core 49 | - action: replace 50 | targetLabel: endpoint 51 | replacement: prom-https 52 | scheme: HTTPS 53 | scrapeInterval: {{ servicetelemetry_vars.backends.metrics.prometheus.scrape_interval }} 54 | staticConfigs: 55 | - targets: 56 | - '{{ this_smartgateway }}.{{ 
ansible_operator_meta.namespace }}.svc:8083' 57 | tlsConfig: 58 | ca: 59 | configMap: 60 | name: serving-certs-ca-bundle 61 | key: service-ca.crt 62 | serverName: '{{ this_smartgateway }}.{{ ansible_operator_meta.namespace }}.svc' 63 | 64 | - name: Create ScrapeConfig to scrape Smart Gateway 65 | k8s: 66 | state: '{{ "present" if servicetelemetry_vars.backends.metrics.prometheus.enabled else "absent" }}' 67 | definition: 68 | '{{ sg_specific_scrapeconfig_manifest }}' 69 | 70 | - name: Create additional ScrapeConfig if provided 71 | k8s: 72 | state: '{{ "present" if servicetelemetry_vars.backends.metrics.prometheus.enabled else "absent" }}' 73 | definition: 74 | '{{ scrapeconfig_manifest }}' 75 | when: scrapeconfig_manifest is defined 76 | 77 | - name: Create additional ServiceMonitor if provided (legacy) 78 | k8s: 79 | state: '{{ "present" if servicetelemetry_vars.backends.metrics.prometheus.enabled else "absent" }}' 80 | definition: 81 | '{{ servicemonitor_manifest }}' 82 | when: servicemonitor_manifest is defined 83 | 84 | - name: Remove (legacy) default ServiceMonitors 85 | k8s: 86 | state: absent 87 | api_version: '{{ prometheus_operator_api_string }}' 88 | kind: ServiceMonitor 89 | namespace: '{{ ansible_operator_meta.namespace }}' 90 | name: '{{ this_smartgateway }}' -------------------------------------------------------------------------------- /roles/servicetelemetry/tasks/component_snmp_traps.yml: -------------------------------------------------------------------------------- 1 | - name: Lookup template 2 | debug: 3 | msg: "{{ lookup('template', './manifest_snmp_traps.j2') | from_yaml }}" 4 | 5 | - name: Set default snmp traps manifest 6 | set_fact: 7 | snmp_traps_manifest: "{{ lookup('template', './manifest_snmp_traps.j2') | from_yaml }}" 8 | when: snmp_traps_manifest is not defined 9 | 10 | - name: Create an instance of snmp webhook 11 | k8s: 12 | definition: 13 | '{{ snmp_traps_manifest }}' 14 | 15 | - name: Set default snmp traps service manifest 16 | set_fact: 17 | snmp_traps_service_manifest: "{{ lookup('template', './manifest_snmp_traps_service.j2') | from_yaml }}" 18 | when: snmp_traps_service_manifest is not defined 19 | 20 | - name: Create an instance of snmp webhook service 21 | k8s: 22 | definition: 23 | '{{ snmp_traps_service_manifest }}' 24 | -------------------------------------------------------------------------------- /roles/servicetelemetry/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Installing service telemetry 3 | debug: 4 | msg: INSTALLING SERVICE TELEMETRY 5 | 6 | # Default setup and platform querying 7 | - name: Pre-setup 8 | include_tasks: pre.yml 9 | 10 | # --> qdr 11 | - name: Check if we have interconnect API 12 | set_fact: 13 | has_interconnect_api: "{{ True if 'interconnectedcloud.github.io' in api_groups else False }}" 14 | 15 | - name: Check if we have cert-manager.io API 16 | set_fact: 17 | has_certmanager_api: "{{ True if 'cert-manager.io' in api_groups else False }}" 18 | 19 | - name: Create QDR instance 20 | include_tasks: component_qdr.yml 21 | when: 22 | - has_interconnect_api | bool 23 | - has_certmanager_api | bool 24 | 25 | # --> backends.metrics 26 | - name: Setup Certificates for metrics components 27 | include_tasks: component_certificates.yml 28 | when: 29 | - has_certmanager_api | bool 30 | 31 | - name: Set community monitoring API string and labels 32 | set_fact: 33 | prometheus_operator_api_string: monitoring.coreos.com/v1 34 | prometheus_operator_label: 35 | 
when: observability_strategy == 'use_community' 36 | 37 | - name: Set Red Hat monitoring API string 38 | set_fact: 39 | prometheus_operator_api_string: monitoring.rhobs/v1 40 | prometheus_operator_label: 'app.kubernetes.io/managed-by: observability-operator' 41 | when: observability_strategy in ['use_redhat', 'use_hybrid'] 42 | 43 | - name: Check if we have the requested monitoring API 44 | set_fact: 45 | has_monitoring_api: "{{ True if (prometheus_operator_api_string | dirname) in api_groups else False }}" 46 | 47 | - block: 48 | - name: Create Prometheus instance 49 | include_tasks: component_prometheus.yml 50 | - name: Create Prometheus read-only user 51 | include_tasks: component_prometheus_reader.yml 52 | 53 | # --> alerting 54 | - name: Create Alertmanager instance 55 | include_tasks: component_alertmanager.yml 56 | when: 57 | - has_monitoring_api | bool 58 | - observability_strategy != 'none' 59 | 60 | # --> backends.events 61 | - name: Check if we have elasticsearch API 62 | set_fact: 63 | has_elasticsearch_api: "{{ True if 'elasticsearch.k8s.elastic.co' in api_groups else False }}" 64 | 65 | - name: Deploy ElasticSearch events backend 66 | block: 67 | - name: Setup Certificates for ElasticSearch 68 | include_tasks: component_es_certificates.yml 69 | 70 | - name: Setup ElasticSearch 71 | include_tasks: component_elasticsearch.yml 72 | when: 73 | - has_elasticsearch_api | bool 74 | - has_certmanager_api | bool 75 | - observability_strategy in ['use_community', 'use_hybrid'] 76 | - servicetelemetry_vars.backends.events.elasticsearch.enabled | bool 77 | 78 | # --> clouds 79 | - name: Get data about clouds 80 | debug: 81 | var: servicetelemetry_vars.clouds 82 | 83 | - name: Loop through cloud instances to setup transport receivers 84 | include_tasks: component_clouds.yml 85 | loop: "{{ servicetelemetry_vars.clouds }}" 86 | loop_control: 87 | loop_var: this_cloud 88 | 89 | # --> graphing 90 | - name: Check if we have integreatly.org API (Grafana Operator v4) 91 | set_fact: 92 | has_integreatly_api: "{{ True if 'integreatly.org' in api_groups else False }}" 93 | 94 | - name: Check if we have grafana.integreatly.org API (Grafana Operator v5) 95 | set_fact: 96 | has_grafana_integreatly_api: "{{ True if 'grafana.integreatly.org' in api_groups else False }}" 97 | 98 | - when: 99 | - (has_integreatly_api | bool) or (has_grafana_integreatly_api | bool) 100 | name: Start graphing component plays 101 | include_tasks: component_grafana.yml 102 | 103 | # Post deployment tasks 104 | - name: Post-setup 105 | include_tasks: post.yml 106 | -------------------------------------------------------------------------------- /roles/servicetelemetry/tasks/post.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Remove old Smart Gateways that are no longer in the clouds list 3 | block: 4 | - name: Initialize smartgateway_loaded_list 5 | set_fact: 6 | smartgateway_loaded_list: [] 7 | 8 | - name: Append SmartGateway metadata.name to smartgateway_loaded_list 9 | set_fact: 10 | smartgateway_loaded_list: "{{ smartgateway_loaded_list }} + ['{{ item.metadata.name }}']" 11 | loop: "{{ smartgateways_loaded.resources }}" 12 | loop_control: 13 | label: "{{ item.metadata.name }}" 14 | 15 | - name: Show difference between requested and existing Smart Gateway object list 16 | debug: 17 | msg: | 18 | {{ smartgateway_loaded_list | difference(smartgateway_list) }} 19 | 20 | - name: Remove unlisted Smart Gateway 21 | k8s: 22 | api_version: v2 23 | kind: SmartGateway 24 | 
name: "{{ item }}" 25 | namespace: "{{ ansible_operator_meta.namespace }}" 26 | state: absent 27 | loop: "{{ smartgateway_loaded_list | difference(smartgateway_list) }}" 28 | when: clouds_remove_on_missing | bool 29 | -------------------------------------------------------------------------------- /roles/servicetelemetry/templates/manifest_alertmanager.j2: -------------------------------------------------------------------------------- 1 | apiVersion: {{ prometheus_operator_api_string }} 2 | kind: Alertmanager 3 | metadata: 4 | labels: 5 | alertmanager: '{{ ansible_operator_meta.name }}' 6 | {{ prometheus_operator_label }} 7 | name: '{{ ansible_operator_meta.name }}' 8 | namespace: '{{ ansible_operator_meta.namespace }}' 9 | spec: 10 | {% if observability_strategy != "use_community" %} 11 | image: {{ alertmanager_image_path }} 12 | {% endif %} 13 | replicas: {{ servicetelemetry_vars.alerting.alertmanager.deployment_size }} 14 | serviceAccountName: alertmanager-stf 15 | scrapeConfigSelector: 16 | matchLabels: 17 | app: smart-gateway 18 | listenLocal: true 19 | podMetadata: 20 | labels: 21 | alertmanager: '{{ ansible_operator_meta.name }}' 22 | containers: 23 | - name: oauth-proxy 24 | image: {{ oauth_proxy_image }} 25 | args: 26 | - -https-address=:9095 27 | - -tls-cert=/etc/tls/private/tls.crt 28 | - -tls-key=/etc/tls/private/tls.key 29 | - -upstream=http://localhost:9093/ 30 | - -cookie-secret-file=/etc/proxy/secrets/session_secret 31 | - -openshift-service-account=alertmanager-stf 32 | - '-openshift-sar={"namespace":"{{ ansible_operator_meta.namespace }}", "resource": "alertmanagers", "resourceAPIGroup":"{{ prometheus_operator_api_string | replace("/v1","") }}", "verb":"get"}' 33 | - '-openshift-delegate-urls={"/": {"namespace":"{{ ansible_operator_meta.namespace }}", "resource": "alertmanagers", "group":"{{ prometheus_operator_api_string | replace("/v1","") }}", "verb":"get"}}' 34 | ports: 35 | - containerPort: 9095 36 | name: https 37 | protocol: TCP 38 | volumeMounts: 39 | - mountPath: /etc/tls/private 40 | name: secret-{{ ansible_operator_meta.name }}-alertmanager-proxy-tls 41 | - mountPath: /etc/proxy/secrets 42 | name: secret-{{ ansible_operator_meta.name }}-session-secret 43 | secrets: 44 | - '{{ ansible_operator_meta.name }}-alertmanager-proxy-tls' 45 | - '{{ ansible_operator_meta.name }}-session-secret' 46 | {% if servicetelemetry_vars.alerting.alertmanager.storage.strategy == "persistent" %} 47 | storage: 48 | volumeClaimTemplate: 49 | spec: 50 | resources: 51 | requests: 52 | storage: {{ servicetelemetry_vars.alerting.alertmanager.storage.persistent.pvc_storage_request }} 53 | {% if servicetelemetry_vars.alerting.alertmanager.storage.persistent.storage_selector is defined %} 54 | selector: {{ servicetelemetry_vars.alerting.alertmanager.storage.persistent.storage_selector }} 55 | {% endif %} 56 | {% if servicetelemetry_vars.alerting.alertmanager.storage.persistent.storage_class | length %} 57 | storageClassName: {{ servicetelemetry_vars.alerting.alertmanager.storage.persistent.storage_class }} 58 | {% endif %} 59 | {% endif %} 60 | -------------------------------------------------------------------------------- /roles/servicetelemetry/templates/manifest_alertmanager_config.j2: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: 'alertmanager-{{ ansible_operator_meta.name }}' 5 | namespace: '{{ ansible_operator_meta.namespace }}' 6 | type: Opaque 7 | stringData: 8 | alertmanager.yaml: |- 9 | 
global: 10 | resolve_timeout: 5m 11 | route: 12 | group_by: ['job'] 13 | group_wait: 30s 14 | group_interval: 5m 15 | repeat_interval: 12h 16 | {% if servicetelemetry_vars.alerting.alertmanager.receivers.snmp_traps.enabled %} 17 | receiver: 'snmp_wh' 18 | {% else %} 19 | receiver: 'null' 20 | {% endif %} 21 | receivers: 22 | {% if servicetelemetry_vars.alerting.alertmanager.receivers.snmp_traps.enabled %} 23 | - name: 'snmp_wh' 24 | webhook_configs: 25 | - url: 'http://{{ ansible_operator_meta.name }}-prometheus-webhook-snmp:9099' 26 | {% else %} 27 | - name: 'null' 28 | {% endif %} 29 | -------------------------------------------------------------------------------- /roles/servicetelemetry/templates/manifest_alertmanager_route.j2: -------------------------------------------------------------------------------- 1 | apiVersion: route.openshift.io/v1 2 | kind: Route 3 | metadata: 4 | name: '{{ ansible_operator_meta.name }}-alertmanager-proxy' 5 | namespace: '{{ ansible_operator_meta.namespace }}' 6 | spec: 7 | port: 8 | targetPort: web 9 | to: 10 | kind: Service 11 | name: '{{ ansible_operator_meta.name }}-alertmanager-proxy' 12 | tls: 13 | insecureEdgeTerminationPolicy: Redirect 14 | termination: Reencrypt 15 | -------------------------------------------------------------------------------- /roles/servicetelemetry/templates/manifest_alertmanager_service.j2: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: '{{ ansible_operator_meta.name }}-alertmanager-proxy' 5 | namespace: '{{ ansible_operator_meta.namespace }}' 6 | annotations: 7 | service.beta.openshift.io/serving-cert-secret-name: {{ ansible_operator_meta.name }}-alertmanager-proxy-tls 8 | spec: 9 | ports: 10 | - name: web 11 | port: 9095 12 | targetPort: https 13 | selector: 14 | alertmanager: '{{ ansible_operator_meta.name }}' 15 | -------------------------------------------------------------------------------- /roles/servicetelemetry/templates/manifest_elasticsearch.j2: -------------------------------------------------------------------------------- 1 | apiVersion: elasticsearch.k8s.elastic.co/v1 2 | kind: Elasticsearch 3 | metadata: 4 | name: elasticsearch 5 | namespace: '{{ ansible_operator_meta.namespace }}' 6 | spec: 7 | auth: {} 8 | http: 9 | service: 10 | metadata: {} 11 | spec: {} 12 | tls: 13 | certificate: 14 | secretName: elasticsearch-es-cert 15 | monitoring: 16 | logs: {} 17 | metrics: {} 18 | nodeSets: 19 | - count: {{ servicetelemetry_vars.backends.events.elasticsearch.node_count }} 20 | name: default 21 | config: 22 | node.roles: 23 | - master 24 | - data 25 | - ingest 26 | node.store.allow_mmap: true 27 | {% if servicetelemetry_vars.backends.events.elasticsearch.storage.strategy == "persistent" %} 28 | volumeClaimTemplates: 29 | - metadata: 30 | name: elasticsearch-data 31 | spec: 32 | accessModes: 33 | - ReadWriteOnce 34 | resources: 35 | requests: 36 | storage: {{ servicetelemetry_vars.backends.events.elasticsearch.storage.persistent.pvc_storage_request }} 37 | {% if servicetelemetry_vars.backends.events.elasticsearch.storage.persistent.storage_selector is defined %} 38 | selector: {{ servicetelemetry_vars.backends.events.elasticsearch.storage.persistent.storage_selector }} 39 | {% endif %} 40 | {% if servicetelemetry_vars.backends.events.elasticsearch.storage.persistent.storage_class | length %} 41 | storageClassName: {{ servicetelemetry_vars.backends.events.elasticsearch.storage.persistent.storage_class }} 42 | {% endif %} 
43 | {% endif %} 44 | podTemplate: 45 | metadata: 46 | labels: 47 | tuned.openshift.io/elasticsearch: elasticsearch 48 | spec: 49 | containers: 50 | - name: elasticsearch 51 | resources: 52 | limits: 53 | cpu: "2" 54 | memory: 4Gi 55 | requests: 56 | cpu: "1" 57 | memory: 4Gi 58 | {% if servicetelemetry_vars.backends.events.elasticsearch.storage.strategy == "ephemeral" %} 59 | volumes: 60 | - emptyDir: {} 61 | name: elasticsearch-data 62 | {% endif %} 63 | transport: 64 | service: 65 | metadata: {} 66 | spec: {} 67 | tls: 68 | certificate: {} 69 | updateStrategy: 70 | changeBudget: {} 71 | version: {{ servicetelemetry_vars.backends.events.elasticsearch.version }} 72 | volumeClaimDeletePolicy: DeleteOnScaledownOnly 73 | -------------------------------------------------------------------------------- /roles/servicetelemetry/templates/manifest_grafana.j2: -------------------------------------------------------------------------------- 1 | apiVersion: integreatly.org/v1alpha1 2 | kind: Grafana 3 | metadata: 4 | name: {{ ansible_operator_meta.name }} 5 | namespace: {{ ansible_operator_meta.namespace }} 6 | spec: 7 | serviceAccount: 8 | annotations: 9 | serviceaccounts.openshift.io/oauth-redirectreference.primary: '{{ grafana_oauth_redir_ref | to_json }}' 10 | baseImage: {{ servicetelemetry_vars.graphing.grafana.base_image }} 11 | ingress: 12 | enabled: {{ servicetelemetry_vars.graphing.grafana.ingress_enabled }} 13 | targetPort: web 14 | termination: reencrypt 15 | client: 16 | preferService: true 17 | config: 18 | auth: 19 | disable_signout_menu: {{ servicetelemetry_vars.graphing.grafana.disable_signout_menu }} 20 | disable_login_form: true 21 | auth.anonymous: 22 | enabled: true 23 | org_role: 'Admin' 24 | log: 25 | level: warn 26 | mode: console 27 | dashboardLabelSelector: 28 | - matchExpressions: 29 | - key: app 30 | operator: In 31 | values: 32 | - grafana 33 | containers: 34 | - name: oauth-proxy 35 | image: {{ oauth_proxy_image }} 36 | args: 37 | - -provider=openshift 38 | - -pass-basic-auth=false 39 | - -https-address=:3002 40 | - -tls-cert=/etc/tls/private/tls.crt 41 | - -tls-key=/etc/tls/private/tls.key 42 | - -upstream=http://localhost:3000 43 | - -cookie-secret-file=/etc/proxy/secrets/session_secret 44 | - -openshift-service-account=grafana-serviceaccount 45 | - '-openshift-sar={"namespace":"{{ ansible_operator_meta.namespace }}","resource": "grafanas", "resourceAPIGroup":"integreatly.org", "verb":"get"}' 46 | - -openshift-ca=/etc/pki/tls/cert.pem 47 | - -openshift-ca=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt 48 | ports: 49 | - containerPort: 3002 50 | name: https 51 | protocol: TCP 52 | volumeMounts: 53 | - mountPath: /etc/tls/private 54 | name: secret-{{ ansible_operator_meta.name }}-grafana-proxy-tls 55 | - mountPath: /etc/proxy/secrets 56 | name: secret-{{ ansible_operator_meta.name }}-session-secret 57 | secrets: 58 | - '{{ ansible_operator_meta.name }}-grafana-proxy-tls' 59 | - '{{ ansible_operator_meta.name }}-session-secret' 60 | service: 61 | ports: 62 | - name: web 63 | port: 3002 64 | protocol: TCP 65 | targetPort: https 66 | annotations: 67 | service.alpha.openshift.io/serving-cert-secret-name: {{ ansible_operator_meta.name }}-grafana-proxy-tls 68 | 69 | -------------------------------------------------------------------------------- /roles/servicetelemetry/templates/manifest_grafana_ds.j2: -------------------------------------------------------------------------------- 1 | apiVersion: integreatly.org/v1alpha1 2 | kind: GrafanaDataSource 3 | metadata: 4 | 
name: {{ ansible_operator_meta.name }}-datasources 5 | namespace: {{ ansible_operator_meta.namespace }} 6 | spec: 7 | datasources: 8 | {% if servicetelemetry_vars.backends.metrics.prometheus.enabled %} 9 | - access: proxy 10 | editable: true 11 | isDefault: true 12 | jsonData: 13 | timeInterval: 5s 14 | tlsAuthWithCACert: true 15 | httpHeaderName1: 'Authorization' 16 | name: STFPrometheus 17 | type: prometheus 18 | url: 'https://{{ ansible_operator_meta.name }}-prometheus-proxy.{{ ansible_operator_meta.namespace }}.svc:9092' 19 | version: 1 20 | secureJsonData: 21 | httpHeaderValue1: 'Bearer {{prometheus_reader_token}}' 22 | tlsCACert: | 23 | {{ serving_certs_ca.resources[0].data['service-ca.crt'] | indent(10) }} 24 | {% endif %} 25 | 26 | {% if servicetelemetry_vars.backends.events.elasticsearch.enabled %} 27 | - name: es_collectd 28 | access: proxy 29 | editable: true 30 | isDefault: false 31 | url: {{ servicetelemetry_vars.backends.events.elasticsearch.forwarding.host_url }} 32 | type: elasticsearch 33 | basicAuth: {{ servicetelemetry_vars.backends.events.elasticsearch.forwarding.use_basic_auth }} 34 | basicAuthUser: {{ elastic_user }} 35 | basicAuthPassword: {{ elastic_pass }} 36 | database: collectd_* 37 | jsonData: 38 | tlsSkipVerify: true 39 | timeField: generated 40 | esVersion: 70 41 | 42 | - name: es_ceilometer 43 | access: proxy 44 | editable: true 45 | isDefault: false 46 | url: {{ servicetelemetry_vars.backends.events.elasticsearch.forwarding.host_url }} 47 | type: elasticsearch 48 | basicAuth: {{ servicetelemetry_vars.backends.events.elasticsearch.forwarding.use_basic_auth }} 49 | basicAuthUser: {{ elastic_user }} 50 | basicAuthPassword: {{ elastic_pass }} 51 | database: ceilometer_* 52 | jsonData: 53 | tlsSkipVerify: true 54 | timeField: generated 55 | esVersion: 70 56 | {% endif %} 57 | name: {{ ansible_operator_meta.name }}-ds-stf.yaml 58 | -------------------------------------------------------------------------------- /roles/servicetelemetry/templates/manifest_grafana_ds_prometheus.j2: -------------------------------------------------------------------------------- 1 | apiVersion: grafana.integreatly.org/v1beta1 2 | kind: GrafanaDatasource 3 | metadata: 4 | name: {{ ansible_operator_meta.name }}-ds-stf-prometheus 5 | namespace: {{ ansible_operator_meta.namespace }} 6 | spec: 7 | instanceSelector: 8 | matchLabels: 9 | dashboards: "stf" 10 | datasource: 11 | name: STFPrometheus 12 | type: prometheus 13 | access: proxy 14 | url: 'https://{{ ansible_operator_meta.name }}-prometheus-proxy.{{ ansible_operator_meta.namespace }}.svc:9092' 15 | isDefault: true 16 | editable: true 17 | jsonData: 18 | 'timeInterval': "5s" 19 | 'tlsAuthWithCACert': true 20 | 'httpHeaderName1': 'Authorization' 21 | secureJsonData: 22 | 'httpHeaderValue1': 'Bearer {{prometheus_reader_token}}' 23 | 'tlsCACert': | 24 | {{ serving_certs_ca.resources[0].data['service-ca.crt'] | indent(8) }} 25 | -------------------------------------------------------------------------------- /roles/servicetelemetry/templates/manifest_prometheus.j2: -------------------------------------------------------------------------------- 1 | apiVersion: {{ prometheus_operator_api_string }} 2 | kind: Prometheus 3 | metadata: 4 | labels: 5 | prometheus: '{{ ansible_operator_meta.name }}' 6 | {{ prometheus_operator_label }} 7 | name: '{{ ansible_operator_meta.name }}' 8 | namespace: '{{ ansible_operator_meta.namespace }}' 9 | spec: 10 | {% if observability_strategy != "use_community" %} 11 | version: null 12 | image: {{ 
prometheus_image_path }} 13 | {% else %} 14 | version: v2.43.0 15 | {% endif %} 16 | replicas: {{ servicetelemetry_vars.backends.metrics.prometheus.deployment_size }} 17 | ruleSelector: {} 18 | securityContext: {} 19 | serviceAccountName: prometheus-stf 20 | scrapeConfigSelector: 21 | matchLabels: 22 | app: smart-gateway 23 | listenLocal: true 24 | podMetadata: 25 | labels: 26 | prometheus: '{{ ansible_operator_meta.name }}' 27 | annotations: 28 | {% if servicetelemetry_vars.alerting.enabled %} 29 | alerting: 30 | alertmanagers: 31 | - name: '{{ ansible_operator_meta.name }}-alertmanager-proxy' 32 | namespace: '{{ ansible_operator_meta.namespace }}' 33 | port: web 34 | scheme: https 35 | tlsConfig: 36 | caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt 37 | serverName: '{{ ansible_operator_meta.name }}-alertmanager-proxy.{{ ansible_operator_meta.namespace }}.svc' 38 | bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 39 | {% endif %} 40 | retention: {{ servicetelemetry_vars.backends.metrics.prometheus.storage.retention }} 41 | containers: 42 | - name: oauth-proxy 43 | image: {{ oauth_proxy_image }} 44 | args: 45 | - -https-address=:9092 46 | - -tls-cert=/etc/tls/private/tls.crt 47 | - -tls-key=/etc/tls/private/tls.key 48 | - -upstream=http://localhost:9090/ 49 | - -cookie-secret-file=/etc/proxy/secrets/session_secret 50 | - -openshift-service-account=prometheus-stf 51 | - '-openshift-sar={"namespace":"{{ ansible_operator_meta.namespace }}","resource": "prometheuses", "resourceAPIGroup":"{{ prometheus_operator_api_string | replace("/v1","") }}", "verb":"get"}' 52 | - '-openshift-delegate-urls={"/":{"namespace":"{{ ansible_operator_meta.namespace }}","resource": "prometheuses", "group":"{{ prometheus_operator_api_string | replace("/v1","") }}", "verb":"get"}}' 53 | 54 | ports: 55 | - containerPort: 9092 56 | name: https 57 | protocol: TCP 58 | volumeMounts: 59 | - mountPath: /etc/tls/private 60 | name: secret-{{ ansible_operator_meta.name }}-prometheus-proxy-tls 61 | - mountPath: /etc/proxy/secrets 62 | name: secret-{{ ansible_operator_meta.name }}-session-secret 63 | configMaps: 64 | - serving-certs-ca-bundle 65 | secrets: 66 | - '{{ ansible_operator_meta.name }}-prometheus-proxy-tls' 67 | - '{{ ansible_operator_meta.name }}-session-secret' 68 | {% if servicetelemetry_vars.backends.metrics.prometheus.storage.strategy == "persistent" %} 69 | storage: 70 | volumeClaimTemplate: 71 | spec: 72 | resources: 73 | requests: 74 | storage: {{ servicetelemetry_vars.backends.metrics.prometheus.storage.persistent.pvc_storage_request }} 75 | {% if servicetelemetry_vars.backends.metrics.prometheus.storage.persistent.storage_selector is defined %} 76 | selector: {{ servicetelemetry_vars.backends.metrics.prometheus.storage.persistent.storage_selector }} 77 | {% endif %} 78 | {% if servicetelemetry_vars.backends.metrics.prometheus.storage.persistent.storage_class | length %} 79 | storageClassName: {{ servicetelemetry_vars.backends.metrics.prometheus.storage.persistent.storage_class }} 80 | {% endif %} 81 | {% endif %} 82 | -------------------------------------------------------------------------------- /roles/servicetelemetry/templates/manifest_prometheus_route.j2: -------------------------------------------------------------------------------- 1 | apiVersion: route.openshift.io/v1 2 | kind: Route 3 | metadata: 4 | name: '{{ ansible_operator_meta.name }}-prometheus-proxy' 5 | namespace: '{{ ansible_operator_meta.namespace }}' 6 | spec: 7 | port: 8 | targetPort: web 
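    # NOTE: 'web' is the named port on the {{ ansible_operator_meta.name }}-prometheus-proxy
    # Service (see manifest_prometheus_service.j2); it forwards to the oauth-proxy sidecar
    # listening on 9092, so the Route targets the authenticating proxy rather than
    # Prometheus itself on 9090.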
9 | to: 10 | kind: Service 11 | name: '{{ ansible_operator_meta.name }}-prometheus-proxy' 12 | tls: 13 | insecureEdgeTerminationPolicy: Redirect 14 | termination: Reencrypt 15 | -------------------------------------------------------------------------------- /roles/servicetelemetry/templates/manifest_prometheus_service.j2: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: '{{ ansible_operator_meta.name }}-prometheus-proxy' 5 | namespace: '{{ ansible_operator_meta.namespace }}' 6 | annotations: 7 | service.beta.openshift.io/serving-cert-secret-name: '{{ ansible_operator_meta.name }}-prometheus-proxy-tls' 8 | spec: 9 | ports: 10 | - name: web 11 | port: 9092 12 | targetPort: https 13 | selector: 14 | prometheus: {{ ansible_operator_meta.name }} 15 | -------------------------------------------------------------------------------- /roles/servicetelemetry/templates/manifest_smartgateway_events.j2: -------------------------------------------------------------------------------- 1 | apiVersion: smartgateway.infra.watch/v2 2 | kind: SmartGateway 3 | metadata: 4 | name: {{ this_smartgateway }} 5 | namespace: {{ ansible_operator_meta.namespace }} 6 | spec: 7 | {% if this_collector.debug_enabled is defined and this_collector.debug_enabled %} 8 | logLevel: debug 9 | {% else %} 10 | logLevel: info 11 | {% endif %} 12 | handleErrors: true 13 | size: {{ smartgateway_deployment_size }} 14 | applications: 15 | - config: | 16 | hostURL: {{ elasticsearch.forwarding.host_url }} 17 | useTLS: {{ elasticsearch.forwarding.use_tls }} 18 | tlsClientCert: /config/certs/user.crt 19 | tlsClientKey: /config/certs/user.key 20 | tlsCaCert: /config/certs/ca.crt 21 | tlsServerName: {{ elastic_tls_server_name }} 22 | tlsSecretName: {{ elasticsearch.forwarding.tls_secret_name }} 23 | user: {{ elastic_user | default('elastic') }} 24 | password: {{ elastic_pass | default('') }} 25 | useBasicAuth: {{ elasticsearch.forwarding.use_basic_auth }} 26 | name: elasticsearch 27 | bridge: 28 | amqpUrl: amqp://{{ ansible_operator_meta.name }}-interconnect.{{ ansible_operator_meta.namespace }}.svc.cluster.local:5673/{{ this_collector.subscription_address }} 29 | ringBufferSize: {{ this_collector.bridge.ring_buffer_size | default(16384) }} 30 | ringBufferCount: {{ this_collector.bridge.ring_buffer_count | default(15000) }} 31 | verbose: {{ this_collector.bridge.verbose | default(false) }} 32 | transports: 33 | - config: | 34 | path: /tmp/smartgateway 35 | handlers: 36 | - config: "" 37 | name: events 38 | strictSource: {{ this_collector.collector_type }} 39 | name: socket 40 | -------------------------------------------------------------------------------- /roles/servicetelemetry/templates/manifest_smartgateway_metrics.j2: -------------------------------------------------------------------------------- 1 | apiVersion: smartgateway.infra.watch/v2 2 | kind: SmartGateway 3 | metadata: 4 | name: '{{ this_smartgateway }}' 5 | namespace: '{{ ansible_operator_meta.namespace }}' 6 | spec: 7 | {% if this_collector.debug_enabled is defined and this_collector.debug_enabled %} 8 | logLevel: "debug" 9 | {% else %} 10 | logLevel: "info" 11 | {% endif %} 12 | handleErrors: true 13 | size: {{ smartgateway_deployment_size }} 14 | applications: 15 | - config: | 16 | host: 127.0.0.1 17 | port: 8081 18 | withTimeStamp: true 19 | name: prometheus 20 | bridge: 21 | amqpUrl: amqp://{{ ansible_operator_meta.name }}-interconnect.{{ ansible_operator_meta.namespace 
}}.svc.cluster.local:5673/{{ this_collector.subscription_address }} 22 | ringBufferSize: {{ this_collector.bridge.ring_buffer_size | default(16384) }} 23 | ringBufferCount: {{ this_collector.bridge.ring_buffer_count | default(15000) }} 24 | verbose: {{ this_collector.bridge.verbose | default(false)}} 25 | services: 26 | - name: {{ this_smartgateway }} 27 | ports: 28 | - name: prom-https 29 | port: 8083 30 | protocol: TCP 31 | targetPort: 8083 32 | transports: 33 | - config: | 34 | path: /tmp/smartgateway 35 | handlers: 36 | - config: "" 37 | name: {{ this_collector.collector_type }}-metrics 38 | name: socket 39 | -------------------------------------------------------------------------------- /roles/servicetelemetry/templates/manifest_snmp_traps.j2: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: '{{ ansible_operator_meta.name }}-snmp-webhook' 5 | namespace: '{{ ansible_operator_meta.namespace }}' 6 | spec: 7 | selector: 8 | matchLabels: 9 | app: '{{ ansible_operator_meta.name }}-snmp-webhook' 10 | replicas: 1 11 | template: 12 | metadata: 13 | labels: 14 | app: '{{ ansible_operator_meta.name }}-snmp-webhook' 15 | spec: 16 | containers: 17 | - name: 'prometheus-webhook-snmp' 18 | image: '{{ prometheus_webhook_snmp_container_image_path }}' 19 | ports: 20 | - containerPort: 9099 21 | env: 22 | - name: SNMP_COMMUNITY 23 | value: "{{ servicetelemetry_vars.alerting.alertmanager.receivers.snmp_traps.community }}" 24 | - name: SNMP_RETRIES 25 | value: "{{ servicetelemetry_vars.alerting.alertmanager.receivers.snmp_traps.retries }}" 26 | - name: SNMP_HOST 27 | value: "{{ servicetelemetry_vars.alerting.alertmanager.receivers.snmp_traps.target }}" 28 | - name: SNMP_PORT 29 | value: "{{ servicetelemetry_vars.alerting.alertmanager.receivers.snmp_traps.port }}" 30 | - name: SNMP_TIMEOUT 31 | value: "{{ servicetelemetry_vars.alerting.alertmanager.receivers.snmp_traps.timeout }}" 32 | - name: ALERT_OID_LABEL 33 | value: "{{ servicetelemetry_vars.alerting.alertmanager.receivers.snmp_traps.alert_oid_label }}" 34 | - name: TRAP_OID_PREFIX 35 | value: "{{ servicetelemetry_vars.alerting.alertmanager.receivers.snmp_traps.trap_oid_prefix }}" 36 | - name: TRAP_DEFAULT_OID 37 | value: "{{ servicetelemetry_vars.alerting.alertmanager.receivers.snmp_traps.trap_default_oid }}" 38 | - name: TRAP_DEFAULT_SEVERITY 39 | value: "{{ servicetelemetry_vars.alerting.alertmanager.receivers.snmp_traps.trap_default_severity }}" 40 | -------------------------------------------------------------------------------- /roles/servicetelemetry/templates/manifest_snmp_traps_service.j2: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: '{{ ansible_operator_meta.name}}-prometheus-webhook-snmp' 5 | namespace: '{{ ansible_operator_meta.namespace }}' 6 | spec: 7 | selector: 8 | app: '{{ ansible_operator_meta.name }}-snmp-webhook' 9 | ports: 10 | - protocol: TCP 11 | port: 9099 12 | targetPort: 9099 13 | -------------------------------------------------------------------------------- /roles/servicetelemetry/vars/dummy_user_certs.yml: -------------------------------------------------------------------------------- 1 | # These are required because sg-core always expects there to be a user cert + key, whether we need it for auth or not 2 | # CN = STF DUMMY USER CERT - DO NOT USE 3 | elastic_user_cert_dummy: | 4 | -----BEGIN CERTIFICATE----- 5 | 
MIIEAzCCAuugAwIBAgIUVwi6wEIGgmyQfZ8s1+oqaf+yTpcwDQYJKoZIhvcNAQEL 6 | BQAwgZAxCzAJBgNVBAYTAlVTMRcwFQYDVQQIDA5Ob3J0aCBDYXJvbGluYTEQMA4G 7 | A1UEBwwHUmFsZWlnaDEUMBIGA1UECgwLUmVkIEhhdCBJbmMxFTATBgNVBAsMDE9T 8 | UCBDbG91ZG9wczEpMCcGA1UEAwwgU1RGIERVTU1ZIFVTRVIgQ0VSVCAtIERPIE5P 9 | VCBVU0UwHhcNMjMwNjEyMTgxODQ1WhcNMjMwNjEzMTgxODQ1WjCBkDELMAkGA1UE 10 | BhMCVVMxFzAVBgNVBAgMDk5vcnRoIENhcm9saW5hMRAwDgYDVQQHDAdSYWxlaWdo 11 | MRQwEgYDVQQKDAtSZWQgSGF0IEluYzEVMBMGA1UECwwMT1NQIENsb3Vkb3BzMSkw 12 | JwYDVQQDDCBTVEYgRFVNTVkgVVNFUiBDRVJUIC0gRE8gTk9UIFVTRTCCASIwDQYJ 13 | KoZIhvcNAQEBBQADggEPADCCAQoCggEBANQU/9/BEJbuX2xJUozSbUvG7qlk6yEi 14 | KcFjkUwnXT+131ho+UWUn29yuqXI60E+8trWsL3uFlMbGh9t2VRfbfNNZiqon197 15 | CfzqS596AP8HtTZZx0Qy4sZrPRs8ffR/3wMjp8kMj+2jPpMq0zngJ1efHK7Z6GSR 16 | IveXbCCfPQU4tvT3aQ5JQkIWvIo7kuS/u9K6LvOspYP04YNLUZdMCJDNE8hSpEkv 17 | KfG7ZL2cfWF1nsX5+qyU5aIrUS7RYd/HGMKvpA0/Lvzl5FBMZ0BCF00LmY1tjUzK 18 | DhHR62g/IkRaq8rrjdE+H2isVgSAIPAvnC039ePE4OOsoqO+aYYWqEsCAwEAAaNT 19 | MFEwHQYDVR0OBBYEFMKfKoCQcbkb9BBDxXAQjYLSUWtoMB8GA1UdIwQYMBaAFMKf 20 | KoCQcbkb9BBDxXAQjYLSUWtoMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEL 21 | BQADggEBAI9q3AhqodLtsbET7yoZ2vAj8fzJyu5FXyyEf9wVgsyoJ56H77s2yp+b 22 | iixc/MI9jsLX9wXa8monxdbHu01rlB8c9OwbcAyAhlnBWYcPqVFz4bjqNw5sH2Jg 23 | vqIaGncn62IZv3mvN1VpyKZp2MbZGJdbgU8X3h1C6DCLf4fToFlqiiZ/XVvbk824 24 | j/OZ9l6Eu8VVekIQXVp2m9PPndOuEsBIMBkiB/xf32RTiOYWSG5mp70fxD7n2of/ 25 | yb7hY+fL/wlucqS4ryT+2307ouEcTmpDSjHwKZRUYUDBZ4TmxCx5LlkuTO9MRnRy 26 | 9hCGFF1rI+K33F952hxjkNaSSZvt3lQ= 27 | -----END CERTIFICATE----- 28 | 29 | elastic_user_key_dummy: | 30 | -----BEGIN RSA PRIVATE KEY----- 31 | MIIEpAIBAAKCAQEA1BT/38EQlu5fbElSjNJtS8buqWTrISIpwWORTCddP7XfWGj5 32 | RZSfb3K6pcjrQT7y2tawve4WUxsaH23ZVF9t801mKqifX3sJ/OpLn3oA/we1NlnH 33 | RDLixms9Gzx99H/fAyOnyQyP7aM+kyrTOeAnV58crtnoZJEi95dsIJ89BTi29Pdp 34 | DklCQha8ijuS5L+70rou86ylg/Thg0tRl0wIkM0TyFKkSS8p8btkvZx9YXWexfn6 35 | rJTloitRLtFh38cYwq+kDT8u/OXkUExnQEIXTQuZjW2NTMoOEdHraD8iRFqryuuN 36 | 0T4faKxWBIAg8C+cLTf148Tg46yio75phhaoSwIDAQABAoIBABXMUsBcx6e7uHMY 37 | 1jNDLZisSbt/c+tj54bJBRYetabmup4LrBNKw1hhIm4HyKZcIfn8Nw5OelzwXC7+ 38 | y2ewp0xqmCWqTzcxHkWwjzVFBPUxhZ6ge6q20Dg0rYMvJIMM4Y8hCw3PDLwQG05l 39 | CHDaaTDIWdpe61Pq1v07wxFXTJ5MlgjoIfDN3xCFhHOEpbNCl6yVie4irjmxItS9 40 | Xp1/tdqtq8xSAAo9wWGb9SjsOn/C/AMtxerdHFjv8QErrA/ta/5qXa3KdEnElHqc 41 | 2HkGt5w5FcRXCwrUW1MwnBzwbK5kEZth3D/i41y/F4vjwYwPfHRh3AeOpDpul0XW 42 | qH+8qQECgYEA9iiUvbepX4mnj7CIQGKlDRCvvhdCUBnIAgA9/L8WWZIDvAEl1Rka 43 | avAIvLMCTzoAO+TframNef0dNJWOAo/WQ/ViiLaqg7gbGE2DPjLitk0XaeKl+XAv 44 | ip0K1Qouzxv2FFJR4h9iDCWjRIeClKIhE1sEMyJk45qyR4bMx0jQZJMCgYEA3I+l 45 | wOO0kLD2lk/t9JBiBSLUrr6/mkPkCT7wn9U7owwHuoPDJHYX8+7y2u8vow6fgQyD 46 | Jvud8wQOV4owBOBNafBT8a3Vp3W1lLTm1r0jJ7qbVNuMAnXcj1S0Q3VNX/jvO6wn 47 | q6Ddxqh9p9+tYSNzwnD5XqxLeZiHXWCE2fB1+GkCgYEAillwj9iD52BUvtu3GIjY 48 | vykbvTkRWjfDQ+yi6kTz6M+6LZZvjv+W63eRUY1CxQiSTRdr6A0dqOxr17wenq38 49 | /SETikcwOuvkvpoCI5kx9sgJWse6BSHadouhJO+eM2VBv1YtE2wUDUOyKbgH2kXt 50 | VRWYnKy+C3ZMsQrAWVlBVuUCgYBI6LNCMANgUR8yUPm3/oJocDseCLANrqOS6ttf 51 | +nzcSP3FCglX5DHG0RY2iRqWLB9N6XTxTfvIeW7EQUneUsdEXc1h9rTJxn9fyO0F 52 | zz/vwh/WzTxbE9r1BmsQYZZSQ1fRwfbbJTIqmUfwVmBZ2/5IKFBGm23XpDQbCezg 53 | njxhAQKBgQD0lOpKtL8qz9gmqtkhDRe+EPHSX8rfirqqRrPUiwK7kAJeW2vtU8aa 54 | hFT7lEDjb7ERyZfybIkTVVBipKx2yse9nE+1dPGIgZop3E1guDuF9aOAzIUd/+/s 55 | CI7s/lIBZsPD3PyxXXRtsvN7iUv5tLvNFhfomB7miTYHE+MC5QHJVQ== 56 | -----END RSA PRIVATE KEY----- 57 | -------------------------------------------------------------------------------- /roles/servicetelemetry/vars/main.yml: 
-------------------------------------------------------------------------------- 1 | --- 2 | # vars file for servicetelemetry 3 | servicetelemetry_vars: 4 | high_availability: | 5 | {%- if high_availability is defined and high_availability is iterable -%} 6 | {{ servicetelemetry_defaults.high_availability | combine((high_availability | stripnone), recursive=True) }} 7 | {%- else -%} 8 | {{ servicetelemetry_defaults.high_availability }} 9 | {%- endif -%} 10 | alerting: | 11 | {%- if alerting is defined and alerting is iterable -%} 12 | {{ servicetelemetry_defaults.alerting | combine((alerting | stripnone), recursive=True) }} 13 | {%- else -%} 14 | {{ servicetelemetry_defaults.alerting }} 15 | {%- endif -%} 16 | backends: | 17 | {%- if backends is defined and backends is iterable -%} 18 | {{ servicetelemetry_defaults.backends | combine((backends | stripnone), recursive=True) }} 19 | {%- else -%} 20 | {{ servicetelemetry_defaults.backends }} 21 | {%- endif -%} 22 | transports: | 23 | {%- if transports is defined and transports is iterable -%} 24 | {{ servicetelemetry_defaults.transports | combine((transports | stripnone), recursive=True) }} 25 | {%- else -%} 26 | {{ servicetelemetry_defaults.transports }} 27 | {%- endif -%} 28 | graphing: | 29 | {%- if graphing is defined and graphing is iterable -%} 30 | {{ servicetelemetry_defaults.graphing | combine((graphing | stripnone), recursive=True) }} 31 | {%- else -%} 32 | {{ servicetelemetry_defaults.graphing }} 33 | {%- endif -%} 34 | clouds: | 35 | {%- if clouds is not defined -%} 36 | {{ servicetelemetry_defaults.clouds }} 37 | {%- else -%} 38 | {{ clouds }} 39 | {%- endif -%} 40 | -------------------------------------------------------------------------------- /tests/infrared/.gitignore: -------------------------------------------------------------------------------- 1 | outputs/** 2 | !outputs/.KEEPIT 3 | 4 | -------------------------------------------------------------------------------- /tests/infrared/13/.gitignore: -------------------------------------------------------------------------------- 1 | outputs/** 2 | !outputs/.KEEPIT 3 | 4 | -------------------------------------------------------------------------------- /tests/infrared/13/baremetal-scripts/install-and-run-minishift.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #set -e 3 | 4 | # Get minishift 5 | MINISHIFT_VER=1.34.1 6 | MINISHIFT_NAME="minishift-${MINISHIFT_VER}-linux-amd64" 7 | wget https://github.com/minishift/minishift/releases/download/v${MINISHIFT_VER}/${MINISHIFT_NAME}.tgz 8 | tar -xvzf ${MINISHIFT_NAME}.tgz 9 | cd ${MINISHIFT_NAME} 10 | 11 | # Get KVM driver 12 | # https://docs.okd.io/latest/minishift/getting-started/setting-up-virtualization-environment.html#kvm-driver-fedora 13 | curl -L https://github.com/dhiltgen/docker-machine-kvm/releases/download/v0.10.0/docker-machine-driver-kvm-centos7 -o docker-machine-driver-kvm 14 | chmod +x docker-machine-driver-kvm 15 | 16 | # So we keep to our directory but minishift still finds docker-machine-driver 17 | PATH=$PATH:. 
18 | 19 | # Start minishift 20 | ./minishift addons enable registry-route 21 | ./minishift addons enable admin-user 22 | ./minishift start 23 | ./minishift ssh -- sudo sysctl -w vm.max_map_count=262144 -------------------------------------------------------------------------------- /tests/infrared/13/enable-stf.yaml.template: -------------------------------------------------------------------------------- 1 | # enable data collection that is compatible with the STF data model 2 | # enablement of the collectors and transport are done with separate enviroment files 3 | # recommended: 4 | # - environments/metrics/ceilometer-qdr-write.yaml 5 | # - environments/metrics/collectd-qdr-write.yaml 6 | # - environments/metrics/qdr-edge-only.yaml 7 | 8 | --- 9 | tripleo_heat_templates: 10 | [] 11 | 12 | custom_templates: 13 | # matches the documentation for enable-stf.yaml in stable-1.3 documentation 14 | parameter_defaults: 15 | # only send to STF, not other publishers 16 | EventPipelinePublishers: [] 17 | PipelinePublishers: [] 18 | 19 | # manage the polling and pipeline configuration files for Ceilometer agents 20 | ManagePolling: true 21 | ManagePipeline: true 22 | 23 | # enable Ceilometer metrics and events 24 | CeilometerQdrPublishMetrics: true 25 | CeilometerQdrPublishEvents: true 26 | 27 | # set collectd overrides for higher telemetry resolution and extra plugins 28 | # to load 29 | CollectdConnectionType: amqp1 30 | CollectdAmqpInterval: 5 31 | CollectdDefaultPollingInterval: 5 32 | CollectdDefaultPlugins: 33 | - cpu 34 | - df 35 | - disk 36 | - hugepages 37 | - interface 38 | - load 39 | - memory 40 | - processes 41 | - unixsock 42 | - uptime 43 | CollectdExtraPlugins: 44 | - vmem 45 | 46 | # set standard prefixes for where metrics and events are published to QDR 47 | MetricsQdrAddresses: 48 | - prefix: 'collectd' 49 | distribution: multicast 50 | - prefix: 'anycast/ceilometer' 51 | distribution: multicast 52 | 53 | ExtraConfig: 54 | ceilometer::agent::polling::polling_interval: 30 55 | ceilometer::agent::polling::polling_meters: 56 | - cpu 57 | - disk.* 58 | - ip.* 59 | - image.* 60 | - memory 61 | - memory.* 62 | - network.* 63 | - perf.* 64 | - port 65 | - port.* 66 | - switch 67 | - switch.* 68 | - storage.* 69 | - volume.* 70 | 71 | # to avoid filling the memory buffers if disconnected from the message bus 72 | collectd::plugin::amqp1::send_queue_limit: 50 73 | 74 | # receive extra information about virtual memory 75 | collectd::plugin::vmem::verbose: true 76 | 77 | # provide the human-friendly name of the virtual instance 78 | collectd::plugin::virt::plugin_instance_format: metadata 79 | -------------------------------------------------------------------------------- /tests/infrared/13/minishift-stf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/env bash 2 | LOCAL_TUNNEL_PORT="${LOCAL_TUNNEL_PORT:-8787}" 3 | 4 | # Install minishift on $VIRTHOST 5 | scp -r baremetal-scripts/ "root@${VIRTHOST}:" 6 | ssh "root@${VIRTHOST}" ./baremetal-scripts/install-and-run-minishift.sh 7 | 8 | # Get a functioning kubeconfig for the minishift instance 9 | KUBECONFIG="$(pwd)/outputs/kubeconfig-${VIRTHOST}" 10 | scp "root@${VIRTHOST}:.kube/config" "${KUBECONFIG}" 11 | 12 | 13 | # Tunnel the k8s API port 14 | MINISHIFT_HOST_PORT=$(grep server "${KUBECONFIG}" | awk -F '://' '{print $2}') 15 | ssh -N -L "${LOCAL_TUNNEL_PORT}:${MINISHIFT_HOST_PORT}" "root@${VIRTHOST}" & 16 | sed -i.orig -e 
"s#https://${MINISHIFT_HOST_PORT}#https://localhost:${LOCAL_TUNNEL_PORT}#" "${KUBECONFIG}" 17 | 18 | # Switch to tunneled minishift context 19 | export KUBECONFIG 20 | 21 | pushd . 22 | cd ../../deploy/ 23 | ./quickstart.sh 24 | 25 | cd ../tests 26 | ./smoketest.sh 27 | popd -------------------------------------------------------------------------------- /tests/infrared/13/outputs/.KEEPIT: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infrawatch/service-telemetry-operator/b21be12e3da5fc020d4d4b80808b86e8b8e7484c/tests/infrared/13/outputs/.KEEPIT -------------------------------------------------------------------------------- /tests/infrared/13/stf-connectors.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | tripleo_heat_templates: 3 | [] 4 | 5 | custom_templates: 6 | # don't load collectd-write-qdr.yaml when using multi-cloud and instead load collectd service directly 7 | resource_registry: 8 | OS::TripleO::Services::Collectd: /usr/share/openstack-tripleo-heat-templates/docker/services/metrics/collectd.yaml 9 | 10 | # set parameter defaults to match stable-1.3 documentation 11 | parameter_defaults: 12 | CeilometerQdrEventsConfig: 13 | driver: amqp 14 | topic: <>-event 15 | CeilometerQdrMetricsConfig: 16 | driver: amqp 17 | topic: <>-metering 18 | CollectdAmqpInstances: 19 | <>-notify: 20 | format: JSON 21 | notify: true 22 | presettle: false 23 | <>-telemetry: 24 | format: JSON 25 | presettle: false 26 | MetricsQdrConnectors: 27 | - host: <> 28 | port: <> 29 | role: edge 30 | sslProfile: sslProfile 31 | verifyHostname: false 32 | MetricsQdrSSLProfiles: 33 | - name: sslProfile 34 | caCertFileContent: | 35 | <> 36 | 37 | -------------------------------------------------------------------------------- /tests/infrared/16.1/.gitignore: -------------------------------------------------------------------------------- 1 | outputs/** 2 | !outputs/.KEEPIT 3 | 4 | -------------------------------------------------------------------------------- /tests/infrared/16.1/enable-stf.yaml.template: -------------------------------------------------------------------------------- 1 | # enable data collection that is compatible with the STF data model 2 | # enablement of the collectors and transport are done with separate enviroment files 3 | # recommended: 4 | # - environments/metrics/ceilometer-qdr-write.yaml 5 | # - environments/metrics/collectd-qdr-write.yaml 6 | # - environments/metrics/qdr-edge-only.yaml 7 | 8 | --- 9 | tripleo_heat_templates: 10 | [] 11 | 12 | custom_templates: 13 | # matches the documentation for enable-stf.yaml in stable-1.3 documentation 14 | parameter_defaults: 15 | # only send to STF, not other publishers 16 | EventPipelinePublishers: [] 17 | PipelinePublishers: [] 18 | 19 | # manage the polling and pipeline configuration files for Ceilometer agents 20 | ManagePolling: true 21 | ManagePipeline: true 22 | 23 | # enable Ceilometer metrics and events 24 | CeilometerQdrPublishMetrics: true 25 | CeilometerQdrPublishEvents: true 26 | 27 | # enable collection of API status 28 | CollectdEnableSensubility: true 29 | CollectdSensubilityTransport: amqp1 30 | 31 | # enable collection of containerized service metrics 32 | CollectdEnableLibpodstats: true 33 | 34 | # set collectd overrides for higher telemetry resolution and extra plugins 35 | # to load 36 | CollectdConnectionType: amqp1 37 | CollectdAmqpInterval: 5 38 | CollectdDefaultPollingInterval: 5 39 | 
CollectdExtraPlugins: 40 | - vmem 41 | 42 | # set standard prefixes for where metrics and events are published to QDR 43 | MetricsQdrAddresses: 44 | - prefix: 'collectd' 45 | distribution: multicast 46 | - prefix: 'anycast/ceilometer' 47 | distribution: multicast 48 | 49 | ExtraConfig: 50 | ceilometer::agent::polling::polling_interval: 30 51 | ceilometer::agent::polling::polling_meters: 52 | - cpu 53 | - disk.* 54 | - ip.* 55 | - image.* 56 | - memory 57 | - memory.* 58 | - network.* 59 | - perf.* 60 | - port 61 | - port.* 62 | - switch 63 | - switch.* 64 | - storage.* 65 | - volume.* 66 | 67 | # to avoid filling the memory buffers if disconnected from the message bus 68 | collectd::plugin::amqp1::send_queue_limit: 50 69 | 70 | # receive extra information about virtual memory 71 | collectd::plugin::vmem::verbose: true 72 | 73 | # provide name and uuid in addition to hostname for better correlation 74 | # to ceilometer data 75 | collectd::plugin::virt::hostname_format: "name uuid hostname" 76 | 77 | # provide the human-friendly name of the virtual instance 78 | collectd::plugin::virt::plugin_instance_format: metadata 79 | 80 | # set memcached collectd plugin to report its metrics by hostname 81 | # rather than host IP, ensuring metrics in the dashboard remain uniform 82 | collectd::plugin::memcached::instances: 83 | local: 84 | host: "%{hiera('fqdn_canonical')}" 85 | port: 11211 86 | -------------------------------------------------------------------------------- /tests/infrared/16.1/gnocchi-connectors.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | tripleo_heat_templates: 3 | [] 4 | 5 | custom_templates: 6 | resource_registry: 7 | OS::TripleO::Services::GnocchiApi: /usr/share/openstack-tripleo-heat-templates/deployment/gnocchi/gnocchi-api-container-puppet.yaml 8 | OS::TripleO::Services::GnocchiMetricd: /usr/share/openstack-tripleo-heat-templates/deployment/gnocchi/gnocchi-metricd-container-puppet.yaml 9 | OS::TripleO::Services::GnocchiStatsd: /usr/share/openstack-tripleo-heat-templates/deployment/gnocchi/gnocchi-statsd-container-puppet.yaml 10 | OS::TripleO::Services::AodhApi: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-api-container-puppet.yaml 11 | OS::TripleO::Services::AodhEvaluator: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-evaluator-container-puppet.yaml 12 | OS::TripleO::Services::AodhNotifier: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-notifier-container-puppet.yaml 13 | OS::TripleO::Services::AodhListener: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-listener-container-puppet.yaml 14 | 15 | parameter_defaults: 16 | CeilometerEnableGnocchi: true 17 | CeilometerEnablePanko: false 18 | GnocchiArchivePolicy: 'high' 19 | GnocchiBackend: 'rbd' 20 | GnocchiRbdPoolName: 'metrics' 21 | 22 | EventPipelinePublishers: ['gnocchi://?filter_project=service'] 23 | PipelinePublishers: ['gnocchi://?filter_project=service'] 24 | 25 | -------------------------------------------------------------------------------- /tests/infrared/16.1/outputs/.KEEPIT: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infrawatch/service-telemetry-operator/b21be12e3da5fc020d4d4b80808b86e8b8e7484c/tests/infrared/16.1/outputs/.KEEPIT -------------------------------------------------------------------------------- /tests/infrared/16.1/stf-connectors.yaml.template: 
-------------------------------------------------------------------------------- 1 | --- 2 | tripleo_heat_templates: 3 | [] 4 | 5 | custom_templates: 6 | # don't load collectd-write-qdr.yaml when using multi-cloud and instead load collectd service directly 7 | resource_registry: 8 | OS::TripleO::Services::Collectd: /usr/share/openstack-tripleo-heat-templates/deployment/metrics/collectd-container-puppet.yaml 9 | 10 | # set parameter defaults to match stable-1.3 documentation 11 | parameter_defaults: 12 | MetricsQdrConnectors: 13 | - host: <> 14 | port: <> 15 | role: edge 16 | verifyHostname: false 17 | sslProfile: sslProfile 18 | 19 | MetricsQdrSSLProfiles: 20 | - name: sslProfile 21 | 22 | CeilometerQdrEventsConfig: 23 | driver: amqp 24 | topic: <>-event 25 | 26 | CeilometerQdrMetricsConfig: 27 | driver: amqp 28 | topic: <>-metering 29 | 30 | CollectdAmqpInstances: 31 | <>-notify: 32 | format: JSON 33 | notify: true 34 | presettle: false 35 | <>-telemetry: 36 | format: JSON 37 | presettle: false 38 | 39 | CollectdSensubilityResultsChannel: sensubility/<>-telemetry 40 | 41 | # --- below here, extended configuration for environment beyond what is documented in stable-1.3 42 | CollectdSensubilityLogLevel: DEBUG 43 | CephStorageExtraConfig: 44 | tripleo::profile::base::metrics::collectd::amqp_host: "%{hiera('storage')}" 45 | tripleo::profile::base::metrics::qdr::listener_addr: "%{hiera('storage')}" 46 | 47 | ExtraConfig: 48 | collectd::plugin::ceph::daemons: 49 | - ceph-osd.0 50 | - ceph-osd.1 51 | - ceph-osd.2 52 | - ceph-osd.3 53 | - ceph-osd.4 54 | - ceph-osd.5 55 | - ceph-osd.6 56 | - ceph-osd.7 57 | - ceph-osd.8 58 | - ceph-osd.9 59 | - ceph-osd.10 60 | - ceph-osd.11 61 | - ceph-osd.12 62 | - ceph-osd.13 63 | - ceph-osd.14 64 | -------------------------------------------------------------------------------- /tests/infrared/16.2/.gitignore: -------------------------------------------------------------------------------- 1 | outputs/** 2 | !outputs/.KEEPIT 3 | 4 | -------------------------------------------------------------------------------- /tests/infrared/16.2/enable-stf.yaml.template: -------------------------------------------------------------------------------- 1 | # enable data collection that is compatible with the STF data model 2 | # enablement of the collectors and transport are done with separate enviroment files 3 | # recommended: 4 | # - environments/metrics/ceilometer-qdr-write.yaml 5 | # - environments/metrics/collectd-qdr-write.yaml 6 | # - environments/metrics/qdr-edge-only.yaml 7 | 8 | --- 9 | tripleo_heat_templates: 10 | [] 11 | 12 | custom_templates: 13 | # matches the documentation for enable-stf.yaml in stable-1.3 documentation 14 | parameter_defaults: 15 | # only send to STF, not other publishers 16 | EventPipelinePublishers: [] 17 | PipelinePublishers: [] 18 | 19 | # manage the polling and pipeline configuration files for Ceilometer agents 20 | ManagePolling: true 21 | ManagePipeline: true 22 | 23 | # enable Ceilometer metrics and events 24 | CeilometerQdrPublishMetrics: true 25 | CeilometerQdrPublishEvents: true 26 | 27 | # enable collection of API status 28 | CollectdEnableSensubility: true 29 | CollectdSensubilityTransport: amqp1 30 | 31 | # enable collection of containerized service metrics 32 | CollectdEnableLibpodstats: true 33 | 34 | # set collectd overrides for higher telemetry resolution and extra plugins 35 | # to load 36 | CollectdConnectionType: amqp1 37 | CollectdAmqpInterval: 5 38 | CollectdDefaultPollingInterval: 5 39 | CollectdExtraPlugins: 40 | - 
vmem 41 | 42 | # set standard prefixes for where metrics and events are published to QDR 43 | MetricsQdrAddresses: 44 | - prefix: 'collectd' 45 | distribution: multicast 46 | - prefix: 'anycast/ceilometer' 47 | distribution: multicast 48 | 49 | ExtraConfig: 50 | ceilometer::agent::polling::polling_interval: 30 51 | ceilometer::agent::polling::polling_meters: 52 | - cpu 53 | - disk.* 54 | - ip.* 55 | - image.* 56 | - memory 57 | - memory.* 58 | - network.* 59 | - perf.* 60 | - port 61 | - port.* 62 | - switch 63 | - switch.* 64 | - storage.* 65 | - volume.* 66 | 67 | # to avoid filling the memory buffers if disconnected from the message bus 68 | collectd::plugin::amqp1::send_queue_limit: 50 69 | 70 | # receive extra information about virtual memory 71 | collectd::plugin::vmem::verbose: true 72 | 73 | # provide name and uuid in addition to hostname for better correlation 74 | # to ceilometer data 75 | collectd::plugin::virt::hostname_format: "name uuid hostname" 76 | 77 | # provide the human-friendly name of the virtual instance 78 | collectd::plugin::virt::plugin_instance_format: metadata 79 | 80 | # set memcached collectd plugin to report its metrics by hostname 81 | # rather than host IP, ensuring metrics in the dashboard remain uniform 82 | collectd::plugin::memcached::instances: 83 | local: 84 | host: "%{hiera('fqdn_canonical')}" 85 | port: 11211 86 | -------------------------------------------------------------------------------- /tests/infrared/16.2/gnocchi-connectors.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | tripleo_heat_templates: 3 | [] 4 | 5 | custom_templates: 6 | resource_registry: 7 | OS::TripleO::Services::GnocchiApi: /usr/share/openstack-tripleo-heat-templates/deployment/gnocchi/gnocchi-api-container-puppet.yaml 8 | OS::TripleO::Services::GnocchiMetricd: /usr/share/openstack-tripleo-heat-templates/deployment/gnocchi/gnocchi-metricd-container-puppet.yaml 9 | OS::TripleO::Services::GnocchiStatsd: /usr/share/openstack-tripleo-heat-templates/deployment/gnocchi/gnocchi-statsd-container-puppet.yaml 10 | OS::TripleO::Services::AodhApi: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-api-container-puppet.yaml 11 | OS::TripleO::Services::AodhEvaluator: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-evaluator-container-puppet.yaml 12 | OS::TripleO::Services::AodhNotifier: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-notifier-container-puppet.yaml 13 | OS::TripleO::Services::AodhListener: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-listener-container-puppet.yaml 14 | 15 | parameter_defaults: 16 | CeilometerEnableGnocchi: true 17 | CeilometerEnablePanko: false 18 | GnocchiArchivePolicy: 'high' 19 | GnocchiBackend: 'rbd' 20 | GnocchiRbdPoolName: 'metrics' 21 | 22 | EventPipelinePublishers: ['gnocchi://?filter_project=service'] 23 | PipelinePublishers: ['gnocchi://?filter_project=service'] 24 | 25 | -------------------------------------------------------------------------------- /tests/infrared/16.2/outputs/.KEEPIT: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infrawatch/service-telemetry-operator/b21be12e3da5fc020d4d4b80808b86e8b8e7484c/tests/infrared/16.2/outputs/.KEEPIT -------------------------------------------------------------------------------- /tests/infrared/16.2/stf-connectors.yaml.template: -------------------------------------------------------------------------------- 
1 | --- 2 | tripleo_heat_templates: 3 | [] 4 | 5 | custom_templates: 6 | # don't load collectd-write-qdr.yaml when using multi-cloud and instead load collectd service directly 7 | resource_registry: 8 | OS::TripleO::Services::Collectd: /usr/share/openstack-tripleo-heat-templates/deployment/metrics/collectd-container-puppet.yaml 9 | 10 | # set parameter defaults to match stable-1.3 documentation 11 | parameter_defaults: 12 | MetricsQdrConnectors: 13 | - host: <> 14 | port: <> 15 | role: edge 16 | verifyHostname: false 17 | sslProfile: sslProfile 18 | 19 | MetricsQdrSSLProfiles: 20 | - name: sslProfile 21 | 22 | CeilometerQdrEventsConfig: 23 | driver: amqp 24 | topic: <>-event 25 | 26 | CeilometerQdrMetricsConfig: 27 | driver: amqp 28 | topic: <>-metering 29 | 30 | CollectdAmqpInstances: 31 | <>-notify: 32 | format: JSON 33 | notify: true 34 | presettle: false 35 | <>-telemetry: 36 | format: JSON 37 | presettle: false 38 | 39 | CollectdSensubilityResultsChannel: sensubility/<>-telemetry 40 | 41 | # --- below here, extended configuration for environment beyond what is documented in stable-1.3 42 | CollectdSensubilityLogLevel: DEBUG 43 | CephStorageExtraConfig: 44 | tripleo::profile::base::metrics::collectd::amqp_host: "%{hiera('storage')}" 45 | tripleo::profile::base::metrics::qdr::listener_addr: "%{hiera('storage')}" 46 | 47 | ExtraConfig: 48 | collectd::plugin::ceph::daemons: 49 | - ceph-osd.0 50 | - ceph-osd.1 51 | - ceph-osd.2 52 | - ceph-osd.3 53 | - ceph-osd.4 54 | - ceph-osd.5 55 | - ceph-osd.6 56 | - ceph-osd.7 57 | - ceph-osd.8 58 | - ceph-osd.9 59 | - ceph-osd.10 60 | - ceph-osd.11 61 | - ceph-osd.12 62 | - ceph-osd.13 63 | - ceph-osd.14 64 | -------------------------------------------------------------------------------- /tests/infrared/17.0/.gitignore: -------------------------------------------------------------------------------- 1 | outputs/** 2 | !outputs/.KEEPIT 3 | 4 | -------------------------------------------------------------------------------- /tests/infrared/17.0/enable-stf.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | tripleo_heat_templates: 3 | [] 4 | 5 | custom_templates: 6 | # matches the documentation for enable-stf.yaml in stable-1.3 documentation 7 | parameter_defaults: 8 | # only send to STF, not other publishers 9 | EventPipelinePublishers: [] 10 | PipelinePublishers: [] 11 | 12 | # manage the polling and pipeline configuration files for Ceilometer agents 13 | ManagePolling: true 14 | ManagePipeline: true 15 | 16 | # enable Ceilometer metrics and events 17 | CeilometerQdrPublishMetrics: true 18 | CeilometerQdrPublishEvents: true 19 | 20 | # enable collection of API status 21 | CollectdEnableSensubility: true 22 | CollectdSensubilityTransport: amqp1 23 | 24 | # enable collection of containerized service metrics 25 | CollectdEnableLibpodstats: true 26 | 27 | # set collectd overrides for higher telemetry resolution and extra plugins 28 | # to load 29 | CollectdConnectionType: amqp1 30 | CollectdAmqpInterval: 5 31 | CollectdDefaultPollingInterval: 5 32 | CollectdExtraPlugins: 33 | - vmem 34 | 35 | # set standard prefixes for where metrics and events are published to QDR 36 | MetricsQdrAddresses: 37 | - prefix: 'collectd' 38 | distribution: multicast 39 | - prefix: 'anycast/ceilometer' 40 | distribution: multicast 41 | 42 | ExtraConfig: 43 | ceilometer::agent::polling::polling_interval: 30 44 | ceilometer::agent::polling::polling_meters: 45 | - cpu 46 | - disk.* 47 | - ip.* 48 | - image.* 49 | - memory 
50 | - memory.* 51 | - network.* 52 | - perf.* 53 | - port 54 | - port.* 55 | - switch 56 | - switch.* 57 | - storage.* 58 | - volume.* 59 | 60 | # to avoid filling the memory buffers if disconnected from the message bus 61 | collectd::plugin::amqp1::send_queue_limit: 50 62 | 63 | # receive extra information about virtual memory 64 | collectd::plugin::vmem::verbose: true 65 | 66 | # provide name and uuid in addition to hostname for better correlation 67 | # to ceilometer data 68 | collectd::plugin::virt::hostname_format: "name uuid hostname" 69 | 70 | # provide the human-friendly name of the virtual instance 71 | collectd::plugin::virt::plugin_instance_format: metadata 72 | 73 | # set memcached collectd plugin to report its metrics by hostname 74 | # rather than host IP, ensuring metrics in the dashboard remain uniform 75 | collectd::plugin::memcached::instances: 76 | local: 77 | host: "%{hiera('fqdn_canonical')}" 78 | port: 11211 79 | -------------------------------------------------------------------------------- /tests/infrared/17.0/gnocchi-connectors.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | tripleo_heat_templates: 3 | [] 4 | 5 | custom_templates: 6 | resource_registry: 7 | OS::TripleO::Services::GnocchiApi: /usr/share/openstack-tripleo-heat-templates/deployment/gnocchi/gnocchi-api-container-puppet.yaml 8 | OS::TripleO::Services::GnocchiMetricd: /usr/share/openstack-tripleo-heat-templates/deployment/gnocchi/gnocchi-metricd-container-puppet.yaml 9 | OS::TripleO::Services::GnocchiStatsd: /usr/share/openstack-tripleo-heat-templates/deployment/gnocchi/gnocchi-statsd-container-puppet.yaml 10 | OS::TripleO::Services::AodhApi: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-api-container-puppet.yaml 11 | OS::TripleO::Services::AodhEvaluator: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-evaluator-container-puppet.yaml 12 | OS::TripleO::Services::AodhNotifier: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-notifier-container-puppet.yaml 13 | OS::TripleO::Services::AodhListener: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-listener-container-puppet.yaml 14 | 15 | parameter_defaults: 16 | CeilometerEnableGnocchi: true 17 | CeilometerEnablePanko: false 18 | GnocchiArchivePolicy: 'high' 19 | GnocchiBackend: 'rbd' 20 | GnocchiRbdPoolName: 'metrics' 21 | 22 | EventPipelinePublishers: ['gnocchi://?filter_project=service'] 23 | PipelinePublishers: ['gnocchi://?filter_project=service'] 24 | 25 | -------------------------------------------------------------------------------- /tests/infrared/17.0/outputs/.KEEPIT: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infrawatch/service-telemetry-operator/b21be12e3da5fc020d4d4b80808b86e8b8e7484c/tests/infrared/17.0/outputs/.KEEPIT -------------------------------------------------------------------------------- /tests/infrared/17.0/stf-connectors.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | tripleo_heat_templates: 3 | [] 4 | 5 | custom_templates: 6 | # don't load collectd-write-qdr.yaml when using multi-cloud and instead load collectd service directly 7 | resource_registry: 8 | OS::TripleO::Services::Collectd: /usr/share/openstack-tripleo-heat-templates/deployment/metrics/collectd-container-puppet.yaml 9 | 10 | # set parameter defaults to match stable-1.3 documentation 11 | parameter_defaults: 12 
| MetricsQdrConnectors: 13 | - host: <> 14 | port: <> 15 | role: edge 16 | verifyHostname: false 17 | sslProfile: sslProfile 18 | 19 | MetricsQdrSSLProfiles: 20 | - name: sslProfile 21 | caCertFileContent: | 22 | <> 23 | 24 | CeilometerQdrEventsConfig: 25 | driver: amqp 26 | topic: <>-event 27 | 28 | CeilometerQdrMetricsConfig: 29 | driver: amqp 30 | topic: <>-metering 31 | 32 | CollectdAmqpInstances: 33 | <>-notify: 34 | format: JSON 35 | notify: true 36 | presettle: false 37 | <>-telemetry: 38 | format: JSON 39 | presettle: false 40 | 41 | CollectdSensubilityResultsChannel: sensubility/<>-telemetry 42 | 43 | # --- below here, extended configuration for environment beyond what is documented in stable-1.3 44 | CollectdSensubilityLogLevel: DEBUG 45 | CephStorageExtraConfig: 46 | tripleo::profile::base::metrics::collectd::amqp_host: "%{hiera('storage')}" 47 | tripleo::profile::base::metrics::qdr::listener_addr: "%{hiera('storage')}" 48 | 49 | ExtraConfig: 50 | collectd::plugin::ceph::daemons: 51 | - ceph-osd.0 52 | - ceph-osd.1 53 | - ceph-osd.2 54 | - ceph-osd.3 55 | - ceph-osd.4 56 | - ceph-osd.5 57 | - ceph-osd.6 58 | - ceph-osd.7 59 | - ceph-osd.8 60 | - ceph-osd.9 61 | - ceph-osd.10 62 | - ceph-osd.11 63 | - ceph-osd.12 64 | - ceph-osd.13 65 | - ceph-osd.14 66 | -------------------------------------------------------------------------------- /tests/infrared/17.1/.gitignore: -------------------------------------------------------------------------------- 1 | outputs/** 2 | !outputs/.KEEPIT 3 | 4 | -------------------------------------------------------------------------------- /tests/infrared/17.1/README.md: -------------------------------------------------------------------------------- 1 | # Deployments 2 | 3 | ## Basic deployment 4 | 5 | A basic deployment can be deployed and connected to an existing STF deployment automatically after logging into the OpenShift cluster hosting STF from the host system. 6 | 7 | ### Prequisites 8 | 9 | * Logged into the host system where you'll deploy the virtualized OpenStack infrastructure. 10 | * Installed infrared and cloned the infrawatch/service-telemetry-operator repository. 11 | * DNS resolution (or `/etc/hosts` entry) of the OpenShift cluster API endpoint. 12 | * Downloaded the `oc` binary, made it executable, and placed in $PATH. 13 | * Logged into the OpenShift hosting STF and changed to the `service-telemetry` project from the host system. 14 | 15 | ### Procedure 16 | 17 | * Deploy the overcloud using the infrawatch-openstack.sh script: 18 | ```bash 19 | OCP_ROUTE_IP="10.0.111.41" \ 20 | CA_CERT_FILE_CONTENT="$(oc get secret/default-interconnect-selfsigned -o jsonpath='{.data.ca\.crt}' | base64 -d)" \ 21 | AMQP_HOST="$(oc get route default-interconnect-5671 -ojsonpath='{.spec.host}')" \ 22 | AMQP_PASS="$(oc get secret default-interconnect-users -o json | jq -r .data.guest | base64 -d)" \ 23 | ENABLE_STF_CONNECTORS=true \ 24 | ENABLE_GNOCCHI_CONNECTORS=false \ 25 | CONTROLLER_MEMORY="24000" \ 26 | COMPUTE_CPU="6" \ 27 | COMPUTE_MEMORY="24000" \ 28 | LIBVIRT_DISKPOOL="/home/libvirt/images" \ 29 | ./infrared-openstack.sh 30 | ``` 31 | 32 | ## Running a test workload 33 | 34 | You can run a test workload on the deployed overcloud by logging into the undercloud and completing some additional setup to allow for virtual machine workloads to run. 
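
The procedure below creates a small Cirros instance; once it reports `ACTIVE`, telemetry from the overcloud (including the new instance) should be visible in STF. A minimal check from the host system, sketched here assuming the `default-prometheus-proxy` route name and the unauthenticated query pattern shown in `tests/infrared/README.md`, is:

```bash
# Query STF Prometheus for recent collectd samples; the overcloud nodes
# (for example compute-0 and controller-0) should appear in the result set
# once telemetry is flowing. Adjust the route name if your deployment differs.
PROM_HOST=$(oc get route default-prometheus-proxy -o jsonpath='{.spec.host}')
curl "http://${PROM_HOST}/api/v1/query?query=collectd_uptime\[10s\]"
```
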
35 | 36 | ### Procedure 37 | 38 | * Login to the undercloud from the host system: 39 | ```bash 40 | ir ssh undercloud-0 41 | ``` 42 | * Complete the deployment of a private network, router, and other aspects to allow the virtual machine to be deployed: 43 | ```bash 44 | source overcloudrc 45 | export PRIVATE_NETWORK_CIDR=192.168.100.0/24 46 | openstack flavor create --ram 512 --disk 1 --vcpu 1 --public tiny 47 | curl -L -O https://download.cirros-cloud.net/0.5.0/cirros-0.5.0-x86_64-disk.img 48 | openstack image create cirros --container-format bare --disk-format qcow2 --public --file cirros-0.5.0-x86_64-disk.img 49 | openstack keypair create --public-key ~/.ssh/id_rsa.pub default 50 | openstack security group create basic 51 | openstack security group rule create basic --protocol tcp --dst-port 22:22 --remote-ip 0.0.0.0/0 52 | openstack security group rule create --protocol icmp basic 53 | openstack security group rule create --protocol udp --dst-port 53:53 basic 54 | openstack network create --internal private 55 | openstack subnet create private-net \ 56 | --subnet-range $PRIVATE_NETWORK_CIDR \ 57 | --network private 58 | openstack router create vrouter 59 | openstack router set vrouter --external-gateway public 60 | openstack router add subnet vrouter private-net 61 | openstack server create --flavor tiny --image cirros --key-name default --security-group basic --network private myserver 62 | until [ "$(openstack server list --name myserver --column Status --format value)" = "ACTIVE" ]; do echo "Waiting for server to be ACTIVE..."; sleep 10; done 63 | openstack server add floating ip myserver $(openstack floating ip create public --format json | jq .floating_ip_address | tr -d '"') 64 | openstack server list 65 | ``` 66 | -------------------------------------------------------------------------------- /tests/infrared/17.1/enable-stf.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | tripleo_heat_templates: 3 | [] 4 | 5 | custom_templates: 6 | # matches the documentation for enable-stf.yaml in stable-1.3 documentation 7 | parameter_defaults: 8 | # only send to STF, not other publishers 9 | PipelinePublishers: [] 10 | 11 | # manage the polling and pipeline configuration files for Ceilometer agents 12 | ManagePolling: true 13 | ManagePipeline: true 14 | ManageEventPipeline: false 15 | 16 | # enable Ceilometer metrics 17 | CeilometerQdrPublishMetrics: true 18 | 19 | # enable collection of API status 20 | CollectdEnableSensubility: true 21 | CollectdSensubilityTransport: amqp1 22 | 23 | # enable collection of containerized service metrics 24 | CollectdEnableLibpodstats: true 25 | 26 | # set collectd overrides for higher telemetry resolution and extra plugins 27 | # to load 28 | CollectdConnectionType: amqp1 29 | CollectdAmqpInterval: 30 30 | CollectdDefaultPollingInterval: 30 31 | CollectdExtraPlugins: 32 | - vmem 33 | 34 | # set standard prefixes for where metrics are published to QDR 35 | MetricsQdrAddresses: 36 | - prefix: 'collectd' 37 | distribution: multicast 38 | - prefix: 'anycast/ceilometer' 39 | distribution: multicast 40 | 41 | ExtraConfig: 42 | ceilometer::agent::polling::polling_interval: 30 43 | ceilometer::agent::polling::polling_meters: 44 | - cpu 45 | - memory.usage 46 | 47 | # to avoid filling the memory buffers if disconnected from the message bus 48 | # note: this may need an adjustment if there are many metrics to be sent. 
49 | collectd::plugin::amqp1::send_queue_limit: 5000 50 | 51 | # receive extra information about virtual memory 52 | collectd::plugin::vmem::verbose: true 53 | 54 | # provide name and uuid in addition to hostname for better correlation 55 | # to ceilometer data 56 | collectd::plugin::virt::hostname_format: "name uuid hostname" 57 | 58 | # provide the human-friendly name of the virtual instance 59 | collectd::plugin::virt::plugin_instance_format: metadata 60 | 61 | # set memcached collectd plugin to report its metrics by hostname 62 | # rather than host IP, ensuring metrics in the dashboard remain uniform 63 | collectd::plugin::memcached::instances: 64 | local: 65 | host: "%{hiera('fqdn_canonical')}" 66 | port: 11211 67 | -------------------------------------------------------------------------------- /tests/infrared/17.1/extra-hosts.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | tripleo_heat_templates: 3 | [] 4 | 5 | custom_templates: 6 | parameter_defaults: 7 | ExtraHostFileEntries: 8 | - '<>' 9 | 10 | -------------------------------------------------------------------------------- /tests/infrared/17.1/gnocchi-connectors.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | tripleo_heat_templates: 3 | [] 4 | 5 | custom_templates: 6 | resource_registry: 7 | OS::TripleO::Services::GnocchiApi: /usr/share/openstack-tripleo-heat-templates/deployment/gnocchi/gnocchi-api-container-puppet.yaml 8 | OS::TripleO::Services::GnocchiMetricd: /usr/share/openstack-tripleo-heat-templates/deployment/gnocchi/gnocchi-metricd-container-puppet.yaml 9 | OS::TripleO::Services::GnocchiStatsd: /usr/share/openstack-tripleo-heat-templates/deployment/gnocchi/gnocchi-statsd-container-puppet.yaml 10 | OS::TripleO::Services::AodhApi: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-api-container-puppet.yaml 11 | OS::TripleO::Services::AodhEvaluator: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-evaluator-container-puppet.yaml 12 | OS::TripleO::Services::AodhNotifier: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-notifier-container-puppet.yaml 13 | OS::TripleO::Services::AodhListener: /usr/share/openstack-tripleo-heat-templates/deployment/aodh/aodh-listener-container-puppet.yaml 14 | 15 | parameter_defaults: 16 | CeilometerEnableGnocchi: true 17 | CeilometerEnablePanko: false 18 | GnocchiArchivePolicy: 'high' 19 | GnocchiBackend: 'rbd' 20 | GnocchiRbdPoolName: 'metrics' 21 | 22 | EventPipelinePublishers: ['gnocchi://?filter_project=service'] 23 | PipelinePublishers: ['gnocchi://?filter_project=service'] 24 | 25 | -------------------------------------------------------------------------------- /tests/infrared/17.1/outputs/.KEEPIT: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/infrawatch/service-telemetry-operator/b21be12e3da5fc020d4d4b80808b86e8b8e7484c/tests/infrared/17.1/outputs/.KEEPIT -------------------------------------------------------------------------------- /tests/infrared/17.1/stf-connectors.yaml.template: -------------------------------------------------------------------------------- 1 | --- 2 | tripleo_heat_templates: 3 | [] 4 | 5 | custom_templates: 6 | resource_registry: 7 | OS::TripleO::Services::Collectd: /usr/share/openstack-tripleo-heat-templates/deployment/metrics/collectd-container-puppet.yaml 8 | 9 | parameter_defaults: 10 | ExtraConfig: 11 | qdr::router_id: 
"%{::hostname}.<>" 12 | 13 | MetricsQdrConnectors: 14 | - host: <> 15 | port: <> 16 | role: edge 17 | verifyHostname: false 18 | sslProfile: sslProfile 19 | saslUsername: guest@default-interconnect 20 | saslPassword: pass:<> 21 | 22 | MetricsQdrSSLProfiles: 23 | - name: sslProfile 24 | caCertFileContent: | 25 | <> 26 | 27 | CeilometerQdrMetricsConfig: 28 | driver: amqp 29 | topic: <>-metering 30 | 31 | CollectdAmqpInstances: 32 | <>-telemetry: 33 | format: JSON 34 | presettle: false 35 | 36 | CollectdSensubilityResultsChannel: sensubility/<>-telemetry 37 | 38 | # --- below here, extended configuration for environment beyond what is documented in stable-1.3 39 | CollectdSensubilityLogLevel: DEBUG 40 | -------------------------------------------------------------------------------- /tests/infrared/README.md: -------------------------------------------------------------------------------- 1 | # Integration tests 2 | 3 | Use Infrared (and optionally minishift) to test a simple OSP cluster connected 4 | to an STF instance all on one (large) baremetal machine. 5 | 6 | ## Usage 7 | 8 | 1. Have `ir --version` working with at least these plugins: 9 | * virsh tripleo-undercloud tripleo-overcloud cloud-config tempest 10 | 1. Set VIRTHOST and have key based SSH access to root@$VIRTHOST 11 | 1. Set AMQP_HOST and AMQP_PORT 12 | 1. (OSP 13/17) `export CA_CERT_FILE_CONTENT=$(oc get secret/default-interconnect-selfsigned -o jsonpath='{.data.ca\.crt}' | base64 -d)` 13 | 1. Run `infrared-openstack.sh` to install OSP on $VIRTHOST 14 | 15 | ## Verification 16 | 17 | Once the deployment is complete, you can check prometheus for data, like so: 18 | 19 | ```shells 20 | $ PROM_HOST=$(oc get route default-prometheus-proxy -o jsonpath='{.spec.host}') 21 | $ curl "http://${PROM_HOST}/api/v1/query?query=collectd_uptime\[10s\]" 22 | {"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"collectd_uptime","endpoint":"prom-http","host":"compute-0.localdomain","service":"white-smartgateway","type":"base","uptime":"base"},"values":[[1566500715.207,"88719"],[1566500716.214,"88720"],[1566500717.207,"88721"],[1566500718.207,"88722"],[1566500720.207,"88724"],[1566500721.207,"88725"],[1566500722.207,"88726"],[1566500723.207,"88727"]]},{"metric":{"__name__":"collectd_uptime","endpoint":"prom-http","host":"controller-0.localdomain","service":"white-smartgateway","type":"base","uptime":"base"},"values":[[1566500715.207,"88700"],[1566500717.207,"88701"],[1566500718.207,"88702"],[1566500719.209,"88703"],[1566500721.207,"88704"],[1566500723.207,"88705"]]}]}} 23 | ``` 24 | 25 | You should be seeing samples for both compute-0 and controller-0 26 | 27 | ## Versions 28 | 29 | Tested with the following versions: 30 | 31 | * infrared 32 | * 2.0.1.dev3952 (ansible-2.7.16, python-2.7.5) 33 | * "Virthost" 34 | * RHEL 7.6 35 | 36 | ## TODO 37 | * Stamp out the few remaining IP addresses and RH internal defaults 38 | * Get the crc on VIRTHOST scenario working 39 | * Automated verification script 40 | -------------------------------------------------------------------------------- /tests/infrared/crc-stf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function check_prerequisites() { 4 | if [ ! -f ~/.crc/pull-secret ]; then 5 | echo "Please create a file 'pull-secret' in ~/.crc containing the" 6 | echo "pull secret obtained from" 7 | echo "https://cloud.redhat.com/openshift/install/crc/installer-provisioned" 8 | echo "and try again." 
9 | exit 1 10 | fi 11 | 12 | command -v crc >/dev/null 2>&1 || { echo >&2 "Please install crc from https://cloud.redhat.com/openshift/install/crc/installer-provisioned and restart."; exit 1; } 13 | 14 | } 15 | 16 | check_prerequisites 17 | crc delete --force 18 | 19 | # takes probably a lot shorter 20 | sleep 60 21 | 22 | crc setup 23 | crc start --memory 49152 --cpus 8 --pull-secret-file ~/.crc/pull-secret 24 | 25 | # oc startup can take some time 26 | sleep 60 27 | 28 | # scale up internal cluster monitoring 29 | oc login -u kubeadmin -p `cat ~/.crc/cache/*/kubeadmin-password` https://api.crc.testing:6443 30 | oc scale --replicas=1 statefulset --all -n openshift-monitoring; oc scale --replicas=1 deployment --all -n openshift-monitoring 31 | 32 | cd ../deploy && ./quickstart.sh 33 | -------------------------------------------------------------------------------- /tests/performance-test/README.md: -------------------------------------------------------------------------------- 1 | Ensure STF is installed with metrics and graphing enable and make sure an instance of Grafana is running in the STF namespace 2 | 3 | Create resources 4 | ``` 5 | OCP_PASS=$(oc get secret -n openshift-monitoring grafana-datasources -ojsonpath='{.data.prometheus\.yaml}' | base64 -d | jq -r .datasources[0].basicAuthPassword) 6 | 7 | sed "s/OCP_PASS/$OC_PASS/" deploy/datasource.yaml | oc create -f - 8 | 9 | oc create -f deploy/qdr-servicemonitor.yml \ 10 | -f dashboards/perftest-dashboard.yaml 11 | ``` 12 | 13 | Run Test 14 | ``` 15 | ./run.sh -c 10000000 16 | ``` 17 | 18 | View results in the grafana dashboard 19 | -------------------------------------------------------------------------------- /tests/performance-test/deploy/datasources.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: integreatly.org/v1alpha1 2 | kind: GrafanaDataSource 3 | metadata: 4 | name: performance-test-datasource 5 | namespace: service-telemetry 6 | spec: 7 | datasources: 8 | - name: OCPPrometheus 9 | type: prometheus 10 | access: proxy 11 | orgId: 1 12 | url: "https://prometheus-k8s.openshift-monitoring.svc:9091" 13 | database: "" 14 | basicAuth: true 15 | basicAuthUser: "internal" 16 | basicAuthPassword: OCP_PASS 17 | withCredentials: false 18 | isDefault: false 19 | jsonData: 20 | tlsSkipVerify: true 21 | timeInterval: "1s" 22 | secureJsonData: {} 23 | version: 3 24 | editable: true 25 | name: performance-test-datasources 26 | -------------------------------------------------------------------------------- /tests/performance-test/deploy/qdr-servicemonitor.yml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: 'stf-default-interconnect' 5 | labels: 6 | app: smart-gateway 7 | spec: 8 | selector: 9 | matchLabels: 10 | application: stf-default-interconnect 11 | endpoints: 12 | - port: "8672" 13 | interval: 10s 14 | 15 | --- 16 | apiVersion: monitoring.coreos.com/v1 17 | kind: ServiceMonitor 18 | metadata: 19 | name: 'qdr-test' 20 | labels: 21 | app: smart-gateway 22 | spec: 23 | selector: 24 | matchLabels: 25 | application: qdr-test 26 | endpoints: 27 | - port: "8672" 28 | interval: 10s -------------------------------------------------------------------------------- /tests/performance-test/images/dashboard.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/infrawatch/service-telemetry-operator/b21be12e3da5fc020d4d4b80808b86e8b8e7484c/tests/performance-test/images/dashboard.png -------------------------------------------------------------------------------- /tests/performance-test/job.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: generator 5 | spec: 6 | parallelism: 1 7 | completions: 1 8 | spec: 9 | template: 10 | metadata: 11 | labels: 12 | application: generator 13 | spec: 14 | restartPolicy: Never 15 | containers: 16 | - name: generator 17 | #image: image-registry.openshift-image-registry.svc:5000/service-telemetry/generator:latest 18 | image: quay.io/infrawatch/generator:latest 19 | command: [ '/gen', '-i','generator','SLEEP','-c',"COUNT",'-t','10','-o','5','-m','100','-a','collectd/telemetry', 'default-interconnect.service-telemetry.svc', '5672'] 20 | resources: 21 | limits: 22 | memory: "1Gi" 23 | 24 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/Dockerfile: -------------------------------------------------------------------------------- 1 | #--- Build STF performance test --- 2 | FROM golang:1.13 3 | WORKDIR /go/src/performance-test/ 4 | 5 | COPY ./main.go ./parser.go ./ 6 | 7 | RUN go get gopkg.in/yaml.v2 && \ 8 | go get github.com/grafana-tools/sdk && \ 9 | go build -o main && \ 10 | mv main /tmp/ 11 | 12 | #--- Create performance test layer --- 13 | FROM tripleomaster/centos-binary-collectd:current-tripleo-rdo 14 | USER root 15 | 16 | RUN yum install golang -y && \ 17 | yum update-minimal --security -y && \ 18 | #issue with full update: 19 | #https://github.com/infrawatch/telemetry-framework/issues/81 20 | yum clean all 21 | 22 | COPY --from=0 /tmp/main /performance-test/exec/main 23 | COPY deploy/scripts/launch-test.sh /performance-test/exec/launch-test.sh -------------------------------------------------------------------------------- /tests/performance-test/legacy/README.md: -------------------------------------------------------------------------------- 1 | # STF Performance Test 2 | 3 | ## Introduction 4 | 5 | The performance test provides an automated environment in which to run stress 6 | tests on STF. Collectd-tg or telemetry-bench is used to generate a large volume of 7 | simulated metrics to pump through STF. Test results can be analyzed in a 8 | grafana dashboard. 9 | 10 | Two additional pods are deployed by the performance test: one that hosts a 11 | grafana instance and one that executes the testing logic. 12 | 13 | ![A Performance Test Dashboard](../images/dashboard.png) 14 | 15 | ## Environment 16 | 17 | * openshift v4.2.7 18 | 19 | ## Setup 20 | 21 | STF must already be deployed, including the default ServiceTelemetry example CR. 22 | A quick way to do this is to run the `quickstart.sh` script in the 23 | `service-telemetry-operator/deploy/` directory. 24 | 25 | Here is an example of how to do that in crc: 26 | 27 | ```shell 28 | crc start 29 | eval $(crc oc-env) 30 | cd service-telemetry-operator/deploy/; ./quickstart.sh 31 | ``` 32 | 33 | ## Deploying Grafana 34 | 35 | Ensure that all of the STF pods are already marked running with `oc get pods`. 36 | Next, launch the grafana instance for test results gathering.
This only needs 37 | to be done once: 38 | 39 | ```shell 40 | cd service-telemetry-operator/tests/performance-test/grafana 41 | ./grafana-launcher.sh 42 | ``` 43 | 44 | The grafana launcher script will output a URL that can be used to log into the 45 | dashboard. This Grafana instance has all authentication disabled - if, in the 46 | future, the performance test should report to an authenticated grafana instance, 47 | the test scripts must be modified. 48 | 49 | ## Launching the test 50 | 51 | Once the Grafana instance is running, launch the performance test OpenShift job: 52 | 53 | ```shell 54 | ./performance-test.sh 55 | ``` 56 | 57 | Monitor the performance test status by watching the job with 58 | `oc get job -l app=stf-performance-test -w`. Logs can be viewed with 59 | `oc logs stf-perftest--runner- -f` 60 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/deploy/config/minimal-collectd.conf: -------------------------------------------------------------------------------- 1 | Interval 1 2 | 3 | LoadPlugin "log_logstash" 4 | 5 | LogLevel "notice" 6 | File "/tmp/events.json" 7 | 8 | 9 | LoadPlugin cpu 10 | LoadPlugin amqp1 11 | 12 | 13 | Host "qdr-test" 14 | Port "5672" 15 | Address "collectd" 16 | 17 | Format JSON 18 | PreSettle false 19 | Notify true 20 | 21 | 22 | 23 | 24 | LoadPlugin interface 25 | 26 | IgnoreSelected true 27 | ReportInactive true 28 | 29 | 30 | LoadPlugin threshold 31 | 32 | 33 | Instance "lo" 34 | 35 | FailureMax 0 36 | DataSource "rx" 37 | Persist true 38 | PersistOK true 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/deploy/config/test-configs.yml: -------------------------------------------------------------------------------- 1 | - metadata: 2 | name: STF Performance Test 1 3 | spec: 4 | value-lists: 20000 5 | hosts: 20 6 | plugins: 1000 7 | interval: 1 8 | length: 180 9 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/deploy/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | /usr/sbin/collectd -C /tmp/minimal-collectd.conf -f 2>&1 | tee /tmp/collectd_output 4 | 5 | sleep 5 -------------------------------------------------------------------------------- /tests/performance-test/legacy/deploy/performance-test-job-events.yml.template: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: stf-perftest-notify 5 | spec: 6 | parallelism: 1 7 | completions: 1 8 | spec: 9 | template: 10 | metadata: 11 | labels: 12 | app: stf-performance-test 13 | spec: 14 | restartPolicy: Never 15 | containers: 16 | - name: stf-perftest-notify 17 | image: tripleomaster/centos-binary-collectd:current-tripleo-rdo 18 | command: 19 | - /entrypoint.sh 20 | volumeMounts: 21 | - name: entrypoint 22 | mountPath: /entrypoint.sh 23 | subPath: entrypoint.sh 24 | - name: collectd-config 25 | mountPath: /tmp/minimal-collectd.conf 26 | subPath: minimal-collectd.conf 27 | volumes: 28 | - name: entrypoint 29 | configMap: 30 | name: stf-performance-test-events-entry 31 | defaultMode: 0777 32 | - name: collectd-config 33 | configMap: 34 | name: stf-performance-test-collectd-config 35 | 36 | -------------------------------------------------------------------------------- 
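A quick way to confirm that the events job defined above actually ran collectd and produced output is to wait for the job and read its pod logs; the entrypoint tees collectd output to stdout, so it appears in the logs. This is a minimal sketch rather than part of the test harness: it assumes the job name `stf-perftest-notify` from the template above and an `oc` session pointed at the namespace where the job and its config maps were created (the `performance-test-tb.sh` script further below creates them).

```shell
# Wait for the notify job to complete, then show the tail of its collectd output.
# Job name stf-perftest-notify is taken from performance-test-job-events.yml.template.
oc wait --for=condition=complete job/stf-perftest-notify --timeout=300s
oc logs job/stf-perftest-notify | tail -n 20
```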
/tests/performance-test/legacy/deploy/performance-test-job-tb.yml.template: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: <>runner 5 | labels: 6 | app: stf-performance-test 7 | spec: 8 | parallelism: 1 9 | completions: 1 10 | template: 11 | metadata: 12 | labels: 13 | app: stf-performance-test 14 | spec: 15 | restartPolicy: Never 16 | containers: 17 | - name: performance-test 18 | image: quay.io/infrawatch/telemetry-bench 19 | imagePullPolicy: Always 20 | args: ["-hostprefix", "<>", "-hosts", "<>", "-plugins", "<>", "-instances", "1", "-send", "<>", "-interval", "<>", "-startmetricenable", "-verbose", "amqp://stf-default-interconnect:5672/collectd/telemetry/"] 21 | affinity: 22 | podAntiAffinity: 23 | preferredDuringSchedulingIgnoredDuringExecution: 24 | - weight: 100 25 | podAffinityTerm: 26 | labelSelector: 27 | matchExpressions: 28 | - key: app 29 | operator: In 30 | values: 31 | - stf-performance-test 32 | - smart-gateway 33 | topologyKey: kubernetes.io/hostname 34 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/deploy/performance-test-tb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | COUNT=${COUNT:-180} 4 | HOSTS=${HOSTS:-20} 5 | PLUGINS=${PLUGINS:-1000} 6 | INTERVAL=${INTERVAL:-1} 7 | CONCURRENT=${CONCURRENT:-1} 8 | 9 | oc delete job -l app=stf-performance-test || true 10 | 11 | oc delete configmap/stf-performance-test-collectd-config \ 12 | configmap/stf-performance-test-events-entry \ 13 | job/stf-perftest-notify || true 14 | 15 | oc create configmap stf-performance-test-collectd-config --from-file \ 16 | ./config/minimal-collectd.conf 17 | 18 | oc create configmap stf-performance-test-events-entry --from-file \ 19 | ./entrypoint.sh 20 | 21 | oc create -f ./performance-test-job-events.yml.template 22 | 23 | for i in $(seq 1 ${CONCURRENT}); do 24 | oc create -f <(sed -e "s/<>/stf-perftest-${i}-/g; 25 | s/<>/${COUNT}/g; 26 | s/<>/${HOSTS}/g; 27 | s/<>/${PLUGINS}/g; 28 | s/<>/${INTERVAL}/g"\ 29 | performance-test-job-tb.yml.template) 30 | done 31 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/deploy/prom-servicemonitor.yml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: prometheus 5 | spec: 6 | selector: 7 | matchLabels: 8 | operated-prometheus: "true" 9 | endpoints: 10 | - port: web 11 | interval: 1s 12 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/deploy/qdr-servicemonitor.yml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: default-interconnect 5 | namespace: service-telemetry 6 | labels: 7 | app: smart-gateway 8 | spec: 9 | selector: 10 | matchLabels: 11 | application: default-interconnect 12 | interconnect_cr: default-interconnect 13 | endpoints: 14 | - port: "8672" 15 | interval: 10s 16 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/deploy/qdrouterd.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: interconnectedcloud.github.io/v1alpha1 2 | kind: Interconnect 3 | 
metadata: 4 | name: qdr-test 5 | spec: 6 | deploymentPlan: 7 | role: edge 8 | size: 1 9 | placement: Any 10 | addresses: 11 | - prefix: closest 12 | distribution: closest 13 | - prefix: multicast 14 | distribution: multicast 15 | - prefix: unicast 16 | distribution: closest 17 | - prefix: exclusive 18 | distribution: closest 19 | - prefix: broadcast 20 | distribution: multicast 21 | - prefix: collectd 22 | distribution: multicast 23 | edgeListeners: 24 | - host: "0.0.0.0" 25 | port: 5671 26 | expose: true 27 | listeners: 28 | - port: 5672 29 | - port: 8672 30 | http: true 31 | edgeConnectors: 32 | - name: router 33 | host: stf-default-interconnect 34 | port: 5671 35 | verifyHostname: no 36 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/docker-push.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #Automates creating and pushing new performance test image to openshift registry 4 | PROJECT="$(oc project -q)" 5 | DOCKER_IMAGE="$(oc get route docker-registry -n default -o jsonpath='{.spec.host}')/${PROJECT}/performance-test:dev" 6 | 7 | docker build -t "$DOCKER_IMAGE" . 8 | 9 | oc delete is performance-test:dev 10 | docker push "$DOCKER_IMAGE" 11 | 12 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/grafana/datasource.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | datasources: 4 | - name: STFElasticsearch 5 | type: elasticsearch 6 | access: proxy 7 | orgId: 1 8 | url: "https://elasticsearch-es-http:9200" 9 | database: "performance-test" 10 | basicAuth: true 11 | basicAuthUser: elastic 12 | basicAuthPassword: ${ES_PASS} 13 | withCredentials: false 14 | isDefault: false 15 | jsonData: 16 | esVersion: 5 17 | keepCookies: [] 18 | maxConcurrentShardRequests: 256 19 | timeField: "startsAt" 20 | tlsAuth: false 21 | tlsAuthWithCACert: true 22 | secureJsonData: 23 | tlsCACert: ${CACERT} 24 | version: 1 25 | editable: true 26 | 27 | - name: STFPrometheus 28 | type: prometheus 29 | access: proxy 30 | orgId: 1 31 | url: "http://prometheus-operated:9090" 32 | database: "" 33 | basicAuth: false 34 | withCredentials: false 35 | isDefault: false 36 | jsonData: 37 | timeInterval: "1s" 38 | secureJsonData: {} 39 | version: 2 40 | editable: true 41 | 42 | - name: OCPPrometheus 43 | type: prometheus 44 | access: proxy 45 | orgId: 1 46 | url: "https://prometheus-k8s.openshift-monitoring.svc:9091" 47 | database: "" 48 | basicAuth: true 49 | basicAuthUser: "internal" 50 | basicAuthPassword: <> 51 | withCredentials: false 52 | isDefault: false 53 | jsonData: 54 | tlsSkipVerify: true 55 | timeInterval: "1s" 56 | secureJsonData: {} 57 | version: 3 58 | editable: true 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/grafana/grafana-deploy.yml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: grafana-deployment 5 | labels: 6 | app: grafana 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | app: grafana 12 | template: 13 | metadata: 14 | labels: 15 | app: grafana 16 | spec: 17 | containers: 18 | - name: grafana 19 | image: grafana/grafana 20 | ports: 21 | - containerPort: 3000 22 | env: 23 | - name: CACERT 24 | valueFrom: 25 | secretKeyRef: 26 | name: 
elasticsearch-es-http-certs-public 27 | key: ca.crt 28 | - name: ES_PASS 29 | valueFrom: 30 | secretKeyRef: 31 | name: elasticsearch-es-elastic-user 32 | key: elastic 33 | volumeMounts: 34 | - name: grafana-config 35 | mountPath: /etc/grafana/grafana.ini 36 | subPath: grafana.ini 37 | - name: datasources 38 | mountPath: /etc/grafana/provisioning/datasources/datasource.yaml 39 | subPath: datasource.yaml 40 | volumes: 41 | - name: grafana-config 42 | configMap: 43 | name: grafana-config 44 | - name: datasources 45 | configMap: 46 | name: datasources-config 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/grafana/grafana-launcher.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Launches a grafana pod with a pre-determined graph format and exposes routes 4 | # Important: this grafana instance is initialized with Admin permissions on the anonymous user 5 | # That is, authentication is disabled 6 | 7 | oc delete svc/grafana \ 8 | route/grafana \ 9 | deployment/grafana-deployment \ 10 | configmap/grafana-config \ 11 | configmap/datasources-config 12 | 13 | if ! oc get project openshift-monitoring; then 14 | echo "Error: openshift monitoring does not exist in cluster. Make sure monitoring is enabled" 1>&2 15 | exit 1 16 | fi 17 | 18 | OCP_PASS=$(oc get secret -n openshift-monitoring grafana-datasources -o jsonpath='{.data.prometheus\.yaml}' | base64 -d | 19 | python -c "import sys, json; print json.load(sys.stdin)['datasources'][0]['basicAuthPassword']") 20 | sed -e "s|<>|${OCP_PASS}|g" ./datasource.yaml > /tmp/datasource.yaml 21 | 22 | oc create configmap grafana-config --from-file grafana.ini 23 | oc create configmap datasources-config --from-file /tmp/datasource.yaml 24 | oc create -f grafana-service.yml 25 | oc create -f grafana-route.yml 26 | oc create -f grafana-deploy.yml 27 | 28 | # First the deployment will say it's Running! 29 | oc rollout status deployment/grafana-deployment 30 | 31 | # Then the pod will say it's ready (it will be a lie) 32 | while oc get pod -l app=grafana -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}' | grep False; do 33 | oc get pod -l app=grafana 34 | sleep 3 35 | done 36 | 37 | # Finally the logs will say it's listening - Hi Grafana! 38 | pod=$(oc get pod -l app=grafana -o jsonpath='{.items[0].metadata.name}') 39 | echo -n "Waiting for grafana to be listening" 40 | while ! oc logs $pod | grep -o "HTTP Server Listen"; do echo ...; done 41 | 42 | if ! GRAF_HOST=$(oc get routes --field-selector metadata.name=grafana -o jsonpath="{.items[0].spec.host}") 2> /dev/null; then 43 | echo "Error: cannot find Grafana instance in cluster." 
1>&2 44 | exit 1 45 | fi 46 | 47 | printf "\n*** Creating new dashboards in Grafana ***\n" 48 | curl -d "{\"overwrite\": true, \"dashboard\": $(cat perftest-dashboard.json)}" \ 49 | -H 'Content-Type: application/json' "$GRAF_HOST/api/dashboards/db" 50 | echo 51 | curl -d "{\"overwrite\": true, \"dashboard\": $(cat prom2-dashboard.json)}" \ 52 | -H 'Content-Type: application/json' "$GRAF_HOST/api/dashboards/db" 53 | 54 | printf "\nGraphing dashboard available at: \n" 55 | oc get routes --field-selector metadata.name=grafana -o jsonpath="{.items[0].spec.host}" 56 | printf "\n" 57 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/grafana/grafana-route.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Route 3 | metadata: 4 | name: grafana 5 | labels: 6 | app: performance-test 7 | spec: 8 | to: 9 | kind: Service 10 | name: grafana 11 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/grafana/grafana-service.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: grafana 5 | spec: 6 | selector: 7 | app: grafana 8 | ports: 9 | - protocol: TCP 10 | port: 3000 11 | targetPort: 3000 12 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/grafana/grafana.ini: -------------------------------------------------------------------------------- 1 | [auth] 2 | disable_login_form = false 3 | 4 | [auth.basic] 5 | enabled = false 6 | 7 | [auth.anonymous] 8 | enabled = true 9 | org_name = Main Org. 10 | org_role = Admin 11 | -------------------------------------------------------------------------------- /tests/performance-test/legacy/parser.go: -------------------------------------------------------------------------------- 1 | /* 2 | * parser --- Paul Leimer --- 19 July 2019 3 | * Reads in and builds test configuration objects 4 | */ 5 | 6 | package main 7 | 8 | import ( 9 | "gopkg.in/yaml.v2" 10 | "io/ioutil" 11 | "strconv" 12 | "time" 13 | ) 14 | 15 | var ipaddr string = "127.0.0.1" 16 | var args []string = []string{} 17 | 18 | // test type holds test configuration data 19 | type test struct { 20 | Metadata struct { 21 | Name string `yaml:"name"` 22 | } 23 | Spec struct { 24 | Valuelists uint64 `yaml:"value-lists"` 25 | Hosts uint64 `yaml:"hosts"` 26 | Plugins uint64 `yaml:"plugins"` 27 | Interval uint64 `yaml:"interval"` 28 | Length uint64 `yaml:"length"` 29 | Queries []string `yaml:"queries"` 30 | } 31 | } 32 | 33 | type Parser struct { 34 | tests []test 35 | } 36 | 37 | //LoadTests loads in a test onfiguration file for parsing 38 | func (p *Parser) LoadTests(fn string) error { 39 | yamlFile, err := ioutil.ReadFile(fn) 40 | if err != nil { 41 | 42 | return err 43 | } 44 | 45 | err = yaml.UnmarshalStrict(yamlFile, &p.tests) 46 | if err != nil { 47 | return err 48 | } 49 | return nil 50 | } 51 | 52 | // ArgStrings generates the correct parameters for a unit test from the test configuration 53 | func (p Parser) ArgStrings(t test) []string { 54 | retArgs := []string{} 55 | 56 | if t.Spec.Valuelists != 0 { 57 | retArgs = append(retArgs, "-n") 58 | retArgs = append(retArgs, strconv.FormatUint(t.Spec.Valuelists, 10)) 59 | } 60 | 61 | if t.Spec.Hosts != 0 { 62 | retArgs = append(retArgs, "-H") 63 | retArgs = append(retArgs, strconv.FormatUint(t.Spec.Hosts, 10)) 64 | } 65 | 66 
| if t.Spec.Plugins != 0 { 67 | retArgs = append(retArgs, "-p") 68 | retArgs = append(retArgs, strconv.FormatUint(t.Spec.Plugins, 10)) 69 | } 70 | 71 | if t.Spec.Interval != 0 { 72 | retArgs = append(retArgs, "-i") 73 | retArgs = append(retArgs, strconv.FormatUint(t.Spec.Interval, 10)) 74 | } 75 | 76 | //using default collectd network port 77 | retArgs = append(retArgs, []string{"-d", ipaddr, "-l", strconv.FormatUint(t.Spec.Length, 10)}...) 78 | 79 | return retArgs 80 | } 81 | 82 | // GetTimes returns time duration for test as time objects in UTC. 83 | // Calculates times relative to now 84 | func (p Parser) GetTimes(testIndex int) (time.Time, time.Time) { 85 | test := p.tests[testIndex] 86 | start := time.Now() 87 | end := start.Add(time.Second * time.Duration(test.Spec.Length)) 88 | 89 | return start, end 90 | } 91 | 92 | // Tests provides access to a list of test configuration objects 93 | func (p Parser) Tests() []test { 94 | return p.tests 95 | } 96 | -------------------------------------------------------------------------------- /tests/performance-test/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | COUNT=1000000 4 | SLEEP=0 5 | ADDRESS="127.0.0.1" 6 | PORT="5672" 7 | 8 | function usage { 9 | cat << EOF 10 | ./$(basename $0) [OPTIONS] 11 | 12 | Options 13 | -h show help 14 | -s number of usec to sleep between each credit interval 15 | -c number of messages to send 16 | EOF 17 | exit 0 18 | } 19 | 20 | while getopts "hc:s:" o; do 21 | case "${o}" in 22 | h) 23 | usage 24 | ;; 25 | c) 26 | COUNT=${OPTARG} 27 | ;; 28 | s) 29 | SLEEP=${OPTARG} 30 | ;; 31 | *) 32 | usage 33 | ;; 34 | esac 35 | done 36 | 37 | echo "Job sending $COUNT messages" 38 | 39 | sed -e "s/COUNT/$COUNT/" \ 40 | -e "s/SLEEP/-s $SLEEP/" job.yaml | oc create -f - 41 | DONE="" 42 | until [ "$DONE" == "1" ]; do DONE=$(oc get job generator -ojsonpath='{.status.succeeded}'); echo "waiting for job to finish"; sleep 5; done; 43 | oc delete -f job.yaml 44 | -------------------------------------------------------------------------------- /tests/promxy/README.md: -------------------------------------------------------------------------------- 1 | # Promxy POC 2 | 3 | This is a test deployment of promxy to help validate HA work. 4 | 5 | You can use http://promxy:8082 as a prometheus datasource (for example in grafana) that abstracts over the real Prometheuses and fills gaps in data. 6 | -------------------------------------------------------------------------------- /tests/promxy/promxy-launcher.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | oc delete \ 4 | deployment/promxy \ 5 | configmap/promxy-config 6 | 7 | oc create -f promxy-manifests.yaml 8 | 9 | oc rollout status deployment/promxy 10 | -------------------------------------------------------------------------------- /tests/promxy/promxy-manifests.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: promxy-config 5 | data: 6 | config.yaml: | 7 | ## 8 | ## Regular prometheus configuration 9 | ## 10 | global: 11 | evaluation_interval: 5s 12 | external_labels: 13 | source: promxy 14 | 15 | # remote_write configuration is used by promxy as its local Appender, meaning all 16 | # metrics promxy would "write" (not export) would be sent to this. Examples 17 | # of this include: recording rules, metrics on alerting rules, etc. 
18 | # remote_write: 19 | # - url: http://localhost:8083/receive 20 | 21 | ## 22 | ### Promxy configuration 23 | ## 24 | promxy: 25 | server_groups: 26 | - static_configs: 27 | - targets: 28 | - prometheus-default-0.prometheus-operated:9090 29 | - prometheus-default-1.prometheus-operated:9090 30 | 31 | --- 32 | apiVersion: extensions/v1beta1 33 | kind: Deployment 34 | metadata: 35 | labels: 36 | app: promxy 37 | name: promxy 38 | spec: 39 | replicas: 1 40 | selector: 41 | matchLabels: 42 | app: promxy 43 | template: 44 | metadata: 45 | labels: 46 | app: promxy 47 | spec: 48 | serviceAccountName: prometheus-k8s 49 | containers: 50 | - args: 51 | - "--config=/etc/promxy/config.yaml" 52 | - "--web.enable-lifecycle" 53 | - "--log-level=trace" 54 | env: 55 | - name: ROLE 56 | value: "1" 57 | command: 58 | - "/bin/promxy" 59 | image: quay.io/jacksontj/promxy:latest 60 | imagePullPolicy: Always 61 | name: promxy 62 | ports: 63 | - containerPort: 8082 64 | name: web 65 | volumeMounts: 66 | - mountPath: "/etc/promxy/" 67 | name: promxy-config 68 | readOnly: true 69 | # container to reload configs on configmap change 70 | - args: 71 | - "--volume-dir=/etc/promxy" 72 | - "--webhook-url=http://localhost:8082/-/reload" 73 | image: jimmidyson/configmap-reload:v0.1 74 | name: promxy-server-configmap-reload 75 | volumeMounts: 76 | - mountPath: "/etc/promxy/" 77 | name: promxy-config 78 | readOnly: true 79 | volumes: 80 | - configMap: 81 | name: promxy-config 82 | name: promxy-config 83 | 84 | --- 85 | apiVersion: v1 86 | kind: Service 87 | metadata: 88 | name: promxy 89 | spec: 90 | ports: 91 | - name: web 92 | port: 8082 93 | protocol: TCP 94 | targetPort: web 95 | selector: 96 | app: promxy 97 | type: ClusterIP 98 | -------------------------------------------------------------------------------- /tests/smoketest/README.md: -------------------------------------------------------------------------------- 1 | # STF Testing notes 2 | 3 | Here are some artifacts to assist with testing the STF after it is deployed. 4 | 5 | Currently this is just a "smoke test" that runs internal to the OCP cluster. It 6 | delivers collectd data to the STF amqp node and verifies that it can be seen in 7 | prometheus. This is intended to be usable for developers and TravisCI to 8 | validate our builds before merging changes to this repo. 9 | 10 | ## Usage 11 | 12 | 1. Have `oc` pointing at your service-telemetry project and run `./smoketest.sh` 13 | 1. Run `oc get jobs` and check the result of the stf-smoketest job 14 | 1. (If necessary) Check the logs of the stf-smoketest pod 15 | 16 | ### Example 17 | 18 | ``` 19 | $ ./smoketest.sh 20 | configmap "stf-smoketest-collectd-config" deleted 21 | configmap "stf-smoketest-entrypoint-script" deleted 22 | job.batch "stf-smoketest" deleted 23 | configmap/stf-smoketest-collectd-config created 24 | configmap/stf-smoketest-entrypoint-script created 25 | job.batch/stf-smoketest created 26 | 27 | $ oc get jobs 28 | NAME DESIRED SUCCESSFUL AGE 29 | stf-smoketest 1 1 18s 30 | 31 | $ oc get pods -l name=stf-smoketest 32 | NAME READY STATUS RESTARTS AGE 33 | stf-smoketest-md967 0/1 Completed 0 18s 34 | 35 | $ oc logs stf-smoketest-md967 36 | Sleeping for 3 seconds waiting for collectd to enter read-loop 37 | plugin_load: plugin "cpu" successfully loaded. 38 | plugin_load: plugin "amqp1" successfully loaded. 39 | Initialization complete, entering read-loop. 40 | Initialization complete, entering read-loop. 
41 | % Total % Received % Xferd Average Speed Time Time Time Current 42 | Dload Upload Total Spent Left Speed 43 | 100 328 100 262 100 66 16926 4263 --:--:-- --:--:-- --:--:-- 17466 44 | {"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"collectd_cpu_total","cpu":"0","endpoint":"metrics","host":"stf-smoketest-md967","service":"white-smartgateway","type_instance":"user"},"value":[1562363042.123,"518777"]}]}} 45 | ``` 46 | 47 | ## Improvements 48 | 49 | These are some things that would make this better: 50 | 51 | * Would like to actually test via the AMQP+TLS interface as the system boundary 52 | instead of directly to the internal AMQP broker 53 | * Option to do internal vs. external 54 | -------------------------------------------------------------------------------- /tests/smoketest/collectd-sensubility.conf: -------------------------------------------------------------------------------- 1 | 2 | [default] 3 | log_level=DEBUG 4 | 5 | [sensu] 6 | keepalive_interval=20 7 | tmp_base_dir=/var/tmp/collectd-sensubility-checks 8 | shell_path=/usr/bin/sh 9 | worker_count=2 10 | checks={"check-container-health":{"command":"cat /healthcheck.log","handlers":[],"interval":3,"occurrences":3,"refresh":90,"standalone":true}} 11 | 12 | [amqp1] 13 | connection=amqp://qdr-test:5672 14 | results_channel=sensubility/cloud1-telemetry 15 | client_name=smoketest.redhat.com 16 | results_format=smartgateway 17 | 18 | -------------------------------------------------------------------------------- /tests/smoketest/healthcheck.log: -------------------------------------------------------------------------------- 1 | [{"service":"smoketest-svc","container":"smoketest-container","status":"unhealthy","healthy":0}] 2 | -------------------------------------------------------------------------------- /tests/smoketest/minimal-collectd.conf.template: -------------------------------------------------------------------------------- 1 | Interval 1 2 | 3 | LoadPlugin "logfile" 4 | 5 | LogLevel "debug" 6 | File stdout 7 | Timestamp true 8 | 9 | 10 | LoadPlugin cpu 11 | LoadPlugin amqp1 12 | 13 | 14 | Host "qdr-test" 15 | Port "5672" 16 | Address "collectd" 17 | 18 | Format JSON 19 | PreSettle false 20 | 21 | 22 | Format JSON 23 | PreSettle false 24 | Notify true 25 | 26 | 27 | 28 | 29 | LoadPlugin interface 30 | 31 | IgnoreSelected true 32 | ReportInactive true 33 | 34 | 35 | LoadPlugin threshold 36 | 37 | 38 | Instance "lo" 39 | 40 | FailureMax 0 41 | DataSource "rx" 42 | Persist true 43 | PersistOK true 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /tests/smoketest/qdr-test.conf.yaml.template: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: qdr-test-config 5 | data: 6 | qdrouterd.conf: | 7 | router { 8 | mode: edge 9 | id: qdr-test.smoketest 10 | workerThreads: 2 11 | saslConfigDir: /etc/sasl2 12 | saslConfigName: qdrouterd 13 | } 14 | 15 | sslProfile { 16 | name: sslProfile 17 | caCertFile: /etc/pki/tls/certs/ca.crt 18 | } 19 | 20 | listener { 21 | host: 0.0.0.0 22 | port: 5672 23 | authenticatePeer: false 24 | saslMechanisms: ANONYMOUS 25 | } 26 | 27 | connector { 28 | host: default-interconnect 29 | port: 5671 30 | role: edge 31 | saslPassword: pass:<> 32 | saslUsername: guest@default-interconnect 33 | sslProfile: sslProfile 34 | verifyHostname: false 35 | } 36 | 37 | address { 38 | prefix: unicast 39 | distribution: closest 40 | } 41 | 42 | 
address { 43 | prefix: exclusive 44 | distribution: closest 45 | } 46 | 47 | address { 48 | prefix: broadcast 49 | distribution: multicast 50 | } 51 | 52 | address { 53 | distribution: multicast 54 | prefix: collectd 55 | } 56 | 57 | address { 58 | distribution: multicast 59 | prefix: anycast/ceilometer 60 | } 61 | 62 | log { 63 | module: DEFAULT 64 | enable: info+ 65 | includeTimestamp: true 66 | } 67 | -------------------------------------------------------------------------------- /tests/smoketest/qdr-test.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | annotations: 5 | openshift.io/scc: restricted-v2 6 | name: qdr-test 7 | labels: 8 | qdr: qdr-test 9 | spec: 10 | containers: 11 | - name: qdr 12 | image: quay.io/tripleowallabycentos9/openstack-qdrouterd:current-tripleo 13 | imagePullPolicy: IfNotPresent 14 | command: ['/usr/sbin/qdrouterd','-c','/etc/qpid-dispatch/qdrouterd.conf'] 15 | securityContext: 16 | allowPrivilegeEscalation: false 17 | capabilities: 18 | drop: 19 | - ALL 20 | ports: 21 | - containerPort: 5672 22 | name: amqp 23 | protocol: TCP 24 | volumeMounts: 25 | - mountPath: /etc/pki/tls/certs/ 26 | name: default-interconnect-selfsigned-cert 27 | - mountPath: /etc/qpid-dispatch/ 28 | name: qdr-test-config 29 | resources: {} 30 | volumes: 31 | - name: default-interconnect-selfsigned-cert 32 | secret: 33 | defaultMode: 420 34 | secretName: default-interconnect-selfsigned 35 | - name: qdr-test-config 36 | configMap: 37 | defaultMode: 420 38 | name: qdr-test-config 39 | 40 | --- 41 | 42 | apiVersion: v1 43 | kind: Service 44 | metadata: 45 | name: qdr-test 46 | spec: 47 | ports: 48 | - name: amqp 49 | port: 5672 50 | targetPort: amqp 51 | selector: 52 | qdr: qdr-test 53 | -------------------------------------------------------------------------------- /tests/smoketest/smoketest_ceilometer_entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set +e 3 | 4 | # Executes inside the test harness container to start collectd and look for resulting metrics in prometheus 5 | PROMETHEUS=${PROMETHEUS:-"https://default-prometheus-proxy:9092"} 6 | ELASTICSEARCH=${ELASTICSEARCH:-"https://elasticsearch-es-http:9200"} 7 | ELASTICSEARCH_AUTH_PASS=${ELASTICSEARCH_AUTH_PASS:-""} 8 | PROMETHEUS_AUTH_TOKEN=${PROMETHEUS_AUTH_TOKEN:-""} 9 | CLOUDNAME=${CLOUDNAME:-"smoke1"} 10 | POD=$(hostname) 11 | 12 | 13 | echo "*** [INFO] My pod is: ${POD}" 14 | 15 | # Run ceilometer_publisher script 16 | python3 /ceilometer_publish.py qdr-test:5672 'driver=amqp&topic=cloud1-metering' 'driver=amqp&topic=cloud1-event' 17 | 18 | # Sleeping to produce data 19 | echo "*** [INFO] Sleeping for 30 seconds to produce all metrics and events" 20 | sleep 30 21 | 22 | echo "*** [INFO] List of metric names for debugging..." 23 | curl -sk -H "Authorization: Bearer ${PROMETHEUS_AUTH_TOKEN}" -g "${PROMETHEUS}/api/v1/label/__name__/values" 2>&2 | tee /tmp/label_names 24 | echo; echo 25 | 26 | # Checks that the metrics actually appear in prometheus 27 | echo "*** [INFO] Checking for recent image metrics..." 28 | 29 | echo "[DEBUG] Running the curl command to return a query" 30 | curl -k -H "Authorization: Bearer ${PROMETHEUS_AUTH_TOKEN}" -g "${PROMETHEUS}/api/v1/query?" --data-urlencode 'query=ceilometer_image_size' 2>&1 | grep '"result":\[{"metric":{"__name__":"ceilometer_image_size"' 31 | metrics_result=$? 
32 | echo "[DEBUG] Set metrics_result to $metrics_result" 33 | 34 | if [ "$OBSERVABILITY_STRATEGY" != "use_redhat" ]; then 35 | echo "*** [INFO] Get documents for this test from ElasticSearch..." 36 | DOCUMENT_HITS=$(curl -sk -u "elastic:${ELASTICSEARCH_AUTH_PASS}" -X GET "${ELASTICSEARCH}/_search" -H 'Content-Type: application/json' -d'{ 37 | "query": { 38 | "bool": { 39 | "filter": [ 40 | { "term" : { "labels.instance" : { "value" : "'${CLOUDNAME}'", "boost" : 1.0 } } }, 41 | { "range" : { "startsAt" : { "gte" : "now-1m", "lt" : "now" } } } 42 | ] 43 | } 44 | } 45 | }' | python3 -c "import sys, json; parsed = json.load(sys.stdin); print(parsed['hits']['total']['value'])") 46 | 47 | 48 | echo "*** [INFO] List of indices for debugging..." 49 | curl -sk -u "elastic:${ELASTICSEARCH_AUTH_PASS}" -X GET "${ELASTICSEARCH}/_cat/indices/ceilometer_*?s=index" 50 | echo 51 | 52 | echo "*** [INFO] Get documents for this test from ElasticSearch..." 53 | ES_INDEX=ceilometer_image 54 | DOCUMENT_HITS=$(curl -sk -u "elastic:${ELASTICSEARCH_AUTH_PASS}" -X GET "${ELASTICSEARCH}/${ES_INDEX}/_search" -H 'Content-Type: application/json' -d'{ 55 | "query": { 56 | "match_all": {} 57 | } 58 | }'| python3 -c "import sys, json; parsed = json.load(sys.stdin); print(parsed['hits']['total']['value'])") 59 | 60 | echo "*** [INFO] Found ${DOCUMENT_HITS} documents" 61 | echo; echo 62 | 63 | # check if we got documents back for this test 64 | events_result=1 65 | if [ "$DOCUMENT_HITS" -gt "0" ]; then 66 | events_result=0 67 | fi 68 | else 69 | events_result=0 70 | fi 71 | 72 | echo "[INFO] Verification exit codes (0 is passing, non-zero is a failure): events=${events_result} metrics=${metrics_result}" 73 | echo; echo 74 | 75 | if [ "$metrics_result" = "0" ] && [ "$events_result" = "0" ]; then 76 | echo "*** [INFO] Testing completed with success" 77 | exit 0 78 | else 79 | echo "*** [INFO] Testing completed without success" 80 | exit 1 81 | fi 82 | -------------------------------------------------------------------------------- /tests/smoketest/smoketest_job.yaml.template: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: stf-smoketest-<> 5 | spec: 6 | parallelism: 1 7 | completions: 1 8 | spec: 9 | template: 10 | metadata: 11 | labels: 12 | app: stf-smoketest 13 | spec: 14 | restartPolicy: Never 15 | containers: 16 | - name: smoketest-collectd 17 | image: quay.io/tripleomastercentos9/openstack-collectd:current-tripleo 18 | command: 19 | - /smoketest_collectd_entrypoint.sh 20 | env: 21 | - name: CLOUDNAME 22 | value: <> 23 | - name: ELASTICSEARCH_AUTH_PASS 24 | value: "<>" 25 | - name: PROMETHEUS_AUTH_TOKEN 26 | value: "<>" 27 | - name: OBSERVABILITY_STRATEGY 28 | value: "<>" 29 | volumeMounts: 30 | - name: collectd-config 31 | mountPath: /etc/minimal-collectd.conf.template 32 | subPath: minimal-collectd.conf.template 33 | - name: sensubility-config 34 | mountPath: /etc/collectd-sensubility.conf 35 | subPath: collectd-sensubility.conf 36 | - name: healthcheck-log 37 | mountPath: /healthcheck.log 38 | subPath: healthcheck.log 39 | - name: collectd-entrypoint-script 40 | mountPath: /smoketest_collectd_entrypoint.sh 41 | subPath: smoketest_collectd_entrypoint.sh 42 | securityContext: 43 | allowPrivilegeEscalation: false 44 | 45 | - name: smoketest-ceilometer 46 | image: quay.io/tripleomastercentos9/openstack-ceilometer-notification:current-tripleo 47 | command: 48 | - /smoketest_ceilometer_entrypoint.sh 49 | env: 50 | - name: 
CLOUDNAME 51 | value: <> 52 | - name: ELASTICSEARCH_AUTH_PASS 53 | value: "<>" 54 | - name: PROMETHEUS_AUTH_TOKEN 55 | value: "<>" 56 | - name: OBSERVABILITY_STRATEGY 57 | value: "<>" 58 | volumeMounts: 59 | - name: ceilometer-publisher 60 | mountPath: /ceilometer_publish.py 61 | subPath: ceilometer_publish.py 62 | - name: ceilometer-entrypoint-script 63 | mountPath: /smoketest_ceilometer_entrypoint.sh 64 | subPath: smoketest_ceilometer_entrypoint.sh 65 | volumes: 66 | - name: collectd-config 67 | configMap: 68 | name: stf-smoketest-collectd-config 69 | - name: sensubility-config 70 | configMap: 71 | name: stf-smoketest-sensubility-config 72 | - name: healthcheck-log 73 | configMap: 74 | name: stf-smoketest-healthcheck-log 75 | - name: collectd-entrypoint-script 76 | configMap: 77 | name: stf-smoketest-collectd-entrypoint-script 78 | defaultMode: 0555 79 | - name: ceilometer-entrypoint-script 80 | configMap: 81 | name: stf-smoketest-ceilometer-entrypoint-script 82 | defaultMode: 0555 83 | - name: ceilometer-publisher 84 | configMap: 85 | name: stf-smoketest-ceilometer-publisher 86 | defaultMode: 0555 87 | -------------------------------------------------------------------------------- /watches.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | - version: v1beta1 3 | group: infra.watch 4 | kind: ServiceTelemetry 5 | role: /opt/ansible/roles/servicetelemetry 6 | --------------------------------------------------------------------------------
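For reference, the `watches.yaml` mapping above means the operator reconciles `ServiceTelemetry` objects in the `infra.watch/v1beta1` API by running the bundled `servicetelemetry` Ansible role. The sketch below creates a bare object matching that group/version/kind; the object name and namespace are illustrative assumptions (they follow the `default`/`service-telemetry` naming used elsewhere in this repository), and the empty `spec` simply picks up the role defaults.

```shell
# Create a minimal ServiceTelemetry object for the operator to reconcile.
# Name "default" and namespace "service-telemetry" are assumptions, not requirements.
oc apply -n service-telemetry -f - <<EOF
apiVersion: infra.watch/v1beta1
kind: ServiceTelemetry
metadata:
  name: default
spec: {}
EOF
```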