├── Dockerfile ├── .hadolint.yml ├── .gitignore ├── dashboard ├── rmv-influx-uid.sed ├── oshm-grafana-gxscs-20220923-1.png ├── oshm-grafana-gxscs-20220923-2.png ├── README.md ├── config.toml └── grafana.ini ├── .yamllint.yml ├── .github ├── renovate.json └── workflows │ ├── check-docker-syntax.yml │ ├── check-yaml-syntax.yml │ ├── build-container-image.yml │ └── release-container-image.yml ├── run_in_loop_pluspco4.sh ├── run_in_loop_poc-wgcloud.sh ├── run_in_loop_artcodix.sh ├── run_in_loop_pluspco.sh ├── files └── requirements.txt ├── startup ├── apimon.service ├── run-apimon-in-tmux.sh ├── kill-apimon-in-tmux.sh ├── kill-apimon-in-tmux-plus.sh ├── run-apimon-in-tmux-plus.sh └── README.md ├── scripts ├── push.sh └── build.sh ├── docs ├── openstackclient-az-list-fallback-f3207bd.diff └── Debian12-Install.md ├── run_poc-wgcloud.sh ├── Containerfile ├── run.sh ├── stats.py ├── run_otc.sh ├── run_pluspco3.sh ├── run_pluspco4.sh ├── run_artcodix.sh ├── run_pluspco.sh ├── run_pluspco2.sh ├── run_wave.sh ├── run_stackit.sh ├── run_gx_scs.sh ├── run_ciab.sh ├── README.md └── LICENSE /Dockerfile: -------------------------------------------------------------------------------- 1 | Containerfile -------------------------------------------------------------------------------- /.hadolint.yml: -------------------------------------------------------------------------------- 1 | --- 2 | ignored: [] 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | *.psv 3 | *.yaml 4 | *.pem 5 | APIMonitor_* 6 | -------------------------------------------------------------------------------- /dashboard/rmv-influx-uid.sed: -------------------------------------------------------------------------------- 1 | #!/usr/bin/sed -f 2 | /"influxdb",$/{ 3 | s/,$// 4 | n 5 | d 6 | } 7 | -------------------------------------------------------------------------------- 
/.yamllint.yml: -------------------------------------------------------------------------------- 1 | --- 2 | extends: default 3 | 4 | rules: 5 | comments: enable 6 | line-length: disable 7 | -------------------------------------------------------------------------------- /.github/renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "config:base" 4 | ], 5 | "commitBody": "Signed-off-by: Renovate Bot " 6 | } 7 | -------------------------------------------------------------------------------- /dashboard/oshm-grafana-gxscs-20220923-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SovereignCloudStack/openstack-health-monitor/HEAD/dashboard/oshm-grafana-gxscs-20220923-1.png -------------------------------------------------------------------------------- /dashboard/oshm-grafana-gxscs-20220923-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SovereignCloudStack/openstack-health-monitor/HEAD/dashboard/oshm-grafana-gxscs-20220923-2.png -------------------------------------------------------------------------------- /run_in_loop_pluspco4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #export WAITLB=60 3 | while true; do ./run_pluspco4.sh -s -i 200; echo -n "Hit ^C to abort ..."; sleep 15; echo; done 4 | 5 | -------------------------------------------------------------------------------- /run_in_loop_poc-wgcloud.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | rm stop-os-hm 2>/dev/null 3 | while true; do 4 | ./run_poc-wgcloud.sh -i 50 5 | if test -e stop-os-hm; then break; fi 6 | echo -n "Hit ^C to abort ..." 
7 | sleep 30; echo 8 | done 9 | -------------------------------------------------------------------------------- /run_in_loop_artcodix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # run_in_loop_pluspco.sh 3 | #export WAITLB=60 4 | rm stop-os-hm 2>/dev/null 5 | while true; do 6 | ./run_artcodix.sh -s -i 200 7 | if test -e stop-os-hm; then break; fi 8 | echo -n "Hit ^C to abort ..." 9 | sleep 15 10 | echo 11 | done 12 | 13 | -------------------------------------------------------------------------------- /run_in_loop_pluspco.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # run_in_loop_pluspco.sh 3 | #export WAITLB=60 4 | rm stop-os-hm 2>/dev/null 5 | while true; do 6 | ./run_pluspco.sh -s -i 200 7 | if test -e stop-os-hm; then break; fi 8 | echo -n "Hit ^C to abort ..." 9 | sleep 15 10 | echo 11 | done 12 | 13 | -------------------------------------------------------------------------------- /files/requirements.txt: -------------------------------------------------------------------------------- 1 | keystoneauth-oidc 2 | python-cinderclient 3 | python-designateclient 4 | python-glanceclient 5 | python-heatclient 6 | python-keystoneclient 7 | python-manilaclient 8 | python-neutronclient 9 | python-novaclient 10 | python-octaviaclient 11 | python-openstackclient 12 | python-swiftclient 13 | -------------------------------------------------------------------------------- /startup/apimon.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Start the OpenStack Health Monitor 3 | #After=network.target systemd-user-sessions.service 4 | 5 | [Service] 6 | #User=%i 7 | ExecStart=%h/openstack-health-monitor/startup/run-apimon-in-tmux.sh 8 | ExecStop=%h/openstack-health-monitor/startup/kill-apimon-in-tmux.sh 9 | Type=forking 10 | #Slice=session.slice 11 | #Type=oneshot 12 | #RemainAfterExit=yes 13 | 
14 | [Install] 15 | WantedBy=default.target 16 | Alias=os-health-mon.service 17 | -------------------------------------------------------------------------------- /.github/workflows/check-docker-syntax.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Check docker syntax 3 | 4 | "on": 5 | push: 6 | paths: 7 | - Dockerfile 8 | - .github/workflows/check-docker-syntax.yml 9 | branches: 10 | - master 11 | pull_request: 12 | paths: 13 | - Dockerfile 14 | - .github/workflows/check-docker-syntax.yml 15 | 16 | jobs: 17 | check-docker-syntax: 18 | 19 | runs-on: ubuntu-latest 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - uses: reviewdog/action-hadolint@v1.6.0 24 | with: 25 | dockerfile: Containerfile 26 | recursive: true 27 | -------------------------------------------------------------------------------- /.github/workflows/check-yaml-syntax.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Check yaml syntax 3 | 4 | "on": 5 | push: 6 | paths: 7 | - '**.yaml' 8 | - '**.yml' 9 | - .github/workflows/check-yaml-syntax.yml 10 | branches: 11 | - master 12 | pull_request: 13 | paths: 14 | - '**.yaml' 15 | - '**.yml' 16 | - .github/workflows/check-yaml-syntax.yml 17 | 18 | jobs: 19 | check-yaml-syntax: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - uses: actions/setup-python@v3 26 | with: 27 | python-version: '3.x' 28 | - run: pip3 install yamllint 29 | - run: yamllint --config-file .yamllint.yml . 
30 | -------------------------------------------------------------------------------- /scripts/push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | 4 | # Available environment variables 5 | # 6 | # DOCKER_REGISTRY 7 | # REPOSITORY 8 | # VERSION 9 | 10 | # Set default values 11 | 12 | DOCKER_REGISTRY=${DOCKER_REGISTRY:-quay.io} 13 | VERSION=${VERSION:-latest} 14 | 15 | if [[ -n $DOCKER_REGISTRY ]]; then 16 | REPOSITORY="$DOCKER_REGISTRY/$REPOSITORY" 17 | fi 18 | 19 | if [[ $VERSION == "latest" ]]; then 20 | docker push "$REPOSITORY:$VERSION" 21 | else 22 | if skopeo inspect --creds "${DOCKER_USERNAME}:${DOCKER_PASSWORD}" "docker://${REPOSITORY}:${VERSION}" > /dev/null; then 23 | echo "The image ${REPOSITORY}:${VERSION} already exists." 24 | else 25 | docker push "$REPOSITORY:$VERSION" 26 | fi 27 | fi 28 | 29 | docker rmi "$REPOSITORY:$VERSION" 30 | -------------------------------------------------------------------------------- /startup/run-apimon-in-tmux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | session="oshealthmon" 3 | export OS_CLOUD=gxscs-hm 4 | #export OS_CACERT=/etc/ca-cert-ciab.crt 5 | tmux start-server 6 | tmux new-session -d -s $session -n apimon1 7 | tmux new-window -t $session:1 -n shell1 8 | tmux send-keys "cd ~/openstack-health-monitor; export OS_CLOUD=$OS_CLOUD" C-m 9 | #tmux send-keys "export OS_CACERT=$OS_CACERT" C-m 10 | tmux select-window -t $session:0 11 | tmux send-keys "cd ~/openstack-health-monitor; export OS_CLOUD=$OS_CLOUD" C-m 12 | #tmux send-keys "export OS_CACERT=$OS_CACERT" C-m 13 | tmux send-keys "./run_in_loop_pluspco.sh" C-m 14 | #export OS_CLOUD=plus-hm2 15 | #tmux new-window -t $session:2 -n apimon2 16 | #tmux new-window -t $session:3 -n shell2 17 | #tmux send-keys "cd ~/openstack-health-monitor; export OS_CLOUD=$OS_CLOUD" C-m 18 | #tmux select-window -t $session:2 19 | #tmux send-keys "cd 
~/openstack-health-monitor; export OS_CLOUD=$OS_CLOUD" C-m 20 | #sleep 1 21 | #tmux send-keys "./run_in_loop_pluspco2.sh" C-m 22 | -------------------------------------------------------------------------------- /startup/kill-apimon-in-tmux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "oshealthmon session going down" | wall 3 | session="oshealthmon" 4 | tmux select-window -t $session:0 5 | # Tell master loop to exit 6 | cd ~/openstack-health-monitor 7 | touch stop-os-hm 8 | # Send two ^C to the api_monitor for immediate cleanup 9 | # Sidenote: If we are patient, we could just leave it with the touched stop file 10 | tmux send-keys C-c 11 | sleep 1 12 | tmux send-keys C-c 13 | #tmux select-window -t $session:2 14 | #tmux send-keys C-c 15 | #sleep 1 16 | #tmux send-keys C-c 17 | sync 18 | tmux select-window -t $session:0 19 | # Give it max 4min to cleanup and exit, so we don't delay a reboot by more than 5 mins 20 | MAXW=242 21 | let -i ctr=0 22 | while test $ctr -lt $MAXW; do 23 | if test -z "$(ps a | grep run_in_loop_pluspco | grep -v grep)"; then break; fi 24 | sleep 1 25 | let ctr+=1 26 | done 27 | if test $ctr = $MAXW; then 28 | killall run_in_loop_pluspco.sh 29 | #killall run_in_loop_pluspco2.sh 30 | sleep 1 31 | fi 32 | tmux kill-session -t $session 33 | -------------------------------------------------------------------------------- /scripts/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | 4 | # Available environment variables 5 | # 6 | # BUILD_OPTS 7 | # DOCKER_REGISTRY 8 | # REPOSITORY 9 | # VERSION 10 | 11 | # Set default values 12 | 13 | BUILD_OPTS=${BUILD_OPTS:-} 14 | CREATED=$(date --rfc-3339=ns) 15 | DOCKER_REGISTRY=${DOCKER_REGISTRY:-quay.io} 16 | REVISION=$(git rev-parse --short HEAD) 17 | VERSION=${VERSION:-latest} 18 | 19 | if [[ -n $DOCKER_REGISTRY ]]; then 20 | REPOSITORY="$DOCKER_REGISTRY/$REPOSITORY" 
21 | fi 22 | 23 | docker buildx build \ 24 | --load \ 25 | --build-arg "VERSION=$VERSION" \ 26 | --tag "$REPOSITORY:$VERSION" \ 27 | --label "org.opencontainers.image.created=$CREATED" \ 28 | --label "org.opencontainers.image.revision=$REVISION" \ 29 | --label "org.opencontainers.image.source=https://github.com/SovereignCloudStack/openstack-health-monitor" \ 30 | --label "org.opencontainers.image.title=openstack-health-monitor" \ 31 | --label "org.opencontainers.image.url=https://scs.community" \ 32 | --label "org.opencontainers.image.version=$VERSION" \ 33 | $BUILD_OPTS . 34 | -------------------------------------------------------------------------------- /.github/workflows/build-container-image.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Build container image 3 | 4 | "on": 5 | schedule: 6 | - cron: "0 3 * * *" 7 | push: 8 | branches: 9 | - main 10 | pull_request: 11 | 12 | jobs: 13 | 14 | build-container-image: 15 | 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | - uses: docker/setup-qemu-action@v1 21 | - uses: docker/setup-buildx-action@v1 22 | - uses: docker/login-action@v1 23 | with: 24 | registry: ${{ secrets.DOCKER_REGISTRY }} 25 | username: ${{ secrets.DOCKER_USERNAME }} 26 | password: ${{ secrets.DOCKER_PASSWORD }} 27 | if: github.ref == 'refs/heads/main' 28 | - run: scripts/build.sh 29 | env: 30 | DOCKER_REGISTRY: ${{ secrets.DOCKER_REGISTRY }} 31 | REPOSITORY: sovereigncloudstack/openstack-health-monitor 32 | - run: | 33 | scripts/push.sh 34 | env: 35 | DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} 36 | DOCKER_REGISTRY: ${{ secrets.DOCKER_REGISTRY }} 37 | DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} 38 | REPOSITORY: sovereigncloudstack/openstack-health-monitor 39 | if: github.ref == 'refs/heads/main' 40 | -------------------------------------------------------------------------------- /startup/kill-apimon-in-tmux-plus.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "oshealthmon session going down" | wall 3 | session="oshealthmon" 4 | tmux select-window -t $session:0 5 | # Tell master loop to exit 6 | cd ~/openstack-health-monitor 7 | touch stop-os-hm 8 | # Send two ^C to the api_monitor for immediate cleanup 9 | # Sidenote: If we are patient, we could just leave it with the touched stop file 10 | tmux send-keys C-c 11 | sleep 1 12 | tmux send-keys C-c 13 | tmux select-window -t $session:2 14 | tmux send-keys C-c 15 | sleep 1 16 | tmux send-keys C-c 17 | tmux select-window -t $session:4 18 | tmux send-keys C-c 19 | sleep 1 20 | tmux send-keys C-c 21 | tmux select-window -t $session:6 22 | tmux send-keys C-c 23 | sleep 1 24 | tmux send-keys C-c 25 | sync 26 | tmux select-window -t $session:0 27 | # Give it max 4min to cleanup and exit, so we don't delay a reboot by more than 5 mins 28 | MAXW=242 29 | let -i ctr=0 30 | while test $ctr -lt $MAXW; do 31 | if test -z "$(ps a | grep run_in_loop_pluspco | grep -v grep)"; then break; fi 32 | sleep 1 33 | let ctr+=1 34 | done 35 | if test $ctr = $MAXW; then 36 | killall run_in_loop_pluspco.sh 37 | killall run_in_loop_pluspco2.sh 38 | killall run_in_loop_pluspco3.sh 39 | killall run_in_loop_pluspco4.sh 40 | sleep 1 41 | fi 42 | tmux kill-session -t $session 43 | -------------------------------------------------------------------------------- /.github/workflows/release-container-image.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Release container image 3 | 4 | "on": 5 | push: 6 | tags: 7 | - '*' 8 | 9 | jobs: 10 | release-container-image: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v3 15 | 16 | - uses: docker/login-action@v1 17 | with: 18 | registry: ${{ secrets.DOCKER_REGISTRY }} 19 | username: ${{ secrets.DOCKER_USERNAME }} 20 | password: ${{ secrets.DOCKER_PASSWORD }} 21 | 
22 | - name: Get the version 23 | id: get_version 24 | run: echo ::set-output name=VERSION::${GITHUB_REF#refs/tags/} 25 | 26 | - name: Build container image 27 | run: scripts/build.sh 28 | env: 29 | DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} 30 | DOCKER_REGISTRY: ${{ secrets.DOCKER_REGISTRY }} 31 | DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} 32 | REPOSITORY: sovereigncloudstack/openstack-health-monitor 33 | VERSION: ${{ steps.get_version.outputs.VERSION }} 34 | 35 | - name: Push container image 36 | run: | 37 | scripts/push.sh 38 | env: 39 | DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }} 40 | DOCKER_REGISTRY: ${{ secrets.DOCKER_REGISTRY }} 41 | DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} 42 | REPOSITORY: sovereigncloudstack/openstack-health-monitor 43 | VERSION: ${{ steps.get_version.outputs.VERSION }} 44 | -------------------------------------------------------------------------------- /docs/openstackclient-az-list-fallback-f3207bd.diff: -------------------------------------------------------------------------------- 1 | See https://storyboard.openstack.org/#!/story/2010989 2 | 3 | diff --git a/openstackclient/common/availability_zone.py b/openstackclient/common/availability_zone.py 4 | index af6980f..5d62ecd 100644 5 | --- a/openstackclient/common/availability_zone.py 6 | +++ b/openstackclient/common/availability_zone.py 7 | @@ -16,7 +16,7 @@ 8 | import copy 9 | import logging 10 | 11 | -from novaclient import exceptions as nova_exceptions 12 | +from openstack import exceptions as sdk_exceptions 13 | from osc_lib.command import command 14 | from osc_lib import utils 15 | 16 | @@ -119,8 +119,8 @@ 17 | def _get_compute_availability_zones(self, parsed_args): 18 | compute_client = self.app.client_manager.sdk_connection.compute 19 | try: 20 | - data = compute_client.availability_zones(details=True) 21 | - except nova_exceptions.Forbidden: # policy doesn't allow 22 | + data = list(compute_client.availability_zones(details=True)) 23 | + except 
sdk_exceptions.ForbiddenException: # policy doesn't allow 24 | try: 25 | data = compute_client.availability_zones(details=False) 26 | except Exception: 27 | @@ -135,7 +135,7 @@ 28 | volume_client = self.app.client_manager.sdk_connection.volume 29 | data = [] 30 | try: 31 | - data = volume_client.availability_zones() 32 | + data = list(volume_client.availability_zones()) 33 | except Exception as e: 34 | LOG.debug('Volume availability zone exception: %s', e) 35 | if parsed_args.volume: 36 | -------------------------------------------------------------------------------- /startup/run-apimon-in-tmux-plus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | session="oshealthmon" 3 | export OS_CLOUD=plus-hm1 4 | #export OS_CACERT=/etc/ca-cert-ciab.crt 5 | tmux start-server 6 | tmux new-session -d -s $session -n apimon1 7 | tmux new-window -t $session:1 -n shell1 8 | tmux send-keys "cd ~/openstack-health-monitor; export OS_CLOUD=$OS_CLOUD" C-m 9 | #tmux send-keys "export OS_CACERT=$OS_CACERT" C-m 10 | tmux select-window -t $session:0 11 | tmux send-keys "cd ~/openstack-health-monitor; export OS_CLOUD=$OS_CLOUD" C-m 12 | #tmux send-keys "export OS_CACERT=$OS_CACERT" C-m 13 | tmux send-keys "./run_in_loop_pluspco.sh" C-m 14 | export OS_CLOUD=plus-hm2 15 | tmux new-window -t $session:2 -n apimon2 16 | tmux new-window -t $session:3 -n shell2 17 | tmux send-keys "cd ~/openstack-health-monitor; export OS_CLOUD=$OS_CLOUD" C-m 18 | tmux select-window -t $session:2 19 | tmux send-keys "cd ~/openstack-health-monitor; export OS_CLOUD=$OS_CLOUD" C-m 20 | sleep 1 21 | tmux send-keys "./run_in_loop_pluspco2.sh" C-m 22 | export OS_CLOUD=plus-hm3 23 | tmux new-window -t $session:4 -n apimon3 24 | tmux new-window -t $session:5 -n shell3 25 | tmux send-keys "cd ~/openstack-health-monitor; export OS_CLOUD=$OS_CLOUD" C-m 26 | tmux select-window -t $session:4 27 | tmux send-keys "cd ~/openstack-health-monitor; export OS_CLOUD=$OS_CLOUD" C-m 
28 | sleep 1 29 | tmux send-keys "./run_in_loop_pluspco3.sh" C-m 30 | export OS_CLOUD=plus-hm4 31 | tmux new-window -t $session:6 -n apimon4 32 | tmux new-window -t $session:7 -n shell4 33 | tmux send-keys "cd ~/openstack-health-monitor; export OS_CLOUD=$OS_CLOUD" C-m 34 | tmux select-window -t $session:6 35 | tmux send-keys "cd ~/openstack-health-monitor; export OS_CLOUD=$OS_CLOUD" C-m 36 | sleep 1 37 | tmux send-keys "./run_in_loop_pluspco4.sh" C-m 38 | -------------------------------------------------------------------------------- /run_poc-wgcloud.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Specify image names, JumpHost ideally has sfw2-snat 4 | # Options for the images: my openSUSE 15.2 (linux), Ubuntu 20.04 (ubuntu), 5 | # openSUSE Leap 15.2 (opensuse), CentOS 8 (centos) 6 | # You can freely mix ... 7 | export JHIMG="Debian 12" 8 | export IMG="Debian 12" 9 | 10 | # Terminate early on auth error 11 | openstack server list >/dev/null || exit 1 12 | 13 | # Find Floating IPs 14 | FIPLIST="" 15 | FIPS=$(openstack floating ip list -f value -c ID) 16 | for fip in $FIPS; do 17 | FIP=$(openstack floating ip show $fip | grep -o "APIMonitor_[0-9]*") 18 | if test -n "$FIP"; then FIPLIST="${FIPLIST}${FIP}_ 19 | "; fi 20 | done 21 | FIPLIST=$(echo "$FIPLIST" | grep -v '^$' | sort -u) 22 | # Cleanup previous interrupted runs 23 | SERVERS=$(openstack server list | grep -o "APIMonitor_[0-9]*_" | sort -u) 24 | KEYPAIR=$(openstack keypair list | grep -o "APIMonitor_[0-9]*_" | sort -u) 25 | VOLUMES=$(openstack volume list | grep -o "APIMonitor_[0-9]*_" | sort -u) 26 | NETWORK=$(openstack network list | grep -o "APIMonitor_[0-9]*_" | sort -u) 27 | ROUTERS=$(openstack router list | grep -o "APIMonitor_[0-9]*_" | sort -u) 28 | SECGRPS=$(openstack security group list | grep -o "APIMonitor_[0-9]*_" | sort -u) 29 | echo CLEANUP: FIPs $FIPLIST Servers $SERVERS Keypairs $KEYPAIR Volumes $VOLUMES Networks $NETWORK 
LoadBalancers $LOADBAL Routers $ROUTERS SecGrps $SECGRPS 30 | for ENV in $FIPLIST; do 31 | echo "******************************" 32 | echo "CLEAN $ENV" 33 | bash ./api_monitor.sh -o -T -q -c CLEANUP $ENV 34 | echo "******************************" 35 | done 36 | TOCLEAN=$(echo "$SERVERS 37 | $KEYPAIR 38 | $VOLUMES 39 | $NETWORK 40 | $ROUTERS 41 | $SECGRPS 42 | " | grep -v '^$' | sort -u) 43 | for ENV in $TOCLEAN; do 44 | echo "******************************" 45 | echo "CLEAN $ENV" 46 | bash ./api_monitor.sh -o -q -c CLEANUP $ENV 47 | echo "******************************" 48 | done 49 | 50 | exec ./api_monitor.sh -O -C -D -n 2 -s -b -B -M -T -X -S poc-wgcloud "$@" 51 | -------------------------------------------------------------------------------- /Containerfile: -------------------------------------------------------------------------------- 1 | ARG PYTHON_VERSION=3.8 2 | FROM python:${PYTHON_VERSION}-alpine 3 | 4 | ARG VERSION=latest 5 | ARG OPENSTACK_VERSION=zed 6 | 7 | ARG USER_ID=45000 8 | ARG GROUP_ID=45000 9 | 10 | COPY files/requirements.txt /requirements.txt 11 | COPY api_monitor.sh /api_monitor.sh 12 | 13 | ENV CRYPTOGRAPHY_DONT_BUILD_RUST 1 14 | # hadolint ignore=DL3018 15 | RUN apk add --no-cache \ 16 | bash \ 17 | iputils \ 18 | jq \ 19 | libstdc++ \ 20 | openssh-client \ 21 | curl \ 22 | && apk add --no-cache --virtual .build-deps \ 23 | build-base \ 24 | libffi-dev \ 25 | openssl-dev \ 26 | python3-dev \ 27 | && if [ $VERSION = "latest" ]; then wget -P / -O requirements.tar.gz https://tarballs.opendev.org/openstack/requirements/requirements-master.tar.gz; fi \ 28 | && if [ $VERSION != "latest" ]; then wget -P / -O requirements.tar.gz https://tarballs.opendev.org/openstack/requirements/requirements-stable-${OPENSTACK_VERSION}.tar.gz; fi \ 29 | && mkdir /requirements \ 30 | && tar xzf /requirements.tar.gz -C /requirements --strip-components=1 \ 31 | && rm -rf /requirements.tar.gz \ 32 | && while read -r package; do \ 33 | grep -q "$package" 
/requirements/upper-constraints.txt && \ 34 | echo "$package" >> /packages.txt || true; \ 35 | done < /requirements.txt \ 36 | && pip3 --no-cache-dir install -c /requirements/upper-constraints.txt -r /packages.txt \ 37 | && rm -rf /requirements \ 38 | /requirements.txt \ 39 | /packages.txt \ 40 | && apk del .build-deps \ 41 | && openstack complete > /osc.bash_completion \ 42 | && addgroup -g $GROUP_ID dragon \ 43 | && adduser -D -u $USER_ID -G dragon dragon \ 44 | && mkdir /configuration /data \ 45 | && chown -R dragon: /configuration /data \ 46 | && chmod +x /api_monitor.sh 47 | 48 | USER dragon 49 | 50 | WORKDIR /configuration 51 | VOLUME ["/configuration", "/data"] 52 | 53 | CMD ["/api_monitor.sh -O -C -D -N 2 -n 8 -s -L -b -B -a 2 -t -T -R -s -i 5"] 54 | ENTRYPOINT ["/api_monitor.sh"] 55 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Specify image names, JumpHost ideally has sfw2-snat 4 | # Options for the images: my openSUSE 15.2 (linux), Ubuntu 20.04 (ubuntu), 5 | # openSUSE Leap 15.2 (opensuse), CentOS 8 (centos) 6 | # You can freely mix ... 
7 | #export JHIMG="Ubuntu 22.04" 8 | #export JHIMG="openSUSE 15.6" 9 | export JHIMG="Debian 12" 10 | #export ADDJHVOLSIZE=2 11 | #export IMG="Ubuntu 22.04" 12 | #export IMG="openSUSE 15.6" 13 | export IMG="Debian 12" 14 | #export DEFLTUSER=ubuntu 15 | #export JHDEFLTUSER=ubuntu 16 | # You can use a filter when listing images (because your catalog is huge) 17 | #export JHIMGFILT="--property-filter os_version=openSUSE-15.0" 18 | #export IMGFILT="--property-filter os_version=openSUSE-15.0" 19 | # ECP flavors 20 | #if test $OS_REGION_NAME == Kna1; then 21 | #export JHFLAVOR=1C-1GB 22 | #export FLAVOR=1C-1GB 23 | #else 24 | #export JHFLAVOR=1C-1GB-10GB 25 | #export FLAVOR=1C-1GB-10GB 26 | #fi 27 | # EMail notifications sender address 28 | #export FROM=sender@domain.org 29 | # Only use one AZ 30 | #export AZS="nova" 31 | 32 | # Assume OS_ parameters have already been sourced from some .openrc file 33 | # or just set OS_CLOUD (using clouds.yaml/secure.yaml) 34 | #export OS_CLOUD=cloudname 35 | 36 | export EMAIL_PARAM=${EMAIL_PARAM:-"sender@domain.org"} 37 | 38 | # Terminate early on auth error 39 | openstack server list >/dev/null 40 | 41 | # Cleanup previous interrupted runs 42 | SERVERS=$(openstack server list | grep -o "APIMonitor_[0-9]*_" | sort -u) 43 | VOLUMES=$(openstack volume list | grep -o "APIMonitor_[0-9]*_" | sort -u) 44 | NETWORK=$(openstack network list | grep -o "APIMonitor_[0-9]*_" | sort -u) 45 | ROUTERS=$(openstack router list | grep -o "APIMonitor_[0-9]*_" | sort -u) 46 | SECGRPS=$(openstack security group list | grep -o "APIMonitor_[0-9]*_" | sort -u) 47 | TOCLEAN=$(echo "$SERVERS 48 | $VOLUMES 49 | $NETWORK 50 | $ROUTERS 51 | $SECGRPS 52 | " | sort -u) 53 | for ENV in $TOCLEAN; do 54 | echo "******************************" 55 | echo "CLEAN $ENV" 56 | bash ./api_monitor.sh -q -o -c CLEANUP $ENV 57 | echo "******************************" 58 | done 59 | 60 | #bash ./api_monitor.sh -c -x -d -n 8 -l last.log -e $EMAIL_PARAM -S -i 9 61 | exec 
./api_monitor.sh -O -C -D -N 2 -n 8 -s -LL -b -B -T -e recipient@domain.org "$@" 62 | 63 | -------------------------------------------------------------------------------- /stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # 3 | # Calculate stats over array of numbers (read from stdin) 4 | # Outpus Num Min Med Avg Pct% Max 5 | # The percentile can be specified on the command line (-p) 6 | # The number of digits as well (-d) 7 | # 8 | # (c) Kurt Garloff 9 | # SPDX-License-Identifier: CC-BY-SA-4.0 10 | 11 | import sys, os 12 | 13 | prec = 1e-5 14 | 15 | def stats(arr, pct=95, digi=2, machine=False): 16 | aln = len(arr) 17 | arr.sort() 18 | middle = int(aln/2) 19 | if aln%2: 20 | med = arr[middle] 21 | else: 22 | med = (arr[middle-1]+arr[middle])/2.0 23 | avg = sum(arr)/aln 24 | pctpos = (aln-1)*pct/100 25 | pctposi = int(pctpos+prec) 26 | wgt = pctpos - pctposi 27 | if abs(wgt) < prec: 28 | pctl = arr[pctposi] 29 | else: 30 | pctl = arr[pctposi+1]*wgt + arr[pctposi]*(1-wgt) 31 | fmt = ".%if" % digi 32 | if abs(pct - int(pct)) == 0: 33 | pctfmt = "%i%%" % pct 34 | else: 35 | pctfmt = "%.2f%%" % pct 36 | if machine: 37 | fstr = "{}|{:%s}|{:%s}|{:%s}|{:%s}|{:%s}" % (fmt, fmt, fmt, fmt, fmt) 38 | else: 39 | if pct > 50: 40 | fstr = "Num {0} Min {1:%s} Med {2:%s} Avg {3:%s} %s {4:%s} Max {5:%s}" % (fmt, fmt, fmt, pctfmt, fmt, fmt) 41 | else: 42 | fstr = "Num {0} Min {1:%s} %s {4:%s} Med {2:%s} Avg {3:%s} Max {5:%s}" % (fmt, pctfmt, fmt, fmt, fmt, fmt) 43 | #print(fstr) 44 | print(fstr.format(aln, arr[0], med, avg, pctl, arr[aln-1])) 45 | 46 | def main(argv): 47 | pct = 95 48 | dig = 2 49 | machine = False 50 | optidx = 0 51 | while optidx < len(argv): 52 | if argv[optidx] == '-p': 53 | optidx += 1 54 | pct = float(argv[optidx]) 55 | elif argv[optidx] == '-d': 56 | optidx += 1 57 | dig = int(argv[optidx]) 58 | elif argv[optidx] == '-m': 59 | machine = True 60 | else: 61 | print("Error: Unknown option 
\"%s\". Usage stats.py [-p pct] [-d dig] [-m] < data" % argv[optidx]) 62 | sys.exit(1) 63 | optidx += 1 64 | 65 | arr = list(map(lambda x: float(x), sys.stdin.read().rstrip("\n").split(" "))) 66 | stats(arr, pct, dig, machine) 67 | 68 | if __name__ == "__main__": 69 | main(sys.argv[1:]) 70 | -------------------------------------------------------------------------------- /startup/README.md: -------------------------------------------------------------------------------- 1 | # How to enable autostart for OpenStack Health Monitor (aka apimon). 2 | 3 | ## Preparation 4 | * Checkout a copy of openstack health monitor 5 | `git clone https://github.com/SovereignCloudStack/openstack-health-monitor` 6 | * Install the python3-openstackclient tools 7 | * Configure your cloud access in `~/.config/openstack/clouds.yaml` and 8 | `secure.yaml` 9 | * Ensure your openstack client tools work: 10 | ``` 11 | export OS_CLOUD=YOURCLOUD 12 | openstack image list 13 | ``` 14 | * Ensure that apimon works 15 | `./api_monitor.sh -O -C -D -N 2 -n 8 -L -b -B -T -i 1` 16 | This will run one iteration of the monitor, creating 10 VMs with default 17 | flavors (SCS-1V-2 and SCS-1L-1) and images (Ubuntu 22.04). 18 | This will take 5 to 10 minutes. 19 | The option `-L` enables the loadbalancer testing; you might have to remove 20 | it if your cloud does not support octavia/lbaasv2. 21 | See `./api_monitor.sh --help` for an overview over options. 22 | * Create a run script by copying e.g. `run_wave.sh` and editing it according 23 | to your needs. 24 | * Create a file `run_in_loop.sh` which runs `run_YOURCLOUD.sh` in a loop: 25 | ``` 26 | #!/bin/bash 27 | rm stop-os-hm 2>/dev/null 28 | while true; do 29 | ./run_YOURCLOUD.sh -s -i 200 30 | if test -e stop-os-hm; then break; fi 31 | echo -n "Hit ^C to abort ..." 32 | sleep 15; echo 33 | done 34 | ``` 35 | This will run 200 iterations in `api_monitor.sh` and then restart. 
36 | 37 | ## System startup 38 | * Edit the tmux startup script `run-apimon-in-tmux.sh` to set `OS_CLOUD` 39 | correctly for your cloud. 40 | * If you are not using ~/openstack-health-monitor for the checked out git 41 | tree, you need to adjust the scripts and the systemd unit file here 42 | accordingly. 43 | * Copy `apimon.service` to `~/.config/systemd/user`. (You might need to 44 | create that directory first.) 45 | * Test that you can start the service by calling 46 | `systemctl --user start apimon` 47 | * This should create a tmux session in which the OpenStack Health Monitor 48 | is running. Attach to the tmux session `tmux attach -t oshealthmon`. 49 | * The apimon service uses `run-apimon-in-tmux.sh` and `kill-apimon-in-tmux.sh` 50 | scripts for startup and stopping. There are also scripts that open four 51 | pairs of windows to start 4 jobs and kill 4 jobs with `-plus` in the name. 52 | * You can stop the service by hitting ^C (Control-c), possibly several times. 53 | * Now enable the service: `systemctl --user enable apimon` 54 | * And tell systemd that it should create a user session on startup: 55 | `sudo loginctl enable-linger $USER` 56 | -------------------------------------------------------------------------------- /run_otc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Specify image names, JumpHost ideally has sfw2-snat 4 | # Options for the images: my openSUSE 15.2 (linux), Ubuntu 20.04 (ubuntu), 5 | # openSUSE Leap 15.2 (opensuse), CentOS 8 (centos) 6 | # You can freely mix ...
7 | #export JHIMG="Ubuntu 20.04" 8 | #export JHIMG="openSUSE 15.2" 9 | export JHIMG="Standard_Ubuntu_20.04_latest" 10 | #export ADDJHVOLSIZE=2 11 | export IMG="Standard_Ubuntu_20.04_latest" 12 | #export IMG="openSUSE 15.2" 13 | #export IMG="CentOS 8" 14 | # DEFLTUSER from image_original_user property 15 | #export DEFLTUSER=opensuse 16 | export DEFLTUSER=ubuntu 17 | export JHDEFLTUSER=ubuntu 18 | # You can use a filter when listing images (because your catalog is huge) 19 | #export JHIMGFILT="--property-filter os_version=openSUSE-15.0" 20 | #export IMGFILT="--property-filter os_version=openSUSE-15.0" 21 | # ECP flavors 22 | #if test $OS_REGION_NAME == Kna1; then 23 | export JHFLAVOR=s2.medium.1 24 | export FLAVOR=s2.medium.1 25 | #else 26 | #export JHFLAVOR=1C-1GB-10GB 27 | #export FLAVOR=1C-1GB-10GB 28 | #fi 29 | # EMail notifications sender address 30 | export FROM=kurt@garloff.de 31 | # Only use one AZ 32 | #export AZS="eu-de-01 eu-de-02 eu-de-03" 33 | export AZS="eu-de-01 eu-de-03" 34 | export VAZS="eu-de-01 eu-de-03" 35 | # Upload (compressed) logfiles and stats to container 36 | export SWIFTCONTAINER=OS-HM-Logfiles 37 | # OTC Settings: NAMESERVER ... 
38 | export NAMESERVER=100.125.4.25 39 | export DEFAULTNAMESERVER=1 40 | export FIPWAITPORTDEVOWNER=1 41 | export OLD_OCTAVIA=1 42 | 43 | # Assume OS_ parameters have already been sourced from some .openrc file 44 | # export OS_CLOUD=gx-scs-healthmgr 45 | 46 | export EMAIL_PARAM=${EMAIL_PARAM:-"scs@garloff.de"} 47 | 48 | # Notifications & Alarms (pass as list, arrays can't be exported) 49 | ALARM_EMAIL_ADDRESSES="scs@garloff.de" 50 | NOTE_EMAIL_ADDRESSES="scs@garloff.de" 51 | #ALARM_EMAIL_ADDRESSES="scs@garloff.de scs-monitoring@plusserver.com" 52 | #NOTE_EMAIL_ADDRESSES="scs@garloff.de" 53 | export ALARM_EMAIL_ADDRESSES NOTE_EMAIL_ADDRESSES 54 | 55 | # Terminate early on auth error 56 | openstack server list >/dev/null 57 | 58 | # Find Floating IPs 59 | FIPLIST="" 60 | FIPS=$(openstack floating ip list -f value -c ID) 61 | for fip in $FIPS; do 62 | FIP=$(openstack floating ip show $fip | grep -o "APIMonitor_[0-9]*") 63 | if test -n "$FIP"; then FIPLIST="${FIPLIST}${FIP}_ 64 | "; fi 65 | done 66 | # Cleanup previous interrupted runs 67 | SERVERS=$(openstack server list | grep -o "APIMonitor_[0-9]*_" | sort -u) 68 | KEYPAIR=$(openstack keypair list | grep -o "APIMonitor_[0-9]*_" | sort -u) 69 | VOLUMES=$(openstack volume list | grep -o "APIMonitor_[0-9]*_" | sort -u) 70 | NETWORK=$(openstack network list | grep -o "APIMonitor_[0-9]*_" | sort -u) 71 | LOADBAL=$(openstack loadbalancer list | grep -o "APIMonitor_[0-9]*_" | sort -u) 72 | ROUTERS=$(openstack router list | grep -o "APIMonitor_[0-9]*_" | sort -u) 73 | SECGRPS=$(openstack security group list | grep -o "APIMonitor_[0-9]*_" | sort -u) 74 | echo CLEANUP: FIPs $FIPLIST Servers $SERVERS Volumes $VOLUMES Networks $NETWORK LoadBalancers $LOADBAL Routers $ROUTERS SecGrps $SECGRPS 75 | TOCLEAN=$(echo "$FIPLIST 76 | $SERVERS 77 | $KEYPAIR 78 | $VOLUMES 79 | $NETWORK 80 | $LOADBAL 81 | $ROUTERS 82 | $SECGRPS 83 | " | grep -v '^$' | sort -u) 84 | for ENV in $TOCLEAN; do 85 | echo "******************************" 86 | 
echo "CLEAN $ENV" 87 | bash ./api_monitor.sh -o -q -c CLEANUP $ENV 88 | echo "******************************" 89 | done 90 | 91 | #bash ./api_monitor.sh -c -x -d -n 8 -l last.log -e $EMAIL_PARAM -S -i 9 92 | #exec api_monitor.sh -o -C -D -N 2 -n 8 -s -e sender@domain.org "$@" 93 | exec ./api_monitor.sh -O -C -D -n 6 -s -b -B -a 2 -R -X "$@" 94 | 95 | -------------------------------------------------------------------------------- /run_pluspco3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Specify image names, JumpHost ideally has sfw2-snat 4 | # Options for the images: my openSUSE 15.2 (linux), Ubuntu 20.04 (ubuntu), 5 | # openSUSE Leap 15.2 (opensuse), CentOS 8 (centos) 6 | # You can freely mix ... 7 | #export JHIMG="Ubuntu 22.04" 8 | #export JHIMG="openSUSE 15.5" 9 | export JHIMG="Debian 12" 10 | #export ADDJHVOLSIZE=2 11 | #export IMG="Ubuntu 22.04" 12 | #export IMG="openSUSE 15.5" 13 | export IMG="Debian 12" 14 | # DEFLTUSER from image_original_user property 15 | #export DEFLTUSER=opensuse 16 | #export DEFLTUSER=ubuntu 17 | #export JHDEFLTUSER=ubuntu 18 | # You can use a filter when listing images (because your catalog is huge) 19 | #export JHIMGFILT="--property-filter os_version=openSUSE-15.0" 20 | #export IMGFILT="--property-filter os_version=openSUSE-15.0" 21 | # ECP flavors 22 | #if test $OS_REGION_NAME == Kna1; then 23 | #export JHFLAVOR=SCS-1L:1:5 24 | export JHFLAVOR=SCS-1V-2 25 | #export FLAVOR=SCS-1L:1:5 26 | export FLAVOR=SCS-1L-1 27 | #else 28 | #export JHFLAVOR=1C-1GB-10GB 29 | #export FLAVOR=1C-1GB-10GB 30 | #fi 31 | # EMail notifications sender address 32 | export FROM=kurt.garloff@sovereignit.cloud 33 | # Only use one AZ 34 | export AZS="az1" 35 | # Upload (compressed) logfiles and stats to container 36 | export SWIFTCONTAINER=OS-HM-Logfiles 37 | 38 | # Assume OS_ parameters have already been sourced from some .openrc file 39 | # export OS_CLOUD=gx-scs-healthmgr 40 | 41 | export 
EMAIL_PARAM=${EMAIL_PARAM:-"scs@garloff.de"} 42 | 43 | # Notifications & Alarms (pass as list, arrays can't be exported) 44 | ALARM_EMAIL_ADDRESSES="scs@garloff.de" 45 | NOTE_EMAIL_ADDRESSES="scs@garloff.de" 46 | #ALARM_EMAIL_ADDRESSES="scs@garloff.de scs-monitoring@plusserver.com" 47 | #NOTE_EMAIL_ADDRESSES="scs@garloff.de" 48 | export ALARM_EMAIL_ADDRESSES NOTE_EMAIL_ADDRESSES 49 | 50 | # Terminate early on auth error 51 | openstack server list >/dev/null 52 | 53 | # Find Floating IPs 54 | FIPLIST="" 55 | FIPS=$(openstack floating ip list -f value -c ID) 56 | for fip in $FIPS; do 57 | FIP=$(openstack floating ip show $fip | grep -o "APIMonitor_[0-9]*") 58 | if test -n "$FIP"; then FIPLIST="${FIPLIST}${FIP}_ 59 | "; fi 60 | done 61 | FIPLIST=$(echo "$FIPLIST" | grep -v '^$' | sort -u) 62 | # Cleanup previous interrupted runs 63 | SERVERS=$(openstack server list | grep -o "APIMonitor_[0-9]*_" | sort -u) 64 | VOLUMES=$(openstack volume list | grep -o "APIMonitor_[0-9]*_" | sort -u) 65 | NETWORK=$(openstack network list | grep -o "APIMonitor_[0-9]*_" | sort -u) 66 | LOADBAL=$(openstack loadbalancer list | grep -o "APIMonitor_[0-9]*_" | sort -u) 67 | ROUTERS=$(openstack router list | grep -o "APIMonitor_[0-9]*_" | sort -u) 68 | SECGRPS=$(openstack security group list | grep -o "APIMonitor_[0-9]*_" | sort -u) 69 | echo CLEANUP: FIPs $FIPLIST Servers $SERVERS Volumes $VOLUMES Networks $NETWORK LoadBalancers $LOADBAL Routers $ROUTERS SecGrps $SECGRPS 70 | for ENV in $FIPLIST; do 71 | echo "******************************" 72 | echo "CLEAN $ENV" 73 | bash ./api_monitor.sh -o -T -q -c CLEANUP $ENV 74 | echo "******************************" 75 | done 76 | TOCLEAN=$(echo "$SERVERS 77 | $VOLUMES 78 | $NETWORK 79 | $LOADBAL 80 | $ROUTERS 81 | $SECGRPS 82 | " | grep -v '^$' | sort -u) 83 | for ENV in $TOCLEAN; do 84 | echo "******************************" 85 | echo "CLEAN $ENV" 86 | #bash ./api_monitor.sh -o -T -q -LL -c CLEANUP $ENV 87 | bash ./api_monitor.sh -o -q -LL -c 
CLEANUP $ENV 88 | echo "******************************" 89 | done 90 | 91 | #bash ./api_monitor.sh -c -x -d -n 8 -l last.log -e $EMAIL_PARAM -S -i 9 92 | #exec api_monitor.sh -o -C -D -N 2 -n 8 -s -e sender@domain.org "$@" 93 | #exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -L -b -B -M -a 2 -t -T -R -X "$@" 94 | exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -LL -b -B -M -a 2 -t -T -R -X -S plus-prod3 "$@" 95 | 96 | -------------------------------------------------------------------------------- /run_pluspco4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Specify image names, JumpHost ideally has sfw2-snat 4 | # Options for the images: my openSUSE 15.2 (linux), Ubuntu 20.04 (ubuntu), 5 | # openSUSE Leap 15.2 (opensuse), CentOS 8 (centos) 6 | # You can freely mix ... 7 | #export JHIMG="Ubuntu 22.04" 8 | #export JHIMG="openSUSE 15.5" 9 | export JHIMG="Debian 12" 10 | #export ADDJHVOLSIZE=2 11 | #export IMG="Ubuntu 22.04" 12 | #export IMG="openSUSE 15.5" 13 | export IMG="Debian 12" 14 | # DEFLTUSER from image_original_user property 15 | #export DEFLTUSER=opensuse 16 | #export DEFLTUSER=ubuntu 17 | #export JHDEFLTUSER=ubuntu 18 | # You can use a filter when listing images (because your catalog is huge) 19 | #export JHIMGFILT="--property-filter os_version=openSUSE-15.0" 20 | #export IMGFILT="--property-filter os_version=openSUSE-15.0" 21 | # ECP flavors 22 | #if test $OS_REGION_NAME == Kna1; then 23 | #export JHFLAVOR=SCS-1L:1:5 24 | export JHFLAVOR=SCS-1V-2 25 | #export FLAVOR=SCS-1L:1:5 26 | export FLAVOR=SCS-1L-1 27 | #else 28 | #export JHFLAVOR=1C-1GB-10GB 29 | #export FLAVOR=1C-1GB-10GB 30 | #fi 31 | # EMail notifications sender address 32 | export FROM=kurt.garloff@sovereignit.cloud 33 | # Only use one AZ 34 | #export AZS="az1" 35 | # Upload (compressed) logfiles and stats to container 36 | export SWIFTCONTAINER=OS-HM-Logfiles 37 | 38 | # Assume OS_ parameters have already been sourced from some .openrc 
file 39 | # export OS_CLOUD=gx-scs-healthmgr 40 | 41 | export EMAIL_PARAM=${EMAIL_PARAM:-"scs@garloff.de"} 42 | 43 | # Notifications & Alarms (pass as list, arrays can't be exported) 44 | ALARM_EMAIL_ADDRESSES="scs@garloff.de" 45 | NOTE_EMAIL_ADDRESSES="scs@garloff.de" 46 | #ALARM_EMAIL_ADDRESSES="scs@garloff.de scs-monitoring@plusserver.com" 47 | #NOTE_EMAIL_ADDRESSES="scs@garloff.de" 48 | export ALARM_EMAIL_ADDRESSES NOTE_EMAIL_ADDRESSES 49 | 50 | # Terminate early on auth error 51 | openstack server list >/dev/null 52 | 53 | # Find Floating IPs 54 | FIPLIST="" 55 | FIPS=$(openstack floating ip list -f value -c ID) 56 | for fip in $FIPS; do 57 | FIP=$(openstack floating ip show $fip | grep -o "APIMonitor_[0-9]*") 58 | if test -n "$FIP"; then FIPLIST="${FIPLIST}${FIP}_ 59 | "; fi 60 | done 61 | FIPLIST=$(echo "$FIPLIST" | grep -v '^$' | sort -u) 62 | # Cleanup previous interrupted runs 63 | SERVERS=$(openstack server list | grep -o "APIMonitor_[0-9]*_" | sort -u) 64 | VOLUMES=$(openstack volume list | grep -o "APIMonitor_[0-9]*_" | sort -u) 65 | NETWORK=$(openstack network list | grep -o "APIMonitor_[0-9]*_" | sort -u) 66 | LOADBAL=$(openstack loadbalancer list | grep -o "APIMonitor_[0-9]*_" | sort -u) 67 | ROUTERS=$(openstack router list | grep -o "APIMonitor_[0-9]*_" | sort -u) 68 | SECGRPS=$(openstack security group list | grep -o "APIMonitor_[0-9]*_" | sort -u) 69 | echo CLEANUP: FIPs $FIPLIST Servers $SERVERS Volumes $VOLUMES Networks $NETWORK LoadBalancers $LOADBAL Routers $ROUTERS SecGrps $SECGRPS 70 | for ENV in $FIPLIST; do 71 | echo "******************************" 72 | echo "CLEAN $ENV" 73 | bash ./api_monitor.sh -o -T -q -c CLEANUP $ENV 74 | echo "******************************" 75 | done 76 | TOCLEAN=$(echo "$SERVERS 77 | $VOLUMES 78 | $NETWORK 79 | $LOADBAL 80 | $ROUTERS 81 | $SECGRPS 82 | " | grep -v '^$' | sort -u) 83 | for ENV in $TOCLEAN; do 84 | echo "******************************" 85 | echo "CLEAN $ENV" 86 | #bash ./api_monitor.sh -o -T -q -LL 
-c CLEANUP $ENV 87 | bash ./api_monitor.sh -o -q -LL -c CLEANUP $ENV 88 | echo "******************************" 89 | done 90 | 91 | #bash ./api_monitor.sh -c -x -d -n 8 -l last.log -e $EMAIL_PARAM -S -i 9 92 | #exec api_monitor.sh -o -C -D -N 2 -n 8 -s -e sender@domain.org "$@" 93 | #exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -L -b -B -M -a 2 -t -T -R -X "$@" 94 | exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -LL -b -B -M -a 2 -t -T -R -X -S plus-prod4 "$@" 95 | 96 | -------------------------------------------------------------------------------- /run_artcodix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Specify image names, JumpHost ideally has sfw2-snat 4 | # Options for the images: my openSUSE 15.2 (linux), Ubuntu 20.04 (ubuntu), 5 | # openSUSE Leap 15.2 (opensuse), CentOS 8 (centos) 6 | # You can freely mix ... 7 | #export JHIMG="Ubuntu 22.04" 8 | #export JHIMG="openSUSE 15.5" 9 | export JHIMG="Debian 12" 10 | #export ADDJHVOLSIZE=2 11 | #export IMG="Ubuntu 22.04" 12 | #export IMG="openSUSE 15.5" 13 | export IMG="Debian 12" 14 | # DEFLTUSER from image_original_user property 15 | #export DEFLTUSER=opensuse 16 | #export DEFLTUSER=ubuntu 17 | #export JHDEFLTUSER=ubuntu 18 | # You can use a filter when listing images (because your catalog is huge) 19 | #export JHIMGFILT="--property-filter os_version=openSUSE-15.0" 20 | #export IMGFILT="--property-filter os_version=openSUSE-15.0" 21 | # ECP flavors 22 | #if test $OS_REGION_NAME == Kna1; then 23 | export JHFLAVOR=SCS-1L-1 24 | #export JHFLAVOR=SCS-1V:1:10 25 | export FLAVOR=SCS-1L-1 26 | #else 27 | #export JHFLAVOR=1C-1GB-10GB 28 | #export FLAVOR=1C-1GB-10GB 29 | #fi 30 | # EMail notifications sender address 31 | export FROM=kurt.garloff@sovereignit.cloud 32 | # Only use one AZ 33 | export AZS="AZ1" 34 | # Upload (compressed) logfiles and stats to container 35 | export SWIFTCONTAINER=OS-HM-Logfiles 36 | 37 | # Assume OS_ parameters have already been 
sourced from some .openrc file 38 | # export OS_CLOUD=gx-scs-healthmgr 39 | 40 | export EMAIL_PARAM=${EMAIL_PARAM:-"scs@garloff.de"} 41 | 42 | # Notifications & Alarms (pass as list, arrays can't be exported) 43 | ALARM_EMAIL_ADDRESSES="scs@garloff.de" 44 | NOTE_EMAIL_ADDRESSES="scs@garloff.de" 45 | #ALARM_EMAIL_ADDRESSES="scs@garloff.de scs-monitoring@plusserver.com" 46 | #NOTE_EMAIL_ADDRESSES="scs@garloff.de" 47 | export ALARM_EMAIL_ADDRESSES NOTE_EMAIL_ADDRESSES 48 | 49 | # Terminate early on auth error 50 | openstack server list >/dev/null 51 | 52 | # Find Floating IPs 53 | FIPLIST="" 54 | FIPS=$(openstack floating ip list -f value -c ID) 55 | for fip in $FIPS; do 56 | FIP=$(openstack floating ip show $fip | grep -o "APIMonitor_[0-9]*") 57 | if test -n "$FIP"; then FIPLIST="${FIPLIST}${FIP}_ 58 | "; fi 59 | done 60 | FIPLIST=$(echo "$FIPLIST" | grep -v '^$' | sort -u) 61 | # Cleanup previous interrupted runs 62 | SERVERS=$(openstack server list | grep -o "APIMonitor_[0-9]*_" | sort -u) 63 | KEYPAIR=$(openstack keypair list | grep -o "APIMonitor_[0-9]*_" | sort -u) 64 | VOLUMES=$(openstack volume list | grep -o "APIMonitor_[0-9]*_" | sort -u) 65 | NETWORK=$(openstack network list | grep -o "APIMonitor_[0-9]*_" | sort -u) 66 | LOADBAL=$(openstack loadbalancer list | grep -o "APIMonitor_[0-9]*_" | sort -u) 67 | ROUTERS=$(openstack router list | grep -o "APIMonitor_[0-9]*_" | sort -u) 68 | SECGRPS=$(openstack security group list | grep -o "APIMonitor_[0-9]*_" | sort -u) 69 | echo CLEANUP: FIPs $FIPLIST Servers $SERVERS Keypairs $KEYPAIR Volumes $VOLUMES Networks $NETWORK LoadBalancers $LOADBAL Routers $ROUTERS SecGrps $SECGRPS 70 | for ENV in $FIPLIST; do 71 | echo "******************************" 72 | echo "CLEAN $ENV" 73 | bash ./api_monitor.sh -o -T -q -c CLEANUP $ENV 74 | echo "******************************" 75 | done 76 | TOCLEAN=$(echo "$SERVERS 77 | $KEYPAIR 78 | $VOLUMES 79 | $NETWORK 80 | $LOADBAL 81 | $ROUTERS 82 | $SECGRPS 83 | " | grep -v '^$' | sort 
-u) 84 | for ENV in $TOCLEAN; do 85 | echo "******************************" 86 | echo "CLEAN $ENV" 87 | #bash ./api_monitor.sh -o -T -q -LL -c CLEANUP $ENV 88 | bash ./api_monitor.sh -o -q -LL -c CLEANUP $ENV 89 | echo "******************************" 90 | done 91 | 92 | #bash ./api_monitor.sh -c -x -d -n 8 -l last.log -e $EMAIL_PARAM -S -i 9 93 | #exec api_monitor.sh -o -C -D -N 2 -n 8 -s -e sender@domain.org "$@" 94 | #exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -L -b -B -M -a 2 -T -R -X "$@" 95 | exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -LO -b -B -M -a 2 -t -T -R -X -S artcodix "$@" 96 | 97 | -------------------------------------------------------------------------------- /run_pluspco.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Specify image names, JumpHost ideally has sfw2-snat 4 | # Options for the images: my openSUSE 15.2 (linux), Ubuntu 20.04 (ubuntu), 5 | # openSUSE Leap 15.2 (opensuse), CentOS 8 (centos) 6 | # You can freely mix ... 
7 | #export JHIMG="Ubuntu 22.04" 8 | #export JHIMG="openSUSE 15.5" 9 | export JHIMG="Debian 12" 10 | #export ADDJHVOLSIZE=2 11 | #export IMG="Ubuntu 22.04" 12 | #export IMG="openSUSE 15.5" 13 | export IMG="Debian 12" 14 | # DEFLTUSER from image_original_user property 15 | #export DEFLTUSER=opensuse 16 | #export DEFLTUSER=ubuntu 17 | #export JHDEFLTUSER=ubuntu 18 | # You can use a filter when listing images (because your catalog is huge) 19 | #export JHIMGFILT="--property-filter os_version=openSUSE-15.0" 20 | #export IMGFILT="--property-filter os_version=openSUSE-15.0" 21 | # ECP flavors 22 | #if test $OS_REGION_NAME == Kna1; then 23 | export JHFLAVOR=SCS-1L-1 24 | #export JHFLAVOR=SCS-1V:1:10 25 | export FLAVOR=SCS-1L-1 26 | #else 27 | #export JHFLAVOR=1C-1GB-10GB 28 | #export FLAVOR=1C-1GB-10GB 29 | #fi 30 | # EMail notifications sender address 31 | export FROM=kurt.garloff@sovereignit.cloud 32 | # Only use one AZ 33 | export AZS="az1" 34 | # Upload (compressed) logfiles and stats to container 35 | export SWIFTCONTAINER=OS-HM-Logfiles 36 | 37 | # Assume OS_ parameters have already been sourced from some .openrc file 38 | # export OS_CLOUD=gx-scs-healthmgr 39 | 40 | export EMAIL_PARAM=${EMAIL_PARAM:-"scs@garloff.de"} 41 | 42 | # Notifications & Alarms (pass as list, arrays can't be exported) 43 | ALARM_EMAIL_ADDRESSES="scs@garloff.de" 44 | NOTE_EMAIL_ADDRESSES="scs@garloff.de" 45 | #ALARM_EMAIL_ADDRESSES="scs@garloff.de scs-monitoring@plusserver.com" 46 | #NOTE_EMAIL_ADDRESSES="scs@garloff.de" 47 | export ALARM_EMAIL_ADDRESSES NOTE_EMAIL_ADDRESSES 48 | 49 | # Terminate early on auth error 50 | openstack server list >/dev/null 51 | 52 | # Find Floating IPs 53 | FIPLIST="" 54 | FIPS=$(openstack floating ip list -f value -c ID) 55 | for fip in $FIPS; do 56 | FIP=$(openstack floating ip show $fip | grep -o "APIMonitor_[0-9]*") 57 | if test -n "$FIP"; then FIPLIST="${FIPLIST}${FIP}_ 58 | "; fi 59 | done 60 | FIPLIST=$(echo "$FIPLIST" | grep -v '^$' | sort -u) 61 | # 
Cleanup previous interrupted runs 62 | SERVERS=$(openstack server list | grep -o "APIMonitor_[0-9]*_" | sort -u) 63 | KEYPAIR=$(openstack keypair list | grep -o "APIMonitor_[0-9]*_" | sort -u) 64 | VOLUMES=$(openstack volume list | grep -o "APIMonitor_[0-9]*_" | sort -u) 65 | NETWORK=$(openstack network list | grep -o "APIMonitor_[0-9]*_" | sort -u) 66 | LOADBAL=$(openstack loadbalancer list | grep -o "APIMonitor_[0-9]*_" | sort -u) 67 | ROUTERS=$(openstack router list | grep -o "APIMonitor_[0-9]*_" | sort -u) 68 | SECGRPS=$(openstack security group list | grep -o "APIMonitor_[0-9]*_" | sort -u) 69 | echo CLEANUP: FIPs $FIPLIST Servers $SERVERS Keypairs $KEYPAIR Volumes $VOLUMES Networks $NETWORK LoadBalancers $LOADBAL Routers $ROUTERS SecGrps $SECGRPS 70 | for ENV in $FIPLIST; do 71 | echo "******************************" 72 | echo "CLEAN $ENV" 73 | bash ./api_monitor.sh -o -T -q -c CLEANUP $ENV 74 | echo "******************************" 75 | done 76 | TOCLEAN=$(echo "$SERVERS 77 | $KEYPAIR 78 | $VOLUMES 79 | $NETWORK 80 | $LOADBAL 81 | $ROUTERS 82 | $SECGRPS 83 | " | grep -v '^$' | sort -u) 84 | for ENV in $TOCLEAN; do 85 | echo "******************************" 86 | echo "CLEAN $ENV" 87 | #bash ./api_monitor.sh -o -T -q -LL -c CLEANUP $ENV 88 | bash ./api_monitor.sh -o -q -LL -c CLEANUP $ENV 89 | echo "******************************" 90 | done 91 | 92 | #bash ./api_monitor.sh -c -x -d -n 8 -l last.log -e $EMAIL_PARAM -S -i 9 93 | #exec api_monitor.sh -o -C -D -N 2 -n 8 -s -e sender@domain.org "$@" 94 | #exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -L -b -B -M -a 2 -t -T -R -X "$@" 95 | exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -LL -b -B -M -a 2 -t -T -R -X -S plus-pco "$@" 96 | 97 | -------------------------------------------------------------------------------- /run_pluspco2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Specify image names, JumpHost ideally has sfw2-snat 4 | # Options for the 
images: my openSUSE 15.2 (linux), Ubuntu 20.04 (ubuntu), 5 | # openSUSE Leap 15.2 (opensuse), CentOS 8 (centos) 6 | # You can freely mix ... 7 | #export JHIMG="Ubuntu 22.04" 8 | export JHIMG="Debian 12" 9 | #export JHIMG="openSUSE 15.5" 10 | #export ADDJHVOLSIZE=2 11 | #export IMG="Ubuntu 22.04" 12 | export IMG="Debian 12" 13 | #export IMG="openSUSE 15.5" 14 | #export IMG="CentOS 8" 15 | # DEFLTUSER from image_original_user property 16 | #export DEFLTUSER=opensuse 17 | #export DEFLTUSER=ubuntu 18 | #export JHDEFLTUSER=ubuntu 19 | # You can use a filter when listing images (because your catalog is huge) 20 | #export JHIMGFILT="--property-filter os_version=openSUSE-15.0" 21 | #export IMGFILT="--property-filter os_version=openSUSE-15.0" 22 | # ECP flavors 23 | #if test $OS_REGION_NAME == Kna1; then 24 | #export JHFLAVOR=SCS-1L:1:5 25 | export JHFLAVOR=SCS-1V-1 26 | #export JHFLAVOR=SCS-1V:1:10 27 | #export FLAVOR=SCS-1L:1:5 28 | #export FLAVOR=SCS-1V:1:10 29 | export FLAVOR=SCS-1L-1 30 | #else 31 | #export JHFLAVOR=1C-1GB-10GB 32 | #export FLAVOR=1C-1GB-10GB 33 | #fi 34 | # EMail notifications sender address 35 | export FROM=kurt.garloff@sovereignit.cloud 36 | # Only use one AZ 37 | export AZS="az1" 38 | # Upload (compressed) logfiles and stats to container 39 | export SWIFTCONTAINER=OS-HM-Logfiles 40 | 41 | # Assume OS_ parameters have already been sourced from some .openrc file 42 | # export OS_CLOUD=gx-scs-healthmgr 43 | 44 | export EMAIL_PARAM=${EMAIL_PARAM:-"scs@garloff.de"} 45 | 46 | # Notifications & Alarms (pass as list, arrays can't be exported) 47 | ALARM_EMAIL_ADDRESSES="scs@garloff.de" 48 | NOTE_EMAIL_ADDRESSES="scs@garloff.de" 49 | #ALARM_EMAIL_ADDRESSES="scs@garloff.de scs-monitoring@plusserver.com" 50 | #NOTE_EMAIL_ADDRESSES="scs@garloff.de" 51 | export ALARM_EMAIL_ADDRESSES NOTE_EMAIL_ADDRESSES 52 | 53 | # Terminate early on auth error 54 | openstack server list >/dev/null 55 | 56 | # Find Floating IPs 57 | FIPLIST="" 58 | FIPS=$(openstack floating ip 
list -f value -c ID) 59 | for fip in $FIPS; do 60 | FIP=$(openstack floating ip show $fip | grep -o "APIMonitor_[0-9]*") 61 | if test -n "$FIP"; then FIPLIST="${FIPLIST}${FIP}_ 62 | "; fi 63 | done 64 | FIPLIST=$(echo "$FIPLIST" | grep -v '^$' | sort -u) 65 | # Cleanup previous interrupted runs 66 | SERVERS=$(openstack server list | grep -o "APIMonitor_[0-9]*_" | sort -u) 67 | VOLUMES=$(openstack volume list | grep -o "APIMonitor_[0-9]*_" | sort -u) 68 | NETWORK=$(openstack network list | grep -o "APIMonitor_[0-9]*_" | sort -u) 69 | LOADBAL=$(openstack loadbalancer list | grep -o "APIMonitor_[0-9]*_" | sort -u) 70 | ROUTERS=$(openstack router list | grep -o "APIMonitor_[0-9]*_" | sort -u) 71 | SECGRPS=$(openstack security group list | grep -o "APIMonitor_[0-9]*_" | sort -u) 72 | echo CLEANUP: FIPs $FIPLIST Servers $SERVERS Volumes $VOLUMES Networks $NETWORK LoadBalancers $LOADBAL Routers $ROUTERS SecGrps $SECGRPS 73 | for ENV in $FIPLIST; do 74 | echo "******************************" 75 | echo "CLEAN $ENV" 76 | bash ./api_monitor.sh -o -T -q -c CLEANUP $ENV 77 | echo "******************************" 78 | done 79 | TOCLEAN=$(echo "$SERVERS 80 | $VOLUMES 81 | $NETWORK 82 | $LOADBAL 83 | $ROUTERS 84 | $SECGRPS 85 | " | grep -v '^$' | sort -u) 86 | for ENV in $TOCLEAN; do 87 | echo "******************************" 88 | echo "CLEAN $ENV" 89 | #bash ./api_monitor.sh -o -T -q -LL -c CLEANUP $ENV 90 | bash ./api_monitor.sh -o -q -LL -c CLEANUP $ENV 91 | echo "******************************" 92 | done 93 | 94 | #bash ./api_monitor.sh -c -x -d -n 8 -l last.log -e $EMAIL_PARAM -S -i 9 95 | #exec api_monitor.sh -o -C -D -N 2 -n 8 -s -e sender@domain.org "$@" 96 | #exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -L -b -B -M -a 2 -t -T -R -X "$@" 97 | exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -LL -b -B -M -a 2 -t -T -R -X -S plus-prod2 "$@" 98 | 99 | -------------------------------------------------------------------------------- /run_wave.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Specify image names, JumpHost ideally has sfw2-snat 4 | # Options for the images: my openSUSE 15.2 (linux), Ubuntu 20.04 (ubuntu), 5 | # openSUSE Leap 15.2 (opensuse), CentOS 8 (centos) 6 | # You can freely mix ... 7 | #export JHIMG="Ubuntu 22.04" 8 | #export JHIMG="openSUSE 15.5" 9 | export JHIMG="Debian 12" 10 | #export ADDJHVOLSIZE=2 11 | #export IMG="Ubuntu 22.04" 12 | #export IMG="openSUSE 15.5" 13 | export IMG="Debian 12" 14 | #export IMG="CentOS 8" 15 | # DEFLTUSER from image_original_user property 16 | #export DEFLTUSER=opensuse 17 | #export DEFLTUSER=ubuntu 18 | #export JHDEFLTUSER=ubuntu 19 | # You can use a filter when listing images (because your catalog is huge) 20 | #export JHIMGFILT="--property-filter os_version=openSUSE-15.0" 21 | #export IMGFILT="--property-filter os_version=openSUSE-15.0" 22 | # ECP flavors 23 | #if test $OS_REGION_NAME == Kna1; then 24 | export JHFLAVOR=SCS-1V-2 25 | export FLAVOR=SCS-1L-1 26 | #else 27 | #export JHFLAVOR=1C-1GB-10GB 28 | #export FLAVOR=1C-1GB-10GB 29 | #fi 30 | # EMail notifications sender address 31 | export FROM=kurt@garloff.de 32 | # Only use one AZ 33 | #export AZS="muc5-a" 34 | # Upload (compressed) logfiles and stats to container 35 | #export SWIFTCONTAINER=OS-HM-Logfiles 36 | export DEFAULTNAMESERVER="true" 37 | 38 | # Assume OS_ parameters have already been sourced from some .openrc file 39 | # export OS_CLOUD=gx-scs-healthmgr 40 | 41 | export EMAIL_PARAM=${EMAIL_PARAM:-"scs@garloff.de"} 42 | 43 | # Notifications & Alarms (pass as list, arrays can't be exported) 44 | ALARM_EMAIL_ADDRESSES="scs@garloff.de" 45 | NOTE_EMAIL_ADDRESSES="scs@garloff.de" 46 | #ALARM_EMAIL_ADDRESSES="scs@garloff.de scs-monitoring@plusserver.com" 47 | #NOTE_EMAIL_ADDRESSES="scs@garloff.de" 48 | export ALARM_EMAIL_ADDRESSES NOTE_EMAIL_ADDRESSES 49 | 50 | # Terminate early on auth error 51 | openstack server list >/dev/null 52 
| 53 | # Find Floating IPs 54 | FIPLIST="" 55 | FIPS=$(openstack floating ip list -f value -c ID) 56 | for fip in $FIPS; do 57 | FIP=$(openstack floating ip show $fip | grep -o "APIMonitor_[0-9]*") 58 | if test -n "$FIP"; then FIPLIST="${FIPLIST}${FIP}_ 59 | "; fi 60 | done 61 | FIPLIST=$(echo "$FIPLIST" | grep -v '^$' | sort -u) 62 | # Cleanup previous interrupted runs 63 | SERVERS=$(openstack server list | grep -o "APIMonitor_[0-9]*_" | sort -u) 64 | KEYPAIR=$(openstack keypair list | grep -o "APIMonitor_[0-9]*_" | sort -u) 65 | VOLUMES=$(openstack volume list | grep -o "APIMonitor_[0-9]*_" | sort -u) 66 | NETWORK=$(openstack network list | grep -o "APIMonitor_[0-9]*_" | sort -u) 67 | LOADBAL=$(openstack loadbalancer list | grep -o "APIMonitor_[0-9]*_" | sort -u) 68 | ROUTERS=$(openstack router list | grep -o "APIMonitor_[0-9]*_" | sort -u) 69 | SECGRPS=$(openstack security group list | grep -o "APIMonitor_[0-9]*_" | sort -u) 70 | echo CLEANUP: FIPs $FIPLIST Servers $SERVERS Keypairs $KEYPAIR Volumes $VOLUMES Networks $NETWORK LoadBalancers $LOADBAL Routers $ROUTERS SecGrps $SECGRPS 71 | for ENV in $FIPLIST; do 72 | echo "******************************" 73 | echo "CLEAN $ENV" 74 | bash ./api_monitor.sh -o -T -q -c CLEANUP $ENV 75 | echo "******************************" 76 | done 77 | TOCLEAN=$(echo "$SERVERS 78 | $KEYPAIR 79 | $VOLUMES 80 | $NETWORK 81 | $LOADBAL 82 | $ROUTERS 83 | $SECGRPS 84 | " | grep -v '^$' | sort -u) 85 | for ENV in $TOCLEAN; do 86 | echo "******************************" 87 | echo "CLEAN $ENV" 88 | #bash ./api_monitor.sh -o -T -q -LL -c CLEANUP $ENV 89 | bash ./api_monitor.sh -o -q -LL -c CLEANUP $ENV 90 | echo "******************************" 91 | done 92 | 93 | #bash ./api_monitor.sh -c -x -d -n 8 -l last.log -e $EMAIL_PARAM -S -i 9 94 | #exec api_monitor.sh -o -C -D -N 2 -n 8 -s -e sender@domain.org "$@" 95 | #exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -L -b -B -M -a 2 -t -T -R -X "$@" 96 | exec ./api_monitor.sh -O -C -D -n 9 -s -LL -b 
-B -M -a 2 -t -T -R -X -S wavestack1 "$@" 97 | #exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -b -B -M -a 2 -t -T -R -X -S wavestack1 "$@" 98 | 99 | -------------------------------------------------------------------------------- /run_stackit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Specify image names, JumpHost ideally has sfw2-snat 4 | # Options for the images: my openSUSE 15.2 (linux), Ubuntu 20.04 (ubuntu), 5 | # openSUSE Leap 15.2 (opensuse), CentOS 8 (centos) 6 | # You can freely mix ... 7 | export JHIMG="Ubuntu 22.04" 8 | #export JHIMG="openSUSE 15.2" 9 | export ADDJHVOLSIZE=5 10 | export IMG="Ubuntu 22.04" 11 | #export IMG="openSUSE 15.2" 12 | export ADDVOLSIZE=10 13 | #export IMG="CentOS 8" 14 | # DEFLTUSER from image_original_user property 15 | #export DEFLTUSER=opensuse 16 | export DEFLTUSER=ubuntu 17 | export JHDEFLTUSER=ubuntu 18 | # You can use a filter when listing images (because your catalog is huge) 19 | #export JHIMGFILT="--property-filter os_version=openSUSE-15.0" 20 | #export IMGFILT="--property-filter os_version=openSUSE-15.0" 21 | # ECP flavors 22 | #if test $OS_REGION_NAME == Kna1; then 23 | export JHFLAVOR=t1.2 24 | export FLAVOR=t1.1 25 | #else 26 | #export JHFLAVOR=1C-1GB-10GB 27 | #export FLAVOR=1C-1GB-10GB 28 | #fi 29 | # EMail notifications sender address 30 | export FROM=kurt@garloff.de 31 | # Only use AZs 32 | #export AZS="eu01-1 eu01-2 eu01-3 eu01-m" 33 | # Upload (compressed) logfiles and stats to container 34 | export SWIFTCONTAINER=OS-HM-Logfiles 35 | # NAMESERVER 36 | export NAMESERVER=8.8.8.8 37 | # External network selection pattern (grep RE) 38 | export EXTSEARCH=floating.net 39 | 40 | # Set region 41 | export OS_REGION_NAME=RegionOne 42 | 43 | # Assume OS_ parameters have already been sourced from some .openrc file 44 | # export OS_CLOUD=gx-scs-healthmgr 45 | 46 | export EMAIL_PARAM=${EMAIL_PARAM:-"scs@garloff.de"} 47 | 48 | # Notifications & Alarms (pass as 
list, arrays can't be exported) 49 | #ALARM_EMAIL_ADDRESSES="scs@garloff.de" 50 | #NOTE_EMAIL_ADDRESSES="scs@garloff.de" 51 | #ALARM_EMAIL_ADDRESSES="scs@garloff.de scs-monitoring@plusserver.com" 52 | #NOTE_EMAIL_ADDRESSES="scs@garloff.de" 53 | #export ALARM_EMAIL_ADDRESSES NOTE_EMAIL_ADDRESSES 54 | 55 | # Terminate early on auth error 56 | openstack server list >/dev/null 57 | 58 | # Find Floating IPs 59 | FIPLIST="" 60 | FIPS=$(openstack floating ip list -f value -c ID) 61 | for fip in $FIPS; do 62 | FIP=$(openstack floating ip show $fip | grep -o "APIMonitor_[0-9]*") 63 | if test -n "$FIP"; then FIPLIST="${FIPLIST}${FIP}_ 64 | "; fi 65 | done 66 | FIPLIST=$(echo "$FIPLIST" | grep -v '^$' | sort -u) 67 | # Cleanup previous interrupted runs 68 | SERVERS=$(openstack server list | grep -o "APIMonitor_[0-9]*_" | sort -u) 69 | KEYPAIR=$(openstack keypair list | grep -o "APIMonitor_[0-9]*_" | sort -u) 70 | VOLUMES=$(openstack volume list | grep -o "APIMonitor_[0-9]*_" | sort -u) 71 | NETWORK=$(openstack network list | grep -o "APIMonitor_[0-9]*_" | sort -u) 72 | LOADBAL=$(openstack loadbalancer list | grep -o "APIMonitor_[0-9]*_" | sort -u) 73 | ROUTERS=$(openstack router list | grep -o "APIMonitor_[0-9]*_" | sort -u) 74 | SECGRPS=$(openstack security group list | grep -o "APIMonitor_[0-9]*_" | sort -u) 75 | echo CLEANUP: FIPs $FIPLIST Servers $SERVERS Keypair $KEYPAIR Volumes $VOLUMES Networks $NETWORK LoadBalancers $LOADBAL Routers $ROUTERS SecGrps $SECGRPS 76 | for ENV in $FIPLIST; do 77 | echo "******************************" 78 | echo "CLEAN $ENV" 79 | bash ./api_monitor.sh -o -T -q -c CLEANUP $ENV 80 | #bash ./api_monitor.sh -o -q -c CLEANUP $ENV 81 | echo "******************************" 82 | done 83 | TOCLEAN=$(echo "$SERVERS 84 | $KEYPAIR 85 | $VOLUMES 86 | $NETWORK 87 | $LOADBAL 88 | $ROUTERS 89 | $SECGRPS 90 | " | grep -v '^$' | sort -u) 91 | for ENV in $TOCLEAN; do 92 | echo "******************************" 93 | echo "CLEAN $ENV" 94 | bash ./api_monitor.sh 
-o -q -c CLEANUP $ENV 95 | echo "******************************" 96 | done 97 | 98 | #bash ./api_monitor.sh -c -x -d -n 8 -l last.log -e $EMAIL_PARAM -S -i 9 99 | #exec api_monitor.sh -o -C -D -N 2 -n 8 -s -e sender@domain.org "$@" 100 | #exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -L -b -B -a 2 -T -R -X "$@" 101 | exec ./api_monitor.sh -O -C -D -n 8 -s -b -B -a 2 -z 5 -T -R -X -S stackit "$@" 102 | #exec ./api_monitor.sh -o -C -D -N 2 -n 8 -s -L -b -B -a 2 -T -R -X "$@" 103 | 104 | -------------------------------------------------------------------------------- /run_gx_scs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Specify image names, JumpHost ideally has sfw2-snat 4 | # Options for the images: my openSUSE 15.2 (linux), Ubuntu 20.04 (ubuntu), 5 | # openSUSE Leap 15.2 (opensuse), CentOS 8 (centos) 6 | # You can freely mix ... 7 | #export JHIMG="Ubuntu 22.04" 8 | #export JHIMG="openSUSE 15.6" 9 | export JHIMG="Debian 12" 10 | #export ADDJHVOLSIZE=2 11 | #export IMG="Ubuntu 22.04" 12 | #export IMG="openSUSE 15.6" 13 | export IMG="Debian 12" 14 | #export IMG="CentOS 8" 15 | # DEFLTUSER from image_original_user property 16 | #export DEFLTUSER=opensuse 17 | #export DEFLTUSER=ubuntu 18 | #export JHDEFLTUSER=ubuntu 19 | # You can use a filter when listing images (because your catalog is huge) 20 | #export JHIMGFILT="--property-filter os_version=openSUSE-15.0" 21 | #export IMGFILT="--property-filter os_version=openSUSE-15.0" 22 | # ECP flavors 23 | #if test $OS_REGION_NAME == Kna1; then 24 | export JHFLAVOR=SCS-1V-2 25 | export FLAVOR=SCS-1L-1 26 | #else 27 | #export JHFLAVOR=1C-1GB-10GB 28 | #export FLAVOR=1C-1GB-10GB 29 | #fi 30 | # EMail notifications sender address 31 | export FROM=kurt@garloff.de 32 | # Only use one AZ 33 | #export AZS="nova" 34 | # Upload (compressed) logfiles and stats to container 35 | export SWIFTCONTAINER=OS-HM-Logfiles 36 | # NAMESERVER 37 | export NAMESERVER=8.8.8.8 38 | # 
VOLUMETYPE override 39 | #export VOLUMETYPE="LUKS" 40 | 41 | # Assume OS_ parameters have already been sourced from some .openrc file 42 | # export OS_CLOUD=gx-scs-healthmgr 43 | 44 | export EMAIL_PARAM=${EMAIL_PARAM:-"scs@garloff.de"} 45 | 46 | # Notifications & Alarms (pass as list, arrays can't be exported) 47 | ALARM_EMAIL_ADDRESSES="scs@garloff.de" 48 | NOTE_EMAIL_ADDRESSES="scs@garloff.de" 49 | #ALARM_EMAIL_ADDRESSES="scs@garloff.de scs-monitoring@plusserver.com" 50 | #NOTE_EMAIL_ADDRESSES="scs@garloff.de" 51 | export ALARM_EMAIL_ADDRESSES NOTE_EMAIL_ADDRESSES 52 | 53 | # Terminate early on auth error 54 | openstack server list >/dev/null 55 | 56 | # Find Floating IPs 57 | FIPLIST="" 58 | FIPS=$(openstack floating ip list -f value -c ID) 59 | for fip in $FIPS; do 60 | FIP=$(openstack floating ip show $fip | grep -o "APIMonitor_[0-9]*") 61 | if test -n "$FIP"; then FIPLIST="${FIPLIST}${FIP}_ 62 | "; fi 63 | done 64 | FIPLIST=$(echo "$FIPLIST" | grep -v '^$' | sort -u) 65 | # Cleanup previous interrupted runs 66 | SERVERS=$(openstack server list | grep -o "APIMonitor_[0-9]*_" | sort -u) 67 | KEYPAIR=$(openstack keypair list | grep -o "APIMonitor_[0-9]*_" | sort -u) 68 | VOLUMES=$(openstack volume list | grep -o "APIMonitor_[0-9]*_" | sort -u) 69 | NETWORK=$(openstack network list | grep -o "APIMonitor_[0-9]*_" | sort -u) 70 | LOADBAL=$(openstack loadbalancer list | grep -o "APIMonitor_[0-9]*_" | sort -u) 71 | ROUTERS=$(openstack router list | grep -o "APIMonitor_[0-9]*_" | sort -u) 72 | SECGRPS=$(openstack security group list | grep -o "APIMonitor_[0-9]*_" | sort -u) 73 | echo CLEANUP: FIPs $FIPLIST Servers $SERVERS Keypairs $KEYPAIR Volumes $VOLUMES Networks $NETWORK LoadBalancers $LOADBAL Routers $ROUTERS SecGrps $SECGRPS 74 | for ENV in $FIPLIST; do 75 | echo "******************************" 76 | echo "CLEAN $ENV" 77 | bash ./api_monitor.sh -o -T -q -c CLEANUP $ENV 78 | #bash ./api_monitor.sh -o -q -c CLEANUP $ENV 79 | echo "******************************" 
80 | done 81 | TOCLEAN=$(echo "$SERVERS 82 | $KEYPAIR 83 | $VOLUMES 84 | $NETWORK 85 | $LOADBAL 86 | $ROUTERS 87 | $SECGRPS 88 | " | grep -v '^$' | sort -u) 89 | for ENV in $TOCLEAN; do 90 | echo "******************************" 91 | echo "CLEAN $ENV" 92 | bash ./api_monitor.sh -o -q -L -c CLEANUP $ENV 93 | echo "******************************" 94 | done 95 | 96 | #bash ./api_monitor.sh -c -x -d -n 8 -l last.log -e $EMAIL_PARAM -S -i 9 97 | #exec api_monitor.sh -o -C -D -N 2 -n 8 -s -e sender@domain.org "$@" 98 | #exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -L -b -B -M -a 2 -T -R -X "$@" 99 | exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -LO -b -B -M -a 2 -T -R -X -S gx-scs "$@" 100 | #exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -LO -LR -b -B -M -a 2 -T -R -X -S gx-scs "$@" 101 | #exec ./api_monitor.sh -o -C -D -N 2 -n 8 -s -L -b -B -M -a 2 -T -R "$@" 102 | 103 | -------------------------------------------------------------------------------- /run_ciab.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Specify image names, JumpHost ideally has sfw2-snat 4 | # Options for the images: my openSUSE 15.2 (linux), Ubuntu 20.04 (ubuntu), 5 | # openSUSE Leap 15.2 (opensuse), CentOS 8 (centos) 6 | # You can freely mix ... 
7 | #export JHIMG="Ubuntu 22.04" 8 | #export JHIMG="openSUSE 15.5" 9 | export JHIMG="Debian 12" 10 | #export ADDJHVOLSIZE=2 11 | #export IMG="Ubuntu 22.04" 12 | #export IMG="openSUSE 15.5 raw" 13 | #export IMG="openSUSE 15.5" 14 | export IMG="Debian 12" 15 | #export IMG="CentOS 8" 16 | # DEFLTUSER from image_original_user property 17 | #export DEFLTUSER=opensuse 18 | #export DEFLTUSER=ubuntu 19 | #export JHDEFLTUSER=ubuntu 20 | # You can use a filter when listing images (because your catalog is huge) 21 | #export JHIMGFILT="--property-filter os_version=openSUSE-15.0" 22 | #export IMGFILT="--property-filter os_version=openSUSE-15.0" 23 | # ECP flavors 24 | #if test $OS_REGION_NAME == Kna1; then 25 | export JHFLAVOR=SCS-1V-2 26 | export FLAVOR=SCS-1L-1 27 | #else 28 | #export JHFLAVOR=1C-1GB-10GB 29 | #export FLAVOR=1C-1GB-10GB 30 | #fi 31 | # EMail notifications sender address 32 | export FROM=kurt@garloff.de 33 | # Only use one AZ 34 | #export AZS="muc5-a" 35 | # Upload (compressed) logfiles and stats to container 36 | #export SWIFTCONTAINER=OS-HM-Logfiles 37 | export DEFAULTNAMESERVER="true" 38 | 39 | # Assume OS_ parameters have already been sourced from some .openrc file 40 | # export OS_CLOUD=gx-scs-healthmgr 41 | 42 | export EMAIL_PARAM=${EMAIL_PARAM:-"scs@garloff.de"} 43 | 44 | # Ensure that all commands find custom CA, place the custom CiaB CA cert here: 45 | export OS_CACERT=/etc/ca-cert-ciab.crt 46 | 47 | # Notifications & Alarms (pass as list, arrays can't be exported) 48 | ALARM_EMAIL_ADDRESSES="scs@garloff.de" 49 | NOTE_EMAIL_ADDRESSES="scs@garloff.de" 50 | #ALARM_EMAIL_ADDRESSES="scs@garloff.de scs-monitoring@plusserver.com" 51 | #NOTE_EMAIL_ADDRESSES="scs@garloff.de" 52 | export ALARM_EMAIL_ADDRESSES NOTE_EMAIL_ADDRESSES 53 | 54 | # Terminate early on auth error 55 | openstack server list >/dev/null || exit 1 56 | 57 | echo "Finding resources from previous runs to clean up ..." 
58 | # Find Floating IPs 59 | FIPLIST="" 60 | FIPS=$(openstack floating ip list -f value -c ID) 61 | for fip in $FIPS; do 62 | FIP=$(openstack floating ip show $fip | grep -o "APIMonitor_[0-9]*") 63 | if test -n "$FIP"; then FIPLIST="${FIPLIST}${FIP}_ 64 | "; fi 65 | done 66 | FIPLIST=$(echo "$FIPLIST" | grep -v '^$' | sort -u) 67 | # Cleanup previous interrupted runs 68 | SERVERS=$(openstack server list | grep -o "APIMonitor_[0-9]*_" | sort -u) 69 | KEYPAIR=$(openstack keypair list | grep -o "APIMonitor_[0-9]*_" | sort -u) 70 | VOLUMES=$(openstack volume list | grep -o "APIMonitor_[0-9]*_" | sort -u) 71 | NETWORK=$(openstack network list | grep -o "APIMonitor_[0-9]*_" | sort -u) 72 | LOADBAL=$(openstack loadbalancer list | grep -o "APIMonitor_[0-9]*_" | sort -u) 73 | ROUTERS=$(openstack router list | grep -o "APIMonitor_[0-9]*_" | sort -u) 74 | SECGRPS=$(openstack security group list | grep -o "APIMonitor_[0-9]*_" | sort -u) 75 | echo CLEANUP: FIPs $FIPLIST Servers $SERVERS Keypairs $KEYPAIR Volumes $VOLUMES Networks $NETWORK LoadBalancers $LOADBAL Routers $ROUTERS SecGrps $SECGRPS 76 | for ENV in $FIPLIST; do 77 | echo "******************************" 78 | echo "CLEAN $ENV" 79 | bash ./api_monitor.sh -o -T -q -c CLEANUP $ENV 80 | echo "******************************" 81 | done 82 | TOCLEAN=$(echo "$SERVERS 83 | $KEYPAIR 84 | $VOLUMES 85 | $NETWORK 86 | $LOADBAL 87 | $ROUTERS 88 | $SECGRPS 89 | " | grep -v '^$' | sort -u) 90 | for ENV in $TOCLEAN; do 91 | echo "******************************" 92 | echo "CLEAN $ENV" 93 | #bash ./api_monitor.sh -o -T -q -LL -c CLEANUP $ENV 94 | bash ./api_monitor.sh -o -q -LL -c CLEANUP $ENV 95 | echo "******************************" 96 | done 97 | 98 | #bash ./api_monitor.sh -c -x -d -n 8 -l last.log -e $EMAIL_PARAM -S -i 9 99 | #exec api_monitor.sh -o -C -D -N 2 -n 8 -s -e sender@domain.org "$@" 100 | #exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -L -b -B -M -a 2 -t -T -R -X "$@" 101 | exec ./api_monitor.sh -O -C -D -N 2 -n 6 -s 
-LO -b -B -M -a 2 -t -T -R -X -S ciab "$@" 102 | #exec ./api_monitor.sh -O -C -D -N 2 -n 8 -s -b -B -M -a 2 -t -T -R -S ciab "$@" 103 | 104 | -------------------------------------------------------------------------------- /dashboard/README.md: -------------------------------------------------------------------------------- 1 | # Dashboards for the openstack-health-monitor 2 | 3 | The openstack-health-monitor has a capability to report the results (errors 4 | and execution times) to a local telegraf. This can be used to feed the results 5 | to an influxdb which can feed grafana for dashboards. 6 | 7 | This directory contains configuration files that can be used to setup a local 8 | telegraf, influxdb and grafana setup. Note that this is for demonstration 9 | purposes. You can disable the SSL setup and just use it locally by doing 10 | an ssh port-forward of port 3000. 11 | 12 | The default config in grafana.ini needs some work to complete the SSL setup 13 | by having a DNS resolvable hostname (you can get one on sovereignit.cloud 14 | from the SCS project if you want) and generate a valid cert for it. 15 | You can also enable Viewer access for SovereignCloudStack org members on github. 16 | 17 | The below config files are for an openSUSE 15.3/15.4 image -- grab one from 18 | and install 19 | telegraf, influxdb and grafana: `sudo zypper install telegraf influxdb grafana`. 20 | 21 | ## The config files 22 | 23 | * `telegraf.conf` is a default config file for [telegraf](https://www.influxdata.com/time-series-platform/telegraf/) 24 | from openSUSE 15.3 with minimal edits to work for us. The relevant pieces here are the 25 | `inputs.influxdb_listener` (on `:8186`) and the `outputs.influxdb` (to `localhost:8086`). 26 | Put it into `/etc/telegraf/` (root:root 0644). 27 | * `config.toml` is the default config file for [influxdb](https://www.influxdata.com/time-series-platform/) 28 | from openSUSE 15.3 without any edits. 
This belongs into `/etc/influxdb/` (root:influxdb 0640). 29 | * `grafana.ini` is the default config file for [grafana](https://grafana.com/) 30 | from openSUSE 15.3 with the admin password changed to `SCS_Admin` and `allow_signup` set to `false`. 31 | It belongs into `/etc/grafana/` (root:grafana 0640). 32 | The configuration is prepared to be exposed to the internet -- to do so, change the admin password, 33 | fill in a hostname that you control (or reach out to SCS for getting a registration on sovereignit.cloud), 34 | generate SSL certs (e.g. via Let's Encrypt) and put them to `/etc/grafana/health-fullchain.pem` 35 | and `health-key.pem` (belonging to group grafana and group-readable). Ensure to open up port 3000 36 | in your firewall config and security groups. 37 | Note that you can make all github users that belong to the SovereignCloudStack org 38 | get Viewer access to the dashboards by adding a `client_id` and `client_secret` in the 39 | `[github.auth]` section that you request from the SCS github admins (github's oauth auth). 40 | * `openstack-health-dashboard.json` contains the dashboard exported to JSON and is the one piece here 41 | that has received significant work. Screenshots from the dashboard can be seen below. 42 | To set up the dashboard, first create an influxdb datasource via `localhost:8086` connecting to 43 | the `telegraf` database and then import the JSON as dashboard. 44 | * `openstack-health-dashboard-10.json` contains the same dashboard exported from Grafana 10.x. 45 | 46 | ## Screenshots 47 | 48 | ![](oshm-grafana-gxscs-20220923-1.png) 49 | ![](oshm-grafana-gxscs-20220923-2.png) 50 | 51 | These screenshots show two days in the gx-scs development cloud from plusserver, 52 | nothing filtered out (all OpenStack services, all commands, all resource types, 53 | all benchmarks).
54 | 55 | There are a few things that can be observed, from most obvious to least obvious: 56 | 57 | * Something happened on Sept 22, ~12:30: Suddenly deployment times for Loadbalancers and 58 | VMs dropped to consistently <~60s, while they varied before and went up to ~250s. 59 | * At the same time, the iperf3 test stopped failing, whereas before it had only succeeded occasionally. 60 | The 89% iperf3 success rate is an average between the ~25% before the change and the 100% 61 | afterwards. 62 | (What you can't see from the dashboard, but could see in the logs: The reason for iperf3 63 | failing is that the installation of the iperf3 package through cloud-init does fail due 64 | to temporary failures in name resolution.) 65 | * The long totalDuration test coincided with the iperf3 fails; only a single slow iteration 66 | happened after the change (which is really harmless in itself). 67 | * You get roughly 5Mbps for 1vCPU instances east-west traffic and calculating 4000 digits 68 | of pi with bc takes ~10.7s on the used vCPUs. (This is about full speed on a good Skylake/ 69 | Cascade Lake CPU. No signs of oversubscription really affecting you.) 70 | * A single resource failure, where, at roughly 0:45 on Sept 23, a LoadBalancer failed to become 71 | active. (Very good!) 72 | * Two API calls failed (out of 35500), both at 09:00 on Sept 23, neutron floating-ip-create 73 | and neutron router-gateway-set were hit. (This is a very good result!) 74 | * Overall API performance is very consistent; the slowest API call is the lbaas-listener-create 75 | call which is still consistently < 9s. Only two notable spikes; one is the failed router-gateway-set 76 | call (17.6s) and a slow (28s) router-delete call. You can tell that the control plane of 77 | this cloud easily handles the control plane load that it's subjected to. 78 | * No ssh errors, i.e. all VMs that were created successfully (which is all that were attempted 79 | to be created, could be connected to successfully, i.e.
their network connections came 80 | up fine, cloud-init injected the meta-data with the ssh keys and the login succeeded). 81 | 82 | TL;DR: This cloud was in serious trouble until Sep 22, ~12:30 (VMs and LBs taking a long 83 | time to get deployed; iperf failing in the majority of cases) and since then works like 84 | a charm. 85 | 86 | Curious about this specific case? 87 | 88 | OK, here's what was found: One compute node had serious networking trouble; VMs and 89 | amphorae deployed there would often not report success with getting the network 90 | port attached; a rescheduling after a few minutes explains the slowness in VM 91 | and LB deployment most of the time. It seems that after rescheduling the VM on 92 | a different compute host, we observe the temporary DNS failure; there might be 93 | a missing hook into OVN informing it that the VM was rescheduled somewhere else 94 | and that it needs to change the DNS injection magic. 95 | After the faulty host was removed from the scheduler, both the deployment times 96 | and the DNS failures disappeared. 97 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenStack Health Monitor 2 | 3 | This is a test script for testing the reliability and performance of the OpenStack API. 4 | It works by doing a real scenario test: Setting up a real environment 5 | with routers, nets, jumphosts, disks, VMs, ... 6 | 7 | We collect statistics on API call performance as well as on resource creation times. 8 | Failures are noted and alarms are generated. 9 | 10 | ## Status 11 | 12 | - Errors not yet handled everywhere 13 | - Live Volume and NIC attachment not yet implemented 14 | - Log too verbose for permanent operation ... 15 | - Script allows creating multiple nets/subnets independent from no of AZs, which may need more testing. 16 | - Done: Convert from neutron/cinder/nova/...
to openstack (`-o` / `-O`) 17 | 18 | ## TODO 19 | 20 | - Align sendalarm with telegraf database entries 21 | 22 | ## Project status 23 | 24 | This project has outgrown its development design (a simple bash script to test 25 | a cloud with the OpenStack CLI with a real-world scenario) and has become very 26 | hard to maintain. A successor has been built with the 27 | [scs-health-monitor](https://github.com/SovereignCloudStack/scs-health-monitor) 28 | which is also described in a [blog article](https://scs.community/de/tech/2024/09/06/vp12-scs-health-monitor-tech-preview/). 29 | 30 | We plan to migrate our own monitoring over to the new monitor (by ~mid 2025). 31 | For the time being, we do still care about bugs in this old health-monitor and try to fix them, 32 | but this script no longer receives new features, nor do we make promises for the long term. 33 | We recommend migrating to the new solution as well. 34 | 35 | ## Copyright 36 | 37 | (c) Kurt Garloff , 2/2017-7/2017 38 | (c) Kurt Garloff , 2/2020-4/2021 39 | (c) Kurt Garloff , 5/2021-11/2024 40 | 41 | License: CC-BY-SA (2.0) 42 | 43 | ## Description of the flow 44 | 45 | - create router (VPC in OTC/AWS speak) 46 | - create 1+$NONETS (1+2) nets -- $NONETS is normally the # of AZs 47 | - create 1+$NONETS subnets 48 | - create security groups 49 | - create virtual IP (for outbound SNAT via JumpHosts) 50 | - create SSH keys 51 | - create $NOAZS JumpHost VMs by 52 | a) creating disks (from image) 53 | b) creating ports 54 | c) creating VMs 55 | - associating a floating IP to each Jumphost 56 | - configuring the virtIP as default route 57 | - JumpHosts do SNAT for outbound traffic and port forwarding for inbound 58 | (this requires SUSE images with SFW2-snat package to work) 59 | - create N internal VMs striped over the nets and AZs by 60 | a) creating disks (from image) -- if option `-d` is not used 61 | b) creating a port -- if option `-P` is not used 62 | c) creating VM (from volume or from image, dep.
on `-d`) 63 | (Steps a and c take long, so we do many in parallel and poll for progress) 64 | d) do some property changes to VMs 65 | - after everything is complete, we wait for the VMs to be up 66 | - we ping them, log in via ssh and see whether they can ping to the outside world (quad9) 67 | - do some simplistic benchmarks, CPU (4k digits of pi), disk (fio), network (iperf3) 68 | - a full cross connectivity check (can each VM ping each other?) with `-C` 69 | - we create a loadbalancer and check accessing all VMs as members (RR) with `-L`/`-LL` 70 | - we kill some backends and check that the LB's health monitor detects this and 71 | routes the requests to the alive backend members 72 | - attach additional NICs and test (options `-2`, `-3`, `-4`) 73 | - NOT YET: attach additional disks to running VMs 74 | 75 | - Finally, we clean up ev'thing in reverse order 76 | (We have kept track of resources to clean up. 77 | We can also identify them by name, which helps if we got interrupted, or 78 | some cleanup action failed.) 79 | 80 | ## Coverage 81 | 82 | So we end up testing: Router, incl. default route (for SNAT instance), 83 | networks, subnets, and virtual IP, security groups and floating IPs, 84 | volume creation from image, deletion after VM destruction, 85 | VM creation from bootable volume (and from image if `-d` is given,) 86 | Metadata service (without it ssh key injection fails of course), 87 | Images (openSUSE OTC, upstream, CentOS and Ubuntu work), 88 | Loadbalancer (`-L`/`-LL`), 89 | Waiting for volumes and VMs, 90 | Destroying all of these resources again 91 | 92 | ## Alarming and reporting 93 | 94 | We do some statistics on the duration of the steps (min, avg, median, 95% quantile, max). 95 | We of course also note any errors and timeouts and report these, optionally sending email 96 | or SMN (OTC notifications via SMS and other media) alarms. 
97 | 98 | ## Runtime 99 | 100 | This takes rather long, as typical API calls take b/w 1 and 2s on OpenStack (including the round trip to keystone for the token). 101 | 102 | Optimization possibilities: 103 | Cache token and reuse when creating a large number of resources in a loop. 104 | Completed (use option `-O` (not used for volume create)). 105 | 106 | ## Prerequisites 107 | 108 | - Working python-XXXclient tools (openstack, glance, neutron, nova, cinder) 109 | - `OS_` environment variables set to run openstack CLI commands or `OS_CLOUD` with `clouds.yaml`/`secure.yaml` 110 | - `otc.sh` from otc-tools (only if using optional SMN `-m` and project creation `-p`) 111 | - `sendmail` (only if email notification is requested) 112 | - `jq` (for JSON processing) 113 | - `bc` and python3 (or python2) for math used to calc statistics 114 | - `netcat` 115 | - Any image for the VMs that allows login as user DEFLTUSER (linux) with injected key 116 | (If we use `-2`/`-3`/`-4`, we also need a SUSE image to have the `cloud-multiroute` pkg in there.) 117 | 118 | I typically set this up on openSUSE-15.x images that come with all these tools (except sendmail) 119 | preinstalled -- get them at . 120 | I tiny flavor is enough to run this (1GiB RAM, 5GB disk) -- watch the logfiles though to 121 | avoid them filling up your disk. If you set up the dashboard with telegraf, influxdb, grafana, 122 | I would recommend a larger flavor (4GiB RAM, 20GB disk). 123 | 124 | ## Usage 125 | 126 | Use `api_monitor.sh -h` to get a list of the command line options. 
For reference find the output (from v1.109) here: 127 | 128 | ``` 129 | Running api_monitor.sh v1.113 on host framekurt with arguments -h 130 | Using APIMonitor_1743587410_ prefix for resrcs on CLOUD (AZ) 131 | Usage: api_monitor.sh [options] 132 | --debug Use set -x to print every line executed 133 | -n N number of VMs to create (beyond #AZ JumpHosts, def: 12) 134 | -N N number of networks/subnets/jumphosts to create (def: # AZs) 135 | -l LOGFILE record all command in LOGFILE 136 | -a N send at most N alarms per iteration (first plus N-1 summarized) 137 | -R send recovery email after a completely successful iteration and alarms before 138 | -e ADR sets eMail address for notes/alarms (assumes working MTA) 139 | second -e splits eMails; notes go to first, alarms to second eMail 140 | -E exit on error (for CONNTEST) 141 | -m URN sets notes/alarms by SMN (pass URN of queue) 142 | second -m splits notifications; notes to first, alarms to second URN 143 | -s [SH] sends stats as well once per day (or every SH hours), not just alarms 144 | -S [NM] sends stats to grafana via local telegraf http_listener (def for NM=api-monitoring) 145 | -q do not send any alarms 146 | -d boot Directly from image (not via volume) 147 | -vt TP use volumetype TP (overrides env VOLUMETYPE) 148 | -z SZ boots VMs from volume of size SZ 149 | -Z do not create volume for JHs separately 150 | -P do not create Port before VM creation 151 | -D create all VMs with one API call (implies -d -P) 152 | -i N sets max number of iterations (def = -1 = inf) 153 | -r N only recreate router after each Nth iteration 154 | -g N increase VM volume size by N GB (ignored for -d/-D) 155 | -G N increase JH volume size by N GB 156 | -w N sets error wait (API, VM): 0-inf seconds or neg value for interactive wait 157 | -W N sets error wait (VM only): 0-inf seconds or neg value for interactive wait 158 | -V N set success wait: Stop for N seconds (neg val: interactive) before tearing down 159 | -p N use a new project every 
N iterations 160 | -c noColors: don't use bold/red/... ASCII sequences 161 | -C full Connectivity check: Every VM pings every other 162 | -o translate nova/cinder/neutron/glance into openstack client commands 163 | -O like -o, but use token_endpoint auth (after getting token) 164 | -x assume eXclusive project, clean all floating IPs found 165 | -I dIsassociate floating IPs before deleting them 166 | -L create HTTP Loadbalancer (LBaaSv2/octavia) and test it 167 | -LL create TCP Loadbalancer (LBaaSv2/octavia) and test it 168 | -LP PROV create TCP LB with provider PROV test it (-LO is short for -LP ovn) 169 | -LR reverse order of LB healthmon and member creation and deletion 170 | -X test list requests GET octavia, swift, heat, designate, barbican, manila, aodh, 171 | gnocchi, magnum, senlin, ironic if those are advertised in the catalog 172 | and client tools are installed 173 | -b run a simple compute benchmark (4k pi with bc) 174 | -B measure TCP BW b/w VMs (iperf3) 175 | -M measure disk I/O bandwidth & latency (fio) 176 | -t long Timeouts (2x, multiple times for 3x, 4x, ...) 177 | -T assign tags to resources; use to clean up floating IPs 178 | -2 Create 2ndary subnets and attach 2ndary NICs to VMs and test 179 | -3 Create 2ndary subnets, attach, test, reshuffle and retest 180 | -4 Create 2ndary subnets, reshuffle, attach, test, reshuffle and retest 181 | -R2 Recreate 2ndary ports after detaching (OpenStack <= Mitaka bug) 182 | Or: api_monitor.sh [-f] [-o/-O] CLEANUP XXX to clean up all resources with prefix XXX 183 | Option -f forces the deletion 184 | Or: api_monitor.sh [Options] CONNTEST XXX for full conn test for existing env XXX 185 | Options: [-2/3/4] [-o/O] [-i N] [-e ADR] [-E] [-w/W/V N] [-l LOGFILE] 186 | You need to have the OS_ variables set to allow OpenStack CLI tools to work. 
187 | You can override defaults by exporting the environment variables AZS, VAZS, NAZS, RPRE, 188 | PINGTARGET, PINGTARGET2, GRAFANANM, [JH]IMG, [JH]IMGFILT, [JH]FLAVOR, [JH]DEFLTUSER, 189 | ADDJHVOLSIZE, ADDVMVOLSIZE, SUCCWAIT, ALARMPRE, FROM, ALARM_/NOTE_EMAIL_ADDRESSES, VOLUMETYPE, 190 | NAMESERVER/DEFAULTNAMESERVER, SWIFTCONTAINER, FIPWAITPORTDEVOWNER, EXTSEARCH, OS_EXTRA_PARAMS. 191 | Typically, you should configure OS_CLOUD, [JH]IMG, [JH]FLAVOR, [JH]DEFLTUSER. 192 | ``` 193 | 194 | ## Examples 195 | 196 | Run 100 loops deploying (and deleting) 2+8 VMs (including nets, volumes etc.), 197 | with daily statistics sent to SMN...API-Notes and Alarms to SMN...APIMonitor: 198 | 199 | ```shell 200 | ./api_monitor.sh -n 8 -s -m urn:smn:eu-de:0ee085d22f6a413293a2c37aaa1f96fe:APIMon-Notes -m urn:smn:eu-de:0ee085d22f6a413293a2c37aaa1f96fe:APIMonitor -i 100 201 | ``` 202 | 203 | The included file `run.sh` also demonstrates how to use `api_monitor.sh`. 204 | 205 | The script has been used successfully on several OpenStack clouds with keystone v3 (OTC, ECP, CityCloud), 206 | started manually or from Jenkins, partially with recording stats to a local Telegraf to report timings 207 | and failures into a Grafana dashboard. 208 | Configuration files for telegraf, influxdb and a nice grafana dashboard can be found in the `dashboard/` 209 | subdirectory. 210 | 211 | ## HOWTO Guide 212 | 213 | The directory docs contains a complete [setup guide](https://github.com/SovereignCloudStack/openstack-health-monitor/blob/main/docs/Debian12-Install.md) 214 | using Debian 12 VMs on an SCS reference deployement. 215 | 216 | ## Benchmarks 217 | 218 | There are three simple benchmarks included, `-b` for simple compute benchmark (calculating 219 | 4000 digits of pi with `bc`), `-B` for an iperf TCP benchmark between VMs and `-M` to measure 220 | the bandwidth, IOPS and Latency (the percentage of samples with >10ms latency is output) of 221 | the root disk. 
These are meant to detect trends and can help with capacity monitoring. 222 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution-NonCommercial-ShareAlike 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 
30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International 58 | Public License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-NonCommercial-ShareAlike 4.0 International Public License 63 | ("Public License"). 
To the extent this Public License may be 64 | interpreted as a contract, You are granted the Licensed Rights in 65 | consideration of Your acceptance of these terms and conditions, and the 66 | Licensor grants You such rights in consideration of benefits the 67 | Licensor receives from making the Licensed Material available under 68 | these terms and conditions. 69 | 70 | 71 | Section 1 -- Definitions. 72 | 73 | a. Adapted Material means material subject to Copyright and Similar 74 | Rights that is derived from or based upon the Licensed Material 75 | and in which the Licensed Material is translated, altered, 76 | arranged, transformed, or otherwise modified in a manner requiring 77 | permission under the Copyright and Similar Rights held by the 78 | Licensor. For purposes of this Public License, where the Licensed 79 | Material is a musical work, performance, or sound recording, 80 | Adapted Material is always produced where the Licensed Material is 81 | synched in timed relation with a moving image. 82 | 83 | b. Adapter's License means the license You apply to Your Copyright 84 | and Similar Rights in Your contributions to Adapted Material in 85 | accordance with the terms and conditions of this Public License. 86 | 87 | c. BY-NC-SA Compatible License means a license listed at 88 | creativecommons.org/compatiblelicenses, approved by Creative 89 | Commons as essentially the equivalent of this Public License. 90 | 91 | d. Copyright and Similar Rights means copyright and/or similar rights 92 | closely related to copyright including, without limitation, 93 | performance, broadcast, sound recording, and Sui Generis Database 94 | Rights, without regard to how the rights are labeled or 95 | categorized. For purposes of this Public License, the rights 96 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 97 | Rights. 98 | 99 | e. 
Effective Technological Measures means those measures that, in the 100 | absence of proper authority, may not be circumvented under laws 101 | fulfilling obligations under Article 11 of the WIPO Copyright 102 | Treaty adopted on December 20, 1996, and/or similar international 103 | agreements. 104 | 105 | f. Exceptions and Limitations means fair use, fair dealing, and/or 106 | any other exception or limitation to Copyright and Similar Rights 107 | that applies to Your use of the Licensed Material. 108 | 109 | g. License Elements means the license attributes listed in the name 110 | of a Creative Commons Public License. The License Elements of this 111 | Public License are Attribution, NonCommercial, and ShareAlike. 112 | 113 | h. Licensed Material means the artistic or literary work, database, 114 | or other material to which the Licensor applied this Public 115 | License. 116 | 117 | i. Licensed Rights means the rights granted to You subject to the 118 | terms and conditions of this Public License, which are limited to 119 | all Copyright and Similar Rights that apply to Your use of the 120 | Licensed Material and that the Licensor has authority to license. 121 | 122 | j. Licensor means the individual(s) or entity(ies) granting rights 123 | under this Public License. 124 | 125 | k. NonCommercial means not primarily intended for or directed towards 126 | commercial advantage or monetary compensation. For purposes of 127 | this Public License, the exchange of the Licensed Material for 128 | other material subject to Copyright and Similar Rights by digital 129 | file-sharing or similar means is NonCommercial provided there is 130 | no payment of monetary compensation in connection with the 131 | exchange. 132 | 133 | l. 
Share means to provide material to the public by any means or 134 | process that requires permission under the Licensed Rights, such 135 | as reproduction, public display, public performance, distribution, 136 | dissemination, communication, or importation, and to make material 137 | available to the public including in ways that members of the 138 | public may access the material from a place and at a time 139 | individually chosen by them. 140 | 141 | m. Sui Generis Database Rights means rights other than copyright 142 | resulting from Directive 96/9/EC of the European Parliament and of 143 | the Council of 11 March 1996 on the legal protection of databases, 144 | as amended and/or succeeded, as well as other essentially 145 | equivalent rights anywhere in the world. 146 | 147 | n. You means the individual or entity exercising the Licensed Rights 148 | under this Public License. Your has a corresponding meaning. 149 | 150 | 151 | Section 2 -- Scope. 152 | 153 | a. License grant. 154 | 155 | 1. Subject to the terms and conditions of this Public License, 156 | the Licensor hereby grants You a worldwide, royalty-free, 157 | non-sublicensable, non-exclusive, irrevocable license to 158 | exercise the Licensed Rights in the Licensed Material to: 159 | 160 | a. reproduce and Share the Licensed Material, in whole or 161 | in part, for NonCommercial purposes only; and 162 | 163 | b. produce, reproduce, and Share Adapted Material for 164 | NonCommercial purposes only. 165 | 166 | 2. Exceptions and Limitations. For the avoidance of doubt, where 167 | Exceptions and Limitations apply to Your use, this Public 168 | License does not apply, and You do not need to comply with 169 | its terms and conditions. 170 | 171 | 3. Term. The term of this Public License is specified in Section 172 | 6(a). 173 | 174 | 4. Media and formats; technical modifications allowed. 
The 175 | Licensor authorizes You to exercise the Licensed Rights in 176 | all media and formats whether now known or hereafter created, 177 | and to make technical modifications necessary to do so. The 178 | Licensor waives and/or agrees not to assert any right or 179 | authority to forbid You from making technical modifications 180 | necessary to exercise the Licensed Rights, including 181 | technical modifications necessary to circumvent Effective 182 | Technological Measures. For purposes of this Public License, 183 | simply making modifications authorized by this Section 2(a) 184 | (4) never produces Adapted Material. 185 | 186 | 5. Downstream recipients. 187 | 188 | a. Offer from the Licensor -- Licensed Material. Every 189 | recipient of the Licensed Material automatically 190 | receives an offer from the Licensor to exercise the 191 | Licensed Rights under the terms and conditions of this 192 | Public License. 193 | 194 | b. Additional offer from the Licensor -- Adapted Material. 195 | Every recipient of Adapted Material from You 196 | automatically receives an offer from the Licensor to 197 | exercise the Licensed Rights in the Adapted Material 198 | under the conditions of the Adapter's License You apply. 199 | 200 | c. No downstream restrictions. You may not offer or impose 201 | any additional or different terms or conditions on, or 202 | apply any Effective Technological Measures to, the 203 | Licensed Material if doing so restricts exercise of the 204 | Licensed Rights by any recipient of the Licensed 205 | Material. 206 | 207 | 6. No endorsement. Nothing in this Public License constitutes or 208 | may be construed as permission to assert or imply that You 209 | are, or that Your use of the Licensed Material is, connected 210 | with, or sponsored, endorsed, or granted official status by, 211 | the Licensor or others designated to receive attribution as 212 | provided in Section 3(a)(1)(A)(i). 213 | 214 | b. Other rights. 215 | 216 | 1. 
Moral rights, such as the right of integrity, are not 217 | licensed under this Public License, nor are publicity, 218 | privacy, and/or other similar personality rights; however, to 219 | the extent possible, the Licensor waives and/or agrees not to 220 | assert any such rights held by the Licensor to the limited 221 | extent necessary to allow You to exercise the Licensed 222 | Rights, but not otherwise. 223 | 224 | 2. Patent and trademark rights are not licensed under this 225 | Public License. 226 | 227 | 3. To the extent possible, the Licensor waives any right to 228 | collect royalties from You for the exercise of the Licensed 229 | Rights, whether directly or through a collecting society 230 | under any voluntary or waivable statutory or compulsory 231 | licensing scheme. In all other cases the Licensor expressly 232 | reserves any right to collect such royalties, including when 233 | the Licensed Material is used other than for NonCommercial 234 | purposes. 235 | 236 | 237 | Section 3 -- License Conditions. 238 | 239 | Your exercise of the Licensed Rights is expressly made subject to the 240 | following conditions. 241 | 242 | a. Attribution. 243 | 244 | 1. If You Share the Licensed Material (including in modified 245 | form), You must: 246 | 247 | a. retain the following if it is supplied by the Licensor 248 | with the Licensed Material: 249 | 250 | i. identification of the creator(s) of the Licensed 251 | Material and any others designated to receive 252 | attribution, in any reasonable manner requested by 253 | the Licensor (including by pseudonym if 254 | designated); 255 | 256 | ii. a copyright notice; 257 | 258 | iii. a notice that refers to this Public License; 259 | 260 | iv. a notice that refers to the disclaimer of 261 | warranties; 262 | 263 | v. a URI or hyperlink to the Licensed Material to the 264 | extent reasonably practicable; 265 | 266 | b. 
indicate if You modified the Licensed Material and 267 | retain an indication of any previous modifications; and 268 | 269 | c. indicate the Licensed Material is licensed under this 270 | Public License, and include the text of, or the URI or 271 | hyperlink to, this Public License. 272 | 273 | 2. You may satisfy the conditions in Section 3(a)(1) in any 274 | reasonable manner based on the medium, means, and context in 275 | which You Share the Licensed Material. For example, it may be 276 | reasonable to satisfy the conditions by providing a URI or 277 | hyperlink to a resource that includes the required 278 | information. 279 | 3. If requested by the Licensor, You must remove any of the 280 | information required by Section 3(a)(1)(A) to the extent 281 | reasonably practicable. 282 | 283 | b. ShareAlike. 284 | 285 | In addition to the conditions in Section 3(a), if You Share 286 | Adapted Material You produce, the following conditions also apply. 287 | 288 | 1. The Adapter's License You apply must be a Creative Commons 289 | license with the same License Elements, this version or 290 | later, or a BY-NC-SA Compatible License. 291 | 292 | 2. You must include the text of, or the URI or hyperlink to, the 293 | Adapter's License You apply. You may satisfy this condition 294 | in any reasonable manner based on the medium, means, and 295 | context in which You Share Adapted Material. 296 | 297 | 3. You may not offer or impose any additional or different terms 298 | or conditions on, or apply any Effective Technological 299 | Measures to, Adapted Material that restrict exercise of the 300 | rights granted under the Adapter's License You apply. 301 | 302 | 303 | Section 4 -- Sui Generis Database Rights. 304 | 305 | Where the Licensed Rights include Sui Generis Database Rights that 306 | apply to Your use of the Licensed Material: 307 | 308 | a. 
for the avoidance of doubt, Section 2(a)(1) grants You the right 309 | to extract, reuse, reproduce, and Share all or a substantial 310 | portion of the contents of the database for NonCommercial purposes 311 | only; 312 | 313 | b. if You include all or a substantial portion of the database 314 | contents in a database in which You have Sui Generis Database 315 | Rights, then the database in which You have Sui Generis Database 316 | Rights (but not its individual contents) is Adapted Material, 317 | including for purposes of Section 3(b); and 318 | 319 | c. You must comply with the conditions in Section 3(a) if You Share 320 | all or a substantial portion of the contents of the database. 321 | 322 | For the avoidance of doubt, this Section 4 supplements and does not 323 | replace Your obligations under this Public License where the Licensed 324 | Rights include other Copyright and Similar Rights. 325 | 326 | 327 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 328 | 329 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 330 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 331 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 332 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 333 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 334 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 335 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 336 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 337 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 338 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 339 | 340 | b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 341 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 342 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 343 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 344 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 345 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 346 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 347 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 348 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 349 | 350 | c. The disclaimer of warranties and limitation of liability provided 351 | above shall be interpreted in a manner that, to the extent 352 | possible, most closely approximates an absolute disclaimer and 353 | waiver of all liability. 354 | 355 | 356 | Section 6 -- Term and Termination. 357 | 358 | a. This Public License applies for the term of the Copyright and 359 | Similar Rights licensed here. However, if You fail to comply with 360 | this Public License, then Your rights under this Public License 361 | terminate automatically. 362 | 363 | b. Where Your right to use the Licensed Material has terminated under 364 | Section 6(a), it reinstates: 365 | 366 | 1. automatically as of the date the violation is cured, provided 367 | it is cured within 30 days of Your discovery of the 368 | violation; or 369 | 370 | 2. upon express reinstatement by the Licensor. 371 | 372 | For the avoidance of doubt, this Section 6(b) does not affect any 373 | right the Licensor may have to seek remedies for Your violations 374 | of this Public License. 375 | 376 | c. For the avoidance of doubt, the Licensor may also offer the 377 | Licensed Material under separate terms or conditions or stop 378 | distributing the Licensed Material at any time; however, doing so 379 | will not terminate this Public License. 380 | 381 | d. 
Sections 1, 5, 6, 7, and 8 survive termination of this Public 382 | License. 383 | 384 | 385 | Section 7 -- Other Terms and Conditions. 386 | 387 | a. The Licensor shall not be bound by any additional or different 388 | terms or conditions communicated by You unless expressly agreed. 389 | 390 | b. Any arrangements, understandings, or agreements regarding the 391 | Licensed Material not stated herein are separate from and 392 | independent of the terms and conditions of this Public License. 393 | 394 | 395 | Section 8 -- Interpretation. 396 | 397 | a. For the avoidance of doubt, this Public License does not, and 398 | shall not be interpreted to, reduce, limit, restrict, or impose 399 | conditions on any use of the Licensed Material that could lawfully 400 | be made without permission under this Public License. 401 | 402 | b. To the extent possible, if any provision of this Public License is 403 | deemed unenforceable, it shall be automatically reformed to the 404 | minimum extent necessary to make it enforceable. If the provision 405 | cannot be reformed, it shall be severed from this Public License 406 | without affecting the enforceability of the remaining terms and 407 | conditions. 408 | 409 | c. No term or condition of this Public License will be waived and no 410 | failure to comply consented to unless expressly agreed to by the 411 | Licensor. 412 | 413 | d. Nothing in this Public License constitutes or may be interpreted 414 | as a limitation upon, or waiver of, any privileges and immunities 415 | that apply to the Licensor or You, including from the legal 416 | processes of any jurisdiction or authority. 417 | 418 | ======================================================================= 419 | 420 | Creative Commons is not a party to its public 421 | licenses. 
Notwithstanding, Creative Commons may elect to apply one of 422 | its public licenses to material it publishes and in those instances 423 | will be considered the “Licensor.” The text of the Creative Commons 424 | public licenses is dedicated to the public domain under the CC0 Public 425 | Domain Dedication. Except for the limited purpose of indicating that 426 | material is shared under a Creative Commons public license or as 427 | otherwise permitted by the Creative Commons policies published at 428 | creativecommons.org/policies, Creative Commons does not authorize the 429 | use of the trademark "Creative Commons" or any other trademark or logo 430 | of Creative Commons without its prior written consent including, 431 | without limitation, in connection with any unauthorized modifications 432 | to any of its public licenses or any other arrangements, 433 | understandings, or agreements concerning use of licensed material. For 434 | the avoidance of doubt, this paragraph does not form part of the 435 | public licenses. 436 | 437 | Creative Commons may be contacted at creativecommons.org. 438 | 439 | -------------------------------------------------------------------------------- /dashboard/config.toml: -------------------------------------------------------------------------------- 1 | ### Welcome to the InfluxDB configuration file. 2 | 3 | # The values in this file override the default values used by the system if 4 | # a config option is not specified. The commented out lines are the configuration 5 | # field and the default value used. Uncommenting a line and changing the value 6 | # will change the value used at runtime when the process is restarted. 7 | 8 | # Once every 24 hours InfluxDB will report usage data to usage.influxdata.com 9 | # The data includes a random ID, os, arch, version, the number of series and other 10 | # usage data. No data from user databases is ever transmitted. 11 | # Change this option to true to disable reporting. 
12 | reporting-disabled = true 13 | 14 | # Bind address to use for the RPC service for backup and restore. 15 | # bind-address = "127.0.0.1:8088" 16 | 17 | ### 18 | ### [meta] 19 | ### 20 | ### Controls the parameters for the Raft consensus group that stores metadata 21 | ### about the InfluxDB cluster. 22 | ### 23 | 24 | [meta] 25 | # Where the metadata/raft database is stored 26 | dir = "/var/lib/influxdb/meta" 27 | 28 | # Automatically create a default retention policy when creating a database. 29 | # retention-autocreate = true 30 | 31 | # If log messages are printed for the meta service 32 | # logging-enabled = true 33 | 34 | ### 35 | ### [data] 36 | ### 37 | ### Controls where the actual shard data for InfluxDB lives and how it is 38 | ### flushed from the WAL. "dir" may need to be changed to a suitable place 39 | ### for your system, but the WAL settings are an advanced configuration. The 40 | ### defaults should work for most systems. 41 | ### 42 | 43 | [data] 44 | # The directory where the TSM storage engine stores TSM files. 45 | dir = "/var/lib/influxdb/data" 46 | 47 | # The directory where the TSM storage engine stores WAL files. 48 | wal-dir = "/var/lib/influxdb/wal" 49 | 50 | # The amount of time that a write will wait before fsyncing. A duration 51 | # greater than 0 can be used to batch up multiple fsync calls. This is useful for slower 52 | # disks or when WAL write contention is seen. A value of 0s fsyncs every write to the WAL. 53 | # Values in the range of 0-100ms are recommended for non-SSD disks. 54 | # wal-fsync-delay = "0s" 55 | 56 | 57 | # The type of shard index to use for new shards. The default is an in-memory index that is 58 | # recreated at startup. A value of "tsi1" will use a disk based index that supports higher 59 | # cardinality datasets. 60 | # index-version = "inmem" 61 | 62 | # Trace logging provides more verbose output around the tsm engine. 
Turning 63 | # this on can provide more useful output for debugging tsm engine issues. 64 | # trace-logging-enabled = false 65 | 66 | # Whether queries should be logged before execution. Very useful for troubleshooting, but will 67 | # log any sensitive data contained within a query. 68 | # query-log-enabled = true 69 | 70 | # Validates incoming writes to ensure keys only have valid unicode characters. 71 | # This setting will incur a small overhead because every key must be checked. 72 | # validate-keys = false 73 | 74 | # Settings for the TSM engine 75 | 76 | # CacheMaxMemorySize is the maximum size a shard's cache can 77 | # reach before it starts rejecting writes. 78 | # Valid size suffixes are k, m, or g (case insensitive, 1024 = 1k). 79 | # Values without a size suffix are in bytes. 80 | # cache-max-memory-size = "1g" 81 | 82 | # CacheSnapshotMemorySize is the size at which the engine will 83 | # snapshot the cache and write it to a TSM file, freeing up memory 84 | # Valid size suffixes are k, m, or g (case insensitive, 1024 = 1k). 85 | # Values without a size suffix are in bytes. 86 | # cache-snapshot-memory-size = "25m" 87 | 88 | # CacheSnapshotWriteColdDuration is the length of time at 89 | # which the engine will snapshot the cache and write it to 90 | # a new TSM file if the shard hasn't received writes or deletes 91 | # cache-snapshot-write-cold-duration = "10m" 92 | 93 | # CompactFullWriteColdDuration is the duration at which the engine 94 | # will compact all TSM files in a shard if it hasn't received a 95 | # write or delete 96 | # compact-full-write-cold-duration = "4h" 97 | 98 | # The maximum number of concurrent full and level compactions that can run at one time. A 99 | # value of 0 results in 50% of runtime.GOMAXPROCS(0) used at runtime. Any number greater 100 | # than 0 limits compactions to that value. This setting does not apply 101 | # to cache snapshotting. 
102 | # max-concurrent-compactions = 0 103 | 104 | # CompactThroughput is the rate limit in bytes per second that we 105 | # will allow TSM compactions to write to disk. Note that short bursts are allowed 106 | # to happen at a possibly larger value, set by CompactThroughputBurst 107 | # compact-throughput = "48m" 108 | 109 | # CompactThroughputBurst is the rate limit in bytes per second that we 110 | # will allow TSM compactions to write to disk. 111 | # compact-throughput-burst = "48m" 112 | 113 | # If true, then the mmap advise value MADV_WILLNEED will be provided to the kernel with respect to 114 | # TSM files. This setting has been found to be problematic on some kernels, and defaults to off. 115 | # It might help users who have slow disks in some cases. 116 | # tsm-use-madv-willneed = false 117 | 118 | # Settings for the inmem index 119 | 120 | # The maximum series allowed per database before writes are dropped. This limit can prevent 121 | # high cardinality issues at the database level. This limit can be disabled by setting it to 122 | # 0. 123 | # max-series-per-database = 1000000 124 | 125 | # The maximum number of tag values per tag that are allowed before writes are dropped. This limit 126 | # can prevent high cardinality tag values from being written to a measurement. This limit can be 127 | # disabled by setting it to 0. 128 | # max-values-per-tag = 100000 129 | 130 | # Settings for the tsi1 index 131 | 132 | # The threshold, in bytes, when an index write-ahead log file will compact 133 | # into an index file. Lower sizes will cause log files to be compacted more 134 | # quickly and result in lower heap usage at the expense of write throughput. 135 | # Higher sizes will be compacted less frequently, store more series in-memory, 136 | # and provide higher write throughput. 137 | # Valid size suffixes are k, m, or g (case insensitive, 1024 = 1k). 138 | # Values without a size suffix are in bytes. 
139 | # max-index-log-file-size = "1m" 140 | 141 | # The size of the internal cache used in the TSI index to store previously 142 | # calculated series results. Cached results will be returned quickly from the cache rather 143 | # than needing to be recalculated when a subsequent query with a matching tag key/value 144 | # predicate is executed. Setting this value to 0 will disable the cache, which may 145 | # lead to query performance issues. 146 | # This value should only be increased if it is known that the set of regularly used 147 | # tag key/value predicates across all measurements for a database is larger than 100. An 148 | # increase in cache size may lead to an increase in heap usage. 149 | series-id-set-cache-size = 100 150 | 151 | ### 152 | ### [coordinator] 153 | ### 154 | ### Controls the clustering service configuration. 155 | ### 156 | 157 | [coordinator] 158 | # The default time a write request will wait until a "timeout" error is returned to the caller. 159 | # write-timeout = "10s" 160 | 161 | # The maximum number of concurrent queries allowed to be executing at one time. If a query is 162 | # executed and exceeds this limit, an error is returned to the caller. This limit can be disabled 163 | # by setting it to 0. 164 | # max-concurrent-queries = 0 165 | 166 | # The maximum time a query is allowed to execute before being killed by the system. This limit 167 | # can help prevent run away queries. Setting the value to 0 disables the limit. 168 | # query-timeout = "0s" 169 | 170 | # The time threshold when a query will be logged as a slow query. This limit can be set to help 171 | # discover slow or resource intensive queries. Setting the value to 0 disables the slow query logging. 172 | # log-queries-after = "0s" 173 | 174 | # The maximum number of points a SELECT can process. A value of 0 will make 175 | # the maximum point count unlimited. 
This will only be checked every second so queries will not 176 | # be aborted immediately when hitting the limit. 177 | # max-select-point = 0 178 | 179 | # The maximum number of series a SELECT can run. A value of 0 will make the maximum series 180 | # count unlimited. 181 | # max-select-series = 0 182 | 183 | # The maximum number of group by time bucket a SELECT can create. A value of zero will make the maximum 184 | # number of buckets unlimited. 185 | # max-select-buckets = 0 186 | 187 | ### 188 | ### [retention] 189 | ### 190 | ### Controls the enforcement of retention policies for evicting old data. 191 | ### 192 | 193 | [retention] 194 | # Determines whether retention policy enforcement is enabled. 195 | # enabled = true 196 | 197 | # The interval of time when retention policy enforcement checks run. 198 | # check-interval = "30m" 199 | 200 | ### 201 | ### [shard-precreation] 202 | ### 203 | ### Controls the precreation of shards, so they are available before data arrives. 204 | ### Only shards that, after creation, will have both a start- and end-time in the 205 | ### future, will ever be created. Shards are never precreated that would be wholly 206 | ### or partially in the past. 207 | 208 | [shard-precreation] 209 | # Determines whether shard pre-creation service is enabled. 210 | # enabled = true 211 | 212 | # The interval of time when the check to pre-create new shards runs. 213 | # check-interval = "10m" 214 | 215 | # The default period ahead of the endtime of a shard group that its successor 216 | # group is created. 217 | # advance-period = "30m" 218 | 219 | ### 220 | ### Controls the system self-monitoring, statistics and diagnostics. 221 | ### 222 | ### The internal database for monitoring data is created automatically 223 | ### if it does not already exist. The target retention within this database 224 | ### is called 'monitor' and is also created with a retention period of 7 days 225 | ### and a replication factor of 1, if it does not exist. 
In all cases 226 | ### this retention policy is configured as the default for the database. 227 | 228 | [monitor] 229 | # Whether to record statistics internally. 230 | # store-enabled = true 231 | 232 | # The destination database for recorded statistics 233 | # store-database = "_internal" 234 | 235 | # The interval at which to record statistics 236 | # store-interval = "10s" 237 | 238 | ### 239 | ### [http] 240 | ### 241 | ### Controls how the HTTP endpoints are configured. These are the primary 242 | ### mechanism for getting data into and out of InfluxDB. 243 | ### 244 | 245 | [http] 246 | # Determines whether HTTP endpoint is enabled. 247 | # enabled = true 248 | 249 | # Determines whether the Flux query endpoint is enabled. 250 | # flux-enabled = false 251 | 252 | # Determines whether the Flux query logging is enabled. 253 | # flux-log-enabled = false 254 | 255 | # The bind address used by the HTTP service. 256 | # bind-address = ":8086" 257 | 258 | # Determines whether user authentication is enabled over HTTP/HTTPS. 259 | # auth-enabled = false 260 | 261 | # The default realm sent back when issuing a basic auth challenge. 262 | # realm = "InfluxDB" 263 | 264 | # Determines whether HTTP request logging is enabled. 265 | # log-enabled = true 266 | 267 | # Determines whether the HTTP write request logs should be suppressed when the log is enabled. 268 | # suppress-write-log = false 269 | 270 | # When HTTP request logging is enabled, this option specifies the path where 271 | # log entries should be written. If unspecified, the default is to write to stderr, which 272 | # intermingles HTTP logs with internal InfluxDB logging. 273 | # 274 | # If influxd is unable to access the specified path, it will log an error and fall back to writing 275 | # the request log to stderr. 276 | # access-log-path = "" 277 | 278 | # Filters which requests should be logged. 
Each filter is of the pattern NNN, NNX, or NXX where N is 279 | # a number and X is a wildcard for any number. To filter all 5xx responses, use the string 5xx. 280 | # If multiple filters are used, then only one has to match. The default is to have no filters which 281 | # will cause every request to be printed. 282 | # access-log-status-filters = [] 283 | 284 | # Determines whether detailed write logging is enabled. 285 | # write-tracing = false 286 | 287 | # Determines whether the pprof endpoint is enabled. This endpoint is used for 288 | # troubleshooting and monitoring. 289 | # pprof-enabled = true 290 | 291 | # Enables a pprof endpoint that binds to localhost:6060 immediately on startup. 292 | # This is only needed to debug startup issues. 293 | # debug-pprof-enabled = false 294 | 295 | # Determines whether HTTPS is enabled. 296 | # https-enabled = false 297 | 298 | # The SSL certificate to use when HTTPS is enabled. 299 | # https-certificate = "/etc/ssl/influxdb.pem" 300 | 301 | # Use a separate private key location. 302 | # https-private-key = "" 303 | 304 | # The JWT auth shared secret to validate requests using JSON web tokens. 305 | # shared-secret = "" 306 | 307 | # The default chunk size for result sets that should be chunked. 308 | # max-row-limit = 0 309 | 310 | # The maximum number of HTTP connections that may be open at once. New connections that 311 | # would exceed this limit are dropped. Setting this value to 0 disables the limit. 312 | # max-connection-limit = 0 313 | 314 | # Enable http service over unix domain socket 315 | # unix-socket-enabled = false 316 | 317 | # The path of the unix domain socket. 318 | # bind-socket = "/var/run/influxdb.sock" 319 | 320 | # The maximum size of a client request body, in bytes. Setting this value to 0 disables the limit. 321 | # max-body-size = 25000000 322 | 323 | # The maximum number of writes processed concurrently. 324 | # Setting this to 0 disables the limit. 
325 | # max-concurrent-write-limit = 0 326 | 327 | # The maximum number of writes queued for processing. 328 | # Setting this to 0 disables the limit. 329 | # max-enqueued-write-limit = 0 330 | 331 | # The maximum duration for a write to wait in the queue to be processed. 332 | # Setting this to 0 or setting max-concurrent-write-limit to 0 disables the limit. 333 | # enqueued-write-timeout = 0 334 | 335 | ### 336 | ### [logging] 337 | ### 338 | ### Controls how the logger emits logs to the output. 339 | ### 340 | 341 | [logging] 342 | # Determines which log encoder to use for logs. Available options 343 | # are auto, logfmt, and json. auto will use a more user-friendly 344 | # output format if the output terminal is a TTY, but the format is not as 345 | # easily machine-readable. When the output is a non-TTY, auto will use 346 | # logfmt. 347 | # format = "auto" 348 | 349 | # Determines which level of logs will be emitted. The available levels 350 | # are error, warn, info, and debug. Logs that are equal to or above the 351 | # specified level will be emitted. 352 | # level = "info" 353 | 354 | # Suppresses the logo output that is printed when the program is started. 355 | # The logo is always suppressed if STDOUT is not a TTY. 356 | # suppress-logo = false 357 | 358 | ### 359 | ### [subscriber] 360 | ### 361 | ### Controls the subscriptions, which can be used to fork a copy of all data 362 | ### received by the InfluxDB host. 363 | ### 364 | 365 | [subscriber] 366 | # Determines whether the subscriber service is enabled. 367 | # enabled = true 368 | 369 | # The default timeout for HTTP writes to subscribers. 370 | # http-timeout = "30s" 371 | 372 | # Allows insecure HTTPS connections to subscribers. This is useful when testing with self- 373 | # signed certificates. 374 | # insecure-skip-verify = false 375 | 376 | # The path to the PEM encoded CA certs file. 
If the empty string, the default system certs will be used 377 | # ca-certs = "" 378 | 379 | # The number of writer goroutines processing the write channel. 380 | # write-concurrency = 40 381 | 382 | # The number of in-flight writes buffered in the write channel. 383 | # write-buffer-size = 1000 384 | 385 | 386 | ### 387 | ### [[graphite]] 388 | ### 389 | ### Controls one or many listeners for Graphite data. 390 | ### 391 | 392 | [[graphite]] 393 | # Determines whether the graphite endpoint is enabled. 394 | # enabled = false 395 | # database = "graphite" 396 | # retention-policy = "" 397 | # bind-address = ":2003" 398 | # protocol = "tcp" 399 | # consistency-level = "one" 400 | 401 | # These next lines control how batching works. You should have this enabled 402 | # otherwise you could get dropped metrics or poor performance. Batching 403 | # will buffer points in memory if you have many coming in. 404 | 405 | # Flush if this many points get buffered 406 | # batch-size = 5000 407 | 408 | # number of batches that may be pending in memory 409 | # batch-pending = 10 410 | 411 | # Flush at least this often even if we haven't hit buffer limit 412 | # batch-timeout = "1s" 413 | 414 | # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max. 415 | # udp-read-buffer = 0 416 | 417 | ### This string joins multiple matching 'measurement' values providing more control over the final measurement name. 418 | # separator = "." 419 | 420 | ### Default tags that will be added to all metrics. These can be overridden at the template level 421 | ### or by tags extracted from metric 422 | # tags = ["region=us-east", "zone=1c"] 423 | 424 | ### Each template line requires a template pattern. It can have an optional 425 | ### filter before the template and separated by spaces. It can also have optional extra 426 | ### tags following the template. Multiple tags should be separated by commas and no spaces 427 | ### similar to the line protocol format. 
There can be only one default template. 428 | # templates = [ 429 | # "*.app env.service.resource.measurement", 430 | # # Default template 431 | # "server.*", 432 | # ] 433 | 434 | ### 435 | ### [collectd] 436 | ### 437 | ### Controls one or many listeners for collectd data. 438 | ### 439 | 440 | [[collectd]] 441 | # enabled = false 442 | # bind-address = ":25826" 443 | # database = "collectd" 444 | # retention-policy = "" 445 | # 446 | # The collectd service supports either scanning a directory for multiple types 447 | # db files, or specifying a single db file. 448 | # typesdb = "/usr/local/share/collectd" 449 | # 450 | # security-level = "none" 451 | # auth-file = "/etc/collectd/auth_file" 452 | 453 | # These next lines control how batching works. You should have this enabled 454 | # otherwise you could get dropped metrics or poor performance. Batching 455 | # will buffer points in memory if you have many coming in. 456 | 457 | # Flush if this many points get buffered 458 | # batch-size = 5000 459 | 460 | # Number of batches that may be pending in memory 461 | # batch-pending = 10 462 | 463 | # Flush at least this often even if we haven't hit buffer limit 464 | # batch-timeout = "10s" 465 | 466 | # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max. 467 | # read-buffer = 0 468 | 469 | # Multi-value plugins can be handled two ways. 470 | # "split" will parse and store the multi-value plugin data into separate measurements 471 | # "join" will parse and store the multi-value plugin as a single multi-value measurement. 472 | # "split" is the default behavior for backward compatibility with previous versions of influxdb. 473 | # parse-multivalue-plugin = "split" 474 | ### 475 | ### [opentsdb] 476 | ### 477 | ### Controls one or many listeners for OpenTSDB data. 
478 | ### 479 | 480 | [[opentsdb]] 481 | # enabled = false 482 | # bind-address = ":4242" 483 | # database = "opentsdb" 484 | # retention-policy = "" 485 | # consistency-level = "one" 486 | # tls-enabled = false 487 | # certificate= "/etc/ssl/influxdb.pem" 488 | 489 | # Log an error for every malformed point. 490 | # log-point-errors = true 491 | 492 | # These next lines control how batching works. You should have this enabled 493 | # otherwise you could get dropped metrics or poor performance. Only points 494 | # metrics received over the telnet protocol undergo batching. 495 | 496 | # Flush if this many points get buffered 497 | # batch-size = 1000 498 | 499 | # Number of batches that may be pending in memory 500 | # batch-pending = 5 501 | 502 | # Flush at least this often even if we haven't hit buffer limit 503 | # batch-timeout = "1s" 504 | 505 | ### 506 | ### [[udp]] 507 | ### 508 | ### Controls the listeners for InfluxDB line protocol data via UDP. 509 | ### 510 | 511 | [[udp]] 512 | # enabled = false 513 | # bind-address = ":8089" 514 | # database = "udp" 515 | # retention-policy = "" 516 | 517 | # InfluxDB precision for timestamps on received points ("" or "n", "u", "ms", "s", "m", "h") 518 | # precision = "" 519 | 520 | # These next lines control how batching works. You should have this enabled 521 | # otherwise you could get dropped metrics or poor performance. Batching 522 | # will buffer points in memory if you have many coming in. 523 | 524 | # Flush if this many points get buffered 525 | # batch-size = 5000 526 | 527 | # Number of batches that may be pending in memory 528 | # batch-pending = 10 529 | 530 | # Will flush at least this often even if we haven't hit buffer limit 531 | # batch-timeout = "1s" 532 | 533 | # UDP Read buffer size, 0 means OS default. UDP listener will fail if set above OS max. 534 | # read-buffer = 0 535 | 536 | ### 537 | ### [continuous_queries] 538 | ### 539 | ### Controls how continuous queries are run within InfluxDB. 
540 | ### 541 | 542 | [continuous_queries] 543 | # Determines whether the continuous query service is enabled. 544 | # enabled = true 545 | 546 | # Controls whether queries are logged when executed by the CQ service. 547 | # log-enabled = true 548 | 549 | # Controls whether queries are logged to the self-monitoring data store. 550 | # query-stats-enabled = false 551 | 552 | # interval for how often continuous queries will be checked if they need to run 553 | # run-interval = "1s" 554 | 555 | ### 556 | ### [tls] 557 | ### 558 | ### Global configuration settings for TLS in InfluxDB. 559 | ### 560 | 561 | [tls] 562 | # Determines the available set of cipher suites. See https://golang.org/pkg/crypto/tls/#pkg-constants 563 | # for a list of available ciphers, which depends on the version of Go (use the query 564 | # SHOW DIAGNOSTICS to see the version of Go used to build InfluxDB). If not specified, uses 565 | # the default settings from Go's crypto/tls package. 566 | # ciphers = [ 567 | # "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305", 568 | # "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256", 569 | # ] 570 | 571 | # Minimum version of the tls protocol that will be negotiated. If not specified, uses the 572 | # default settings from Go's crypto/tls package. 573 | # min-version = "tls1.2" 574 | 575 | # Maximum version of the tls protocol that will be negotiated. If not specified, uses the 576 | # default settings from Go's crypto/tls package. 
577 | # max-version = "tls1.2" 578 | -------------------------------------------------------------------------------- /dashboard/grafana.ini: -------------------------------------------------------------------------------- 1 | ##################### Grafana Configuration Example ##################### 2 | # 3 | # Everything has defaults so you only need to uncomment things you want to 4 | # change 5 | 6 | # possible values : production, development 7 | ;app_mode = production 8 | 9 | # instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty 10 | ;instance_name = ${HOSTNAME} 11 | 12 | #################################### Paths #################################### 13 | [paths] 14 | # Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used) 15 | ;data = /var/lib/grafana 16 | 17 | # Temporary files in `data` directory older than given duration will be removed 18 | ;temp_data_lifetime = 24h 19 | 20 | # Directory where grafana can store logs 21 | ;logs = /var/log/grafana 22 | 23 | # Directory where grafana will automatically scan and look for plugins 24 | ;plugins = /var/lib/grafana/plugins 25 | 26 | # folder that contains provisioning config files that grafana will apply on startup and while running. 
27 | ;provisioning = conf/provisioning 28 | 29 | #################################### Server #################################### 30 | [server] 31 | # Protocol (http, https, h2, socket) 32 | ;protocol = http 33 | protocol = https 34 | 35 | # The ip address to bind to, empty will bind to all interfaces 36 | ;http_addr = 37 | 38 | # The http port to use 39 | ;http_port = 3000 40 | 41 | # The public facing domain name used to access grafana from a browser 42 | domain = health.CLOUDNAME.sovereignit.cloud 43 | 44 | # Redirect to correct domain if host header does not match domain 45 | # Prevents DNS rebinding attacks 46 | ;enforce_domain = false 47 | 48 | # The full public facing url you use in browser, used for redirects and emails 49 | # If you use reverse proxy and sub path specify full url (with sub path) 50 | ;root_url = %(protocol)s://%(domain)s:%(http_port)s/ 51 | 52 | # Serve Grafana from subpath specified in `root_url` setting. By default it is set to `false` for compatibility reasons. 53 | ;serve_from_sub_path = false 54 | 55 | # Log web requests 56 | ;router_logging = false 57 | 58 | # the path relative to the working path 59 | ;static_root_path = public 60 | 61 | # enable gzip 62 | ;enable_gzip = false 63 | 64 | # https certs & key file 65 | cert_file = /etc/grafana/health-fullchain.pem 66 | cert_key = /etc/grafana/health-key.pem 67 | 68 | # Unix socket path 69 | ;socket = 70 | 71 | # CDN Url 72 | ;cdn_url = 73 | 74 | # Sets the maximum time using a duration format (5s/5m/5ms) before timing out read of an incoming request and closing idle connections. 75 | # `0` means there is no timeout for reading the request. 76 | ;read_timeout = 0 77 | 78 | #################################### Database #################################### 79 | [database] 80 | # You can configure the database connection by specifying type, host, name, user and password 81 | # as separate properties or as one string using the url property.
82 | 83 | # Either "mysql", "postgres" or "sqlite3", it's your choice 84 | ;type = sqlite3 85 | ;host = 127.0.0.1:3306 86 | ;name = grafana 87 | ;user = root 88 | # If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;""" 89 | ;password = 90 | 91 | # Use either URL or the previous fields to configure the database 92 | # Example: mysql://user:secret@host:port/database 93 | ;url = 94 | 95 | # For "postgres" only, either "disable", "require" or "verify-full" 96 | ;ssl_mode = disable 97 | 98 | # Database drivers may support different transaction isolation levels. 99 | # Currently, only "mysql" driver supports isolation levels. 100 | # If the value is empty - driver's default isolation level is applied. 101 | # For "mysql" use "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ" or "SERIALIZABLE". 102 | ;isolation_level = 103 | 104 | ;ca_cert_path = 105 | ;client_key_path = 106 | ;client_cert_path = 107 | ;server_cert_name = 108 | 109 | # For "sqlite3" only, path relative to data_path setting 110 | ;path = grafana.db 111 | 112 | # Max idle conn setting default is 2 113 | ;max_idle_conn = 2 114 | 115 | # Max conn setting default is 0 (mean not set) 116 | ;max_open_conn = 117 | 118 | # Connection Max Lifetime default is 14400 (means 14400 seconds or 4 hours) 119 | ;conn_max_lifetime = 14400 120 | 121 | # Set to true to log the sql calls and execution times. 122 | ;log_queries = 123 | 124 | # For "sqlite3" only. cache mode setting used for connecting to the database. (private, shared) 125 | ;cache_mode = private 126 | 127 | ################################### Data sources ######################### 128 | [datasources] 129 | # Upper limit of data sources that Grafana will return. This limit is a temporary configuration and it will be deprecated when pagination will be introduced on the list data sources API. 
130 | ;datasource_limit = 5000 131 | 132 | #################################### Cache server ############################# 133 | [remote_cache] 134 | # Either "redis", "memcached" or "database" default is "database" 135 | ;type = database 136 | 137 | # cache connectionstring options 138 | # database: will use Grafana primary database. 139 | # redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=0,ssl=false`. Only addr is required. ssl may be 'true', 'false', or 'insecure'. 140 | # memcache: 127.0.0.1:11211 141 | ;connstr = 142 | 143 | #################################### Data proxy ########################### 144 | [dataproxy] 145 | 146 | # This enables data proxy logging, default is false 147 | ;logging = false 148 | 149 | # How long the data proxy waits before timing out, default is 30 seconds. 150 | # This setting also applies to core backend HTTP data sources where query requests use an HTTP client with timeout set. 151 | ;timeout = 30 152 | 153 | # How many seconds the data proxy waits before sending a keepalive probe request. 154 | ;keep_alive_seconds = 30 155 | 156 | # How many seconds the data proxy waits for a successful TLS Handshake before timing out. 157 | ;tls_handshake_timeout_seconds = 10 158 | 159 | # How many seconds the data proxy will wait for a server's first response headers after 160 | # fully writing the request headers if the request has an "Expect: 100-continue" 161 | # header. A value of 0 will result in the body being sent immediately, without 162 | # waiting for the server to approve. 163 | ;expect_continue_timeout_seconds = 1 164 | 165 | # Optionally limits the total number of connections per host, including connections in the dialing, 166 | # active, and idle states. On limit violation, dials will block. 167 | # A value of zero (0) means no limit. 168 | ;max_conns_per_host = 0 169 | 170 | # The maximum number of idle connections that Grafana will keep alive. 
171 | ;max_idle_connections = 100 172 | 173 | # The maximum number of idle connections per host that Grafana will keep alive. 174 | ;max_idle_connections_per_host = 2 175 | 176 | # How many seconds the data proxy keeps an idle connection open before timing out. 177 | ;idle_conn_timeout_seconds = 90 178 | 179 | # If enabled and user is not anonymous, data proxy will add X-Grafana-User header with username into the request, default is false. 180 | ;send_user_header = false 181 | 182 | #################################### Analytics #################################### 183 | [analytics] 184 | # Server reporting, sends usage counters to stats.grafana.org every 24 hours. 185 | # No ip addresses are being tracked, only simple counters to track 186 | # running instances, dashboard and error counts. It is very helpful to us. 187 | # Change this option to false to disable reporting. 188 | ;reporting_enabled = true 189 | 190 | # The name of the distributor of the Grafana instance. Ex hosted-grafana, grafana-labs 191 | ;reporting_distributor = grafana-labs 192 | 193 | # Set to false to disable all checks to https://grafana.net 194 | # for new versions (grafana itself and plugins), check is used 195 | # in some UI views to notify that grafana or plugin update exists 196 | # This option does not cause any auto updates, nor send any information 197 | # only a GET request to http://grafana.com to get latest versions 198 | ;check_for_updates = true 199 | 200 | # Google Analytics universal tracking code, only enabled if you specify an id here 201 | ;google_analytics_ua_id = 202 | 203 | # Google Tag Manager ID, only enabled if you specify an id here 204 | ;google_tag_manager_id = 205 | 206 | #################################### Security #################################### 207 | [security] 208 | # disable creation of admin user on first start of grafana 209 | ;disable_initial_admin_creation = false 210 | 211 | # default admin user, created on startup 212 | admin_user = admin 213 | 
214 | # default admin password, can be changed before first start of grafana, or in profile settings 215 | admin_password = SCS_Admin 216 | 217 | # used for signing 218 | ;secret_key = SW2YcwTIb9zpOOhoPsMm 219 | 220 | # disable gravatar profile images 221 | ;disable_gravatar = false 222 | 223 | # data source proxy whitelist (ip_or_domain:port separated by spaces) 224 | data_source_proxy_whitelist = localhost:8088 localhost:8086 225 | 226 | # disable protection against brute force login attempts 227 | ;disable_brute_force_login_protection = false 228 | 229 | # set to true if you host Grafana behind HTTPS. default is false. 230 | ;cookie_secure = false 231 | 232 | # set cookie SameSite attribute. defaults to `lax`. can be set to "lax", "strict", "none" and "disabled" 233 | ;cookie_samesite = lax 234 | 235 | # set to true if you want to allow browsers to render Grafana in a ,