├── .github ├── CODEOWNERS ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── common-workflows.yaml │ ├── go-version.yaml │ ├── image-version-update.yaml │ ├── release.yaml │ ├── update-libraries-to-commits.yaml │ └── update-libraries.yaml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── cmd └── podmon │ ├── Makefile │ ├── features │ └── main.feature │ ├── main.go │ ├── main_steps_test.go │ ├── main_test.go │ └── resources │ ├── driver-config-params-bad-format1.yaml │ ├── driver-config-params-bad-format2.yaml │ ├── driver-config-params-bad-level1.yaml │ ├── driver-config-params-bad-level2.yaml │ ├── driver-config-params-bad-value1.yaml │ ├── driver-config-params-bad-value2.yaml │ ├── driver-config-params-bad-value3.yaml │ ├── driver-config-params-bad-value4.yaml │ ├── driver-config-params-bad-value5.yaml │ ├── driver-config-params.yaml │ └── driver-config-params2.yaml ├── core ├── .gitignore ├── core.go ├── semver.tpl └── semver │ ├── semver.go │ └── semver_test.go ├── dev-docker ├── docker.mk ├── go.mod ├── go.sum ├── internal ├── criapi │ ├── criapi.go │ ├── criapi_test.go │ └── interface.go ├── csiapi │ ├── csi.go │ ├── csi_test.go │ └── interface.go ├── k8sapi │ ├── interface.go │ ├── k8sapi.go │ └── k8sapi_test.go ├── mocks │ ├── criapi_mock.go │ ├── csiapi_mock.go │ ├── k8sapi_mock.go │ └── utils_mock.go ├── monitor │ ├── Makefile │ ├── controller.go │ ├── driver.go │ ├── features │ │ ├── controller.feature │ │ ├── integration.feature │ │ ├── monitor.feature │ │ ├── node.feature │ │ └── virtualization.feature │ ├── integration_steps_test.go │ ├── integration_test.go │ ├── longevity_opvirt.sh │ ├── monitor.go │ ├── monitor_steps_test.go │ ├── monitor_test.go │ ├── monitor_test_helpers.go │ ├── node.go │ ├── run.integration │ ├── short_integration_test.go │ └── virtualization_integration_test.go └── utils │ ├── linuxLoopBackDevice.go │ ├── linuxLoopBackDevice_test.go │ ├── linuxUnmount.go │ ├── linuxUnmount_test.go │ ├── winUnmount.go 
│ └── winUnmount_test.go ├── licenses └── LICENSE ├── podmon ├── Chart.yaml ├── templates │ └── podman.yaml └── values.yaml ├── test ├── podmontest │ ├── Dockerfile │ ├── Makefile │ ├── deploy │ │ ├── Chart.yaml │ │ ├── templates │ │ │ ├── _helpers.tpl │ │ │ ├── pvc-vm-block.yaml │ │ │ ├── pvc-vm-filesystem.yaml │ │ │ ├── pvc0.yaml │ │ │ ├── pvc1.yaml │ │ │ ├── test.yaml │ │ │ └── vm-template.yaml │ │ ├── values-isilon.yaml │ │ ├── values-powermax-iscsi.yaml │ │ ├── values-powermax-nfs.yaml │ │ ├── values-powermax-nvme.yaml │ │ ├── values-powerstore-iscsi.yaml │ │ ├── values-powerstore-nfs.yaml │ │ ├── values-powerstore-nvme.yaml │ │ ├── values-unity-nfs.yaml │ │ ├── values-unity.yaml │ │ ├── values-vm.yaml │ │ └── values-vxflex.yaml │ ├── insi.sh │ ├── inspm.sh │ ├── insps.sh │ ├── insu.sh │ ├── insv.sh │ ├── podmontest.go │ └── uns.sh ├── proxy │ ├── README.md │ └── main.go ├── sh │ ├── SCALE_TEST.md │ ├── basic.sh │ ├── bounce.ip │ ├── bounce.kubelet │ ├── failnodes.sh │ ├── failpods.sh │ ├── invoke.sh │ ├── mon.sh │ ├── nway.sh │ ├── plot_scale_test.py │ ├── rebalance.sh │ ├── reboot.node │ ├── scaleup-powerflex.sh │ ├── scaleup-powermax.sh │ ├── scaleup-powerscale.sh │ ├── scaleup-powerstore.sh │ └── scaleup-unity.sh └── ssh │ ├── cli │ └── main.go │ ├── client.go │ ├── client_test.go │ └── mocks │ ├── mock_client_wrapper.go │ └── mock_session_wrapper.go └── tools ├── collect_logs.sh ├── mon.sh └── monx.sh /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # CODEOWNERS 2 | # 3 | # documentation for this file can be found at: 4 | # https://help.github.com/en/articles/about-code-owners 5 | 6 | # These are the default owners for the code and will 7 | # be requested for review when someone opens a pull request. 8 | # order is alphabetical for easier maintenance. 
9 | # 10 | # Aaron Tye (atye) 11 | # Aly Nathoo (anathoodell) 12 | # Chiman Jain (chimanjain) 13 | # Evgeny Uglov (EvgenyUglov) 14 | # Harish H (HarishH-DELL) 15 | # Nitesh Rewatkar (nitesh3108) 16 | # Oleksandr Babiychuk (alexemc) 17 | # Rajendra Indukuri (rajendraindukuri) 18 | # Shayna Finocchiaro (shaynafinocchiaro) 19 | # Spandita Panigrahi (panigs7) 20 | # Tom Watson (rbo54) 21 | # Alik Saring (alikdell) 22 | 23 | # for all files: 24 | * @atye @anathoodell @chimanjain @EvgenyUglov @HarishH-DELL @nitesh3108 @alexemc @rajendraindukuri @shaynafinocchiaro @panigs7 @rbo54 @alikdell 25 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # To get started with Dependabot version updates, you'll need to specify which 3 | # package ecosystems to update and where the package manifests are located. 4 | # Please see the documentation for all configuration options: 5 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 6 | 7 | version: 2 8 | updates: 9 | # Schedule for go module updates 10 | - package-ecosystem: "gomod" 11 | directory: "/" 12 | schedule: 13 | interval: "weekly" 14 | day: "sunday" 15 | time: "18:00" 16 | allow: 17 | # Allow direct updates for packages 18 | - dependency-type: direct 19 | ignore: 20 | - dependency-name: "*" 21 | update-types: 22 | - version-update:semver-patch 23 | # a group of dependencies will be updated together in one pull request 24 | groups: 25 | golang: 26 | # group all semantic versioning levels together in one pull request 27 | update-types: 28 | - major 29 | - minor 30 | patterns: 31 | - "*" 32 | 33 | # github actions 34 | - package-ecosystem: "github-actions" 35 | directory: "/" 36 | schedule: 37 | # Check for updates to GitHub Actions every week 38 | interval: "weekly" 39 | day: "saturday" 40 | groups: 41 | github-actions: 42 | 
patterns: 43 | - "*" 44 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 16 | # Description 17 | A few sentences describing the overall goals of the pull request's commits. 18 | 19 | # GitHub Issues 20 | List the GitHub issues impacted by this PR: 21 | 22 | | GitHub Issue # | 23 | | -------------- | 24 | | | 25 | 26 | # Checklist: 27 | 28 | - [ ] I have performed a self-review of my own code to ensure there are no formatting, vetting, linting, or security issues 29 | - [ ] I have verified that new and existing unit tests pass locally with my changes 30 | - [ ] I have not allowed coverage numbers to degenerate 31 | - [ ] I have maintained at least 90% code coverage 32 | - [ ] I have commented my code, particularly in hard-to-understand areas 33 | - [ ] I have made corresponding changes to the documentation 34 | - [ ] I have added tests that prove my fix is effective or that my feature works 35 | - [ ] Backward compatibility is not broken 36 | 37 | # How Has This Been Tested? 38 | Please describe the tests that you ran to verify your changes. 
Please also list any relevant details for your test configuration 39 | 40 | - [ ] Test A 41 | - [ ] Test B 42 | -------------------------------------------------------------------------------- /.github/workflows/common-workflows.yaml: -------------------------------------------------------------------------------- 1 | name: Common Workflows 2 | on: # yamllint disable-line rule:truthy 3 | push: 4 | branches: [main] 5 | pull_request: 6 | branches: ["**"] 7 | 8 | jobs: 9 | 10 | # golang static analysis checks 11 | go-static-analysis: 12 | uses: dell/common-github-actions/.github/workflows/go-static-analysis.yaml@main 13 | name: Golang Validation 14 | 15 | common: 16 | name: Quality Checks 17 | uses: dell/common-github-actions/.github/workflows/go-common.yml@main 18 | -------------------------------------------------------------------------------- /.github/workflows/go-version.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 Dell Inc., or its subsidiaries. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Reusable workflow to perform go version update on Golang based projects 10 | name: Go Version Update 11 | 12 | on: # yamllint disable-line rule:truthy 13 | workflow_dispatch: 14 | repository_dispatch: 15 | types: [go-update-workflow] 16 | 17 | jobs: 18 | # go version update 19 | go-version-update: 20 | uses: dell/common-github-actions/.github/workflows/go-version-workflow.yaml@main 21 | name: Go Version Update 22 | secrets: inherit 23 | -------------------------------------------------------------------------------- /.github/workflows/image-version-update.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 Dell Inc., or its subsidiaries. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Reusable workflow to perform image version update on Golang based projects 10 | name: Image Version Update 11 | 12 | on: # yamllint disable-line rule:truthy 13 | workflow_dispatch: 14 | inputs: 15 | version: 16 | description: "Version to release (major, minor, patch) Ex: minor" 17 | required: true 18 | repository_dispatch: 19 | types: [image-update-workflow] 20 | 21 | jobs: 22 | # image version update 23 | image-version-update: 24 | uses: dell/common-github-actions/.github/workflows/image-version-workflow.yaml@main 25 | with: 26 | version: "${{ github.event.inputs.version || 'minor' }}" 27 | secrets: inherit 28 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Release Karavi-Resiliency 2 | # Invocable as a reusable workflow 3 | # Can be 
manually triggered 4 | on: # yamllint disable-line rule:truthy 5 | workflow_call: 6 | workflow_dispatch: 7 | inputs: 8 | option: 9 | description: 'Select version to release' 10 | required: true 11 | type: choice 12 | default: 'minor' 13 | options: 14 | - major 15 | - minor 16 | - patch 17 | - n-1/n-2 patch (Provide input in the below box) 18 | version: 19 | description: "Patch version to release. example: 2.1.x (Use this only if n-1/n-2 patch is selected)" 20 | required: false 21 | type: string 22 | repository_dispatch: 23 | types: [auto-release-workflow] 24 | jobs: 25 | process-inputs: 26 | name: Process Inputs 27 | runs-on: ubuntu-latest 28 | outputs: 29 | processedVersion: ${{ steps.set-version.outputs.versionEnv }} 30 | steps: 31 | - name: Process input 32 | id: set-version 33 | shell: bash 34 | run: | 35 | echo "Triggered by: ${{ github.event_name }}" 36 | if [[ "${{ github.event_name }}" == "repository_dispatch" ]]; then 37 | echo "versionEnv=minor" >> $GITHUB_OUTPUT 38 | exit 0 39 | fi 40 | if [[ "${{ github.event.inputs.version }}" != "" && "${{ github.event.inputs.option }}" == "n-1/n-2 patch (Provide input in the below box)" ]]; then 41 | # if both version and option are provided, then version takes precedence i.e. patch release for n-1/n-2 42 | echo "versionEnv=${{ github.event.inputs.version }}" >> $GITHUB_OUTPUT 43 | exit 0 44 | fi 45 | if [[ "${{ github.event.inputs.option }}" != "n-1/n-2 patch (Provide input in the below box)" ]]; then 46 | # if only option is provided, then option takes precedence i.e. 
minor, major or patch release 47 | echo "versionEnv=${{ github.event.inputs.option }}" >> $GITHUB_OUTPUT 48 | exit 0 49 | fi 50 | # if neither option nor version is provided, then minor release is taken by default (Auto-release) 51 | echo "versionEnv=minor" >> $GITHUB_OUTPUT 52 | csm-release: 53 | needs: [process-inputs] 54 | uses: dell/common-github-actions/.github/workflows/csm-release-driver-module.yaml@main 55 | name: Release CSM Drivers and Modules 56 | with: 57 | version: ${{ needs.process-inputs.outputs.processedVersion }} 58 | images: 'podmon' 59 | secrets: inherit 60 | -------------------------------------------------------------------------------- /.github/workflows/update-libraries-to-commits.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 Dell Inc., or its subsidiaries. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Reusable workflow to perform updates of Dell client libraries to latest commits 10 | name: Dell Libraries Commit Update 11 | on: # yamllint disable-line rule:truthy 12 | workflow_dispatch: 13 | repository_dispatch: 14 | types: [latest-commits-libraries] 15 | 16 | jobs: 17 | package-update: 18 | uses: dell/common-github-actions/.github/workflows/update-libraries-to-commits.yml@main 19 | name: Dell Libraries Update 20 | secrets: inherit 21 | -------------------------------------------------------------------------------- /.github/workflows/update-libraries.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 Dell Inc., or its subsidiaries. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Reusable workflow to perform updates of Dell client libraries 10 | name: Dell Libraries Release Update 11 | on: # yamllint disable-line rule:truthy 12 | workflow_dispatch: 13 | repository_dispatch: 14 | types: [latest-released-libraries] 15 | 16 | jobs: 17 | package-update: 18 | uses: dell/common-github-actions/.github/workflows/update-libraries.yml@main 19 | name: Dell Libraries Update 20 | secrets: inherit 21 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021-2024 Dell Inc., or its subsidiaries. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | ARG GOIMAGE 15 | ARG BASEIMAGE 16 | 17 | # Build the module binary 18 | FROM $GOIMAGE as builder 19 | 20 | WORKDIR /workspace 21 | COPY . . 
22 | 23 | # Build the binary 24 | RUN GOOS=linux CGO_ENABLED=0 go build -o podmon ./cmd/podmon/ 25 | 26 | # Stage to build the module image 27 | FROM $BASEIMAGE AS final 28 | LABEL vendor="Dell Technologies" \ 29 | maintainer="Dell Technologies" \ 30 | name="csm-resiliency" \ 31 | summary="Dell Container Storage Modules (CSM) for Resiliency" \ 32 | description="Makes Kubernetes applications, including those that utilize persistent storage, more resilient to various failures" \ 33 | release="1.14.0" \ 34 | version="1.13.0" \ 35 | license="Apache-2.0" 36 | 37 | COPY licenses licenses/ 38 | COPY --from=builder /workspace/podmon / 39 | 40 | ENTRYPOINT [ "/podmon" ] 41 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021-2025 Dell Inc., or its subsidiaries. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Includes the following generated file to get semantic version information 15 | 16 | all: clean podman push 17 | 18 | unit-test: 19 | (cd cmd/podmon; make unit-test) 20 | 21 | clean: 22 | go clean ./... 23 | 24 | build: 25 | GOOS=linux CGO_ENABLED=0 go build -o podmon ./cmd/podmon/ 26 | 27 | dev-docker: build 28 | docker build -t podmon -f dev-docker --network host . 
29 | 30 | podman: download-csm-common 31 | $(eval include csm-common.mk) 32 | go run core/semver/semver.go -f mk >semver.mk 33 | make -f docker.mk podman DEFAULT_GOIMAGE=$(DEFAULT_GOIMAGE) CSM_BASEIMAGE=$(CSM_BASEIMAGE) 34 | 35 | push: 36 | make -f docker.mk push 37 | 38 | download-csm-common: 39 | curl -O -L https://raw.githubusercontent.com/dell/csm/main/config/csm-common.mk 40 | 41 | .PHONY: actions action-help 42 | actions: ## Run all GitHub Action checks that run on a pull request creation 43 | @echo "Running all GitHub Action checks for pull request events..." 44 | @act -l | grep -v ^Stage | grep pull_request | grep -v image_security_scan | awk '{print $$2}' | while read WF; do \ 45 | echo "Running workflow: $${WF}"; \ 46 | act pull_request --no-cache-server --platform ubuntu-latest=ghcr.io/catthehacker/ubuntu:act-latest --job "$${WF}"; \ 47 | done 48 | 49 | action-help: ## Echo instructions to run one specific workflow locally 50 | @echo "GitHub Workflows can be run locally with the following command:" 51 | @echo "act pull_request --no-cache-server --platform ubuntu-latest=ghcr.io/catthehacker/ubuntu:act-latest --job " 52 | @echo "" 53 | @echo "Where '' is a Job ID returned by the command:" 54 | @echo "act -l" 55 | @echo "" 56 | @echo "NOTE: if act is not installed, it can be downloaded from https://github.com/nektos/act" 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Dell Container Storage Modules (CSM) for Resiliency 18 | 19 | [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg)](https://github.com/dell/csm/blob/main/docs/CODE_OF_CONDUCT.md) 20 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://www.apache.org/licenses/LICENSE-2.0) 21 | [![Podman 
Pulls](https://img.shields.io/docker/pulls/dellemc/podmon)](https://hub.docker.com/r/dellemc/podmon) 22 | [![Go version](https://img.shields.io/github/go-mod/go-version/dell/karavi-resiliency)](go.mod) 23 | [![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/dell/karavi-resiliency?include_prereleases&label=latest&style=flat-square)](https://github.com/dell/karavi-resiliency/releases/latest) 24 | [![Releases](https://img.shields.io/badge/Releases-green.svg)](https://github.com/dell/karavi-resiliency/releases) 25 | 26 | CSM for Resiliency is part of the [CSM (Container Storage Modules)](https://github.com/dell/csm) open-source suite of Kubernetes storage enablers for Dell products. CSM for Resiliency is a project designed to make Kubernetes Applications, including those that utilize persistent storage, more resilient to various failures. The first component of CSM for Resiliency is a pod monitor that is specifically designed to protect stateful applications from various failures. It is not a standalone application, but rather is deployed as a _sidecar_ to Dell CSI (Container Storage Interface) drivers, in both the driver's controller pods and the driver's node pods. Deploying CSM for Resiliency as a sidecar allows it to make direct requests to the driver through the Unix domain socket that Kubernetes sidecars use to make CSI requests. 27 | 28 | Some of the methods CSM for Resiliency invokes in the driver are standard CSI methods, such as NodeUnpublishVolume, NodeUnstageVolume, and ControllerUnpublishVolume. CSM for Resiliency also uses proprietary calls that are not part of the standard CSI specification. Currently, there is only one, ValidateVolumeHostConnectivity, which returns information on whether a host is connected to the storage system and/or whether any I/O activity has happened in the recent past from a list of specified volumes. 
This allows CSM for Resiliency to make more accurate determinations about the state of the system and its persistent volumes. 29 | 30 | Accordingly, CSM for Resiliency is adapted to, and qualified with each Dell CSI driver it is to be used with. Different storage systems have different nuances and characteristics that CSM for Resiliency must take into account. 31 | 32 | For documentation, please visit [Container Storage Modules documentation](https://dell.github.io/csm-docs/). 33 | 34 | # Table of Contents 35 | 36 | - [Code of Conduct](https://github.com/dell/csm/blob/main/docs/CODE_OF_CONDUCT.md) 37 | - [Maintainer Guide](https://github.com/dell/csm/blob/main/docs/MAINTAINER_GUIDE.md) 38 | - [Committer Guide](https://github.com/dell/csm/blob/main/docs/COMMITTER_GUIDE.md) 39 | - [Contributing Guide](https://github.com/dell/csm/blob/main/docs/CONTRIBUTING.md) 40 | - [List of Adopters](https://github.com/dell/csm/blob/main/docs/ADOPTERS.md) 41 | - [Dell support](https://www.dell.com/support/incidents-online/en-us/contactus/product/container-storage-modules) 42 | - [Security](https://github.com/dell/csm/blob/main/docs/SECURITY.md) 43 | - [About](#about) 44 | 45 | ## Building CSM for Resiliency 46 | 47 | If you wish to clone and build CSM for Resiliency, a Linux host is required with the following installed: 48 | 49 | | Component | Version | Additional Information | 50 | | --------------- | --------- | ---------------------------------------------------------------------- | 51 | | Podman | v4.4.1+ | [Podman installation](https://podman.io/docs/installation) | 52 | | Buildah | v1.29.1+ | [Buildah installation](https://www.redhat.com/sysadmin/getting-started-buildah) | 53 | | Golang | v1.21+ | [Golang installation](https://go.dev/dl/) | 54 | | git | latest | [Git installation](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) | 55 | 56 | Once all prerequisites are on the Linux host, follow the steps below to clone, build and deploy CSM for Resiliency: 57 | 
58 | 1. Clone the repository: `git clone https://github.com/dell/karavi-resiliency.git` 59 | 2. Define and export the following environment variables to point to your Podman registry: 60 | 61 | ```sh 62 | export REGISTRY_HOST= 63 | export REGISTRY_PORT= 64 | export VERSION= 65 | ``` 66 | 67 | 3. At the root of the source tree, run the following to build and deploy: `make` 68 | 69 | ## Testing CSM for Resiliency 70 | 71 | From the root directory where the repo was cloned, the unit tests can be executed as follows: 72 | 73 | ```sh 74 | make unit-test 75 | ``` 76 | 77 | ## Versioning 78 | 79 | This project is adhering to [Semantic Versioning](https://semver.org/). 80 | 81 | ## About 82 | 83 | Dell Container Storage Modules (CSM) is 100% open source and community-driven. All components are available 84 | under [Apache 2 License](https://www.apache.org/licenses/LICENSE-2.0.html) on 85 | GitHub. 86 | -------------------------------------------------------------------------------- /cmd/podmon/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021-2023 Dell Inc., or its subsidiaries. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Includes the following generated file to get semantic version information 15 | 16 | unit-test: 17 | go test -race -v -coverprofile=c.out ./... 
18 | 19 | godog: 20 | go clean -cache 21 | go test -v -coverprofile=c.out -test.run TestMain ./... 22 | -------------------------------------------------------------------------------- /cmd/podmon/main_steps_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2021-2022 Dell Inc. or its subsidiaries. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | package main 15 | 16 | import ( 17 | "context" 18 | "fmt" 19 | "os" 20 | "podmon/internal/csiapi" 21 | "podmon/internal/k8sapi" 22 | "podmon/internal/mocks" 23 | "podmon/internal/monitor" 24 | "strings" 25 | "sync" 26 | "time" 27 | 28 | "github.com/cucumber/godog" 29 | "github.com/dell/gofsutil" 30 | logtest "github.com/sirupsen/logrus/hooks/test" 31 | "github.com/stretchr/testify/assert" 32 | "google.golang.org/grpc" 33 | "k8s.io/client-go/kubernetes" 34 | ) 35 | 36 | type mainFeature struct { 37 | // Logrus test hook 38 | loghook *logtest.Hook 39 | k8sapiMock *mocks.K8sMock 40 | csiapiMock *mocks.CSIMock 41 | leaderElect *mockLeaderElect 42 | failStartAPIMonitor bool 43 | } 44 | 45 | var ( 46 | saveOriginalArgs sync.Once 47 | originalArgs []string 48 | ) 49 | 50 | func (m *mainFeature) aPodmonInstance() error { 51 | if m.loghook == nil { 52 | m.loghook = logtest.NewGlobal() 53 | } else { 54 | fmt.Printf("loghook last-entry %+v\n", m.loghook.LastEntry()) 55 | } 56 | 
monitor.PodMonitor.CSIExtensionsPresent = false 57 | m.csiapiMock = new(mocks.CSIMock) 58 | m.k8sapiMock = new(mocks.K8sMock) 59 | GetCSIClient = m.mockGetCSIClient 60 | K8sAPI = m.k8sapiMock 61 | m.leaderElect = &mockLeaderElect{} 62 | LeaderElection = m.mockLeaderElection 63 | StartAPIMonitorFn = m.mockStartAPIMonitor 64 | StartPodMonitorFn = m.mockStartPodMonitor 65 | StartNodeMonitorFn = m.mockStartNodeMonitor 66 | monitor.K8sAPI = m.k8sapiMock 67 | gofsutil.UseMockFS() 68 | PodMonWait = m.mockPodMonWait 69 | saveOriginalArgs.Do(func() { 70 | originalArgs = os.Args 71 | }) 72 | 73 | return nil 74 | } 75 | 76 | type mockLeaderElect struct { 77 | failLeaderElection bool 78 | } 79 | 80 | func (le *mockLeaderElect) Run() error { 81 | if le.failLeaderElection { 82 | return fmt.Errorf("induced leaderElection failure") 83 | } 84 | return nil 85 | } 86 | 87 | func (le *mockLeaderElect) WithNamespace(_ string) { 88 | } 89 | 90 | func (m *mainFeature) mockGetCSIClient(_ string, _ ...grpc.DialOption) (csiapi.CSIApi, error) { 91 | return m.csiapiMock, nil 92 | } 93 | 94 | func (m *mainFeature) mockStartPodMonitor(_ k8sapi.K8sAPI, _ kubernetes.Interface, _, _ string, _ time.Duration) { 95 | } 96 | 97 | func (m *mainFeature) mockStartNodeMonitor(_ k8sapi.K8sAPI, _ kubernetes.Interface, _, _ string, _ time.Duration) { 98 | } 99 | 100 | func (m *mainFeature) mockStartAPIMonitor(_ k8sapi.K8sAPI, _, _, _ time.Duration, _ func(interval time.Duration) bool) error { 101 | if m.failStartAPIMonitor { 102 | return fmt.Errorf("induced StorageAPIMonitor failure") 103 | } 104 | return nil 105 | } 106 | 107 | func (m *mainFeature) mockPodMonWait() bool { 108 | return true 109 | } 110 | 111 | func (m *mainFeature) mockLeaderElection(_ func(ctx context.Context)) leaderElection { 112 | return m.leaderElect 113 | } 114 | 115 | func (m *mainFeature) podmonEnvVarsSetTo(k8sSvc, k8sSvcPort string) error { 116 | os.Setenv("KUBERNETES_SERVICE_HOST", k8sSvc) 117 | 
os.Setenv("KUBERNETES_SERVICE_PORT", k8sSvcPort) 118 | return nil 119 | } 120 | 121 | func (m *mainFeature) invokeMainFunction(args string) error { 122 | os.Args = append(originalArgs, strings.Split(args, " ")...) 123 | main() 124 | return nil 125 | } 126 | 127 | func (m *mainFeature) theLastLogMessageContains(errormsg string) error { 128 | lastEntry := m.loghook.LastEntry() 129 | if errormsg == "none" { 130 | if lastEntry != nil && len(lastEntry.Message) > 0 { 131 | return fmt.Errorf("expected no error for test case, but got: %s", lastEntry.Message) 132 | } 133 | return nil 134 | } 135 | if lastEntry == nil { 136 | return fmt.Errorf("expected error message to contain: %s, but last log entry was nil", errormsg) 137 | } else if strings.Contains(lastEntry.Message, errormsg) { 138 | return nil 139 | } 140 | return fmt.Errorf("expected error message to contain: %s, but it was %s", errormsg, lastEntry.Message) 141 | } 142 | 143 | func (m *mainFeature) csiExtensionsPresentIsFalse(expectedStr string) error { 144 | expected := strings.ToLower(expectedStr) == "true" 145 | return monitor.AssertExpectedAndActual(assert.Equal, expected, monitor.PodMonitor.CSIExtensionsPresent, 146 | fmt.Sprintf("Expected CSIExtensionsPresent flag to be %s, but was %v", 147 | expectedStr, monitor.PodMonitor.CSIExtensionsPresent)) 148 | } 149 | 150 | func (m *mainFeature) iInduceError(induced string) error { 151 | switch induced { 152 | case "none": 153 | break 154 | case "Connect": 155 | m.k8sapiMock.InducedErrors.Connect = true 156 | case "DeletePod": 157 | m.k8sapiMock.InducedErrors.DeletePod = true 158 | case "GetPod": 159 | m.k8sapiMock.InducedErrors.GetPod = true 160 | case "GetVolumeAttachments": 161 | m.k8sapiMock.InducedErrors.GetVolumeAttachments = true 162 | case "DeleteVolumeAttachment": 163 | m.k8sapiMock.InducedErrors.DeleteVolumeAttachment = true 164 | case "GetPersistentVolumeClaimsInNamespace": 165 | m.k8sapiMock.InducedErrors.GetPersistentVolumeClaimsInNamespace = true 166 | 
case "GetPersistentVolumeClaimsInPod": 167 | m.k8sapiMock.InducedErrors.GetPersistentVolumeClaimsInPod = true 168 | case "GetPersistentVolumesInPod": 169 | m.k8sapiMock.InducedErrors.GetPersistentVolumesInPod = true 170 | case "IsVolumeAttachmentToPod": 171 | m.k8sapiMock.InducedErrors.IsVolumeAttachmentToPod = true 172 | case "GetPersistentVolumeClaimName": 173 | m.k8sapiMock.InducedErrors.GetPersistentVolumeClaimName = true 174 | case "GetPersistentVolume": 175 | m.k8sapiMock.InducedErrors.GetPersistentVolume = true 176 | case "GetPersistentVolumeClaim": 177 | m.k8sapiMock.InducedErrors.GetPersistentVolumeClaim = true 178 | case "GetNode": 179 | m.k8sapiMock.InducedErrors.GetNode = true 180 | case "GetNodeWithTimeout": 181 | m.k8sapiMock.InducedErrors.GetNodeWithTimeout = true 182 | case "GetVolumeHandleFromVA": 183 | m.k8sapiMock.InducedErrors.GetVolumeHandleFromVA = true 184 | case "GetPVNameFromVA": 185 | m.k8sapiMock.InducedErrors.GetPVNameFromVA = true 186 | case "ControllerUnpublishVolume": 187 | m.csiapiMock.InducedErrors.ControllerUnpublishVolume = true 188 | case "NodeUnpublishVolume": 189 | m.csiapiMock.InducedErrors.NodeUnpublishVolume = true 190 | case "NodeUnstageVolume": 191 | m.csiapiMock.InducedErrors.NodeUnstageVolume = true 192 | case "ValidateVolumeHostConnectivity": 193 | m.csiapiMock.InducedErrors.ValidateVolumeHostConnectivity = true 194 | case "NodeConnected": 195 | m.csiapiMock.ValidateVolumeHostConnectivityResponse.Connected = true 196 | case "NodeNotConnected": 197 | m.csiapiMock.ValidateVolumeHostConnectivityResponse.Connected = false 198 | case "Unmount": 199 | gofsutil.GOFSMock.InduceUnmountError = true 200 | case "LeaderElection": 201 | m.leaderElect.failLeaderElection = true 202 | case "StartAPIMonitor": 203 | m.failStartAPIMonitor = true 204 | case "CSIClientClose": 205 | m.csiapiMock.InducedErrors.Close = true 206 | default: 207 | return fmt.Errorf("unknown induced error: %s", induced) 208 | } 209 | return nil 210 | } 211 | 212 | 
func ScenarioInit(context *godog.ScenarioContext) { 213 | m := &mainFeature{} 214 | context.Step(`^a podmon instance$`, m.aPodmonInstance) 215 | context.Step(`^Podmon env vars set to "([^"]*)":"([^"]*)"$`, m.podmonEnvVarsSetTo) 216 | context.Step(`^I invoke main with arguments "([^"]*)"$`, m.invokeMainFunction) 217 | context.Step(`^the last log message contains "([^"]*)"$`, m.theLastLogMessageContains) 218 | context.Step(`^I induce error "([^"]*)"$`, m.iInduceError) 219 | context.Step(`^CSIExtensionsPresent is "([^"]*)"`, m.csiExtensionsPresentIsFalse) 220 | } 221 | -------------------------------------------------------------------------------- /cmd/podmon/main_test.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2021-2022 Dell Inc. or its subsidiaries. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // Unless required by applicable law or agreed to in writing, software 8 | // distributed under the License is distributed on an "AS IS" BASIS, 9 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | // See the License for the specific language governing permissions and 11 | // limitations under the License. 
12 | 13 | package main 14 | 15 | import ( 16 | "fmt" 17 | "os" 18 | "testing" 19 | 20 | "github.com/cucumber/godog" 21 | log "github.com/sirupsen/logrus" 22 | ) 23 | 24 | func TestMain(m *testing.M) { 25 | status := 0 26 | if st := m.Run(); st > status { 27 | status = st 28 | } 29 | fmt.Printf("status %d\n", status) 30 | os.Exit(status) 31 | } 32 | 33 | func TestMainFunc(t *testing.T) { 34 | log.Printf("Starting main-func test") 35 | godogOptions := godog.Options{ 36 | Format: "pretty,junit:main-func-junit-report.xml", 37 | Paths: []string{"features"}, 38 | } 39 | status := godog.TestSuite{ 40 | Name: "main-func", 41 | ScenarioInitializer: ScenarioInit, 42 | Options: &godogOptions, 43 | }.Run() 44 | if status != 0 { 45 | t.Error("There were failed main-func tests") 46 | } 47 | log.Printf("Main-func test finished") 48 | } 49 | -------------------------------------------------------------------------------- /cmd/podmon/resources/driver-config-params-bad-format1.yaml: -------------------------------------------------------------------------------- 1 | PODMON_CONTROLLER_LOG_LEVEL: "debug" 2 | PODMON_CONTROLLER_LOG_FORMAT: "bogus" 3 | PODMON_NODE_LOG_LEVEL: "debug" 4 | PODMON_NODE_LOG_FORMAT: "TEXT" 5 | -------------------------------------------------------------------------------- /cmd/podmon/resources/driver-config-params-bad-format2.yaml: -------------------------------------------------------------------------------- 1 | PODMON_CONTROLLER_LOG_LEVEL: "debug" 2 | PODMON_CONTROLLER_LOG_FORMAT: "TEXT" 3 | PODMON_NODE_LOG_LEVEL: "debug" 4 | PODMON_NODE_LOG_FORMAT: "bogus" 5 | -------------------------------------------------------------------------------- /cmd/podmon/resources/driver-config-params-bad-level1.yaml: -------------------------------------------------------------------------------- 1 | PODMON_CONTROLLER_LOG_LEVEL: "bogus" 2 | PODMON_CONTROLLER_LOG_FORMAT: "TEXT" 3 | PODMON_NODE_LOG_LEVEL: "debug" 4 | PODMON_NODE_LOG_FORMAT: "TEXT" 5 | 
-------------------------------------------------------------------------------- /cmd/podmon/resources/driver-config-params-bad-level2.yaml: -------------------------------------------------------------------------------- 1 | PODMON_CONTROLLER_LOG_LEVEL: "debug" 2 | PODMON_CONTROLLER_LOG_FORMAT: "TEXT" 3 | PODMON_NODE_LOG_LEVEL: "bogus" 4 | PODMON_NODE_LOG_FORMAT: "TEXT" 5 | -------------------------------------------------------------------------------- /cmd/podmon/resources/driver-config-params-bad-value1.yaml: -------------------------------------------------------------------------------- 1 | PODMON_CONTROLLER_LOG_LEVEL: "debug" 2 | PODMON_CONTROLLER_LOG_FORMAT: "TEXT" 3 | PODMON_NODE_LOG_LEVEL: "debug" 4 | PODMON_NODE_LOG_FORMAT: "TEXT" 5 | PODMON_ARRAY_CONNECTIVITY_POLL_RATE: 15 6 | PODMON_ARRAY_CONNECTIVITY_CONNECTION_LOSS_THRESHOLD: 5.323 7 | PODMON_SKIP_ARRAY_CONNECTION_VALIDATION: true 8 | -------------------------------------------------------------------------------- /cmd/podmon/resources/driver-config-params-bad-value2.yaml: -------------------------------------------------------------------------------- 1 | PODMON_CONTROLLER_LOG_LEVEL: "debug" 2 | PODMON_CONTROLLER_LOG_FORMAT: "TEXT" 3 | PODMON_NODE_LOG_LEVEL: "debug" 4 | PODMON_NODE_LOG_FORMAT: "TEXT" 5 | PODMON_ARRAY_CONNECTIVITY_POLL_RATE: 15.012 6 | PODMON_ARRAY_CONNECTIVITY_CONNECTION_LOSS_THRESHOLD: 5 7 | PODMON_SKIP_ARRAY_CONNECTION_VALIDATION: true 8 | -------------------------------------------------------------------------------- /cmd/podmon/resources/driver-config-params-bad-value3.yaml: -------------------------------------------------------------------------------- 1 | PODMON_CONTROLLER_LOG_LEVEL: "debug" 2 | PODMON_CONTROLLER_LOG_FORMAT: "TEXT" 3 | PODMON_NODE_LOG_LEVEL: "debug" 4 | PODMON_NODE_LOG_FORMAT: "TEXT" 5 | PODMON_ARRAY_CONNECTIVITY_POLL_RATE: 15 6 | PODMON_ARRAY_CONNECTIVITY_CONNECTION_LOSS_THRESHOLD: 5 7 | PODMON_SKIP_ARRAY_CONNECTION_VALIDATION: not-a-boolean 8 | 
-------------------------------------------------------------------------------- /cmd/podmon/resources/driver-config-params-bad-value4.yaml: -------------------------------------------------------------------------------- 1 | PODMON_CONTROLLER_LOG_LEVEL: "debug" 2 | PODMON_CONTROLLER_LOG_FORMAT: "TEXT" 3 | PODMON_NODE_LOG_LEVEL: "debug" 4 | PODMON_NODE_LOG_FORMAT: "TEXT" 5 | PODMON_ARRAY_CONNECTIVITY_POLL_RATE: -15 6 | PODMON_ARRAY_CONNECTIVITY_CONNECTION_LOSS_THRESHOLD: 5 7 | PODMON_SKIP_ARRAY_CONNECTION_VALIDATION: false 8 | -------------------------------------------------------------------------------- /cmd/podmon/resources/driver-config-params-bad-value5.yaml: -------------------------------------------------------------------------------- 1 | PODMON_CONTROLLER_LOG_LEVEL: "debug" 2 | PODMON_CONTROLLER_LOG_FORMAT: "TEXT" 3 | PODMON_NODE_LOG_LEVEL: "debug" 4 | PODMON_NODE_LOG_FORMAT: "TEXT" 5 | PODMON_ARRAY_CONNECTIVITY_POLL_RATE: 15 6 | PODMON_ARRAY_CONNECTIVITY_CONNECTION_LOSS_THRESHOLD: 0 7 | PODMON_SKIP_ARRAY_CONNECTION_VALIDATION: false 8 | -------------------------------------------------------------------------------- /cmd/podmon/resources/driver-config-params.yaml: -------------------------------------------------------------------------------- 1 | PODMON_CONTROLLER_LOG_LEVEL: "debug" 2 | PODMON_CONTROLLER_LOG_FORMAT: "TEXT" 3 | PODMON_NODE_LOG_LEVEL: "debug" 4 | PODMON_NODE_LOG_FORMAT: "TEXT" 5 | PODMON_ARRAY_CONNECTIVITY_POLL_RATE: 15 6 | PODMON_ARRAY_CONNECTIVITY_CONNECTION_LOSS_THRESHOLD: 5 7 | PODMON_SKIP_ARRAY_CONNECTION_VALIDATION: true 8 | -------------------------------------------------------------------------------- /cmd/podmon/resources/driver-config-params2.yaml: -------------------------------------------------------------------------------- 1 | PODMON_CONTROLLER_LOG_LEVEL: "debug" 2 | PODMON_CONTROLLER_LOG_FORMAT: "json" 3 | PODMON_NODE_LOG_LEVEL: "debug" 4 | PODMON_NODE_LOG_FORMAT: "json" 5 | PODMON_ARRAY_CONNECTIVITY_POLL_RATE: 15 6 | 
PODMON_ARRAY_CONNECTIVITY_CONNECTION_LOSS_THRESHOLD: 5 7 | PODMON_SKIP_ARRAY_CONNECTION_VALIDATION: true 8 | -------------------------------------------------------------------------------- /core/.gitignore: -------------------------------------------------------------------------------- 1 | core_generated.go 2 | -------------------------------------------------------------------------------- /core/core.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2025 Dell Inc. or its subsidiaries. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // http://www.apache.org/licenses/LICENSE-2.0 7 | // Unless required by applicable law or agreed to in writing, software 8 | // distributed under the License is distributed on an "AS IS" BASIS, 9 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | // See the License for the specific language governing permissions and 11 | // limitations under the License. 12 | 13 | //go:generate go run semver/semver.go -f semver.tpl -o core_generated.go 14 | 15 | package core 16 | 17 | import "time" 18 | 19 | var ( 20 | // SemVer is the semantic version. 21 | SemVer = "unknown" 22 | 23 | // CommitSha7 is the short version of the commit hash from which 24 | // this program was built. 25 | CommitSha7 string 26 | 27 | // CommitSha32 is the long version of the commit hash from which 28 | // this program was built. 29 | CommitSha32 string 30 | 31 | // CommitTime is the commit timestamp of the commit from which 32 | // this program was built. 
33 | CommitTime time.Time 34 | ) 35 | -------------------------------------------------------------------------------- /core/semver.tpl: -------------------------------------------------------------------------------- 1 | package core 2 | 3 | import "time" 4 | 5 | func init() { 6 | SemVer = "{{.SemVer}}" 7 | CommitSha7 = "{{.Sha7}}" 8 | CommitSha32 = "{{.Sha32}}" 9 | CommitTime = time.Unix({{.Epoch}}, 0) 10 | } 11 | 12 | -------------------------------------------------------------------------------- /core/semver/semver_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright © 2025 Dell Inc. or its subsidiaries. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | See the License for the specific language governing permissions and 12 | limitations under the License. 
13 | */ 14 | 15 | package main 16 | 17 | import ( 18 | "errors" 19 | "fmt" 20 | "io" 21 | "os" 22 | "os/exec" 23 | "strings" 24 | "testing" 25 | 26 | "github.com/stretchr/testify/assert" 27 | ) 28 | 29 | func TestGetStatusError(_ *testing.T) { 30 | exitError := &exec.ExitError{ 31 | ProcessState: &os.ProcessState{}, 32 | } 33 | _, _ = GetStatusError(exitError) 34 | } 35 | 36 | func TestString(t *testing.T) { 37 | s := semver{"", "", "", "", 1, 2, 3, 4, "", "", true, "", "", 64, "", "", "", ""} 38 | assert.NotNil(t, s.String()) 39 | 40 | s = semver{"", "", "", "", 1, 2, 3, 4, "abc", "", true, "", "", 64, "", "", "", ""} 41 | assert.NotNil(t, s.String()) 42 | } 43 | 44 | func TestGetExitError(t *testing.T) { 45 | err := errors.New("error") 46 | _, ok := GetExitError(err) 47 | assert.False(t, ok) 48 | } 49 | 50 | func TestMainFunction(t *testing.T) { 51 | tests := []struct { 52 | name string 53 | format string 54 | outputFile string 55 | expectEmptyFile bool 56 | readFileFunc func(file string) ([]byte, error) 57 | }{ 58 | { 59 | name: "Write mk format to file", 60 | format: "mk", 61 | outputFile: "test_output.mk", 62 | }, 63 | { 64 | name: "Write env format to file", 65 | format: "env", 66 | outputFile: "test_output.env", 67 | }, 68 | { 69 | name: "Write json format to file", 70 | format: "json", 71 | outputFile: "test_output.json", 72 | }, 73 | { 74 | name: "Write ver format to file", 75 | format: "ver", 76 | outputFile: "test_output.ver", 77 | }, 78 | { 79 | name: "Write rpm format to file", 80 | format: "rpm", 81 | outputFile: "test_output.rpm", 82 | }, 83 | { 84 | name: "Write tpl format to file", 85 | format: "../semver.tpl", 86 | outputFile: "test_output.rpm", 87 | }, 88 | { 89 | name: "Write tpl format to file but error reading source file", 90 | format: "../semver.tpl", 91 | outputFile: "test_output.rpm", 92 | readFileFunc: func(_ string) ([]byte, error) { 93 | return nil, errors.New("error reading source file") 94 | }, 95 | expectEmptyFile: true, 96 | }, 97 | 
{ 98 | // go format currently does not print any output, expect an empty file 99 | name: "Write go format to file", 100 | format: "go", 101 | outputFile: "test_output.go", 102 | expectEmptyFile: true, 103 | }, 104 | } 105 | 106 | for _, tt := range tests { 107 | t.Run(tt.name, func(t *testing.T) { 108 | osArgs := os.Args 109 | os.Args = append(os.Args, "-f", tt.format) 110 | os.Args = append(os.Args, "-o", tt.outputFile) 111 | os.Args = append(os.Args, "-x", "true") 112 | 113 | oldReadFile := ReadFile 114 | if tt.readFileFunc != nil { 115 | ReadFile = tt.readFileFunc 116 | } 117 | oldOSExit := OSExit 118 | OSExit = func(_ int) {} 119 | 120 | oldDoExec := doExec 121 | doExec = func(_ string, _ ...string) ([]byte, error) { 122 | return []byte("v2.13.0-77-g38b3a19-dirty"), nil 123 | } 124 | 125 | main() 126 | 127 | // Open the file 128 | file, err := os.Open(tt.outputFile) 129 | if err != nil { 130 | t.Error(err) 131 | } 132 | defer file.Close() 133 | 134 | // Read the file contents 135 | contents, err := io.ReadAll(file) 136 | if err != nil { 137 | t.Error(err) 138 | } 139 | 140 | defer os.Remove(tt.outputFile) 141 | 142 | // make sure file is not empty 143 | if tt.expectEmptyFile { 144 | assert.Equal(t, 0, len(contents)) 145 | } else { 146 | assert.NotEqual(t, 0, len(contents)) 147 | } 148 | os.Args = osArgs 149 | ReadFile = oldReadFile 150 | OSExit = oldOSExit 151 | doExec = oldDoExec 152 | }) 153 | } 154 | } 155 | 156 | func TestChkErr(t *testing.T) { 157 | tests := []struct { 158 | name string 159 | out []byte 160 | err error 161 | wantOut string 162 | wantErr bool 163 | getExitError func(err error) (*exec.ExitError, bool) 164 | getStatusError func(exitError *exec.ExitError) (int, bool) 165 | }{ 166 | { 167 | name: "No error", 168 | out: []byte("output"), 169 | err: nil, 170 | wantOut: "output", 171 | wantErr: false, 172 | getExitError: func(_ error) (*exec.ExitError, bool) { 173 | return nil, true 174 | }, 175 | getStatusError: func(_ *exec.ExitError) (int, 
bool) { 176 | return 0, true 177 | }, 178 | }, 179 | { 180 | name: "Error with command", 181 | out: []byte("output"), 182 | err: errors.New("error"), 183 | wantOut: "", 184 | wantErr: true, 185 | getExitError: func(_ error) (*exec.ExitError, bool) { 186 | return nil, false 187 | }, 188 | getStatusError: func(_ *exec.ExitError) (int, bool) { 189 | return 1, false 190 | }, 191 | }, 192 | { 193 | name: "Error casting to ExitError", 194 | out: []byte("output"), 195 | err: errors.New("error"), 196 | wantOut: "", 197 | wantErr: true, 198 | getExitError: func(_ error) (*exec.ExitError, bool) { 199 | return nil, true 200 | }, 201 | getStatusError: func(_ *exec.ExitError) (int, bool) { 202 | return 1, false 203 | }, 204 | }, 205 | { 206 | name: "Error getting status from ExitError", 207 | out: []byte("output"), 208 | err: errors.New("error"), 209 | wantOut: "", 210 | wantErr: true, 211 | getExitError: func(_ error) (*exec.ExitError, bool) { 212 | return nil, false 213 | }, 214 | getStatusError: func(_ *exec.ExitError) (int, bool) { 215 | return 0, true 216 | }, 217 | }, 218 | } 219 | 220 | for _, tt := range tests { 221 | t.Run(tt.name, func(t *testing.T) { 222 | GetExitError = tt.getExitError 223 | GetStatusError = tt.getStatusError 224 | OSExit = func(_ int) {} 225 | 226 | gotOut := chkErr(tt.out, tt.err) 227 | if gotOut != tt.wantOut { 228 | t.Errorf("chkErr() gotOut = %v, want %v", gotOut, tt.wantOut) 229 | } 230 | }) 231 | } 232 | } 233 | 234 | func TestFileExists(t *testing.T) { 235 | tests := []struct { 236 | name string 237 | filePath string 238 | want bool 239 | }{ 240 | { 241 | name: "File exists", 242 | filePath: "semver.go", 243 | want: true, 244 | }, 245 | { 246 | name: "File does not exist", 247 | filePath: "non-existent.txt", 248 | want: false, 249 | }, 250 | { 251 | name: "File path is empty", 252 | filePath: "", 253 | want: false, 254 | }, 255 | } 256 | 257 | for _, tt := range tests { 258 | t.Run(tt.name, func(t *testing.T) { 259 | got := 
fileExists(tt.filePath) 260 | if got != tt.want { 261 | t.Errorf("fileExists(%s) = %v, want %v", tt.filePath, got, tt.want) 262 | } 263 | }) 264 | } 265 | } 266 | 267 | func TestErrorExit(t *testing.T) { 268 | message := "error message" 269 | 270 | if os.Getenv("INVOKE_ERROR_EXIT") == "1" { 271 | errorExit(message) 272 | return 273 | } 274 | // call the test again with INVOKE_ERROR_EXIT=1 so the errorExit function is invoked and we can check the return code 275 | cmd := exec.Command(os.Args[0], "-test.run=TestErrorExit") // #nosec G204 276 | cmd.Env = append(os.Environ(), "INVOKE_ERROR_EXIT=1") 277 | 278 | stderr, err := cmd.StderrPipe() 279 | if err != nil { 280 | fmt.Println("Error creating stderr pipe:", err) 281 | return 282 | } 283 | 284 | if err := cmd.Start(); err != nil { 285 | t.Error(err) 286 | } 287 | 288 | buf := make([]byte, 1024) 289 | n, err := stderr.Read(buf) 290 | if err != nil { 291 | t.Error(err) 292 | } 293 | 294 | err = cmd.Wait() 295 | if e, ok := err.(*exec.ExitError); ok && e.Success() { 296 | t.Error(err) 297 | } 298 | 299 | // Trim the warning message from the actual output 300 | actualMessage := string(buf[:n]) 301 | if idx := strings.Index(actualMessage, "warning: GOCOVERDIR not set"); idx != -1 { 302 | actualMessage = actualMessage[:idx] 303 | } 304 | 305 | // check the output is the message we logged in errorExit 306 | assert.Equal(t, message, actualMessage) 307 | } 308 | -------------------------------------------------------------------------------- /dev-docker: -------------------------------------------------------------------------------- 1 | #Use rocky Linux as the base image 2 | FROM rockylinux/rockylinux:8.4 3 | 4 | # Copy the files from the host to the container 5 | COPY "podmon/podmon" . 
6 | 7 | RUN yum install -y \ 8 | e2fsprogs \ 9 | which \ 10 | telnet \ 11 | && \ 12 | yum clean all \ 13 | && \ 14 | rm -rf /var/cache/run 15 | 16 | # validate some cli utilities are found 17 | RUN which mkfs.ext4 18 | 19 | # Set the command to run when the container starts 20 | ENTRYPOINT ["/podmon"] 21 | 22 | -------------------------------------------------------------------------------- /docker.mk: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 Dell Inc., or its subsidiaries. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # Includes the following generated file to get semantic version information 15 | include semver.mk 16 | 17 | ifeq ($(VERSION),) 18 | VERSION?="v$(MAJOR).$(MINOR).$(PATCH)" 19 | endif 20 | 21 | REGISTRY?="${REGISTRY_HOST}:${REGISTRY_PORT}/podmon" 22 | 23 | podman: 24 | podman build --pull --no-cache -t "$(REGISTRY):$(VERSION)" --build-arg GOIMAGE=$(DEFAULT_GOIMAGE) --build-arg BASEIMAGE=$(CSM_BASEIMAGE) -f ./Dockerfile --label commit=$(shell git log --max-count 1 --format="%H") . 
25 | 26 | push: 27 | podman push "$(REGISTRY):$(VERSION)" 28 | 29 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module podmon 2 | 3 | go 1.24.0 4 | 5 | toolchain go1.24.2 6 | 7 | require ( 8 | github.com/bramvdbogaerde/go-scp v1.5.0 9 | github.com/container-storage-interface/spec v1.6.0 10 | github.com/cucumber/godog v0.15.0 11 | github.com/dell/dell-csi-extensions/podmon v1.8.0 12 | github.com/dell/gofsutil v1.19.0 13 | github.com/fsnotify/fsnotify v1.9.0 14 | github.com/golang/mock v1.6.0 15 | github.com/kubernetes-csi/csi-lib-utils v0.11.0 16 | github.com/sirupsen/logrus v1.9.3 17 | github.com/spf13/viper v1.20.0 18 | github.com/stretchr/testify v1.10.0 19 | golang.org/x/crypto v0.38.0 20 | google.golang.org/grpc v1.72.0 21 | k8s.io/api v0.33.0 22 | k8s.io/apimachinery v0.33.0 23 | k8s.io/client-go v0.33.0 24 | k8s.io/cri-api v0.33.0 25 | ) 26 | 27 | require ( 28 | github.com/cucumber/gherkin/go/v26 v26.2.0 // indirect 29 | github.com/cucumber/messages/go/v21 v21.0.1 // indirect 30 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect 31 | github.com/emicklei/go-restful/v3 v3.12.1 // indirect 32 | github.com/fxamacker/cbor/v2 v2.7.0 // indirect 33 | github.com/go-logr/logr v1.4.2 // indirect 34 | github.com/go-openapi/jsonpointer v0.21.0 // indirect 35 | github.com/go-openapi/jsonreference v0.21.0 // indirect 36 | github.com/go-openapi/swag v0.23.0 // indirect 37 | github.com/go-viper/mapstructure/v2 v2.2.1 // indirect 38 | github.com/gofrs/uuid v4.4.0+incompatible // indirect 39 | github.com/gogo/protobuf v1.3.2 // indirect 40 | github.com/golang/protobuf v1.5.4 // indirect 41 | github.com/google/gnostic-models v0.6.9 // indirect 42 | github.com/google/go-cmp v0.7.0 // indirect 43 | github.com/google/uuid v1.6.0 // indirect 44 | github.com/hashicorp/go-immutable-radix v1.3.1 // indirect 45 | 
github.com/hashicorp/go-memdb v1.3.4 // indirect 46 | github.com/hashicorp/golang-lru v1.0.2 // indirect 47 | github.com/josharian/intern v1.0.0 // indirect 48 | github.com/json-iterator/go v1.1.12 // indirect 49 | github.com/mailru/easyjson v0.9.0 // indirect 50 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 51 | github.com/modern-go/reflect2 v1.0.2 // indirect 52 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 53 | github.com/pelletier/go-toml/v2 v2.2.3 // indirect 54 | github.com/pkg/errors v0.9.1 // indirect 55 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect 56 | github.com/sagikazarmark/locafero v0.7.0 // indirect 57 | github.com/sourcegraph/conc v0.3.0 // indirect 58 | github.com/spf13/afero v1.12.0 // indirect 59 | github.com/spf13/cast v1.7.1 // indirect 60 | github.com/spf13/pflag v1.0.6 // indirect 61 | github.com/subosito/gotenv v1.6.0 // indirect 62 | github.com/x448/float16 v0.8.4 // indirect 63 | go.uber.org/multierr v1.11.0 // indirect 64 | golang.org/x/net v0.38.0 // indirect 65 | golang.org/x/oauth2 v0.27.0 // indirect 66 | golang.org/x/sys v0.33.0 // indirect 67 | golang.org/x/term v0.32.0 // indirect 68 | golang.org/x/text v0.25.0 // indirect 69 | golang.org/x/time v0.9.0 // indirect 70 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250218202821-56aae31c358a // indirect 71 | google.golang.org/protobuf v1.36.5 // indirect 72 | gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect 73 | gopkg.in/inf.v0 v0.9.1 // indirect 74 | gopkg.in/yaml.v3 v3.0.1 // indirect 75 | k8s.io/klog/v2 v2.130.1 // indirect 76 | k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect 77 | k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect 78 | sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect 79 | sigs.k8s.io/randfill v1.0.0 // indirect 80 | sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect 81 | sigs.k8s.io/yaml v1.4.0 // indirect 82 
| ) 83 | -------------------------------------------------------------------------------- /internal/criapi/criapi.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2022 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package criapi 18 | 19 | import ( 20 | "context" 21 | "errors" 22 | "fmt" 23 | "os" 24 | "time" 25 | 26 | log "github.com/sirupsen/logrus" 27 | "google.golang.org/grpc" 28 | v1 "k8s.io/cri-api/pkg/apis/runtime/v1" 29 | ) 30 | 31 | // Client represents the client grpc connection to the ContainerRuntimeInterface 32 | type Client struct { 33 | CRIConn *grpc.ClientConn // A grpc client connection to CRI 34 | RuntimeServiceClient v1.RuntimeServiceClient // A RuntimeService client 35 | } 36 | 37 | // CRIClient is an instance of the Client for the CRI connection 38 | var CRIClient Client 39 | 40 | // CRIClientDialRetry is the amount of time to wait before retrying 41 | var CRIClientDialRetry = 30 * time.Second 42 | 43 | // CRIMaxConnectionRetry is the maximum number of connection retries. 44 | var CRIMaxConnectionRetry = 3 45 | 46 | // CRINewClientTimeout is the timeout for making a new client.
47 | var CRINewClientTimeout = 90 * time.Second 48 | 49 | var getGrpcDialContext = func(ctx context.Context, target string, opts ...grpc.DialOption) (conn *grpc.ClientConn, err error) { 50 | return grpc.DialContext(ctx, target, opts...) 51 | } 52 | 53 | // NewCRIClient returns a new client connection to the ContainerRuntimeInterface or an error 54 | func NewCRIClient(criSock string, _ ...grpc.DialOption) (*Client, error) { 55 | var err error 56 | ctx, cancel := context.WithTimeout(context.Background(), CRINewClientTimeout) 57 | defer cancel() 58 | for i := 0; i < CRIMaxConnectionRetry; i++ { 59 | CRIClient.CRIConn, err = getGrpcDialContext(ctx, criSock, grpc.WithInsecure()) 60 | if err != nil || CRIClient.CRIConn == nil { 61 | var errMsg string 62 | if err == nil { 63 | errMsg = "No error returned, but CRIClient.CRIConn is nil" 64 | } else { 65 | errMsg = err.Error() 66 | } 67 | log.Errorf("Waiting on connection to CRI socket: %s: %s", criSock, errMsg) 68 | time.Sleep(CRIClientDialRetry) 69 | } else { 70 | log.Infof("Connected to CRI: %s", criSock) 71 | CRIClient.RuntimeServiceClient = v1.NewRuntimeServiceClient(CRIClient.CRIConn) 72 | return &CRIClient, nil 73 | } 74 | } 75 | return &CRIClient, err 76 | } 77 | 78 | // Connected returns true if the CRI connection is up. 79 | func (cri *Client) Connected() bool { 80 | return cri.CRIConn != nil 81 | } 82 | 83 | // Close closes the connection to the CRI. 84 | func (cri *Client) Close() error { 85 | if cri.Connected() { 86 | if err := cri.CRIConn.Close(); err != nil { 87 | return err 88 | } 89 | cri.CRIConn = nil 90 | return nil 91 | } 92 | return nil 93 | } 94 | 95 | // ListContainers lists all the containers in the Container Runtime. 
96 | func (cri *Client) ListContainers(ctx context.Context, req *v1.ListContainersRequest) (*v1.ListContainersResponse, error) { 97 | return CRIClient.RuntimeServiceClient.ListContainers(ctx, req) 98 | } 99 | 100 | var knownPaths = [3]string{"/var/run/dockershim.sock", "/run/containerd/containerd.sock", "/run/crio/crio.sock"} 101 | 102 | // ChooseCRIPath chooses an appropriate unix domain socket path to the CRI interface. 103 | // This is done according to the ordering described for the crictl command. 104 | var osStat = os.Stat 105 | 106 | func (cri *Client) ChooseCRIPath() (string, error) { 107 | for _, path := range knownPaths { 108 | _, err := osStat(path) 109 | if err == nil { 110 | retval := fmt.Sprintf("unix:///%s", path) 111 | return retval, nil 112 | } 113 | } 114 | return "", errors.New("Could not find path for CRI runtime from knownPaths") 115 | } 116 | 117 | // GetContainerInfo gets current status of all the containers on this server using CRI interface. 118 | // The result is a map of ID to a structure containing the ID, Name, and State. 
119 | func (cri *Client) GetContainerInfo(_ context.Context) (map[string]*ContainerInfo, error) { 120 | result := make(map[string]*ContainerInfo) 121 | 122 | path, err := cri.ChooseCRIPath() 123 | if err != nil { 124 | return result, err 125 | } 126 | client, err := NewCRIClient(path) 127 | if err != nil { 128 | return result, err 129 | } 130 | req := &v1.ListContainersRequest{} 131 | rep, err := client.ListContainers(context.Background(), req) 132 | if err != nil { 133 | return result, err 134 | } 135 | for _, cont := range rep.Containers { 136 | info := &ContainerInfo{ 137 | ID: cont.Id, 138 | Name: cont.Metadata.Name, 139 | State: cont.State, 140 | } 141 | result[cont.Id] = info 142 | } 143 | err = client.Close() 144 | if err != nil { 145 | log.Infof("close error: %s", err) 146 | } 147 | return result, nil 148 | } 149 | -------------------------------------------------------------------------------- /internal/criapi/interface.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2023 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package criapi 18 | 19 | import ( 20 | "context" 21 | 22 | v1 "k8s.io/cri-api/pkg/apis/runtime/v1" 23 | ) 24 | 25 | // ContainerInfo is the information obtained for each container: 26 | // 27 | // ID is the ContainerID that will match the ID in the Pod's container list. 28 | // Name is the name of the container. 29 | // State is the ContainerState. 30 | type ContainerInfo struct { 31 | ID string 32 | Name string 33 | State v1.ContainerState 34 | } 35 | 36 | // CRIAPI is an interface for retrieving information about containers using the Container Runtime Interface 37 | // that crictl uses. 38 | type CRIAPI interface { 39 | Connected() bool // reports whether a CRI connection is currently held 40 | Close() error // closes the CRI connection, if there is one 41 | ListContainers(ctx context.Context, req *v1.ListContainersRequest) (*v1.ListContainersResponse, error) // lists the containers known to the container runtime 42 | GetContainerInfo(ctx context.Context) (map[string]*ContainerInfo, error) // returns a map of container ID to its ContainerInfo 43 | } 44 | -------------------------------------------------------------------------------- /internal/csiapi/csi.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2023 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package csiapi 18 | 19 | import ( 20 | "context" 21 | "time" 22 | 23 | csi "github.com/container-storage-interface/spec/lib/go/csi" 24 | csiext "github.com/dell/dell-csi-extensions/podmon" 25 | log "github.com/sirupsen/logrus" 26 | "google.golang.org/grpc" 27 | ) 28 | 29 | // Client holds clients related to CSI access 30 | type Client struct { 31 | DriverConn *grpc.ClientConn // A grpc client connection to the driver 32 | PodmonClient csiext.PodmonClient // A grpc CSIPodmonClient 33 | ControllerClient csi.ControllerClient // A grpc CSI ControllerClient 34 | NodeClient csi.NodeClient // A grpc CSI NodeClient 35 | } 36 | 37 | // CSIClient is reference to CSI Client 38 | var CSIClient Client 39 | 40 | // CSIClientDialRetry is timeout after failure to connect to the CSI Driver 41 | var CSIClientDialRetry = 30 * time.Second 42 | 43 | var getGrpcDialContext = func(ctx context.Context, target string, opts ...grpc.DialOption) (conn *grpc.ClientConn, err error) { 44 | return grpc.DialContext(ctx, target, opts...) 45 | } 46 | 47 | // NewCSIClient returns a new CSIApi interface 48 | func NewCSIClient(csiSock string, clientOpts ...grpc.DialOption) (CSIApi, error) { 49 | var err error 50 | for { 51 | // Wait on the driver. It will not open its unix socket until it has become leader. 52 | CSIClient.DriverConn, err = getGrpcDialContext(context.Background(), csiSock, clientOpts...) 
53 | log.Debugf("grpc.Dial returned %v %v", CSIClient.DriverConn, err) 54 | if err != nil || CSIClient.DriverConn == nil { 55 | var errMsg string 56 | if err == nil { 57 | errMsg = "No error returned, but CSIClient.DriverConn is nil" 58 | } else { 59 | errMsg = err.Error() 60 | } 61 | log.Errorf("Waiting on connection to driver csi.sock: %s", errMsg) 62 | time.Sleep(CSIClientDialRetry) 63 | } else { 64 | break 65 | } 66 | } 67 | log.Infof("Connected to driver: %s", csiSock) 68 | CSIClient.PodmonClient = csiext.NewPodmonClient(CSIClient.DriverConn) 69 | CSIClient.ControllerClient = csi.NewControllerClient(CSIClient.DriverConn) 70 | CSIClient.NodeClient = csi.NewNodeClient(CSIClient.DriverConn) 71 | return &CSIClient, nil 72 | } 73 | 74 | // Connected returns true if there is non-nil driver connection 75 | func (csi *Client) Connected() bool { 76 | return csi.DriverConn != nil 77 | } 78 | 79 | // Close will close connections on the driver connection, if it exists 80 | func (csi *Client) Close() error { 81 | if csi.Connected() { 82 | return csi.DriverConn.Close() 83 | } 84 | return nil 85 | } 86 | 87 | // ControllerUnpublishVolume calls the UnpublishVolume in the controller 88 | func (csi *Client) ControllerUnpublishVolume(ctx context.Context, req *csi.ControllerUnpublishVolumeRequest) (*csi.ControllerUnpublishVolumeResponse, error) { 89 | return CSIClient.ControllerClient.ControllerUnpublishVolume(ctx, req) 90 | } 91 | 92 | // NodeUnpublishVolume calls the UnpublishVolume in the node 93 | func (csi *Client) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error) { 94 | return CSIClient.NodeClient.NodeUnpublishVolume(ctx, req) 95 | } 96 | 97 | // NodeUnstageVolume calls UnstageVolume in the node 98 | func (csi *Client) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstageVolumeRequest) (*csi.NodeUnstageVolumeResponse, error) { 99 | return CSIClient.NodeClient.NodeUnstageVolume(ctx, req) 100 | } 101 
| 102 | // ValidateVolumeHostConnectivity calls the ValidateVolumeHostConnectivity in the podmon client 103 | func (csi *Client) ValidateVolumeHostConnectivity(ctx context.Context, req *csiext.ValidateVolumeHostConnectivityRequest) (*csiext.ValidateVolumeHostConnectivityResponse, error) { 104 | return CSIClient.PodmonClient.ValidateVolumeHostConnectivity(ctx, req) 105 | } 106 | -------------------------------------------------------------------------------- /internal/csiapi/interface.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2023 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
// CSIApi is an interface for CSI driver calls
type CSIApi interface {
	// Connected reports whether podmon is connected to the driver.
	Connected() bool
	// Close closes the connection to the driver and returns any error from doing so.
	Close() error
	// ControllerUnpublishVolume sends a CSI ControllerUnpublishVolume request to the driver.
	ControllerUnpublishVolume(context.Context, *csi.ControllerUnpublishVolumeRequest) (*csi.ControllerUnpublishVolumeResponse, error)
	// NodeUnstageVolume sends a CSI NodeUnstageVolume request to the driver.
	NodeUnstageVolume(context.Context, *csi.NodeUnstageVolumeRequest) (*csi.NodeUnstageVolumeResponse, error)
	// NodeUnpublishVolume sends a CSI NodeUnpublishVolume request to the driver.
	NodeUnpublishVolume(context.Context, *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error)
	// ValidateVolumeHostConnectivity sends the podmon extension's
	// ValidateVolumeHostConnectivity request to the driver.
	ValidateVolumeHostConnectivity(context.Context, *csiext.ValidateVolumeHostConnectivityRequest) (*csiext.ValidateVolumeHostConnectivityResponse, error)
}
// K8sAPI provides an API interface to kubernetes.
//
// The k8sapi package provides facilities for csi-drivers to call the kubernetes API from their containers.
// This is needed for some special use cases, like inspecting PVs or handling fail-over of pods from node failure.
type K8sAPI interface {
	// Connect connects to the Kubernetes system API.
	Connect(kubeconfig *string) error

	// GetClient returns the kubernetes Clientset.
	GetClient() *kubernetes.Clientset

	// GetContext returns a context object for a certain timeout duration,
	// together with its cancel function.
	GetContext(duration time.Duration) (context.Context, context.CancelFunc)

	// DeletePod deletes a pod of the given namespace and name, and optionally uses force deletion.
	DeletePod(ctx context.Context, namespace, name string, podUID types.UID, force bool) error

	// GetPod retrieves a pod of the given namespace and name.
	GetPod(ctx context.Context, namespace, name string) (*v1.Pod, error)

	// GetCachedVolumeAttachment will try to load the volumeattachment selected by the persistent volume name and node name.
	// If found it is returned from the cache. If not found, the cache is reloaded and the result returned from the reloaded data.
	GetCachedVolumeAttachment(ctx context.Context, pvName, nodeName string) (*storagev1.VolumeAttachment, error)

	// GetVolumeAttachments gets all the volume attachments in the K8S system.
	GetVolumeAttachments(ctx context.Context) (*storagev1.VolumeAttachmentList, error)

	// DeleteVolumeAttachment deletes a volume attachment by name.
	DeleteVolumeAttachment(ctx context.Context, va string) error

	// GetPersistentVolumeClaimsInNamespace returns all the pvcs in a namespace.
	GetPersistentVolumeClaimsInNamespace(ctx context.Context, namespace string) (*v1.PersistentVolumeClaimList, error)

	// GetPersistentVolumeClaimsInPod returns all the pvcs in a pod.
	GetPersistentVolumeClaimsInPod(ctx context.Context, pod *v1.Pod) ([]*v1.PersistentVolumeClaim, error)

	// GetPersistentVolumesInPod returns all the pvs referenced from a pod.
	// Any unbound pvcs are not returned.
	GetPersistentVolumesInPod(ctx context.Context, pod *v1.Pod) ([]*v1.PersistentVolume, error)

	// IsVolumeAttachmentToPod returns true if va is attached to the specified pod.
	IsVolumeAttachmentToPod(ctx context.Context, va *storagev1.VolumeAttachment, pod *v1.Pod) (bool, error)

	// GetPersistentVolumeClaimName returns the name of the PVC referenced from the PV named by pvName.
	GetPersistentVolumeClaimName(ctx context.Context, pvName string) (string, error)

	// GetPersistentVolume retrieves a persistent volume given the pv name.
	GetPersistentVolume(ctx context.Context, pvName string) (*v1.PersistentVolume, error)

	// GetPersistentVolumeClaim returns the PVC of the given namespace/pvcName.
	GetPersistentVolumeClaim(ctx context.Context, namespace, pvcName string) (*v1.PersistentVolumeClaim, error)

	// GetNode returns the node with the specified nodeName.
	GetNode(ctx context.Context, nodeName string) (*v1.Node, error)

	// GetNodeWithTimeout returns the node with the specified nodeName but using a timeout duration rather than a context.
	GetNodeWithTimeout(duration time.Duration, nodeName string) (*v1.Node, error)

	// GetVolumeHandleFromVA returns the volume handle (storage system ID) from the volume attachment.
	GetVolumeHandleFromVA(ctx context.Context, va *storagev1.VolumeAttachment) (string, error)

	// GetPVNameFromVA returns a name taken from the specified volume attachment.
	// NOTE(review): the previous comment said "PVCName" while the method name says PV name — confirm against the implementation.
	GetPVNameFromVA(va *storagev1.VolumeAttachment) (string, error)

	// SetupPodWatch sets up a pod watch in the given namespace using listOptions.
	SetupPodWatch(ctx context.Context, namespace string, listOptions metav1.ListOptions) (watch.Interface, error)

	// SetupNodeWatch sets up a node watch using listOptions.
	SetupNodeWatch(ctx context.Context, listOptions metav1.ListOptions) (watch.Interface, error)

	// TaintNode applies the specified 'taintKey' string and 'effect' to the node with 'nodeName'.
	// The 'remove' flag indicates if the taint should be removed from the node, if it exists.
	TaintNode(ctx context.Context, nodeName, taintKey string, effect v1.TaintEffect, remove bool) error

	// CreateEvent creates an event on a runtime object.
	// sourceComponent is the name of the component producing the event, e.g. "podmon".
	// eventType is the type of this event (Normal, Warning).
	// reason is why the action was taken. It is human-readable.
	// messageFmt and args form a human-readable description of the status of this operation.
	CreateEvent(sourceComponent string, object runtime.Object, eventType, reason, messageFmt string, args ...interface{}) error
}

const (
	// EventTypeNormal will log a "Normal" event.
	EventTypeNormal = "Normal"
	// EventTypeWarning will log a "Warning" event.
	EventTypeWarning = "Warning"
)
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package mocks 18 | 19 | import ( 20 | "context" 21 | "errors" 22 | "podmon/internal/criapi" 23 | 24 | v1 "k8s.io/cri-api/pkg/apis/runtime/v1" 25 | ) 26 | 27 | // MockClient is a mock client supporting the criapi. 28 | type MockClient struct { 29 | InducedErrors struct { 30 | GetContainerInfo bool 31 | } 32 | MockContainerInfos map[string]*criapi.ContainerInfo 33 | } 34 | 35 | // Initialize initializes the MockClient. 36 | func (mock *MockClient) Initialize() { 37 | mock.MockContainerInfos = make(map[string]*criapi.ContainerInfo) 38 | } 39 | 40 | // Connected returns true if connected. 41 | func (mock *MockClient) Connected() bool { 42 | return true 43 | } 44 | 45 | // Close closes the mock client. This is unimplemented for the mock client. 46 | func (mock *MockClient) Close() error { 47 | return errors.New("unimplemented") 48 | } 49 | 50 | // ListContainers would list individual containers but is not implemented for the mock client. 51 | func (mock *MockClient) ListContainers(_ context.Context, _ *v1.ListContainersRequest) (*v1.ListContainersResponse, error) { 52 | return nil, errors.New("unimplemented") 53 | } 54 | 55 | // ChooseCRIPath chooses an appropriate unix domain socket path to the CRI interface. This is unimplemented for the mock client. 
56 | func (mock *MockClient) ChooseCRIPath() (string, error) { 57 | return "", errors.New("unimplemented") 58 | } 59 | 60 | // GetContainerInfo gets current status of all the containers on this server using CRI interface. 61 | // The result is a map of ID to a structure containing the ID, Name, and State. 62 | func (mock *MockClient) GetContainerInfo(_ context.Context) (map[string]*criapi.ContainerInfo, error) { 63 | if mock.InducedErrors.GetContainerInfo { 64 | return mock.MockContainerInfos, errors.New("GetContainerInfo induced error") 65 | } 66 | return mock.MockContainerInfos, nil 67 | } 68 | -------------------------------------------------------------------------------- /internal/mocks/csiapi_mock.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2023 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package mocks 18 | 19 | import ( 20 | "context" 21 | "errors" 22 | "fmt" 23 | 24 | "github.com/container-storage-interface/spec/lib/go/csi" 25 | csiext "github.com/dell/dell-csi-extensions/podmon" 26 | ) 27 | 28 | // CSIMock of csiapi.CSIApi 29 | type CSIMock struct { 30 | InducedErrors struct { 31 | NotConnected bool 32 | ControllerUnpublishVolume bool 33 | NodeUnpublishVolume bool 34 | NodeUnstageVolume bool 35 | ValidateVolumeHostConnectivity bool 36 | Close bool 37 | NodeUnpublishNFSShareNotFound bool 38 | NodeUnstageNFSShareNotFound bool 39 | } 40 | ValidateVolumeHostConnectivityResponse struct { 41 | Connected bool 42 | IosInProgress bool 43 | } 44 | } 45 | 46 | // Connected is a mock implementation of csiapi.CSIApi.Connected 47 | func (mock *CSIMock) Connected() bool { 48 | return !(mock.InducedErrors.NotConnected) 49 | } 50 | 51 | // Close is a mock implementation of csiapi.CSIApi.Close 52 | func (mock *CSIMock) Close() error { 53 | if mock.InducedErrors.Close { 54 | return fmt.Errorf("induced error for Close") 55 | } 56 | return nil 57 | } 58 | 59 | // ControllerUnpublishVolume is a mock implementation of csiapi.CSIApi.ControllerUnpublishVolume 60 | func (mock *CSIMock) ControllerUnpublishVolume(_ context.Context, _ *csi.ControllerUnpublishVolumeRequest) (*csi.ControllerUnpublishVolumeResponse, error) { 61 | rep := &csi.ControllerUnpublishVolumeResponse{} 62 | if mock.InducedErrors.ControllerUnpublishVolume { 63 | return rep, errors.New("ControllerUnpublishedVolume induced error") 64 | } 65 | return rep, nil 66 | } 67 | 68 | // NodeUnpublishVolume is a mock implementation of csiapi.CSIApi.NodeUnpublishVolume 69 | func (mock *CSIMock) NodeUnpublishVolume(_ context.Context, _ *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error) { 70 | rep := &csi.NodeUnpublishVolumeResponse{} 71 | if mock.InducedErrors.NodeUnpublishVolume { 72 | return rep, errors.New("NodeUnpublishedVolume induced error") 73 | } 74 | if 
mock.InducedErrors.NodeUnpublishNFSShareNotFound { 75 | return rep, errors.New("NFS Share for filesystem not found") 76 | } 77 | return rep, nil 78 | } 79 | 80 | // NodeUnstageVolume is a mock implementation of csiapi.CSIApi.NodeUnstageVolume 81 | func (mock *CSIMock) NodeUnstageVolume(_ context.Context, _ *csi.NodeUnstageVolumeRequest) (*csi.NodeUnstageVolumeResponse, error) { 82 | rep := &csi.NodeUnstageVolumeResponse{} 83 | if mock.InducedErrors.NodeUnstageVolume { 84 | return rep, errors.New("NodeUnstageedVolume induced error") 85 | } 86 | if mock.InducedErrors.NodeUnstageNFSShareNotFound { 87 | return rep, errors.New("NFS Share for filesystem not found") 88 | } 89 | return rep, nil 90 | } 91 | 92 | // ValidateVolumeHostConnectivity is a mock implementation of csiapi.CSIApi.ValidateVolumeHostConnectivity 93 | func (mock *CSIMock) ValidateVolumeHostConnectivity(_ context.Context, _ *csiext.ValidateVolumeHostConnectivityRequest) (*csiext.ValidateVolumeHostConnectivityResponse, error) { 94 | rep := &csiext.ValidateVolumeHostConnectivityResponse{} 95 | if mock.InducedErrors.ValidateVolumeHostConnectivity { 96 | return rep, errors.New("ValidateVolumeHostConnectivity induced error") 97 | } 98 | rep.Connected = mock.ValidateVolumeHostConnectivityResponse.Connected 99 | rep.IosInProgress = mock.ValidateVolumeHostConnectivityResponse.IosInProgress 100 | return rep, nil 101 | } 102 | -------------------------------------------------------------------------------- /internal/mocks/utils_mock.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2023 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
// Mock is a mock structure used for testing. Each flag in InducedErrors makes
// the method of the same name return an induced error.
type Mock struct {
	InducedErrors struct {
		GetLoopBackDevice    bool
		DeleteLoopBackDevice bool
		Unmount              bool
		Creat                bool
	}
}

// GetLoopBackDevice echoes pv back as the loopback device, or returns an
// empty string and an error when the failure is induced.
func (m *Mock) GetLoopBackDevice(pv string) (string, error) {
	if !m.InducedErrors.GetLoopBackDevice {
		return pv, nil
	}
	return "", errors.New("induced GetLoopBackDevice error")
}

// DeleteLoopBackDevice returns the bytes "loopbackdevice" on success, or nil
// and an error when the failure is induced.
func (m *Mock) DeleteLoopBackDevice(_ string) ([]byte, error) {
	if m.InducedErrors.DeleteLoopBackDevice {
		return nil, errors.New("induced DeleteLoopBackDevice error")
	}
	return []byte("loopbackdevice"), nil
}

// Unmount stands in for the wrapper around syscall.Unmount; it only fails
// when the failure is induced.
func (m *Mock) Unmount(_ string, _ int) error {
	var err error
	if m.InducedErrors.Unmount {
		err = errors.New("induced Unmount error")
	}
	return err
}

// Creat stands in for the wrapper around syscall.Creat; it returns 0 on
// success and 1 with an error when the failure is induced.
func (m *Mock) Creat(_ string, _ int) (int, error) {
	if !m.InducedErrors.Creat {
		return 0, nil
	}
	return 1, errors.New("induced Creat error")
}
# None of the test targets below produces a file named after the target.
# Declaring them phony guarantees that a stray file or directory with the same
# name can never make `make` consider the target up to date and skip the run.
.PHONY: powerstore-vm-integration-test powerscale-vm-integration-test \
	powerflex-vm-integration-test powermax-vm-integration-test \
	powerflex-integration-test unity-integration-test powerscale-integration-test \
	powerstore-integration-test powermax-integration-test \
	powerflex-short-integration-test unity-short-integration-test \
	powerscale-short-integration-test powerstore-short-integration-test \
	powermax-short-integration-test \
	powerflex-array-interface-test unity-array-interface-test \
	powerstore-array-interface-test powermax-array-interface-test

# Open the HTML coverage report for the c.out coverage profile.
gocover:
	go tool cover -html=c.out

# VM-Integration Tests
# Each target runs the "Check" test and the "Integration" test of one platform
# against virtualized workloads (RESILIENCY_VM_INT_TEST).
powerstore-vm-integration-test:
	RESILIENCY_VM_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	go test -timeout 6h -test.v -test.run "^\QTestOcpVirtPowerStoreCheck\E|\QTestOcpVirtPowerStoreIntegration\E"

powerscale-vm-integration-test:
	RESILIENCY_VM_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	go test -timeout 6h -test.v -test.run "^\QTestOcpVirtPowerScaleCheck\E|\QTestOcpVirtPowerScaleIntegration\E"

powerflex-vm-integration-test:
	RESILIENCY_VM_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	go test -timeout 6h -test.v -test.run "^\QTestOcpVirtPowerFlexCheck\E|\QTestOcpVirtPowerFlexIntegration\E"

powermax-vm-integration-test:
	RESILIENCY_VM_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	go test -timeout 6h -test.v -test.run "^\QTestOcpVirtPowerMaxCheck\E|\QTestOcpVirtPowerMaxIntegration\E"

# Integration Tests
# Each target runs the "FirstCheck" test and the "Integration" test of one platform
# (RESILIENCY_INT_TEST).
powerflex-integration-test:
	RESILIENCY_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	go test -timeout 6h -test.v -test.run "^\QTestPowerFlexFirstCheck\E|\QTestPowerFlexIntegration\E"

unity-integration-test:
	RESILIENCY_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	go test -timeout 6h -test.v -test.run "^\QTestUnityFirstCheck\E|\QTestUnityIntegration\E"

powerscale-integration-test:
	RESILIENCY_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	go test -timeout 6h -test.v -test.run "^\QTestPowerScaleFirstCheck\E|\QTestPowerScaleIntegration\E"

powerstore-integration-test:
	RESILIENCY_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	go test -timeout 6h -test.v -test.run "^\QTestPowerStoreFirstCheck\E|\QTestPowerStoreIntegration\E"

powermax-integration-test:
	RESILIENCY_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	go test -timeout 6h -test.v -test.run "^\QTestPowerMaxFirstCheck\E|\QTestPowerMaxIntegration\E"

# Short Integration Tests
# Each target runs the "ShortCheck" test and the "ShortIntegration" test of one platform
# (RESILIENCY_SHORT_INT_TEST).
powerflex-short-integration-test:
	RESILIENCY_SHORT_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	go test -timeout 6h -test.v -test.run "^\QTestPowerFlexShortCheck\E|\QTestPowerFlexShortIntegration\E"

unity-short-integration-test:
	RESILIENCY_SHORT_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	go test -timeout 6h -test.v -test.run "^\QTestUnityShortCheck\E|\QTestUnityShortIntegration\E"

powerscale-short-integration-test:
	RESILIENCY_SHORT_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	go test -timeout 6h -test.v -test.run "^\QTestPowerScaleShortCheck\E|\QTestPowerScaleShortIntegration\E"

powerstore-short-integration-test:
	RESILIENCY_SHORT_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	go test -timeout 6h -test.v -test.run "^\QTestPowerStoreShortCheck\E|\QTestPowerStoreShortIntegration\E"

powermax-short-integration-test:
	RESILIENCY_SHORT_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	go test -timeout 6h -test.v -test.run "^\QTestPowerMaxShortCheck\E|\QTestPowerMaxShortIntegration\E"

# Array Interface Tests
# INTERFACE1..INTERFACE3 are taken from the make/environment variables of the
# caller and handed to the tests as INTERFACE_A..INTERFACE_C.
powerflex-array-interface-test:
	RESILIENCY_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	INTERFACE_A=${INTERFACE1} \
	go test -timeout 6h -test.v -test.run "^\QTestPowerFlexFirstCheck\E|\QTestPowerflexArrayInterfaceDown\E"

unity-array-interface-test:
	RESILIENCY_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	INTERFACE_A=${INTERFACE1} \
	INTERFACE_B=${INTERFACE2} \
	go test -timeout 6h -test.v -test.run "^\QTestUnityFirstCheck\E|\QTestUnityArrayInterfaceDown\E"

powerstore-array-interface-test:
	RESILIENCY_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	INTERFACE_A=${INTERFACE1} \
	INTERFACE_B=${INTERFACE2} \
	INTERFACE_C=${INTERFACE3} \
	go test -timeout 6h -test.v -test.run "^\QTestPowerStoreFirstCheck\E|\QTestPowerStoreArrayInterfaceDown\E"

powermax-array-interface-test:
	RESILIENCY_INT_TEST="true" \
	RESILIENCY_TEST_CLEANUP="true" \
	POLL_K8S="true" \
	SCRIPTS_DIR="../../test/sh" \
	INTERFACE_A=${INTERFACE1} \
	INTERFACE_B=${INTERFACE2} \
	INTERFACE_C=${INTERFACE3} \
	go test -timeout 6h -test.v -test.run "^\QTestPowerMaxFirstCheck\E|\QTestPowerMaxArrayInterfaceDown\E"
volumes condition "" 10 | And pod monitor mode <mode> 11 | And I induce error <error> 12 | When I call StartPodMonitor with key "podmn" and value "csi-vxflexos" 13 | And I send a pod event type <eventtype> 14 | Then I close the Watcher 15 | And the last log message contains <errormsg> 16 | 17 | Examples: 18 | | podnode | nvol | error | mode | eventtype | errormsg | 19 | | "node1" | 0 | "Watch" | "none" | "None" | "none" | 20 | | "node1" | 0 | "none" | "none" | "None" | "Setup of PodWatcher complete" | 21 | | "node1" | 0 | "none" | "none" | "Add" | "PodMonitor.Mode not set" | 22 | | "node1" | 0 | "none" | "controller" | "Add" | "podMonitorHandler" | 23 | | "node1" | 0 | "GetPod" | "controller" | "Add" | "GetPod error" | 24 | | "node1" | 0 | "BadWatchObject" | "controller" | "Add" | "podMonitorHandler nil pod" | 25 | | "node1" | 0 | "none" | "node" | "Add" | "nodeModePodHandler" | 26 | | "node1" | 0 | "none" | "standalone" | "Add" | "podMonitorHandler" | 27 | | "node1" | 0 | "GetPod" | "standalone" | "Add" | "GetPod error" | 28 | | "node1" | 0 | "none" | "none" | "Modify" | "PodMonitor.Mode not set" | 29 | | "node1" | 0 | "none" | "none" | "Delete" | "PodMonitor.Mode not set" | 30 | | "node1" | 0 | "none" | "none" | "Stop" | "PodWatcher stopped..." 
| 31 | | "node1" | 0 | "none" | "none" | "Error" | "Setup of PodWatcher complete" | 32 | 33 | @monitor 34 | Scenario Outline: Test StartNodeMonitorHandler 35 | Given a controller monitor "vxflex" 36 | And a pod for node <podnode> with <nvol> volumes condition "" 37 | And I induce error <error> 38 | When I call StartNodeMonitor with key "podmon" and value "csi-vxflexos" 39 | And I send a node event type <eventtype> 40 | Then I close the Watcher 41 | And the last log message contains <errormsg> 42 | 43 | Examples: 44 | | podnode | nvol | error | eventtype | errormsg | 45 | | "node1" | 0 | "Watch" | "None" | "none" | 46 | | "node1" | 0 | "none" | "None" | "Setup of NodeWatcher complete" | 47 | | "node1" | 0 | "none" | "Add" | "node name: node1" | 48 | | "node1" | 0 | "BadWatchObject" | "Add" | "nodeMonitorHandler nil node" | 49 | | "node1" | 0 | "none" | "Modify" | "node name: node1" | 50 | | "node1" | 0 | "none" | "Delete" | "node name: node1" | 51 | | "node1" | 0 | "none" | "Stop" | "NodeWatcher stopped..." | 52 | | "node1" | 0 | "none" | "Error" | "Setup of NodeWatcher complete" | 53 | 54 | @monitor 55 | Scenario Outline: Test Lock/Unlock and getPodKey 56 | Given a controller monitor "vxflex" 57 | And a pod for node <podnode> with <nvol> volumes condition "" 58 | And I send a node event type "Modify" 59 | When I call test lock and getPodKey 60 | # The previous step will fail if there is an error 61 | 62 | Examples: 63 | | podnode | nvol | 64 | | "node1" | 0 | 65 | 66 | -------------------------------------------------------------------------------- /internal/monitor/longevity_opvirt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Copyright (c) 2021-2025 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # This script runs the longevity tests for CSI-Drivers that support Resiliency Module with OpenShift Virtualization 17 | 18 | # Execution: 19 | # ./longevity_opvirt.sh --driver --iterations --isOCPVirt 20 | # E.g. for CSI-PowerFlex driver: 21 | # ./longevity_opvirt.sh --driver powerflex --iterations 10 --isOCPVirt true 22 | 23 | # Arguments passed during the script execution: 24 | # driver: Name of the CSI-DRiver, value: powerflex/powerstore/powermax/powerscale 25 | # iterations: Number of iterations, value: 10,20,30... 26 | # isOCPVirt: Boolean value, value: true/false 27 | # bastionNode: IP address of bastion node of OCP cluster 28 | 29 | # export the environment variables before executing the script 30 | # export OPENSHIFT_BASTION= 31 | # export NODE_USER= 32 | # export PASSWORD= 33 | # export REGISTRY_HOST= 34 | # export REGISTRY_PORT= 35 | # export PODMON_VERSION= 36 | 37 | driver="" 38 | iterations=0 39 | isOCPVirt=false 40 | 41 | # Function to comment out lines matching a pattern 42 | comment_out() { 43 | pattern=$1 44 | sed -i "/$pattern/ s/^/# /" run.integration 45 | } 46 | 47 | # Function to uncomment lines matching a pattern 48 | uncomment() { 49 | pattern=$1 50 | sed -i "/$pattern/ s/^# //" run.integration 51 | } 52 | 53 | # Parse arguments 54 | while [[ $# -gt 0 ]]; do 55 | key="$1" 56 | 57 | case $key in 58 | --driver) 59 | driver="$2" 60 | shift # past argument 61 | shift # past value 62 | ;; 63 | --iterations) 64 | iterations="$2" 65 | shift 66 | shift 67 | ;; 68 | --isOCPVirt) 69 | isOCPVirt="$2" 70 
| shift 71 | shift 72 | ;; 73 | *) # unknown option 74 | echo "Unknown option: $1" 75 | exit 1 76 | ;; 77 | esac 78 | done 79 | 80 | # Debug print (optional) 81 | echo "Driver: $driver" 82 | echo "Iterations: $iterations" 83 | echo "Is OCP Virt: $isOCPVirt" 84 | 85 | # Verify OCP environment and OpenShift Virtualization operator installation 86 | if [[ $isOCPVirt == true ]]; then 87 | 88 | if kubectl get clusterversions.config.openshift.io &>/dev/null; then 89 | echo "OpenShift (OCP) cluster identified." 90 | 91 | virtualization_operator_version=$(oc describe kubevirt.kubevirt.io kubevirt-kubevirt-hyperconverged -n openshift-cnv | grep "Operator Version" | awk '{print $3}') 92 | if [[ -z "$virtualization_operator_version" ]]; then 93 | echo "Openshift Virtualization operator not found on the cluster." >&2 # print_fail is not defined in this script; report the failure on stderr 94 | exit 1 95 | else 96 | echo "OpenShift Virtualization Operator Version: $virtualization_operator_version" 97 | 98 | # update run.integration to execute E2E tests for virtualized workloads 99 | sed -i 's/make "\${storage_type}-integration-test"/make "\${storage_type}-vm-integration-test"/' run.integration 100 | fi 101 | else 102 | echo "Given cluster is not an OpenShift (OCP) cluster, these tests are not applicable." 
103 | exit 1 104 | fi 105 | fi 106 | 107 | # Replace default configurations in run.integration script 108 | comment_out "source" 109 | original_iterations=$(grep -oP '^ITERATIONS=\K\d+' run.integration) 110 | sed -i "s/^ITERATIONS=$original_iterations/ITERATIONS=$iterations/" run.integration 111 | 112 | if [[ $driver == "powerflex" ]]; then 113 | comment_out "powerscale" 114 | comment_out "powerstore" 115 | comment_out "powermax" 116 | elif [[ $driver == "powerscale" ]]; then 117 | comment_out "powerflex" 118 | comment_out "powerstore" 119 | comment_out "powermax" 120 | elif [[ $driver == "powerstore" ]]; then 121 | comment_out "powerflex" 122 | comment_out "powerscale" 123 | comment_out "powermax" 124 | elif [[ $driver == "powermax" ]]; then 125 | comment_out "powerflex" 126 | comment_out "powerscale" 127 | comment_out "powerstore" 128 | fi 129 | 130 | # Execute E2E tests only for the specific driver 131 | sh run.integration | tee karavi-resiliency-int-test.log 132 | 133 | # Extract the highest return code from the log; one "Return code:" line is logged per E2E run, so reduce them to a single value 134 | returnCode=$(grep -oP 'Return code:\s+\K\d+' karavi-resiliency-int-test.log | sort -rn | head -n 1) 135 | 136 | # Revert the changes done in run.integration 137 | uncomment "source" 138 | sed -i "s/^ITERATIONS=$iterations/ITERATIONS=$original_iterations/" run.integration 139 | 140 | if [[ $driver == "powerflex" ]]; then 141 | uncomment "powerscale" 142 | uncomment "powerstore" 143 | uncomment "powermax" 144 | elif [[ $driver == "powerscale" ]]; then 145 | uncomment "powerflex" 146 | uncomment "powerstore" 147 | uncomment "powermax" 148 | elif [[ $driver == "powerstore" ]]; then 149 | uncomment "powerflex" 150 | uncomment "powerscale" 151 | uncomment "powermax" 152 | elif [[ $driver == "powermax" ]]; then 153 | uncomment "powerflex" 154 | uncomment "powerscale" 155 | uncomment "powerstore" 156 | fi 157 | 158 | sed -i 's/make "\${storage_type}-vm-integration-test"/make "\${storage_type}-integration-test"/' run.integration 159 | 160 | if [[ $returnCode -eq 0 ]]; then 161 |
echo "Resiliency Longevity test(s) passed for $driver driver." 162 | else 163 | echo "run.integration failed with exit code $returnCode" 164 | fi 165 | 166 | exit $returnCode -------------------------------------------------------------------------------- /internal/monitor/monitor_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2022 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package monitor 18 | 19 | import ( 20 | "fmt" 21 | "os" 22 | "testing" 23 | 24 | "github.com/cucumber/godog" 25 | log "github.com/sirupsen/logrus" 26 | ) 27 | 28 | func TestMain(m *testing.M) { 29 | status := 0 30 | if st := m.Run(); st > status { 31 | status = st 32 | } 33 | fmt.Printf("status %d\n", status) 34 | os.Exit(status) 35 | } 36 | 37 | func TestMonitor(t *testing.T) { 38 | log.Printf("Starting monitor test") 39 | godogOptions := godog.Options{ 40 | Format: "pretty,junit:monitor-junit-report.xml", 41 | Paths: []string{"features"}, 42 | Tags: "monitor", 43 | } 44 | status := godog.TestSuite{ 45 | Name: "monitor", 46 | ScenarioInitializer: MonitorTestScenarioInit, 47 | Options: &godogOptions, 48 | }.Run() 49 | if status != 0 { 50 | t.Error("There were failed monitor tests") 51 | } 52 | log.Printf("Monitor test finished") 53 | } 54 | 55 | func TestControllerMode(t *testing.T) { 56 | log.Printf("Starting controller-mode test") 57 | godogOptions := godog.Options{ 58 | Format: "pretty,junit:controller-mode-junit-report.xml", 59 | Paths: []string{"features"}, 60 | Tags: "controller-mode", 61 | } 62 | status := godog.TestSuite{ 63 | Name: "monitor", 64 | ScenarioInitializer: MonitorTestScenarioInit, 65 | Options: &godogOptions, 66 | }.Run() 67 | if status != 0 { 68 | t.Error("There were failed controller-mode tests") 69 | } 70 | log.Printf("Controller-mode test finished") 71 | } 72 | 73 | func TestNodeMode(t *testing.T) { 74 | log.Printf("Starting node-mode test") 75 | godogOptions := godog.Options{ 76 | Format: "pretty,junit:node-mode-junit-report.xml", 77 | Paths: []string{"features"}, 78 | Tags: "node-mode", 79 | } 80 | status := godog.TestSuite{ 81 | Name: "node-mode", 82 | ScenarioInitializer: MonitorTestScenarioInit, 83 | Options: &godogOptions, 84 | }.Run() 85 | if status != 0 { 86 | t.Error("There were failed node-mode tests") 87 | } 88 | log.Printf("Node-mode test finished") 89 | } 90 | 91 | func TestMapEqualsMap(t *testing.T) { 92 | cases := 
[]struct { 93 | mapA, mapB map[string]string 94 | result bool 95 | }{ 96 | { 97 | map[string]string{"key": "value"}, 98 | map[string]string{"key": "value"}, 99 | true, 100 | }, 101 | { 102 | map[string]string{"key": "value"}, 103 | map[string]string{"value": "key"}, 104 | false, 105 | }, 106 | { 107 | map[string]string{"key": "value"}, 108 | map[string]string{}, 109 | false, 110 | }, 111 | } 112 | for caseNum, acase := range cases { 113 | result := mapEqualsMap(acase.mapA, acase.mapB) 114 | if result != acase.result { 115 | t.Errorf("Case %d: Expected %t got %t", caseNum, acase.result, result) 116 | } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /internal/monitor/monitor_test_helpers.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2023 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package monitor 18 | 19 | import ( 20 | "fmt" 21 | 22 | "github.com/stretchr/testify/assert" 23 | ) 24 | 25 | // AssertExpectedAndActual is a helper function to allow the step function to call 26 | // assertion functions where you want to compare an expected and an actual value. 
27 | func AssertExpectedAndActual(a ExpectedAndActualAssertion, expected, actual interface{}, msgAndArgs ...interface{}) error { 28 | var t Asserter 29 | a(&t, expected, actual, msgAndArgs...) 30 | return t.err 31 | } 32 | 33 | // ExpectedAndActualAssertion represents an assert function that tests an actual value to an expected value 34 | type ExpectedAndActualAssertion func(t assert.TestingT, expected, actual interface{}, msgAndArgs ...interface{}) bool 35 | 36 | // AssertActual is a helper function to allow the step function to call 37 | // assertion functions where you want to compare an actual value to a 38 | // predefined state like nil, empty or true/false. 39 | func AssertActual(a ActualAssertion, actual interface{}, msgAndArgs ...interface{}) error { 40 | var t Asserter 41 | a(&t, actual, msgAndArgs...) 42 | return t.err 43 | } 44 | 45 | // ActualAssertion represents an assert function that tests the value of a function 46 | type ActualAssertion func(t assert.TestingT, actual interface{}, msgAndArgs ...interface{}) bool 47 | 48 | // Asserter is used to be able to retrieve the error reported by the called assertion 49 | type Asserter struct { 50 | err error 51 | } 52 | 53 | // Errorf is used by the called assertion to report an error 54 | func (a *Asserter) Errorf(format string, args ...interface{}) { 55 | a.err = fmt.Errorf(format, args...) 56 | } 57 | -------------------------------------------------------------------------------- /internal/monitor/run.integration: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Copyright (c) 2021-2022 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | source int_test_params.sh 16 | 17 | ITERATIONS=100 18 | RESILIENCY=../.. 19 | SLEEP_TIME=${SLEEP_TIME:-300} 20 | 21 | # Function to run the e2e test. Does some initial cleanup and captures logs. 22 | run_e2e_test() { 23 | storage_type=$1 24 | driver_namespace=$2 25 | tester_prefix=$3 26 | 27 | # Clean up any left over test pods 28 | echo "Clean up any remaining $tester_prefix test pods" 29 | sh $RESILIENCY/test/podmontest/uns.sh --prefix "$tester_prefix" --instances 18 30 | sleep "$SLEEP_TIME" 31 | sh $RESILIENCY/tools/monx.sh --once 32 | 33 | # Run the e2e 34 | echo "****************** Execute the $storage_type E2E test *****************" 35 | make "${storage_type}-integration-test" 36 | RC=$? 
37 | echo "E2E test for $storage_type had Return code: " $RC 38 | 39 | sh $RESILIENCY/tools/monx.sh --once 40 | 41 | # Collect logs 42 | echo "Collecting driver logs from $driver_namespace namespace" 43 | sh $RESILIENCY/tools/collect_logs.sh --ns "$driver_namespace" 44 | } 45 | 46 | ITER=1 47 | while [ $ITER -le $ITERATIONS ]; do 48 | date 49 | echo "Iteration: " $ITER 50 | 51 | run_e2e_test "powerflex" "vxflexos" "pmtv" 52 | run_e2e_test "unity" "unity" "pmtu" 53 | run_e2e_test "powerscale" "isilon" "pmti" 54 | run_e2e_test "powerstore" "powerstore" "pmtps" 55 | run_e2e_test "powermax" "powermax" "pmtpm" 56 | 57 | sleep "$SLEEP_TIME" 58 | sh $RESILIENCY/tools/monx.sh --once 59 | 60 | ITER=$((ITER + 1)) 61 | if [ -e stop ]; then 62 | echo "****************** Exiting due to stop file ******************" 63 | exit 0 64 | fi 65 | done 66 | -------------------------------------------------------------------------------- /internal/utils/linuxLoopBackDevice.go: -------------------------------------------------------------------------------- 1 | //go:build test || linux 2 | // +build test linux 3 | 4 | /* 5 | * Copyright (c) 2022 Dell Inc., or its subsidiaries. All Rights Reserved. 6 | * 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 
18 | */ 19 | 20 | package utils 21 | 22 | import ( 23 | "bytes" 24 | "io" 25 | "os/exec" 26 | "strings" 27 | 28 | log "github.com/sirupsen/logrus" 29 | ) 30 | 31 | // Define the Commander interface 32 | type Commander interface { 33 | Output() ([]byte, error) 34 | SetStdin(io.Reader) 35 | } 36 | 37 | type RealCommander struct { 38 | cmd *exec.Cmd 39 | } 40 | 41 | func (c *RealCommander) Output() ([]byte, error) { 42 | return c.cmd.Output() 43 | } 44 | 45 | func (c *RealCommander) SetStdin(stdin io.Reader) { 46 | c.cmd.Stdin = stdin 47 | } 48 | 49 | // Create an execCommand function to return the wrapped exec.Cmd 50 | var execCommand = func(name string, arg ...string) Commander { 51 | return &RealCommander{cmd: exec.Command(name, arg...)} 52 | } 53 | 54 | // GetLoopBackDevice get the loopbackdevice for given pv 55 | func GetLoopBackDevice(pvname string) (string, error) { 56 | textBytes, err := execCommand("/usr/sbin/losetup", "-a").Output() 57 | if err != nil || string(textBytes) == "" { 58 | return "", err 59 | } 60 | 61 | cmd := execCommand("grep", pvname) 62 | cmd.SetStdin(bytes.NewBuffer(textBytes)) 63 | textBytes, err = cmd.Output() 64 | if err != nil || string(textBytes) == "" { 65 | return "", err 66 | } 67 | log.Debugf("losetup output: %s", string(textBytes)) 68 | loopDevices := strings.Split(string(textBytes), ":") 69 | return loopDevices[0], nil 70 | } 71 | 72 | // DeleteLoopBackDevice deletes the loopbackdevice from the system 73 | func DeleteLoopBackDevice(loopDev string) ([]byte, error) { 74 | cmd := execCommand("/usr/sbin/losetup", "-d", loopDev) 75 | return cmd.Output() 76 | } 77 | -------------------------------------------------------------------------------- /internal/utils/linuxLoopBackDevice_test.go: -------------------------------------------------------------------------------- 1 | // File: linuxLoopBackDevice_test.go 2 | //go:build test || linux 3 | // +build test linux 4 | 5 | package utils 6 | 7 | import ( 8 | "bytes" 9 | "errors" 10 | "io" 
11 | "os/exec" 12 | "reflect" 13 | "strings" 14 | "testing" 15 | 16 | "github.com/stretchr/testify/assert" 17 | ) 18 | 19 | // Implement the MockCommander to satisfy the Commander interface 20 | type MockCommander struct { 21 | output []byte 22 | outputErr error 23 | stdin io.Reader 24 | } 25 | 26 | func (m *MockCommander) Output() ([]byte, error) { 27 | return m.output, m.outputErr 28 | } 29 | 30 | func (m *MockCommander) SetStdin(stdin io.Reader) { 31 | m.stdin = stdin 32 | } 33 | 34 | func TestGetLoopBackDevice(t *testing.T) { 35 | tests := []struct { 36 | name string 37 | pvname string 38 | losetupOutput string 39 | losetupErr error 40 | grepOutput string 41 | grepErr error 42 | want string 43 | expectErr bool 44 | }{ 45 | { 46 | name: "Valid loopback device", 47 | pvname: "test.img", 48 | losetupOutput: "/dev/loop0: 0 2048 /var/lib/libvirt/images/test.img\n/dev/loop1: 0 2048 /var/lib/libvirt/images/alpine.iso", 49 | losetupErr: nil, 50 | grepOutput: "/dev/loop0: 0 2048 /var/lib/libvirt/images/test.img", 51 | grepErr: nil, 52 | want: "/dev/loop0", 53 | expectErr: false, 54 | }, 55 | { 56 | name: "Invalid Case", 57 | pvname: "test.img", 58 | losetupOutput: "", 59 | losetupErr: nil, 60 | grepOutput: "/dev/loop0: 0 2048 /var/lib/libvirt/images/test.img", 61 | grepErr: nil, 62 | want: "", 63 | expectErr: false, 64 | }, 65 | { 66 | name: "Invalid loopback device", 67 | pvname: "nonexistent.img", 68 | losetupOutput: "/dev/loop0: 0 2048 /var/lib/libvirt/images/test.img\n/dev/loop1: 0 2048 /var/lib/libvirt/images/alpine.iso", 69 | losetupErr: nil, 70 | grepOutput: "", 71 | grepErr: errors.New("not found"), 72 | want: "", 73 | expectErr: true, 74 | }, 75 | } 76 | 77 | for _, tt := range tests { 78 | t.Run(tt.name, func(t *testing.T) { 79 | execCommand = func(name string, _ ...string) Commander { 80 | switch name { 81 | case "/usr/sbin/losetup": 82 | return &MockCommander{ 83 | output: []byte(tt.losetupOutput), 84 | outputErr: tt.losetupErr, 85 | } 86 | case "grep": 87 | 
return &MockCommander{ 88 | output: []byte(tt.grepOutput), 89 | outputErr: tt.grepErr, 90 | } 91 | default: 92 | return &MockCommander{} 93 | } 94 | } 95 | got, err := GetLoopBackDevice(tt.pvname) 96 | if tt.expectErr { 97 | assert.NotNil(t, err) 98 | } else { 99 | assert.Nil(t, err) 100 | assert.Equal(t, tt.want, got) 101 | } 102 | }) 103 | } 104 | 105 | // Reset execCommand to its original setting after tests 106 | resetExecCommand() 107 | } 108 | 109 | func TestDeleteLoopBackDevice(t *testing.T) { 110 | tests := []struct { 111 | name string 112 | loopDev string 113 | output []byte 114 | outputErr error 115 | expectErr bool 116 | }{ 117 | { 118 | name: "Successful deletion of loopback device", 119 | loopDev: "/dev/loop0", 120 | output: []byte(""), 121 | outputErr: nil, 122 | expectErr: false, 123 | }, 124 | { 125 | name: "Error during deletion of loopback device", 126 | loopDev: "/dev/loop1", 127 | output: nil, 128 | outputErr: errors.New("error deleting loopback device"), 129 | expectErr: true, 130 | }, 131 | } 132 | 133 | for _, tt := range tests { 134 | t.Run(tt.name, func(t *testing.T) { 135 | execCommand = func(name string, arg ...string) Commander { 136 | if name == "/usr/sbin/losetup" && len(arg) > 0 && arg[0] == "-d" { 137 | return &MockCommander{ 138 | output: tt.output, 139 | outputErr: tt.outputErr, 140 | } 141 | } 142 | return &MockCommander{} 143 | } 144 | 145 | got, err := DeleteLoopBackDevice(tt.loopDev) 146 | if tt.expectErr { 147 | assert.NotNil(t, err) 148 | } else { 149 | assert.Nil(t, err) 150 | assert.Equal(t, tt.output, got) 151 | } 152 | }) 153 | } 154 | 155 | // Reset execCommand to its original setting after tests 156 | resetExecCommand() 157 | } 158 | 159 | func resetExecCommand() { 160 | execCommand = func(name string, arg ...string) Commander { 161 | return &RealCommander{cmd: exec.Command(name, arg...)} 162 | } 163 | } 164 | 165 | func TestRealCommander_Output(t *testing.T) { 166 | cmd := exec.Command("echo", "test output") 167 | c := 
&RealCommander{cmd: cmd} 168 | 169 | want := []byte("test output\n") 170 | 171 | got, err := c.Output() 172 | if err != nil { 173 | t.Errorf("RealCommander.Output() error = %v, wantErr nil", err) 174 | return 175 | } 176 | if !reflect.DeepEqual(got, want) { 177 | t.Errorf("RealCommander.Output() = %v, want %v", got, want) 178 | } 179 | } 180 | 181 | func TestRealCommander_SetStdin(t *testing.T) { 182 | type fields struct { 183 | cmd *exec.Cmd 184 | } 185 | type args struct { 186 | stdin io.Reader 187 | } 188 | tests := []struct { 189 | name string 190 | fields fields 191 | args args 192 | want io.Reader 193 | }{ 194 | { 195 | name: "set stdin to a byte buffer", 196 | fields: fields{cmd: &exec.Cmd{}}, 197 | args: args{stdin: bytes.NewBuffer([]byte("test stdin"))}, 198 | want: bytes.NewBuffer([]byte("test stdin")), 199 | }, 200 | { 201 | name: "set stdin to a string reader", 202 | fields: fields{cmd: &exec.Cmd{}}, 203 | args: args{stdin: strings.NewReader("test stdin")}, 204 | want: strings.NewReader("test stdin"), 205 | }, 206 | } 207 | for _, tt := range tests { 208 | t.Run(tt.name, func(t *testing.T) { 209 | c := &RealCommander{ 210 | cmd: tt.fields.cmd, 211 | } 212 | c.SetStdin(tt.args.stdin) 213 | got := c.cmd.Stdin 214 | if !reflect.DeepEqual(got, tt.want) { 215 | t.Errorf("RealCommander.SetStdin() = %v, want %v", got, tt.want) 216 | } 217 | }) 218 | } 219 | } 220 | -------------------------------------------------------------------------------- /internal/utils/linuxUnmount.go: -------------------------------------------------------------------------------- 1 | //go:build test || linux 2 | // +build test linux 3 | 4 | /* 5 | * Copyright (c) 2021-2022 Dell Inc., or its subsidiaries. All Rights Reserved. 6 | * 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 
9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | */ 19 | 20 | package utils 21 | 22 | import "syscall" 23 | 24 | // Unmount is a wrapper around syscall.Unmount 25 | func Unmount(devName string, flags int) error { 26 | return syscall.Unmount(devName, flags) 27 | } 28 | 29 | // Creat is a wrapper around syscall.Creat 30 | func Creat(filepath string, flags uint32) (int, error) { 31 | return syscall.Creat(filepath, flags) 32 | } 33 | -------------------------------------------------------------------------------- /internal/utils/linuxUnmount_test.go: -------------------------------------------------------------------------------- 1 | // File: linuxUnmount_test.go 2 | //go:build test || linux 3 | // +build test linux 4 | 5 | package utils 6 | 7 | import ( 8 | "os" 9 | "syscall" 10 | "testing" 11 | ) 12 | 13 | // setupTestFile creates a test file and returns its name and a cleanup function 14 | func setupTestFile(t *testing.T) (string, func()) { 15 | t.Helper() // Marks this function as a test helper 16 | 17 | tempFile, err := os.CreateTemp("", "testdevice") 18 | if err != nil { 19 | t.Fatalf("Failed to create temp file: %v", err) 20 | } 21 | tempFileName := tempFile.Name() 22 | tempFile.Close() 23 | 24 | // Cleanup function to remove the test file 25 | cleanup := func() { 26 | os.Remove(tempFileName) 27 | } 28 | 29 | return tempFileName, cleanup 30 | } 31 | 32 | // TestUnmount verifies Unmount wrapper 33 | func TestUnmount(t *testing.T) { 34 | tempFile, cleanup := setupTestFile(t) 35 | defer cleanup() 36 | 37 | // Attempt to unmount the temporary file 38 | err := 
Unmount(tempFile, 0) 39 | if err != nil { 40 | t.Log("Unmount failed with a known invalid argument for simply using a file as device-like") 41 | } else { 42 | t.Fatalf("Unmount expected to fail on regular file. Possible invalid test argument.") 43 | } 44 | } 45 | 46 | // TestCreat verifies Creat wrapper 47 | func TestCreat(t *testing.T) { 48 | tempDir := os.TempDir() 49 | testFilePath := tempDir + "/testcreatefile.txt" 50 | 51 | // Ensure the file does not already exist 52 | os.Remove(testFilePath) 53 | defer os.Remove(testFilePath) 54 | 55 | // Use syscall.Creat as per syscall requirement 56 | fd, err := Creat(testFilePath, 0) 57 | if err != nil { 58 | t.Fatalf("Creat() failed: %v", err) 59 | } 60 | defer syscall.Close(fd) // Clean up file descriptor after the test 61 | 62 | if fd < 0 { 63 | t.Fatalf("Creat() returned invalid file descriptor: %d", fd) 64 | } 65 | 66 | // Verify that the file has been created 67 | if _, err := os.Stat(testFilePath); os.IsNotExist(err) { 68 | t.Fatalf("File %s was not created", testFilePath) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /internal/utils/winUnmount.go: -------------------------------------------------------------------------------- 1 | //go:build test || windows 2 | // +build test windows 3 | 4 | /* 5 | * Copyright (c) 2021-2022 Dell Inc., or its subsidiaries. All Rights Reserved. 6 | * 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 18 | */ 19 | 20 | package utils 21 | 22 | import "os" 23 | 24 | // !!! This is not meant for production. This implementation is provided in case !!! 25 | // !!! a developer wishes to run the unit tests from an IDE on Windows !!! 26 | 27 | func Unmount(devName string, flags int) error { 28 | return os.Remove(devName) 29 | } 30 | 31 | func Creat(filepath string, flags int) (int, error) { 32 | if file, err := os.Create(filepath); err != nil { 33 | return -1, err 34 | } else { 35 | return int(file.Fd()), err 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /internal/utils/winUnmount_test.go: -------------------------------------------------------------------------------- 1 | //go:build test || windows 2 | // +build test windows 3 | 4 | package utils 5 | 6 | import ( 7 | "os" 8 | "testing" 9 | ) 10 | 11 | func setupTempFile() (string, error) { 12 | // Create a temporary file to simulate a device 13 | tempFile, err := os.CreateTemp("", "testdevice") 14 | if err != nil { 15 | return "", err 16 | } 17 | tempFileName := tempFile.Name() 18 | tempFile.Close() 19 | return tempFileName, nil 20 | } 21 | 22 | func teardownTempFile(fileName string) { 23 | if _, err := os.Stat(fileName); !os.IsNotExist(err) { 24 | // Clean up by removing the file 25 | os.Remove(fileName) 26 | } 27 | } 28 | 29 | // Ensure the test runs only on Windows (or with the test build tag) 30 | func TestUnmount(t *testing.T) { 31 | tempFileName, err := setupTempFile() 32 | if err != nil { 33 | t.Fatalf("Setup failed: %v", err) 34 | } 35 | defer teardownTempFile(tempFileName) 36 | 37 | // Attempt to unmount the simulated device (temporary file) 38 | if err := Unmount(tempFileName, 0); err != nil { 39 | t.Fatalf("Unmount() failed: %v", err) 40 | } 41 | 42 | // Verify that the file has been removed 43 | if _, err := os.Stat(tempFileName); 
!os.IsNotExist(err) { 44 | t.Fatalf("File %s was not removed", tempFileName) 45 | } 46 | } 47 | 48 | // Ensure the test runs only on Windows (or with the test build tag) 49 | func TestCreat(t *testing.T) { 50 | tempDir := os.TempDir() 51 | testFilePath := tempDir + "/testcreatefile.txt" 52 | 53 | // Ensure the file does not already exist 54 | os.Remove(testFilePath) 55 | defer os.Remove(testFilePath) 56 | 57 | // Attempt to create the file 58 | fd, err := Creat(testFilePath, 0) 59 | if err != nil { 60 | t.Fatalf("Creat() failed: %v", err) 61 | } else if fd < 0 { 62 | t.Fatalf("Creat() returned invalid file descriptor: %d", fd) 63 | } 64 | 65 | // Verify that the file has been created 66 | if _, err := os.Stat(testFilePath); os.IsNotExist(err) { 67 | t.Fatalf("File %s was not created", testFilePath) 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /podmon/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | name: podmon 3 | version: 1.0.0 4 | appVersion: 1...0 5 | description: | 6 | Deploys prometheus configured for monitoring a CSI driver. 
7 | keywords: 8 | - csi 9 | - storage 10 | - prometheus 11 | maintainers: 12 | - name: Dell 13 | engine: gotpl 14 | -------------------------------------------------------------------------------- /podmon/templates/podman.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: podmon 5 | namespace: podmon 6 | --- 7 | kind: ClusterRole 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | metadata: 10 | name: podmon 11 | rules: 12 | - apiGroups: [""] 13 | resources: ["pods"] 14 | verbs: ["get", "list", "watch", "update", "delete"] 15 | - apiGroups: [""] 16 | resources: ["nodes"] 17 | verbs: ["get", "list", "watch"] 18 | - apiGroups: ["storage.k8s.io"] 19 | resources: ["volumeattachments"] 20 | verbs: ["get", "list", "watch", "update", "delete"] 21 | - apiGroups: ["coordination.k8s.io"] 22 | resources: ["leases"] 23 | verbs: ["get", "watch", "list", "delete", "update", "create"] 24 | - apiGroups: [""] 25 | resources: ["persistentvolumeclaims"] 26 | verbs: ["get"] 27 | --- 28 | kind: ClusterRoleBinding 29 | apiVersion: rbac.authorization.k8s.io/v1 30 | metadata: 31 | name: podmon 32 | subjects: 33 | - kind: ServiceAccount 34 | name: podmon 35 | namespace: podmon 36 | roleRef: 37 | kind: ClusterRole 38 | name: podmon 39 | apiGroup: rbac.authorization.k8s.io 40 | --- 41 | apiVersion: apps/v1 42 | kind: Deployment 43 | metadata: 44 | name: podmon 45 | namespace: podmon 46 | spec: 47 | replicas: 2 48 | selector: 49 | matchLabels: 50 | app: podmon 51 | template: 52 | metadata: 53 | labels: 54 | app: podmon 55 | spec: 56 | serviceAccount: podmon 57 | containers: 58 | - name: podmon 59 | imagePullPolicy: Always 60 | image: {{ .Values.podmon.image }} 61 | -------------------------------------------------------------------------------- /podmon/values.yaml: -------------------------------------------------------------------------------- 1 | podmon: 2 | image: 
registry:port/podmon:v0.0.54 3 | -------------------------------------------------------------------------------- /test/podmontest/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021-2022 Dell Inc., or its subsidiaries. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | FROM scratch 15 | COPY podmontest /podmontest 16 | ENTRYPOINT [ "/podmontest" ] 17 | -------------------------------------------------------------------------------- /test/podmontest/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021-2023 Dell Inc., or its subsidiaries. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # Includes the following generated file to get semantic version information 15 | MAJOR=0 16 | MINOR=0 17 | PATCH=58 18 | VERSION?="v$(MAJOR).$(MINOR).$(PATCH)" 19 | REGISTRY?="${REGISTRY_HOST}:${REGISTRY_PORT}/podmontest" 20 | 21 | clean: 22 | go clean ./... 23 | 24 | build: 25 | rm -f podmontest 26 | CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -tags podmontest -ldflags '-w' -o podmontest *.go 27 | 28 | docker: build 29 | docker build -t podmontest -f Dockerfile --network host . 30 | 31 | podman: 32 | podman build -t "$(REGISTRY):$(VERSION)" --label commit=$(shell git log --max-count 1 --format="%H") . 33 | 34 | push: 35 | podman push "$(REGISTRY):$(VERSION)" 36 | -------------------------------------------------------------------------------- /test/podmontest/deploy/Chart.yaml: -------------------------------------------------------------------------------- 1 | name: podmontest 2 | version: 1.0.1 3 | apiVersion: v1 4 | appVersion: 1.0.0 5 | description: | 6 | Tests podmon 7 | icon: https://avatars1.githubusercontent.com/u/20958494?s=200&v=4 8 | keywords: 9 | - vxflexos-csi 10 | - unity-csi 11 | - storage 12 | engine: gotpl 13 | -------------------------------------------------------------------------------- /test/podmontest/deploy/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{- define "workloadType" -}} 2 | {{- if .Values.vmConfig -}} 3 | vm 4 | {{- else -}} 5 | pod 6 | {{- end -}} 7 | {{- end -}} -------------------------------------------------------------------------------- /test/podmontest/deploy/templates/pvc-vm-block.yaml: -------------------------------------------------------------------------------- 1 | # yamllint disable-file 2 | # This file is not valid YAML because it is a Helm template 3 | {{- if eq (include "workloadType" .) "vm" -}} 4 | {{- $root := . 
-}} 5 | {{- range $i, $e := untilStep 0 (int .Values.vmConfig.ndevices) 1 }} 6 | apiVersion: v1 7 | kind: PersistentVolumeClaim 8 | metadata: 9 | name: vm-block-pvc{{$i}} 10 | namespace: "{{ required "Must set namespace" $root.Values.vmConfig.namespace }}" 11 | spec: 12 | accessModes: 13 | - ReadWriteMany 14 | volumeMode: Block 15 | resources: 16 | requests: 17 | storage: 2Gi 18 | storageClassName: "{{ required "Must set storage class name" $root.Values.vmConfig.storageClassName }}" 19 | --- 20 | {{end}} 21 | {{- end -}} 22 | -------------------------------------------------------------------------------- /test/podmontest/deploy/templates/pvc-vm-filesystem.yaml: -------------------------------------------------------------------------------- 1 | # yamllint disable-file 2 | # This file is not valid YAML because it is a Helm template 3 | {{- if eq (include "workloadType" .) "vm" -}} 4 | {{- $root := . -}} 5 | {{- range $i, $e := untilStep 0 (int .Values.vmConfig.nvolumes) 1 }} 6 | apiVersion: v1 7 | kind: PersistentVolumeClaim 8 | metadata: 9 | name: vm-filesystem-pvc{{$i}} 10 | namespace: "{{ required "Must set namespace" $root.Values.vmConfig.namespace }}" 11 | spec: 12 | accessModes: 13 | - ReadWriteMany 14 | volumeMode: Filesystem 15 | resources: 16 | requests: 17 | storage: 2Gi 18 | storageClassName: "{{ required "Must set storage class name" $root.Values.vmConfig.storageClassName }}" 19 | --- 20 | {{end}} 21 | {{- end -}} 22 | -------------------------------------------------------------------------------- /test/podmontest/deploy/templates/pvc0.yaml: -------------------------------------------------------------------------------- 1 | # yamllint disable-file 2 | # This file is not valid YAML because it is a Helm template 3 | {{- if eq (include "workloadType" .) "pod" -}} 4 | {{- $root := . 
-}} 5 | {{- range $i, $e := untilStep 0 (int .Values.podmonTest.nvolumes) 1 }} 6 | kind: PersistentVolumeClaim 7 | apiVersion: v1 8 | metadata: 9 | name: "pvol{{$i}}" 10 | namespace: "{{- required "Must set namespace" $root.Values.podmonTest.namespace }}" 11 | spec: 12 | accessModes: 13 | - ReadWriteOnce 14 | volumeMode: Filesystem 15 | resources: 16 | requests: 17 | storage: 8Gi 18 | storageClassName: "{{- required "Must set storage class name" $root.Values.podmonTest.storageClassName }}" 19 | --- 20 | {{end}} 21 | {{- end -}} -------------------------------------------------------------------------------- /test/podmontest/deploy/templates/pvc1.yaml: -------------------------------------------------------------------------------- 1 | # yamllint disable-file 2 | # This file is not valid YAML because it is a Helm template 3 | {{- if eq (include "workloadType" .) "pod" -}} 4 | {{- $root := . -}} 5 | {{- range $i, $e := untilStep 0 (int .Values.podmonTest.ndevices) 1 }} 6 | kind: PersistentVolumeClaim 7 | apiVersion: v1 8 | metadata: 9 | name: "pdev{{$i}}" 10 | namespace: "{{ required "Must set namespace" $root.Values.podmonTest.namespace }}" 11 | spec: 12 | accessModes: 13 | - ReadWriteOnce 14 | volumeMode: Block 15 | resources: 16 | requests: 17 | storage: 8Gi 18 | storageClassName: "{{ required "Must set storage class name" $root.Values.podmonTest.storageClassName }}" 19 | --- 20 | {{end}} 21 | {{- end -}} -------------------------------------------------------------------------------- /test/podmontest/deploy/templates/test.yaml: -------------------------------------------------------------------------------- 1 | # yamllint disable-file 2 | # This file is not valid YAML because it is a Helm template 3 | {{- if ne (include "workloadType" .) 
"vm" }} 4 | apiVersion: v1 5 | kind: ServiceAccount 6 | metadata: 7 | name: podmontest 8 | namespace: {{ required "Must set namespace" .Values.podmonTest.namespace }} 9 | {{- end }} 10 | --- 11 | {{- if eq (include "workloadType" .) "pod" -}} 12 | {{- if eq .Values.podmonTest.deploymentType "statefulset" }} 13 | kind: StatefulSet 14 | {{ end }} 15 | {{- if eq .Values.podmonTest.deploymentType "deployment" }} 16 | kind: Deployment 17 | {{ end }} 18 | apiVersion: apps/v1 19 | metadata: 20 | name: podmontest 21 | namespace: {{ required "Must set namespace" .Values.podmonTest.namespace }} 22 | spec: 23 | selector: 24 | matchLabels: 25 | app: podmontest-{{ .Release.Namespace }} 26 | 27 | {{- if eq .Values.podmonTest.deploymentType "statefulset" }} 28 | serviceName: 2vols 29 | {{ end }} 30 | {{- if gt (int .Values.podmonTest.replicas) 1 }} 31 | replicas: {{ required "Number of replicas" .Values.podmonTest.replicas }} 32 | {{ end }} 33 | template: 34 | metadata: 35 | labels: 36 | app: podmontest-{{ .Release.Namespace }} 37 | podmon.dellemc.com/driver: {{ required "Must set driver label" .Values.podmonTest.driverLabel }} 38 | affinity: affinity 39 | spec: 40 | {{- if ne .Values.podmonTest.zone "" }} 41 | affinity: 42 | nodeAffinity: 43 | requiredDuringSchedulingIgnoredDuringExecution: 44 | nodeSelectorTerms: 45 | - matchExpressions: 46 | - key: "topology.kubernetes.io/zone" 47 | operator: In 48 | values: 49 | - {{.Values.podmonTest.zone}} 50 | {{end}} 51 | {{- if eq .Values.podmonTest.podAffinity true}} 52 | affinity: 53 | podAffinity: 54 | requiredDuringSchedulingIgnoredDuringExecution: 55 | - labelSelector: 56 | matchExpressions: 57 | - key: "app" 58 | operator: In 59 | values: 60 | - podmontest-{{ .Release.Namespace }} 61 | topologyKey: "kubernetes.io/hostname" 62 | {{end}} 63 | tolerations: 64 | - effect: NoExecute 65 | key: node.kubernetes.io/unreachable 66 | operator: Exists 67 | tolerationSeconds: {{ required "Number of seconds to tolerate unreachable taint" 
.Values.podmonTest.unreachableTolerationSeconds }} 68 | topologySpreadConstraints: 69 | - maxSkew: {{ required "Number of replicas" .Values.podmonTest.replicas }} # 70 | topologyKey: kubernetes.io/hostname 71 | whenUnsatisfiable: DoNotSchedule 72 | labelSelector: 73 | matchLabels: 74 | affinity: affinity 75 | serviceAccount: podmontest 76 | containers: 77 | - name: podmontest 78 | image: {{ required "Must set image for podmonTest" .Values.podmonTest.image }} 79 | imagePullPolicy: IfNotPresent 80 | command: [ "/podmontest" ] 81 | args: 82 | - "-doexit=true" 83 | env: 84 | - name: ROOT_DIR 85 | value: "/" 86 | volumeMounts: 87 | {{- range $i, $e := untilStep 0 (int .Values.podmonTest.nvolumes) 1 }} 88 | - mountPath: /data{{$i}} 89 | name: pvol{{$i}} 90 | {{end}} 91 | volumeDevices: 92 | {{- range $i, $e := untilStep 0 (int .Values.podmonTest.ndevices) 1 }} 93 | - devicePath: /blockdata1{{$i}} 94 | name: pdev{{$i}} 95 | {{end}} 96 | volumes: 97 | {{- range $i, $e := untilStep 0 (int .Values.podmonTest.nvolumes) 1 }} 98 | - name: pvol{{$i}} 99 | persistentVolumeClaim: 100 | claimName: pvol{{$i}} 101 | {{end}} 102 | {{- range $i, $e := untilStep 0 (int .Values.podmonTest.ndevices) 1 }} 103 | - name: pdev{{$i}} 104 | persistentVolumeClaim: 105 | claimName: pdev{{$i}} 106 | {{end}} 107 | {{- end -}} -------------------------------------------------------------------------------- /test/podmontest/deploy/templates/vm-template.yaml: -------------------------------------------------------------------------------- 1 | {{- if eq (include "workloadType" .) "vm" -}} 2 | {{- $root := . 
-}} 3 | apiVersion: kubevirt.io/v1 4 | kind: VirtualMachine 5 | metadata: 6 | labels: 7 | kubevirt.io/vm: vm-0 8 | name: vm-0 9 | spec: 10 | runStrategy: Always 11 | template: 12 | metadata: 13 | labels: 14 | kubevirt.io/vm: vm-0 15 | podmon.dellemc.com/driver: "{{ required "Must set driver label" $root.Values.vmConfig.driverLabel }}" 16 | spec: 17 | {{- if ne .Values.vmConfig.zone "" }} 18 | affinity: 19 | nodeAffinity: 20 | requiredDuringSchedulingIgnoredDuringExecution: 21 | nodeSelectorTerms: 22 | - matchExpressions: 23 | - key: "topology.kubernetes.io/zone" 24 | operator: In 25 | values: 26 | - {{.Values.vmConfig.zone}} 27 | {{end}} 28 | {{- if eq .Values.vmConfig.podAffinity true}} 29 | affinity: 30 | podAffinity: 31 | requiredDuringSchedulingIgnoredDuringExecution: 32 | - labelSelector: 33 | matchExpressions: 34 | - key: "app" 35 | operator: In 36 | values: 37 | - podmontest-{{ .Release.Namespace }} 38 | topologyKey: "kubernetes.io/hostname" 39 | {{end}} 40 | tolerations: 41 | - effect: NoExecute 42 | key: node.kubernetes.io/unreachable 43 | operator: Exists 44 | tolerationSeconds: {{ required "Number of seconds to tolerate unreachable taint" .Values.vmConfig.unreachableTolerationSeconds }} 45 | domain: 46 | devices: 47 | disks: 48 | - disk: 49 | bus: virtio 50 | name: containerdisk 51 | - disk: 52 | bus: virtio 53 | name: cloudinitdisk 54 | {{- range $j, $e := untilStep 0 (int $root.Values.vmConfig.ndevices) 1 }} 55 | - disk: 56 | bus: virtio 57 | shareable: true 58 | name: block-disk{{$j}} 59 | {{end}} 60 | {{- range $k, $e := untilStep 0 (int $root.Values.vmConfig.nvolumes) 1 }} 61 | - disk: 62 | bus: virtio 63 | shareable: true 64 | name: filesystem-disk{{$k}} 65 | {{end}} 66 | machine: 67 | type: "" 68 | resources: 69 | requests: 70 | memory: 2G 71 | terminationGracePeriodSeconds: 0 72 | volumes: 73 | - containerDisk: 74 | image: kubevirt/fedora-with-test-tooling-container-disk:devel 75 | name: containerdisk 76 | - cloudInitNoCloud: 77 | userData: |- 78 
| #cloud-config 79 | password: fedora 80 | chpasswd: { expire: False } 81 | name: cloudinitdisk 82 | {{- range $j, $e := untilStep 0 (int $root.Values.vmConfig.ndevices) 1 }} 83 | - name: block-disk{{$j}} 84 | persistentVolumeClaim: 85 | claimName: vm-block-pvc{{$j}} 86 | {{end}} 87 | {{- range $k, $e := untilStep 0 (int $root.Values.vmConfig.nvolumes) 1 }} 88 | - name: filesystem-disk{{$k}} 89 | persistentVolumeClaim: 90 | claimName: vm-filesystem-pvc{{$k}} 91 | {{end}} 92 | --- 93 | {{- end -}} 94 | -------------------------------------------------------------------------------- /test/podmontest/deploy/values-isilon.yaml: -------------------------------------------------------------------------------- 1 | podmonTest: 2 | image: "$REGISTRY_HOST:$REGISTRY_PORT/podmontest:v0.0.54" 3 | namespace: "podmontest" 4 | driverLabel: csi-isilon 5 | storageClassName: isilon 6 | nvolumes: 1 7 | ndevices: 1 8 | # deploymentType can be "statefulset" or "deployment" 9 | deploymentType: statefulset 10 | # replicas is the number of replicas for deployments or statefulsets 11 | replicas: 1 12 | # set to true to locate replicas on the same node (must be a boolean: the template compares it with true) 13 | podAffinity: false 14 | # zone will restrict node placement by matching node label topology.kubernetes.io/zone 15 | zone: "" 16 | # Number of seconds to tolerate a node unreachable taint 17 | unreachableTolerationSeconds: 300 18 | -------------------------------------------------------------------------------- /test/podmontest/deploy/values-powermax-iscsi.yaml: -------------------------------------------------------------------------------- 1 | podmonTest: 2 | image: "$REGISTRY_HOST:$REGISTRY_PORT/podmontest:v0.0.54" 3 | namespace: "podmontest" 4 | driverLabel: csi-powermax 5 | storageClassName: powermax-iscsi 6 | nvolumes: 1 7 | ndevices: 1 8 | # deploymentType can be "statefulset" or "deployment" 9 | deploymentType: statefulset 10 | # replicas is the number of replicas for deployments or statefulsets 11 | replicas: 1 
12 | # set to true to locate replicas on the same node (must be a boolean: the template compares it with true) 13 | podAffinity: false 14 | # zone will restrict node placement by matching node label topology.kubernetes.io/zone 15 | zone: "" 16 | # Number of seconds to tolerate a node unreachable taint 17 | unreachableTolerationSeconds: 300 18 | -------------------------------------------------------------------------------- /test/podmontest/deploy/values-powermax-nfs.yaml: -------------------------------------------------------------------------------- 1 | podmonTest: 2 | image: "$REGISTRY_HOST:$REGISTRY_PORT/podmontest:v0.0.54" 3 | namespace: "podmontest" 4 | driverLabel: csi-powermax 5 | storageClassName: powermax-nfs 6 | nvolumes: 1 7 | ndevices: 1 8 | # deploymentType can be "statefulset" or "deployment" 9 | deploymentType: statefulset 10 | # replicas is the number of replicas for deployments or statefulsets 11 | replicas: 1 12 | # set to true to locate replicas on the same node (must be a boolean: the template compares it with true) 13 | podAffinity: false 14 | # zone will restrict node placement by matching node label topology.kubernetes.io/zone 15 | zone: "" 16 | # Number of seconds to tolerate a node unreachable taint 17 | unreachableTolerationSeconds: 300 18 | -------------------------------------------------------------------------------- /test/podmontest/deploy/values-powermax-nvme.yaml: -------------------------------------------------------------------------------- 1 | podmonTest: 2 | image: "$REGISTRY_HOST:$REGISTRY_PORT/podmontest:v0.0.54" 3 | namespace: "podmontest" 4 | driverLabel: csi-powermax 5 | storageClassName: powermax-nvme 6 | nvolumes: 1 7 | ndevices: 1 8 | # deploymentType can be "statefulset" or "deployment" 9 | deploymentType: statefulset 10 | # replicas is the number of replicas for deployments or statefulsets 11 | replicas: 1 12 | # set to true to locate replicas on the same node (must be a boolean: the template compares it with true) 13 | podAffinity: false 14 | # zone will restrict node placement by matching node label topology.kubernetes.io/zone 
15 | zone: "" 16 | # Number of seconds to tolerate a node unreachable taint 17 | unreachableTolerationSeconds: 300 18 | -------------------------------------------------------------------------------- /test/podmontest/deploy/values-powerstore-iscsi.yaml: -------------------------------------------------------------------------------- 1 | podmonTest: 2 | image: "$REGISTRY_HOST:$REGISTRY_PORT/podmontest:v0.0.54" 3 | namespace: "podmontest" 4 | driverLabel: csi-powerstore 5 | storageClassName: powerstore-iscsi 6 | nvolumes: 1 7 | ndevices: 1 8 | # deploymentType can be "statefulset" or "deployment" 9 | deploymentType: statefulset 10 | # replicas is the number of replicas for deployments or statefulsets 11 | replicas: 1 12 | # set to true to locate replicas on the same node (must be a boolean: the template compares it with true) 13 | podAffinity: false 14 | # zone will restrict node placement by matching node label topology.kubernetes.io/zone 15 | zone: "" 16 | # Number of seconds to tolerate a node unreachable taint 17 | unreachableTolerationSeconds: 300 18 | -------------------------------------------------------------------------------- /test/podmontest/deploy/values-powerstore-nfs.yaml: -------------------------------------------------------------------------------- 1 | podmonTest: 2 | image: "$REGISTRY_HOST:$REGISTRY_PORT/podmontest:v0.0.54" 3 | namespace: "podmontest" 4 | driverLabel: csi-powerstore 5 | storageClassName: powerstore-nfs 6 | nvolumes: 1 7 | ndevices: 1 8 | # deploymentType can be "statefulset" or "deployment" 9 | deploymentType: statefulset 10 | # replicas is the number of replicas for deployments or statefulsets 11 | replicas: 1 12 | # set to true to locate replicas on the same node (must be a boolean: the template compares it with true) 13 | podAffinity: false 14 | # zone will restrict node placement by matching node label topology.kubernetes.io/zone 15 | zone: "" 16 | # Number of seconds to tolerate a node unreachable taint 17 | unreachableTolerationSeconds: 300 18 | 
-------------------------------------------------------------------------------- /test/podmontest/deploy/values-powerstore-nvme.yaml: -------------------------------------------------------------------------------- 1 | podmonTest: 2 | image: "$REGISTRY_HOST:$REGISTRY_PORT/podmontest:v0.0.54" 3 | namespace: "podmontest" 4 | driverLabel: csi-powerstore 5 | storageClassName: powerstore-nvmetcp 6 | nvolumes: 1 7 | ndevices: 1 8 | # deploymentType can be "statefulset" or "deployment" 9 | deploymentType: statefulset 10 | # replicas is the number of replicas for deployments or statefulsets 11 | replicas: 1 12 | # set to true to locate replicas on the same node (must be a boolean: the template compares it with true) 13 | podAffinity: false 14 | # zone will restrict node placement by matching node label topology.kubernetes.io/zone 15 | zone: "" 16 | # Number of seconds to tolerate a node unreachable taint 17 | unreachableTolerationSeconds: 300 18 | -------------------------------------------------------------------------------- /test/podmontest/deploy/values-unity-nfs.yaml: -------------------------------------------------------------------------------- 1 | podmonTest: 2 | image: "registry:port/podmontest:v0.0.54" 3 | namespace: "podmontest" 4 | driverLabel: csi-unity 5 | storageClassName: unity-nfs 6 | nvolumes: 2 7 | ndevices: 0 8 | # deploymentType can be "statefulset" or "deployment" 9 | deploymentType: statefulset 10 | # replicas is the number of replicas for deployments or statefulsets 11 | replicas: 1 12 | # set to true to locate replicas on the same node (must be a boolean: the template compares it with true) 13 | podAffinity: false 14 | # zone will restrict node placement by matching node label topology.kubernetes.io/zone 15 | zone: "" 16 | # Number of seconds to tolerate a node unreachable taint 17 | unreachableTolerationSeconds: 300 18 | -------------------------------------------------------------------------------- /test/podmontest/deploy/values-unity.yaml: -------------------------------------------------------------------------------- 
1 | podmonTest: 2 | image: "registry:port/podmontest:v0.0.54" 3 | namespace: "podmontest" 4 | driverLabel: csi-unity 5 | storageClassName: unity-notopo 6 | nvolumes: 1 7 | ndevices: 1 8 | # deploymentType can be "statefulset" or "deployment" 9 | deploymentType: statefulset 10 | # replicas is the number of replicas for deployments or statefulsets 11 | replicas: 1 12 | # set to true to locate replicas on the same node (must be a boolean: the template compares it with true) 13 | podAffinity: false 14 | # zone will restrict node placement by matching node label topology.kubernetes.io/zone 15 | zone: "" 16 | # Number of seconds to tolerate a node unreachable taint 17 | unreachableTolerationSeconds: 300 18 | -------------------------------------------------------------------------------- /test/podmontest/deploy/values-vm.yaml: -------------------------------------------------------------------------------- 1 | vmConfig: 2 | namespace: "vm-namespace" 3 | driverLabel: "csi-powerstore" 4 | storageClassName: "powerstore-nfs" 5 | nvolumes: 2 6 | ndevices: 0 7 | instances: 3 8 | # set to true to locate replicas on the same node 9 | podAffinity: false 10 | # zone will restrict node placement by matching node label topology.kubernetes.io/zone 11 | zone: "" 12 | # Number of seconds to tolerate a node unreachable taint 13 | unreachableTolerationSeconds: 300 14 | -------------------------------------------------------------------------------- /test/podmontest/deploy/values-vxflex.yaml: -------------------------------------------------------------------------------- 1 | podmonTest: 2 | image: "registry:port/podmontest:v0.0.54" 3 | namespace: "podmontest" 4 | driverLabel: csi-vxflexos 5 | storageClassName: vxflexos-notopo 6 | nvolumes: 2 7 | ndevices: 0 8 | # deploymentType can be "statefulset" or "deployment" 9 | deploymentType: statefulset 10 | # replicas is the number of replicas for deployments or statefulsets 11 | replicas: 1 12 | # set to true to locate replicas on the same node (must be a boolean: the template compares it with true) 13 | podAffinity: 
false 14 | # zone will restrict node placement by matching node label topology.kubernetes.io/zone 15 | zone: "" 16 | # Number of seconds to tolerate a node unreachable taint 17 | unreachableTolerationSeconds: 300 18 | -------------------------------------------------------------------------------- /test/podmontest/insi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2022-2025 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 17 | instances=${instances:-0} 18 | ndevices=${ndevices:-0} 19 | nvolumes=${nvolumes:-0} 20 | zone=${zone:-""} 21 | storageClassName=${storageClassName:-isilon} 22 | PODMONTEST_REGISTRY="$REGISTRY_HOST" 23 | if [ -n "$REGISTRY_PORT" ]; then 24 | PODMONTEST_REGISTRY="$PODMONTEST_REGISTRY:$REGISTRY_PORT" 25 | fi 26 | image="$PODMONTEST_REGISTRY/podmontest:v0.0.58" 27 | prefix="pmti" 28 | replicas=1 29 | deploymentType="statefulset" 30 | driverLabel="csi-isilon" 31 | podAffinity="false" 32 | unreachableTolerationSeconds=300 33 | workloadType=${workloadType:-"pod"} 34 | 35 | if [ "$DEBUG"x != "x" ]; then 36 | DEBUG="--dry-run --debug" 37 | fi 38 | 39 | for param in $* 40 | do 41 | case $param in 42 | "--instances") 43 | shift 44 | instances=$1 45 | shift 46 | ;; 47 | "--ndevices") 48 | shift 49 | ndevices=$1 50 | shift 51 | ;; 52 | "--nvolumes") 53 | shift 54 | nvolumes=$1 55 | shift 56 | ;; 57 | "--prefix") 58 | shift 59 | prefix=$1 60 | shift 61 | ;; 62 | "--storage-class") 63 | shift 64 | storageClassName=$1 65 | shift 66 | ;; 67 | "--replicas") 68 | shift 69 | replicas=$1 70 | shift 71 | ;; 72 | "--podAffinity") 73 | podAffinity="true" 74 | shift 75 | ;; 76 | "--deployment") 77 | deploymentType="deployment" 78 | shift 79 | ;; 80 | "--unreachableTolerationSeconds") 81 | shift 82 | unreachableTolerationSeconds=$1 83 | shift 84 | ;; 85 | "--label") 86 | shift 87 | driverLabel=$1 88 | shift 89 | ;; 90 | "--workload-type") 91 | shift 92 | workloadType=$1 93 | shift 94 | ;; 95 | esac 96 | 97 | done 98 | 99 | cd "$SCRIPTDIR" 100 | 101 | i=1 102 | while [ $i -le $instances ]; do 103 | 104 | echo $i 105 | kubectl create namespace ${prefix}$i 106 | if [ "$workloadType" == "pod" ]; then 107 | helm install -n "${prefix}$i" "${prefix}$i" "${SCRIPTDIR}"/deploy \ 108 | ${DEBUG} \ 109 | --values deploy/values-isilon.yaml \ 110 | --set podmonTest.namespace="${prefix}$i" \ 111 | --set 
podmonTest.storageClassName="$storageClassName" \ 112 | --set podmonTest.ndevices=$ndevices \ 113 | --set podmonTest.nvolumes=$nvolumes \ 114 | --set podmonTest.deploymentType=$deploymentType \ 115 | --set podmonTest.replicas=$replicas \ 116 | --set podmonTest.podAffinity=$podAffinity \ 117 | --set podmonTest.unreachableTolerationSeconds=$unreachableTolerationSeconds \ 118 | --set podmonTest.image="$image" \ 119 | --set podmonTest.zone="$zone" \ 120 | --set podmonTest.driverLabel="$driverLabel" 121 | else 122 | helm install -n "${prefix}${i}" "${prefix}${i}" "${SCRIPTDIR}"/deploy \ 123 | ${DEBUG} \ 124 | --values deploy/values-vm.yaml \ 125 | --set vmConfig.namespace="${prefix}${i}" \ 126 | --set vmConfig.storageClassName="$storageClassName" \ 127 | --set vmConfig.ndevices=$ndevices \ 128 | --set vmConfig.nvolumes=$nvolumes \ 129 | --set vmConfig.instances=$instances \ 130 | --set vmConfig.podAffinity=$podAffinity \ 131 | --set vmConfig.unreachableTolerationSeconds=$unreachableTolerationSeconds \ 132 | --set vmConfig.zone="$zone" \ 133 | --set vmConfig.driverLabel="$driverLabel" 134 | fi 135 | i=$((i + 1)) 136 | done 137 | -------------------------------------------------------------------------------- /test/podmontest/inspm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2023-2025 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 17 | instances=${instances:-0} 18 | ndevices=${ndevices:-0} 19 | nvolumes=${nvolumes:-0} 20 | zone=${zone:-""} 21 | storageClassName=${storageClassName:-powermax-iscsi} 22 | PODMONTEST_REGISTRY="$REGISTRY_HOST" 23 | if [ -n "$REGISTRY_PORT" ]; then 24 | PODMONTEST_REGISTRY="$PODMONTEST_REGISTRY:$REGISTRY_PORT" 25 | fi 26 | image="$PODMONTEST_REGISTRY/podmontest:v0.0.58" 27 | prefix="pmtpm" 28 | replicas=1 29 | deploymentType="statefulset" 30 | driverLabel="csi-powermax" 31 | podAffinity="false" 32 | unreachableTolerationSeconds=300 33 | workloadType=${workloadType:-"pod"} 34 | 35 | if [ "$DEBUG"x != "x" ]; then 36 | DEBUG="--dry-run --debug" 37 | fi 38 | 39 | for param in $* 40 | do 41 | case $param in 42 | "--instances") 43 | shift 44 | instances=$1 45 | shift 46 | ;; 47 | "--ndevices") 48 | shift 49 | ndevices=$1 50 | shift 51 | ;; 52 | "--nvolumes") 53 | shift 54 | nvolumes=$1 55 | shift 56 | ;; 57 | "--prefix") 58 | shift 59 | prefix=$1 60 | shift 61 | ;; 62 | "--storage-class") 63 | shift 64 | storageClassName=$1 65 | shift 66 | ;; 67 | "--replicas") 68 | shift 69 | replicas=$1 70 | shift 71 | ;; 72 | "--podAffinity") 73 | podAffinity="true" 74 | shift 75 | ;; 76 | "--deployment") 77 | deploymentType="deployment" 78 | shift 79 | ;; 80 | "--unreachableTolerationSeconds") 81 | shift 82 | unreachableTolerationSeconds=$1 83 | shift 84 | ;; 85 | "--label") 86 | shift 87 | driverLabel=$1 88 | shift 89 | ;; 90 | "--workload-type") 91 | 92 | shift 93 | workloadType=$1 94 | shift 95 | ;; 96 | esac 97 | 98 | done 99 | 100 | cd "$SCRIPTDIR" 101 | 102 | i=1 103 | while [ $i -le $instances ]; do 104 | echo $i 105 | kubectl create namespace ${prefix}$i 106 | if [ "$workloadType" == "pod" ]; then 107 | helm install -n "${prefix}$i" "${prefix}$i" "${SCRIPTDIR}"/deploy \ 108 | 
${DEBUG} \ 109 | --values deploy/values-powermax-iscsi.yaml \ 110 | --set podmonTest.namespace="${prefix}$i" \ 111 | --set podmonTest.storageClassName="$storageClassName" \ 112 | --set podmonTest.ndevices=$ndevices \ 113 | --set podmonTest.nvolumes=$nvolumes \ 114 | --set podmonTest.deploymentType=$deploymentType \ 115 | --set podmonTest.replicas=$replicas \ 116 | --set podmonTest.podAffinity=$podAffinity \ 117 | --set podmonTest.unreachableTolerationSeconds=$unreachableTolerationSeconds \ 118 | --set podmonTest.image="$image" \ 119 | --set podmonTest.zone="$zone" \ 120 | --set podmonTest.driverLabel="$driverLabel" 121 | else 122 | helm install -n "${prefix}${i}" "${prefix}${i}" "${SCRIPTDIR}"/deploy \ 123 | ${DEBUG} \ 124 | --values deploy/values-vm.yaml \ 125 | --set vmConfig.namespace="${prefix}${i}" \ 126 | --set vmConfig.storageClassName="$storageClassName" \ 127 | --set vmConfig.ndevices=$ndevices \ 128 | --set vmConfig.nvolumes=$nvolumes \ 129 | --set vmConfig.instances=$instances \ 130 | --set vmConfig.podAffinity=$podAffinity \ 131 | --set vmConfig.unreachableTolerationSeconds=$unreachableTolerationSeconds \ 132 | --set vmConfig.zone="$zone" \ 133 | --set vmConfig.driverLabel="$driverLabel" 134 | fi 135 | i=$((i + 1)) 136 | 137 | done 138 | -------------------------------------------------------------------------------- /test/podmontest/insps.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2023-2025 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" 17 | instances=${instances:-0} 18 | ndevices=${ndevices:-0} 19 | nvolumes=${nvolumes:-0} 20 | zone=${zone:-""} 21 | storageClassName=${storageClassName:-powerstore-nfs} 22 | PODMONTEST_REGISTRY="$REGISTRY_HOST" 23 | if [ -n "$REGISTRY_PORT" ]; then 24 | PODMONTEST_REGISTRY="$PODMONTEST_REGISTRY:$REGISTRY_PORT" 25 | fi 26 | image="$PODMONTEST_REGISTRY/podmontest:v0.0.58" 27 | prefix="pmtps" 28 | replicas=1 29 | deploymentType="statefulset" 30 | driverLabel="csi-powerstore" 31 | podAffinity="false" 32 | unreachableTolerationSeconds=300 33 | workloadType=${workloadType:-"pod"} 34 | 35 | if [ "$DEBUG"x != "x" ]; then 36 | DEBUG="--dry-run --debug" 37 | fi 38 | 39 | for param in $* 40 | do 41 | case $param in 42 | "--instances") 43 | shift 44 | instances=$1 45 | shift 46 | ;; 47 | "--ndevices") 48 | shift 49 | ndevices=$1 50 | shift 51 | ;; 52 | "--nvolumes") 53 | shift 54 | nvolumes=$1 55 | shift 56 | ;; 57 | "--prefix") 58 | shift 59 | prefix=$1 60 | shift 61 | ;; 62 | "--storage-class") 63 | shift 64 | storageClassName=$1 65 | shift 66 | ;; 67 | "--replicas") 68 | shift 69 | replicas=$1 70 | shift 71 | ;; 72 | "--podAffinity") 73 | podAffinity="true" 74 | shift 75 | ;; 76 | "--deployment") 77 | deploymentType="deployment" 78 | shift 79 | ;; 80 | "--unreachableTolerationSeconds") 81 | shift 82 | unreachableTolerationSeconds=$1 83 | shift 84 | ;; 85 | "--label") 86 | shift 87 | driverLabel=$1 88 | shift 89 | ;; 90 | "--workload-type") 91 | 
shift 92 | workloadType=$1 93 | shift 94 | ;; 95 | esac 96 | 97 | done 98 | 99 | cd "$SCRIPTDIR" 100 | 101 | i=1 102 | while [ $i -le $instances ]; do 103 | echo $i 104 | kubectl create namespace ${prefix}$i 105 | if [ "$workloadType" == "pod" ]; then 106 | helm install -n "${prefix}${i}" "${prefix}${i}" "${SCRIPTDIR}"/deploy \ 107 | ${DEBUG} \ 108 | --values deploy/values-powerstore-nfs.yaml \ 109 | --set podmonTest.namespace="${prefix}$i" \ 110 | --set podmonTest.storageClassName="$storageClassName" \ 111 | --set podmonTest.ndevices=$ndevices \ 112 | --set podmonTest.nvolumes=$nvolumes \ 113 | --set podmonTest.deploymentType=$deploymentType \ 114 | --set podmonTest.replicas=$replicas \ 115 | --set podmonTest.podAffinity=$podAffinity \ 116 | --set podmonTest.unreachableTolerationSeconds=$unreachableTolerationSeconds \ 117 | --set podmonTest.image="$image" \ 118 | --set podmonTest.zone="$zone" \ 119 | --set podmonTest.driverLabel="$driverLabel" 120 | else 121 | helm install -n "${prefix}${i}" "${prefix}${i}" "${SCRIPTDIR}"/deploy \ 122 | ${DEBUG} \ 123 | --values deploy/values-vm.yaml \ 124 | --set vmConfig.namespace="${prefix}${i}" \ 125 | --set vmConfig.storageClassName="$storageClassName" \ 126 | --set vmConfig.ndevices=$ndevices \ 127 | --set vmConfig.nvolumes=$nvolumes \ 128 | --set vmConfig.instances=$instances \ 129 | --set vmConfig.podAffinity=$podAffinity \ 130 | --set vmConfig.unreachableTolerationSeconds=$unreachableTolerationSeconds \ 131 | --set vmConfig.zone="$zone" \ 132 | --set vmConfig.driverLabel="$driverLabel" 133 | fi 134 | i=$((i + 1)) 135 | done 136 | 137 | -------------------------------------------------------------------------------- /test/podmontest/insu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2021-2025 Dell Inc., or its subsidiaries. All Rights Reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Installs podmontest workloads with helm: one namespace and one release per
# instance, both named "<prefix><n>" for n = 1..instances.
#
# instances, ndevices, nvolumes, zone, storageClassName and workloadType take
# their defaults from same-named environment variables when set; command-line
# flags override them. REGISTRY_HOST (and optionally REGISTRY_PORT) select the
# registry the test image is pulled from. A non-empty DEBUG makes helm run with
# "--dry-run --debug".
#
# Requires bash (uses BASH_SOURCE).

SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
instances=${instances:-4}
ndevices=${ndevices:-0}
nvolumes=${nvolumes:-4}
zone=${zone:-""}
storageClassName=${storageClassName:-unity-nfs}
PODMONTEST_REGISTRY="$REGISTRY_HOST"
if [ -n "$REGISTRY_PORT" ]; then
  PODMONTEST_REGISTRY="$PODMONTEST_REGISTRY:$REGISTRY_PORT"
fi
image="$PODMONTEST_REGISTRY/podmontest:v0.0.58"
prefix="pmtu"
replicas=1
deploymentType="statefulset"
driverLabel="csi-unity"
podAffinity="false"
unreachableTolerationSeconds=300
workloadType=${workloadType:-"pod"}

if [ -n "$DEBUG" ]; then
  DEBUG="--dry-run --debug"
fi

# require_value FLAG [VALUE ...] aborts when FLAG is the last argument, i.e.
# it was given without the value it needs.
require_value() {
  if [ $# -lt 2 ]; then
    echo "missing value for $1" >&2
    exit 2
  fi
}

# Consume the positional parameters directly so the flag being examined and
# the value being read can never drift apart.
while [ $# -gt 0 ]; do
  case "$1" in
    --instances)
      require_value "$@"
      instances=$2
      shift 2
      ;;
    --ndevices)
      require_value "$@"
      ndevices=$2
      shift 2
      ;;
    --nvolumes)
      require_value "$@"
      nvolumes=$2
      shift 2
      ;;
    --prefix)
      require_value "$@"
      prefix=$2
      shift 2
      ;;
    --storage-class)
      require_value "$@"
      storageClassName=$2
      shift 2
      ;;
    --replicas)
      require_value "$@"
      replicas=$2
      shift 2
      ;;
    --podAffinity)
      podAffinity="true"
      shift
      ;;
    --deployment)
      deploymentType="deployment"
      shift
      ;;
    --unreachableTolerationSeconds)
      require_value "$@"
      unreachableTolerationSeconds=$2
      shift 2
      ;;
    --label)
      require_value "$@"
      driverLabel=$2
      shift 2
      ;;
    --workload-type)
      require_value "$@"
      workloadType=$2
      shift 2
      ;;
    *)
      # Unrecognised arguments are ignored.
      shift
      ;;
  esac
done

# The loop below compares numerically; reject anything that is not a count.
case "$instances" in
  '' | *[!0-9]*)
    echo "--instances must be a non-negative integer, got '$instances'" >&2
    exit 2
    ;;
esac

# The --values paths below are relative to the script directory.
cd "$SCRIPTDIR" || exit 1

i=1
while [ "$i" -le "$instances" ]; do
  ns="${prefix}${i}"
  echo "$i"
  kubectl create namespace "$ns"
  # ${DEBUG} is left unquoted on purpose: it is either empty or two helm flags.
  if [ "$workloadType" = "pod" ]; then
    helm install -n "$ns" "$ns" "${SCRIPTDIR}/deploy" \
      ${DEBUG} \
      --values deploy/values-unity-nfs.yaml \
      --set podmonTest.namespace="$ns" \
      --set podmonTest.storageClassName="$storageClassName" \
      --set podmonTest.ndevices="$ndevices" \
      --set podmonTest.nvolumes="$nvolumes" \
      --set podmonTest.deploymentType="$deploymentType" \
      --set podmonTest.replicas="$replicas" \
      --set podmonTest.podAffinity="$podAffinity" \
      --set podmonTest.unreachableTolerationSeconds="$unreachableTolerationSeconds" \
      --set podmonTest.image="$image" \
      --set podmonTest.zone="$zone" \
      --set podmonTest.driverLabel="$driverLabel"
  else
    helm install -n "$ns" "$ns" "${SCRIPTDIR}/deploy" \
      ${DEBUG} \
      --values deploy/values-vm.yaml \
      --set vmConfig.namespace="$ns" \
      --set vmConfig.storageClassName="$storageClassName" \
      --set vmConfig.ndevices="$ndevices" \
      --set vmConfig.nvolumes="$nvolumes" \
      --set vmConfig.instances="$instances" \
      --set vmConfig.podAffinity="$podAffinity" \
      --set vmConfig.unreachableTolerationSeconds="$unreachableTolerationSeconds" \
      --set vmConfig.zone="$zone" \
      --set vmConfig.driverLabel="$driverLabel"
  fi
  i=$((i + 1))
done
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Installs podmontest workloads with helm: one namespace and one release per
# instance, both named "<prefix><n>" for n = 1..instances.
#
# instances, ndevices, nvolumes, zone, storageClassName and workloadType take
# their defaults from same-named environment variables when set; command-line
# flags override them. Note that instances defaults to 0 here, so nothing is
# installed unless a count is supplied. REGISTRY_HOST (and optionally
# REGISTRY_PORT) select the registry the test image is pulled from. A non-empty
# DEBUG makes helm run with "--dry-run --debug".
#
# Requires bash (uses BASH_SOURCE).

SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
instances=${instances:-0}
ndevices=${ndevices:-0}
nvolumes=${nvolumes:-0}
zone=${zone:-""}
storageClassName=${storageClassName:-vxflexos-retain}
PODMONTEST_REGISTRY="$REGISTRY_HOST"
if [ -n "$REGISTRY_PORT" ]; then
  PODMONTEST_REGISTRY="$PODMONTEST_REGISTRY:$REGISTRY_PORT"
fi
image="$PODMONTEST_REGISTRY/podmontest:v0.0.58"
prefix="pmtv"
replicas=1
deploymentType="statefulset"
driverLabel="csi-vxflexos"
podAffinity="false"
unreachableTolerationSeconds=300
workloadType=${workloadType:-"pod"}

if [ -n "$DEBUG" ]; then
  DEBUG="--dry-run --debug"
fi

# require_value FLAG [VALUE ...] aborts when FLAG is the last argument, i.e.
# it was given without the value it needs.
require_value() {
  if [ $# -lt 2 ]; then
    echo "missing value for $1" >&2
    exit 2
  fi
}

# Consume the positional parameters directly so the flag being examined and
# the value being read can never drift apart.
while [ $# -gt 0 ]; do
  case "$1" in
    --instances)
      require_value "$@"
      instances=$2
      shift 2
      ;;
    --ndevices)
      require_value "$@"
      ndevices=$2
      shift 2
      ;;
    --nvolumes)
      require_value "$@"
      nvolumes=$2
      shift 2
      ;;
    --prefix)
      require_value "$@"
      prefix=$2
      shift 2
      ;;
    --storage-class)
      require_value "$@"
      storageClassName=$2
      shift 2
      ;;
    --replicas)
      require_value "$@"
      replicas=$2
      shift 2
      ;;
    --podAffinity)
      podAffinity="true"
      shift
      ;;
    --deployment)
      deploymentType="deployment"
      shift
      ;;
    --unreachableTolerationSeconds)
      require_value "$@"
      unreachableTolerationSeconds=$2
      shift 2
      ;;
    --label)
      require_value "$@"
      driverLabel=$2
      shift 2
      ;;
    --workload-type)
      require_value "$@"
      workloadType=$2
      shift 2
      ;;
    *)
      # Unrecognised arguments are ignored.
      shift
      ;;
  esac
done

# The loop below compares numerically; reject anything that is not a count.
case "$instances" in
  '' | *[!0-9]*)
    echo "--instances must be a non-negative integer, got '$instances'" >&2
    exit 2
    ;;
esac

# The --values paths below are relative to the script directory.
cd "$SCRIPTDIR" || exit 1

i=1
while [ "$i" -le "$instances" ]; do
  ns="${prefix}${i}"
  echo "$i"
  kubectl create namespace "$ns"
  # ${DEBUG} is left unquoted on purpose: it is either empty or two helm flags.
  if [ "$workloadType" = "pod" ]; then
    # Chart path is spelled the same way as in the VM branch; after the cd
    # above it resolves to the same directory as the bare "deploy".
    helm install -n "$ns" "$ns" "${SCRIPTDIR}/deploy" \
      ${DEBUG} \
      --values deploy/values-vxflex.yaml \
      --set podmonTest.namespace="$ns" \
      --set podmonTest.storageClassName="$storageClassName" \
      --set podmonTest.ndevices="$ndevices" \
      --set podmonTest.nvolumes="$nvolumes" \
      --set podmonTest.deploymentType="$deploymentType" \
      --set podmonTest.replicas="$replicas" \
      --set podmonTest.podAffinity="$podAffinity" \
      --set podmonTest.unreachableTolerationSeconds="$unreachableTolerationSeconds" \
      --set podmonTest.image="$image" \
      --set podmonTest.zone="$zone" \
      --set podmonTest.driverLabel="$driverLabel"
  else
    helm install -n "$ns" "$ns" "${SCRIPTDIR}/deploy" \
      ${DEBUG} \
      --values deploy/values-vm.yaml \
      --set vmConfig.namespace="$ns" \
      --set vmConfig.storageClassName="$storageClassName" \
      --set vmConfig.ndevices="$ndevices" \
      --set vmConfig.nvolumes="$nvolumes" \
      --set vmConfig.instances="$instances" \
      --set vmConfig.podAffinity="$podAffinity" \
      --set vmConfig.unreachableTolerationSeconds="$unreachableTolerationSeconds" \
      --set vmConfig.zone="$zone" \
      --set vmConfig.driverLabel="$driverLabel"
  fi
  i=$((i + 1))
done
// TAGSIZE standard size for a pod tag
const TAGSIZE = 16

// InitialPod is the prefix for the initial-pod tag line
const InitialPod = "initial-pod"

var (
	// rootDir is the directory scanned for "data*" and "blockdata*" entries.
	// It is replaced by the ROOT_DIR environment variable when that is set.
	rootDir = "/"
	// enableDoExit makes makeEntry terminate the process after an I/O error.
	enableDoExit bool
	// blockFiles keeps the block devices opened on the first pass, keyed by
	// directory-entry name, so later passes reuse the same handle.
	blockFiles map[string]*os.File
)

// main generates a random tag for this pod instance, reports what earlier
// instances left in the log, and then appends tagged, time-stamped entries to
// every data volume in an endless loop.
func main() {
	fmt.Printf("hello world\n")
	flag.BoolVar(&enableDoExit, "doexit", false, "enables exit if I/O error")
	flag.Parse()
	blockFiles = make(map[string]*os.File)
	podTag := make([]byte, TAGSIZE)
	if _, err := rand.Read(podTag); err != nil {
		fmt.Printf("Couldn't generate podTag: %s", err.Error())
	}
	// Assign to the package-level variable instead of shadowing it, so the
	// "/" default actually applies when ROOT_DIR is unset or empty.
	if dir := os.Getenv("ROOT_DIR"); dir != "" {
		rootDir = dir
	}
	initialPod := readExistingEntries(rootDir)
	fmt.Printf("initialPod: %t\n", initialPod)
	for i := 0; ; i++ {
		makeEntry(string(podTag), rootDir, i, initialPod)
	}
}

// readExistingEntries reports whether this is the initial pod instance.
//
// It looks for the first "data*" entry under rootDir whose "log" file can be
// opened, prints a summary of that log, and returns false. It returns true
// when rootDir cannot be read or no such log exists. Only the first readable
// log is examined.
func readExistingEntries(rootDir string) bool {
	entries, err := os.ReadDir(rootDir)
	if err != nil {
		fmt.Printf("Couldn't read %s\n", rootDir)
		return true
	}
	for _, entry := range entries {
		if !strings.HasPrefix(entry.Name(), "data") {
			continue
		}
		f, err := os.OpenFile(filepath.Join(rootDir, entry.Name(), "log"), os.O_RDONLY, 0o600)
		if err != nil {
			fmt.Printf("Couldn't open %s %s\n", entry.Name(), err.Error())
			continue
		}
		reportLog(f, entry.Name())
		if err := f.Close(); err != nil {
			fmt.Printf("closing file %s: %v", f.Name(), err)
		}
		// A log from an earlier instance exists, so this is not the initial pod.
		return false
	}
	return true
}

// reportLog scans one log file and prints, for each pod tag, the first line
// seen, plus the time gap across every blank-line boundary. A blank line is
// what makeEntry writes when an instance starts, so the gap is the time
// between the last entry of one instance and the first entry of the next.
// entryName is used only in the scan-error message.
func reportLog(f *os.File, entryName string) {
	var (
		timeSamples      int
		prevTime         time.Time
		computeTimeDelta bool
		key              string
	)
	printed := make(map[string]bool)
	reportedOtherKeys := make(map[string]bool)

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Text()
		if line == "" {
			// Boundary between instances: forget the current key and measure
			// the gap to the next time-stamped line.
			key = ""
			computeTimeDelta = true
			continue
		}
		if strings.HasPrefix(line, InitialPod) {
			fmt.Printf("%s\n", line)
			continue
		}
		parts := strings.SplitN(line, " ", 2)
		if key == "" {
			key = parts[0]
		}
		if key != parts[0] && !reportedOtherKeys[parts[0]] {
			fmt.Printf("mixed keys (could be due to replicas on same node): %s and %s\n", key, parts[0])
			reportedOtherKeys[parts[0]] = true
		}
		if !printed[key] {
			fmt.Printf("%s\n", line)
			printed[key] = true
		}
		if len(parts) < 2 {
			// Should have a pod id and a time as separate parts
			continue
		}
		stamp, err := time.Parse(time.Stamp, parts[1])
		if err != nil {
			fmt.Printf("ERROR: could not parse time %s\n", parts[1])
			continue
		}
		if computeTimeDelta && !prevTime.IsZero() {
			timeSamples++
			delta := stamp.Sub(prevTime)
			fmt.Printf("%s: delta time seconds %s\t%d\t%.0f\n", parts[1], key, timeSamples, delta.Seconds())
			computeTimeDelta = false
		}
		prevTime = stamp
	}
	if err := scanner.Err(); err != nil {
		fmt.Printf("ERROR scannning %s\n", entryName)
	}
}

// counter counts completed makeEntry passes; every tenth pass echoes its tag.
var counter int

// makeEntry appends one "<hex pod tag> <timestamp>" line to the log file of
// every "data*" directory under rootDir and writes the same line to every
// "blockdata*" entry.
//
// On the first pass (index == 0) each log first receives a blank separator
// line, preceded by an "initial-pod" line when initialPod is true, and each
// block entry is opened and remembered in blockFiles. Failures to open, write
// or sync are reported; when the -doexit flag is set they terminate the
// process with status 2 at the end of the pass.
func makeEntry(podTag, rootDir string, index int, initialPod bool) {
	tag := fmt.Sprintf("%x %s\n", podTag, time.Now().Format(time.Stamp))
	entries, err := os.ReadDir(rootDir)
	if err != nil {
		fmt.Printf("Couldn't read %s\n", rootDir)
		return
	}
	logged := false
	doExit := false
	for _, entry := range entries {
		name := entry.Name()
		if strings.HasPrefix(name, "data") {
			f, err := os.OpenFile(filepath.Join(rootDir, name, "log"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o600)
			if err != nil {
				fmt.Printf("Couldn't open %s %s\n", name, err.Error())
				doExit = true
				continue
			}
			if index == 0 {
				if initialPod {
					if _, err := f.WriteString(InitialPod + " " + tag); err != nil {
						fmt.Printf("writing to %s: %v", f.Name(), err)
					}
					fmt.Printf("%s %s\n", InitialPod, tag)
				}
				// The blank line marks the start of a new instance for readers.
				if _, err := f.WriteString("\n"); err != nil {
					fmt.Printf("writing to %s :%v", f.Name(), err)
				}
			}
			if _, err := f.WriteString(tag); err != nil {
				doExit = true
				fmt.Printf("Couldn't write %s %s", name, err.Error())
			}
			if err := f.Sync(); err != nil {
				doExit = true
				fmt.Printf("Couldn't sync %s %s", name, err.Error())
			}
			if err := f.Close(); err != nil {
				fmt.Printf("closing file %s: %v", f.Name(), err)
			}
			if !logged && counter%10 == 0 {
				fmt.Print(tag)
				logged = true
			}
		}
		if strings.HasPrefix(name, "blockdata") {
			f := blockFiles[name]
			if index == 0 {
				f, err = os.OpenFile(filepath.Join(rootDir, name), os.O_WRONLY, 0o600)
				if err != nil {
					fmt.Printf("Couldn't open %s %s\n", name, err.Error())
					doExit = true
					continue
				}
				blockFiles[name] = f
			}
			if f == nil {
				// Either the first-pass open failed or the entry appeared later;
				// there is no handle to write through.
				fmt.Printf("Couldn't write %s: not open\n", name)
				doExit = true
				continue
			}
			if _, err := f.WriteString(tag); err != nil {
				fmt.Printf("couldn't write %s: %v\n", tag, err)
			}
			if err := f.Sync(); err != nil {
				doExit = true
				fmt.Printf("Couldn't sync %s %s\n", name, err.Error())
			}
		}
	}
	if enableDoExit && doExit {
		fmt.Printf("Exiting due to I/O error\n")
		os.Exit(2)
	}
	counter++
}
# Removes podmontest deployments: deletes the helm release and then the
# namespace named "<prefix><n>" for n = start..instances.
#
# instances and start take their defaults from same-named environment
# variables when set. With "--all <driver>", the instance count is derived
# from the number of pods labelled podmon.dellemc.com/driver=csi-<driver>
# whose listing line contains the prefix.

instances=${instances:-4}
prefix="pmt"
remove_all=""
start=${start:-1}

# require_value FLAG [VALUE ...] aborts when FLAG is the last argument, i.e.
# it was given without the value it needs.
require_value() {
  if [ $# -lt 2 ]; then
    echo "missing value for $1" >&2
    exit 2
  fi
}

# Consume the positional parameters directly so the flag being examined and
# the value being read can never drift apart.
while [ $# -gt 0 ]; do
  case "$1" in
    --instances)
      require_value "$@"
      instances=$2
      shift 2
      ;;
    --prefix)
      require_value "$@"
      prefix=$2
      shift 2
      ;;
    --all)
      require_value "$@"
      remove_all=$2
      shift 2
      ;;
    --start)
      require_value "$@"
      start=$2
      shift 2
      ;;
    *)
      # Unrecognised arguments are ignored.
      shift
      ;;
  esac
done

if [ -n "$remove_all" ]; then
  instances=$(kubectl get pods -l "podmon.dellemc.com/driver=csi-${remove_all}" -A | grep -c "$prefix")
fi

# Releases are deleted in parallel; wait for all of them before touching the
# namespaces they live in.
i=$start
while [ "$i" -le "$instances" ]; do
  helm delete -n "${prefix}${i}" "${prefix}${i}" &
  i=$((i + 1))
done
wait

# Start from the same index as the release loop so that namespaces below
# $start, whose releases were left in place, are not deleted.
i=$start
while [ "$i" -le "$instances" ]; do
  kubectl delete namespace "${prefix}${i}" &
  i=$((i + 1))
done
wait
// main parses the --addr flag and runs the reverse proxy. Any failure is
// printed and reported through a non-zero exit status.
func main() {
	addr := flag.String("addr", "", "server address")
	flag.Parse()

	if err := run(*addr); err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
}

// run validates addr, writes a fresh self-signed key pair to key.pem and
// cert.pem in the working directory, and serves an HTTPS reverse proxy for
// addr on port 8080. It returns only when setup or the server fails.
func run(addr string) error {
	u, err := url.Parse(addr)
	if err != nil {
		return fmt.Errorf("parsing --addr: %w", err)
	}
	// url.Parse accepts an empty string and scheme-less hosts; the proxy
	// needs an absolute URL to know where to forward requests.
	if u.Scheme == "" || u.Host == "" {
		return fmt.Errorf("--addr must be an absolute URL such as https://10.0.0.1, got %q", addr)
	}

	if err := generateX509Certificate(); err != nil {
		return err
	}

	rp := httputil.NewSingleHostReverseProxy(u)
	// Certificate verification towards the backend is deliberately disabled.
	rp.Transport = &http.Transport{
		TLSClientConfig: &tls.Config{
			InsecureSkipVerify: true, // #nosec G402
		},
	}

	svr := &http.Server{
		Addr:    ":8080",
		Handler: rp,
		// InsecureSkipVerify only affects TLS clients, so it is not set on
		// the listening side.
		TLSConfig: &tls.Config{
			MinVersion: tls.VersionTLS12,
		},
		ReadHeaderTimeout: 5 * time.Second,
	}

	return svr.ListenAndServeTLS("cert.pem", "key.pem")
}

// generateX509Certificate creates a 2048-bit RSA key and a self-signed
// certificate for it, and writes them PEM-encoded to key.pem and cert.pem in
// the working directory (mode 0600). The certificate is valid from one hour
// in the past, to tolerate clock skew, until one year from now.
func generateX509Certificate() error {
	// Generate the private key.
	key, err := rsa.GenerateKey(rand.Reader, 2048)
	if err != nil {
		return fmt.Errorf("generating private key: %w", err)
	}

	// Use the private key to generate a PEM block.
	keyPem := pem.EncodeToMemory(&pem.Block{
		Type:  "RSA PRIVATE KEY",
		Bytes: x509.MarshalPKCS1PrivateKey(key),
	})

	err = os.WriteFile("key.pem", keyPem, 0o600)
	if err != nil {
		return fmt.Errorf("writing key.pem: %w", err)
	}

	// Generate the certificate. A 128-bit random serial makes collisions
	// between regenerated certificates practically impossible.
	serialLimit := new(big.Int).Lsh(big.NewInt(1), 128)
	serial, err := rand.Int(rand.Reader, serialLimit)
	if err != nil {
		return fmt.Errorf("getting random number: %w", err)
	}
	now := time.Now()
	tml := x509.Certificate{
		SerialNumber: serial,
		Subject: pkix.Name{
			CommonName:   "array-proxy",
			Organization: []string{"Dell"},
		},
		// Without an explicit window both bounds are the zero time and the
		// certificate is never valid.
		NotBefore:             now.Add(-time.Hour),
		NotAfter:              now.AddDate(1, 0, 0),
		KeyUsage:              x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment,
		ExtKeyUsage:           []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
		BasicConstraintsValid: true,
	}
	cert, err := x509.CreateCertificate(rand.Reader, &tml, &tml, &key.PublicKey, key)
	if err != nil {
		return fmt.Errorf("creating certificate: %w", err)
	}

	// Use the certificate to generate a PEM block.
	certPem := pem.EncodeToMemory(&pem.Block{
		Type:  "CERTIFICATE",
		Bytes: cert,
	})

	err = os.WriteFile("cert.pem", certPem, 0o600)
	if err != nil {
		return fmt.Errorf("writing cert.pem: %w", err)
	}
	return nil
}
26 | 27 | At each number of pods/VMs to be tested, scaleup-powerflex.sh/scaleup-unity.sh/scaleup-powerscale.sh/scaleup-powerstore.sh invokes _nway.sh_, which runs the actual testing. 28 | Nway.sh provides up to 10 groups of nodes that are failed as a unit, so you can divide your cluster into 29 | any number of groups between 2 and 10. These are configured in the NODELIST1... NODELIST10 variables. 30 | The test fails each configured group in a successive iteration (empty groups are skipped). 31 | When invoked, the caller can specify how long the interfaces are to be down using the --bounceipsec argument, 32 | the maximum number of iterations to do using --maxiterations, the timeout value for an iteration using 33 | --timeoutseconds, the workload type to be tested (either vm or pod) using --workload-type, and the username and password used to copy scripts onto the nodes using --node-user and --password. 34 | 35 | These values can either be passed as arguments when invoking the scripts or exported beforehand, as shown below: 36 | 37 | export REGISTRY_PORT='5000' 38 | export REGISTRY_HOST='10.247.98.98' 39 | export MAXINSTANCES= 40 | export ISVIRTUALIZATION=true/false 41 | export NODE_USER=core/root 42 | export PASSWORD= 43 | 44 | 45 | Each iteration is divided into three phases: 46 | 47 | 1. Evacuation of pods/VMs from nodes that are down. At the end of this phase a message is printed similar to 48 | "movedPods: 4 evacuation time seconds: 30". This allows you to determine how long it took until all pods were rescheduled. 49 | 50 | 2. Waiting on the pods/VMs that were rescheduled to reach a running state. At the end of this phase a message 51 | similar to "moving pods: 4 time for pod recovery seconds: 70" is printed. This is the time from the initiation of 52 | node failure until all the pods/VMs were moved and reached the running state again.
This is the metric generally used 53 | for scalability: it is plotted with the number of pods that were impacted on the X-axis and the time until all 54 | pods were recovered on the Y-axis. 55 | 56 | 3. Waiting on the taints to be removed from the failed nodes. At the end of this phase a message is printed 57 | that gives the length of time after the interfaces have been restored to operational state until all the taints 58 | have been removed (indicating the nodes are cleaned up). 59 | 60 | At the end of an iteration, after a 60 second delay, the status of all the protected pods is displayed and a plot of Number of Instances vs. Time Taken for Recovery of pods is generated. 61 | If any pods are not running, nway.sh exits. 62 | 63 | To generate the plot, install the following packages: 64 | yum install -y python3-pip 65 | pip3 install pandas matplotlib 66 | 67 | If the script times out, the collect_logs.sh script is called to collect all the logs necessary to analyze the potential failure. 68 | 69 | -------------------------------------------------------------------------------- /test/sh/basic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Copyright (c) 2021-2022 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
# This is a basic test that cycles through a set of listed nodes and kills
# them, making sure all the labeled apps continue to run after time for
# recovery.
#
# Settings (each may be supplied through the environment):
#   NODELIST           space-separated nodes to bounce; required
#   APP_RECOVERY_TIME  seconds to wait after a bounce before checking pods
#   NODE_DELAY_TIME    seconds to wait between nodes
#   REBOOT             "on" to also run reboot.node on the node 60 s later
NODELIST=${NODELIST:-""}
APP_RECOVERY_TIME=${APP_RECOVERY_TIME:-540}
NODE_DELAY_TIME=${NODE_DELAY_TIME:-30}
REBOOT=${REBOOT:-off}
COUNT=0

# With no nodes the loop below would spin forever without doing anything.
if [ -z "$NODELIST" ]; then
  echo "NODELIST is empty; set it to the nodes to be bounced" >&2
  exit 2
fi

# check_running prints the pods carrying the podmon.dellemc.com/driver label
# and terminates the test with status 2 if any of them is not Running.
check_running() {
  pod_listing=$(kubectl get pods -l podmon.dellemc.com/driver -A -o wide)
  echo "$pod_listing"
  # Row 1 is the column header; count the remaining rows that are not Running.
  non_running_pods=$(echo "$pod_listing" | awk 'NR > 1 && !/Running/' | wc -l)
  if [ "$non_running_pods" -gt 0 ]; then
    echo "some applications not running- terminating test"
    exit 2
  fi
  return 0
}
check_running

while true
do
  for node in $NODELIST
  do
    # kill node
    date
    COUNT=$((COUNT + 1))
    echo bouncing $node $COUNT
    ssh "$node" nohup sh /root/bounce.ip &
    if [ "$REBOOT" = "on" ]
    then
      sleep 60
      ssh "$node" nohup sh reboot.node
    fi
    sleep "$APP_RECOVERY_TIME"
    date
    check_running
    sleep "$NODE_DELAY_TIME"
  done
done
# This script is compatible with RHEL 9+
#
# Takes network interfaces down for a while and brings them back up, logging
# each step to /root/bounce.ip.out.
#   --seconds N      how long the interfaces stay down (default 240)
#   --interfaces L   interfaces to bounce (default: every 'ensNNN' interface)
LOG=/root/bounce.ip.out
interfaces=$(ip -o link show | awk -F': ' '/ens[0-9]+/ {print $2}')
seconds=240

for param in $*; do
  case $param in
    "--seconds")
      shift
      seconds=$1
      shift
      ;;
    "--interfaces")
      shift
      interfaces=$1
      shift
      ;;
  esac
done

# note WORDS... appends a time-stamped line made of WORDS to the log.
note() {
  echo "$(date +"%Y-%m-%d %H:%M:%S")" "$@" >>$LOG
}

# switch_links STATE sets every selected interface to STATE (up or down) and
# records the time and resulting link state in the log.
switch_links() {
  for iface in $interfaces; do
    ip link set $iface $1
    date >>$LOG
    ip link show $iface >>$LOG
  done
}

note "-- invoking with seconds=$seconds interfaces=$interfaces"
switch_links down

note -- waiting $seconds before reenabling interfaces
sleep $seconds
note -- done, going to renable interfaces

switch_links up
ip route flush cache
sleep 180
systemctl restart NetworkManager
# See the License for the specific language governing permissions and
# limitations under the License.

# Stops the kubelet service, waits, and starts it again, logging both steps
# to /root/bounce.kubelet.out.
#   --seconds N   how long kubelet stays stopped (default 600)
LOG=/root/bounce.kubelet.out
seconds=600

for param in $*; do
  case $param in
    "--seconds")
      shift
      seconds=$1
      shift
      ;;
  esac
done

# note TEXT appends a time-stamped TEXT line to the log.
note() {
  echo "$(date +"%Y-%m-%d %H:%M:%S")" "$1" >>$LOG
}

note "-- stopping kubelet for seconds=$seconds"
systemctl stop kubelet
sleep $seconds
note "-- starting kubelet after seconds=$seconds"
systemctl start kubelet
# Usage: failnodes.sh [--seconds N] node...
#
# Starts /root/bounce.ip on every listed node over ssh, in the background.
# --seconds is passed through to bounce.ip (default 240).

nodelist=""
seconds=240

# Consume the positional parameters directly. This keeps the value that
# follows --seconds from being treated as a node name as well, which used to
# cause one shift too many at the end of the argument list.
while [ $# -gt 0 ]; do
  case "$1" in
    --seconds)
      if [ $# -lt 2 ]; then
        echo "missing value for --seconds" >&2
        exit 2
      fi
      seconds=$2
      shift 2
      ;;
    *)
      nodelist="$nodelist $1"
      shift
      ;;
  esac
done

for node in $nodelist
do
  echo bounce.ip --seconds $seconds $node
  ssh "$node" nohup sh /root/bounce.ip --seconds "$seconds" &
done
# Usage: failpods.sh --ns driver_namespace [--timeoutseconds value]
#
# Fails the driver pods on the first worker node by patching the container
# named "driver" with an image that does not exist, waits, and then patches
# the original image back.

namespace=""
# Seconds the pods stay failed. An exported timeoutseconds is honoured; the
# 60 s fallback keeps "sleep" from being called without an operand.
timeoutseconds=${timeoutseconds:-60}

while [ $# -gt 0 ]; do
  case "$1" in
    --ns)
      if [ $# -lt 2 ]; then
        echo "missing value for --ns" >&2
        exit 2
      fi
      namespace=$2
      shift 2
      ;;
    --timeoutseconds)
      if [ $# -lt 2 ]; then
        echo "missing value for --timeoutseconds" >&2
        exit 2
      fi
      timeoutseconds=$2
      shift 2
      ;;
    --help)
      echo "parameters: --ns driver_namespace [--timeoutseconds value]"
      exit
      ;;
    *)
      # Unrecognised arguments are ignored.
      shift
      ;;
  esac
done

[ "$namespace" = "" ] && echo "Required argument: --ns driver_namespace [--timeoutseconds value]" && exit 2

# nodeList returns a list of nodes, excluding master/control-plane nodes
nodeList() {
  kubectl get nodes -A | grep -v -E 'mast.r|control-plane' | grep -v NAME | awk '{ print $1 }'
}

# getNumOfPods returns the number of pods on a node $1 for a specific namespace
getNumOfPods() {
  # Add an extra space on match string to differentiate worker-1 from worker-10
  kubectl get pods -A -o wide | grep "$1 " | grep "$namespace" | wc -l
}

# getWorker returns the first worker node that we are targeting
getWorker() {
  kubectl get nodes -A | grep -v -E 'mast.r|control-plane' | grep -v NAME | awk 'NR==1{ print $1 }'
}

# getRunningPods returns the names of the pods on the targeted worker node
# whose listing line mentions the namespace, excluding controller pods.
# Despite the name, pod status is not filtered.
getRunningPods() {
  node=$(getWorker)
  # Trailing space for the same reason as in getNumOfPods.
  kubectl get pods -A -o wide | grep "$node " | grep "$namespace" | grep -v 'controller' | awk '{ print $2 }'
}

# getDriverImage returns the driver image before it is patched. Only the
# first pod's image is reported so the result is always a single value.
# NOTE(review): reads .spec.containers[1], i.e. assumes the driver is the
# second container of the pod - confirm against the driver manifests.
getDriverImage() {
  for pod in $(getRunningPods); do
    kubectl get pod "$pod" -n "$namespace" -o custom-columns=IMAGE:.spec.containers[1].image | awk 'FNR == 2 {print}'
  done | head -n 1
}

# failPodsInNS will fail the pods for a specific namespace by patching them
# with an unknown driver image
failPodsInNS() {
  ns=$namespace
  pods=$(getRunningPods)
  for pod in $pods; do
    echo "Failing pods: $pods "
    kubectl patch pod "$pod" -n "$ns" --patch '{"spec": {"containers": [{"name": "driver", "image": "podmontest"}]}}'
  done
}

# process_pods runs the whole scenario: record the current image, report the
# pod count per node, fail the pods, wait, restore the image.
process_pods() {
  echo "Failing CSI driver pod for a single worker node..."
  initialImage=$(getDriverImage)
  if [ -z "$initialImage" ]; then
    # Without the original image the pods could not be restored afterwards.
    echo "could not determine the driver image in namespace $namespace" >&2
    exit 2
  fi

  # returns a list of nodes
  nodes=$(nodeList)
  echo Nodes: $nodes

  # returns # of pods on a node
  for n in $nodes; do
    podCount=$(getNumOfPods "$n")
    echo node $n has $podCount pods in namespace $namespace
  done

  # Remember exactly which pods are about to be failed so that the same set
  # is restored later, independent of variables set by other functions.
  failedPods=$(getRunningPods)

  echo "Begin failing pods in namespace $namespace"
  failPodsInNS

  echo "Fail time in seconds:" $timeoutseconds
  sleep "$timeoutseconds"

  echo "Begin patching pods in namespace $namespace"
  echo "Patching pods: $failedPods "
  for pod in $failedPods; do
    kubectl patch pod "$pod" -n "$namespace" --patch '{"spec": {"containers": [{"name": "driver", "image": "'"${initialImage}"'"}]}}'
  done

  echo "Waiting for $failedPods to come back"
  node=$(getWorker)
  podStatus=$(kubectl get pods -n "$namespace" -o wide | grep "$node " | grep Running | grep -v NAME | wc -l)
  # NOTE(review): this pauses once, and only when more than one pod is already
  # Running; it does not poll until the pods recover - confirm the intent.
  if [ "$podStatus" -gt 1 ]; then
    sleep 60
  fi
  echo "Fail test complete"
}

process_pods
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # This is script is for programmatic invocation of a shell script 17 | echo "$(date +"%Y-%m-%d %H:%M:%S")" -- invoking script $* >>/tmp/invoke.log 18 | nohup sudo $* & 19 | -------------------------------------------------------------------------------- /test/sh/mon.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Copyright (c) 2021-2022 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | namespace="vxflexos" 16 | once=0 17 | 18 | for param in $*; do 19 | case $param in 20 | "--ns") 21 | shift 22 | namespace=$1 23 | shift 24 | ;; 25 | "--once") 26 | shift 27 | once=1 28 | ;; 29 | "--help") 30 | shift 31 | echo "parameters: --ns driver_namespace [ --label podmon_label ]" 32 | exit 33 | ;; 34 | esac 35 | done 36 | 37 | alias k=kubectl 38 | while true; 39 | do 40 | date 41 | k get nodes -o wide 42 | k get nodes -o custom-columns=NAME:.metadata.name,TAINTS:.spec.taints 43 | k get leases -n $namespace 44 | k get pods -n $namespace -o wide 45 | k get pods -l podmon.dellemc.com/driver -A -o wide 46 | if [ $once -gt 0 ]; then exit 0; fi 47 | sleep 5 48 | done 49 | -------------------------------------------------------------------------------- /test/sh/plot_scale_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2025 Dell Inc., or its subsidiaries. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Pre-Req: Install pip, pandas and matplotlib if not already installed 16 | # yum install -y python3-pip 17 | # pip3 install pandas matplotlib 18 | 19 | import pandas as pd 20 | import matplotlib.pyplot as plt 21 | df = pd.read_csv('recovery_times.csv') 22 | plt.plot(df['num_instances'], df['recovery_time_sec'], marker='o') 23 | plt.title('Number of Instances vs. 
Time Taken for Recovery') 24 | plt.xlabel('Number of Instances') 25 | plt.ylabel('Recovery Time (Seconds)') 26 | plt.grid(True) 27 | plt.savefig('recovery_graph.png') 28 | -------------------------------------------------------------------------------- /test/sh/rebalance.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright (c) 2022 Dell Inc., or its subsidiaries. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Rebalance pods by 18 | # 1. tainting overloaded nodes, 19 | # 2. removing pods from partially populated namespaces, 20 | # 3. Waiting 10 minutes for the pods to get recreated, 21 | # 4. 
# Rebalance pods by
# 1. cordoning overloaded nodes,
# 2. removing pods from partially populated namespaces,
# 3. waiting 10 minutes for the pods to get recreated,
# 4. uncordoning the nodes that were cordoned

maxPods=90

# nodelist returns the names of all nodes that are not master/control-plane
nodelist() {
  kubectl get nodes -A | grep -v -E 'mast.r|control-plane' | grep -v NAME | awk '{ print $1 }'
}

# get the number of pods on a node $1
podsOnNode() {
  # Add an extra space on match string to differentiate worker-1 from worker-10
  kubectl get pods -A -o wide | grep "$1 " | wc -l
}

# get the namespaces that have pending pods, each namespace listed once
getNSOfPendingPods() {
  # sort -u: without it a namespace with several pending pods is returned
  # several times and its pods would be deleted again on every repetition.
  kubectl get pods -A -o wide | grep Pending | grep -v default | awk '{ print $1}' | sort -u
}

# cordon a k8s node $1=node id
cordon() {
  echo "cordoning node $1"
  kubectl cordon "$1"
}

# uncordon a k8s node $1=node id
uncordon() {
  echo "uncordoning node $1"
  kubectl uncordon "$1"
}

# force-delete all pods in namespace $1=namespace
deletePodsInNS() {
  pods=$(kubectl get pods -n "$1" | grep -v NAME | awk '{print $1}')
  echo pods "$pods to be deleted"
  for pod in $pods; do
    echo "kubectl delete pod --grace-period 0 -n $1 $pod"
    kubectl delete pod --grace-period 0 -n "$1" "$pod"
  done
}

# rebalance cordons every node holding more than $maxPods pods, deletes the
# pods of namespaces that have pending pods so they get rescheduled, watches
# for ten minutes, and finally uncordons the nodes it cordoned.
rebalance() {
  echo "Rebalancing pods to nodes..."
  cordonedNodes=""
  nodes=$(nodelist)
  echo nodes: $nodes
  for n in $nodes; do
    pods=$(podsOnNode "$n")
    echo node $n has $pods pods
    if [ "$pods" -gt "$maxPods" ]; then
      cordon "$n"
      cordonedNodes="$cordonedNodes $n"
    fi
  done
  echo cordonedNodes: $cordonedNodes
  namespaces=$(getNSOfPendingPods)
  for ns in $namespaces; do
    echo "deleting pods in namespace $ns"
    deletePodsInNS "$ns"
  done
  echo "waiting for pods to get moved"
  # Ten one-minute rounds, each listing the protected pods not yet Running
  for i in 1 2 3 4 5 6 7 8 9 10; do
    kubectl get pods -l podmon.dellemc.com/driver -A -o wide | grep -v NAME | grep -v Running
    sleep 60
  done
  for n in $cordonedNodes; do
    uncordon "$n"
  done
  echo "Rebalancing complete"
}

rebalance
15 | 16 | OUTPUT_FILE="/root/reboot.out" 17 | # By default - all 'ensNNN' interfaces 18 | interfaces=$(ip -o link show | awk -F': ' '/ens[0-9]+/ {print $2}') 19 | seconds=0 20 | 21 | for param in $*; do 22 | case $param in 23 | "--seconds") 24 | shift 25 | seconds=$1 26 | shift 27 | ;; 28 | "--interfaces") 29 | shift 30 | interfaces=$1 31 | shift 32 | ;; 33 | esac 34 | done 35 | 36 | echo "$(date +"%Y-%m-%d %H:%M:%S")" "-- invoking with seconds=$seconds interfaces=$interfaces" >>"$OUTPUT_FILE" 37 | # If seconds was specified, then we want to bring down the interfaces, 38 | # wait that number of seconds, then reboot. This is to simulate a long 39 | # reboot time or a long recovery from a reboot. 40 | if [ $seconds -gt 0 ]; then 41 | echo "$(date +"%Y-%m-%d %H:%M:%S")" -- Bringing down interfaces >>"$OUTPUT_FILE" 42 | for iface in $interfaces; do 43 | ip link set "$iface" down 44 | date >>"$OUTPUT_FILE" 45 | ip link show "$iface" >>"$OUTPUT_FILE" 46 | done 47 | 48 | echo "$(date +"%Y-%m-%d %H:%M:%S")" -- Sleeping "$seconds" before rebooting >>"$OUTPUT_FILE" 49 | sleep "$seconds" 50 | fi 51 | 52 | echo "$(date +"%Y-%m-%d %H:%M:%S")" -- Rebooting node >>"$OUTPUT_FILE" 53 | reboot --reboot 54 | -------------------------------------------------------------------------------- /test/ssh/cli/main.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2021-2024 Dell Inc. or its subsidiaries. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package main 16 | 17 | import ( 18 | "context" 19 | "fmt" 20 | "os" 21 | "path/filepath" 22 | "podmon/test/ssh" 23 | "time" 24 | ) 25 | 26 | func main() { 27 | ctx := context.Background() 28 | info := ssh.AccessInfo{ 29 | Hostname: os.Getenv("HOST"), 30 | Port: "22", 31 | Username: os.Getenv("USER"), 32 | Password: os.Getenv("PASSWORD"), 33 | } 34 | 35 | wrapper := ssh.NewWrapper(&info) 36 | 37 | client := ssh.CommandExecution{ 38 | AccessInfo: &info, 39 | SSHWrapper: wrapper, 40 | Timeout: 4 * time.Second, 41 | } 42 | 43 | if err := client.Run("date; ls -ltr /tmp"); err == nil { 44 | for _, out := range client.GetOutput() { 45 | fmt.Printf("%s\n", out) 46 | } 47 | } else { 48 | fmt.Printf("ERROR %s\n", err) 49 | } 50 | 51 | err := client.Copy(ctx, filepath.Join("C:\\", "workspace", "karavi-resiliency", "test", "sh", "bounce.ip"), "/tmp/bounce.ip") 52 | if err != nil { 53 | fmt.Printf("ERROR %v", err) 54 | } 55 | 56 | if err := client.Run("date; ls -ltr /tmp; cat /tmp/bounce.ip; rm -f /tmp/bounce.ip"); err == nil { 57 | for _, out := range client.GetOutput() { 58 | fmt.Printf("%s\n", out) 59 | } 60 | } else { 61 | fmt.Printf("ERROR %s\n", err) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /test/ssh/mocks/mock_client_wrapper.go: -------------------------------------------------------------------------------- 1 | // Code generated by MockGen. DO NOT EDIT. 2 | // Source: podmon/test/ssh (interfaces: ClientWrapper) 3 | 4 | // Package mocks is a generated GoMock package. 5 | package mocks 6 | 7 | import ( 8 | context "context" 9 | os "os" 10 | ssh "podmon/test/ssh" 11 | reflect "reflect" 12 | 13 | gomock "github.com/golang/mock/gomock" 14 | ) 15 | 16 | // MockClientWrapper is a mock of ClientWrapper interface. 
// MockClientWrapper is a mock of ClientWrapper interface.
//
// This type and its methods are MockGen output ("DO NOT EDIT"); regenerate
// the file instead of changing the code by hand.
type MockClientWrapper struct {
	ctrl     *gomock.Controller
	recorder *MockClientWrapperMockRecorder
}

// MockClientWrapperMockRecorder is the mock recorder for MockClientWrapper.
type MockClientWrapperMockRecorder struct {
	mock *MockClientWrapper
}

// NewMockClientWrapper creates a new mock instance.
func NewMockClientWrapper(ctrl *gomock.Controller) *MockClientWrapper {
	mock := &MockClientWrapper{ctrl: ctrl}
	mock.recorder = &MockClientWrapperMockRecorder{mock}
	return mock
}

// EXPECT returns an object that allows the caller to indicate expected use.
func (m *MockClientWrapper) EXPECT() *MockClientWrapperMockRecorder {
	return m.recorder
}

// Close mocks base method.
func (m *MockClientWrapper) Close() error {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "Close")
	// A failed type assertion is ignored and leaves ret0 nil.
	ret0, _ := ret[0].(error)
	return ret0
}

// Close indicates an expected call of Close.
func (mr *MockClientWrapperMockRecorder) Close() *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Close", reflect.TypeOf((*MockClientWrapper)(nil).Close))
}

// Copy mocks base method.
func (m *MockClientWrapper) Copy(arg0 context.Context, arg1 os.File, arg2, arg3 string) error {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "Copy", arg0, arg1, arg2, arg3)
	ret0, _ := ret[0].(error)
	return ret0
}

// Copy indicates an expected call of Copy.
func (mr *MockClientWrapperMockRecorder) Copy(arg0, arg1, arg2, arg3 interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Copy", reflect.TypeOf((*MockClientWrapper)(nil).Copy), arg0, arg1, arg2, arg3)
}

// GetSession mocks base method.
func (m *MockClientWrapper) GetSession(arg0 string) (ssh.SessionWrapper, error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "GetSession", arg0)
	ret0, _ := ret[0].(ssh.SessionWrapper)
	ret1, _ := ret[1].(error)
	return ret0, ret1
}

// GetSession indicates an expected call of GetSession.
func (mr *MockClientWrapperMockRecorder) GetSession(arg0 interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetSession", reflect.TypeOf((*MockClientWrapper)(nil).GetSession), arg0)
}

// SendRequest mocks base method.
func (m *MockClientWrapper) SendRequest(arg0 string, arg1 bool, arg2 []byte) (bool, error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "SendRequest", arg0, arg1, arg2)
	ret0, _ := ret[0].(bool)
	ret1, _ := ret[1].(error)
	return ret0, ret1
}

// SendRequest indicates an expected call of SendRequest.
func (mr *MockClientWrapperMockRecorder) SendRequest(arg0, arg1, arg2 interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SendRequest", reflect.TypeOf((*MockClientWrapper)(nil).SendRequest), arg0, arg1, arg2)
}
// MockSessionWrapper is a mock of SessionWrapper interface.
//
// This type and its methods are MockGen output ("DO NOT EDIT"); regenerate
// the file instead of changing the code by hand.
type MockSessionWrapper struct {
	ctrl     *gomock.Controller
	recorder *MockSessionWrapperMockRecorder
}

// MockSessionWrapperMockRecorder is the mock recorder for MockSessionWrapper.
type MockSessionWrapperMockRecorder struct {
	mock *MockSessionWrapper
}

// NewMockSessionWrapper creates a new mock instance.
func NewMockSessionWrapper(ctrl *gomock.Controller) *MockSessionWrapper {
	mock := &MockSessionWrapper{ctrl: ctrl}
	mock.recorder = &MockSessionWrapperMockRecorder{mock}
	return mock
}

// EXPECT returns an object that allows the caller to indicate expected use.
func (m *MockSessionWrapper) EXPECT() *MockSessionWrapperMockRecorder {
	return m.recorder
}

// Close mocks base method.
func (m *MockSessionWrapper) Close() error {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "Close")
	// A failed type assertion is ignored and leaves ret0 nil.
	ret0, _ := ret[0].(error)
	return ret0
}

// Close indicates an expected call of Close.
func (mr *MockSessionWrapperMockRecorder) Close() *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Close", reflect.TypeOf((*MockSessionWrapper)(nil).Close))
}

// CombinedOutput mocks base method.
func (m *MockSessionWrapper) CombinedOutput(arg0 string) ([]byte, error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "CombinedOutput", arg0)
	ret0, _ := ret[0].([]byte)
	ret1, _ := ret[1].(error)
	return ret0, ret1
}

// CombinedOutput indicates an expected call of CombinedOutput.
func (mr *MockSessionWrapperMockRecorder) CombinedOutput(arg0 interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CombinedOutput", reflect.TypeOf((*MockSessionWrapper)(nil).CombinedOutput), arg0)
}

// SendRequest mocks base method.
func (m *MockSessionWrapper) SendRequest(arg0 string, arg1 bool, arg2 []byte) (bool, error) {
	m.ctrl.T.Helper()
	ret := m.ctrl.Call(m, "SendRequest", arg0, arg1, arg2)
	ret0, _ := ret[0].(bool)
	ret1, _ := ret[1].(error)
	return ret0, ret1
}

// SendRequest indicates an expected call of SendRequest.
func (mr *MockSessionWrapperMockRecorder) SendRequest(arg0, arg1, arg2 interface{}) *gomock.Call {
	mr.mock.ctrl.T.Helper()
	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SendRequest", reflect.TypeOf((*MockSessionWrapper)(nil).SendRequest), arg0, arg1, arg2)
}
# collect_logs.sh collects the CSI driver logs, podmon logs, and the events of
# the protected pod namespaces into driver.logs.<timestamp>.tgz in the
# directory the script was started from.
#   --ns driver_namespace   namespace of the CSI driver (required)
#   --label podmon_label    label selecting the protected pods

ns=""
podmon_label="podmon.dellemc.com/driver"
CWD=$(pwd)
TAR=$(command -v tar)
CONTAINERS="podmon driver"

for param in $*; do
  case $param in
  "--ns")
    shift
    ns=$1
    shift
    ;;
  "--label")
    shift
    podmon_label=$1
    shift
    ;;
  "--help")
    shift
    echo "parameters: --ns driver_namespace [ --label podmon_label ]"
    exit
    ;;
  esac
done

[ "$ns" = "" ] && echo "Required argument: --ns driver_namespace" && exit 2
if [ -z "$TAR" ]; then
  echo "tar not found in PATH" >&2
  exit 1
fi
echo "Collecting logs driver namespace $ns podmon label $podmon_label"

# getpods prints the names of all pods in the driver namespace on one line
getpods() {
  pods=$(kubectl get pods -n "$ns" | awk '/^NAME/ { next; }; /.*/ { print $1}')
  echo $pods
}

# getprotectedpods prints the namespaces (first column of the -A listing) that
# contain pods carrying the podmon label, each namespace once
getprotectedpods() {
  protectedpods=$(kubectl get pods -A -l "$podmon_label" | awk '/^NAME/ { next; }; /.*/ { print $1}' | sort -u)
  echo $protectedpods
}

TEMPDIR=$(mktemp -d)
echo "Using TEMPDIR $TEMPDIR"
TIMESTAMP=$(date +%Y%m%d_%H%M)
echo $TIMESTAMP
pods=$(getpods)
echo pods $pods

# Collect the logs into the TEMPDIR
# Stop if the directory is unusable, so nothing is written to (or later
# removed from) an unintended location.
cd "$TEMPDIR" || exit 1
kubectl get nodes -o wide >nodes.list
kubectl get nodes -o custom-columns=NAME:.metadata.name,TAINTS:.spec.taints >taints.list
kubectl get pods -n "$ns" -o wide >driver.pods.list
kubectl get pods -A -o wide -l "$podmon_label" >protected.pods.list
for pod in $pods; do
  for container in $CONTAINERS; do
    # Progress goes to the terminal; it used to be written to the log file
    # and was immediately overwritten by the command below.
    echo kubectl logs -n $ns $pod -c $container
    kubectl logs -n "$ns" "$pod" -c "$container" >"$ns.$pod.$container.log"
  done
done

# Collect the events for the protected pod namespaces
protectedpods=$(getprotectedpods)
for podns in $protectedpods; do
  count=$(kubectl get events -n "$podns" | grep -v '^LAST' | wc -l)
  if [ "$count" -gt 0 ]; then
    kubectl get events -n "$podns" >"$podns.events"
  fi
done

DIRNAME=$(basename "$TEMPDIR")
TARNAME="$CWD/driver.logs.$TIMESTAMP.tgz"
# mktemp honours $TMPDIR, so the parent is not necessarily /tmp
cd "$(dirname "$TEMPDIR")" || exit 1

# Tar up the logs using the time stamp
echo "$TAR" -c -z -v -f $TARNAME $DIRNAME
"$TAR" -c -z -v -f "$TARNAME" "$DIRNAME"

# Remove the temporary directory
rm -rf "$TEMPDIR"
15 | namespace="vxflexos" 16 | once=0 17 | 18 | for param in $*; do 19 | case $param in 20 | "--ns") 21 | shift 22 | namespace=$1 23 | shift 24 | ;; 25 | "--once") 26 | shift 27 | once=1 28 | ;; 29 | "--help") 30 | shift 31 | echo "parameters: --ns driver_namespace [ --label podmon_label ]" 32 | exit 33 | ;; 34 | esac 35 | done 36 | 37 | alias k=kubectl 38 | while true; 39 | do 40 | date 41 | k get nodes -o wide 42 | k get nodes -o custom-columns=NAME:.metadata.name,TAINTS:.spec.taints 43 | k get leases -n $namespace 44 | k get pods -n $namespace -o wide 45 | k get pods -l podmon.dellemc.com/driver -A -o wide 46 | if [ $once -gt 0 ]; then exit 0; fi 47 | sleep 5 48 | done 49 | -------------------------------------------------------------------------------- /tools/monx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Copyright (c) 2021-2022 Dell Inc., or its subsidiaries. All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | alias k=kubectl 16 | once=$1 17 | while true; 18 | do 19 | date 20 | k get nodes -o wide 21 | k get nodes -o custom-columns=NAME:.metadata.name,TAINTS:.spec.taints 22 | k get pods -l podmon.dellemc.com/driver -A -o wide | awk ' 23 | BEGIN { 24 | totalpods=0; totalrunning=0; totalcreating=0; totalerrors=0; totalcrashloopbackoff=0; 25 | nonode = "unscheduled" 26 | } 27 | /^NAMESPACE/ { next; } 28 | { 29 | ns=$1; podstate=$4; time=$6; node=$8; gsub("\\..*","",node); \ 30 | nodes[node] = 1 31 | if (podstate !~ "Running") { print ns, podstate, time, node; } 32 | if (podstate == "Running") { 33 | runcounts[node]=runcounts[node]+1; 34 | totalrunning++; 35 | } 36 | if (podstate == "ContainerCreating") { 37 | creatingcounts[node]=creatingcounts[node]+1; 38 | totalcreating++; 39 | } 40 | if (podstate == "Error") { 41 | errorcounts[node]=errorcounts[node]+1; 42 | totalerrors++; 43 | } 44 | if (podstate == "CrashLoopBackOff") { 45 | crashloopbackoffcounts[node]=crashloopbackoffcounts[node]+1; 46 | totalcrashloopbackoff++; 47 | } 48 | if (podstate == "Pending") { 49 | pendingcounts[nonode]=pendingcounts[nonode]+1; 50 | } 51 | totalpods = totalpods+1; 52 | } 53 | END { 54 | pending = pendingcounts[nonode] 55 | if (pending == "") { pending=0 } 56 | print "Total Pods:", totalpods, "Running:", totalrunning, "Creating:", totalcreating, "Errors:", totalerrors, "CrashLoopBackoff:", totalcrashloopbackoff, "Pending (unscheduled):", pending 57 | for (node in nodes) { 58 | runners=runcounts[node]; 59 | if (runners == "") { runners=0 } 60 | creators=creatingcounts[node]; 61 | if (creators == "") { creators=0 } 62 | errors = errorcounts[node]; 63 | if (errors == "") { errors=0 } 64 | crashloopbackoffs = crashloopbackoffcounts[node]; 65 | if (crashloopbackoffs == "") { crashloopbackoffs=0 } 66 | printf "node %s running %s creating %d errors %d\n", node, runners, creators, errors; 67 | } 68 | } 69 | ' | sort 70 | if [ "$once" = "--once" ]; then exit 0; fi 71 | sleep 5 72 | done 73 | 
--------------------------------------------------------------------------------