├── tests ├── __init__.py ├── ha │ ├── crc32c.sh │ ├── mtls.sh │ ├── no_huge.sh │ ├── cluster_pool.sh │ ├── set_qos_2ms.sh │ ├── setup_crc32c.sh │ ├── flat_bdev_per_cluster.sh │ ├── setup_4gws_loop.sh │ ├── setup_rbd_qos.sh │ ├── setup_auto_resize.sh │ ├── setup_image_shrink.sh │ ├── setup_ns_read_only.sh │ ├── setup_reuse_image.sh │ ├── setup_reuse_image2.sh │ ├── setup_set_qos_2ms.sh │ ├── start_up_4gws_loop.sh │ ├── start_up_rbd_qos.sh │ ├── wait_gateways_crc32c.sh │ ├── setup_4gws_create_delete.sh │ ├── setup_rados_namespace.sh │ ├── start_up_image_shrink.sh │ ├── start_up_ns_read_only.sh │ ├── setup_4gws_create_delete_loop.sh │ ├── setup_set_qos.sh │ ├── start_up_4gws_create_delete.sh │ ├── wait_gateways_4gws_loop.sh │ ├── wait_gateways_rbd_qos.sh │ ├── start_up_4gws_create_delete_loop.sh │ ├── wait_gateways_image_shrink.sh │ ├── wait_gateways_ns_read_only.sh │ ├── wait_gateways_set_qos_2ms.sh │ ├── wait_gateways_4gws_create_delete.sh │ ├── wait_gateways_reuse_image2.sh │ ├── wait_gateways_4gws_create_delete_loop.sh │ ├── setup_main_exit.sh │ ├── start_up_main_exit.sh │ ├── wait_gateways_main_exit.sh │ ├── start_up_4gws.sh │ ├── wait_gateways_4gws.sh │ ├── wait_gateways_set_qos.sh │ ├── wait_gateways_auto_resize.sh │ ├── start_up_set_qos.sh │ ├── start_up_auto_resize.sh │ ├── start_up_mtls.sh │ ├── 4gws_loop.sh │ ├── start_up_crc32c.sh │ ├── start_up_no_huge.sh │ ├── wait_gateways_reuse_image.sh │ ├── namespaces_loop.sh │ ├── start_up_cluster_pool.sh │ ├── 4gws_create_delete_loop.sh │ ├── late_registration_loop.sh │ ├── state_transitions_loop.sh │ ├── start_up_flat_bdev_per_cluster.sh │ ├── wait_gateways_mtls.sh │ ├── start_up_reuse_image.sh │ ├── start_up_reuse_image2.sh │ ├── start_up_set_qos_2ms.sh │ ├── state_transitions_rand_loop.sh │ ├── notify.sh │ ├── ceph_status.sh │ ├── set_qos.sh │ ├── wreak_havoc.sh │ ├── setup_listener_hostname.sh │ ├── wait_gateways.sh │ ├── blocklist.sh │ ├── sanity.sh │ ├── setup.sh │ ├── setup_mtls.sh │ ├── start_up.sh │ ├── 
rados_namespace.sh │ ├── listener_hostname.sh │ ├── setup_4gws.sh │ ├── gateway_removal.sh │ ├── no_subsystems.sh │ ├── connect_panic.sh │ ├── namespaces.sh │ ├── ns_read_only.sh │ ├── reuse_image2.sh │ ├── main_exit.sh │ ├── state_transitions_both_gws.sh │ ├── late_registration.sh │ ├── image_shrink.sh │ ├── state_transitions.sh │ ├── ns_lb_change.sh │ ├── auto_load_balance.sh │ ├── rbd_qos.sh │ ├── 4gws.sh │ └── auto_listeners.sh ├── conftest.py ├── atom │ └── cpArtifactAndCleanup.sh ├── test_omap_read_lock_ignore_errors.py ├── test_omap_no_read_lock.py ├── ceph-nvmeof.tls.conf ├── ceph-nvmeof.cluster_pool.conf ├── ceph-nvmeof.flat_bdevs_per_cluster.conf ├── ceph-nvmeof.no-huge.conf ├── test_cli_force_tls.py ├── test_max_subsystems.py ├── ceph-nvmeof.crc32c.conf ├── test_subsys_grp_name_append.py ├── test_auto_listeners.py ├── test_erasure_pool.py └── test_grpc.py ├── control ├── __init__.py ├── proto │ ├── __init__.py │ └── monitor.proto ├── __main__.py └── config.py ├── lib └── go │ ├── .gitignore │ ├── Containerfile │ ├── nvmeof │ ├── doc.go │ ├── go.mod │ └── go.sum │ └── Makefile ├── pdm.toml ├── tox.ini ├── .gitmodules ├── monitoring └── Ceph-NVMe-oF-Gateways-Dashboard.png ├── .gitignore ├── .dockerignore ├── mk ├── misc.mk ├── autohelp.mk ├── demo.mk ├── demosecurepsk.mk ├── demosecuredhchap.mk └── containerized.mk ├── .github └── workflows │ ├── check-deps.yml │ ├── release.yml │ └── codeql.yml ├── COPYING ├── pyproject.toml ├── CONTRIBUTING.md ├── ceph-nvmeof.conf ├── Dockerfile.ceph ├── .env ├── Dockerfile └── Makefile /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /control/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /control/proto/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/ha/crc32c.sh: -------------------------------------------------------------------------------- 1 | sanity.sh -------------------------------------------------------------------------------- /tests/ha/mtls.sh: -------------------------------------------------------------------------------- 1 | sanity.sh -------------------------------------------------------------------------------- /tests/ha/no_huge.sh: -------------------------------------------------------------------------------- 1 | sanity.sh -------------------------------------------------------------------------------- /tests/ha/cluster_pool.sh: -------------------------------------------------------------------------------- 1 | sanity.sh -------------------------------------------------------------------------------- /tests/ha/set_qos_2ms.sh: -------------------------------------------------------------------------------- 1 | set_qos.sh -------------------------------------------------------------------------------- /tests/ha/setup_crc32c.sh: -------------------------------------------------------------------------------- 1 | setup.sh -------------------------------------------------------------------------------- /lib/go/.gitignore: -------------------------------------------------------------------------------- 1 | .container-id 2 | -------------------------------------------------------------------------------- /pdm.toml: -------------------------------------------------------------------------------- 1 | [python] 2 | use_venv = false 3 | -------------------------------------------------------------------------------- /tests/ha/flat_bdev_per_cluster.sh: -------------------------------------------------------------------------------- 1 | sanity.sh -------------------------------------------------------------------------------- 
/tests/ha/setup_4gws_loop.sh: -------------------------------------------------------------------------------- 1 | setup_4gws.sh -------------------------------------------------------------------------------- /tests/ha/setup_rbd_qos.sh: -------------------------------------------------------------------------------- 1 | setup_set_qos.sh -------------------------------------------------------------------------------- /tests/ha/setup_auto_resize.sh: -------------------------------------------------------------------------------- 1 | setup_set_qos.sh -------------------------------------------------------------------------------- /tests/ha/setup_image_shrink.sh: -------------------------------------------------------------------------------- 1 | setup_set_qos.sh -------------------------------------------------------------------------------- /tests/ha/setup_ns_read_only.sh: -------------------------------------------------------------------------------- 1 | setup_set_qos.sh -------------------------------------------------------------------------------- /tests/ha/setup_reuse_image.sh: -------------------------------------------------------------------------------- 1 | setup_set_qos.sh -------------------------------------------------------------------------------- /tests/ha/setup_reuse_image2.sh: -------------------------------------------------------------------------------- 1 | setup_set_qos.sh -------------------------------------------------------------------------------- /tests/ha/setup_set_qos_2ms.sh: -------------------------------------------------------------------------------- 1 | setup_set_qos.sh -------------------------------------------------------------------------------- /tests/ha/start_up_4gws_loop.sh: -------------------------------------------------------------------------------- 1 | start_up_4gws.sh -------------------------------------------------------------------------------- /tests/ha/start_up_rbd_qos.sh: 
-------------------------------------------------------------------------------- 1 | start_up_set_qos.sh -------------------------------------------------------------------------------- /tests/ha/wait_gateways_crc32c.sh: -------------------------------------------------------------------------------- 1 | wait_gateways.sh -------------------------------------------------------------------------------- /tests/ha/setup_4gws_create_delete.sh: -------------------------------------------------------------------------------- 1 | setup_4gws.sh -------------------------------------------------------------------------------- /tests/ha/setup_rados_namespace.sh: -------------------------------------------------------------------------------- 1 | setup_set_qos.sh -------------------------------------------------------------------------------- /tests/ha/start_up_image_shrink.sh: -------------------------------------------------------------------------------- 1 | start_up_set_qos.sh -------------------------------------------------------------------------------- /tests/ha/start_up_ns_read_only.sh: -------------------------------------------------------------------------------- 1 | start_up_set_qos.sh -------------------------------------------------------------------------------- /tests/ha/setup_4gws_create_delete_loop.sh: -------------------------------------------------------------------------------- 1 | setup_4gws.sh -------------------------------------------------------------------------------- /tests/ha/setup_set_qos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | exit 0 4 | -------------------------------------------------------------------------------- /tests/ha/start_up_4gws_create_delete.sh: -------------------------------------------------------------------------------- 1 | start_up_4gws.sh -------------------------------------------------------------------------------- /tests/ha/wait_gateways_4gws_loop.sh: 
-------------------------------------------------------------------------------- 1 | wait_gateways_4gws.sh -------------------------------------------------------------------------------- /tests/ha/wait_gateways_rbd_qos.sh: -------------------------------------------------------------------------------- 1 | wait_gateways_set_qos.sh -------------------------------------------------------------------------------- /tests/ha/start_up_4gws_create_delete_loop.sh: -------------------------------------------------------------------------------- 1 | start_up_4gws.sh -------------------------------------------------------------------------------- /tests/ha/wait_gateways_image_shrink.sh: -------------------------------------------------------------------------------- 1 | wait_gateways_set_qos.sh -------------------------------------------------------------------------------- /tests/ha/wait_gateways_ns_read_only.sh: -------------------------------------------------------------------------------- 1 | wait_gateways_set_qos.sh -------------------------------------------------------------------------------- /tests/ha/wait_gateways_set_qos_2ms.sh: -------------------------------------------------------------------------------- 1 | wait_gateways_set_qos.sh -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 100 3 | ignore = 4 | -------------------------------------------------------------------------------- /tests/ha/wait_gateways_4gws_create_delete.sh: -------------------------------------------------------------------------------- 1 | wait_gateways_4gws.sh -------------------------------------------------------------------------------- /tests/ha/wait_gateways_reuse_image2.sh: -------------------------------------------------------------------------------- 1 | wait_gateways_reuse_image.sh 
-------------------------------------------------------------------------------- /tests/ha/wait_gateways_4gws_create_delete_loop.sh: -------------------------------------------------------------------------------- 1 | wait_gateways_4gws.sh -------------------------------------------------------------------------------- /tests/ha/setup_main_exit.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | echo ℹ️ Skipping setup for this test 3 | -------------------------------------------------------------------------------- /tests/ha/start_up_main_exit.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | echo ℹ️ Skipping start up for this test 3 | -------------------------------------------------------------------------------- /tests/ha/wait_gateways_main_exit.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | echo ℹ️ Skipping wait gateways up for this test 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "spdk"] 2 | path = spdk 3 | url = https://github.com/ceph/spdk.git 4 | branch = ceph-nvmeof-v25.09 5 | -------------------------------------------------------------------------------- /monitoring/Ceph-NVMe-oF-Gateways-Dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceph/ceph-nvmeof/devel/monitoring/Ceph-NVMe-oF-Gateways-Dashboard.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *_pb2*.py* 3 | __pycache__ 4 | __pypackages__ 5 | .pdm-python 6 | server.crt 7 | server.key 8 | client.crt 9 | client.key 10 | 
-------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | ** 2 | # Exclude everything except: 3 | !control/*.py 4 | !control/proto/__init__.py 5 | !control/proto/*.proto 6 | !pyproject.toml 7 | !pdm.lock 8 | !pdm.toml 9 | !README.md 10 | !LICENSE 11 | -------------------------------------------------------------------------------- /tests/ha/start_up_4gws.sh: -------------------------------------------------------------------------------- 1 | # Check if GITHUB_WORKSPACE is defined 2 | if [ -n "$GITHUB_WORKSPACE" ]; then 3 | test_dir="$GITHUB_WORKSPACE/tests/ha" 4 | else 5 | test_dir=$(dirname $0) 6 | fi 7 | 8 | $test_dir/start_up.sh 4 9 | -------------------------------------------------------------------------------- /tests/ha/wait_gateways_4gws.sh: -------------------------------------------------------------------------------- 1 | # Check if GITHUB_WORKSPACE is defined 2 | if [ -n "$GITHUB_WORKSPACE" ]; then 3 | test_dir="$GITHUB_WORKSPACE/tests/ha" 4 | else 5 | test_dir=$(dirname $0) 6 | fi 7 | 8 | $test_dir/wait_gateways.sh 4 9 | -------------------------------------------------------------------------------- /tests/ha/wait_gateways_set_qos.sh: -------------------------------------------------------------------------------- 1 | # Check if GITHUB_WORKSPACE is defined 2 | if [ -n "$GITHUB_WORKSPACE" ]; then 3 | test_dir="$GITHUB_WORKSPACE/tests/ha" 4 | else 5 | test_dir=$(dirname $0) 6 | fi 7 | 8 | $test_dir/wait_gateways.sh 1 9 | -------------------------------------------------------------------------------- /tests/ha/wait_gateways_auto_resize.sh: -------------------------------------------------------------------------------- 1 | # Check if GITHUB_WORKSPACE is defined 2 | if [ -n "$GITHUB_WORKSPACE" ]; then 3 | test_dir="$GITHUB_WORKSPACE/tests/ha" 4 | else 5 | test_dir=$(dirname $0) 6 | fi 7 | 8 | $test_dir/wait_gateways.sh 2 9 | 
-------------------------------------------------------------------------------- /tests/ha/start_up_set_qos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | # Check if GITHUB_WORKSPACE is defined 6 | if [ -n "$GITHUB_WORKSPACE" ]; then 7 | test_dir="$GITHUB_WORKSPACE/tests/ha" 8 | else 9 | test_dir=$(dirname $0) 10 | fi 11 | 12 | $test_dir/start_up.sh 1 13 | -------------------------------------------------------------------------------- /tests/ha/start_up_auto_resize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | # Check if GITHUB_WORKSPACE is defined 6 | if [ -n "$GITHUB_WORKSPACE" ]; then 7 | test_dir="$GITHUB_WORKSPACE/tests/ha" 8 | else 9 | test_dir=$(dirname $0) 10 | fi 11 | 12 | $test_dir/start_up.sh 2 13 | -------------------------------------------------------------------------------- /tests/ha/start_up_mtls.sh: -------------------------------------------------------------------------------- 1 | # Check if GITHUB_WORKSPACE is defined 2 | if [ -n "$GITHUB_WORKSPACE" ]; then 3 | test_dir="$GITHUB_WORKSPACE/tests/ha" 4 | else 5 | test_dir=$(dirname $0) 6 | fi 7 | 8 | export NVMEOF_CONFIG=./tests/ceph-nvmeof.tls.conf 9 | $test_dir/start_up.sh 1 10 | -------------------------------------------------------------------------------- /tests/ha/4gws_loop.sh: -------------------------------------------------------------------------------- 1 | # Check if GITHUB_WORKSPACE is defined 2 | if [ -n "$GITHUB_WORKSPACE" ]; then 3 | test_dir="$GITHUB_WORKSPACE/tests/ha" 4 | else 5 | test_dir=$(dirname $0) 6 | fi 7 | 8 | ITERATIONS=2 9 | for i in $(seq $ITERATIONS); do 10 | echo "Iteration #$i" 11 | source $test_dir/4gws.sh 12 | done 13 | -------------------------------------------------------------------------------- /tests/ha/start_up_crc32c.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -ex 4 | 5 | # Check if GITHUB_WORKSPACE is defined 6 | if [ -n "$GITHUB_WORKSPACE" ]; then 7 | test_dir="$GITHUB_WORKSPACE/tests/ha" 8 | else 9 | test_dir=$(dirname $0) 10 | fi 11 | 12 | export NVMEOF_CONFIG=./tests/ceph-nvmeof.crc32c.conf 13 | $test_dir/start_up.sh -------------------------------------------------------------------------------- /tests/ha/start_up_no_huge.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -ex 4 | 5 | # Check if GITHUB_WORKSPACE is defined 6 | if [ -n "$GITHUB_WORKSPACE" ]; then 7 | test_dir="$GITHUB_WORKSPACE/tests/ha" 8 | else 9 | test_dir=$(dirname $0) 10 | fi 11 | 12 | export NVMEOF_CONFIG=./tests/ceph-nvmeof.no-huge.conf 13 | $test_dir/start_up.sh 14 | -------------------------------------------------------------------------------- /tests/ha/wait_gateways_reuse_image.sh: -------------------------------------------------------------------------------- 1 | # Check if GITHUB_WORKSPACE is defined 2 | if [ -n "$GITHUB_WORKSPACE" ]; then 3 | test_dir="$GITHUB_WORKSPACE/tests/ha" 4 | else 5 | test_dir=$(dirname $0) 6 | fi 7 | 8 | $test_dir/wait_gateways.sh 1 9 | rm -f ./ceph-nvmeof.conf 10 | mv /tmp/ceph-nvmeof.conf ./ceph-nvmeof.conf 11 | -------------------------------------------------------------------------------- /tests/ha/namespaces_loop.sh: -------------------------------------------------------------------------------- 1 | # Check if GITHUB_WORKSPACE is defined 2 | if [ -n "$GITHUB_WORKSPACE" ]; then 3 | test_dir="$GITHUB_WORKSPACE/tests/ha" 4 | else 5 | test_dir=$(dirname $0) 6 | fi 7 | 8 | ITERATIONS=2 9 | for i in $(seq $ITERATIONS); do 10 | echo "Iteration #$i" 11 | source $test_dir/namespaces.sh 12 | done 13 | -------------------------------------------------------------------------------- /tests/ha/start_up_cluster_pool.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -ex 4 | 5 | # Check if GITHUB_WORKSPACE is defined 6 | if [ -n "$GITHUB_WORKSPACE" ]; then 7 | test_dir="$GITHUB_WORKSPACE/tests/ha" 8 | else 9 | test_dir=$(dirname $0) 10 | fi 11 | 12 | export NVMEOF_CONFIG=./tests/ceph-nvmeof.cluster_pool.conf 13 | $test_dir/start_up.sh 14 | -------------------------------------------------------------------------------- /tests/ha/4gws_create_delete_loop.sh: -------------------------------------------------------------------------------- 1 | # Check if GITHUB_WORKSPACE is defined 2 | if [ -n "$GITHUB_WORKSPACE" ]; then 3 | test_dir="$GITHUB_WORKSPACE/tests/ha" 4 | else 5 | test_dir=$(dirname $0) 6 | fi 7 | 8 | ITERATIONS=2 9 | for i in $(seq $ITERATIONS); do 10 | echo "Iteration #$i" 11 | source $test_dir/4gws_create_delete.sh 12 | done 13 | -------------------------------------------------------------------------------- /tests/ha/late_registration_loop.sh: -------------------------------------------------------------------------------- 1 | # Check if GITHUB_WORKSPACE is defined 2 | if [ -n "$GITHUB_WORKSPACE" ]; then 3 | test_dir="$GITHUB_WORKSPACE/tests/ha" 4 | else 5 | test_dir=$(dirname $0) 6 | fi 7 | 8 | ITERATIONS=3 9 | for i in $(seq $ITERATIONS); do 10 | echo "Iteration #$i" 11 | source $test_dir/late_registration.sh 12 | done 13 | -------------------------------------------------------------------------------- /tests/ha/state_transitions_loop.sh: -------------------------------------------------------------------------------- 1 | # Check if GITHUB_WORKSPACE is defined 2 | if [ -n "$GITHUB_WORKSPACE" ]; then 3 | test_dir="$GITHUB_WORKSPACE/tests/ha" 4 | else 5 | test_dir=$(dirname $0) 6 | fi 7 | 8 | ITERATIONS=7 9 | for i in $(seq $ITERATIONS); do 10 | echo "Iteration #$i" 11 | source $test_dir/state_transitions.sh 12 | done 13 | -------------------------------------------------------------------------------- 
/tests/ha/start_up_flat_bdev_per_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -ex 4 | 5 | # Check if GITHUB_WORKSPACE is defined 6 | if [ -n "$GITHUB_WORKSPACE" ]; then 7 | test_dir="$GITHUB_WORKSPACE/tests/ha" 8 | else 9 | test_dir=$(dirname $0) 10 | fi 11 | 12 | export NVMEOF_CONFIG=./tests/ceph-nvmeof.flat_bdevs_per_cluster.conf 13 | $test_dir/start_up.sh 14 | -------------------------------------------------------------------------------- /lib/go/Containerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.24 2 | 3 | ENV MODULE=github.com/ceph/ceph-nvmeof/lib/go/nvmeof 4 | 5 | # install tools 6 | RUN true \ 7 | && apt-get update \ 8 | && apt install -y protobuf-compiler \ 9 | && go install github.com/golang/protobuf/protoc-gen-go@latest \ 10 | && go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest \ 11 | && true 12 | -------------------------------------------------------------------------------- /tests/ha/wait_gateways_mtls.sh: -------------------------------------------------------------------------------- 1 | # Check if GITHUB_WORKSPACE is defined 2 | if [ -n "$GITHUB_WORKSPACE" ]; then 3 | test_dir="$GITHUB_WORKSPACE/tests/ha" 4 | else 5 | test_dir=$(dirname $0) 6 | fi 7 | 8 | export CLI_TLS_ARGS="--server-cert /etc/ceph/server.crt --client-key /etc/ceph/client.key --client-cert /etc/ceph/client.crt" 9 | $test_dir/wait_gateways.sh 1 10 | -------------------------------------------------------------------------------- /tests/ha/start_up_reuse_image.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | # Check if GITHUB_WORKSPACE is defined 6 | if [ -n "$GITHUB_WORKSPACE" ]; then 7 | test_dir="$GITHUB_WORKSPACE/tests/ha" 8 | else 9 | test_dir=$(dirname $0) 10 | fi 11 | 12 | rm -f /tmp/ceph-nvmeof.conf 13 | cp ceph-nvmeof.conf /tmp/ 14 | sed -i 's/^ 
*group *=.*$/group = group1/' ceph-nvmeof.conf 15 | $test_dir/start_up.sh 1 16 | -------------------------------------------------------------------------------- /tests/ha/start_up_reuse_image2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | # Check if GITHUB_WORKSPACE is defined 6 | if [ -n "$GITHUB_WORKSPACE" ]; then 7 | test_dir="$GITHUB_WORKSPACE/tests/ha" 8 | else 9 | test_dir=$(dirname $0) 10 | fi 11 | 12 | rm -f /tmp/ceph-nvmeof.conf 13 | cp ceph-nvmeof.conf /tmp/ 14 | sed -i 's/^ *group *=.*$/group = group_2/' ceph-nvmeof.conf 15 | $test_dir/start_up.sh 1 16 | -------------------------------------------------------------------------------- /lib/go/nvmeof/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 International Business Machines 3 | * All rights reserved. 4 | * 5 | * SPDX-License-Identifier: MIT 6 | * 7 | * Authors: ndevos@ibm.com 8 | */ 9 | 10 | package nvmeof 11 | 12 | //go:generate protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. 
--go-grpc_opt=paths=source_relative --proto_path=../../../control/proto ../../../control/proto/gateway.proto 13 | -------------------------------------------------------------------------------- /lib/go/nvmeof/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/ceph/ceph-nvmeof/lib/go/nvmeof 2 | 3 | go 1.24.4 4 | 5 | require ( 6 | google.golang.org/grpc v1.73.0 7 | google.golang.org/protobuf v1.36.6 8 | ) 9 | 10 | require ( 11 | golang.org/x/net v0.38.0 // indirect 12 | golang.org/x/sys v0.31.0 // indirect 13 | golang.org/x/text v0.23.0 // indirect 14 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250324211829-b45e905df463 // indirect 15 | ) 16 | -------------------------------------------------------------------------------- /tests/ha/start_up_set_qos_2ms.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -ex 4 | 5 | # Check if GITHUB_WORKSPACE is defined 6 | if [ -n "$GITHUB_WORKSPACE" ]; then 7 | test_dir="$GITHUB_WORKSPACE/tests/ha" 8 | else 9 | test_dir=$(dirname $0) 10 | fi 11 | 12 | sed 's/^ *qos_timeslice_in_usecs.*$/qos_timeslice_in_usecs = 2000/' ceph-nvmeof.conf > /tmp/ceph-nvmeof.2ms.conf 13 | export NVMEOF_CONFIG=/tmp/ceph-nvmeof.2ms.conf 14 | $test_dir/start_up.sh 1 15 | rm -f /tmp/ceph-nvmeof.2ms.conf 16 | -------------------------------------------------------------------------------- /tests/ha/state_transitions_rand_loop.sh: -------------------------------------------------------------------------------- 1 | # Check if GITHUB_WORKSPACE is defined 2 | if [ -n "$GITHUB_WORKSPACE" ]; then 3 | test_dir="$GITHUB_WORKSPACE/tests/ha" 4 | else 5 | test_dir=$(dirname $0) 6 | fi 7 | 8 | ITERATIONS=7 9 | for i in $(seq $ITERATIONS); do 10 | test_name="state_transitions" 11 | if [ "$((RANDOM % 2))" -eq "1" ]; then 12 | test_name="state_transitions_both_gws" 13 | fi 14 | echo "Iteration #$i $test_name" 15 | source 
$test_dir/$test_name.sh 16 | done 17 | -------------------------------------------------------------------------------- /control/proto/monitor.proto: -------------------------------------------------------------------------------- 1 | // 2 | // Copyright (c) 2023 International Business Machines 3 | // All rights reserved. 4 | // 5 | // SPDX-License-Identifier: MIT 6 | // 7 | 8 | syntax = "proto3"; 9 | import "google/protobuf/empty.proto"; 10 | 11 | service MonitorGroup { 12 | // Called by the monitor client to set the gateway's group id 13 | rpc group_id(group_id_req) returns (google.protobuf.Empty) {} 14 | } 15 | 16 | // Request messages 17 | message group_id_req { 18 | uint32 id = 1; 19 | } 20 | -------------------------------------------------------------------------------- /mk/misc.mk: -------------------------------------------------------------------------------- 1 | ## Miscellaneous: 2 | 3 | # nvmeof_cli 4 | NVMEOF_CLI = $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE) run --rm nvmeof-cli --server-address $(NVMEOF_IP_ADDRESS) --server-port $(NVMEOF_GW_PORT) 5 | NVMEOF_CLI_IPV6 = $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE) run --rm nvmeof-cli --server-address $(NVMEOF_IPV6_ADDRESS) --server-port $(NVMEOF_GW_PORT) 6 | 7 | alias: ## Print bash alias command for the nvmeof-cli. 
Usage: "eval $(make alias)" 8 | @echo alias cephnvmf=\"$(strip $(NVMEOF_CLI))\"\; 9 | @echo alias cephnvmf-ipv6=\"$(strip $(NVMEOF_CLI_IPV6))\" 10 | 11 | .PHONY: alias 12 | -------------------------------------------------------------------------------- /lib/go/Makefile: -------------------------------------------------------------------------------- 1 | REPO := github.com/ceph/ceph-nvmeof 2 | CONTAINER := ceph-nvmeof:lib-go 3 | 4 | proto: .container-id 5 | podman run --rm -v $(CURDIR)/../..:/go/src/$(REPO):Z --workdir=/go/src/github.com/ceph/ceph-nvmeof/lib/go/nvmeof $(CONTAINER) go generate 6 | 7 | 8 | .PHONY: proto regenerate clean 9 | 10 | .container-id: 11 | podman build -t $(CONTAINER) -f Containerfile . 12 | podman inspect -f {{.Id}} $(CONTAINER) > .container-id 13 | 14 | regenerate: 15 | $(MAKE) clean 16 | $(RM) nvmeof/gateway_grpc.pb.go nvmeof/gateway.pb.go 17 | $(MAKE) proto 18 | 19 | clean: 20 | test -f .container-id && podman rmi $(CONTAINER) || true 21 | $(RM) .container-id 22 | -------------------------------------------------------------------------------- /tests/ha/notify.sh: -------------------------------------------------------------------------------- 1 | set -xe 2 | 3 | POOL="${RBD_POOL:-rbd}" 4 | CEPH_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | grep -v nvme | grep ceph | awk '{print $1}') 5 | GW1_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}') 6 | 7 | echo "ℹ️ Step 1: verify 4 watchers" 8 | 9 | docker exec $CEPH_NAME rados listwatchers -p $POOL nvmeof.state | grep "watcher=" | wc -l | grep 4 10 | 11 | echo "ℹ️ Step 2: stop a gateway" 12 | 13 | docker stop $GW1_NAME 14 | wait 15 | sleep 5 16 | 17 | echo "ℹ️ Step 3: verify 2 watchers" 18 | 19 | docker exec $CEPH_NAME rados listwatchers -p $POOL nvmeof.state | grep "watcher=" | wc -l | grep 2 20 | -------------------------------------------------------------------------------- /.github/workflows/check-deps.yml: 
-------------------------------------------------------------------------------- 1 | # Blocks PR merging if PR description contains "depends on" or "blocked by" 2 | # pointing to another PR until that PR is merged 3 | 4 | name: Check Dependencies 5 | on: [pull_request] 6 | 7 | jobs: 8 | check-deps: 9 | runs-on: ubuntu-latest 10 | name: Check Dependencies 11 | steps: 12 | # https://github.com/marketplace/actions/pr-dependency-check 13 | # Pinned for security reasons 14 | # Approved Github Actions: https://github.com/orgs/ceph/projects/11/views/1 15 | - uses: gregsdennis/dependencies-action@f98d55eee1f66e7aaea4a60e71892736ae2548c7 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 18 | -------------------------------------------------------------------------------- /tests/ha/ceph_status.sh: -------------------------------------------------------------------------------- 1 | set -xe 2 | 3 | POOL="${RBD_POOL:-rbd}" 4 | CEPH_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | grep -v nvme | grep ceph | awk '{print $1}') 5 | GW1_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}') 6 | 7 | docker compose exec -T ceph ceph service dump 8 | docker compose exec -T ceph ceph status 9 | 10 | echo "ℹ️ Step 1: verify 2 gateways" 11 | 12 | docker compose exec -T ceph ceph status | grep "2 gateways active" 13 | 14 | echo "ℹ️ Step 2: stop a gateway" 15 | 16 | docker stop $GW1_NAME 17 | wait 18 | sleep 5 19 | 20 | echo "ℹ️ Step 3: verify 1 gateway" 21 | 22 | docker compose exec -T ceph ceph status | grep "1 gateway active" 23 | -------------------------------------------------------------------------------- /tests/ha/set_qos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xe 4 | 5 | NS_COUNT=400 6 | NQN="nqn.2016-06.io.spdk:cnode1QOS" 7 | GW1_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}') 8 | GW1_IP="$(docker inspect -f 
'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW1_NAME")" 9 | cephnvmf="docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500" 10 | 11 | $cephnvmf subsystem add --subsystem $NQN --max-namespaces 1024 --no-group-append 12 | 13 | for i in `seq 1 $NS_COUNT` 14 | do 15 | $cephnvmf namespace add -n $NQN --rbd-pool rbd --rbd-image image${i} --rbd-create-image --size 10MB 16 | $cephnvmf namespace set_qos -n $NQN --nsid $i --rw-ios-per-second 150 --rw-megabytes-per-second 19 --r-megabytes-per-second 19 --w-megabytes-per-second 19 17 | done 18 | 19 | $cephnvmf subsystem del --subsystem $NQN --force 20 | sleep 10 21 | -------------------------------------------------------------------------------- /tests/ha/wreak_havoc.sh: -------------------------------------------------------------------------------- 1 | set -xe 2 | echo "ℹ️ HA failover/failback test" 3 | eval $(make run SVC=bdevperf OPTS="--entrypoint=env" | grep BDEVPERF_TEST_DURATION | tr -d '\n\r' ) 4 | failover_step=$(expr $BDEVPERF_TEST_DURATION / 4) 5 | GW2_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /2/ {print $1}') 6 | wreak_havoc() { 7 | echo "Waiting $failover_step secs before failover..." 8 | sleep $failover_step 9 | echo "Stop gateway $GW2_NAME" 10 | docker stop $GW2_NAME 11 | echo "Waiting $failover_step secs before failback..." 12 | sleep $failover_step 13 | echo "Restart gateway $GW2_NAME" 14 | docker start $GW2_NAME 15 | echo "wreak_havoc() function completed." 
16 | } 17 | 18 | # Check if GITHUB_WORKSPACE is defined 19 | if [ -n "$GITHUB_WORKSPACE" ]; then 20 | test_dir="$GITHUB_WORKSPACE/tests/ha" 21 | else 22 | test_dir=$(dirname $0) 23 | fi 24 | wreak_havoc & 25 | source $test_dir/sanity.sh 26 | wait 27 | 28 | -------------------------------------------------------------------------------- /mk/autohelp.mk: -------------------------------------------------------------------------------- 1 | # Auto-generate Makefile help from comments (##) in targets and global 2 | # variables. 3 | # Usage: 4 | # hello: ## This target prints Hello World 5 | # LANGUAGE := esperanto ## Set the language for the Hello World message 6 | 7 | autohelp: BOLD != [ -z "$$PS1" ] && tput bold 8 | autohelp: NORMAL != [ -z "$$PS1" ] && tput sgr0 9 | autohelp: 10 | @echo $(AUTOHELP_SUMMARY) 11 | @echo 12 | @echo "Usage:" 13 | @echo " make $(BOLD)[target] [target]$(NORMAL) ... $(BOLD)OPTION$(NORMAL)=value ..." 14 | @echo 15 | @echo Targets: 16 | @for file in $(MAKEFILE_LIST); do \ 17 | awk 'BEGIN {FS = "## "}; /^##/ {printf "\n %s\n", $$2}' $$file; \ 18 | awk 'BEGIN {FS = ":.*?## "}; \ 19 | /^\w.*:.*##/ {printf " $(BOLD)%-15s$(NORMAL) %s\n", $$1, $$2}' $$file | sort; \ 20 | grep -q "^\w.*=.*## " $$file && echo -e "\n Options:"; \ 21 | awk 'BEGIN {FS = "( [!?]?= | ?## )"}; \ 22 | /^\w.*=.*## / {printf " $(BOLD)%-15s$(NORMAL) %s (Default: %s)\n", $$1, $$3, $$2} \ 23 | ' $$file | sort; \ 24 | done 25 | -------------------------------------------------------------------------------- /tests/ha/setup_listener_hostname.sh: -------------------------------------------------------------------------------- 1 | set -xe 2 | 3 | GW1_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}') 4 | GW2_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /2/ {print $1}') 5 | GW1_IP="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW1_NAME")" 6 | GW2_IP="$(docker inspect -f 
'{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW2_NAME")" 7 | NQN="nqn.2016-06.io.spdk:cnode1" 8 | 9 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 subsystem add --subsystem $NQN --no-group-append 10 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 namespace add --subsystem $NQN --rbd-pool rbd --rbd-image demo_image1 --size 10M --rbd-create-image 11 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 listener add --subsystem $NQN --host-name $GW2_NAME --traddr $GW2_IP --trsvcid 4420 12 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 host add --subsystem $NQN --host-nqn "*" 13 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from control.config import GatewayConfig 3 | 4 | 5 | def pytest_addoption(parser): 6 | """Sets command line options for testing.""" 7 | # Specify base config file for tests 8 | parser.addoption("--config", 9 | action="store", 10 | help="Path to config file", 11 | default="ceph-nvmeof.conf") 12 | parser.addoption("--image", 13 | action="store", 14 | help="RBD image name", 15 | default="mytestdevimage") 16 | 17 | 18 | @pytest.fixture(scope="session") 19 | def conffile(request): 20 | """Returns the command line input for the config file.""" 21 | return request.config.getoption("--config") 22 | 23 | 24 | @pytest.fixture(scope="session") 25 | def config(conffile): 26 | """Returns config file settings.""" 27 | return GatewayConfig(conffile) 28 | 29 | 30 | @pytest.fixture(scope="session") 31 | def image(request): 32 | """Returns the command line input for the test rbd image name.""" 33 | return request.config.getoption("--image") 34 | -------------------------------------------------------------------------------- /control/__main__.py: 
-------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021 International Business Machines 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | # 7 | # Authors: anita.shekar@ibm.com, sandy.kaur@ibm.com 8 | # 9 | 10 | import argparse 11 | from .server import GatewayServer 12 | from .config import GatewayConfig 13 | from .utils import GatewayLogger 14 | 15 | if __name__ == '__main__': 16 | parser = argparse.ArgumentParser(prog="python3 -m control", 17 | description="Manage NVMe gateways", 18 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 19 | parser.add_argument( 20 | "-c", 21 | "--config", 22 | default="ceph-nvmeof.conf", 23 | type=str, 24 | help="Path to config file", 25 | ) 26 | args = parser.parse_args() 27 | config = GatewayConfig(args.config) 28 | gw_logger = GatewayLogger(config) 29 | config.display_environment_info(gw_logger.logger) 30 | config.dump_config_file(gw_logger.logger) 31 | with GatewayServer(config) as gateway: 32 | gateway.serve() 33 | gateway.keep_alive() 34 | -------------------------------------------------------------------------------- /tests/atom/cpArtifactAndCleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | NIGHTLY=$1 4 | 5 | process_artifacts() { 6 | local artifact_dir=$1 7 | local backup_dir=$2 8 | local tar_file=$3 9 | local busy_file=$4 10 | 11 | sudo rm -rf "${artifact_dir:?}"/* 12 | sudo ls -lta "$artifact_dir" 13 | 14 | sudo rm -rf "$tar_file" 15 | sudo ls -lta "$(dirname "$artifact_dir")" 16 | sudo cp -r "$backup_dir" "$artifact_dir" 17 | sudo ls -lta "$artifact_dir" 18 | 19 | sudo tar -czf "$tar_file" -C "$artifact_dir" . 
20 | sudo ls -lta "$artifact_dir" 21 | sudo ls -lta "$(dirname "$artifact_dir")" 22 | sudo chmod +rx "$tar_file" 23 | sudo rm -rf "$busy_file" 24 | } 25 | 26 | if [ "$NIGHTLY" != "nightly" ]; then 27 | process_artifacts \ 28 | "/home/cephnvme/artifact_m7" \ 29 | "/root/.ssh/atom_backup/artifact/multiIBMCloudServers_m7" \ 30 | "/home/cephnvme/artifact_m7.tar.gz" \ 31 | "/home/cephnvme/busyServer.txt" 32 | else 33 | process_artifacts \ 34 | "/home/cephnvme/artifact_m8" \ 35 | "/root/.ssh/atom_backup/artifact/multiIBMCloudServers_m8" \ 36 | "/home/cephnvme/artifact_m8.tar.gz" \ 37 | "/home/cephnvme/busyServerNightly.txt" 38 | fi 39 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | 2 | Source: http://github.com/ceph/ceph-nvmeof 3 | 4 | Files: * 5 | License: LGPL-3.0-or-later (see LICENSE) 6 | 7 | Files: proto/gateway.proto 8 | License: MIT 9 | Copyright: Copyright (c) 2021 International Business Machines 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | 17 | 18 | -------------------------------------------------------------------------------- /mk/demo.mk: -------------------------------------------------------------------------------- 1 | ## Demo: 2 | 3 | HOSTNQN=`cat /etc/nvme/hostnqn` 4 | NVMEOF_IO_PORT2=`expr $(NVMEOF_IO_PORT) + 1` 5 | # demo 6 | demo: 7 | $(NVMEOF_CLI) subsystem add --subsystem $(NQN) --no-group-append 8 | $(NVMEOF_CLI) namespace add --subsystem $(NQN) --rbd-pool $(RBD_POOL) --rbd-image $(RBD_IMAGE_NAME) --size $(RBD_IMAGE_SIZE) --rbd-create-image 9 | $(NVMEOF_CLI) namespace add --subsystem $(NQN) --rbd-pool $(RBD_POOL) --rbd-image $(RBD_IMAGE_NAME)2 --size $(RBD_IMAGE_SIZE) --rbd-create-image --no-auto-visible 10 | $(NVMEOF_CLI) listener add --subsystem $(NQN) --host-name `$(NVMEOF_CLI) --output stdio gw info | grep "Gateway's host name:" | cut -d: -f2 | sed 's/ //g'` --traddr $(NVMEOF_IP_ADDRESS) --trsvcid $(NVMEOF_IO_PORT) --verify-host-name 11 | $(NVMEOF_CLI) listener add --subsystem $(NQN) --host-name `$(NVMEOF_CLI) --output stdio gw info | grep "Gateway's host name:" | cut -d: -f2 | sed 's/ //g'` --traddr 0.0.0.0 --trsvcid $(NVMEOF_IO_PORT2) --verify-host-name 12 | $(NVMEOF_CLI_IPV6) listener add --subsystem $(NQN) --host-name `$(NVMEOF_CLI) --output stdio gw info | grep "Gateway's host name:" | cut -d: -f2 | sed 's/ //g'` --traddr $(NVMEOF_IPV6_ADDRESS) --trsvcid $(NVMEOF_IO_PORT) --adrfam IPV6 --verify-host-name 13 | $(NVMEOF_CLI) host add --subsystem $(NQN) --host-nqn "*" 14 | $(NVMEOF_CLI) namespace add_host --subsystem $(NQN) --nsid 2 --host-nqn $(HOSTNQN) 15 | 16 | .PHONY: demo 17 | -------------------------------------------------------------------------------- /tests/ha/wait_gateways.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -ex 3 | SCALE=2 4 | echo CLI_TLS_ARGS $CLI_TLS_ARGS 5 | # Check if argument is provided 6 | if [ $# -ge 1 ]; then 7 | # Check if argument is an integer greater than or equal to 1 8 | if [ "$1" -eq "$1" ] 2>/dev/null && [ "$1" -ge 1 ]; then 9 | # Set variable to the provided argument 10 | SCALE="$1" 11 | else 12 | echo "Error: Argument must be an integer larger than 1." >&2 13 | exit 1 14 | fi 15 | fi 16 | for i in $(seq $SCALE); do 17 | while true; do 18 | GW_NAME='' 19 | while [ ! -n "$GW_NAME" ]; do 20 | sleep 1 # Adjust the sleep duration as needed 21 | GW_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /'$i'/ {print $1}') 22 | done 23 | container_status=$(docker inspect -f '{{.State.Status}}' "$GW_NAME") 24 | if [ "$container_status" = "running" ]; then 25 | echo "Container $i $GW_NAME is now running." 26 | else 27 | echo "Container $i $GW_NAME is still not running. Waiting..." 28 | continue 29 | fi 30 | GW_IP="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW_NAME")" 31 | if ! docker compose run --rm nvmeof-cli $CLI_TLS_ARGS --server-address $GW_IP --server-port 5500 get_subsystems; then 32 | echo "Container $i $GW_NAME $GW_IP no subsystems. Waiting..." 
33 | continue 34 | fi 35 | break; 36 | done 37 | done 38 | -------------------------------------------------------------------------------- /tests/ha/blocklist.sh: -------------------------------------------------------------------------------- 1 | source .env 2 | 3 | verify_blocklist() { 4 | stopped_gw_name=$1 5 | NODE_IP=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' $stopped_gw_name) 6 | BLOCKLIST=$(docker compose exec -T ceph ceph osd blocklist ls) 7 | 8 | echo "verifying there is at least 1 entry in the blocklist related to the stopped gateway" 9 | if echo "$BLOCKLIST" | grep -q "$NODE_IP"; then 10 | echo "ip $NODE_IP for the stopped gateway was found the blocklist." 11 | else 12 | echo "ip $NODE_IP for node the stopped gateway was not found in blocklist." 13 | exit 1 14 | fi 15 | 16 | echo "verifying there are no entries in the blocklist which are not related to the stopped gateway" 17 | if echo "$BLOCKLIST" | grep -qv "$NODE_IP"; then 18 | echo "found at least 1 entry in blocklist which is not related to gateway in the stopped gateway. failing" 19 | exit 1 20 | else 21 | echo "didn't find unexpected entries which are not relaetd to the stopped gateway." 
22 | fi 23 | echo "blocklist verification successful" 24 | } 25 | 26 | echo "obtaining gw1 container id and its ip" 27 | GW1_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}') 28 | 29 | echo "clearing blocklist" 30 | docker compose exec -T ceph ceph osd blocklist clear 31 | 32 | echo "shutting down gw1:$GW1_NAME" 33 | docker stop $GW1_NAME 34 | 35 | echo "waiting for 30s after shutdown" 36 | sleep 30 37 | 38 | verify_blocklist "$GW1_NAME" 39 | -------------------------------------------------------------------------------- /tests/ha/sanity.sh: -------------------------------------------------------------------------------- 1 | set -xe 2 | # See 3 | # - https://github.com/spdk/spdk/blob/master/doc/jsonrpc.md 4 | # - https://spdk.io/doc/nvmf_multipath_howto.html 5 | 6 | GW1_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}') 7 | ip="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW1_NAME")" 8 | echo -n "ℹ️ Starting bdevperf container" 9 | docker compose up -d bdevperf 10 | sleep 10 11 | echo "ℹ️ bdevperf start up logs" 12 | make logs SVC=bdevperf 13 | BDEVPERF_SOCKET=/tmp/bdevperf.sock 14 | NVMEOF_DISC_PORT=8009 15 | 16 | echo "ℹ️ Using discovery service in gateway $GW1 ip $ip" 17 | rpc="/usr/libexec/spdk/scripts/rpc.py" 18 | echo "ℹ️ bdevperf bdev_nvme_set_options" 19 | make exec SVC=bdevperf OPTS=-T CMD="$rpc -v -s $BDEVPERF_SOCKET bdev_nvme_set_options -r -1" 20 | echo "ℹ️ bdevperf start discovery ip: $ip port: $NVMEOF_DISC_PORT" 21 | make exec SVC=bdevperf OPTS=-T CMD="$rpc -v -s $BDEVPERF_SOCKET bdev_nvme_start_discovery -b Nvme0 -t tcp -a $ip -s $NVMEOF_DISC_PORT -f ipv4 -w" 22 | echo "ℹ️ bdevperf bdev_nvme_get_discovery_info" 23 | make exec SVC=bdevperf OPTS=-T CMD="$rpc -v -s $BDEVPERF_SOCKET bdev_nvme_get_discovery_info" 24 | echo "ℹ️ bdevperf perform_tests" 25 | eval $(make run SVC=bdevperf OPTS="--entrypoint=env" | grep 
BDEVPERF_TEST_DURATION | tr -d '\n\r' ) 26 | timeout=$(expr $BDEVPERF_TEST_DURATION \* 2) 27 | bdevperf="/usr/libexec/spdk/scripts/bdevperf.py" 28 | make exec SVC=bdevperf OPTS=-T CMD="$bdevperf -v -t $timeout -s $BDEVPERF_SOCKET perform_tests" 29 | -------------------------------------------------------------------------------- /mk/demosecurepsk.mk: -------------------------------------------------------------------------------- 1 | ## Demo secure PSK: 2 | 3 | HOSTNQN=`cat /etc/nvme/hostnqn` 4 | HOSTNQN2=`cat /etc/nvme/hostnqn | sed 's/......$$/ffffff/'` 5 | HOSTNQN3=`cat /etc/nvme/hostnqn | sed 's/......$$/fffffe/'` 6 | NVMEOF_IO_PORT2=`expr $(NVMEOF_IO_PORT) + 1` 7 | PSKKEY1=$(PSK_KEY1) 8 | PSKKEY2=$(PSK_KEY2) 9 | # demosecure-psk 10 | demosecurepsk: 11 | $(NVMEOF_CLI) subsystem add --subsystem $(NQN) --no-group-append 12 | $(NVMEOF_CLI) namespace add --subsystem $(NQN) --rbd-pool $(RBD_POOL) --rbd-image $(RBD_IMAGE_NAME) --size $(RBD_IMAGE_SIZE) --rbd-create-image 13 | $(NVMEOF_CLI) namespace add --subsystem $(NQN) --rbd-pool $(RBD_POOL) --rbd-image $(RBD_IMAGE_NAME)2 --size $(RBD_IMAGE_SIZE) --rbd-create-image --no-auto-visible 14 | $(NVMEOF_CLI) listener add --subsystem $(NQN) --host-name `$(NVMEOF_CLI) --output stdio gw info | grep "Gateway's host name:" | cut -d: -f2 | sed 's/ //g'` --traddr $(NVMEOF_IP_ADDRESS) --trsvcid $(NVMEOF_IO_PORT) --secure --verify-host-name 15 | $(NVMEOF_CLI) listener add --subsystem $(NQN) --host-name `$(NVMEOF_CLI) --output stdio gw info | grep "Gateway's host name:" | cut -d: -f2 | sed 's/ //g'` --traddr $(NVMEOF_IP_ADDRESS) --trsvcid $(NVMEOF_IO_PORT2) --verify-host-name 16 | $(NVMEOF_CLI) host add --subsystem $(NQN) --host-nqn "$(HOSTNQN)" --psk $(PSKKEY1) 17 | $(NVMEOF_CLI) host add --subsystem $(NQN) --host-nqn "$(HOSTNQN2)" 18 | $(NVMEOF_CLI) host add --subsystem $(NQN) --host-nqn "$(HOSTNQN3)" --psk $(PSKKEY2) 19 | $(NVMEOF_CLI) namespace add_host --subsystem $(NQN) --nsid 2 --host-nqn $(HOSTNQN) 20 | 21 | .PHONY: 
demosecurepsk 22 | -------------------------------------------------------------------------------- /tests/ha/setup.sh: -------------------------------------------------------------------------------- 1 | set -xe 2 | 3 | GW1_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}') 4 | GW2_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /2/ {print $1}') 5 | GW1_IP="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW1_NAME")" 6 | GW2_IP="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW2_NAME")" 7 | NQN="nqn.2016-06.io.spdk:cnode1" 8 | 9 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 subsystem add --subsystem $NQN --no-group-append 10 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 namespace add --subsystem $NQN --rbd-pool rbd --rbd-image demo_image1 --size 10M --rbd-create-image -l 1 11 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 namespace add --subsystem $NQN --rbd-pool rbd --rbd-image demo_image2 --size 10M --rbd-create-image -l 2 12 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 listener add --subsystem $NQN --host-name $GW1_NAME --traddr $GW1_IP --trsvcid 4420 13 | docker compose run --rm nvmeof-cli --server-address $GW2_IP --server-port 5500 listener add --subsystem $NQN --host-name $GW2_NAME --traddr $GW2_IP --trsvcid 4420 14 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 host add --subsystem $NQN --host-nqn "*" 15 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 get_subsystems 16 | docker compose run --rm nvmeof-cli --server-address $GW2_IP --server-port 5500 get_subsystems 17 | -------------------------------------------------------------------------------- /tests/ha/setup_mtls.sh: 
-------------------------------------------------------------------------------- 1 | set -xe 2 | 3 | GW1_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}') 4 | GW1_IP="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW1_NAME")" 5 | NQN="nqn.2016-06.io.spdk:cnode1" 6 | 7 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 --server-cert /etc/ceph/server.crt --client-key /etc/ceph/client.key --client-cert /etc/ceph/client.crt subsystem add --subsystem $NQN --no-group-append 8 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 --server-cert /etc/ceph/server.crt --client-key /etc/ceph/client.key --client-cert /etc/ceph/client.crt namespace add --subsystem $NQN --rbd-pool rbd --rbd-image demo_image1 --size 10M --rbd-create-image -l 1 9 | #docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 --server-cert /etc/ceph/server.crt --client-key /etc/ceph/client.key --client-cert /etc/ceph/client.crt namespace add --subsystem $NQN --rbd-pool rbd --rbd-image demo_image2 --size 10M --rbd-create-image -l 2 10 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 --server-cert /etc/ceph/server.crt --client-key /etc/ceph/client.key --client-cert /etc/ceph/client.crt listener add --subsystem $NQN --host-name $GW1_NAME --traddr $GW1_IP --trsvcid 4420 11 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 --server-cert /etc/ceph/server.crt --client-key /etc/ceph/client.key --client-cert /etc/ceph/client.crt host add --subsystem $NQN --host-nqn "*" 12 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 --server-cert /etc/ceph/server.crt --client-key /etc/ceph/client.key --client-cert /etc/ceph/client.crt get_subsystems 13 | -------------------------------------------------------------------------------- /tests/ha/start_up.sh: 
-------------------------------------------------------------------------------- 1 | set -e 2 | SCALE=2 3 | POOL="${RBD_POOL:-rbd}" 4 | # Check if argument is provided 5 | if [ $# -ge 1 ]; then 6 | # Check if argument is an integer larger or equal than 1 7 | if [ "$1" -eq "$1" ] 2>/dev/null && [ "$1" -ge 1 ]; then 8 | # Set variable to the provided argument 9 | SCALE="$1" 10 | else 11 | echo "Error: Argument must be an integer larger than 1." >&2 12 | exit 1 13 | fi 14 | fi 15 | echo ℹ️ Starting $SCALE nvmeof gateways 16 | docker compose up -d --remove-orphans --scale nvmeof=$SCALE nvmeof 17 | 18 | # Waiting for the ceph container to become healthy 19 | while true; do 20 | container_status=$(docker inspect --format='{{.State.Health.Status}}' ceph) 21 | if [ "$container_status" = "healthy" ]; then 22 | # success 23 | break 24 | else 25 | # Wait for a specific time before checking again 26 | sleep 1 27 | printf . 28 | fi 29 | done 30 | echo ✅ ceph is healthy 31 | 32 | echo ℹ️ Increase debug logs level 33 | docker compose exec -T ceph ceph config get mon.a 34 | docker compose exec -T ceph ceph tell mon.a config set debug_mon 20/20 35 | docker compose exec -T ceph ceph tell mon.a config set debug_ms 1/1 36 | docker compose exec -T ceph ceph config get mon.a 37 | 38 | echo ℹ️ Running processes of services 39 | docker compose top 40 | 41 | echo ℹ️ Send nvme-gw create for all gateways 42 | GW_GROUP=$(grep group ceph-nvmeof.conf | sed 's/^[^=]*=//' | sed 's/^ *//' | sed 's/ *$//') 43 | for i in $(seq $SCALE); do 44 | GW_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | grep -v discovery | awk '$2 ~ /nvmeof/ && $2 ~ /'$i'/ {print $1}') 45 | echo 📫 nvme-gw create gateway: \'$GW_NAME\' pool: \'$POOL\', group: \'$GW_GROUP\' 46 | docker compose exec -T ceph ceph nvme-gw create $GW_NAME $POOL "$GW_GROUP" 47 | done 48 | -------------------------------------------------------------------------------- /pyproject.toml: 
-------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["pdm-backend"] 3 | build-backend = "pdm.backend" 4 | 5 | [project] 6 | name = "ceph-nvmeof" 7 | version = "1.6.2" 8 | description = "Service to provide Ceph storage over NVMe-oF protocol" 9 | readme = "README.md" 10 | requires-python = ">3.9.1" 11 | license = {file = "LICENSE"} 12 | authors = [ 13 | {name = "Ilya Dryomov", email = "idryomov@gmail.com"}, 14 | {name = "Mykola Golub", email = "mykola.golub@clyso.com"}, 15 | {name = "Sandy Kaur", email = "sandy.kaur@ibm.com"}, 16 | {name = "Ernesto Puerta", email = "epuertat@redhat.com"}, 17 | {name = "Yin Congmin", email = "congmin.yin@intel.com"}, 18 | {name = "Scott Peterson", email = "scott.d.peterson@intel.com"}, 19 | {name = "Jason Dillaman", email = "dillaman@redhat.com"}, 20 | {name = "Anita Shekar", email = "anita.shekar@ibm.com"}, 21 | ] 22 | maintainers = [] 23 | keywords = [] 24 | classifiers = [] # https://pypi.org/classifiers/ 25 | dependencies = [ 26 | "grpcio ~= 1.53.0", 27 | "grpcio_tools ~= 1.53.0", 28 | "tabulate>=0.9.0", 29 | "pyyaml>=6.0.1", 30 | "prometheus_client ~= 0.19.0", 31 | "netifaces ~= 0.11.0", 32 | "cryptography>=43.0.3" 33 | ] 34 | 35 | [tool.pdm.scripts] 36 | protoc = {call = "grpc_tools.command:build_package_protos('.')"} 37 | pre_build = {composite = ["protoc"]} 38 | 39 | [tool.pytest.ini_options] 40 | log_cli = true 41 | log_cli_level = "INFO" 42 | log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)" 43 | log_cli_date_format = "%Y-%m-%d %H:%M:%S" 44 | 45 | [tool.pdm.dev-dependencies] 46 | test = [ 47 | "pytest>=7.4.0", 48 | ] 49 | 50 | [project.urls] 51 | #homepage = "" 52 | # documentation = "" 53 | repository = "https://github.com/ceph/ceph-nvmeof.git" 54 | # changelog = "" 55 | 56 | [project.scripts] 57 | ceph-nvmeof = "control.cli:main" 58 | -------------------------------------------------------------------------------- 
/tests/test_omap_read_lock_ignore_errors.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from control.server import GatewayServer 3 | from control.cephutils import CephUtils 4 | import grpc 5 | from control.proto import gateway_pb2_grpc as pb2_grpc 6 | import time 7 | 8 | pool = "rbd" 9 | group_name = "GROUPNAME" 10 | 11 | 12 | @pytest.fixture(scope="module") 13 | def gateway(config): 14 | """Sets up and tears down Gateway""" 15 | 16 | config.config["gateway"]["group"] = group_name 17 | addr = config.get("gateway", "addr") 18 | port = config.getint("gateway", "port") 19 | config.config["gateway"]["omap_file_ignore_unlock_errors"] = "True" 20 | config.config["gateway-logs"]["log_level"] = "debug" 21 | ceph_utils = CephUtils(config) 22 | 23 | with GatewayServer(config) as gateway: 24 | 25 | # Start gateway 26 | gateway.gw_logger_object.set_log_level("debug") 27 | ceph_utils.execute_ceph_monitor_command( 28 | "{" + f'"prefix":"nvme-gw create", "id": "{gateway.name}", "pool": "{pool}", ' 29 | f'"group": "{group_name}"' + "}" 30 | ) 31 | gateway.serve() 32 | 33 | # Bind the client and Gateway 34 | channel = grpc.insecure_channel(f"{addr}:{port}") 35 | pb2_grpc.GatewayStub(channel) 36 | yield gateway.gateway_rpc 37 | 38 | # Stop gateway 39 | gateway.server.stop(grace=1) 40 | gateway.gateway_rpc.gateway_state.delete_state() 41 | 42 | 43 | def test_ignore_unlock_errors(caplog, gateway): 44 | gw = gateway 45 | lookfor = "OMAP unlock errors will be ignored, the gateway will continue" 46 | found = 0 47 | time.sleep(10) 48 | for oneline in caplog.get_records("setup"): 49 | if oneline.message == lookfor: 50 | found += 1 51 | assert found == 1 52 | gw.rpc_lock.acquire() 53 | caplog.clear() 54 | gw.omap_lock.lock_omap() 55 | assert "Locked OMAP exclusive" in caplog.text 56 | time.sleep(25) # A little more than omap_file_lock_duration 57 | caplog.clear() 58 | gw.omap_lock.unlock_omap() 59 | assert "No such lock, the 
exclusive lock might have expired" in caplog.text 60 | -------------------------------------------------------------------------------- /tests/ha/rados_namespace.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function cephnvmf_func() 4 | { 5 | /usr/bin/docker compose run --rm nvmeof-cli --server-address ${NVMEOF_IP_ADDRESS} --server-port ${NVMEOF_GW_PORT} $@ 6 | } 7 | 8 | . .env 9 | RADOS_NS1="test_ns1" 10 | RADOS_NS2="test_ns2" 11 | IMAGE1="ns_image1" 12 | IMAGE2="ns_image2" 13 | IMAGE3="ns_image3" 14 | 15 | set -e 16 | set -x 17 | 18 | echo "ℹ️ Create RBD namespaces" 19 | docker exec ceph rbd namespace create ${RBD_POOL}/${RADOS_NS1} 20 | docker exec ceph rbd namespace create ${RBD_POOL}/${RADOS_NS2} 21 | 22 | echo "ℹ️ Create subsystem" 23 | cephnvmf_func subsystem add --subsystem ${NQN} --no-group-append 24 | 25 | echo "ℹ️ Test: Create namespace in rbd/${RADOS_NS1}" 26 | cephnvmf_func namespace add --subsystem ${NQN} --rbd-pool ${RBD_POOL} \ 27 | --rados-namespace ${RADOS_NS1} --rbd-image ${IMAGE1} --size 10MB --rbd-create-image 28 | 29 | echo "ℹ️ Test: Same image name in rbd/${RADOS_NS2} , different RADOS namespace should succeed" 30 | cephnvmf_func namespace add --subsystem ${NQN} --rbd-pool ${RBD_POOL} \ 31 | --rados-namespace ${RADOS_NS2} --rbd-image ${IMAGE1} --size 10MB --rbd-create-image 32 | 33 | echo "ℹ️ Test: Duplicate namespace in same RADOS namespace should fail" 34 | set +e 35 | cephnvmf_func --output stdio namespace add --subsystem ${NQN} --rbd-pool ${RBD_POOL} \ 36 | --rados-namespace ${RADOS_NS1} --rbd-image ${IMAGE1} --size 10MB --rbd-create-image > /tmp/ns_dup.txt 2>&1 37 | if [[ $? 
-eq 0 ]]; then 38 | echo "ERROR: Should not allow duplicate image in same namespace" 39 | exit 1 40 | fi 41 | set -e 42 | grep -q "is already used" /tmp/ns_dup.txt 43 | 44 | echo "ℹ️ Test: Namespace without namespace vs with RADOS namespace" 45 | cephnvmf_func namespace add --subsystem ${NQN} --rbd-pool ${RBD_POOL} \ 46 | --rbd-image ${IMAGE2} --size 10MB --rbd-create-image 47 | 48 | cephnvmf_func namespace add --subsystem ${NQN} --rbd-pool ${RBD_POOL} \ 49 | --rados-namespace ${RADOS_NS1} --rbd-image ${IMAGE2} --size 10MB --rbd-create-image 50 | 51 | echo "ℹ️ List namespaces" 52 | ns_list=$(cephnvmf_func --output stdio --format json namespace list --subsystem ${NQN}) 53 | echo "$ns_list" | jq . 54 | 55 | echo "✅ All tests passed" -------------------------------------------------------------------------------- /tests/ha/listener_hostname.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xe 4 | rpc="/usr/libexec/spdk/scripts/rpc.py" 5 | NQN="nqn.2016-06.io.spdk:cnode1" 6 | 7 | GW1_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}') 8 | GW2_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /2/ {print $1}') 9 | GW1_IP="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW1_NAME")" 10 | GW2_IP="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW2_NAME")" 11 | 12 | echo -n "ℹ️ Starting bdevperf container" 13 | docker compose up -d bdevperf 14 | sleep 10 15 | echo "ℹ️ bdevperf start up logs" 16 | make logs SVC=bdevperf 17 | eval $(make run SVC=bdevperf OPTS="--entrypoint=env" | grep BDEVPERF_SOCKET | tr -d '\n\r' ) 18 | 19 | echo "ℹ️ bdevperf bdev_nvme_set_options" 20 | make exec SVC=bdevperf OPTS=-T CMD="$rpc -v -s $BDEVPERF_SOCKET bdev_nvme_set_options -r -1" 21 | 22 | echo "ℹ️ bdevperf tcp connect ip: $GW2_IP port: 4420 nqn: $NQN" 23 | devs=`make -s exec SVC=bdevperf OPTS=-T 
CMD="$rpc -v -s $BDEVPERF_SOCKET bdev_nvme_attach_controller -b Nvme0 -t tcp -a $GW2_IP -s 4420 -f ipv4 -n $NQN -q ${NQN}host -l -1 -o 10"` 24 | [[ "$devs" == "Nvme0n1" ]] 25 | 26 | grep "Received request to create $GW2_NAME TCP ipv4 listener for $NQN at ${GW2_IP}:4420, secure: False, verify host name: False, context: &1 | sed 's/Get subsystems://') 58 | 59 | # verify all resources found in get subsystems 60 | if [ "$(num_subs "$subs")" -ne $NUM_SUBSYSTEMS -o \ 61 | "$(num_nss "$subs" 0)" -ne $NUM_NAMESPACES -o \ 62 | "$(num_nss "$subs" 1)" -ne $NUM_NAMESPACES -o \ 63 | "$(num_listeners "$subs" 1)" -ne 1 -o \ 64 | "$(num_listeners "$subs" 1)" -ne 1 ]; then 65 | 66 | echo "Not ready $i $GW_NAME $GW_IP" 67 | sleep 5 68 | continue 69 | fi 70 | echo "Ready $i $GW_NAME $GW_IP" 71 | break 72 | done 73 | done 74 | -------------------------------------------------------------------------------- /mk/demosecuredhchap.mk: -------------------------------------------------------------------------------- 1 | ## Demo secure DHCHAP: 2 | 3 | SUBNQN1=$(NQN) 4 | SUBNQN2=$(NQN)2 5 | HOSTNQN=`cat /etc/nvme/hostnqn` 6 | HOSTNQN2=`cat /etc/nvme/hostnqn | sed 's/......$$/ffffff/'` 7 | HOSTNQN3=`cat /etc/nvme/hostnqn | sed 's/......$$/fffffe/'` 8 | HOSTNQN4=`cat /etc/nvme/hostnqn | sed 's/......$$/fffffd/'` 9 | NVMEOF_IO_PORT2=`expr $(NVMEOF_IO_PORT) + 1` 10 | NVMEOF_IO_PORT3=`expr $(NVMEOF_IO_PORT) + 2` 11 | NVMEOF_IO_PORT4=`expr $(NVMEOF_IO_PORT) + 3` 12 | DHCHAPKEY1=$(DHCHAP_KEY1) 13 | DHCHAPKEY2=$(DHCHAP_KEY2) 14 | DHCHAPKEY3=$(DHCHAP_KEY3) 15 | DHCHAPKEY4=$(DHCHAP_KEY4) 16 | PSKKEY1=$(PSK_KEY1) 17 | # demosecuredhchap 18 | demosecuredhchap: 19 | $(NVMEOF_CLI) subsystem add --subsystem $(SUBNQN1) --no-group-append 20 | $(NVMEOF_CLI) subsystem add --subsystem $(SUBNQN2) --no-group-append --dhchap-key $(DHCHAPKEY3) 21 | $(NVMEOF_CLI) namespace add --subsystem $(SUBNQN1) --rbd-pool $(RBD_POOL) --rbd-image $(RBD_IMAGE_NAME) --size $(RBD_IMAGE_SIZE) --rbd-create-image 22 | $(NVMEOF_CLI) 
namespace add --subsystem $(SUBNQN1) --rbd-pool $(RBD_POOL) --rbd-image $(RBD_IMAGE_NAME)2 --size $(RBD_IMAGE_SIZE) --rbd-create-image --no-auto-visible 23 | $(NVMEOF_CLI) listener add --subsystem $(SUBNQN1) --host-name `$(NVMEOF_CLI) --output stdio gw info | grep "Gateway's host name:" | cut -d: -f2 | sed 's/ //g'` --traddr $(NVMEOF_IP_ADDRESS) --trsvcid $(NVMEOF_IO_PORT) --verify-host-name 24 | $(NVMEOF_CLI) listener add --subsystem $(SUBNQN2) --host-name `$(NVMEOF_CLI) --output stdio gw info | grep "Gateway's host name:" | cut -d: -f2 | sed 's/ //g'` --traddr $(NVMEOF_IP_ADDRESS) --trsvcid $(NVMEOF_IO_PORT2) --verify-host-name 25 | $(NVMEOF_CLI) listener add --subsystem $(SUBNQN1) --host-name `$(NVMEOF_CLI) --output stdio gw info | grep "Gateway's host name:" | cut -d: -f2 | sed 's/ //g'` --traddr $(NVMEOF_IP_ADDRESS) --trsvcid $(NVMEOF_IO_PORT3) --verify-host-name 26 | $(NVMEOF_CLI) listener add --subsystem $(SUBNQN1) --host-name `$(NVMEOF_CLI) --output stdio gw info | grep "Gateway's host name:" | cut -d: -f2 | sed 's/ //g'` --traddr $(NVMEOF_IP_ADDRESS) --trsvcid $(NVMEOF_IO_PORT4) --secure --verify-host-name 27 | $(NVMEOF_CLI) host add --subsystem $(SUBNQN1) --host-nqn $(HOSTNQN) --dhchap-key $(DHCHAPKEY1) 28 | $(NVMEOF_CLI) host add --subsystem $(SUBNQN2) --host-nqn $(HOSTNQN2) --dhchap-key $(DHCHAPKEY2) 29 | $(NVMEOF_CLI) host add --subsystem $(SUBNQN1) --host-nqn $(HOSTNQN3) 30 | $(NVMEOF_CLI) namespace add_host --subsystem $(SUBNQN1) --nsid 2 --host-nqn $(HOSTNQN) 31 | $(NVMEOF_CLI) host add --subsystem $(SUBNQN1) --host-nqn $(HOSTNQN4) --dhchap-key $(DHCHAPKEY4) --psk $(PSKKEY1) 32 | 33 | .PHONY: demosecuredhchap 34 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Welcome to Ceph NVMe-oF Gateway contributing guide 2 | 3 | ## New contributor guide 4 | 5 | The Ceph NVMe-oF Gateway project pivots around 2 other major 
Open Source projects: 6 | - [SPDK](https://spdk.io/) [[source code](https://github.com/spdk/spdk/)], which internally relies on the DPDK project. 7 | - [Ceph](https://ceph.io/) [[source code](https://github.com/ceph/ceph)]. 8 | 9 | ## Engage with the community 10 | 11 | Besides the [regular Ceph community channels](https://ceph.io/en/community/connect/), the NVMe-oF 12 | Gateway team can be specifically reached at: 13 | - [Ceph Slack workspace](https://ceph-storage.slack.com/), `#nvmeof` channel, 14 | - [Weekly Sync Meeting](https://pad.ceph.com/p/rbd_nvmeof) 15 | 16 | ## Report issues 17 | 18 | If you find an issue, identify whether the issue comes from the NVMe-oF Gateway, or any of the underlying components: 19 | * For NVMe-oF Gateway issues (usually a Python traceback), check if [the issue has already been reported](https://github.com/ceph/ceph-nvmeof/issues). 20 | Otherwise, [open a new one](https://github.com/ceph/ceph-nvmeof/issues/new). 21 | * For SPDK-related issues, [open an issue in the SPDK GitHub Issues](https://github.com/spdk/spdk/issues/). 22 | * For Ceph-related issues, [open an issue in the Ceph Tracker](https://tracker.ceph.com/). 23 | 24 | ## Submit changes 25 | 26 | ### Coding conventions 27 | 28 | This project follows: 29 | * Python coding guidelines ([PEP-8](https://peps.python.org/pep-0008/)). 30 | * [gRPC and Protocol Buffers](https://grpc.io/docs/what-is-grpc/introduction/). 31 | 32 | ### Commit format 33 | 34 | When committing your changes: 35 | * Sign-off (`git commit -s`), which will automatically add the following trailer to your commit: `Signed-off-by: FirstName LastName `. 36 | This constitutes your [Developer Certificate of Origin](https://en.wikipedia.org/wiki/Developer_Certificate_of_Origin), and is enforced by a [CI check](https://probot.github.io/apps/dco/). 37 | * Follow the [Conventional Commit syntax](https://www.conventionalcommits.org/en/v1.0.0/). This is not yet enforced via CI checks. 38 | 39 | ### Testing 40 | 41 | TODO. 
42 | 43 | ### Pull request 44 | 45 | Refer to Ceph's ["Submitting Patches to Ceph"](https://github.com/ceph/ceph/blob/main/SubmittingPatches.rst) documentation, with the difference that: 46 | * This project uses GitHub Issues instead of the Ceph Tracker, 47 | * Therefore commits and pull requests should use the `Fixes: #12345` syntax (where `#12345` is the GitHub issue number), instead of the `Fixes: https://tracker.ceph.com...`. 48 | 49 | ### Documentation 50 | 51 | TODO: No documentation is yet in place. 52 | -------------------------------------------------------------------------------- /tests/ha/gateway_removal.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -xe 4 | POOL="${RBD_POOL:-rbd}" 5 | 6 | # Get number of gateways from ceph nvme-gw show 7 | get_num_gateways() { 8 | local output 9 | if output=$(docker compose exec -T ceph ceph nvme-gw show $POOL '' 2>/dev/null); then 10 | echo "$output" | jq -r '."num gws"' 2>/dev/null || echo "0" 11 | else 12 | echo "0" 13 | fi 14 | } 15 | 16 | # Get all nvmeof container IDs directly 17 | get_all_gw_containers() { 18 | docker ps --filter name=nvmeof --format '{{.ID}}' 19 | } 20 | 21 | # 22 | # MAIN - Remove ALL gateways 23 | # 24 | 25 | # Get actual number of gateways dynamically 26 | NUM_GATEWAYS=$(get_num_gateways) 27 | 28 | echo "=== Gateway Removal Test ===" 29 | echo "Simulating: ceph orch rm nvmeof.$POOL." 
30 | echo "" 31 | 32 | # 33 | # Step 1: Show initial state and get gateway count 34 | # 35 | echo "Step 1: Before removal" 36 | docker compose exec -T ceph ceph nvme-gw show $POOL '' || echo "Failed to show initial state" 37 | 38 | echo "" 39 | echo "Found $NUM_GATEWAYS gateways to remove" 40 | 41 | if [ "$NUM_GATEWAYS" -eq 0 ]; then 42 | echo "⚠️ No gateways found - nothing to remove" 43 | exit 0 44 | fi 45 | 46 | # 47 | # Step 2: Remove ALL gateways 48 | # 49 | echo "" 50 | echo "Step 2: Remove all $NUM_GATEWAYS gateways" 51 | 52 | gw_containers=$(get_all_gw_containers) 53 | removed_count=0 54 | 55 | if [ -z "$gw_containers" ]; then 56 | echo "⚠️ No nvmeof containers found to remove" 57 | exit 0 58 | fi 59 | 60 | for gw_container in $gw_containers; do 61 | echo "Stop gw $gw_container" 62 | docker stop $gw_container 63 | echo "nvme-gw delete gateway: '$gw_container' pool: '$POOL', group: '' (empty string)" 64 | docker compose exec -T ceph ceph nvme-gw delete $gw_container $POOL '' 65 | removed_count=$((removed_count + 1)) 66 | done 67 | 68 | sleep 2 69 | 70 | 71 | # 72 | # Step 3: Show final state 73 | # 74 | echo "" 75 | echo "Step 3: After removal" 76 | final_output=$(docker compose exec -T ceph ceph nvme-gw show $POOL '' 2>&1) 77 | final_gw_count=$(echo "$final_output" | grep -o '"num gws": [0-9]*' | grep -o '[0-9]*' || echo "unknown") 78 | 79 | echo "Final result:" 80 | echo "$final_output" 81 | echo "" 82 | 83 | if [ "$final_gw_count" = "0" ]; then 84 | echo "=== TEST PASSED ===" 85 | echo "✅ 'ceph nvme-gw show' confirms 0 gateways" 86 | echo "✅ Successfully simulated 'ceph orch rm nvmeof.$POOL.'" 87 | elif [ "$final_gw_count" = "unknown" ]; then 88 | echo "🎉 === TEST PASSED ===" 89 | echo "✅ Gateway group completely removed (command failed as expected)" 90 | echo "✅ Successfully simulated 'ceph orch rm nvmeof.$POOL.'" 91 | else 92 | echo "=== TEST FAILED ===" 93 | echo "❌ Expected 'num gws': 0 but got: $final_gw_count" 94 | echo "❌ Manual removal != 
orchestrator removal" 95 | exit 1 96 | fi -------------------------------------------------------------------------------- /tests/ha/no_subsystems.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -xe 3 | # See 4 | # - https://github.com/spdk/spdk/blob/master/doc/jsonrpc.md 5 | # - https://spdk.io/doc/nvmf_multipath_howto.html 6 | 7 | GW1_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}') 8 | GW2_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /2/ {print $1}') 9 | 10 | ip="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW1_NAME")" 11 | ip2="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW2_NAME")" 12 | 13 | NQN="nqn.2016-06.io.spdk:cnode1" 14 | 15 | verify_gw_exists_and_no_subs() 16 | { 17 | IP=$1 18 | subs=$(docker compose run -T --rm nvmeof-cli --server-address $IP --server-port 5500 --output stdio --format json subsystem list) 19 | echo "show subsystems after del : $subs" 20 | if echo "$subs" | grep -q '"subsystems": \[\]'; then 21 | echo "The string contains 'subsystems:[]' on GW ip $IP" 22 | else 23 | echo "The string does not contain 'subsystems:[]'on GW ip $IP " 24 | exit 1 25 | fi 26 | } 27 | 28 | 29 | echo "ℹ️ ℹ️ Start test: Delete the last subsystem:" 30 | 31 | for i in $(seq 2); do 32 | 33 | docker compose run -T --rm nvmeof-cli --server-address $ip --server-port 5500 subsystem del -n $NQN --force 34 | sleep 2 35 | verify_gw_exists_and_no_subs $ip 36 | verify_gw_exists_and_no_subs $ip2 37 | 38 | echo "ℹ️ ℹ️ next : Create subsystem:" 39 | 40 | docker compose run -T --rm nvmeof-cli --server-address $ip --server-port 5500 subsystem add -n $NQN 41 | docker compose run --rm nvmeof-cli --server-address $ip --server-port 5500 listener add --subsystem $NQN --host-name $GW1_NAME --traddr $ip --trsvcid 4420 42 | docker compose run --rm nvmeof-cli --server-address $ip2 
--server-port 5500 listener add --subsystem $NQN --host-name $GW2_NAME --traddr $ip2 --trsvcid 4420 43 | 44 | sleep 5 45 | subs=$(docker compose run -T --rm nvmeof-cli --server-address $ip --server-port 5500 --output stdio --format json subsystem list) 46 | 47 | echo "subsystems $subs" 48 | #test that ana group is Active 49 | json=$(docker compose exec -T ceph ceph nvme-gw show rbd '') 50 | 51 | states=$(echo "$json" | jq -r '.["Created Gateways:"][] | ."ana states"') 52 | echo "$states" 53 | 54 | if echo "$states" | grep -q '1: ACTIVE'; then 55 | echo "state found ACTIVE in group 1" 56 | else 57 | echo "ACTIVE state not found for group 1" 58 | exit 1 59 | fi 60 | if echo "$states" | grep -q '2: ACTIVE'; then 61 | echo "state found ACTIVE in group 2" 62 | else 63 | echo "ACTIVE state not found for group 2" 64 | exit 1 65 | fi 66 | 67 | done 68 | echo "ℹ️ ℹ️ test passed" 69 | -------------------------------------------------------------------------------- /tests/ceph-nvmeof.tls.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021 International Business Machines 3 | # All rights reserved. 
4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | # 7 | # Authors: anita.shekar@ibm.com, sandy.kaur@ibm.com 8 | # 9 | 10 | [gateway] 11 | name = 12 | group = 13 | addr = 192.168.13.3 14 | port = 5500 15 | enable_auth = True 16 | state_update_notify = True 17 | state_update_interval_sec = 5 18 | enable_spdk_discovery_controller = False 19 | encryption_key = /etc/ceph/encryption.key 20 | rebalance_period_sec = 7 21 | max_gws_in_grp = 16 22 | max_ns_to_change_lb_grp = 8 23 | #omap_file_lock_duration = 20 24 | #omap_file_lock_retries = 30 25 | #omap_file_lock_retry_sleep_interval = 1.0 26 | #omap_file_update_reloads = 10 27 | #enable_prometheus_exporter = True 28 | #prometheus_exporter_ssl = True 29 | #prometheus_port = 10008 30 | #prometheus_bdev_pools = rbd 31 | #prometheus_stats_interval = 10 32 | #verify_nqns = True 33 | #verify_keys = True 34 | #verify_listener_ip = True 35 | #allowed_consecutive_spdk_ping_failures = 1 36 | #spdk_ping_interval_in_seconds = 2.0 37 | #max_hosts_per_namespace = 8 38 | #max_namespaces_with_netmask = 1000 39 | #max_subsystems = 128 40 | #max_hosts = 2048 41 | #max_namespaces = 2048 42 | #max_namespaces_per_subsystem = 256 43 | #max_hosts_per_subsystem = 128 44 | 45 | [gateway-logs] 46 | log_level=debug 47 | #log_files_enabled = True 48 | #log_files_rotation_enabled = True 49 | #verbose_log_messages = True 50 | #max_log_file_size_in_mb=10 51 | #max_log_files_count=20 52 | #max_log_directory_backups=10 53 | # 54 | # Notice that if you change the log directory the log files will only be visible inside the container 55 | # 56 | #log_directory = /var/log/ceph/ 57 | 58 | [discovery] 59 | addr = 0.0.0.0 60 | port = 8009 61 | 62 | [ceph] 63 | pool = rbd 64 | config_file = /etc/ceph/ceph.conf 65 | 66 | [mtls] 67 | server_key = /etc/ceph/server.key 68 | client_key = /etc/ceph/client.key 69 | server_cert = /etc/ceph/server.crt 70 | client_cert = /etc/ceph/client.crt 71 | 72 | [spdk] 73 | cluster_connections = 32 74 | tgt_path = 
/usr/local/bin/nvmf_tgt 75 | #rpc_socket_dir = /var/tmp/ 76 | #rpc_socket_name = spdk.sock 77 | #tgt_cmd_extra_args = --env-context="--no-huge -m1024" --iova-mode=va 78 | timeout = 60.0 79 | #log_level = 80 | #protocol_log_level = WARNING 81 | #log_file_dir = 82 | 83 | # Example value: --lcores (0-1) -L all 84 | # tgt_cmd_extra_args = 85 | 86 | # transports = tcp 87 | 88 | # Example value: {"max_queue_depth" : 16, "max_io_size" : 4194304, "io_unit_size" : 1048576, "zcopy" : false} 89 | transport_tcp_options = {"in_capsule_data_size" : 8192, "max_io_qpairs_per_ctrlr" : 7} 90 | 91 | # Example value: {"small_pool_count" : 8192, "large_pool_count" : 1024, "small_bufsize" : 8192, "large_bufsize" : 135168} 92 | # iobuf_options = 93 | 94 | # qos_timeslice_in_usecs = 0 95 | 96 | [monitor] 97 | #timeout = 1.0 98 | #log_file_dir = 99 | -------------------------------------------------------------------------------- /tests/ceph-nvmeof.cluster_pool.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021 International Business Machines 3 | # All rights reserved. 
4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | # 7 | # Authors: anita.shekar@ibm.com, sandy.kaur@ibm.com 8 | # 9 | 10 | [gateway] 11 | name = 12 | group = 13 | addr = 0.0.0.0 14 | port = 5500 15 | enable_auth = False 16 | state_update_notify = True 17 | state_update_timeout_in_msec = 2000 18 | state_update_interval_sec = 5 19 | enable_spdk_discovery_controller = False 20 | encryption_key = /etc/ceph/encryption.key 21 | rebalance_period_sec = 7 22 | max_gws_in_grp = 16 23 | max_ns_to_change_lb_grp = 8 24 | #omap_file_lock_duration = 20 25 | #omap_file_lock_retries = 30 26 | #omap_file_lock_retry_sleep_interval = 1.0 27 | #omap_file_update_reloads = 10 28 | #enable_prometheus_exporter = True 29 | #prometheus_exporter_ssl = True 30 | #prometheus_port = 10008 31 | #prometheus_bdev_pools = rbd 32 | #prometheus_stats_interval = 10 33 | #verify_nqns = True 34 | #verify_keys = True 35 | #verify_listener_ip = True 36 | #allowed_consecutive_spdk_ping_failures = 1 37 | #spdk_ping_interval_in_seconds = 2.0 38 | #max_hosts_per_namespace = 8 39 | #max_namespaces_with_netmask = 1000 40 | #max_subsystems = 128 41 | #max_hosts = 2048 42 | #max_namespaces = 2048 43 | #max_namespaces_per_subsystem = 256 44 | #max_hosts_per_subsystem = 32 45 | 46 | [gateway-logs] 47 | log_level=debug 48 | #log_files_enabled = True 49 | #log_files_rotation_enabled = True 50 | #verbose_log_messages = True 51 | #max_log_file_size_in_mb=10 52 | #max_log_files_count=20 53 | #max_log_directory_backups=10 54 | # 55 | # Notice that if you change the log directory the log files will only be visible inside the container 56 | # 57 | #log_directory = /var/log/ceph/ 58 | 59 | [discovery] 60 | addr = 0.0.0.0 61 | port = 8009 62 | 63 | [ceph] 64 | pool = rbd 65 | config_file = /etc/ceph/ceph.conf 66 | 67 | [mtls] 68 | server_key = ./server.key 69 | client_key = ./client.key 70 | server_cert = ./server.crt 71 | client_cert = ./client.crt 72 | 73 | [spdk] 74 | cluster_connections = 32 75 | tgt_path = 
/usr/local/bin/nvmf_tgt 76 | #rpc_socket_dir = /var/tmp/ 77 | #rpc_socket_name = spdk.sock 78 | #tgt_cmd_extra_args = --env-context="--no-huge -m1024" --iova-mode=va 79 | timeout = 60.0 80 | #log_level = 81 | #protocol_log_level = WARNING 82 | #log_file_dir = 83 | 84 | # Example value: --lcores (0-1) -L all 85 | # tgt_cmd_extra_args = 86 | 87 | # transports = tcp 88 | 89 | # Example value: {"max_queue_depth" : 16, "max_io_size" : 4194304, "io_unit_size" : 1048576, "zcopy" : false} 90 | transport_tcp_options = {"in_capsule_data_size" : 8192, "max_io_qpairs_per_ctrlr" : 7} 91 | 92 | # Example value: {"small_pool_count" : 8192, "large_pool_count" : 1024, "small_bufsize" : 8192, "large_bufsize" : 135168} 93 | # iobuf_options = 94 | 95 | # qos_timeslice_in_usecs = 0 96 | 97 | [monitor] 98 | #timeout = 1.0 99 | #log_file_dir = 100 | -------------------------------------------------------------------------------- /tests/ceph-nvmeof.flat_bdevs_per_cluster.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021 International Business Machines 3 | # All rights reserved. 
4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | # 7 | # Authors: anita.shekar@ibm.com, sandy.kaur@ibm.com 8 | # 9 | 10 | [gateway] 11 | name = 12 | group = 13 | addr = 0.0.0.0 14 | port = 5500 15 | enable_auth = False 16 | state_update_notify = True 17 | state_update_timeout_in_msec = 2000 18 | state_update_interval_sec = 5 19 | enable_spdk_discovery_controller = False 20 | encryption_key = /etc/ceph/encryption.key 21 | rebalance_period_sec = 7 22 | max_gws_in_grp = 16 23 | max_ns_to_change_lb_grp = 8 24 | #omap_file_lock_duration = 20 25 | #omap_file_lock_retries = 30 26 | #omap_file_lock_retry_sleep_interval = 1.0 27 | #omap_file_update_reloads = 10 28 | #enable_prometheus_exporter = True 29 | #prometheus_exporter_ssl = True 30 | #prometheus_port = 10008 31 | #prometheus_bdev_pools = rbd 32 | #prometheus_stats_interval = 10 33 | #verify_nqns = True 34 | #verify_keys = True 35 | #verify_listener_ip = True 36 | #allowed_consecutive_spdk_ping_failures = 1 37 | #spdk_ping_interval_in_seconds = 2.0 38 | #max_hosts_per_namespace = 8 39 | #max_namespaces_with_netmask = 1000 40 | #max_subsystems = 128 41 | #max_hosts = 2048 42 | #max_namespaces = 2048 43 | #max_namespaces_per_subsystem = 256 44 | #max_hosts_per_subsystem = 32 45 | 46 | [gateway-logs] 47 | log_level=debug 48 | #log_files_enabled = True 49 | #log_files_rotation_enabled = True 50 | #verbose_log_messages = True 51 | #max_log_file_size_in_mb=10 52 | #max_log_files_count=20 53 | #max_log_directory_backups=10 54 | # 55 | # Notice that if you change the log directory the log files will only be visible inside the container 56 | # 57 | #log_directory = /var/log/ceph/ 58 | 59 | [discovery] 60 | addr = 0.0.0.0 61 | port = 8009 62 | 63 | [ceph] 64 | pool = rbd 65 | config_file = /etc/ceph/ceph.conf 66 | 67 | [mtls] 68 | server_key = ./server.key 69 | client_key = ./client.key 70 | server_cert = ./server.crt 71 | client_cert = ./client.crt 72 | 73 | [spdk] 74 | flat_bdevs_per_cluster = 32 75 | tgt_path = 
/usr/local/bin/nvmf_tgt 76 | #rpc_socket_dir = /var/tmp/ 77 | #rpc_socket_name = spdk.sock 78 | #tgt_cmd_extra_args = --env-context="--no-huge -m1024" --iova-mode=va 79 | timeout = 60.0 80 | #log_level = 81 | #protocol_log_level = WARNING 82 | #log_file_dir = 83 | 84 | # Example value: --lcores (0-1) -L all 85 | # tgt_cmd_extra_args = 86 | 87 | # transports = tcp 88 | 89 | # Example value: {"max_queue_depth" : 16, "max_io_size" : 4194304, "io_unit_size" : 1048576, "zcopy" : false} 90 | transport_tcp_options = {"in_capsule_data_size" : 8192, "max_io_qpairs_per_ctrlr" : 7} 91 | 92 | # Example value: {"small_pool_count" : 8192, "large_pool_count" : 1024, "small_bufsize" : 8192, "large_bufsize" : 135168} 93 | # iobuf_options = 94 | 95 | # qos_timeslice_in_usecs = 0 96 | 97 | [monitor] 98 | #timeout = 1.0 99 | #log_file_dir = 100 | -------------------------------------------------------------------------------- /tests/ceph-nvmeof.no-huge.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021 International Business Machines 3 | # All rights reserved. 
4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | # 7 | # Authors: anita.shekar@ibm.com, sandy.kaur@ibm.com 8 | # 9 | 10 | [gateway] 11 | name = 12 | group = 13 | addr = 0.0.0.0 14 | port = 5500 15 | enable_auth = False 16 | state_update_notify = True 17 | state_update_timeout_in_msec = 2000 18 | state_update_interval_sec = 5 19 | enable_spdk_discovery_controller = False 20 | encryption_key = /etc/ceph/encryption.key 21 | rebalance_period_sec = 7 22 | max_gws_in_grp = 16 23 | max_ns_to_change_lb_grp = 8 24 | #omap_file_lock_duration = 20 25 | #omap_file_lock_retries = 30 26 | #omap_file_lock_retry_sleep_interval = 1.0 27 | #omap_file_update_reloads = 10 28 | #enable_prometheus_exporter = True 29 | #prometheus_exporter_ssl = True 30 | #prometheus_port = 10008 31 | #prometheus_bdev_pools = rbd 32 | #prometheus_stats_interval = 10 33 | #verify_nqns = True 34 | #verify_keys = True 35 | #verify_listener_ip = True 36 | #allowed_consecutive_spdk_ping_failures = 1 37 | #spdk_ping_interval_in_seconds = 2.0 38 | #max_hosts_per_namespace = 8 39 | #max_namespaces_with_netmask = 1000 40 | #max_subsystems = 128 41 | #max_hosts = 2048 42 | #max_namespaces = 2048 43 | #max_namespaces_per_subsystem = 256 44 | #max_hosts_per_subsystem = 128 45 | 46 | [gateway-logs] 47 | log_level=debug 48 | #log_files_enabled = True 49 | #log_files_rotation_enabled = True 50 | #verbose_log_messages = True 51 | #max_log_file_size_in_mb=10 52 | #max_log_files_count=20 53 | #max_log_directory_backups=10 54 | # 55 | # Notice that if you change the log directory the log files will only be visible inside the container 56 | # 57 | #log_directory = /var/log/ceph/ 58 | 59 | [discovery] 60 | addr = 0.0.0.0 61 | port = 8009 62 | 63 | [ceph] 64 | pool = rbd 65 | config_file = /etc/ceph/ceph.conf 66 | 67 | [mtls] 68 | server_key = ./server.key 69 | client_key = ./client.key 70 | server_cert = ./server.crt 71 | client_cert = ./client.crt 72 | 73 | [spdk] 74 | cluster_connections = 32 75 | mem_size=4096 
76 | tgt_path = /usr/local/bin/nvmf_tgt 77 | #rpc_socket_dir = /var/tmp/ 78 | #rpc_socket_name = spdk.sock 79 | #tgt_cmd_extra_args = --env-context="--no-huge -m1024" --iova-mode=va 80 | timeout = 60.0 81 | #log_level = 82 | #protocol_log_level = WARNING 83 | #log_file_dir = 84 | 85 | # Example value: --lcores (0-1) -L all 86 | # tgt_cmd_extra_args = 87 | 88 | # transports = tcp 89 | 90 | # Example value: {"max_queue_depth" : 16, "max_io_size" : 4194304, "io_unit_size" : 1048576, "zcopy" : false} 91 | transport_tcp_options = {"in_capsule_data_size" : 8192, "max_io_qpairs_per_ctrlr" : 7} 92 | 93 | # Example value: {"small_pool_count" : 8192, "large_pool_count" : 1024, "small_bufsize" : 8192, "large_bufsize" : 135168} 94 | # iobuf_options = 95 | 96 | # qos_timeslice_in_usecs = 0 97 | 98 | [monitor] 99 | #timeout = 1.0 100 | #log_file_dir = 101 | -------------------------------------------------------------------------------- /lib/go/nvmeof/go.sum: -------------------------------------------------------------------------------- 1 | github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= 2 | github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 3 | github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= 4 | github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= 5 | github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= 6 | github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= 7 | github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= 8 | github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= 9 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= 10 | github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 11 | go.opentelemetry.io/auto/sdk v1.1.0 
h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= 12 | go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= 13 | go.opentelemetry.io/otel v1.35.0 h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ= 14 | go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y= 15 | go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M= 16 | go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE= 17 | go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY= 18 | go.opentelemetry.io/otel/sdk v1.35.0/go.mod h1:+ga1bZliga3DxJ3CQGg3updiaAJoNECOgJREo9KHGQg= 19 | go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o= 20 | go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w= 21 | go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs= 22 | go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc= 23 | golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= 24 | golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= 25 | golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= 26 | golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= 27 | golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= 28 | golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= 29 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250324211829-b45e905df463 h1:e0AIkUUhxyBKh6ssZNrAMeqhA7RKUj42346d1y02i2g= 30 | google.golang.org/genproto/googleapis/rpc v0.0.0-20250324211829-b45e905df463/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= 31 | google.golang.org/grpc v1.73.0 h1:VIWSmpI2MegBtTuFt5/JWy2oXxtjJ/e89Z70ImfD2ok= 32 | google.golang.org/grpc v1.73.0/go.mod 
h1:50sbHOUqWoCQGI8V2HQLJM0B+LMlIUjNSZmow7EVBQc= 33 | google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= 34 | google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= 35 | -------------------------------------------------------------------------------- /tests/test_cli_force_tls.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import socket 3 | import grpc 4 | from control.server import GatewayServer 5 | from control.cli import main as cli 6 | from control.cephutils import CephUtils 7 | from control.proto import gateway_pb2_grpc as pb2_grpc 8 | 9 | pool = "rbd" 10 | subsystem = "nqn.2016-06.io.spdk:cnode1" 11 | serial = "Ceph00000000000001" 12 | host = "nqn.2016-06.io.spdk:host1" 13 | config = "ceph-nvmeof.conf" 14 | group_name = "group1" 15 | addr = "127.0.0.1" 16 | host_name = socket.gethostname() 17 | 18 | 19 | @pytest.fixture(scope="module") 20 | def gateway(config): 21 | """Sets up and tears down Gateway""" 22 | 23 | addr = config.get("gateway", "addr") 24 | port = config.getint("gateway", "port") 25 | config.config["gateway"]["group"] = group_name 26 | config.config["gateway-logs"]["log_level"] = "debug" 27 | config.config["gateway"]["force_tls"] = "true" 28 | ceph_utils = CephUtils(config) 29 | 30 | with GatewayServer(config) as gateway: 31 | 32 | # Start gateway 33 | gateway.gw_logger_object.set_log_level("debug") 34 | ceph_utils.execute_ceph_monitor_command( 35 | "{" + f'"prefix":"nvme-gw create", "id": "{gateway.name}", "pool": "{pool}", ' 36 | f'"group": "{group_name}"' + "}" 37 | ) 38 | gateway.serve() 39 | 40 | # Bind the client and Gateway 41 | channel = grpc.insecure_channel(f"{addr}:{port}") 42 | pb2_grpc.GatewayStub(channel) 43 | yield gateway 44 | 45 | # Stop gateway 46 | gateway.gateway_rpc.gateway_state.delete_state() 47 | gateway.server.stop(grace=1) 48 | 49 | 50 | class TestForceTls: 51 | def test_force_tls(self, caplog, 
gateway): 52 | caplog.clear() 53 | cli(["subsystem", "add", "--subsystem", subsystem, 54 | "--no-group-append"]) 55 | assert f"Adding subsystem {subsystem}: Successful" in caplog.text 56 | caplog.clear() 57 | cli(["host", "add", "--subsystem", subsystem, "--host-nqn", host]) 58 | assert f"Failure adding host {host} to {subsystem}: host must " \ 59 | f"have a PSK key" in caplog.text 60 | caplog.clear() 61 | cli(["host", "add", "--subsystem", subsystem, "--host-nqn", "*"]) 62 | assert f"Allowing open host access to {subsystem}: Successful" in caplog.text 63 | assert f"Open host access to subsystem {subsystem} might be a " \ 64 | f"security breach" in caplog.text 65 | caplog.clear() 66 | cli(["host", "del", "--subsystem", subsystem, "--host-nqn", "*"]) 67 | assert f"Disabling open host access to {subsystem}: Successful" in caplog.text 68 | caplog.clear() 69 | cli(["listener", "add", "--subsystem", subsystem, "-a", addr, 70 | "-s", "5001", "-t", host_name, "--verify-host-name"]) 71 | assert f"Failure adding {subsystem} listener at {addr}:5001: " \ 72 | f"Secure channel must be used" in caplog.text 73 | caplog.clear() 74 | cli(["listener", "add", "--subsystem", subsystem, "-a", addr, 75 | "-s", "5002", "-t", host_name, "--verify-host-name", "--secure"]) 76 | assert f"Adding {subsystem} listener at {addr}:5002: Successful" in caplog.text 77 | -------------------------------------------------------------------------------- /tests/test_max_subsystems.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from control.server import GatewayServer 3 | from control.cli import main as cli 4 | from control.cephutils import CephUtils 5 | import grpc 6 | 7 | config = "ceph-nvmeof.conf" 8 | pool = "rbd" 9 | group_name = "group1" 10 | subsystem_prefix = "nqn.2016-06.io.spdk:cnode" 11 | max_subsystems = 4 12 | 13 | 14 | @pytest.fixture(scope="module") 15 | def gateway(config, request): 16 | """Sets up and tears down Gateway""" 17 | 
18 | addr = config.get("gateway", "addr") 19 | port = config.getint("gateway", "port") 20 | config.config["gateway"]["group"] = group_name 21 | config.config["gateway"]["max_subsystems"] = str(max_subsystems) 22 | config.config["gateway-logs"]["log_level"] = "debug" 23 | ceph_utils = CephUtils(config) 24 | 25 | with GatewayServer(config) as gateway: 26 | 27 | # Start gateway 28 | gateway.gw_logger_object.set_log_level("debug") 29 | ceph_utils.execute_ceph_monitor_command( 30 | "{" + f'"prefix":"nvme-gw create", "id": "{gateway.name}", "pool": "{pool}", ' 31 | f'"group": "{group_name}"' + "}" 32 | ) 33 | gateway.serve() 34 | 35 | # Bind the client and Gateway 36 | grpc.insecure_channel(f"{addr}:{port}") 37 | yield gateway 38 | 39 | # Stop gateway 40 | gateway.server.stop(grace=1) 41 | gateway.gateway_rpc.gateway_state.delete_state() 42 | 43 | 44 | class TestMaxSubsystems: 45 | def test_max_subsystems(self, caplog, gateway): 46 | gw = gateway 47 | for i in range(max_subsystems): 48 | subsys = f"{subsystem_prefix}{i}" 49 | caplog.clear() 50 | cli(["subsystem", "add", "--subsystem", f"{subsys}", "--no-group-append"]) 51 | assert f"Adding subsystem {subsys}: Successful" in caplog.text 52 | 53 | caplog.clear() 54 | subsys = f"{subsystem_prefix}XXX" 55 | cli(["subsystem", "add", "--subsystem", f"{subsys}", "--no-group-append"]) 56 | assert f"Failure creating subsystem {subsys}: Maximal number of subsystems " \ 57 | f"({max_subsystems}) has already been reached" in caplog.text 58 | 59 | for i in range(max_subsystems): 60 | subsys = f"{subsystem_prefix}{i}" 61 | caplog.clear() 62 | cli(["subsystem", "del", "--subsystem", f"{subsys}"]) 63 | assert f"Deleting subsystem {subsys}: Successful" in caplog.text 64 | 65 | caplog.clear() 66 | cli(["subsystem", "list"]) 67 | assert "No subsystems" in caplog.text 68 | 69 | gw.gateway_rpc.max_subsystems = 100 70 | for i in range(max_subsystems): 71 | subsys = f"{subsystem_prefix}{i}" 72 | caplog.clear() 73 | cli(["subsystem", "add", 
"--subsystem", f"{subsys}", "--no-group-append"]) 74 | assert f"Adding subsystem {subsys}: Successful" in caplog.text 75 | 76 | subsys = f"{subsystem_prefix}XXX" 77 | caplog.clear() 78 | cli(["subsystem", "add", "--subsystem", f"{subsys}", "--no-group-append"]) 79 | assert f"Failure creating subsystem {subsys}: Maximal number of subsystems " \ 80 | f"({max_subsystems}) has already been reached" not in caplog.text 81 | # Make sure the error we got is from SPDK and not from the gateway 82 | assert f'"message": "Unable to create subsystem {subsys}"' in caplog.text 83 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | # Publishes official python package at https://pypi.org/project/ceph-nvmeof/ 2 | # This is official release, and will be pushed to pypi when a new tag is pushed in repo. 3 | # Usage: pip install ceph-nvmeof==1.2.15 4 | 5 | # Also publishes dev python package at https://test.pypi.org/p/ceph-nvmeof 6 | # This is for dev releases, and will be pushed to test pypi on all merges to "devel" branch. 7 | # Usage: `pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple ceph-nvmeof==1.2.15` 8 | 9 | name: Publish release to PyPI 10 | on: 11 | push: 12 | tags: 13 | - '*' 14 | branches: 15 | - 'devel' 16 | release: 17 | types: 18 | - published 19 | workflow_dispatch: 20 | inputs: 21 | python_version: 22 | description: 'Python Version' 23 | required: false 24 | type: string 25 | pdm_version: 26 | description: 'PDM Version' 27 | required: false 28 | type: string 29 | pypi_env: 30 | description: 'Push to test.pypi (dev) or pypi (prod). And `dev-debug-version` pushes to test.pypi with a test package version.' 
31 | required: true 32 | default: 'dev' 33 | type: choice 34 | options: 35 | - dev 36 | - prod 37 | - dev-debug-version 38 | ref: 39 | description: 'Build using this branch, tag or SHA' 40 | required: false 41 | type: string 42 | 43 | env: 44 | DEFAULT_PYTHON: ${{ inputs.python_version || '3.9' }} 45 | DEFAULT_PDM: ${{ inputs.pdm_version || '2.7.4' }} 46 | REF: ${{ inputs.ref || 'devel' }} 47 | 48 | jobs: 49 | pypi-publish: 50 | name: Upload release to PyPI 51 | if: github.repository == 'ceph/ceph-nvmeof' 52 | runs-on: ubuntu-latest 53 | permissions: 54 | id-token: write 55 | steps: 56 | - uses: actions/checkout@v3 57 | with: 58 | ref: ${{env.REF}} 59 | - uses: pdm-project/setup-pdm@v3 60 | with: 61 | python-version: ${{env.DEFAULT_PYTHON}} 62 | version: ${{env.DEFAULT_PDM}} 63 | 64 | - name: Sync Dependencies 65 | run: pdm sync -v --no-isolation --no-self --no-editable 66 | - name: Compile Protocol Buffers 67 | run: pdm run protoc 68 | 69 | - name: Set current date as env variable 70 | run: echo "NOW=$(date +'%Y%m%d%H%M%S')" >> $GITHUB_ENV 71 | - name: (DEBUG) Set dev version number 72 | # For debug-release, set version "dev" (eg 1.2.15.dev20240710183756) 73 | # (to avoid messing current releases versions like 1.2.15). 
74 | run: | 75 | NEW_VERSION="$(pdm show --version).dev$NOW" 76 | sed -i "s/^version = \".*\"/version = \"${NEW_VERSION}\"/" pyproject.toml 77 | if: (inputs.pypi_env && inputs.pypi_env == 'dev-debug-version') 78 | - name: Build package 79 | run: pdm build --config-setting="--build-number=$NOW" --no-sdist 80 | 81 | - name: Publish package distributions to PyPI 82 | run: pdm publish --no-build 83 | if: ( github.event_name == 'release' && github.event.action == 'published' ) || (github.event_name == 'workflow_dispatch' && inputs.pypi_env == 'prod') 84 | 85 | - name: Publish package distributions to Test PyPI 86 | run: pdm publish --no-build -r testpypi 87 | if: github.event_name == 'push' || (github.event_name == 'workflow_dispatch' && (inputs.pypi_env == 'dev' || inputs.pypi_env == 'dev-debug-version')) 88 | -------------------------------------------------------------------------------- /tests/ha/connect_panic.sh: -------------------------------------------------------------------------------- 1 | set -xe 2 | 3 | # GW name by index 4 | gw_name() { 5 | i=$1 6 | docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /'$i'/ {print $1}' 7 | } 8 | 9 | # Extended sleep to exceed monitor connect timeout 10 | extended_sleep() { 11 | # Sleep for 35 seconds to exceed typical monitor connect timeout (30 seconds) 12 | # This ensures the test triggers the connect panic behavior 13 | seconds=35 14 | echo "Sleeping for $seconds secs (extended delay to exceed monitor connect timeout)" 15 | sleep "$seconds" 16 | } 17 | 18 | # Check if a gateway has panicked by looking for the specific log message 19 | check_gateway_panic_log() { 20 | GW_NAME=$1 21 | # Look for the specific panic message in the gateway logs 22 | if docker logs "$GW_NAME" 2>&1 | grep -q "what(): Did not receive initial map from monitor (connect panic)."; then 23 | echo "✅ Found connect panic log message in gateway $GW_NAME" 24 | return 0 25 | else 26 | echo "❌ Connect panic log message not found in 
gateway $GW_NAME" 27 | return 1 28 | fi 29 | } 30 | 31 | # Check if a gateway container has exited 32 | check_gateway_exited() { 33 | GW_NAME=$1 34 | if ! docker ps --format '{{.Names}}' | grep -q "$GW_NAME"; then 35 | echo "✅ Gateway $GW_NAME has exited as expected" 36 | return 0 37 | else 38 | echo "❌ Gateway $GW_NAME is still running" 39 | return 1 40 | fi 41 | } 42 | 43 | # 44 | # MAIN 45 | # 46 | 47 | echo "🧪 Testing connect panic behavior when monitor client timeout is exceeded" 48 | 49 | # Step 1 Stop the existing deployment 50 | make down 51 | 52 | # Step 2 Start a new deployment with a single gateway 53 | echo "Starting deployment with single gateway..." 54 | docker compose up -d --scale nvmeof=1 nvmeof 55 | 56 | # Step 3 Wait for the gateway to be running 57 | echo "Waiting for gateway to be running..." 58 | timeout_seconds=60 59 | elapsed=0 60 | while [ $elapsed -lt $timeout_seconds ]; do 61 | GW_NAME=$(gw_name 1) 62 | if [ -n "$GW_NAME" ]; then 63 | container_status=$(docker inspect -f '{{.State.Status}}' "$GW_NAME" 2>/dev/null || echo "unknown") 64 | if [ "$container_status" = "running" ]; then 65 | echo "✅ Gateway $GW_NAME is running after ${elapsed}s" 66 | break 67 | fi 68 | fi 69 | sleep 1 70 | elapsed=$((elapsed + 1)) 71 | echo -n "." 72 | if [ $elapsed -eq $timeout_seconds ]; then 73 | echo "❌ Timeout waiting for gateway to be running after ${timeout_seconds}s" 74 | docker ps -a 75 | exit 1 76 | fi 77 | done 78 | 79 | # Step 4 Extended sleep to exceed monitor connect timeout 80 | echo "Sleeping to exceed monitor connect timeout..." 81 | extended_sleep 82 | 83 | # Step 5 Verify the gateway has panicked and exited 84 | echo "Checking gateway status after extended sleep..." 
85 | if check_gateway_panic_log "$GW_NAME"; then 86 | echo "✅ Gateway panic log verified" 87 | else 88 | echo "❌ Gateway panic log not found" 89 | exit 1 90 | fi 91 | 92 | if check_gateway_exited "$GW_NAME"; then 93 | echo "✅ Gateway exit verified" 94 | else 95 | echo "❌ Gateway exit verification failed" 96 | exit 1 97 | fi 98 | 99 | echo "🎉 Connect panic test completed successfully!" 100 | echo "The test demonstrates that the monitor client timeout mechanism works correctly:" 101 | echo "- Gateways that cannot connect to the monitor within the timeout will panic and exit" 102 | echo "- This is the expected safety behavior to prevent gateways from running without monitor connectivity" 103 | echo "- The test verifies that the panic mechanism is working as designed" 104 | -------------------------------------------------------------------------------- /tests/ceph-nvmeof.crc32c.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021 International Business Machines 3 | # All rights reserved. 
4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | # 7 | # Authors: anita.shekar@ibm.com, sandy.kaur@ibm.com 8 | # 9 | 10 | [gateway] 11 | name = 12 | group = 13 | addr = 0.0.0.0 14 | port = 5500 15 | enable_auth = False 16 | state_update_notify = True 17 | state_update_timeout_in_msec = 2000 18 | state_update_interval_sec = 5 19 | break_update_interval_sec = 25 20 | enable_spdk_discovery_controller = False 21 | encryption_key = /etc/ceph/encryption.key 22 | rebalance_period_sec = 7 23 | max_gws_in_grp = 16 24 | max_ns_to_change_lb_grp = 8 25 | #abort_on_errors = True 26 | #omap_file_ignore_unlock_errors = False 27 | #omap_file_lock_on_read = True 28 | #omap_file_lock_duration = 20 29 | #omap_file_lock_retries = 30 30 | #omap_file_lock_retry_sleep_interval = 1.0 31 | #omap_file_update_reloads = 10 32 | #enable_prometheus_exporter = True 33 | #prometheus_exporter_ssl = True 34 | #prometheus_port = 10008 35 | #prometheus_bdev_pools = rbd 36 | #prometheus_stats_interval = 10 37 | #prometheus_startup_delay = 240 38 | #prometheus_connection_list_cache_expiration = 60 39 | #verify_nqns = True 40 | #verify_keys = True 41 | #verify_listener_ip = True 42 | #allowed_consecutive_spdk_ping_failures = 1 43 | #spdk_ping_interval_in_seconds = 2.0 44 | #max_hosts_per_namespace = 8 45 | #max_namespaces_with_netmask = 1000 46 | #max_subsystems = 128 47 | #max_hosts = 2048 48 | #max_namespaces = 4096 49 | #max_namespaces_per_subsystem = 256 50 | #max_hosts_per_subsystem = 128 51 | #subsystem_cache_expiration = 5 52 | 53 | [gateway-logs] 54 | log_level=debug 55 | #log_files_enabled = True 56 | #log_files_rotation_enabled = True 57 | #verbose_log_messages = True 58 | #max_log_file_size_in_mb=10 59 | #max_log_files_count=20 60 | #max_log_directory_backups=10 61 | # 62 | # Notice that if you change the log directory the log files will only be visible inside the container 63 | # 64 | #log_directory = /var/log/ceph/ 65 | 66 | [discovery] 67 | addr = 0.0.0.0 68 | port = 8009 69 | 
#abort_on_errors = True 70 | 71 | [ceph] 72 | pool = rbd 73 | config_file = /etc/ceph/ceph.conf 74 | 75 | [mtls] 76 | server_key = ./server.key 77 | client_key = ./client.key 78 | server_cert = ./server.crt 79 | client_cert = ./client.crt 80 | 81 | [spdk] 82 | # Support multiple cluster allocation strategies 83 | # Legacy strategy, per ANA grp, max bdevs_per_cluster 84 | # bdevs_per_cluster = 32 85 | # Flat bdevs per cluster, ignore ANA grp id 86 | # flat_bdevs_per_cluster = 32 87 | # Cluster pool 88 | cluster_connections = 32 89 | tgt_path = /usr/local/bin/nvmf_tgt 90 | #rpc_socket_dir = /var/tmp/ 91 | #rpc_socket_name = spdk.sock 92 | #tgt_cmd_extra_args = --env-context="--no-huge -m1024" --iova-mode=va 93 | timeout = 60.0 94 | #log_level = 95 | #protocol_log_level = WARNING 96 | #log_file_dir = 97 | # DSA CRC32 acceleration, enabled by default 98 | #enable_dsa_acceleration = False 99 | 100 | # RBD CRC32C usage, enabled for testing 101 | rbd_with_crc32c = True 102 | 103 | # Example value: --lcores (0-1) -L all 104 | # tgt_cmd_extra_args = 105 | 106 | # transports = tcp 107 | 108 | # Example value: {"max_queue_depth" : 16, "max_io_size" : 4194304, "io_unit_size" : 1048576, "zcopy" : false} 109 | transport_tcp_options = {"in_capsule_data_size" : 8192, "max_io_qpairs_per_ctrlr" : 7} 110 | 111 | # Example value: {"small_pool_count" : 8192, "large_pool_count" : 1024, "small_bufsize" : 8192, "large_bufsize" : 135168} 112 | # iobuf_options = 113 | 114 | qos_timeslice_in_usecs = 0 115 | #notifications_interval = 60 116 | 117 | [monitor] 118 | #timeout = 1.0 119 | #log_file_dir = 120 | -------------------------------------------------------------------------------- /tests/ha/namespaces.sh: -------------------------------------------------------------------------------- 1 | set -xe 2 | 3 | GW1_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}') 4 | GW2_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /2/ 
{print $1}') 5 | GW1_IP="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW1_NAME")" 6 | GW2_IP="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW2_NAME")" 7 | NQN="nqn.2016-06.io.spdk:cnode17" 8 | 9 | create_namespace() { 10 | GW_IP=$1 11 | NSID=$2 12 | ANA_GRP=$3 13 | IMAGE="test_image_$NSID" 14 | 15 | docker compose run --rm nvmeof-cli --server-address $GW_IP --server-port 5500 namespace add --subsystem $NQN --nsid $NSID --rbd-pool rbd --rbd-image $IMAGE --size 10 --rbd-create-image -l $ANA_GRP 16 | } 17 | 18 | delete_namespaces() { 19 | GW_IP=$1 20 | FIRST=$2 21 | INC=$3 22 | LAST=$4 23 | 24 | for i in $(seq $FIRST $INC $LAST); do 25 | docker compose run --rm nvmeof-cli --server-address $GW_IP --server-port 5500 namespace del --subsystem $NQN --nsid $i 26 | done 27 | } 28 | 29 | create_namespaces() { 30 | GW_IP=$1 31 | ANA_GRP=$2 32 | FIRST=$3 33 | INC=$4 34 | LAST=$5 35 | 36 | for i in $(seq $FIRST $INC $LAST); do 37 | create_namespace $GW_IP $i $ANA_GRP 38 | done 39 | } 40 | 41 | 42 | 43 | num_nss() { 44 | echo "$1" | jq ".subsystems[$2].namespaces | length" 45 | } 46 | 47 | verify_num_namespaces() { 48 | GW_IP=$1 49 | EXPECTED_NAMESPACES=$2 50 | NQN_INDEX=1 # the tested subsystem is expected to be a second one, after tests/ha/setup. 
51 | 52 | for i in $(seq 100); do 53 | subs=$(docker compose run -T --rm nvmeof-cli --server-address $GW_IP --server-port 5500 get_subsystems 2>&1 1>/dev/null | grep -v Creating | sed 's/Get subsystems://') 54 | nss="$(num_nss "$subs" $NQN_INDEX)" 55 | if [ "$nss" -ne "$EXPECTED_NAMESPACES" ]; then 56 | echo "Not ready $GW_IP $nss $EXPECTED_NAMESPACES" 57 | sleep 1 58 | continue 59 | fi 60 | echo "Ready $GW_IP $nss" 61 | return 62 | done 63 | echo ‼️TIMEOUT 64 | exit 1 65 | } 66 | 67 | # 68 | # MAIN 69 | # 70 | NO_NAMESPACE=0 71 | ALL_NAMESPACES=200 72 | HALF_NAMESPACES=$(expr $ALL_NAMESPACES / 2) 73 | GW1_ANA=1 74 | GW1_FIRST=1 75 | GW2_FIRST=2 76 | GW2_ANA=2 77 | GW_INC=2 78 | 79 | echo "ℹ️ Step 1: create subsystem $NQN" 80 | docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 subsystem add --subsystem $NQN --no-group-append 81 | verify_num_namespaces $GW1_IP $NO_NAMESPACE 82 | verify_num_namespaces $GW2_IP $NO_NAMESPACE 83 | 84 | echo "ℹ️ Step 2: create namespaces" 85 | create_namespaces $GW1_IP $GW1_ANA $GW1_FIRST $GW_INC $ALL_NAMESPACES & 86 | create_namespaces $GW2_IP $GW2_ANA $GW2_FIRST $GW_INC $ALL_NAMESPACES & 87 | wait 88 | verify_num_namespaces $GW1_IP $ALL_NAMESPACES 89 | verify_num_namespaces $GW2_IP $ALL_NAMESPACES 90 | 91 | echo "ℹ️ Step 3: delete half of namespaces" 92 | delete_namespaces $GW1_IP $(expr $HALF_NAMESPACES + $GW1_FIRST) $GW_INC $ALL_NAMESPACES & 93 | delete_namespaces $GW2_IP $(expr $HALF_NAMESPACES + $GW2_FIRST) $GW_INC $ALL_NAMESPACES & 94 | wait 95 | verify_num_namespaces $GW1_IP $HALF_NAMESPACES 96 | verify_num_namespaces $GW2_IP $HALF_NAMESPACES 97 | 98 | echo "ℹ️ Step 4: delete the restof namespaces" 99 | delete_namespaces $GW1_IP $GW1_FIRST $GW_INC $HALF_NAMESPACES 100 | delete_namespaces $GW2_IP $GW2_FIRST $GW_INC $HALF_NAMESPACES 101 | wait 102 | verify_num_namespaces $GW1_IP $NO_NAMESPACE 103 | verify_num_namespaces $GW2_IP $NO_NAMESPACE 104 | 105 | echo "ℹ️ Step 5: delete subsystem $NQN" 106 | 
docker compose run --rm nvmeof-cli --server-address $GW1_IP --server-port 5500 subsystem del --subsystem $NQN 107 | -------------------------------------------------------------------------------- /ceph-nvmeof.conf: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2021 International Business Machines 3 | # All rights reserved. 4 | # 5 | # SPDX-License-Identifier: LGPL-3.0-or-later 6 | # 7 | # Authors: anita.shekar@ibm.com, sandy.kaur@ibm.com 8 | # 9 | 10 | [gateway] 11 | name = 12 | group = 13 | addr = 0.0.0.0 14 | port = 5500 15 | enable_auth = False 16 | state_update_notify = True 17 | state_update_timeout_in_msec = 2000 18 | state_update_interval_sec = 5 19 | break_update_interval_sec = 25 20 | enable_spdk_discovery_controller = False 21 | encryption_key = /etc/ceph/encryption.key 22 | rebalance_period_sec = 7 23 | max_gws_in_grp = 16 24 | max_ns_to_change_lb_grp = 8 25 | #abort_on_errors = True 26 | #omap_file_ignore_unlock_errors = False 27 | #omap_file_lock_on_read = True 28 | #omap_file_lock_duration = 20 29 | #omap_file_lock_retries = 30 30 | #omap_file_lock_retry_sleep_interval = 1.0 31 | #omap_file_update_reloads = 10 32 | #omap_file_update_attempts = 500 33 | #enable_prometheus_exporter = True 34 | #prometheus_exporter_ssl = True 35 | #prometheus_port = 10008 36 | #prometheus_bdev_pools = rbd 37 | #prometheus_stats_interval = 10 38 | #prometheus_frequency_slow_down_factor = 3.0 39 | #prometheus_cycles_to_adjust_speed = 3 40 | #prometheus_startup_delay = 240 41 | #prometheus_connection_list_cache_expiration = 60 42 | #verify_nqns = True 43 | #verify_keys = True 44 | #verify_listener_ip = True 45 | #allowed_consecutive_spdk_ping_failures = 1 46 | #spdk_ping_interval_in_seconds = 2.0 47 | #max_hosts_per_namespace = 8 48 | #max_namespaces_with_netmask = 1000 49 | #max_subsystems = 128 50 | #max_hosts = 2048 51 | #max_namespaces = 4096 52 | #max_namespaces_per_subsystem = 512 53 | 
#max_hosts_per_subsystem = 128 54 | #force_tls = False 55 | #max_message_length_in_mb = 8 56 | 57 | [gateway-logs] 58 | log_level=debug 59 | #log_files_enabled = True 60 | #log_files_rotation_enabled = True 61 | #verbose_log_messages = True 62 | #max_log_file_size_in_mb=10 63 | #max_log_files_count=20 64 | #max_log_directory_backups=10 65 | # 66 | # Notice that if you change the log directory the log files will only be visible inside the container 67 | # 68 | #log_directory = /var/log/ceph/ 69 | 70 | [discovery] 71 | addr = 0.0.0.0 72 | port = 8009 73 | 74 | [ceph] 75 | pool = rbd 76 | config_file = /etc/ceph/ceph.conf 77 | 78 | [mtls] 79 | server_key = ./server.key 80 | client_key = ./client.key 81 | server_cert = ./server.crt 82 | client_cert = ./client.crt 83 | 84 | [spdk] 85 | # Support multiple cluster allocation strategies 86 | # Legacy strategy, per ANA grp, max bdevs_per_cluster 87 | # bdevs_per_cluster = 32 88 | # Flat bdevs per cluster, ignore ANA grp id 89 | # flat_bdevs_per_cluster = 32 90 | # Cluster pool 91 | cluster_connections = 32 92 | tgt_path = /usr/local/bin/nvmf_tgt 93 | #rpc_socket_dir = /var/tmp/ 94 | #rpc_socket_name = spdk.sock 95 | #tgt_cmd_extra_args = --env-context="--no-huge -m1024" --iova-mode=va 96 | timeout = 60.0 97 | #log_level = 98 | #protocol_log_level = WARNING 99 | #log_file_dir = 100 | # DSA CRC32 acceleration, enabled by default 101 | #enable_dsa_acceleration = False 102 | 103 | # RBD CRC32C usage, disabled by default 104 | #rbd_with_crc32c = False 105 | 106 | # Example value: --lcores (0-1) -L all 107 | # tgt_cmd_extra_args = 108 | 109 | # transports = tcp 110 | 111 | # Example value: {"max_queue_depth" : 16, "max_io_size" : 4194304, "io_unit_size" : 1048576, "zcopy" : false} 112 | transport_tcp_options = {"in_capsule_data_size" : 8192, "max_io_qpairs_per_ctrlr" : 7} 113 | 114 | # Example value: {"small_pool_count" : 8192, "large_pool_count" : 1024, "small_bufsize" : 8192, "large_bufsize" : 135168, "enable_numa" : false} 115 
| # iobuf_options = 116 | 117 | qos_timeslice_in_usecs = 0 118 | #notifications_interval = 60 119 | 120 | [monitor] 121 | #timeout = 1.0 122 | #log_file_dir = 123 | -------------------------------------------------------------------------------- /Dockerfile.ceph: -------------------------------------------------------------------------------- 1 | # syntax = docker/dockerfile:1.4 2 | # vim: syntax=dockerfile 3 | FROM quay.io/centos/centos:stream9-minimal AS build 4 | 5 | 6 | ARG CEPH_CLUSTER_VERSION 7 | ARG CEPH_CLUSTER_CEPH_REPO_BASEURL 8 | ARG CEPH_CLUSTER_EPEL_REPO_URL="https://copr.fedorainfracloud.org/coprs/ceph/el9/repo/epel-9/ceph-el9-epel-9.repo" 9 | 10 | ARG MICRODNF_OPTS="\ 11 | --enablerepo crb \ 12 | --nobest \ 13 | --nodocs \ 14 | --setopt=install_weak_deps=0 \ 15 | --setopt=keepcache=1 \ 16 | --setopt=cachedir=/var/cache/microdnf \ 17 | " 18 | 19 | ARG CEPH_PACKAGES="\ 20 | ceph-common \ 21 | ceph-mon \ 22 | ceph-osd \ 23 | ceph-mds \ 24 | ceph-mgr \ 25 | ceph-mgr-dashboard \ 26 | ceph-radosgw \ 27 | ceph-exporter \ 28 | hostname \ 29 | jq \ 30 | net-tools \ 31 | iproute \ 32 | " 33 | # TODO: To remove when ceph-mgr-dashboard defines these as deps 34 | ARG EXTRA_PACKAGES="\ 35 | python3-grpcio\ 36 | python3-grpcio-tools \ 37 | " 38 | ARG DEBUG_PACKAGES="\ 39 | procps-ng \ 40 | strace \ 41 | perf \ 42 | ltrace \ 43 | lsof \ 44 | " 45 | 46 | RUN < /dev/null 26 | make -s exec SVC=ceph OPTS=-T CMD="ceph osd pool set ${RBD_DATA_POOL} allow_ec_overwrites true" 2> /dev/null 27 | cephnvmf_func subsystem add --subsystem ${NQN} --no-group-append 28 | cephnvmf_func namespace add --subsystem ${NQN} --rbd-pool ${RBD_POOL} --rbd-data-pool ${RBD_DATA_POOL} --rbd-image ${RBD_IMAGE_NAME} --size ${RBD_IMAGE_SIZE} --rbd-create-image --uuid ${UUID} --location ${LOC} 29 | 30 | echo "ℹ️ list namespaces" 31 | ns_list=$(cephnvmf_func --output stdio --format json namespace list --subsystem ${NQN}) 32 | [[ `echo $ns_list | jq -r '.status'` == "0" ]] 33 | [[ `echo $ns_list | jq 
-r '.subsystem_nqn'` == "${NQN}" ]] 34 | [[ `echo $ns_list | jq -r '.namespaces[0].nsid'` == "1" ]] 35 | [[ `echo $ns_list | jq -r '.namespaces[0].rbd_image_name'` == "${RBD_IMAGE_NAME}" ]] 36 | [[ `echo $ns_list | jq -r '.namespaces[0].rbd_pool_name'` == "${RBD_POOL}" ]] 37 | [[ `echo $ns_list | jq -r '.namespaces[0].rbd_data_pool_name'` == "${RBD_DATA_POOL}" ]] 38 | [[ `echo $ns_list | jq -r '.namespaces[0].rbd_image_size'` == "10485760" ]] 39 | [[ `echo $ns_list | jq -r '.namespaces[0].uuid'` == "${UUID}" ]] 40 | [[ `echo $ns_list | jq -r '.namespaces[0].location'` == "${LOC}" ]] 41 | [[ `echo $ns_list | jq -r '.namespaces[1]'` == "null" ]] 42 | 43 | echo "ℹ️ get RBD id" 44 | rbd_id=`make -s exec SVC=ceph OPTS=-T CMD="rbd info -p ${RBD_POOL} ${RBD_IMAGE_NAME}" 2> /dev/null | grep " id: " | sed 's/\tid: //'` 45 | 46 | echo "ℹ️ check RBD metadata" 47 | rbd_meta=`make -s exec SVC=ceph OPTS=-T CMD="rbd image-meta get -p ${RBD_POOL} ${RBD_IMAGE_NAME} NVME_IMAGE_IDENTIFICATION" 2> /dev/null` 48 | [[ "$rbd_meta" == "${GROUP_NAME}_${NQN}_${UUID}_${FSID}_${rbd_id}" ]] 49 | 50 | echo "ℹ️ change group in RBD image metadata" 51 | make -s exec SVC=ceph OPTS=-T CMD="rbd image-meta set -p ${RBD_POOL} ${RBD_IMAGE_NAME} NVME_IMAGE_IDENTIFICATION ${GROUP_NAME2}_${NQN}_${UUID}_${FSID}_${rbd_id}" 52 | 53 | echo "ℹ️ try to reuse RBD image, with a different group and UUID" 54 | set +e 55 | cephnvmf_func --output stdio namespace add --subsystem ${NQN} --rbd-pool ${RBD_POOL} --rbd-image ${RBD_IMAGE_NAME} --uuid ${UUID2} > /tmp/ns_add1.txt 2>&1 56 | if [[ $? 
-eq 0 ]]; then 57 | echo "Shouldn't be able to reuse RBD image from another group" 58 | exit 1 59 | fi 60 | set -e 61 | grep "RBD image ${RBD_POOL}/${RBD_IMAGE_NAME} is already used by a namespace in subsystem ${NQN}, group ${GROUP_NAME2}" /tmp/ns_add1.txt 62 | rm -f /tmp/ns_add1.txt 63 | 64 | echo "ℹ️ change group in RBD image metadata to an empty value" 65 | make -s exec SVC=ceph OPTS=-T CMD="rbd image-meta set -p ${RBD_POOL} ${RBD_IMAGE_NAME} NVME_IMAGE_IDENTIFICATION _${NQN}_${UUID}_${FSID}_${rbd_id}" 66 | 67 | echo "ℹ️ try to reuse RBD image, with a different group and UUID" 68 | set +e 69 | cephnvmf_func --output stdio namespace add --subsystem ${NQN} --rbd-pool ${RBD_POOL} --rbd-image ${RBD_IMAGE_NAME} --uuid ${UUID3} > /tmp/ns_add2.txt 2>&1 70 | if [[ $? -eq 0 ]]; then 71 | echo "Shouldn't be able to reuse RBD image from another group" 72 | exit 1 73 | fi 74 | set -e 75 | grep "RBD image ${RBD_POOL}/${RBD_IMAGE_NAME} is already used by a namespace in subsystem ${NQN}" /tmp/ns_add2.txt 76 | grep -q -v ", group " /tmp/ns_add2.txt 77 | rm -f /tmp/ns_add2.txt 78 | -------------------------------------------------------------------------------- /tests/ha/main_exit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -xe 3 | SCALE=1 4 | POOL="${RBD_POOL:-rbd}" 5 | 6 | background_task() { 7 | 8 | # Give gateway some time 9 | sleep 5 10 | 11 | # Waiting for the ceph container to become healthy 12 | while true; do 13 | container_status=$(docker inspect --format='{{.State.Health.Status}}' ceph) 14 | if [ "$container_status" = "healthy" ]; then 15 | # success 16 | break 17 | else 18 | # Wait for a specific time before checking again 19 | sleep 1 20 | printf . 21 | fi 22 | done 23 | echo ✅ ceph is healthy 24 | 25 | echo ℹ️ Running processes of services 26 | docker compose top 27 | 28 | echo ℹ️ Send nvme-gw create for all gateways 29 | GW_NAME='' 30 | GW_GROUP='' 31 | i=1 # a single gw index 32 | while [ ! 
-n "$GW_NAME" ]; do 33 | sleep 1 34 | GW_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | grep -v discovery | awk '$2 ~ /nvmeof/ && $2 ~ /'$i'/ {print $1}') 35 | done 36 | echo 📫 nvme-gw create gateway: \'$GW_NAME\' pool: \'$POOL\', group: \'$GW_GROUP\' 37 | docker compose exec -T ceph ceph nvme-gw create $GW_NAME $POOL "$GW_GROUP" 38 | docker compose exec -T ceph ceph nvme-gw show $POOL "$GW_GROUP" 39 | 40 | echo ℹ️ Wait for gateway to be ready 41 | while true; do 42 | sleep 1 # Adjust the sleep duration as needed 43 | container_status=$(docker inspect -f '{{.State.Status}}' "$GW_NAME") 44 | if [ "$container_status" == "running" ]; then 45 | echo "Container $i $GW_NAME is now running." 46 | else 47 | echo "Container $i $GW_NAME is still not running. Waiting..." 48 | continue 49 | fi 50 | GW_IP="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW_NAME")" 51 | if ! docker compose run --rm nvmeof-cli $CLI_TLS_ARGS --server-address $GW_IP --server-port 5500 get_subsystems; then 52 | echo "Container $i $GW_NAME $GW_IP no subsystems. Waiting..." 53 | continue 54 | fi 55 | break 56 | done 57 | 58 | # Signal to send (e.g., SIGTERM or SIGKILL) 59 | SIGNAL="SIGABRT" 60 | 61 | # Get the PID of monitor_client inside the container 62 | PID=$(docker exec "$GW_NAME" sh -c "for pid in /proc/*; do 63 | if [ -f \"\$pid/comm\" ] && grep -q 'ceph-nvmeof-mon' \"\$pid/comm\"; then 64 | echo \$(basename \$pid) 65 | break 66 | fi 67 | done") 68 | 69 | if [ -n "$PID" ]; then 70 | echo "ℹ️ Sending $SIGNAL to monitor_client (PID: $PID) in $GW_NAME..." 71 | docker exec "$GW_NAME" kill -s "$SIGNAL" "$PID" 72 | else 73 | echo "❌ monitor_client process not found in $GW_NAME." 74 | exit 1 75 | fi 76 | 77 | } 78 | 79 | ## 80 | ## MAIN 81 | ## 82 | 83 | background_task & 84 | TASK_PID=$! 
# Capture the PID of the background task 85 | 86 | echo ℹ️ Starting $SCALE nvmeof gateways 87 | docker compose up --remove-orphans --scale nvmeof=$SCALE nvmeof 88 | GW_NAME=$(docker ps -a --format '{{.ID}}\t{{.Names}}' | grep -v discovery | awk '$2 ~ /nvmeof/ && $2 ~ /'$i'/ {print $1}') 89 | docker inspect "$GW_NAME" 90 | exit_code=$(docker inspect --format='{{.State.ExitCode}}' "$GW_NAME") 91 | 92 | # expect exit code 1 93 | if [ $exit_code -eq 1 ]; then 94 | echo ✅ gateway returned exit code 1, exiting with success. 95 | else 96 | echo ❌ gateway returned exit code $exit_code, exiting with failure. 97 | exit 1 # Failure exit code 98 | fi 99 | 100 | # Wait for the background task to finish 101 | wait $TASK_PID # Wait for the specific PID to complete 102 | background_task_exit_code=$? # Capture the exit code of the background task 103 | 104 | # Check the exit code and print the result 105 | if [ $background_task_exit_code -eq 0 ]; then 106 | echo ✅ background task completed successfully 107 | else 108 | echo ❌ background task failed with exit code: $background_task_exit_code 109 | fi 110 | 111 | # Exit with the same code as the background task 112 | exit $background_task_exit_code 113 | 114 | 115 | -------------------------------------------------------------------------------- /tests/test_subsys_grp_name_append.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from control.server import GatewayServer 3 | from control.cli import main as cli 4 | from control.cephutils import CephUtils 5 | import grpc 6 | from control.proto import gateway_pb2_grpc as pb2_grpc 7 | import copy 8 | import os 9 | 10 | image = "mytestdevimage" 11 | pool = "rbd" 12 | subsystem_prefix = "nqn.2016-06.io.spdk:cnode" 13 | config = "ceph-nvmeof.conf" 14 | group_name = "group1" 15 | 16 | 17 | @pytest.fixture(scope="module") 18 | def two_gateways(config): 19 | """Sets up and tears down two Gateways""" 20 | nameA = "GatewayAA" 21 | nameB = 
"GatewayBB" 22 | sockA = f"spdk_{nameA}.sock" 23 | sockB = f"spdk_{nameB}.sock" 24 | config.config["gateway-logs"]["log_level"] = "debug" 25 | config.config["gateway"]["group"] = group_name 26 | config.config["gateway"]["rebalance_period_sec"] = "0" 27 | config.config["gateway"]["state_update_interval_sec"] = "360" 28 | config.config["gateway"]["state_update_notify"] = "False" 29 | addr = config.get("gateway", "addr") 30 | configA = copy.deepcopy(config) 31 | configB = copy.deepcopy(config) 32 | configA.config["gateway"]["name"] = nameA 33 | configA.config["gateway"]["override_hostname"] = nameA 34 | configA.config["spdk"]["rpc_socket_name"] = sockA 35 | if os.cpu_count() >= 4: 36 | configA.config["spdk"]["tgt_cmd_extra_args"] = "--lcores (0-1)" 37 | else: 38 | configA.config["spdk"]["tgt_cmd_extra_args"] = "--disable-cpumask-locks" 39 | portA = configA.getint("gateway", "port") 40 | configB.config["gateway"]["name"] = nameB 41 | configB.config["gateway"]["override_hostname"] = nameB 42 | configB.config["spdk"]["rpc_socket_name"] = sockB 43 | portB = portA + 2 44 | discPortB = configB.getint("discovery", "port") + 1 45 | configB.config["gateway"]["port"] = str(portB) 46 | configB.config["discovery"]["port"] = str(discPortB) 47 | if os.cpu_count() >= 4: 48 | configB.config["spdk"]["tgt_cmd_extra_args"] = "--lcores (2-3)" 49 | else: 50 | configB.config["spdk"]["tgt_cmd_extra_args"] = "--disable-cpumask-locks" 51 | 52 | ceph_utils = CephUtils(config) 53 | with (GatewayServer(configA) as gatewayA, GatewayServer(configB) as gatewayB): 54 | ceph_utils.execute_ceph_monitor_command( 55 | "{" + f'"prefix":"nvme-gw create", "id": "{nameA}", "pool": "{pool}", ' 56 | f'"group": "{group_name}"' + "}" 57 | ) 58 | ceph_utils.execute_ceph_monitor_command( 59 | "{" + f'"prefix":"nvme-gw create", "id": "{nameB}", "pool": "{pool}", ' 60 | f'"group": "{group_name}"' + "}" 61 | ) 62 | gatewayA.serve() 63 | gatewayB.serve() 64 | 65 | channelA = grpc.insecure_channel(f"{addr}:{portA}") 
66 | stubA = pb2_grpc.GatewayStub(channelA) 67 | channelB = grpc.insecure_channel(f"{addr}:{portB}") 68 | stubB = pb2_grpc.GatewayStub(channelB) 69 | 70 | yield gatewayA, stubA, gatewayB, stubB 71 | gatewayA.gateway_rpc.gateway_state.delete_state() 72 | gatewayB.gateway_rpc.gateway_state.delete_state() 73 | gatewayA.server.stop(grace=1) 74 | gatewayB.server.stop(grace=1) 75 | 76 | 77 | def test_create_subsystems(caplog, two_gateways): 78 | gatewayA, stubA, gatewayB, stubB = two_gateways 79 | for i in range(20): 80 | caplog.clear() 81 | subsystem = f"{subsystem_prefix}{i}" 82 | cli(["subsystem", "add", "--subsystem", subsystem]) 83 | subsystem += f".{group_name}" 84 | assert f"Adding subsystem {subsystem}: Successful" in caplog.text 85 | caplog.clear() 86 | subsystem = f"{subsystem_prefix}X" 87 | cli(["--server-port", "5502", "subsystem", "add", "--subsystem", subsystem]) 88 | subsystem += f".{group_name}" 89 | assert "differs from OMAP file version" in caplog.text 90 | assert "The file is not current, will reload it and try again" in caplog.text 91 | assert f"Adding subsystem {subsystem}: Successful" in caplog.text 92 | caplog.clear() 93 | cli(["--format", "json", "subsystem", "list"]) 94 | assert f".{group_name}.{{group_name}}" not in caplog.text 95 | -------------------------------------------------------------------------------- /mk/containerized.mk: -------------------------------------------------------------------------------- 1 | ## Deployment commands (docker-compose): 2 | 3 | # Docker and docker-compose specific commands 4 | DOCKER = docker 5 | # Require docker-compose v2 to support multi-platform build option 'services.xxx.build.platforms' 6 | DOCKER_COMPOSE != DOCKER=$$(command -v docker) && $$DOCKER compose version > /dev/null && printf "%s compose\n" $$DOCKER 7 | ifndef DOCKER_COMPOSE 8 | $(error DOCKER_COMPOSE command not found. 
Please install from: https://docs.docker.com/compose/install/) 9 | endif 10 | DOCKER_COMPOSE_COMMANDS = pull build run exec ps top images logs port \ 11 | pause unpause stop restart down events 12 | 13 | OPTS ?= ## Docker-compose subcommand options 14 | CMD ?= ## Command to run with run/exec targets 15 | 16 | .PHONY: $(DOCKER_COMPOSE_COMMANDS) shell 17 | $(DOCKER_COMPOSE_COMMANDS): 18 | $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE) $@ $(OPTS) $(SVC) $(CMD) 19 | 20 | pull: ## Download SVC images 21 | 22 | build: ## Build SVC images 23 | build: DOCKER_COMPOSE_ENV = DOCKER_BUILDKIT=1 COMPOSE_DOCKER_CLI_BUILD=1 COMPOSE_PROFILES=build 24 | 25 | push: QUAY := $(CONTAINER_REGISTRY) 26 | push: IMAGES := nvmeof nvmeof-cli 27 | push: TAG_SUFFIX := # e.g. "-aarch64" for multi-arch image push 28 | push: ## Push nvmeof, nvmeof-cli and spdk containers images to quay.io registries 29 | @echo "Push images $(IMAGES) to registry $(QUAY)"; \ 30 | short_version=$(shell echo $(VERSION) | cut -d. -f1-2); \ 31 | versions="$(VERSION) $${short_version} latest"; \ 32 | for image in $(IMAGES); do \ 33 | for version in $$versions; do \ 34 | if [ -n "$(TAG_SUFFIX)" ] && [ "$$version" = "$(VERSION)" ] || \ 35 | [ -z "$(TAG_SUFFIX)" ]; then \ 36 | echo "Pushing image $(QUAY)/$${image}:$${version}$(TAG_SUFFIX) ..."; \ 37 | docker tag $(CONTAINER_REGISTRY)/$${image}:$(VERSION)$(TAG_SUFFIX) $(QUAY)/$${image}:$${version}$(TAG_SUFFIX) && \ 38 | docker push $(QUAY)/$${image}:$${version}$(TAG_SUFFIX); \ 39 | fi; \ 40 | done; \ 41 | done; \ 42 | @echo "Pushing image \"$(QUAY)/spdk:$(SPDK_VERSION)\" ..."; \ 43 | docker tag $(CONTAINER_REGISTRY)/spdk:$(SPDK_VERSION) $(QUAY)/spdk:$(SPDK_VERSION) && \ 44 | docker push $(QUAY)/spdk:$(SPDK_VERSION); 45 | 46 | push-manifest-list: QUAY := $(CONTAINER_REGISTRY) 47 | push-manifest-list: IMAGES := nvmeof nvmeof-cli 48 | push-manifest-list: 49 | @echo "Push images $(IMAGES) manifestlists to $(QUAY)"; \ 50 | short_version=$(shell echo $(VERSION) | cut -d. 
-f1-2); \ 51 | versions="$(VERSION) $${short_version} latest"; \ 52 | for image in $(IMAGES); do \ 53 | source_list=$$(docker image list --filter reference="$(QUAY)/$${image}:$(VERSION)*" --format "{{.Repository}}:{{.Tag}}"); \ 54 | for version in $$versions; do \ 55 | echo "Pushing image manifestlist $(QUAY)/$${image}:$${version} ..."; \ 56 | docker buildx imagetools create --tag $(QUAY)/$${image}:$${version} $$source_list; \ 57 | done \ 58 | done 59 | 60 | run: ## Run command CMD inside SVC containers 61 | run: override OPTS += --rm 62 | run: DOCKER_COMPOSE_ENV = DOCKER_BUILDKIT=1 COMPOSE_DOCKER_CLI_BUILD=1 63 | 64 | shell: ## Exec shell inside running SVC containers 65 | shell: CMD = bash 66 | shell: exec 67 | 68 | exec: ## Run command inside an existing container 69 | 70 | ps: ## Display status of SVC containers 71 | 72 | top: ## Display running processes in SVC containers 73 | 74 | port: ## Print public port for a port binding 75 | 76 | logs: ## View SVC logs 77 | logs: MAX_LOGS = 40 78 | logs: OPTS ?= --follow --tail=$(MAX_LOGS) 79 | 80 | images: ## List images 81 | 82 | pause: ## Pause running deployment 83 | unpause: ## Resume paused deployment 84 | 85 | stop: ## Stop SVC 86 | 87 | restart: ## Restart SVC 88 | 89 | down: ## Shut down deployment 90 | down: override OPTS += --volumes --remove-orphans 91 | 92 | events: ## Receive real-time events from containers 93 | 94 | .PHONY: 95 | image_name: 96 | @$(DOCKER_COMPOSE) config --format=json | jq '.services."$(SVC)".image' 97 | 98 | .PHONY: 99 | docker_compose_clean: down 100 | $(DOCKER) system prune --all --force --volumes --filter label="io.ceph.nvmeof" 101 | 102 | .PHONY: 103 | clean_cache: ## Clean the Docker build cache 104 | $(DOCKER) builder prune --force --all 105 | 106 | CLEAN += docker_compose_clean 107 | ALL += pull up ps 108 | -------------------------------------------------------------------------------- /tests/ha/state_transitions_both_gws.sh: 
# HA test: check that ANA group ownership moves correctly between two
# gateways across failover (stopping a gateway) and failback (restarting it).
set -xe
# SPDK JSON-RPC client inside the gateway container, and the query used to
# read the per-listener ANA states of the test subsystem.
rpc=/usr/libexec/spdk/scripts/rpc.py
cmd=nvmf_subsystem_get_listeners
nqn=nqn.2016-06.io.spdk:cnode1

# Print the ANA group ids for which gateway $1 reports "optimized", once the
# number of optimized groups equals $2.  Polls the gateway's SPDK RPC socket
# once per second; loops forever until the expected count appears (the outer
# CI job timeout is the only backstop).
expect_optimized() {
    GW_NAME=$1
    EXPECTED_OPTIMIZED=$2

    socket=$(docker exec "$GW_NAME" find /var/tmp -name spdk.sock)
    # Verify expected number of "optimized"
    while true; do
        response=$(docker exec "$GW_NAME" "$rpc" "-s" "$socket" "$cmd" "$nqn")
        ana_states=$(echo "$response" | jq -r '.[0].ana_states')

        # Count the number of "optimized" groups
        optimized_count=$(jq -nr --argjson ana_states "$ana_states" '$ana_states | map(select(.ana_state == "optimized")) | length')

        # Check if there is expected number of "optimized" group
        if [ "$optimized_count" -eq "$EXPECTED_OPTIMIZED" ]; then
            # Iterate through JSON array
            for item in $(echo "$ana_states" | jq -c '.[]'); do
                ana_group=$(echo "$item" | jq -r '.ana_group')
                ana_state=$(echo "$item" | jq -r '.ana_state')

                # Check if ana_state is "optimized"; emit its group id
                if [ "$ana_state" = "optimized" ]; then
                    echo "$ana_group"
                fi
            done
            break
        else
            sleep 1
            continue
        fi
    done
}

# GW name by index: resolve the container id of the i-th nvmeof gateway
gw_name() {
    i=$1
    docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /'$i'/ {print $1}'
}

# Function to access numbers by index: print line $2 (0-based index) of the
# newline-separated list passed in $1
access_number_by_index() {
    numbers=$1
    index=$(expr $2 + 1)
    number=$(echo "$numbers" | awk -v idx="$index" 'NR == idx {print}')
    echo "$number"
}

# verify that given numbers must be either 1 and 2 or 2 and 1
# (i.e. the two gateways own distinct ANA groups); exits 1 otherwise
verify_ana_groups() {
    nr1=$1
    nr2=$2

    if [ "$nr1" -eq 1 ] && [ "$nr2" -eq 2 ]; then
        echo "Verified: first is 1 and second is 2"
    elif [ "$nr1" -eq 2 ] && [ "$nr2" -eq 1 ]; then
        echo "Verified: first is 2 and second is 1"
    else
        echo "Invalid numbers: first and second must be either 1 and 2 or 2 and 1"
        exit 1
    fi
}

#
# MAIN
#
GW1_NAME=$(gw_name 1)
GW2_NAME=$(gw_name 2)

#
# Step 1 validate both gateways are optimized for one of ANA groups 1 and 2
#
GW1_OPTIMIZED=$(expect_optimized $GW1_NAME 1)
gw1_ana=$(access_number_by_index "$GW1_OPTIMIZED" 0)

GW2_OPTIMIZED=$(expect_optimized $GW2_NAME 1)
gw2_ana=$(access_number_by_index "$GW2_OPTIMIZED" 0)

verify_ana_groups "$gw1_ana" "$gw2_ana"

#
# Step 2 failover: after GW2 stops, GW1 must become optimized for both groups
#
echo "Stop GW2 $GW2_NAME"
docker stop $GW2_NAME

docker ps

GW1_FAILOVER_OPTIMIZED=$(expect_optimized $GW1_NAME 2)
gw1_ana1=$(access_number_by_index "$GW1_FAILOVER_OPTIMIZED" 0)
gw1_ana2=$(access_number_by_index "$GW1_FAILOVER_OPTIMIZED" 1)
verify_ana_groups "$gw1_ana1" "$gw1_ana2"

#
# Step 3 failback: once GW2 is back, each gateway owns one group again
#
echo "Start GW2 $GW2_NAME"
docker start $GW2_NAME

docker ps

GW1_OPTIMIZED=$(expect_optimized $GW1_NAME 1)
gw1_ana=$(access_number_by_index "$GW1_OPTIMIZED" 0)

GW2_OPTIMIZED=$(expect_optimized $GW2_NAME 1)
gw2_ana=$(access_number_by_index "$GW2_OPTIMIZED" 0)

verify_ana_groups "$gw1_ana" "$gw2_ana"

#
# Step 4 failover in the other direction: stop GW1, GW2 takes both groups
#
echo "Stop GW1 $GW1_NAME"
docker stop $GW1_NAME

docker ps

GW2_FAILOVER_OPTIMIZED=$(expect_optimized $GW2_NAME 2)
gw2_ana1=$(access_number_by_index "$GW2_FAILOVER_OPTIMIZED" 0)
gw2_ana2=$(access_number_by_index "$GW2_FAILOVER_OPTIMIZED" 1)
verify_ana_groups "$gw2_ana1" "$gw2_ana2"

#
# Step 5 failback: restart GW1 and verify the split ownership is restored
#
echo "Start GW1 $GW1_NAME"
docker start $GW1_NAME

docker ps

GW2_OPTIMIZED=$(expect_optimized $GW2_NAME 1)
gw2_ana=$(access_number_by_index "$GW2_OPTIMIZED" 0)

GW1_OPTIMIZED=$(expect_optimized $GW1_NAME 1)
gw1_ana=$(access_number_by_index "$GW1_OPTIMIZED" 0)

verify_ana_groups "$gw1_ana" "$gw2_ana"
1 54 | verify_ana_groups() { 55 | nr1=$1 56 | nr2=$2 57 | 58 | if [ "$nr1" -eq 1 ] && [ "$nr2" -eq 2 ]; then 59 | echo "Verified: first is 1 and second is 2" 60 | elif [ "$nr1" -eq 2 ] && [ "$nr2" -eq 1 ]; then 61 | echo "Verified: first is 2 and second is 1" 62 | else 63 | echo "Invalid numbers: first and second must be either 1 and 2 or 2 and 1" 64 | exit 1 65 | fi 66 | } 67 | 68 | random_sleep() { 69 | # Generate a random number between 0 and 25 seconds 70 | # This ensures the sleep is safely below typical monitor connect timeout (30+ seconds) 71 | # while still providing enough randomness for late registration testing 72 | seconds=$(( RANDOM % 26 )) 73 | 74 | # Sleep for the random number of seconds 75 | echo "Sleeping for $seconds secs (random delay capped below monitor connect timeout)" 76 | sleep "$seconds" 77 | } 78 | 79 | # 80 | # MAIN 81 | # 82 | 83 | # Check if GITHUB_WORKSPACE is defined 84 | if [ -n "$GITHUB_WORKSPACE" ]; then 85 | test_dir="$GITHUB_WORKSPACE/tests/ha" 86 | else 87 | test_dir=$(dirname $0) 88 | fi 89 | 90 | # Step 1 Stop the existing deployement 91 | make down 92 | 93 | # Step 2 Start a new deployment 94 | docker compose up -d --scale nvmeof=2 nvmeof 95 | 96 | # Step 3 Wait for ceph container to become healthy" 97 | while true; do 98 | container_status=$(docker inspect --format='{{.State.Health.Status}}' ceph) 99 | if [[ $container_status == "healthy" ]]; then 100 | # success 101 | break 102 | else 103 | # Wait for a specific time before checking again 104 | sleep 1 105 | echo -n . 
# Run an nvmeof-cli command in a throwaway compose container, pointed at the
# gateway's gRPC endpoint.  All arguments are forwarded to the CLI.
cephnvmf_func()
{
    /usr/bin/docker compose run --rm nvmeof-cli \
        --server-address ${NVMEOF_IP_ADDRESS} \
        --server-port ${NVMEOF_GW_PORT} \
        $@
}
.env 9 | 10 | set -e 11 | set -x 12 | 13 | echo "ℹ️ create a 20MB big namespace" 14 | cephnvmf_func subsystem add --subsystem "${NQN}" --no-group-append 15 | cephnvmf_func namespace add --subsystem "${NQN}" --rbd-pool rbd --rbd-image shrink_image --size 20MB --rbd-create-image 16 | 17 | nslist=$(cephnvmf_func --output stdio --format json namespace list --subsystem $NQN) 18 | [[ `echo $nslist | jq -r '.status'` == "0" ]] 19 | [[ `echo $nslist | jq -r '.subsystem_nqn'` == "${NQN}" ]] 20 | [[ `echo $nslist | jq -r '.namespaces[0].nsid'` == "1" ]] 21 | [[ `echo $nslist | jq -r '.namespaces[0].rbd_image_size'` == "20971520" ]] 22 | [[ `echo $nslist | jq -r '.namespaces[0].image_was_shrunk'` == "false" ]] 23 | [[ `echo $nslist | jq -r '.namespaces[1]'` == "null" ]] 24 | 25 | echo "ℹ️ shrink image to 19MB" 26 | make -s exec SVC=ceph OPTS=-T CMD="rbd --pool rbd resize shrink_image --size 19MB --allow-shrink" 27 | sleep 120 28 | nslist=$(cephnvmf_func --output stdio --format json namespace list --subsystem $NQN) 29 | [[ `echo $nslist | jq -r '.status'` == "0" ]] 30 | [[ `echo $nslist | jq -r '.subsystem_nqn'` == "${NQN}" ]] 31 | [[ `echo $nslist | jq -r '.namespaces[0].nsid'` == "1" ]] 32 | [[ `echo $nslist | jq -r '.namespaces[0].rbd_image_size'` == "19922944" ]] 33 | [[ `echo $nslist | jq -r '.namespaces[0].image_was_shrunk'` == "true" ]] 34 | [[ `echo $nslist | jq -r '.namespaces[1]'` == "null" ]] 35 | 36 | echo "ℹ️ resize namespace using CLI" 37 | cephnvmf_func namespace resize --subsystem "${NQN}" --nsid 1 --size 25MB 38 | nslist=$(cephnvmf_func --output stdio --format json namespace list --subsystem $NQN) 39 | [[ `echo $nslist | jq -r '.status'` == "0" ]] 40 | [[ `echo $nslist | jq -r '.subsystem_nqn'` == "${NQN}" ]] 41 | [[ `echo $nslist | jq -r '.namespaces[0].nsid'` == "1" ]] 42 | [[ `echo $nslist | jq -r '.namespaces[0].image_was_shrunk'` == "false" ]] 43 | [[ `echo $nslist | jq -r '.namespaces[1]'` == "null" ]] 44 | 45 | echo "ℹ️ shrink image to 15MB" 46 | make 
-s exec SVC=ceph OPTS=-T CMD="rbd --pool rbd resize shrink_image --size 15MB --allow-shrink" 47 | sleep 120 48 | nslist=$(cephnvmf_func --output stdio --format json namespace list --subsystem $NQN) 49 | [[ `echo $nslist | jq -r '.status'` == "0" ]] 50 | [[ `echo $nslist | jq -r '.subsystem_nqn'` == "${NQN}" ]] 51 | [[ `echo $nslist | jq -r '.namespaces[0].nsid'` == "1" ]] 52 | [[ `echo $nslist | jq -r '.namespaces[0].image_was_shrunk'` == "true" ]] 53 | [[ `echo $nslist | jq -r '.namespaces[1]'` == "null" ]] 54 | 55 | echo "ℹ️ refresh size" 56 | cephnvmf_func namespace refresh_size --subsystem "${NQN}" --nsid 1 57 | nslist=$(cephnvmf_func --output stdio --format json namespace list --subsystem $NQN --nsid 1) 58 | [[ `echo $nslist | jq -r '.status'` == "0" ]] 59 | [[ `echo $nslist | jq -r '.subsystem_nqn'` == "${NQN}" ]] 60 | [[ `echo $nslist | jq -r '.namespaces[0].nsid'` == "1" ]] 61 | [[ `echo $nslist | jq -r '.namespaces[0].rbd_image_size'` == "15728640" ]] 62 | [[ `echo $nslist | jq -r '.namespaces[0].image_was_shrunk'` == "true" ]] 63 | [[ `echo $nslist | jq -r '.namespaces[1]'` == "null" ]] 64 | 65 | echo "ℹ️ disable SPDK notification handling" 66 | sed -i 's/^.*notifications_interval.*$/notifications_interval=0/' ceph-nvmeof.conf 67 | container_id=$(docker ps -q -f name=nvmeof) 68 | docker restart ${container_id} 69 | sleep 20 70 | nslist=$(cephnvmf_func --output stdio --format json namespace list --subsystem $NQN) 71 | [[ `echo $nslist | jq -r '.status'` == "0" ]] 72 | [[ `echo $nslist | jq -r '.subsystem_nqn'` == "${NQN}" ]] 73 | [[ `echo $nslist | jq -r '.namespaces[0].nsid'` == "1" ]] 74 | [[ `echo $nslist | jq -r '.namespaces[0].image_was_shrunk'` == "false" ]] 75 | [[ `echo $nslist | jq -r '.namespaces[1]'` == "null" ]] 76 | 77 | echo "ℹ️ shrink image to 10MB" 78 | make -s exec SVC=ceph OPTS=-T CMD="rbd --pool rbd resize shrink_image --size 10MB --allow-shrink" 79 | sleep 120 80 | nslist=$(cephnvmf_func --output stdio --format json namespace list 
--subsystem $NQN) 81 | [[ `echo $nslist | jq -r '.status'` == "0" ]] 82 | [[ `echo $nslist | jq -r '.subsystem_nqn'` == "${NQN}" ]] 83 | [[ `echo $nslist | jq -r '.namespaces[0].nsid'` == "1" ]] 84 | [[ `echo $nslist | jq -r '.namespaces[0].image_was_shrunk'` == "false" ]] 85 | [[ `echo $nslist | jq -r '.namespaces[1]'` == "null" ]] 86 | -------------------------------------------------------------------------------- /tests/ha/state_transitions.sh: -------------------------------------------------------------------------------- 1 | set -xe 2 | rpc=/usr/libexec/spdk/scripts/rpc.py 3 | cmd=nvmf_subsystem_get_listeners 4 | nqn=nqn.2016-06.io.spdk:cnode1 5 | 6 | expect_optimized() { 7 | GW_NAME=$1 8 | EXPECTED_OPTIMIZED=$2 9 | 10 | socket=$(docker exec "$GW_NAME" find /var/tmp -name spdk.sock) 11 | # Verify expected number of "optimized" 12 | while true; do 13 | response=$(docker exec "$GW_NAME" "$rpc" "-s" "$socket" "$cmd" "$nqn") 14 | ana_states=$(echo "$response" | jq -r '.[0].ana_states') 15 | 16 | # Count the number of "optimized" groups 17 | optimized_count=$(jq -nr --argjson ana_states "$ana_states" '$ana_states | map(select(.ana_state == "optimized")) | length') 18 | 19 | # Check if there is expected number of "optimized" group 20 | if [ "$optimized_count" -eq "$EXPECTED_OPTIMIZED" ]; then 21 | # Iterate through JSON array 22 | for item in $(echo "$ana_states" | jq -c '.[]'); do 23 | ana_group=$(echo "$item" | jq -r '.ana_group') 24 | ana_state=$(echo "$item" | jq -r '.ana_state') 25 | 26 | # Check if ana_state is "optimized" 27 | if [ "$ana_state" = "optimized" ]; then 28 | echo "$ana_group" 29 | fi 30 | done 31 | break 32 | else 33 | sleep 1 34 | continue 35 | fi 36 | done 37 | } 38 | 39 | # GW name by index 40 | gw_name() { 41 | i=$1 42 | docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /'$i'/ {print $1}' 43 | } 44 | 45 | # Function to access numbers by index 46 | access_number_by_index() { 47 | numbers=$1 48 | index=$(expr $2 + 1) 49 | 
number=$(echo "$numbers" | awk -v idx="$index" 'NR == idx {print}') 50 | echo "$number" 51 | } 52 | 53 | # verify that given numbers must be either 1 and 2 or 2 and 1 54 | verify_ana_groups() { 55 | nr1=$1 56 | nr2=$2 57 | 58 | if [ "$nr1" -eq 1 ] && [ "$nr2" -eq 2 ]; then 59 | echo "Verified: first is 1 and second is 2" 60 | elif [ "$nr1" -eq 2 ] && [ "$nr2" -eq 1 ]; then 61 | echo "Verified: first is 2 and second is 1" 62 | else 63 | echo "Invalid numbers: first and second must be either 1 and 2 or 2 and 1" 64 | exit 1 65 | fi 66 | } 67 | 68 | verify_blocklist() { 69 | stopped_gw_name=$1 70 | NODE_IP=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' $stopped_gw_name) 71 | BLOCKLIST=$(docker compose exec -T ceph ceph osd blocklist ls) 72 | 73 | echo "verifying there is at least 1 entry in the blocklist related to the stopped gateway" 74 | if echo "$BLOCKLIST" | grep -q "$NODE_IP"; then 75 | echo "ip $NODE_IP for the stopped gateway was found the blocklist." 76 | else 77 | echo "ip $NODE_IP for node the stopped gateway was not found in blocklist." 78 | exit 1 79 | fi 80 | 81 | echo "verifying there are no entries in the blocklist which are not related to the stopped gateway" 82 | if echo "$BLOCKLIST" | grep -qv "$NODE_IP"; then 83 | echo "found at least 1 entry in blocklist which is not related to gateway in the stopped gateway. failing" 84 | exit 1 85 | else 86 | echo "didn't find unexpected entries which are not relaetd to the stopped gateway." 
class GatewayConfig:
    """Loads and returns config file settings.

    Instance attributes:
        config: Config parser object
        filepath: Path of the configuration file that was loaded
        conffile_logged: True once the file content was dumped to the log
        env_shown: True once the environment info was logged
    """

    CEPH_RUN_DIRECTORY = "/var/run/ceph/"

    # (environment variable, log-message prefix) pairs, logged in this order
    # by display_environment_info().
    _ENV_INFO = (
        ("NVMEOF_VERSION", "Using NVMeoF gateway version "),
        ("NVMEOF_SPDK_VERSION", "Configured SPDK version "),
        ("NVMEOF_CEPH_VERSION", "Using vstart cluster version based on "),
        ("BUILD_DATE", "NVMeoF gateway built on: "),
        ("NVMEOF_GIT_REPO", "NVMeoF gateway Git repository: "),
        ("NVMEOF_GIT_BRANCH", "NVMeoF gateway Git branch: "),
        ("NVMEOF_GIT_COMMIT", "NVMeoF gateway Git commit: "),
        ("NVMEOF_GIT_MODIFIED_FILES", "NVMeoF gateway uncommitted modified files: "),
        ("SPDK_GIT_REPO", "SPDK Git repository: "),
        ("SPDK_GIT_BRANCH", "SPDK Git branch: "),
        ("SPDK_GIT_COMMIT", "SPDK Git commit: "),
    )

    def __init__(self, conffile):
        """Parse *conffile*; raises if the file is missing or malformed."""
        self.filepath = conffile
        self.conffile_logged = False
        self.env_shown = False
        self.config = configparser.ConfigParser()
        with open(conffile) as conf_fd:
            self.config.read_file(conf_fd)

    def is_param_defined(self, section, param):
        """Return True when *section* exists and defines *param*."""
        return self.config.has_section(section) and self.config.has_option(section, param)

    def get(self, section, param):
        """Return the raw string value; raises if section/param is missing."""
        return self.config.get(section, param)

    def getboolean(self, section, param):
        """Return the value coerced to bool; raises if missing or invalid."""
        return self.config.getboolean(section, param)

    def getint(self, section, param):
        """Return the value coerced to int; raises if missing or invalid."""
        return self.config.getint(section, param)

    def getfloat(self, section, param):
        """Return the value coerced to float; raises if missing or invalid."""
        return self.config.getfloat(section, param)

    def get_with_default(self, section, param, value):
        """Like get(), but fall back to *value* when the param is absent."""
        return self.config.get(section, param, fallback=value)

    def getboolean_with_default(self, section, param, value):
        """Like getboolean(), but fall back to *value* when absent."""
        return self.config.getboolean(section, param, fallback=value)

    def getint_with_default(self, section, param, value):
        """Like getint(), but fall back to *value* when absent."""
        return self.config.getint(section, param, fallback=value)

    def getfloat_with_default(self, section, param, value):
        """Like getfloat(), but fall back to *value* when absent."""
        return self.config.getfloat(section, param, fallback=value)

    def dump_config_file(self, logger):
        """Log the configuration file content verbatim, at most once."""
        if self.conffile_logged:
            return

        try:
            logger.info(f"Using configuration file {self.filepath}")
            with open(self.filepath) as conf_fd:
                logger.info(
                    "====================================== Configuration file content "
                    "======================================")
                for raw_line in conf_fd:
                    logger.info(f"{raw_line.rstrip()}")
                logger.info(
                    "========================================================="
                    "===============================================")
            self.conffile_logged = True
        except Exception:
            # Best effort only: failure to dump the file must never
            # interfere with gateway operation.
            pass

    def display_environment_info(self, logger):
        """Log build/version details taken from the environment, at most once."""
        if self.env_shown:
            return

        for env_var, prefix in self._ENV_INFO:
            value = os.getenv(env_var)
            if value:
                logger.info(f"{prefix}{value}")
        self.env_shown = True
'{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}') 8 | GW2_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /2/ {print $1}') 9 | 10 | ip="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW1_NAME")" 11 | ip2="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW2_NAME")" 12 | 13 | calc_written_bytes_in_sec() 14 | { 15 | IP=$1 16 | num_bytes=$(docker compose run --rm nvmeof-cli --server-address $IP --server-port 5500 --output stdio --format json namespace get_io_stats -n nqn.2016-06.io.spdk:cnode1 --nsid 1 | jq '.bytes_written'| sed 's/[^0-9]*//g'); 17 | 18 | sleep 1; 19 | num_bytes1=$(docker compose run --rm nvmeof-cli --server-address $IP --server-port 5500 --output stdio --format json namespace get_io_stats -n nqn.2016-06.io.spdk:cnode1 --nsid 1 | jq '.bytes_written'| sed 's/[^0-9]*//g'); 20 | 21 | res=$(expr $num_bytes1 - $num_bytes ); 22 | #echo "Bytes written in sec: $res"; 23 | if [ "$res" -gt 0 ]; then 24 | # limit values to boolean for simplify futher analysis 25 | res=1; 26 | else 27 | res=0; 28 | fi; 29 | echo "$res"; 30 | } 31 | 32 | 33 | echo -n "ℹ️ Starting bdevperf container" 34 | docker compose up -d bdevperf 35 | sleep 10 36 | echo "ℹ️ bdevperf start up logs" 37 | make logs SVC=bdevperf 38 | BDEVPERF_SOCKET=/tmp/bdevperf.sock 39 | NVMEOF_DISC_PORT=8009 40 | 41 | 42 | echo "ℹ️ Using discovery service in gateway $GW1 ip $ip" 43 | rpc="/usr/libexec/spdk/scripts/rpc.py" 44 | echo "ℹ️ bdevperf bdev_nvme_set_options" 45 | make exec SVC=bdevperf OPTS=-T CMD="$rpc -v -s $BDEVPERF_SOCKET bdev_nvme_set_options -r -1" 46 | echo "ℹ️ bdevperf start discovery ip: $ip port: $NVMEOF_DISC_PORT" 47 | make exec SVC=bdevperf OPTS=-T CMD="$rpc -v -s $BDEVPERF_SOCKET bdev_nvme_start_discovery -b Nvme0 -t tcp -a $ip -s $NVMEOF_DISC_PORT -f ipv4 -w" 48 | echo "ℹ️ bdevperf bdev_nvme_get_discovery_info" 49 | make exec SVC=bdevperf OPTS=-T CMD="$rpc -v -s 
$BDEVPERF_SOCKET bdev_nvme_get_discovery_info" 50 | echo "ℹ️ bdevperf perform_tests" 51 | eval $(make run SVC=bdevperf OPTS="--entrypoint=env" | grep BDEVPERF_TEST_DURATION | tr -d '\n\r' ) 52 | 53 | timeout=$(expr $BDEVPERF_TEST_DURATION \* 2) 54 | 55 | echo $timeout 56 | bdevperf="/usr/libexec/spdk/scripts/bdevperf.py" 57 | echo "run io test" 58 | make exec SVC=bdevperf OPTS=-T CMD="$bdevperf -v -t $timeout -s $BDEVPERF_SOCKET perform_tests" & 59 | #test write ios for the ns1 60 | 61 | ( 62 | sleep 8; 63 | lb_group=1; 64 | 65 | docker compose run -T --rm nvmeof-cli --server-address $ip --server-port 5500 namespace change_load_balancing_group -n nqn.2016-06.io.spdk:cnode1 --nsid 1 --load-balancing-group $lb_group; 66 | priv_res1=$(calc_written_bytes_in_sec $ip) ; 67 | 68 | echo "ℹ️ written bytes through $ip $priv_res1 "; 69 | 70 | priv_res2=$(calc_written_bytes_in_sec $ip2); 71 | 72 | echo "ℹ️ written bytes through $ip2 $priv_res2 "; 73 | 74 | 75 | for i in $(seq 6); do 76 | if [ $lb_group -eq 1 ]; then 77 | lb_group=2 78 | IP=$ip 79 | else 80 | lb_group=1 81 | IP=$ip2 82 | fi; 83 | 84 | echo "ℹ️ ℹ️ Change lb group of ns 1 to $lb_group :" ; 85 | docker compose run -T --rm nvmeof-cli --server-address $IP --server-port 5500 namespace change_load_balancing_group -n nqn.2016-06.io.spdk:cnode1 --nsid 1 --load-balancing-group $lb_group; 86 | sleep 4; 87 | 88 | res1=$(calc_written_bytes_in_sec $ip) ; 89 | 90 | echo "ℹ️ written bytes through $ip ?: $res1"; 91 | 92 | res2=$(calc_written_bytes_in_sec $ip2) ; 93 | 94 | echo "ℹ️ written bytes through $ip2 ?: $res2 "; 95 | echo "ℹ️ ℹ️ ℹ️ DEBUG iteration $i : priv_res1 and res1 : $priv_res1 , $res1 , priv_res2 and res2 : $priv_res2 , $res2 "; 96 | 97 | #check that io is switched each iteration to different Gateway 98 | if [ $res1 -eq $res2 ]; then 99 | echo " ℹ️ ℹ️ ℹ️ res1 and res2 : $res1 $res2 "; 100 | exit 1 #both eq 0 - no traffic at all 101 | fi; 102 | 103 | if [ $res1 -ne $priv_res1 ] && [ $res2 -ne $priv_res2 ]; then 104 
| echo " ℹ️ ℹ️ Valid traffic results"; 105 | else 106 | echo "ℹ️ ℹ️ ℹ️ Not valid checks !!! : priv_res1 and res1 : $priv_res1 $res1 , priv_res2 and res2 : $priv_res2 $res2 "; 107 | exit 1; 108 | fi; 109 | 110 | priv_res1=$res1; 111 | priv_res2=$res2; 112 | 113 | done; 114 | 115 | 116 | echo "wait for join"; 117 | 118 | ) & 119 | 120 | wait 121 | exit 0 122 | -------------------------------------------------------------------------------- /tests/test_auto_listeners.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from control.server import GatewayServer 3 | import socket 4 | from control.cli import main as cli 5 | from control.cli import main_test as cli_test 6 | from control.cephutils import CephUtils 7 | import grpc 8 | from control.proto import gateway_pb2_grpc as pb2_grpc 9 | import time 10 | 11 | pool = "rbd" 12 | subsystem = "nqn.2016-06.io.spdk:cnode1" 13 | subsystem2 = "nqn.2016-06.io.spdk:cnode2" 14 | subsystem3 = "nqn.2016-06.io.spdk:cnode3" 15 | 16 | host_name = socket.gethostname() 17 | addr = "127.0.0.1" 18 | addr_ipv6 = "::1" 19 | config = "ceph-nvmeof.conf" 20 | group_name = "GROUPNAME" 21 | 22 | 23 | @pytest.fixture(scope="module") 24 | def gateway(config): 25 | """Sets up and tears down Gateway""" 26 | 27 | addr = config.get("gateway", "addr") 28 | port = config.getint("gateway", "port") 29 | config.config["gateway"]["group"] = group_name 30 | config.config["gateway-logs"]["log_level"] = "debug" 31 | ceph_utils = CephUtils(config) 32 | 33 | with GatewayServer(config) as gateway: 34 | 35 | # Start gateway 36 | gateway.gw_logger_object.set_log_level("debug") 37 | ceph_utils.execute_ceph_monitor_command( 38 | "{" + f'"prefix":"nvme-gw create", "id": "{gateway.name}", "pool": "{pool}", ' 39 | f'"group": "{group_name}"' + "}" 40 | ) 41 | gateway.serve() 42 | 43 | # Bind the client and Gateway 44 | channel = grpc.insecure_channel(f"{addr}:{port}") 45 | stub = pb2_grpc.GatewayStub(channel) 46 | 
yield gateway.gateway_rpc, stub 47 | 48 | # Stop gateway 49 | gateway.server.stop(grace=1) 50 | gateway.gateway_rpc.gateway_state.delete_state() 51 | 52 | 53 | class TestAutoListener: 54 | def test_auto_listener_ipv4(self, caplog, gateway): 55 | cli(["subsystem", "list"]) 56 | caplog.clear() 57 | cli(["subsystem", "add", "--subsystem", subsystem, "--no-group-append", 58 | '--network-mask', f'{addr}/24']) 59 | assert f"Adding subsystem {subsystem}: Successful" in caplog.text 60 | assert "ipv4" in caplog.text.lower() 61 | assert (f"Automatically created listener at {addr}:4420 for {subsystem}" 62 | in caplog.text) 63 | 64 | def test_auto_listener_secure(self, caplog, gateway): 65 | caplog.clear() 66 | cli(["subsystem", "add", "--subsystem", subsystem2, "--no-group-append", 67 | '--network-mask', f'{addr}/24', '--secure-listeners']) 68 | assert f"Adding subsystem {subsystem2}: Successful" in caplog.text 69 | assert "ipv4" in caplog.text.lower() 70 | assert (f"Automatically created listener at {addr}:4420 for {subsystem2}" 71 | in caplog.text) 72 | 73 | def test_auto_listener_ipv6(self, caplog, gateway): 74 | caplog.clear() 75 | cli(["subsystem", "add", "--subsystem", subsystem3, "--no-group-append", 76 | '--network-mask', f'{addr_ipv6}/120']) 77 | assert f"Adding subsystem {subsystem3}: Successful" in caplog.text 78 | assert "ipv6" in caplog.text.lower() 79 | assert (f"Automatically created listener at [{addr_ipv6}]:4420 for {subsystem3}" 80 | in caplog.text) 81 | 82 | def test_auto_listener_list_ipv4(self, caplog, gateway): 83 | cli(["subsystem", "list"]) 84 | time.sleep(30) 85 | caplog.clear() 86 | listeners = cli_test(["listener", "list", "--subsystem", subsystem]) 87 | assert listeners.listeners[0].trtype == "TCP" 88 | assert listeners.listeners[0].traddr == addr 89 | assert listeners.listeners[0].trsvcid == 4420 90 | assert listeners.listeners[0].active 91 | assert not listeners.listeners[0].secure 92 | assert not listeners.listeners[0].manual 93 | 94 | def 
test_auto_listener_list_secure(self, caplog, gateway): 95 | caplog.clear() 96 | listeners = cli_test(["listener", "list", "--subsystem", subsystem2]) 97 | assert listeners.listeners[0].trtype == "TCP" 98 | assert listeners.listeners[0].traddr == addr 99 | assert listeners.listeners[0].trsvcid == 4420 100 | assert listeners.listeners[0].active 101 | assert listeners.listeners[0].secure 102 | assert not listeners.listeners[0].manual 103 | 104 | def test_auto_listener_list_ipv6(self, caplog, gateway): 105 | caplog.clear() 106 | listeners = cli_test(["listener", "list", "--subsystem", subsystem3]) 107 | assert listeners.listeners[0].trtype == "TCP" 108 | assert listeners.listeners[0].traddr == addr_ipv6 109 | assert listeners.listeners[0].trsvcid == 4420 110 | assert listeners.listeners[0].active 111 | assert not listeners.listeners[0].secure 112 | assert not listeners.listeners[0].manual 113 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | # Globals 2 | VERSION="1.6.2" 3 | CEPH_VERSION="20.2.0" 4 | SPDK_VERSION="25.09" 5 | CONTAINER_REGISTRY="quay.io/ceph" 6 | QUAY_SPDK="${CONTAINER_REGISTRY}/spdk" 7 | QUAY_CEPH="${CONTAINER_REGISTRY}/vstart-cluster" 8 | QUAY_NVMEOF="${CONTAINER_REGISTRY}/nvmeof" 9 | QUAY_NVMEOFCLI="${CONTAINER_REGISTRY}/nvmeof-cli" 10 | MAINTAINER="Ceph Developers " 11 | COMPOSE_PROJECT_NAME="ceph-nvmeof" 12 | NVMEOF_CONTAINER_NAME="${COMPOSE_PROJECT_NAME}-nvmeof-1" 13 | # Note(xin3liang): docker-compose doesn't yet support list/array variable 14 | # substitution. See https://github.com/docker/compose/issues/4249. 15 | # Because of this issue and the CPU arch-related build parameters 16 | # (SPDK_TARGET_ARCH, SPDK_MAKEFLAGS, and CEPH_CLUSTER_CEPH_REPO_BASEURL) 17 | # haven't been decided at the container build time yet, currently only 18 | # supports building one platform at a time. 
19 | TARGET_PLATFORM="linux/amd64" 20 | 21 | # Performance 22 | NVMEOF_NOFILE=20480 # Max number of open files (depends on number of hosts connected) 23 | HUGEPAGES=2048 # 4 GB 24 | HUGEPAGES_DIR="/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages" 25 | 26 | # NVMe-oF 27 | NVMEOF_VERSION="${VERSION}" 28 | NVMEOF_CONFIG="./ceph-nvmeof.conf" 29 | NVMEOF_SPDK_VERSION="${SPDK_VERSION}" 30 | NVMEOF_CEPH_VERSION="${CEPH_VERSION}" 31 | NVMEOF_NAME="ceph-nvmeof" 32 | NVMEOF_SUMMARY="Ceph NVMe over Fabrics Gateway" 33 | NVMEOF_DESCRIPTION="Service to provide block storage on top of Ceph for platforms (e.g.: VMWare) without native Ceph support (RBD), replacing existing approaches (iSCSI) with a newer and more versatile standard (NVMe-oF)." 34 | NVMEOF_URL="https://github.com/ceph/ceph-nvmeof" 35 | NVMEOF_TAGS="ceph,nvme-of,nvme-of gateway,rbd,block storage" 36 | NVMEOF_WANTS="ceph,rbd" 37 | NVMEOF_IP_ADDRESS=192.168.13.3 38 | NVMEOF_IPV6_ADDRESS=2001:db8::3 39 | NVMEOF_IO_PORT=4420 40 | NVMEOF_GW_PORT=5500 41 | NVMEOF_DISC_PORT=8009 42 | NVMEOF_PROMETHEUS_PORT=10008 43 | NVMEOF_EXPOSE_SERVICES="${NVMEOF_IO_PORT}/tcp:nvme,${NVMEOF_GW_PORT}/tcp:grpc,${NVMEOF_DISC_PORT}/tcp:nvme-disc,${NVMEOF_PROMETHEUS_PORT}/tcp:prom" 44 | 45 | # NVMe-oF CLI 46 | NVMEOF_CLI_VERSION="${VERSION}" 47 | NVMEOF_CLI_NAME="ceph-nvmeof-cli" 48 | NVMEOF_CLI_SUMMARY="Ceph NVMe over Fabrics CLI" 49 | NVMEOF_CLI_DESCRIPTION="Command line interface for Ceph NVMe over Fabrics Gateway" 50 | 51 | # SPDK 52 | SPDK_CEPH_VERSION="${CEPH_VERSION}" 53 | SPDK_NAME="SPDK" 54 | SPDK_SUMMARY="Build Ultra High-Performance Storage Applications with the Storage Performance Development Kit" 55 | SPDK_DESCRIPTION="The Storage Performance Development Kit (SPDK) provides a set of tools and libraries for writing high performance, scalable, user-mode storage applications" 56 | SPDK_URL="https://spdk.io" 57 | 58 | SPDK_PKGDEP_ARGS="--rbd" 59 | # check spdk/configure --help 60 | SPDK_CONFIGURE_ARGS="--with-idxd --with-rbd 
--disable-tests --disable-unit-tests --disable-examples --enable-debug" 61 | SPDK_TARGET_ARCH="x86-64-v2" 62 | SPDK_MAKEFLAGS= 63 | SPDK_CENTOS_BASE="https://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os/Packages/" 64 | SPDK_CENTOS_REPO_VER="9.0-30.el9" 65 | 66 | # Ceph Cluster 67 | CEPH_CLUSTER_VERSION="${CEPH_VERSION}" 68 | CEPH_BRANCH=main 69 | CEPH_SHA=latest 70 | 71 | CEPH_DEVEL_MGR_PATH=../ceph 72 | 73 | # Atom 74 | ATOM_SHA=2fd1f4373c0e4ddfa683fb96c98c548e1b515dac 75 | 76 | # Demo settings 77 | RBD_POOL=rbd 78 | RBD_IMAGE_NAME=demo_image 79 | RBD_IMAGE_SIZE=10MB 80 | NQN="nqn.2016-06.io.spdk:cnode1" 81 | SERIAL="SPDK00000000000001" 82 | 83 | # Container names in docker compose environent 84 | BDEVPERF_CONTAINER_NAME="${COMPOSE_PROJECT_NAME}-bdevperf-1" 85 | DISC1="${COMPOSE_PROJECT_NAME}-discovery-1" 86 | GW1="${COMPOSE_PROJECT_NAME}-nvmeof-1" 87 | GW2="${COMPOSE_PROJECT_NAME}-nvmeof-2" 88 | 89 | # Keys for security tests 90 | PSK_KEY1="NVMeTLSkey-1:01:YzrPElk4OYy1uUERriPwiiyEJE/+J5ckYpLB+5NHMsR2iBuT:" # generated using "nvme gen-tls-key" 91 | PSK_KEY2="NVMeTLSkey-1:01:vUrPe33Auz/sgAAcYctjI0oOOEFM5lheeLy7U+yTsD/LHm9q:" 92 | PSK_KEY3="NVMeTLSkey-1:01:IuIuyghntsi1iX5LdnlRp7MjON1QuYe4hELKYr5VwsL4AgHU:" 93 | 94 | DHCHAP_KEY1="DHHC-1:01:rPTE0Q73nd3hEqqEuQNaPL11G/aFXpOHtldWXz9vNCeef4WV:" # generated using "nvme gen-dhchap-key" 95 | DHCHAP_KEY2="DHHC-1:01:x7ecfGgIdOEl+J5cJ9JcZHOS2By2Me6eDJUnrsT9MVrCWRYV:" 96 | DHCHAP_KEY3="DHHC-1:01:eNNXGjidEHHStbUi2Gmpps0JcnofReFfy+NaulguGgt327hz:" 97 | DHCHAP_KEY4="DHHC-1:01:c8D8fVPP/wcuxxRCd8mdQQFjOWtjcS2KmspzvkeOEoF6SUm6:" 98 | DHCHAP_KEY5="DHHC-1:01:zNZ6nrs5JDIpqbH/ZP1VTAATxNf5i/rH44dci+vvjhsyI2ha:" 99 | DHCHAP_KEY6="DHHC-1:01:Bu4tZd7X2oW7XxmVH5tGCdoS30pDX6bZvexHYoudeVlJW9yz:" 100 | DHCHAP_KEY7="DHHC-1:01:JPJkDQ2po2FfLmKYlTF/sJ2HzVO/FKWxgXKE/H6XfL8ogQ1T:" 101 | DHCHAP_KEY8="DHHC-1:01:e0B0vDxKleDzYVtG42xqFvoWZfiufkoywmfRKrETzayRdf1j:" 102 | DHCHAP_KEY9="DHHC-1:01:KD+sfH3/o2bRQoV0ESjBUywQlMnSaYpZISUbVa0k0nsWpNST:" 103 | 
DHCHAP_KEY10="DHHC-1:00:rWf0ZFYO7IgWGttM8w6jUrAY4cTQyqyXPdmxHeOSve3w5QU9:" 104 | DHCHAP_KEY11="DHHC-1:02:j3uUz05r5aQy42vX4tDXqVf9HgUPPdEp3kXTgUWl9EphsG7jwpr9KSIt3bmRLXBijPTIDQ==:" 105 | -------------------------------------------------------------------------------- /tests/ha/auto_load_balance.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -xe 3 | # See 4 | # - https://github.com/spdk/spdk/blob/master/doc/jsonrpc.md 5 | # - https://spdk.io/doc/nvmf_multipath_howto.html 6 | 7 | GW1_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}') 8 | GW2_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /2/ {print $1}') 9 | GW3_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /3/ {print $1}') 10 | 11 | 12 | ip1="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW1_NAME")" 13 | ip2="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW2_NAME")" 14 | #ip3="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW3_NAME")" 15 | 16 | NQN1="nqn.2016-06.io.spdk:cnode1" 17 | NQN2="nqn.2016-06.io.spdk:cnode2" 18 | NQN3="nqn.2016-06.io.spdk:cnode3" 19 | 20 | NUM_SUBSYSTEMS=3 21 | MAX_NAMESPACE=58 22 | 23 | test_ns_distribution() 24 | { 25 | num_grps=$1 26 | prev_cnt=0 27 | first_cnt=1 28 | declare -A ana_load 29 | 30 | for group in $(seq "$num_grps"); do 31 | ana_load[$group]=0 32 | done 33 | 34 | for i in $(seq $NUM_SUBSYSTEMS); do 35 | NQN="nqn.2016-06.io.spdk:cnode$i" 36 | for group in $(seq $num_grps); do 37 | ns_list=$(docker compose run -T --rm nvmeof-cli --server-address $ip2 --server-port 5500 --output stdio --format json namespace list -n $NQN) 38 | #count=$(echo "$json"|jq '[.namespaces[] | select(.load_balancing_group == 1)]|length') 39 | count=$(echo "$ns_list" | jq --argjson group "$group" '[.namespaces[] | 
select(.load_balancing_group == $group)] | length') 40 | echo "namespaces of subsystem " $NQN " Ana-group " $group " = " $count 41 | ana_load[$group]=$(( ana_load[$group] + count )) 42 | if ((first_cnt == 1)); then 43 | first_cnt=0 44 | prev_cnt=$count 45 | else 46 | #compare count with prev_count 47 | diff=$(( count - prev_cnt )) 48 | diff=${diff#-} 49 | if (( diff > 2 )); then 50 | echo "ℹ️ ℹ️ Namespace Distribution issue" 51 | exit 1 52 | else 53 | echo "ℹ️ ℹ️ Compared OK!" 54 | fi 55 | fi 56 | done 57 | done 58 | min_val=10000 59 | max_val=0 60 | for group in "${!ana_load[@]}"; do 61 | val=${ana_load[$group]} 62 | if (( val < min_val )); then 63 | min_val=$val 64 | fi 65 | if (( val > max_val )); then 66 | max_val=$val 67 | fi 68 | done 69 | if (( max_val - min_val > 2 )); then 70 | echo "ℹ️ ℹ️ Namespace ANA group Distribution issue" 71 | exit 1 72 | else 73 | echo "ℹ️ ℹ️ ANA Compared OK!" 74 | fi 75 | } 76 | 77 | 78 | echo "ℹ️ ℹ️ Start test: create additional 3 subsystems and 3 listeners:" 79 | 80 | docker compose run -T --rm nvmeof-cli --server-address $ip1 --server-port 5500 subsystem add -n $NQN2 --no-group-append 81 | docker compose run -T --rm nvmeof-cli --server-address $ip1 --server-port 5500 subsystem add -n $NQN3 --no-group-append 82 | #docker compose run -T --rm nvmeof-cli --server-address $ip1 --server-port 5500 subsystem add -n $NQN3 --no-group-append 83 | sleep 2 84 | docker compose run --rm nvmeof-cli --server-address $ip1 --server-port 5500 listener add --subsystem $NQN2 --host-name $GW1_NAME --traddr $ip1 --trsvcid 4420 85 | docker compose run --rm nvmeof-cli --server-address $ip1 --server-port 5500 listener add --subsystem $NQN3 --host-name $GW1_NAME --traddr $ip1 --trsvcid 4420 86 | docker compose run --rm nvmeof-cli --server-address $ip2 --server-port 5500 listener add --subsystem $NQN2 --host-name $GW2_NAME --traddr $ip2 --trsvcid 4420 87 | docker compose run --rm nvmeof-cli --server-address $ip2 --server-port 5500 listener add --subsystem 
$NQN3 --host-name $GW2_NAME --traddr $ip2 --trsvcid 4420 88 | 89 | echo "ℹ️ ℹ️ Create namespaces with explicit LB = 1" 90 | 91 | 92 | for i in $(seq $NUM_SUBSYSTEMS); do 93 | NQN="nqn.2016-06.io.spdk:cnode$i" 94 | for num in $(seq $MAX_NAMESPACE); 95 | do 96 | image_name="demo_image$(expr \( $num + 5 \) \* $i)" 97 | echo $image_name 98 | docker compose run --rm nvmeof-cli --server-address $ip2 --server-port 5500 namespace add --subsystem $NQN --rbd-pool rbd --rbd-image $image_name --size 10M --rbd-create-image -l 1 --force 99 | done 100 | done 101 | 102 | #auto load balance is working , check distribution now and then after 2 minutes (300 ns rebalance takes ~ 4 mins) # subs=$(docker compose run -T --rm nvmeof-cli --server-address $ip1 --server-port 5500 --output stdio --format json get_subsystems 2>&1 | sed 's/Get subsystems://') 103 | echo "ℹ️ ℹ️ Wait for rebalance " 104 | sleep 250 105 | 106 | test_ns_distribution 2 107 | 108 | docker compose exec -T ceph ceph nvme-gw delete $GW1_NAME rbd '' 109 | echo "ℹ️ ℹ️ Wait for scale-down rebalance " 110 | sleep 110 111 | test_ns_distribution 1 112 | docker compose exec -T ceph ceph nvme-gw create $GW1_NAME rbd '' 113 | echo "ℹ️ ℹ️ Wait for rebalance after create GW" 114 | sleep 200 115 | test_ns_distribution 2 116 | 117 | ############################################################################################ 118 | 119 | echo "ℹ️ ℹ️ test passed" 120 | exit 0 121 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax = docker/dockerfile:1.4 2 | 3 | ARG NVMEOF_SPDK_VERSION \ 4 | CONTAINER_REGISTRY \ 5 | NVMEOF_TARGET # either 'gateway' or 'cli' 6 | 7 | #------------------------------------------------------------------------------ 8 | # Base image for NVMEOF_TARGET=cli (nvmeof-cli) 9 | FROM 
registry.access.redhat.com/ubi9/ubi@sha256:66233eebd72bb5baa25190d4f55e1dc3fff3a9b77186c1f91a0abdb274452072 AS base-cli 10 | ENV GRPC_DNS_RESOLVER=native 11 | ENTRYPOINT ["python3", "-m", "control.cli"] 12 | CMD [] 13 | 14 | #------------------------------------------------------------------------------ 15 | # Base image for NVMEOF_TARGET=gateway (nvmeof-gateway) 16 | FROM ${CONTAINER_REGISTRY:-quay.io/ceph}/spdk:${NVMEOF_SPDK_VERSION:-NULL} AS base-gateway 17 | RUN \ 18 | --mount=type=cache,target=/var/cache/dnf \ 19 | --mount=type=cache,target=/var/lib/dnf \ 20 | dnf install -y python3-rados && \ 21 | dnf install -y python3-rbd && \ 22 | dnf install -y gdb && \ 23 | dnf config-manager --set-enabled crb && \ 24 | dnf install -y ceph-mon-client-nvmeof 25 | ENTRYPOINT ["python3", "-m", "control"] 26 | CMD ["-c", "/src/ceph-nvmeof.conf"] 27 | 28 | #------------------------------------------------------------------------------ 29 | # Intermediate layer for Python set-up 30 | FROM base-$NVMEOF_TARGET AS python-intermediate 31 | 32 | RUN \ 33 | --mount=type=cache,target=/var/cache/dnf \ 34 | --mount=type=cache,target=/var/lib/dnf \ 35 | dnf update -y 36 | 37 | ENV PYTHONUNBUFFERED=1 \ 38 | PYTHONIOENCODING=UTF-8 \ 39 | LC_ALL=C.UTF-8 \ 40 | LANG=C.UTF-8 \ 41 | PIP_NO_CACHE_DIR=off \ 42 | PYTHON_MAJOR=3 \ 43 | PYTHON_MINOR=9 \ 44 | PDM_PREFER_BINARY=:all: 45 | 46 | ARG APPDIR=/src 47 | 48 | ARG NVMEOF_NAME \ 49 | NVMEOF_SUMMARY \ 50 | NVMEOF_DESCRIPTION \ 51 | NVMEOF_URL \ 52 | NVMEOF_VERSION \ 53 | NVMEOF_MAINTAINER \ 54 | NVMEOF_TAGS \ 55 | NVMEOF_WANTS \ 56 | NVMEOF_EXPOSE_SERVICES \ 57 | BUILD_DATE \ 58 | NVMEOF_GIT_REPO \ 59 | NVMEOF_GIT_BRANCH \ 60 | NVMEOF_GIT_COMMIT \ 61 | NVMEOF_SPDK_VERSION \ 62 | NVMEOF_CEPH_VERSION \ 63 | NVMEOF_GIT_MODIFIED_FILES \ 64 | SPDK_GIT_REPO \ 65 | SPDK_GIT_BRANCH \ 66 | SPDK_GIT_COMMIT \ 67 | HUGEPAGES \ 68 | HUGEPAGES_DIR 69 | 70 | ENV NVMEOF_VERSION="${NVMEOF_VERSION}" \ 71 | NVMEOF_GIT_REPO="${NVMEOF_GIT_REPO}" \ 72 | 
NVMEOF_GIT_BRANCH="${NVMEOF_GIT_BRANCH}" \ 73 | NVMEOF_GIT_COMMIT="${NVMEOF_GIT_COMMIT}" \ 74 | BUILD_DATE="${BUILD_DATE}" \ 75 | NVMEOF_SPDK_VERSION="${NVMEOF_SPDK_VERSION}" \ 76 | NVMEOF_CEPH_VERSION="${NVMEOF_CEPH_VERSION}" \ 77 | NVMEOF_GIT_MODIFIED_FILES="${NVMEOF_GIT_MODIFIED_FILES}" \ 78 | SPDK_GIT_REPO="${SPDK_GIT_REPO}" \ 79 | SPDK_GIT_BRANCH="${SPDK_GIT_BRANCH}" \ 80 | SPDK_GIT_COMMIT="${SPDK_GIT_COMMIT}" \ 81 | HUGEPAGES="${HUGEPAGES}" \ 82 | HUGEPAGES_DIR="${HUGEPAGES_DIR}" 83 | 84 | # Generic labels 85 | LABEL name="$NVMEOF_NAME" \ 86 | version="$NVMEOF_VERSION" \ 87 | summary="$NVMEOF_SUMMARY" \ 88 | description="$NVMEOF_DESCRIPTION" \ 89 | maintainer="$NVMEOF_MAINTAINER" \ 90 | release="" \ 91 | url="$NVMEOF_URL" \ 92 | build-date="$BUILD_DATE" \ 93 | vcs-ref="$NVMEOF_GIT_COMMIT" 94 | 95 | # k8s-specific labels 96 | LABEL io.k8s.display-name="$NVMEOF_SUMMARY" \ 97 | io.k8s.description="$NVMEOF_DESCRIPTION" 98 | 99 | # k8s-specific labels 100 | LABEL io.openshift.tags="$NVMEOF_TAGS" \ 101 | io.openshift.wants="$NVMEOF_WANTS" \ 102 | io.openshift.expose-services="$NVMEOF_EXPOSE_SERVICES" 103 | 104 | # Ceph-specific labels 105 | LABEL io.ceph.component="$NVMEOF_NAME" \ 106 | io.ceph.summary="$NVMEOF_SUMMARY" \ 107 | io.ceph.description="$NVMEOF_DESCRIPTION" \ 108 | io.ceph.url="$NVMEOF_URL" \ 109 | io.ceph.version="$NVMEOF_VERSION" \ 110 | io.ceph.maintainer="$NVMEOF_MAINTAINER" \ 111 | io.ceph.git.repo="$NVMEOF_GIT_REPO" \ 112 | io.ceph.git.branch="$NVMEOF_GIT_BRANCH" \ 113 | io.ceph.git.commit="$NVMEOF_GIT_COMMIT" 114 | 115 | ENV PYTHONPATH=$APPDIR/__pypackages__/$PYTHON_MAJOR.$PYTHON_MINOR/lib 116 | 117 | WORKDIR $APPDIR 118 | 119 | #------------------------------------------------------------------------------ 120 | FROM python-intermediate AS builder-base 121 | ARG PDM_VERSION=2.17.3 \ 122 | PDM_INSTALL_CMD=sync \ 123 | PDM_INSTALL_FLAGS="-v --no-isolation --no-self --no-editable" \ 124 | PDM_INSTALL_DEV="" 125 | ENV 
PDM_INSTALL_FLAGS="$PDM_INSTALL_FLAGS $PDM_INSTALL_DEV" 126 | 127 | ENV PDM_CHECK_UPDATE=0 128 | 129 | # https://pdm.fming.dev/latest/usage/advanced/#use-pdm-in-a-multi-stage-dockerfile 130 | RUN \ 131 | --mount=type=cache,target=/var/cache/dnf \ 132 | --mount=type=cache,target=/var/lib/dnf \ 133 | dnf install -y python3-pip && \ 134 | dnf install -y gcc python3-devel 135 | RUN \ 136 | --mount=type=cache,target=/root/.cache/pip \ 137 | pip install -U pip setuptools wheel 138 | 139 | RUN \ 140 | --mount=type=cache,target=/root/.cache/pip \ 141 | pip install pdm==$PDM_VERSION 142 | 143 | #------------------------------------------------------------------------------ 144 | FROM builder-base AS builder 145 | 146 | COPY pyproject.toml pdm.lock pdm.toml ./ 147 | RUN \ 148 | --mount=type=cache,target=/root/.cache/pdm \ 149 | pdm "$PDM_INSTALL_CMD" $PDM_INSTALL_FLAGS 150 | 151 | COPY . . 152 | RUN pdm run protoc 153 | 154 | #------------------------------------------------------------------------------ 155 | FROM python-intermediate 156 | COPY --from=builder $APPDIR . 157 | -------------------------------------------------------------------------------- /tests/ha/rbd_qos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function cephnvmf_func() 4 | { 5 | /usr/bin/docker compose run --rm nvmeof-cli --server-address ${NVMEOF_IP_ADDRESS} --server-port ${NVMEOF_GW_PORT} $@ 6 | } 7 | 8 | . 
.env 9 | 10 | set -e 11 | set -x 12 | 13 | echo "ℹ️ set global RBD QOS limit value" 14 | make -s exec SVC=ceph OPTS=-T CMD="rbd config global set global rbd_qos_iops_limit 20" 15 | 16 | echo "ℹ️ create resources" 17 | cephnvmf_func subsystem add --subsystem ${NQN} --no-group-append 18 | cephnvmf_func namespace add --subsystem ${NQN} --rbd-pool ${RBD_POOL} --rbd-image ${RBD_IMAGE_NAME} --size ${RBD_IMAGE_SIZE} --rbd-create-image 19 | 20 | echo "ℹ️ try setting namespace QOS value" 21 | set +e 22 | cephnvmf_func --output stdio namespace set_qos --subsystem ${NQN} --nsid 1 --rw-megabytes-per-second 30 > /dev/null 2> /tmp/qos.err 23 | if [[ $? -eq 0 ]]; then 24 | echo "Setting QOS with RBD QOS attribute set should fail" 25 | exit 1 26 | fi 27 | set -e 28 | grep "Failure setting QOS limits for namespace 1 on ${NQN}: QOS limits were changed for RBD image ${RBD_POOL}/${RBD_IMAGE_NAME}" /tmp/qos.err 29 | rm -f /tmp/qos.err 30 | ns_list=$(cephnvmf_func --output stdio --format json namespace list --subsystem ${NQN}) 31 | [[ `echo $ns_list | jq -r '.status'` == "0" ]] 32 | [[ `echo $ns_list | jq -r '.subsystem_nqn'` == "${NQN}" ]] 33 | [[ `echo $ns_list | jq -r '.namespaces[0].nsid'` == "1" ]] 34 | [[ `echo $ns_list | jq -r '.namespaces[0].rw_ios_per_second'` == "0" ]] 35 | [[ `echo $ns_list | jq -r '.namespaces[0].rw_mbytes_per_second'` == "0" ]] 36 | [[ `echo $ns_list | jq -r '.namespaces[0].r_mbytes_per_second'` == "0" ]] 37 | [[ `echo $ns_list | jq -r '.namespaces[0].w_mbytes_per_second'` == "0" ]] 38 | [[ `echo $ns_list | jq -r '.namespaces[1]'` == "null" ]] 39 | 40 | echo "ℹ️ try setting namespace QOS value using --force" 41 | cephnvmf_func namespace set_qos --subsystem ${NQN} --nsid 1 --rw-megabytes-per-second 30 --force 42 | ns_list=$(cephnvmf_func --output stdio --format json namespace list --subsystem ${NQN}) 43 | [[ `echo $ns_list | jq -r '.status'` == "0" ]] 44 | [[ `echo $ns_list | jq -r '.subsystem_nqn'` == "${NQN}" ]] 45 | [[ `echo $ns_list | jq -r 
'.namespaces[0].nsid'` == "1" ]] 46 | [[ `echo $ns_list | jq -r '.namespaces[0].rw_ios_per_second'` == "0" ]] 47 | [[ `echo $ns_list | jq -r '.namespaces[0].rw_mbytes_per_second'` == "30" ]] 48 | [[ `echo $ns_list | jq -r '.namespaces[0].r_mbytes_per_second'` == "0" ]] 49 | [[ `echo $ns_list | jq -r '.namespaces[0].w_mbytes_per_second'` == "0" ]] 50 | [[ `echo $ns_list | jq -r '.namespaces[1]'` == "null" ]] 51 | 52 | echo "ℹ️ delete namespace and image" 53 | cephnvmf_func namespace del --subsystem ${NQN} --nsid 1 54 | make -s exec SVC=ceph OPTS=-T CMD="rbd remove ${RBD_POOL}/${RBD_IMAGE_NAME}" 55 | 56 | echo "ℹ️ reset global RBD QOS limit value" 57 | make -s exec SVC=ceph OPTS=-T CMD="rbd config global set global rbd_qos_iops_limit 0" 58 | 59 | echo "ℹ️ create namespace and image again" 60 | cephnvmf_func namespace add --subsystem ${NQN} --rbd-pool ${RBD_POOL} --rbd-image ${RBD_IMAGE_NAME}2 --size ${RBD_IMAGE_SIZE} --rbd-create-image 61 | 62 | echo "ℹ️ try setting namespace QOS value" 63 | cephnvmf_func namespace set_qos --subsystem ${NQN} --nsid 1 --rw-megabytes-per-second 40 64 | ns_list=$(cephnvmf_func --output stdio --format json namespace list --subsystem ${NQN}) 65 | [[ `echo $ns_list | jq -r '.status'` == "0" ]] 66 | [[ `echo $ns_list | jq -r '.subsystem_nqn'` == "${NQN}" ]] 67 | [[ `echo $ns_list | jq -r '.namespaces[0].nsid'` == "1" ]] 68 | [[ `echo $ns_list | jq -r '.namespaces[0].rw_ios_per_second'` == "0" ]] 69 | [[ `echo $ns_list | jq -r '.namespaces[0].rw_mbytes_per_second'` == "40" ]] 70 | [[ `echo $ns_list | jq -r '.namespaces[0].r_mbytes_per_second'` == "0" ]] 71 | [[ `echo $ns_list | jq -r '.namespaces[0].w_mbytes_per_second'` == "0" ]] 72 | [[ `echo $ns_list | jq -r '.namespaces[1]'` == "null" ]] 73 | 74 | echo "ℹ️ set image RBD QOS limit value" 75 | make -s exec SVC=ceph OPTS=-T CMD="rbd config image set ${RBD_POOL}/${RBD_IMAGE_NAME}2 rbd_qos_iops_limit 30" 76 | 77 | echo "ℹ️ try setting namespace QOS value" 78 | set +e 79 | cephnvmf_func 
namespace set_qos --subsystem ${NQN} --nsid 1 --rw-megabytes-per-second 50 80 | if [[ $? -eq 0 ]]; then 81 | echo "Setting QOS with RBD QOS attribute set should fail" 82 | exit 1 83 | fi 84 | set -e 85 | ns_list=$(cephnvmf_func --output stdio --format json namespace list --subsystem ${NQN}) 86 | [[ `echo $ns_list | jq -r '.status'` == "0" ]] 87 | [[ `echo $ns_list | jq -r '.subsystem_nqn'` == "${NQN}" ]] 88 | [[ `echo $ns_list | jq -r '.namespaces[0].nsid'` == "1" ]] 89 | [[ `echo $ns_list | jq -r '.namespaces[0].rw_ios_per_second'` == "0" ]] 90 | [[ `echo $ns_list | jq -r '.namespaces[0].rw_mbytes_per_second'` == "40" ]] 91 | [[ `echo $ns_list | jq -r '.namespaces[0].r_mbytes_per_second'` == "0" ]] 92 | [[ `echo $ns_list | jq -r '.namespaces[0].w_mbytes_per_second'` == "0" ]] 93 | [[ `echo $ns_list | jq -r '.namespaces[1]'` == "null" ]] 94 | 95 | echo "ℹ️ try setting namespace QOS value using --force" 96 | cephnvmf_func namespace set_qos --subsystem ${NQN} --nsid 1 --rw-megabytes-per-second 50 --force 97 | ns_list=$(cephnvmf_func --output stdio --format json namespace list --subsystem ${NQN}) 98 | [[ `echo $ns_list | jq -r '.status'` == "0" ]] 99 | [[ `echo $ns_list | jq -r '.subsystem_nqn'` == "${NQN}" ]] 100 | [[ `echo $ns_list | jq -r '.namespaces[0].nsid'` == "1" ]] 101 | [[ `echo $ns_list | jq -r '.namespaces[0].rw_ios_per_second'` == "0" ]] 102 | [[ `echo $ns_list | jq -r '.namespaces[0].rw_mbytes_per_second'` == "50" ]] 103 | [[ `echo $ns_list | jq -r '.namespaces[0].r_mbytes_per_second'` == "0" ]] 104 | [[ `echo $ns_list | jq -r '.namespaces[0].w_mbytes_per_second'` == "0" ]] 105 | [[ `echo $ns_list | jq -r '.namespaces[1]'` == "null" ]] 106 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 
3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL Advanced" 13 | 14 | on: 15 | push: 16 | branches: [ "devel", "*", "reef_7.1", "squid_8.0", "squid_8.1", "tentacle_9.0" ] 17 | pull_request: 18 | branches: [ "devel", "*", "reef_7.1", "squid_8.0", "squid_8.1", "tentacle_9.0" ] 19 | schedule: 20 | - cron: '38 0 * * 6' 21 | 22 | jobs: 23 | detect-go: 24 | name: Detect Go Code 25 | runs-on: ubuntu-latest 26 | permissions: 27 | contents: read 28 | outputs: 29 | has-go: ${{ steps.check.outputs.has-go }} 30 | steps: 31 | - name: Checkout repository 32 | uses: actions/checkout@v4 33 | 34 | - name: Check for Go code 35 | id: check 36 | run: | 37 | if find . -name "*.go" -not -path "./vendor/*" -not -path "./.git/*" | grep -q .; then 38 | echo "has-go=true" >> $GITHUB_OUTPUT 39 | echo "✅ Go code found - will analyze" 40 | else 41 | echo "has-go=false" >> $GITHUB_OUTPUT 42 | echo "⏭️ No Go code - skipping Go analysis" 43 | fi 44 | 45 | analyze: 46 | name: Analyze (${{ matrix.language }}) 47 | needs: detect-go 48 | # Runner size impacts CodeQL analysis time. To learn more, please see: 49 | # - https://gh.io/recommended-hardware-resources-for-running-codeql 50 | # - https://gh.io/supported-runners-and-hardware-resources 51 | # - https://gh.io/using-larger-runners (GitHub.com only) 52 | # Consider using larger runners or machines with greater resources for possible analysis time improvements. 
53 | runs-on: ubuntu-latest 54 | permissions: 55 | # required for all workflows 56 | security-events: write 57 | 58 | # required to fetch internal or private CodeQL packs 59 | packages: read 60 | 61 | # only required for workflows in private repositories 62 | actions: read 63 | contents: read 64 | 65 | strategy: 66 | fail-fast: false 67 | matrix: 68 | include: 69 | - language: actions 70 | build-mode: none 71 | - language: python 72 | build-mode: none 73 | # CodeQL supports the following values keywords for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'rust', 'swift' 74 | # Use `c-cpp` to analyze code written in C, C++ or both 75 | # Use 'java-kotlin' to analyze code written in Java, Kotlin or both 76 | # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both 77 | # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, 78 | # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. 79 | # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how 80 | # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages 81 | steps: 82 | - name: Checkout repository 83 | uses: actions/checkout@v4 84 | 85 | # Add any setup steps before running the `github/codeql-action/init` action. 86 | # This includes steps like installing compilers or runtimes (`actions/setup-node` 87 | # or others). This is typically only required for manual builds. 88 | # - name: Setup runtime (example) 89 | # uses: actions/setup-example@v1 90 | 91 | # Initializes the CodeQL tools for scanning. 
92 | - name: Initialize CodeQL 93 | uses: github/codeql-action/init@v3 94 | with: 95 | languages: ${{ matrix.language }} 96 | build-mode: ${{ matrix.build-mode }} 97 | # If you wish to specify custom queries, you can do so here or in a config file. 98 | # By default, queries listed here will override any specified in a config file. 99 | # Prefix the list here with "+" to use these queries and those in the config file. 100 | 101 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 102 | # queries: security-extended,security-and-quality 103 | 104 | - name: Perform CodeQL Analysis 105 | uses: github/codeql-action/analyze@v3 106 | with: 107 | category: "/language:${{matrix.language}}" 108 | 109 | analyze-go: 110 | name: Analyze (go) 111 | needs: detect-go 112 | if: needs.detect-go.outputs.has-go == 'true' 113 | runs-on: ubuntu-latest 114 | permissions: 115 | security-events: write 116 | packages: read 117 | actions: read 118 | contents: read 119 | 120 | steps: 121 | - name: Checkout repository 122 | uses: actions/checkout@v4 123 | 124 | - name: Initialize CodeQL 125 | uses: github/codeql-action/init@v3 126 | with: 127 | languages: go 128 | build-mode: autobuild 129 | 130 | - name: Perform CodeQL Analysis 131 | uses: github/codeql-action/analyze@v3 132 | with: 133 | category: "/language:go" 134 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Make config 2 | MAKEFLAGS += --no-builtin-rules --no-builtin-variables 3 | TARGET_ARCH := $(shell uname -m) 4 | .SUFFIXES: 5 | 6 | # Assign default CPU arch related parameters 7 | include .env 8 | ifneq (, $(filter $(TARGET_ARCH), arm64 aarch64)) 9 | ceph_repo_arch = arm64 10 | TARGET_PLATFORM = linux/arm64 11 | SPDK_TARGET_ARCH = 
armv8-a+crypto 12 | SPDK_MAKEFLAGS = $(shell echo "DPDKBUILD_FLAGS=-Dplatform=generic -j $$(nproc)") 13 | else ifneq (, $(filter $(TARGET_ARCH), amd64 x86_64)) 14 | ceph_repo_arch = x86_64 15 | TARGET_PLATFORM = linux/amd64 16 | SPDK_TARGET_ARCH = x86-64-v2 17 | else 18 | $(error Unspported CPU arch '$(TARGET_ARCH)' !! Set TARGET_ARCH to x86_64, amd64 or arm64, aarch64 arches) 19 | endif 20 | 21 | # Includes 22 | include mk/containerized.mk 23 | include mk/demo.mk 24 | include mk/demosecurepsk.mk 25 | include mk/demosecuredhchap.mk 26 | include mk/misc.mk 27 | include mk/autohelp.mk 28 | 29 | ## Basic targets: 30 | .DEFAULT_GOAL := all 31 | all: setup $(ALL) 32 | 33 | verify: ## Run Python source files through flake8 34 | @echo Verifying Python source files 35 | flake8 control/*.py tests/*.py 36 | 37 | setup: ## Configure huge-pages (requires sudo/root password) 38 | 39 | @echo Setup core dump pattern as /tmp/coredump/core.* 40 | mkdir -p /tmp/coredump 41 | sudo mkdir -p /var/log/ceph 42 | sudo chmod 0755 /var/log/ceph 43 | sudo bash -c 'echo "|/usr/bin/env tee /tmp/coredump/core.%e.%p.%h.%t" > /proc/sys/kernel/core_pattern' 44 | sudo bash -c 'echo $(HUGEPAGES) > $(HUGEPAGES_DIR)' 45 | @echo Actual Hugepages allocation: $$(cat $(HUGEPAGES_DIR)) 46 | @[ $$(cat $(HUGEPAGES_DIR)) -eq $(HUGEPAGES) ] 47 | 48 | build pull logs down: SVC ?= ceph spdk bdevperf nvmeof nvmeof-devel nvmeof-cli discovery 49 | 50 | build: export NVMEOF_GIT_REPO != git remote get-url origin 51 | build: export NVMEOF_GIT_BRANCH != git name-rev --name-only HEAD 52 | build: export NVMEOF_GIT_COMMIT != git rev-parse HEAD 53 | build: export SPDK_GIT_REPO != git -C spdk remote get-url origin 54 | build: export SPDK_GIT_BRANCH != git -C spdk name-rev --name-only HEAD 55 | build: export SPDK_GIT_COMMIT != git rev-parse HEAD:spdk 56 | build: export BUILD_DATE != date -u +"%Y-%m-%d %H:%M:%S %Z" 57 | build: export NVMEOF_GIT_MODIFIED_FILES != git status -s | grep -e "^ *M" | sed 's/^ *M //' | xargs 58 | 59 
| # Variables 60 | SHAMAN_FETCH_ATTEMPTS := 3 61 | 62 | # Fetch and export CEPH_CLUSTER_CEPH_REPO_BASEURL with retries 63 | build: export CEPH_CLUSTER_CEPH_REPO_BASEURL != \ 64 | for i in $$(seq 1 $(SHAMAN_FETCH_ATTEMPTS)); do \ 65 | sha1="$(CEPH_SHA)"; \ 66 | if [ "$$sha1" = "latest" ]; then \ 67 | idx=$$((i - 1)); \ 68 | sha1=$$(curl -s \ 69 | "https://shaman.ceph.com/api/search/?status=ready&project=ceph&ref=$(CEPH_BRANCH)&flavor=default&distros=centos/9/$(ceph_repo_arch)" \ 70 | | jq -r "sort_by(.modified) | reverse | .[$$idx].sha1"); \ 71 | >&2 echo "Attempt ($$i): Using 'latest' SHA1 for arch=$(ceph_repo_arch), branch=$(CEPH_BRANCH): $$sha1"; \ 72 | fi; \ 73 | >&2 echo "Attempt ($$i): Fetching URL for arch=$(ceph_repo_arch), branch=$(CEPH_BRANCH), sha=$(CEPH_SHA)..."; \ 74 | url=$$(curl -s "https://shaman.ceph.com/api/repos/ceph/$(CEPH_BRANCH)/$$sha1/centos/9/" | jq -r '.[] | select(.status == "ready" and .archs[] == "$(ceph_repo_arch)") | .url'); \ 75 | if [ -n "$$url" ]; then \ 76 | >&2 echo "Success: Retrieved URL for arch=$(ceph_repo_arch), branch=$(CEPH_BRANCH), sha=$(CEPH_SHA): $$url"; \ 77 | echo "$$url"; \ 78 | break; \ 79 | fi; \ 80 | >&2 echo "Retrying... 
Failed attempt ($$i) for arch=$(ceph_repo_arch), branch=$(CEPH_BRANCH), sha=$(CEPH_SHA)"; \ 81 | sleep 2; \ 82 | done; \ 83 | if [ -z "$$url" ]; then \ 84 | >&2 echo "Failure: Unable to retrieve a valid URL for arch=$(ceph_repo_arch), branch=$(CEPH_BRANCH), sha=$(CEPH_SHA) after $(SHAMAN_FETCH_ATTEMPTS) attempts"; \ 85 | exit 1; \ 86 | fi 87 | 88 | build: export TARGET_PLATFORM := $(TARGET_PLATFORM) 89 | build: export SPDK_TARGET_ARCH := $(SPDK_TARGET_ARCH) 90 | build: export SPDK_MAKEFLAGS := $(SPDK_MAKEFLAGS) 91 | up: ## Launch services 92 | up: SCALE?= 1 ## Number of gateways 93 | up: 94 | @$(CURDIR)/tests/ha/start_up.sh $(SCALE) 95 | 96 | clean: $(CLEAN) setup ## Clean-up environment 97 | clean: override HUGEPAGES = 0 98 | clean: 99 | /usr/bin/rm -f control/proto/gateway_pb2_grpc.py control/proto/gateway_pb2.py control/proto/gateway_pb2.pyi control/proto/monitor_pb2_grpc.py control/proto/monitor_pb2.py control/proto/monitor_pb2.pyi 100 | 101 | update-lockfile: run ## Update dependencies in lockfile (pdm.lock) 102 | update-lockfile: SVC=nvmeof-builder-base 103 | update-lockfile: override OPTS+=--entrypoint=pdm 104 | update-lockfile: CMD=update --no-sync --no-isolation --no-self --no-editable 105 | 106 | protoc: run ## Generate gRPC protocol files 107 | protoc: SVC=nvmeof-builder 108 | protoc: override OPTS+=--entrypoint=pdm 109 | protoc: CMD=run protoc 110 | 111 | EXPORT_DIR ?= /tmp ## Directory to export packages (RPM and Python wheel) 112 | export-rpms: SVC=spdk-rpm-export 113 | export-rpms: OPTS=--entrypoint=cp -v $(strip $(EXPORT_DIR)):/tmp 114 | export-rpms: CMD=-r /rpm /tmp 115 | export-rpms: run ## Build SPDK RPMs and copy them to $(EXPORT_DIR)/rpm 116 | @echo RPMs exported to: 117 | @find $(strip $(EXPORT_DIR))/rpm -type f 118 | 119 | export-python: SVC=nvmeof-python-export 120 | export-python: OPTS=--entrypoint=pdm -v $(strip $(EXPORT_DIR)):/tmp 121 | export-python: CMD=build --no-sdist --no-clean -d /tmp 122 | export-python: run ## Build Ceph NVMe-oF 
Gateway Python package and copy it to /tmp 123 | @echo Python wheel exported to: 124 | @find $(strip $(EXPORT_DIR))/ceph_nvmeof-*.whl 125 | 126 | help: AUTOHELP_SUMMARY = Makefile to build and deploy the Ceph NVMe-oF Gateway 127 | help: autohelp 128 | 129 | .PHONY: all setup clean help update-lockfile protoc export-rpms export-python 130 | -------------------------------------------------------------------------------- /tests/ha/4gws.sh: -------------------------------------------------------------------------------- 1 | set -xe 2 | rpc=/usr/libexec/spdk/scripts/rpc.py 3 | cmd=nvmf_subsystem_get_listeners 4 | 5 | expect_optimized() { 6 | GW_NAME=$1 7 | EXPECTED_OPTIMIZED=$2 8 | NQN=$3 9 | 10 | socket_retries=0 11 | socket="" 12 | while [ $socket_retries -lt 10 ] ; do 13 | socket=$(docker exec "$GW_NAME" find /var/tmp -name spdk.sock) 14 | if [ -n "$socket" ]; then 15 | break 16 | fi 17 | socket_retries=$(expr $socket_retries + 1) 18 | sleep 1 19 | done 20 | if [ -z "$socket" ]; then 21 | exit 1 # failed 22 | fi 23 | 24 | # Verify expected number of "optimized" 25 | for i in $(seq 50); do 26 | response=$(docker exec "$GW_NAME" "$rpc" "-s" "$socket" "$cmd" "$NQN") 27 | ana_states=$(echo "$response" | jq -r '.[0].ana_states') 28 | 29 | # Count the number of "optimized" groups 30 | optimized_count=$(jq -nr --argjson ana_states "$ana_states" '$ana_states | map(select(.ana_state == "optimized")) | length') 31 | 32 | # Check if there is expected number of "optimized" group 33 | if [ "$optimized_count" -eq "$EXPECTED_OPTIMIZED" ]; then 34 | # Iterate through JSON array 35 | for item in $(echo "$ana_states" | jq -c '.[]'); do 36 | ana_group=$(echo "$item" | jq -r '.ana_group') 37 | ana_state=$(echo "$item" | jq -r '.ana_state') 38 | 39 | # Check if ana_state is "optimized" 40 | if [ "$ana_state" = "optimized" ]; then 41 | echo "$ana_group" 42 | fi 43 | done 44 | return 45 | else 46 | sleep 5 47 | continue 48 | fi 49 | done 50 | echo "‼️ expect_optimized timeout GW_NAME=$1 
EXPECTED_OPTIMIZED=$2 NQN=$3" 51 | exit 1 # failed 52 | } 53 | 54 | # GW name by index 55 | gw_name() { 56 | i=$1 57 | docker ps --format '{{.ID}}\t{{.Names}}' --filter status=running --filter status=exited | awk '$2 ~ /nvmeof/ && $2 ~ /'$i'/ {print $1}' 58 | } 59 | 60 | # Function to access numbers by index 61 | access_number_by_index() { 62 | numbers=$1 63 | index=$(expr $2 + 1) 64 | number=$(echo "$numbers" | awk -v idx="$index" 'NR == idx {print}') 65 | echo "$number" 66 | } 67 | 68 | # verify that given numbers must be either 1 and 2 or 2 and 1 69 | verify_ana_groups() { 70 | nr1=$1 71 | nr2=$2 72 | 73 | if [ "$nr1" -eq 1 ] && [ "$nr2" -eq 2 ]; then 74 | echo "Verified: first is 1 and second is 2" 75 | elif [ "$nr1" -eq 2 ] && [ "$nr2" -eq 1 ]; then 76 | echo "Verified: first is 2 and second is 1" 77 | else 78 | echo "Invalid numbers: first and second must be either 1 and 2 or 2 and 1" 79 | exit 1 80 | fi 81 | } 82 | 83 | # Function to choose n random number at 1..m range 84 | choose_n_m() { 85 | n=$1 86 | m=$2 87 | count=0 88 | numbers="" 89 | 90 | # Ensure m is greater than 1 to avoid division by zero errors 91 | if [ "$m" -le 1 ]; then 92 | echo "Upper limit m must be greater than 1." 
93 | exit 1 94 | fi 95 | 96 | while [ "$count" -lt "$n" ]; do 97 | # Generate a random number between 1 and m 98 | random_number=$(expr $RANDOM % $m + 1) 99 | 100 | # Check if the number is unique 101 | is_unique=$(echo "$numbers" | grep -c "\<$random_number\>") 102 | if [ "$is_unique" -eq 0 ]; then 103 | # Add the unique number to the list 104 | numbers="$numbers $random_number" 105 | echo $random_number 106 | count=$(expr $count + 1) 107 | fi 108 | done 109 | } 110 | 111 | validate_all_active() { 112 | for s in $(seq $NUM_SUBSYSTEMS); do 113 | all_ana_states=$(for g in $(seq $NUM_GATEWAYS); do 114 | NQN="nqn.2016-06.io.spdk:cnode$s" 115 | GW_OPTIMIZED=$(expect_optimized "$(gw_name $g)" 1 "$NQN") 116 | gw_ana=$(access_number_by_index "$GW_OPTIMIZED" 0) 117 | echo $gw_ana 118 | done) 119 | 120 | if [ "$(echo "$all_ana_states" | sort -n)" != "$(seq $NUM_GATEWAYS)" ]; then 121 | echo "all active state failure" 122 | exit 1 123 | fi 124 | done 125 | } 126 | 127 | 128 | # 129 | # MAIN 130 | # 131 | 132 | NUM_SUBSYSTEMS=2 133 | NUM_GATEWAYS=4 134 | FAILING_GATEWAYS=2 135 | # 136 | # Step 1 validate all gateways are optimized for one of ANA group 137 | # and all groups are unique 138 | # 139 | 140 | echo "ℹ️ Step 1" 141 | validate_all_active 142 | 143 | # 144 | # Step 2 failover 145 | # 146 | 147 | echo "ℹ️ Step 2" 148 | gws_to_stop=$(choose_n_m $FAILING_GATEWAYS $NUM_GATEWAYS) 149 | for i in $(seq 0 $(expr $FAILING_GATEWAYS - 1)); do 150 | gw=$(access_number_by_index "$gws_to_stop" $i) 151 | gw_name=$(gw_name $gw) 152 | echo "ℹ️ Stop gw $gw_name i=$i gw=$gw" 153 | docker stop $gw_name 154 | done 155 | 156 | docker ps 157 | 158 | # expect remaining gws to have two optimized groups each 159 | for i in $(seq $NUM_GATEWAYS); do 160 | found=0 161 | for j in $(seq 0 $(expr $FAILING_GATEWAYS - 1)); do 162 | stopped_gw=$(access_number_by_index "$gws_to_stop" $j) 163 | if [ "$i" -eq "$stopped_gw" ]; then 164 | found=1 165 | break 166 | fi 167 | done 168 | 169 | # if gw is a healthy one 170
| if [ "$found" -eq "0" ]; then 171 | echo "ℹ️ Check healthy gw gw=$i" 172 | for s in $(seq $NUM_SUBSYSTEMS); do 173 | NQN="nqn.2016-06.io.spdk:cnode$s" 174 | GW_OPTIMIZED=$(expect_optimized "$(gw_name $i)" 2 "$NQN") 175 | done 176 | fi 177 | done 178 | 179 | # 180 | # Step 3 failback 181 | # 182 | echo "ℹ️ Step 3" 183 | for i in $(seq 0 $(expr $FAILING_GATEWAYS - 1)); do 184 | gw=$(access_number_by_index "$gws_to_stop" $i) 185 | gw_name=$(gw_name $gw) 186 | echo "ℹ️ Start gw $gw_name i=$i gw=$gw" 187 | docker start $gw_name 188 | done 189 | 190 | docker ps 191 | 192 | validate_all_active 193 | -------------------------------------------------------------------------------- /tests/test_erasure_pool.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import grpc 3 | import re 4 | from control.server import GatewayServer 5 | from control.cli import main as cli 6 | from control.cephutils import CephUtils 7 | from control.proto import gateway_pb2_grpc as pb2_grpc 8 | 9 | image = "ec_pool_image" 10 | pool = "rbd" 11 | ec_pool_no_overwrites = "ec_pool_no_overwrites" 12 | ec_pool_overwrites = "ec_pool_overwrites" 13 | subsystem = "nqn.2016-06.io.spdk:cnode1" 14 | group_name = "mygroup" 15 | 16 | 17 | @pytest.fixture(scope="module") 18 | def gateway(config): 19 | """Sets up and tears down Gateway""" 20 | 21 | addr = config.get("gateway", "addr") 22 | port = config.getint("gateway", "port") 23 | config.config["gateway"]["group"] = group_name 24 | config.config["gateway-logs"]["log_level"] = "debug" 25 | ceph_utils = CephUtils(config) 26 | 27 | with GatewayServer(config) as gateway: 28 | 29 | # Start gateway 30 | gateway.gw_logger_object.set_log_level("debug") 31 | ceph_utils.execute_ceph_monitor_command( 32 | "{" + f'"prefix":"nvme-gw create", "id": "{gateway.name}", "pool": "{pool}", ' 33 | f'"group": "{group_name}"' + "}" 34 | ) 35 | gateway.serve() 36 | 37 | # Bind the client and Gateway 38 | channel = 
grpc.insecure_channel(f"{addr}:{port}") 39 | pb2_grpc.GatewayStub(channel) 40 | yield gateway.gateway_rpc, ceph_utils 41 | 42 | # Stop gateway 43 | gateway.server.stop(grace=1) 44 | gateway.gateway_rpc.gateway_state.delete_state() 45 | 46 | 47 | def test_setup_environment(caplog, gateway): 48 | gw, ceph_utils = gateway 49 | caplog.clear() 50 | ceph_utils.execute_ceph_monitor_command( 51 | "{" + f'"prefix":"osd pool create", "pool": "{ec_pool_no_overwrites}", ' 52 | f'"pool_type": "erasure"' + "}" 53 | ) 54 | assert f'Execute monitor command: {{"prefix":"osd pool create", "pool": ' \ 55 | f'"{ec_pool_no_overwrites}", "pool_type": "erasure"}}' in caplog.text 56 | assert f'Monitor reply: (0, b\'\', "pool \'{ec_pool_no_overwrites}\' created")' in caplog.text 57 | caplog.clear() 58 | ceph_utils.execute_ceph_monitor_command( 59 | "{" + f'"prefix":"osd pool create", "pool": "{ec_pool_overwrites}", ' 60 | f'"pool_type": "erasure"' + "}" 61 | ) 62 | assert f'Execute monitor command: {{"prefix":"osd pool create", "pool": ' \ 63 | f'"{ec_pool_overwrites}", "pool_type": "erasure"}}' in caplog.text 64 | assert f'Monitor reply: (0, b\'\', "pool \'{ec_pool_overwrites}\' created")' in caplog.text 65 | caplog.clear() 66 | ceph_utils.execute_ceph_monitor_command( 67 | "{" + f'"prefix":"osd pool set", "pool": "{ec_pool_overwrites}", ' 68 | f'"var": "allow_ec_overwrites", "val": "true"' + "}" 69 | ) 70 | assert f'Execute monitor command: {{"prefix":"osd pool set", "pool": ' \ 71 | f'"{ec_pool_overwrites}", "var": "allow_ec_overwrites", ' \ 72 | f'"val": "true"}}' in caplog.text 73 | pattern = re.compile(r"Monitor reply: \(0, b'', 'set pool \d+ allow_ec_overwrites to true'\)") 74 | assert pattern.search(caplog.text) is not None 75 | caplog.clear() 76 | cli(["subsystem", "add", "--subsystem", subsystem, "--no-group-append"]) 77 | assert f"Adding subsystem {subsystem}: Successful" in caplog.text 78 | 79 | 80 | def test_pool_does_not_exist(caplog, gateway): 81 | caplog.clear() 82 | 
cli(["namespace", "add", "--subsystem", subsystem, "--rbd-pool", "junk", 83 | "--rbd-image", "junkimage", "--size", "10MB", "--rbd-create-image"]) 84 | assert f"Failure adding namespace to {subsystem}: RBD pool " \ 85 | f"junk doesn't exist" in caplog.text 86 | 87 | 88 | def test_data_pool_does_not_exist(caplog, gateway): 89 | caplog.clear() 90 | cli(["namespace", "add", "--subsystem", subsystem, "--rbd-pool", pool, 91 | "--rbd-data-pool", "junk", 92 | "--rbd-image", "junkimage", "--size", "10MB", "--rbd-create-image"]) 93 | assert f"Failure adding namespace to {subsystem}: RBD data pool " \ 94 | f"junk doesn't exist" in caplog.text 95 | 96 | 97 | def test_use_erasure_pool_as_rbd_pool(caplog, gateway): 98 | caplog.clear() 99 | cli(["namespace", "add", "--subsystem", subsystem, "--rbd-pool", ec_pool_overwrites, 100 | "--rbd-image", "junkimage", "--size", "10MB", "--rbd-create-image"]) 101 | assert f"Failure adding namespace to {subsystem}: RBD pool " \ 102 | f"{ec_pool_overwrites} is not a replicated pool" in caplog.text 103 | 104 | 105 | def test_use_erasure_pool_with_no_overwrites(caplog, gateway): 106 | caplog.clear() 107 | cli(["namespace", "add", "--subsystem", subsystem, "--rbd-pool", pool, 108 | "--rbd-data-pool", ec_pool_no_overwrites, 109 | "--rbd-image", "junkimage", "--size", "10MB", "--rbd-create-image"]) 110 | assert f'Failure adding namespace to {subsystem}: RBD data pool ' \ 111 | f'{ec_pool_no_overwrites} doesn\'t have "allow_ec_overwrites" set' in caplog.text 112 | 113 | 114 | def test_use_erasure_pool_as_rbd_data_pool(caplog, gateway): 115 | caplog.clear() 116 | cli(["namespace", "add", "--subsystem", subsystem, "--rbd-pool", pool, 117 | "--rbd-data-pool", ec_pool_overwrites, 118 | "--rbd-image", image, "--size", "10MB", "--rbd-create-image"]) 119 | assert f"Adding namespace 1 to {subsystem}: Successful" in caplog.text 120 | assert f"Image {pool}/{image} created, size is 10485760 bytes, " \ 121 | f"data pool is {ec_pool_overwrites}" in caplog.text 
122 | caplog.clear() 123 | cli(["--format", "json", "namespace", "list", "--subsystem", subsystem, "--nsid", "1"]) 124 | assert f'"rbd_image_name": "{image}"' in caplog.text 125 | assert f'"rbd_pool_name": "{pool}"' in caplog.text 126 | assert f'"rbd_data_pool_name": "{ec_pool_overwrites}"' in caplog.text 127 | -------------------------------------------------------------------------------- /tests/ha/auto_listeners.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -xe 3 | 4 | 5 | GW1_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /1/ {print $1}') 6 | GW2_NAME=$(docker ps --format '{{.ID}}\t{{.Names}}' | awk '$2 ~ /nvmeof/ && $2 ~ /2/ {print $1}') 7 | 8 | ip1="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW1_NAME")" 9 | ip2="$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' "$GW2_NAME")" 10 | 11 | NUM_SUBSYSTEMS=3 12 | NQN1="nqn.2016-06.io.spdk:cnode01" # auto-listeners 13 | NQN2="nqn.2016-06.io.spdk:cnode02" # auto-listeners with secure listeners 14 | NQN3="nqn.2016-06.io.spdk:cnode03" # normal listeners 15 | 16 | SUBNET=$(echo $ip1 | grep -oE "^([0-9]{1,3}\.){3}") 17 | SUBNET="${SUBNET}0/24" 18 | 19 | echo "Subnet $SUBNET would be used to create 2 subsystems with auto-listeners $NQN1 (non-secure listeners) and $NQN2 (secure listeners)" 20 | echo "And create $NQN3 with normal listeners" 21 | 22 | 23 | test_listeners() 24 | { 25 | ip_1=$1 26 | ip_2=$2 # optional 27 | for i in $(seq $NUM_SUBSYSTEMS); do 28 | NQN="nqn.2016-06.io.spdk:cnode0$i" 29 | is_secure=No 30 | if [ "$NQN" = "$NQN2" ]; then 31 | is_secure=Yes 32 | fi 33 | is_manual=No 34 | if [ "$NQN" = "$NQN3" ]; then 35 | is_manual=Yes 36 | fi 37 | 38 | # CHECK 1: list listeners 39 | docker compose run -T --rm nvmeof-cli --server-address $ip_1 --server-port 5500 --output stdio --format plain listener list -n $NQN > /tmp/listeners.txt 40 | cat 
/tmp/listeners.txt 41 | [[ `cat /tmp/listeners.txt | grep "${ip_1}" | awk '{print $2}'` == "TCP" ]] 42 | [[ `cat /tmp/listeners.txt | grep "${ip_1}" | awk '{print $3}'` == "IPv4" ]] 43 | [[ `cat /tmp/listeners.txt | grep "${ip_1}" | awk '{print $4}'` == "${ip_1}:4420" ]] 44 | [[ `cat /tmp/listeners.txt | grep "${ip_1}" | awk '{print $5}'` == "$is_secure" ]] 45 | [[ `cat /tmp/listeners.txt | grep "${ip_1}" | awk '{print $6}'` == "Yes" ]] 46 | [[ `cat /tmp/listeners.txt | grep "${ip_1}" | awk '{print $7}'` == "$is_manual" ]] 47 | if [ -n "$ip_2" ]; then 48 | [[ `cat /tmp/listeners.txt | grep "${ip_2}" | awk '{print $2}'` == "TCP" ]] 49 | [[ `cat /tmp/listeners.txt | grep "${ip_2}" | awk '{print $3}'` == "IPv4" ]] 50 | [[ `cat /tmp/listeners.txt | grep "${ip_2}" | awk '{print $4}'` == "${ip_2}:4420" ]] 51 | [[ `cat /tmp/listeners.txt | grep "${ip_2}" | awk '{print $5}'` == "$is_secure" ]] 52 | [[ `cat /tmp/listeners.txt | grep "${ip_2}" | awk '{print $6}'` == "No" ]] 53 | [[ `cat /tmp/listeners.txt | grep "${ip_2}" | awk '{print $7}'` == "$is_manual" ]] 54 | fi 55 | 56 | # CHECK 2: gw listener_info 57 | docker compose run -T --rm nvmeof-cli --server-address $ip_1 --server-port 5500 --output stdio --format plain gw listener_info -n $NQN > /tmp/gw_listeners.txt 58 | cat /tmp/gw_listeners.txt 59 | [[ `cat /tmp/gw_listeners.txt | grep "${ip_1}" | awk '{print $2}'` == "TCP" ]] 60 | [[ `cat /tmp/gw_listeners.txt | grep "${ip_1}" | awk '{print $3}'` == "IPv4" ]] 61 | [[ `cat /tmp/gw_listeners.txt | grep "${ip_1}" | awk '{print $4}'` == "${ip_1}:4420" ]] 62 | [[ `cat /tmp/gw_listeners.txt | grep "${ip_1}" | awk '{print $5}'` == "$is_secure" ]] 63 | [[ `cat /tmp/gw_listeners.txt | grep "${ip_1}" | awk '{print $6}'` == "Yes" ]] 64 | if [ -n "$ip_2" ]; then 65 | docker compose run -T --rm nvmeof-cli --server-address $ip_2 --server-port 5500 --output stdio --format plain gw listener_info -n $NQN > /tmp/gw_listeners.txt 66 | cat /tmp/gw_listeners.txt 67 | [[ `cat 
/tmp/gw_listeners.txt | grep "${ip_2}" | awk '{print $2}'` == "TCP" ]] 68 | [[ `cat /tmp/gw_listeners.txt | grep "${ip_2}" | awk '{print $3}'` == "IPv4" ]] 69 | [[ `cat /tmp/gw_listeners.txt | grep "${ip_2}" | awk '{print $4}'` == "${ip_2}:4420" ]] 70 | [[ `cat /tmp/gw_listeners.txt | grep "${ip_2}" | awk '{print $5}'` == "$is_secure" ]] 71 | [[ `cat /tmp/gw_listeners.txt | grep "${ip_2}" | awk '{print $6}'` == "Yes" ]] 72 | fi 73 | 74 | done 75 | } 76 | 77 | echo "ℹ️ ℹ️ Start test: create 2 subsystems with auto listeners and 1 normal subsystem with manual listeners:" 78 | 79 | docker compose run -T --rm nvmeof-cli --server-address $ip2 --server-port 5500 subsystem add -n $NQN1 --no-group-append --network-mask $SUBNET 80 | docker compose run -T --rm nvmeof-cli --server-address $ip2 --server-port 5500 subsystem add -n $NQN2 --no-group-append --network-mask $SUBNET --secure-listeners 81 | 82 | docker compose run -T --rm nvmeof-cli --server-address $ip2 --server-port 5500 subsystem add -n $NQN3 --no-group-append 83 | docker compose run --rm nvmeof-cli --server-address $ip2 --server-port 5500 listener add --subsystem $NQN3 --host-name $GW1_NAME --traddr $ip1 --trsvcid 4420 84 | docker compose run --rm nvmeof-cli --server-address $ip2 --server-port 5500 listener add --subsystem $NQN3 --host-name $GW2_NAME --traddr $ip2 --trsvcid 4420 85 | 86 | docker compose run -T --rm nvmeof-cli --server-address $ip2 --server-port 5500 --output stdio --format json subsystem list 87 | 88 | echo "ℹ️ ℹ️ Create hosts" 89 | for i in $(seq $NUM_SUBSYSTEMS); do 90 | NQN="nqn.2016-06.io.spdk:cnode0$i" 91 | docker compose run --rm nvmeof-cli --server-address $ip2 --server-port 5500 host add --subsystem $NQN --host-nqn ${NQN}host 92 | done 93 | 94 | echo "ℹ️ ℹ️ Create namespaces" 95 | for i in $(seq $NUM_SUBSYSTEMS); do 96 | NQN="nqn.2016-06.io.spdk:cnode0$i" 97 | for num in $(seq 3); do 98 | image_name="demo_image$(expr \( $num + 5 \) \* $i)" 99 | echo $image_name 100 | docker compose run --rm 
nvmeof-cli --server-address $ip2 --server-port 5500 namespace add --subsystem $NQN --rbd-pool rbd --rbd-image $image_name --size 10M --rbd-create-image --force 101 | done 102 | done 103 | 104 | test_listeners $ip1 $ip2 105 | 106 | ############################################################################################ 107 | 108 | echo "ℹ️ ℹ️ test passed" 109 | exit 0 110 | -------------------------------------------------------------------------------- /tests/test_grpc.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import time 3 | from control.server import GatewayServer 4 | from control.cli import main as cli 5 | from control.cephutils import CephUtils 6 | 7 | image = "mytestdevimage" 8 | pool = "rbd" 9 | subsystem_prefix = "nqn.2016-06.io.spdk:cnode" 10 | host_prefix = "nqn.2016-06.io.spdk:host" 11 | created_resource_count = 20 12 | subsys_list_count = 5 13 | 14 | 15 | def wait_for_string(caplog, str, timeout): 16 | for i in range(timeout): 17 | if str in caplog.text: 18 | return 19 | time.sleep(1) 20 | 21 | assert False, f"Couldn't find string \"{str}\" in {timeout} seconds" 22 | 23 | 24 | def create_resource_by_index(i): 25 | subsystem = f"{subsystem_prefix}{i}" 26 | cli(["subsystem", "add", "--subsystem", subsystem]) 27 | cli(["namespace", "add", "--subsystem", subsystem, "--rbd-pool", pool, "--rbd-image", image, 28 | "--size", "16MB", "--rbd-create-image", "--load-balancing-group", "1", 29 | "--force", "--no-auto-visible"]) 30 | 31 | 32 | def check_resource_by_index(i, caplog): 33 | subsystem = f"{subsystem_prefix}{i}" 34 | caplog.clear() 35 | cli(["--format", "plain", "subsystem", "list", "--subsystem", subsystem]) 36 | assert f"{subsystem}" in caplog.text 37 | caplog.clear() 38 | cli(["--format", "plain", "namespace", "list", "--subsystem", subsystem, "--nsid", "1"]) 39 | assert "No namespace" not in caplog.text 40 | assert "Failure listing namespaces:" not in caplog.text 41 | 42 | # We want to 
fail in case we got an exception about invalid data in pb2 functions but this is 43 | # just a warning for pytest. In order for the test to fail in such a case we need to ask pytest 44 | # to regard this as an error 45 | 46 | 47 | @pytest.mark.filterwarnings("error::pytest.PytestUnhandledThreadExceptionWarning") 48 | def test_create_get_subsys(caplog, config): 49 | config.config["gateway"]["group"] = "" 50 | ceph_utils = CephUtils(config) 51 | with GatewayServer(config) as gateway: 52 | ceph_utils.execute_ceph_monitor_command( 53 | "{" + f'"prefix":"nvme-gw create", "id": "{gateway.name}", ' 54 | f'"pool": "{pool}", "group": ""' + "}" 55 | ) 56 | gateway.serve() 57 | 58 | for i in range(created_resource_count): 59 | create_resource_by_index(i) 60 | assert "failed" not in caplog.text.lower().replace( 61 | "failed to notify", "").replace( 62 | "failed to delete reservation_key from image", "") 63 | assert "Failure" not in caplog.text 64 | 65 | assert f"{subsystem_prefix}0 with load balancing group id 1" in caplog.text 66 | 67 | caplog.clear() 68 | # add a listener 69 | cli(["listener", "add", "--subsystem", f"{subsystem_prefix}0", "--host-name", 70 | gateway.gateway_rpc.host_name, "--traddr", "127.0.0.1", "--trsvcid", "5001"]) 71 | assert f"Adding {subsystem_prefix}0 listener at 127.0.0.1:5001: Successful" in caplog.text 72 | 73 | # Set QOS for the first namespace 74 | cli(["namespace", "set_qos", "--subsystem", f"{subsystem_prefix}0", "--nsid", "1", 75 | "--rw-ios-per-second", "2000"]) 76 | assert f"Setting QOS limits of namespace 1 in " \ 77 | f"{subsystem_prefix}0: Successful" in caplog.text 78 | assert f"No previous QOS limits found, this is the first time the limits are set " \ 79 | f"for namespace 1 on {subsystem_prefix}0" in caplog.text 80 | caplog.clear() 81 | cli(["namespace", "set_qos", "--subsystem", f"{subsystem_prefix}0", "--nsid", "1", 82 | "--r-megabytes-per-second", "5"]) 83 | assert f"Setting QOS limits of namespace 1 in " \ 84 | 
f"{subsystem_prefix}0: Successful" in caplog.text 85 | assert f"No previous QOS limits found, this is the first time the limits are set " \ 86 | f"for namespace 1 on {subsystem_prefix}0" not in caplog.text 87 | 88 | # add host to the first namespace 89 | caplog.clear() 90 | cli(["namespace", "add_host", "--subsystem", f"{subsystem_prefix}0", 91 | "--nsid", "1", "--host-nqn", f"{host_prefix}0", "--force"]) 92 | assert "Failure adding host" not in caplog.text 93 | 94 | caplog.clear() 95 | 96 | # restart the gateway here 97 | with GatewayServer(config) as gateway: 98 | ceph_utils.execute_ceph_monitor_command( 99 | "{" + f'"prefix":"nvme-gw create", "id": "{gateway.name}", ' 100 | f'"pool": "{pool}", "group": ""' + "}" 101 | ) 102 | gateway.serve() 103 | 104 | # wait until we see at least one subsystem created 105 | wait_for_string(caplog, f"Received request to create subsystem {subsystem_prefix}", 60) 106 | 107 | for i in range(subsys_list_count): 108 | cli(["--format", "plain", "subsystem", "list"]) 109 | assert "Exception" not in caplog.text 110 | assert "No subsystems" not in caplog.text 111 | time.sleep(0.1) 112 | 113 | time.sleep(20) # Make sure update() is over 114 | assert f"{subsystem_prefix}0 with load balancing group id 1" in caplog.text 115 | assert f"Received request to set QOS limits for namespace 1 on " \ 116 | f"{subsystem_prefix}0, R/W IOs per second: 2000 " \ 117 | f"Read megabytes per second: 5" in caplog.text 118 | assert f"Received request to set QOS limits for namespace 1 on " \ 119 | f"{subsystem_prefix}0, R/W IOs per second: 2000 " \ 120 | f"Read megabytes per second: 5" in caplog.text 121 | assert f"Received request to add host {host_prefix}0 to namespace 1 on " \ 122 | f"{subsystem_prefix}0, force: True, context: None" in caplog.text 123 | caplog.clear() 124 | cli(["--format", "plain", "subsystem", "list"]) 125 | assert "Exception" not in caplog.text 126 | assert "No subsystems" not in caplog.text 127 | for i in range(created_resource_count): 
128 | check_resource_by_index(i, caplog) 129 | --------------------------------------------------------------------------------