├── benchmark ├── __init__.py ├── nullbench.py ├── lis.py └── cephtestrados.py ├── cluster ├── __init__.py └── cluster.py ├── client_endpoints ├── __init__.py ├── cephfsfuse_client_endpoints.py ├── rbdnbd_client_endpoints.py ├── cephfskernel_client_endpoints.py ├── librbd_client_endpoints.py ├── rbdkernel_client_endpoints.py ├── rgws3_client_endpoints.py ├── rbdfuse_client_endpoints.py ├── client_endpoints.py ├── rbdtcmu_client_endpoints.py └── ceph_client_endpoints.py ├── .gitignore ├── post_processing ├── ___init___.py ├── plotter │ ├── __init__.py │ ├── README.md │ ├── simple_plotter.py │ ├── directory_comparison_plotter.py │ └── file_comparison_plotter.py ├── reports │ ├── __init__.py │ └── README.md ├── types.py ├── README.md └── formatter │ └── README.md ├── requirements.txt ├── run_tests.sh ├── docs ├── cluster.png ├── toplevel.png ├── workloads.png ├── benchmarks.png ├── cbt_utests_gen.png ├── cbt_utests_run.png ├── Workloads.md ├── TestPlanSchema.md └── AutomaticUnitTestGeneration.md ├── example ├── runme ├── wip-mark-testing │ ├── README │ ├── runtests.xfs.yaml │ └── ceph.conf ├── example-3x-radosbench.yaml ├── example-ec-radosbench.yaml ├── example-kvmrbdfio.yaml ├── example-raw.yaml ├── wip-cosbench │ ├── cosbench_ex.yaml │ └── cosbench_ex_ceph.conf ├── example-3x-radosbench-crimson.yaml ├── example-hsbench.yaml ├── bluestore │ ├── runtests.bluestore_example.yaml │ └── mkpart_hdd_nvme_bs.sh ├── example-client_endpoints.yaml └── rbd_fio_test.yml ├── tools ├── crimson │ ├── example_picture.png │ ├── crimson_auto_bench_example.png │ ├── fio_config.yaml │ ├── bench_config.yaml │ ├── seastore_radosbench_run.sh │ ├── seastore_fio_run.sh │ └── seastore_metrics_run.sh ├── fio_visualize_data │ ├── fioplotcommon.py │ └── fio-plot-stats-usage.rst ├── compare_sysctl.py ├── test_bm_template.py ├── mkpartmagna.sh ├── fio_objectstore_tools │ ├── hdd-runs.json │ ├── nvme-runs.json │ ├── bluestore_throttle_tuning.rst │ ├── analyze.py │ └── summarize.py ├── fio-parse-jsons │ └── README.md ├── invariant.yaml ├── fio_common_output_wrapper.py ├── fio-parse-json.py ├── makecephconf.py ├── generate_performance_report.py ├── generate_comparison_performance_report.py ├── config_wizard.py ├── is-regression.py └── serialise_benchmark.py ├── include ├── html │ └── table.html ├── css │ └── table.css ├── performance_report.tex └── js │ └── table.js ├── COPYING ├── tox.ini ├── setup.sh ├── parsing ├── htmlgenerator.py ├── database.py └── test.py ├── tests ├── test_benchmarkfactory.py ├── test_common.py └── test_bm_nullbench.py ├── client_endpoints_factory.py ├── benchmarkfactory.py ├── log_support.py ├── cbt.py ├── compare.py ├── plot_results.py ├── settings.py └── statistic.py /benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cluster/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /client_endpoints/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyo 3 | -------------------------------------------------------------------------------- /post_processing/___init___.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /post_processing/plotter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /post_processing/reports/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pyyaml 2 | lxml 3 | matplotlib 4 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | set -o pipefail 4 | 5 | tox 6 | -------------------------------------------------------------------------------- /docs/cluster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceph/cbt/master/docs/cluster.png -------------------------------------------------------------------------------- /docs/toplevel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceph/cbt/master/docs/toplevel.png -------------------------------------------------------------------------------- /docs/workloads.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceph/cbt/master/docs/workloads.png -------------------------------------------------------------------------------- /docs/benchmarks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceph/cbt/master/docs/benchmarks.png -------------------------------------------------------------------------------- /example/runme: -------------------------------------------------------------------------------- 1 | ../cbt.py --archive=/home/ubuntu/data/foo ./runtests.xfs.yaml 2 | 3 | -------------------------------------------------------------------------------- /docs/cbt_utests_gen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceph/cbt/master/docs/cbt_utests_gen.png -------------------------------------------------------------------------------- /docs/cbt_utests_run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceph/cbt/master/docs/cbt_utests_run.png -------------------------------------------------------------------------------- /tools/crimson/example_picture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceph/cbt/master/tools/crimson/example_picture.png -------------------------------------------------------------------------------- /tools/crimson/crimson_auto_bench_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceph/cbt/master/tools/crimson/crimson_auto_bench_example.png -------------------------------------------------------------------------------- /include/html/table.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |
8 | 9 | -------------------------------------------------------------------------------- /include/css/table.css: -------------------------------------------------------------------------------- 1 | #view > tbody div { 2 | /* height:50; */ 3 | } 4 | 5 | th { 6 | background-color:blue; 7 | color:white 8 | } 9 | 10 | td { 11 | border: 1px solid lightgrey; 12 | background-color:#ccc; 13 | width:200px; 14 | } 15 | -------------------------------------------------------------------------------- /example/wip-mark-testing/README: -------------------------------------------------------------------------------- 1 | This directory contains the current set of tests that are run to verify 2 | that new PRs don't break CBT. It's very incomplete at the moment, but 3 | a start! It also may serve as one of the examples for how to run basic 4 | tests with CBT. 5 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Upstream-Contact: mnelson@redhat.com 3 | License: LGPL2.1 or later (see COPYING-LGPL2.1) 4 | 5 | Files: * 6 | Copyright: (c) 2013-2015 Red Hat, Inc. 7 | License: LGPL2.1 (see COPYING-LGPL2.1) 8 | -------------------------------------------------------------------------------- /tools/crimson/fio_config.yaml: -------------------------------------------------------------------------------- 1 | alias: eg.0.classic 2 | fio_rbd_rand_write: 1 3 | client: 1 1 4 | block_size: 4K 5 | osd_cores: 1 2 6 | time: 5 7 | tolerance_time: 5 8 | retry_limit: 2 9 | store: bluestore 10 | iostat: True 11 | --- 12 | alias: eg.1.crimson 13 | fio_rbd_rand_write: 1 14 | client: 1 1 15 | block_size: 4K 16 | osd_cores: 1 2 17 | time: 5 18 | tolerance_time: 5 19 | retry_limit: 2 20 | crimson: True 21 | store: bluestore 22 | iostat: True 23 | -------------------------------------------------------------------------------- /include/performance_report.tex: -------------------------------------------------------------------------------- 1 | \usepackage[margin=1.5cm]{geometry} 2 | \usepackage{titlesec} 3 | \usepackage{fancyhdr} 4 | \newcommand{\sectionbreak}{\newpage} 5 | \fancypagestyle{plain}{ 6 | \fancyhf{} 7 | \renewcommand{\headrulewidth}{0pt} 8 | \renewcommand{\footrulewidth}{0pt} 9 | \fancyhead[R]{\nouppercase{\rightmark}} 10 | \fancyhead[L]{\nouppercase{\leftmark}} 11 | \fancyfoot[C]{\thepage} 12 | \fancyfoot[L]{BUILD} 13 | \fancyfoot[R]{\today} 14 | } 15 | \pagestyle{plain} -------------------------------------------------------------------------------- /tools/crimson/bench_config.yaml: -------------------------------------------------------------------------------- 1 | # this is an example for the config file of crimson_auto_bench 2 | 3 | alias: eg.0.classic 4 | rand_write: 1 5 | client: 1 2 6 | block_size: 4K 7 | osd_cores: 1 2 8 | time: 5 9 | tolerance_time: 10 10 | retry_limit: 2 11 | iostat: True 12 | --- 13 | alias: eg.1.crimson 14 | rand_write: 1 15 | client: 1 2 16 | block_size: 4K 17 | osd_cores: 1 2 18 | time: 5 19 | tolerance_time: 10 20 | retry_limit: 2 21 | crimson: True 22 | store: bluestore 23 | iostat: True 24 | -------------------------------------------------------------------------------- /tools/fio_visualize_data/fioplotcommon.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import re 4 | 5 | def sort_map_data_by_key(data): 6 | sorteddata 
= {} 7 | # Sort data dictionary based on key 8 | convert = lambda text: int(text) if text.isdigit() else text 9 | alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)] 10 | sorted_keys = sorted(data.keys(), key=alphanum_key) 11 | for key in sorted_keys: 12 | sorteddata[key] = data[key] 13 | return sorteddata 14 | -------------------------------------------------------------------------------- /benchmark/nullbench.py: -------------------------------------------------------------------------------- 1 | from .benchmark import Benchmark 2 | 3 | 4 | class Nullbench(Benchmark): 5 | 6 | def __init__(self, archive_dir, cluster, config): 7 | super(Nullbench, self).__init__(archive_dir, cluster, config) 8 | 9 | def initialize(self): 10 | super(Nullbench, self).initialize() 11 | 12 | def run(self): 13 | super(Nullbench, self).run() 14 | 15 | def recovery_callback(self): 16 | pass 17 | 18 | def __str__(self): 19 | return super(Nullbench, self).__str__() 20 | -------------------------------------------------------------------------------- /client_endpoints/cephfsfuse_client_endpoints.py: -------------------------------------------------------------------------------- 1 | import common 2 | 3 | from .ceph_client_endpoints import CephClientEndpoints 4 | 5 | class CephfsFuseClientEndpoints(CephClientEndpoints): 6 | def create(self): 7 | self.create_fs() 8 | 9 | def mount(self): 10 | self.mount_fs() 11 | 12 | def mount_fs_helper(self, node, dir_name): 13 | cmd = 'sudo %s -c %s --client_mds_namespace=%s %s' % (self.ceph_fuse_cmd, self.tmp_conf, self.name, dir_name) 14 | common.pdsh(node, cmd, continue_if_error=False).communicate() 15 | 16 | def create_recovery_image(self): 17 | self.create_rbd_recovery() 18 | -------------------------------------------------------------------------------- /client_endpoints/rbdnbd_client_endpoints.py: -------------------------------------------------------------------------------- 1 | import common 2 | 3 | from .ceph_client_endpoints import CephClientEndpoints 4 | 5 | class RbdNbdClientEndpoints(CephClientEndpoints): 6 | def create(self): 7 | self.create_rbd() 8 | 9 | def mount(self): 10 | self.mount_rbd() 11 | 12 | def map_rbd(self, node, rbd_name): 13 | cmd = 'sudo %s map %s/%s' % (self.rbd_nbd_cmd, self.pool, rbd_name) 14 | stdout, stderr = common.pdsh(node, cmd, continue_if_error=False).communicate() 15 | return stdout.rstrip().rpartition(": ")[2] 16 | 17 | def create_recovery_image(self): 18 | self.create_rbd_recovery() 19 | -------------------------------------------------------------------------------- /client_endpoints/cephfskernel_client_endpoints.py: -------------------------------------------------------------------------------- 1 | import common 2 | 3 | from .ceph_client_endpoints import CephClientEndpoints 4 | 5 | class CephfsKernelClientEndpoints(CephClientEndpoints): 6 | def create(self): 7 | self.create_fs() 8 | 9 | def mount(self): 10 | self.mount_fs() 11 | 12 | def mount_fs_helper(self, node, dir_name): 13 | cmd = 'sudo %s %s:/ %s -o name=admin,secretfile=%s,mds_namespace=%s' % (self.mount_cmd, ','.join(self.mon_addrs), dir_name, self.client_secret, self.name) 14 | common.pdsh(node, cmd, continue_if_error=False).communicate() 15 | 16 | def create_recovery_image(self): 17 | self.create_rbd_recovery() 18 | -------------------------------------------------------------------------------- /post_processing/types.py: -------------------------------------------------------------------------------- 1 | """ 2 | A file to contain common type
definitions for use in the post-processing 3 | """ 4 | 5 | from typing import Union 6 | 7 | # FIO json file data types 8 | JOBS_DATA_TYPE = list[dict[str, Union[str, dict[str, Union[int, float, dict[str, Union[int, float]]]]]]] 9 | 10 | # Common formatter data types 11 | IODEPTH_DETAILS_TYPE = dict[str, str] 12 | COMMON_FORMAT_FILE_DATA_TYPE = dict[str, Union[str, IODEPTH_DETAILS_TYPE]] 13 | 14 | # Common formatter internal data types 15 | INTERNAL_BLOCKSIZE_DATA_TYPE = dict[str, COMMON_FORMAT_FILE_DATA_TYPE] 16 | INTERNAL_FORMATTED_OUTPUT_TYPE = dict[str, INTERNAL_BLOCKSIZE_DATA_TYPE] 17 | 18 | # Plotter types 19 | PLOT_DATA_TYPE = dict[str, dict[str, str]] 20 | -------------------------------------------------------------------------------- /cluster/cluster.py: -------------------------------------------------------------------------------- 1 | class Cluster(object): 2 | def __init__(self, config): 3 | self.config = config 4 | base_tmp = config.get('tmp_dir', '/tmp/cbt') 5 | self.mnt_dir = config.get('mnt_dir', "%s/%s" % (base_tmp, 'mnt')) 6 | self.tmp_dir = "%s/%s" % (base_tmp, config.get('clusterid')) 7 | self.archive_dir = "%s/%s" % (config.get('archive_dir'), config.get('clusterid')) 8 | self.tmp_conf = config.get('tmp_conf', '/tmp/cbt') 9 | 10 | def get_mnt_dir(self): 11 | return self.mnt_dir 12 | 13 | def getclass(self): 14 | return self.__class__.__name__ 15 | 16 | def initialize(self): 17 | pass 18 | 19 | def cleanup(self): 20 | pass 21 | 22 | def __str__(self): 23 | return str(self.config) 24 | -------------------------------------------------------------------------------- /client_endpoints/librbd_client_endpoints.py: -------------------------------------------------------------------------------- 1 | import common 2 | 3 | from .ceph_client_endpoints import CephClientEndpoints 4 | 5 | class LibrbdClientEndpoints(CephClientEndpoints): 6 | def __init__(self, cluster, config): 7 | super(LibrbdClientEndpoints, self).__init__(cluster, config) 8 | 9 | def create(self): 10 | self.create_rbd() 11 | 12 | def mount(self): 13 | # Don't mount anything, just set the endpoints to the pool/rbd names 14 | for ep_num in range(0, self.endpoints_per_client): 15 | rbd_name = self.get_local_rbd_name(ep_num) 16 | self.endpoints.append("%s/%s" % (self.pool, rbd_name)) 17 | self.endpoint_type = "rbd" 18 | return self.get_endpoints() 19 | 20 | def create_recovery_image(self): 21 | self.create_rbd_recovery() 22 | -------------------------------------------------------------------------------- /example/example-3x-radosbench.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | head: "ceph@head" 3 | clients: ["ceph@client"] 4 | osds: ["ceph@osd"] 5 | mons: ["ceph@mon"] 6 | osds_per_node: 1 7 | fs: xfs 8 | mkfs_opts: -f -i size=2048 9 | mount_opts: -o inode64,noatime,logbsize=256k 10 | conf_file: /home/ceph/ceph-tools/cbt/example/ceph.conf 11 | ceph.conf: /home/ceph/ceph-tools/cbt/example/ceph.conf 12 | iterations: 3 13 | rebuild_every_test: False 14 | tmp_dir: "/tmp/cbt" 15 | pool_profiles: 16 | replicated: 17 | pg_size: 4096 18 | pgp_size: 4096 19 | replication: 'replicated' 20 | benchmarks: 21 | radosbench: 22 | op_size: [ 4194304, 524288, 4096 ] 23 | write_only: False 24 | time: 300 25 | concurrent_ops: [ 128 ] 26 | concurrent_procs: 1 27 | use_existing: True 28 | pool_profile: replicated 29 | -------------------------------------------------------------------------------- /example/example-ec-radosbench.yaml: 
-------------------------------------------------------------------------------- 1 | cluster: 2 | head: "ceph@head" 3 | clients: ["ceph@client"] 4 | osds: ["ceph@osd"] 5 | mons: ["ceph@mon"] 6 | osds_per_node: 1 7 | fs: xfs 8 | mkfs_opts: -f -i size=2048 9 | mount_opts: -o inode64,noatime,logbsize=256k 10 | conf_file: /home/ceph/ceph-tools/cbt/example/ceph.conf 11 | ceph.conf: /home/ceph/ceph-tools/cbt/example/ceph.conf 12 | iterations: 3 13 | rebuild_every_test: False 14 | tmp_dir: "/tmp/cbt" 15 | pool_profiles: 16 | erasure: 17 | pg_size: 4096 18 | pgp_size: 4096 19 | replication: 'erasure' 20 | erasure_profile: 'myec' 21 | benchmarks: 22 | radosbench: 23 | op_size: [ 4194304, 524288, 4096 ] 24 | write_only: False 25 | time: 300 26 | concurrent_ops: [ 128 ] 27 | concurrent_procs: 1 28 | use_existing: True 29 | pool_profile: erasure 30 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | minversion = 1.6 3 | skipsdist = True 4 | envlist = py3,pep8 5 | 6 | [testenv] 7 | usedevelop = True 8 | install_command = pip install -U {opts} {packages} 9 | setenv = VIRTUAL_ENV={envdir} 10 | # deps = -r{toxinidir}/test-requirements.txt 11 | commands = 12 | py.test {posargs:tests} 13 | 14 | [tox:jenkins] 15 | downloadcache = ~/cache/pip 16 | 17 | [testenv:pep8] 18 | usedevelop = False 19 | deps = flake8 20 | commands = 21 | flake8 --config=tox.ini {posargs:.} 22 | 23 | [testenv:cover] 24 | setenv = NOSE_WITH_COVERAGE=1 25 | 26 | [testenv:venv] 27 | deps = -r{toxinidir}/requirements.txt 28 | commands = {posargs:} 29 | 30 | [testenv:devenv] 31 | envdir = devenv 32 | usedevelop = True 33 | 34 | [flake8] 35 | ignore = E501 36 | exclude = .venv,.git,.tox,dist,doc,*lib/python*,*egg,build,tools,__init__.py,docs 37 | show-pep8 = True 38 | # show-source = True 39 | statistics = True 40 | 41 | [hacking] 42 | import_exceptions = testtools.matchers 43 | -------------------------------------------------------------------------------- /client_endpoints/rbdkernel_client_endpoints.py: -------------------------------------------------------------------------------- 1 | import common 2 | 3 | from .ceph_client_endpoints import CephClientEndpoints 4 | 5 | class RbdKernelClientEndpoints(CephClientEndpoints): 6 | def __init__(self, cluster,config): 7 | super(RbdKernelClientEndpoints, self).__init__(cluster, config) 8 | 9 | # Kernel RBD breaks if certain features are disabled 10 | self.disabled_features = config.get('disabled_features', 'deep-flatten,fast-diff,object-map') 11 | 12 | def create(self): 13 | self.create_rbd() 14 | 15 | def mount(self): 16 | self.mount_rbd() 17 | 18 | def map_rbd(self, node, rbd_name): 19 | cmd = 'sudo %s map %s/%s --id admin --options noshare' % (self.rbd_cmd, self.pool, rbd_name) 20 | stdout, stderr = common.pdsh(node, cmd, continue_if_error=False).communicate() 21 | return stdout.rstrip().rpartition(": ")[2] 22 | 23 | def create_recovery_image(self): 24 | self.create_rbd_recovery() 25 | -------------------------------------------------------------------------------- /client_endpoints/rgws3_client_endpoints.py: -------------------------------------------------------------------------------- 1 | from .ceph_client_endpoints import CephClientEndpoints 2 | 3 | 4 | class RgwS3ClientEndpoints(CephClientEndpoints): 5 | def __init__(self, cluster, config): 6 | super(RgwS3ClientEndpoints, self).__init__(cluster, config) 7 | 8 | def create(self): 9 | self.access_key = 
self.config.get('access_key', '03VIHOWDVK3Z0VSCXBNH') 10 | self.secret_key = self.config.get('secret_key', 'KTTxQIIJV3uNox21vcqxWIpHMUOApWVWsJKdHwgG') 11 | self.user = self.config.get('user', 'cbt') 12 | self.cluster.add_s3_user(self.user, self.access_key, self.secret_key) 13 | 14 | def mount(self): 15 | # Don't actually mount anything, just set the endpoints 16 | urls = self.config.get('urls', self.cluster.get_urls()) 17 | for ep_num in range(0, self.endpoints_per_client): 18 | url = urls[ep_num % len(urls)] 19 | self.endpoints.append({"url": url, "access_key": self.access_key, "secret_key": self.secret_key}) 20 | self.endpoint_type = "s3" 21 | return self.get_endpoints() 22 | -------------------------------------------------------------------------------- /example/example-kvmrbdfio.yaml: -------------------------------------------------------------------------------- 1 | # this example lets you run kvmrbdfio.py benchmark 2 | # inside a single-host Ceph cluster on a virtual machine, 3 | # using a kernel RBD device as a simulated virtual disk 4 | # of course, the storage pool for the /dev/rbd1 must 5 | # be replicated using a crush rule like: 6 | # # ceph osd crush rule create-simple too-few-hosts myvm osd 7 | # and then you create the storage pool using 8 | # # ceph osd pool create mypool 32 32 too-few-hosts 9 | 10 | cluster: 11 | use_existing: True 12 | head: "myvm" 13 | clients: [ "^../vms.list" ] 14 | osds: ["myvm"] 15 | mons: ["myvm"] 16 | iterations: 2 17 | rebuild_every_test: False 18 | tmp_dir: "/tmp/cbt" 19 | pool_profiles: 20 | replicated: 21 | pg_size: 64 22 | pgp_size: 64 23 | replication: 3 24 | crush_profile: 1 25 | benchmarks: 26 | kvmrbdfio: 27 | fio_cmd: /usr/local/bin/fio 28 | time: 60 29 | ramp: 20 30 | startdelay: 10 31 | rate_iops: 2 32 | iodepth: [2] 33 | numjobs: 1 34 | block_devices: /dev/rbd1 35 | mode: randwrite 36 | # rwmixread: 20 37 | op_size: 4096 38 | vol_size: 64 39 | 40 | -------------------------------------------------------------------------------- /tools/crimson/seastore_radosbench_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | TOP_DIR=$(cd $(dirname "$0") && pwd) 4 | 5 | # configurations 6 | RESULT_DIR="$TOP_DIR/results" 7 | BUILD_DIR="~/ceph/build/" 8 | POOL_NAME="test-pool" 9 | TOTAL_ROUND=10 10 | BENCH_SECONDS=1 11 | 12 | # Note: currently only support single OSD to measure write amplification 13 | # correctly. 14 | if [ -e $RESULT_DIR ]; then 15 | echo "'$RESULT_DIR' dir already exists, remove it or select a different one" 16 | exit 1 17 | fi 18 | 19 | mkdir -p $RESULT_DIR 20 | cd $BUILD_DIR 21 | CURRENT_ROUND=0 22 | TARGET_ROUND=$(( CURRENT_ROUND + TOTAL_ROUND )) 23 | 24 | CEPH_DEV=1 ./bin/ceph tell osd.0 dump_metrics 2>&1 | tee $RESULT_DIR/result_${CURRENT_ROUND}_metrics.log 25 | while [ $CURRENT_ROUND -lt $TARGET_ROUND ] 26 | do 27 | (( ++CURRENT_ROUND )) 28 | echo "start round $CURRENT_ROUND ..." 29 | CEPH_DEV=1 ./bin/rados bench -p $POOL_NAME $BENCH_SECONDS write -b 4096 --no-cleanup 2>&1 | tee $RESULT_DIR/result_${CURRENT_ROUND}_bench.log 30 | CEPH_DEV=1 ./bin/ceph tell osd.0 dump_metrics 2>&1 | tee $RESULT_DIR/result_${CURRENT_ROUND}_metrics.log 31 | echo "finish round $CURRENT_ROUND" 32 | echo 33 | sleep 2 34 | done 35 | echo "done!" 
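# Hedged post-processing sketch: once all rounds finish, the per-round
# throughput can be pulled straight out of the saved logs. This assumes the
# standard `rados bench` summary line is present in each result_*_bench.log:
grep -H "Bandwidth (MB/sec):" $RESULT_DIR/result_*_bench.log || true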
36 | cd $TOP_DIR 37 | -------------------------------------------------------------------------------- /example/example-raw.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | use_existing: True 3 | osds_per_node: 24 4 | clients: [192.168.122.249] 5 | iterations: 2 6 | 7 | benchmarks: 8 | rawfio: 9 | iterations: 2 10 | time: 60 11 | ramp: '0' 12 | # iodepth: [16, 32, 64] 13 | iodepth: [16] 14 | numjobs: 1 15 | mode: [ write] 16 | ioengine: libaio 17 | # Block Size 18 | op_size: [4096000] 19 | # size o volume test 20 | vol_size: 1024 21 | direct: 0 22 | # Readahead settings 23 | client_ra: 128 24 | # Use directory from / if you set to False the script will format client_dev 25 | use_dir: False 26 | # When use_dir is true, we'r using the directory to make tests 27 | client_dir: '/mnt' 28 | # When use_dir is False we need a device to format and mount before make tests 29 | client_dev: '/dev/vdb' 30 | # Make filesyste when we use client_dev and use_dir is False 31 | client_mkfs: True 32 | # What is the filesystem 33 | client_fs: xfs 34 | concurrent_procs: 1 35 | fio_cmd: '/usr/bin/fio' 36 | block_devices: [/dev/vdb] 37 | # block_devices: [/dev/sda, /dev/sdb, /dev/sdc, /dev/sdd, /dev/sde, /dev/sdf, /dev/sdg, /dev/sdh, /dev/sdi, /dev/sdj, /dev/sdk, /dev/sdl, /dev/sdm, /dev/sdn] 38 | 39 | -------------------------------------------------------------------------------- /tools/compare_sysctl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import argparse 4 | 5 | def parse_args(): 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument("FILE", help="sysctl output files to parse", nargs="+") 8 | args = parser.parse_args() 9 | 10 | return args 11 | 12 | def compare_items(foo, files): 13 | # Write the header 14 | print('"Attribute",', end=' ') 15 | for fn in files: 16 | print(('"%s",' % fn), end=' ') 17 | print() 18 | 19 | for attribute,items in sorted(foo.items()): 20 | if len(items) < len(files) or not all_same(list(items.values())): 21 | print('"%s",' % attribute, end=' ') 22 | for fn in files: 23 | if fn in items: 24 | print(('"%s",' % items[fn]), end=' ') 25 | else: 26 | print('"",', end=' ') 27 | print() 28 | 29 | def all_same(items): 30 | return all(x == items[0] for x in items) 31 | 32 | if __name__ == '__main__': 33 | kvdict = {} 34 | ctx = parse_args() 35 | for fn in ctx.FILE: 36 | f = open(fn, 'r') 37 | for line in f: 38 | (key, value) = line.rstrip('\r\n').rsplit(' = ') 39 | kvdict.setdefault(key, {}).update({fn: value}) 40 | compare_items(kvdict, ctx.FILE) 41 | 42 | -------------------------------------------------------------------------------- /tools/test_bm_template.py: -------------------------------------------------------------------------------- 1 | """ Unit tests for the BenchmarkX class """ 2 | 3 | import unittest 4 | import hashlib 5 | import json 6 | import benchmarkfactory 7 | import settings 8 | from cluster.ceph import Ceph 9 | 10 | 11 | class TestBenchmarkX(unittest.TestCase): 12 | """ Sanity tests for BenchmarkX """ 13 | archive_dir = "/tmp" 14 | iteration = {'acceptable': [1,2,3], 'iteration': 0} 15 | cluster = {} 16 | cl_name = "tools/invariant.yaml" 17 | bl_name = "tools/baseline.json" 18 | bl_json = {} 19 | bl_md5 = 'MD5SUMNone' 20 | md5_returned = None 21 | 22 | @classmethod 23 | def setUpClass(cls): 24 | with open(cls.bl_name, 'rb') as f: 25 | data = f.read() 26 | f.close() 27 | cls.md5_returned = hashlib.md5(data).hexdigest() 28 | 
settings.mock_initialize(config_file=cls.cl_name) 29 | cls.cluster = Ceph.mockinit(settings.cluster) 30 | with open(cls.bl_name, 'r') as f: 31 | cls.bl_json = json.load(f) 32 | f.close() 33 | 34 | @classmethod 35 | def tearDownClass(cls): 36 | cls.cluster = None 37 | cls.bl_json = None 38 | 39 | def test_valid_baseline(self): 40 | """ Verify the baseline has not been compromised """ 41 | self.assertEqual( self.bl_md5, str(self.md5_returned) ) 42 | -------------------------------------------------------------------------------- /client_endpoints/rbdfuse_client_endpoints.py: -------------------------------------------------------------------------------- 1 | import common 2 | import logging 3 | 4 | from .ceph_client_endpoints import CephClientEndpoints 5 | 6 | logger = logging.getLogger("cbt") 7 | 8 | 9 | class RbdFuseClientEndpoints(CephClientEndpoints): 10 | def __init__(self, cluster, config): 11 | super().__init__(cluster, config) 12 | 13 | def create(self): 14 | self.create_rbd() 15 | 16 | def mount(self): 17 | self.mount_rbd() 18 | 19 | def map_rbd(self, node, rbd_name): 20 | fuse_dir = '%s/%s-fuse' % (self.mnt_dir, self.name) 21 | 22 | # Check to make sure that fuse is not already mapped. 23 | stdout, stderr = common.pdsh(node, 'sudo ps aux | grep %s' % self.rbd_fuse_cmd, continue_if_error=False).communicate() 24 | if fuse_dir in stdout: 25 | raise ValueError('RBD-Fuse was already mapped at %s!' % fuse_dir) 26 | common.pdsh(node, 'sudo mkdir -p -m0755 -- %s' % fuse_dir, continue_if_error=False).communicate() 27 | common.pdsh(node, 'sudo %s %s -p %s' % (self.rbd_fuse_cmd, fuse_dir, self.pool), continue_if_error=False).communicate() 28 | logger.info('Mapped RBD-Fuse pool %s to %s' % (self.pool, fuse_dir)) 29 | 30 | return '%s/%s' % (fuse_dir, rbd_name) 31 | 32 | def create_recovery_image(self): 33 | self.create_rbd_recovery() 34 | --------------------------------------------------------------------------------
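The driver classes above all implement the same small contract defined by `ClientEndpoints`/`CephClientEndpoints` (shown below): `create()` provisions the backing storage, `mount()` exposes it to the client nodes, and block drivers additionally supply `map_rbd()`. A minimal hypothetical driver, a sketch only, reusing just the base-class helpers the real drivers already call (`create_rbd`, `mount_rbd`, `create_rbd_recovery`, `self.rbd_cmd`, `self.pool`), might look like:

```python
import common

from .ceph_client_endpoints import CephClientEndpoints


class ExampleRbdClientEndpoints(CephClientEndpoints):
    """Hypothetical illustration only; not a driver that ships with CBT."""

    def create(self):
        self.create_rbd()  # provision one RBD image per endpoint

    def mount(self):
        self.mount_rbd()  # the shared helper is expected to call map_rbd() per image

    def map_rbd(self, node, rbd_name):
        # Map the image on the client and return the local device path.
        cmd = 'sudo %s map %s/%s' % (self.rbd_cmd, self.pool, rbd_name)
        stdout, _ = common.pdsh(node, cmd, continue_if_error=False).communicate()
        return stdout.rstrip().rpartition(": ")[2]

    def create_recovery_image(self):
        self.create_rbd_recovery()
```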
/example/wip-cosbench/cosbench_ex.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | user: 'cbt' 3 | head: "cadmin" 4 | clients: ["cadmin"] 5 | osds: ["inf1", "inf2", "inf3"] 6 | mons: 7 | inf1: 8 | a: "192.168.110.51:6789" 9 | inf2: 10 | b: "192.168.110.52:6789" 11 | inf3: 12 | c: "192.168.110.53:6789" 13 | rgws: ["inf1", "inf2", "inf3"] 14 | osds_per_node: 1 15 | fs: 'xfs' 16 | mkfs_opts: '-f -i size=2048' 17 | mount_opts: '-o inode64,noatime,logbsize=256k' 18 | conf_file: '/home/cbt/cbt/runs/test2.ceph.conf' 19 | iterations: 1 20 | use_existing: True 21 | clusterid: "ceph" 22 | tmp_dir: "/tmp/cbt" 23 | pool_profiles: 24 | rbd: 25 | pg_size: 64 26 | pgp_size: 64 27 | replication: 2 28 | benchmarks: 29 | cosbench: 30 | obj_size: [64KB] 31 | osd_ra: [4096] 32 | workers: 1 33 | containers_max: 10 34 | objects_max: 100 35 | mode: [write] 36 | cosbench_dir: '/home/cbt/cb/0.4.1.0' 37 | cosbench_xml_dir: '/home/cbt/cb/xml_dir' 38 | controller: "cadmin" 39 | auth: 40 | config: username=cosbench:operator;password=intel2012;url=http://192.168.110.51:7480/auth/v1.0;retry=9 41 | template: [default] 42 | rampup: 10 43 | runtime: 100 44 | rampdown: 10 45 | containers: ["u(1,100)"] 46 | objects: ["u(1,100)"] 47 | ratio: [100] 48 | -------------------------------------------------------------------------------- /client_endpoints/client_endpoints.py: -------------------------------------------------------------------------------- 1 | class ClientEndpoints(object): 2 | def __init__(self, cluster, config): 3 | self.config = config 4 | self.cluster = cluster 5 | self.driver = self.config.get('driver', None) 6 | self.name = 'cbt-%s' % self.driver 7 | self.mnt_dir = cluster.mnt_dir 8 | self.endpoint_size = self.config.get('endpoint_size', 4096) 9 | self.endpoint_type = None 10 | self.endpoints_per_client = self.config.get('endpoints_per_client', 1) 11 | self.endpoints = [] 12 | self.initialized = False 13 | 14 | def initialize(self): 15 | self.create() 16 | self.mount() 17 | self.initialized = True 18 | 19 | def get_initialized(self): 20 | return self.initialized 21 | 22 | def get_endpoints(self): 23 | return self.endpoints 24 | 25 | def get_endpoint_type(self): 26 | return self.endpoint_type 27 | 28 | def get_endpoints_per_client(self): 29 | return self.endpoints_per_client 30 | 31 | def get_endpoint_size(self): 32 | return self.endpoint_size 33 | 34 | def create(self): 35 | pass 36 | 37 | def mount(self): 38 | pass 39 | 40 | def umount(self): 41 | pass 42 | 43 | def remove(self): 44 | pass 45 | 46 | def create_recovery_image(self): 47 | pass 48 | -------------------------------------------------------------------------------- /client_endpoints/rbdtcmu_client_endpoints.py: -------------------------------------------------------------------------------- 1 | import common 2 | 3 | from .ceph_client_endpoints import CephClientEndpoints 4 | 5 | 6 | class RbdTcmuClientEndpoints(CephClientEndpoints): 7 | def create(self): 8 | self.create_rbd() 9 | 10 | def mount(self): 11 | self.mount_rbd() 12 | 13 | def map_rbd(self, node, rbd_name): 14 | common.pdsh(node, f'sudo targetcli /backstores/user:rbd create cfgstring={self.pool}/{rbd_name} name={rbd_name} size={self.endpoint_size}M', 15 | continue_if_error=False).communicate() 16 | stdout, stderr = common.pdsh(node, f'sudo targetcli /loopback create', continue_if_error=False).communicate() 17 | wwn = stdout.rstrip().rpartition(": ")[2].rpartition(" ")[2][:-1] 18 | common.pdsh(node, f'sudo targetcli /loopback/{wwn}/luns create /backstores/user:rbd/{rbd_name}', continue_if_error=False).communicate() 19 | stdout, stderr = common.pdsh(node, f'cat /sys/kernel/config/target/loopback/{wwn}/tpgt_1/address', continue_if_error=False).communicate() 20 | address = stdout.rstrip().rpartition(": ")[2] 21 | stdout, stderr = common.pdsh(node, f'ls /sys/class/scsi_disk/{address}:0/device/block', continue_if_error=False).communicate() 22 | return '/dev/%s' % stdout.rstrip().rpartition(": ")[2] 23 | 24 | def create_recovery_image(self): 25 | self.create_rbd_recovery() 26 | -------------------------------------------------------------------------------- /tools/mkpartmagna.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SYSPART=`df | grep "/$" | cut -d" " -f1 | cut -d"/" -f3` 4 | if [[ $SYSPART == "mapper" ]] 5 | then 6 | echo "System disk is on an LVM - determining underlying block device..." 7 | SYSPART=`pvscan | grep -i root | awk -F " " '{print $2}' | awk -F "/" '{print $3}' | cut -c1,2,3` 8 | fi 9 | diskid='wwn' 10 | echo "System on $SYSPART" 11 | 12 | failed() 13 | { 14 | sleep 2 # Wait for the kernel to stop whining 15 | echo "Hrm, that didn't work. Calling for help."
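# Spin forever below so the failure gets noticed by an operator; the exit is never reached.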
16 | # sudo ipmitool chassis identify force 17 | echo "RAID Config failed: ${1}" 18 | while [ 1 ]; do sleep 10; done 19 | exit 1; 20 | } 21 | 22 | fakefailed() 23 | { 24 | echo "ignoring megacli errors and forging on: ${1}" 25 | } 26 | 27 | echo "Making label on OSD devices" 28 | 29 | # Data 30 | i=0 31 | for DEV in `ls -al /dev/disk/by-id | grep $diskid | grep -v part | cut -f3 -d"/" | tr '\n' ' '` 32 | do 33 | if [[ ! $SYSPART =~ $DEV ]] 34 | then 35 | sudo parted -s -a optimal /dev/$DEV mklabel gpt || failed "mklabel $DEV" 36 | echo "Creating osd device $i data label" 37 | echo "sudo parted -s -a optimal /dev/$DEV mkpart osd-device-$i-data $sp% $ep%" 38 | sudo parted -s -a optimal /dev/$DEV mkpart osd-device-$i-journal 0% 1000M || failed "mkpart $i-journal" 39 | sudo parted -s -a optimal /dev/$DEV mkpart osd-device-$i-data 1000M 100% || failed "mkpart $i-data" 40 | let "i++" 41 | fi 42 | done 43 | -------------------------------------------------------------------------------- /tools/crimson/seastore_fio_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | TOP_DIR=$(cd $(dirname "$0") && pwd) 4 | 5 | # configurations 6 | RESULT_DIR="$TOP_DIR/results" 7 | BUILD_DIR="~/ceph/build/" 8 | FIO_CONF="~/ceph/rbd_write.fio" 9 | POOL_NAME="rbd" 10 | POOL_NUM=128 11 | TOTAL_ROUND=3 12 | IMAG_NAME="fio_test" 13 | 14 | # Note: currently only support single OSD to measure write amplification 15 | # correctly. 16 | if [ -e $RESULT_DIR ]; then 17 | echo "'$RESULT_DIR' dir already exists, remove it or select a different one" 18 | exit 1 19 | fi 20 | 21 | mkdir -p $RESULT_DIR 22 | cd $BUILD_DIR 23 | CURRENT_ROUND=0 24 | TARGET_ROUND=$(( CURRENT_ROUND + TOTAL_ROUND )) 25 | 26 | CEPH_DEV=1 ./bin/ceph osd pool create $POOL_NAME $POOL_NUM $POOL_NUM 27 | CEPH_DEV=1 ./bin/ceph osd pool set --yes-i-really-mean-it $POOL_NAME size 1 && ./bin/ceph osd pool --yes-i-really-mean-it set $POOL_NAME min_size 1 28 | CEPH_DEV=1 ./bin/rbd create $IMAG_NAME --size 2G --image-format=2 --rbd_default_features=3 29 | 30 | CEPH_DEV=1 ./bin/ceph tell osd.0 dump_metrics 2>&1 | tee $RESULT_DIR/result_${CURRENT_ROUND}_metrics.log 31 | while [ $CURRENT_ROUND -lt $TARGET_ROUND ] 32 | do 33 | (( ++CURRENT_ROUND )) 34 | echo "start round $CURRENT_ROUND ..." 35 | CEPH_DEV=1 fio $FIO_CONF --output=$RESULT_DIR/result_${CURRENT_ROUND}_bench.log 36 | CEPH_DEV=1 ./bin/ceph tell osd.0 dump_metrics 2>&1 | tee $RESULT_DIR/result_${CURRENT_ROUND}_metrics.log 37 | echo "finish round $CURRENT_ROUND" 38 | echo 39 | sleep 2 40 | done 41 | echo "done!" 42 | cd $TOP_DIR 43 | -------------------------------------------------------------------------------- /example/example-3x-radosbench-crimson.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | head: "ceph@ubulap" 3 | clients: ["ceph@ubulap"] 4 | osds: ["ceph@ubulap"] 5 | mons: ["ceph@ubulap"] 6 | osds_per_node: 1 7 | conf_file: /home/rzarzynski/ceph-1/build/ceph.conf 8 | # PID file is important for perf stat. crimson-osd should use defaults 9 | # its location. 
10 | pid_dir: /home/rzarzynski/ceph-1/build/out 11 | iterations: 3 12 | rebuild_every_test: False 13 | tmp_dir: "/tmp/cbt" 14 | ceph_cmd: /home/rzarzynski/ceph-1/build/bin/ceph 15 | rados_cmd: /home/rzarzynski/ceph-1/build/bin/rados 16 | pool_profiles: 17 | replicated: 18 | pg_size: 128 19 | pgp_size: 128 20 | replication: 'replicated' 21 | benchmarks: 22 | radosbench: 23 | op_size: [ 4096, 8192 ] 24 | write_only: True 25 | time: 3 26 | concurrent_ops: [ 16 ] 27 | concurrent_procs: 1 28 | # crimson must be already deployed. It can be done with vstart.sh: 29 | # MDS=0 MGR=1 OSD=1 MON=1 ../src/vstart.sh -n --without-dashboard \ 30 | # --memstore -X -o "memstore_device_bytes=34359738368" --crimson \ 31 | # --nodaemon --redirect-output 32 | use_existing: True 33 | pool_profile: replicated 34 | acceptable: 35 | bandwidth: '(or (greater) (near 0.05))' 36 | iops_avg: '(or (greater) (near 0.05))' 37 | iops_stddev: '(or (less) (near 0.05))' 38 | latency_avg: '(or (less) (near 0.05))' 39 | monitoring_profiles: 40 | perf: 41 | nodes: 42 | - osds 43 | args: 'stat -p {pid} -o {perf_dir}/perf_stat.{pid}' 44 | -------------------------------------------------------------------------------- /tools/fio_objectstore_tools/hdd-runs.json: -------------------------------------------------------------------------------- 1 | { 2 | "base": { 3 | "runtime": 3600, 4 | "devices": { 5 | "nvme": { 6 | "device_type": "ssd", 7 | "block_wal_path": "/dev/nvme0n1p2", 8 | "block_db_path": "/dev/nvme0n1p3", 9 | "block_path": "/dev/nvme0n1p4", 10 | "target_dir": "/mnt/sjust/bluestore-nvme" 11 | }, 12 | "nvme_plain": { 13 | "device_type": "ssd", 14 | "block_path": "/dev/nvme3n1p4", 15 | "target_dir": "/mnt/sjust/bluestore-nvme-plain" 16 | }, 17 | "hdd": { 18 | "device_type": "hdd", 19 | "block_path": "/dev/sdh2", 20 | "target_dir": "/mnt/sjust/bluestore-hdd" 21 | }, 22 | "hdd_nvme_db": { 23 | "device_type": "hdd", 24 | "block_wal_path": "/dev/nvme2n1p2", 25 | "block_db_path": "/dev/nvme2n1p3", 26 | "block_path": "/dev/sdg2", 27 | "target_dir": "/mnt/sjust/bluestore-hdd-nvme-db" 28 | } 29 | }, 30 | "size": 512, 31 | "filesize": 4, 32 | "preextend": "true", 33 | "qd": 1024, 34 | "numjobs": 32, 35 | "tcio_hdd": 1048576, 36 | "bluestore_deferred_throttle": [ 37 | 2, 38 | 4, 39 | 8, 40 | 12 41 | ], 42 | "bluestore_throttle": [ 43 | 8, 44 | 12, 45 | 16, 46 | 20, 47 | 24 48 | ], 49 | "vary_bluestore_throttle_period": 30 50 | }, 51 | "runs": { 52 | "bs": [ 53 | 4, 54 | 512 55 | ], 56 | "target_device": [ 57 | "nvme", 58 | "nvme_plain" 59 | ], 60 | "run": [ 61 | 0, 62 | 1 63 | ] 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /tools/fio_objectstore_tools/nvme-runs.json: -------------------------------------------------------------------------------- 1 | { 2 | "base": { 3 | "runtime": 3600, 4 | "devices": { 5 | "nvme": { 6 | "device_type": "ssd", 7 | "block_wal_path": "/dev/nvme0n1p2", 8 | "block_db_path": "/dev/nvme0n1p3", 9 | "block_path": "/dev/nvme0n1p4", 10 | "target_dir": "/mnt/sjust/bluestore-nvme" 11 | }, 12 | "nvme_plain": { 13 | "device_type": "ssd", 14 | "block_path": "/dev/nvme3n1p4", 15 | "target_dir": "/mnt/sjust/bluestore-nvme-plain" 16 | }, 17 | "hdd": { 18 | "device_type": "hdd", 19 | "block_path": "/dev/sdh2", 20 | "target_dir": "/mnt/sjust/bluestore-hdd" 21 | }, 22 | "hdd_nvme_db": { 23 | "device_type": "hdd", 24 | "block_wal_path": "/dev/nvme2n1p2", 25 | "block_db_path": "/dev/nvme2n1p3", 26 | "block_path": "/dev/sdg2", 27 | "target_dir": 
"/mnt/sjust/bluestore-hdd-nvme-db" 28 | } 29 | }, 30 | "size": 512, 31 | "filesize": 4, 32 | "preextend": "true", 33 | "qd": 1024, 34 | "numjobs": 32, 35 | "tcio_hdd": 1048576, 36 | "tcio_ssd": 8192, 37 | "bluestore_deferred_throttle": [ 38 | 0.25, 39 | 0.5, 40 | 1, 41 | 2, 42 | 4 43 | ], 44 | "bluestore_throttle": [ 45 | 0.25, 46 | 0.5, 47 | 1, 48 | 2, 49 | 4, 50 | 6, 51 | 8 52 | ], 53 | "vary_bluestore_throttle_period": 30 54 | }, 55 | "runs": { 56 | "bs": [ 57 | 4, 58 | 512 59 | ], 60 | "target_device": [ 61 | "nvme" 62 | ], 63 | "run": [ 64 | 0 65 | ] 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /post_processing/README.md: -------------------------------------------------------------------------------- 1 | # Post Processing of CBT results 2 | 3 | ## Description 4 | A set of tools that can be used to post process the data from any run of CBT. It provides a report in github markdown, 5 | and optionally pdf, format that contains a set of hockey-stick curves generated from the CBT run. 6 | The tool set consists of three separate tools that can be run stand-alone. The eventual aim is to integrate the post 7 | processing into CBT once more benchmark types are supported. 8 | 9 | There are three components to the post processing which are: 10 | 11 | * [formatter](formatter/README.md) 12 | * [plotter](plotter/README.md) 13 | * [reports](reports/README.md) 14 | 15 | 16 | ## Suppoted benchmark tools 17 | This list will be added to as extra benchmark tools are supported. 18 | * fio 19 | 20 | ## Dependencies 21 | These post processing changes include some new dependencies to be run correctly 22 | 23 | ### python dependencies 24 | The following python modules are dependencies for this work: 25 | * matplotlib 26 | * mdutils 27 | 28 | Both have been added to the requirements.txt file in the CBT project. 29 | 30 | ### Dependencies for pdf report generation 31 | To generate a report in pdf format there are 2 additional requirements 32 | 33 | A working install of tex is required on the base operating system, which can be installed using the package manager. 34 | For Red Hat based OSes this can be achieved by running `yum install texlive` 35 | 36 | [Pandoc](https://pandoc.org/), which can be installed on most Linux distributions using the included package manager. 37 | For Red Hat based OSes use `yum install pandoc` 38 | 39 | The minimum pandoc level tested is `2.14.0.3` which is available for RHEL 9 40 | -------------------------------------------------------------------------------- /post_processing/plotter/README.md: -------------------------------------------------------------------------------- 1 | # Plotter 2 | Draws the hockey stick plots for a benchmark run from the data produced by the formatter. These are png files, with one 3 | plot produced per block size used. 4 | 5 | There is also a python class that will produce comparison plots of two or more different CBT runs for one or more block 6 | sizes. 7 | Due to the tools used there are only 6 unique colours available for the plot lines, so it is recommended to limit the 8 | comparison to 6 or less files or directories. 9 | 10 | ## Standalone script 11 | A wrapper script is only provided to produce comparison plots. 12 | ``` 13 | plot_comparison.py --files= 14 | --directories= 15 | --output_directory= 16 | --labels=" 17 | ``` 18 | where 19 | - `--output_directory` Required. The full path to a directory to store the plots. Will be created if it doesn't exist 20 | - `--files` Optional. 
A comma separated list of files to plot on a single axis 21 | - `--directories` Optional. A comma separated list of directories to plot. A single plot will be produced per blocksize 22 | - `--labels` Optional. Comma separated list of labels to use for the lines on the comparison plot, in the same order as 23 | --file or --directories. 24 | 25 | One of `--files` or `--directories` must be provided. 26 | 27 | Full help text is provided by using `--help` with the script 28 | 29 | ## Example 30 | 31 | ```bash 32 | PYTHONPATH=/cbt /cbt/tools/plot_comparison.py --directories="/tmp/ch_cbt_main_run,/tmp/ch_cbt_sandbox_run" --output_directory="/tmp/main_sb_comparisons" 33 | ``` -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # script to install CBT dependencies and tools for active benchmarking 4 | 5 | sudo yum -y install deltarpm 6 | sudo yum check-update 7 | sudo yum -y update 8 | sudo yum install -y psmisc util-linux coreutils xfsprogs e2fsprogs findutils \ 9 | git wget bzip2 make automake gcc gcc-c++ kernel-devel perf blktrace lsof \ 10 | redhat-lsb sysstat screen python3-yaml ipmitool dstat zlib-devel ntp 11 | 12 | MIRROR="http://mirror.hmc.edu/fedora/linux/releases/22/Everything/x86_64/os/Packages" 13 | 14 | wget ${MIRROR}/p/pdsh-2.31-3.fc22.x86_64.rpm 15 | wget ${MIRROR}/p/pdsh-2.31-3.fc22.x86_64.rpm 16 | wget ${MIRROR}/p/pdsh-rcmd-ssh-2.31-3.fc22.x86_64.rpm 17 | wget ${MIRROR}/c/collectl-4.0.0-1.fc22.noarch.rpm 18 | wget ${MIRROR}/i/iftop-1.0-0.9.pre4.fc22.x86_64.rpm 19 | wget ${MIRROR}/i/iperf3-3.0.10-1.fc22.x86_64.rpm 20 | 21 | sudo yum localinstall -y *.rpm 22 | 23 | git clone https://github.com/axboe/fio.git 24 | git clone https://github.com/andikleen/pmu-tools.git 25 | git clone https://github.com/brendangregg/FlameGraph 26 | 27 | cd ${HOME}/fio 28 | ./configure 29 | make 30 | 31 | # wget < Red Hat Ceph Storage ISO URL > 32 | # sudo mount -o loop Ceph-*-dvd.iso /mnt 33 | sudo yum localinstall -y /mnt/{MON,OSD}/*.rpm 34 | sudo yum localinstall -y /mnt/Installer/ceph-deploy-*.rpm 35 | 36 | sudo sed -i 's/Defaults requiretty/#Defaults requiretty/g' /etc/sudoers 37 | sudo setenforce 0 38 | ( awk '!/SELINUX=/' /etc/selinux/config ; echo "SELINUX=disabled" ) > /tmp/x 39 | sudo mv /tmp/x /etc/selinux/config 40 | rpm -qa firewalld | grep firewalld && sudo systemctl stop firewalld && sudo systemctl disable firewalld 41 | sudo systemctl stop irqbalance 42 | sudo systemctl disable irqbalance 43 | sudo systemctl start ntpd.service 44 | sudo systemctl enable ntpd.service 45 | -------------------------------------------------------------------------------- /example/wip-cosbench/cosbench_ex_ceph.conf: -------------------------------------------------------------------------------- 1 | [global] 2 | osd pool default size = 1 3 | auth cluster required = none 4 | auth service required = none 5 | auth client required = none 6 | keyring = /tmp/cbt/ceph/keyring 7 | osd pg bits = 8 8 | osd pgp bits = 8 9 | log to syslog = false 10 | log file = /tmp/cbt/ceph/log/$name.log 11 | public network = 192.168.110.0/24 12 | cluster network = 192.168.110.0/24 13 | rbd cache = true 14 | osd scrub load threshold = 0.01 15 | osd scrub min interval = 137438953472 16 | osd scrub max interval = 137438953472 17 | osd deep scrub interval = 137438953472 18 | osd max scrubs = 16 19 | filestore merge threshold = 40 20 | filestore split multiple = 8 21 | osd op threads = 8 22 | mon pg 
warn max object skew = 100000 23 | mon pg warn min per osd = 0 24 | mon pg warn max per osd = 32768 25 | 26 | [mon] 27 | mon data = /tmp/cbt/ceph/mon.$id 28 | 29 | [mon.a] 30 | host = inf1 31 | mon addr = 192.168.110.51:6789 32 | 33 | [mon.b] 34 | host = inf2 35 | mon addr = 192.168.110.52:6789 36 | 37 | [mon.c] 38 | host = inf3 39 | mon addr = 192.168.110.53:6789 40 | 41 | [osd.0] 42 | host = inf1 43 | osd data = /tmp/cbt/mnt/osd-device-0-data 44 | osd journal = /dev/disk/by-partlabel/osd-device-0-journal 45 | 46 | [osd.1] 47 | host = inf2 48 | osd data = /tmp/cbt/mnt/osd-device-0-data 49 | osd journal = /dev/disk/by-partlabel/osd-device-0-journal 50 | 51 | [osd.2] 52 | host = inf3 53 | osd data = /tmp/cbt/mnt/osd-device-0-data 54 | osd journal = /dev/disk/by-partlabel/osd-device-0-journal 55 | -------------------------------------------------------------------------------- /example/wip-mark-testing/runtests.xfs.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | user: 'nhm' 3 | head: "burnupiX" 4 | clients: ["burnupiY"] 5 | osds: ["burnupiX"] 6 | mons: 7 | burnupiY: 8 | a: "192.168.10.2:6789" 9 | osds_per_node: 4 10 | fs: 'xfs' 11 | mkfs_opts: '-f -i size=2048' 12 | mount_opts: '-o inode64,noatime,logbsize=256k' 13 | conf_file: '/home/nhm/src/cbt/example/wip-mark-testing/ceph.conf' 14 | iterations: 1 15 | use_existing: False 16 | clusterid: "ceph" 17 | tmp_dir: "/tmp/cbt" 18 | pool_profiles: 19 | radosbench: 20 | pg_size: 1024 21 | pgp_size: 1024 22 | replication: 3 23 | rbd: 24 | pg_size: 4096 25 | pgp_size: 4096 26 | replication: 3 27 | benchmarks: 28 | radosbench: 29 | op_size: [4194304, 131072, 4096] 30 | write_only: False 31 | time: 300 32 | concurrent_ops: [32] 33 | concurrent_procs: 4 34 | osd_ra: [4096] 35 | pool_profile: 'radosbench' 36 | librbdfio: 37 | time: 10 38 | vol_size: 2048 39 | mode: ['read', 'write', 'randread', 'randwrite', 'rw', 'randrw'] 40 | rwmixread: 50 41 | op_size: [4194304, 131072, 4096] 42 | procs_per_volume: [1] 43 | volumes_per_client: [1] 44 | iodepth: [32] 45 | osd_ra: [4096] 46 | cmd_path: '/home/nhm/src/fio/fio' 47 | pool_profile: 'rbd' 48 | log_avg_msec: 100 49 | rbdfio: 50 | time: 10 51 | vol_size: 2048 52 | mode: ['read', 'write', 'randread', 'randwrite', 'rw', 'randrw'] 53 | rwmixread: 50 54 | op_size: [4194304, 131072, 4096] 55 | concurrent_procs: [1] 56 | iodepth: [32] 57 | osd_ra: [4096] 58 | cmd_path: '/home/nhm/src/fio/fio' 59 | pool_profile: 'rbd' 60 | log_avg_msec: 100 61 | # Optionally disable fine-grained logging by fio 62 | log_iops: False 63 | log_bw: False 64 | log_lat: False 65 | -------------------------------------------------------------------------------- /example/example-hsbench.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | user: 'user' 3 | head: "localhost" 4 | clients: ["localhost"] 5 | osds: ["localhost"] 6 | mons: 7 | localhost: 8 | a: "127.0.0.1:6789" 9 | mgrs: 10 | localhost: 11 | a: ~ 12 | rgws: 13 | localhost: 14 | client.radosgw.gateway: 15 | host: "127.0.0.1" 16 | osds_per_node: 1 17 | fs: 'xfs' 18 | mkfs_opts: '-f -i size=2048' 19 | mount_opts: '-o inode64,noatime,logbsize=256k' 20 | conf_file: '/home/user/ceph_tests/ceph.conf' 21 | iterations: 1 22 | use_existing: False 23 | clusterid: "ceph" 24 | tmp_dir: "/tmp/cbt" 25 | ceph-authtool_cmd: "/usr/local/bin/ceph-authtool" 26 | ceph-osd_cmd: "env -i TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728 /usr/local/bin/ceph-osd" 27 | ceph-rgw_cmd: "env -i 
TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728 /usr/local/bin/radosgw" 28 | ceph-mon_cmd: "/usr/local/bin/ceph-mon" 29 | ceph-run_cmd: "/usr/local/bin/ceph-run" 30 | rados_cmd: "/usr/local/bin/rados" 31 | ceph_cmd: "/usr/local/bin/ceph" 32 | rbd_cmd: "/usr/local/bin/rbd" 33 | ceph-mgr_cmd: "/usr/local/bin/ceph-mgr" 34 | radosgw-admin_cmd: "/usr/local/bin/radosgw-admin" 35 | 36 | osd_valgrind: "massif" 37 | pool_profiles: 38 | rgw: 39 | pg_size: 128 40 | pgp_size: 128 41 | replication: 1 42 | rgw_pools: 43 | control: rgw 44 | meta: rgw 45 | log: rgw 46 | buckets: rgw 47 | buckets_data: rgw 48 | buckets_index: rgw 49 | 50 | client_endpoints: 51 | hsbench: 52 | driver: 's3' 53 | endpoints_per_client: 1 54 | 55 | benchmarks: 56 | hsbench: 57 | cmd_path: '/home//go/src/github.com/markhpc/hsbench/hsbench' 58 | client_endpoints: 'hsbench' 59 | objects: 10000 60 | duration: -1 61 | buckets: 1 62 | threads: 16 63 | size: '4K' 64 | report_interval: 10 65 | -------------------------------------------------------------------------------- /parsing/htmlgenerator.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import zlib 3 | import urllib.request 4 | import urllib.parse 5 | import urllib.error 6 | 7 | 8 | class HTMLGenerator(object): 9 | def __init__(self): 10 | self.styles = [] 11 | self.html = [] 12 | self.scripts = [] 13 | 14 | def encode(self, text): 15 | return base64.b64encode(zlib.compress(urllib.parse.quote(text).encode(), 9)).decode() 16 | 17 | def read_file(self, filename): 18 | f = open(filename, "r") 19 | text = f.read() 20 | f.close() 21 | return text 22 | 23 | def add_html(self, text): 24 | self.html.append(text) 25 | 26 | def add_style(self, text): 27 | self.styles.append('<style type="text/css">%s</style>' % text) 28 | 29 | def add_script(self, text): 30 | self.scripts.append('<script type="text/javascript">%s</script>' % text) 31 | 32 | def add_encoded_script(self, text): 33 | self.scripts.append('<script type="text/javascript">%s</script>' % self.encode(text)) 34 | 35 | def to_string(self): 36 | return '\n'.join(self.html + self.styles + self.scripts) 37 | 38 | def format_data(self, data): 39 | lines = [] 40 | for row in data: 41 | tmprow = [] 42 | for pair in row: 43 | first = '"' + pair[0] + '"' 44 | second = '' 45 | if isinstance(pair[1], float): 46 | second = "%.2f" % pair[1] 47 | elif isinstance(pair[1], str): 48 | second = '"' + pair[1] + '"' 49 | else: 50 | second = pair[1] 51 | tmprow.append(str(first) + ':' + str(second)) 52 | lines.append('{' + ', '.join(tmprow) + '}') 53 | return 'var dataSet = [' + ',\n'.join(lines) + '];\n' 54 | --------------------------------------------------------------------------------
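A short, hedged usage sketch for `HTMLGenerator` (the data rows are invented, and it assumes `parsing/` is importable from the repo root): `format_data()` turns rows of `(name, value)` pairs into the JavaScript `dataSet` variable, which can then be embedded alongside the bundled table assets.

```python
from parsing.htmlgenerator import HTMLGenerator  # assumes the repo root is the working directory

gen = HTMLGenerator()
rows = [
    [("benchmark", "radosbench"), ("bandwidth_mb", 812.4)],
    [("benchmark", "librbdfio"), ("bandwidth_mb", 655.0)],
]
gen.add_html('<table id="view"></table>')              # placeholder table element
gen.add_style(gen.read_file('include/css/table.css'))  # inline the bundled stylesheet
gen.add_script(gen.format_data(rows))                  # emits: var dataSet = [...];
print(gen.to_string())                                 # html first, then styles, then scripts
```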
12, "y": True, "z": {1: 2}, "t": [1, 2, 4], "j": [7, True, 'gg']} 28 | cfgs = list(benchmarkfactory.all_configs(config)) 29 | self.assertEqual(len(cfgs), 9) 30 | self.assertEqual([dict] * 9, list(map(type, cfgs))) 31 | 32 | tjvals = [] 33 | 34 | for cfg in cfgs: 35 | for field in 'xyz': 36 | self.assertEqual(cfg[field], config[field]) 37 | tjvals.append((cfg['t'], cfg['j'])) 38 | 39 | for tval in config['t']: 40 | for jval in config['j']: 41 | self.assertEqual(1, tjvals.count((tval, jval))) 42 | 43 | def test_permutations_0(self): 44 | """ Basic sanity permutations """ 45 | config = {"x": 12, "y": True, "z": {1: 2}} 46 | cfgs = list(benchmarkfactory.all_configs(config)) 47 | self.assertEqual(len(cfgs), 1) 48 | self.assertEqual(cfgs[0], config) 49 | 50 | if __name__ == '__main__': 51 | setup_loggers(log_fname='/tmp/cbt-utest.log') 52 | unittest.main() 53 | -------------------------------------------------------------------------------- /post_processing/plotter/simple_plotter.py: -------------------------------------------------------------------------------- 1 | """ 2 | A file containing the classes and code required to read a file stored in the common 3 | intermediate format introduced in PR 319 (https://github.com/ceph/cbt/pull/319) and 4 | produce a hockey-stick curve graph 5 | """ 6 | 7 | from pathlib import Path 8 | 9 | import matplotlib.pyplot as plotter 10 | 11 | from post_processing.common import ( 12 | DATA_FILE_EXTENSION, 13 | DATA_FILE_EXTENSION_WITH_DOT, 14 | PLOT_FILE_EXTENSION, 15 | read_intermediate_file, 16 | ) 17 | from post_processing.plotter.common_format_plotter import CommonFormatPlotter 18 | from post_processing.types import COMMON_FORMAT_FILE_DATA_TYPE 19 | 20 | 21 | class SimplePlotter(CommonFormatPlotter): 22 | """ 23 | Read the intermediate data file in the common json format and produce a hockey-stick 24 | curve plot that includes standard deviation error bars. 25 | """ 26 | 27 | def __init__(self, archive_directory: str) -> None: 28 | # A Path object for the directory where the data files are stored 29 | self._path: Path = Path(f"{archive_directory}/visualisation") 30 | 31 | def draw_and_save(self) -> None: 32 | for file_path in self._path.glob(f"*{DATA_FILE_EXTENSION_WITH_DOT}"): 33 | file_data: COMMON_FORMAT_FILE_DATA_TYPE = read_intermediate_file(f"{file_path}") 34 | output_file_path: str = self._generate_output_file_name(files=[file_path]) 35 | self._add_single_file_data_with_errorbars(plotter=plotter, file_data=file_data) 36 | self._add_title(plotter=plotter, source_files=[file_path]) 37 | self._set_axis(plotter=plotter) 38 | self._save_plot(plotter=plotter, file_path=output_file_path) 39 | self._clear_plot(plotter=plotter) 40 | 41 | def _generate_output_file_name(self, files: list[Path]) -> str: 42 | # we know we will only ever be passed a single file name 43 | return f"{str(files[0])[:-len(DATA_FILE_EXTENSION)]}{PLOT_FILE_EXTENSION}" 44 | -------------------------------------------------------------------------------- /post_processing/formatter/README.md: -------------------------------------------------------------------------------- 1 | # Formatter 2 | 3 | The formatter converts CBT output json files into the correct format for the rest of the post processing. It is 4 | a json file of the format: 5 | 6 | ``` 7 | { 8 | : { 9 | bandwidth_bytes: 10 | blocksize: 11 | io_bytes: 12 | iops: 13 | latency: 14 | number_of_jobs: 15 | percentage_reads: 16 | percentage_writes: 17 | runtime_seconds: 18 | std_deviation: 19 | total_ios: 20 | } 21 | ... 
22 | { 23 | 24 | } 25 | maximum_bandwidth: 26 | latency_at_max_bandwidth: 27 | maximum_iops: 28 | latency_at_max_iops: 29 | } 30 | ``` 31 | A single file will be produced per block size used for the benchmark run. 32 | 33 | ## Standalone script 34 | A wrapper script has been provided for the formatter 35 | ``` 36 | fio_common_output_wrapper.py --archive= 37 | --results_file_root= 38 | ``` 39 | where 40 | - `--archive` Required. the archive directory given to CBT for the benchmark run. 41 | - `--results_file_root` Optional. the name of the results file to process, without the extension. This defaults to `json_output`, 42 | which is the default for CBT runs, if not specified 43 | 44 | Full help text is provided by using `--help` with the script 45 | 46 | ## Output 47 | A directory called `visualisation` will be created in the directory specified by `--archive` that contains all the processed files. 48 | There will be one file per blocksize used for the benchmark run. 49 | 50 | ## Example 51 | 52 | ```bash 53 | PYTHONPATH=/cbt /cbt/tools/fio_common_output_wrapper.py --archive="/tmp/ch_cbt_run" --results_file_root="ch_json_result" 54 | ``` -------------------------------------------------------------------------------- /example/bluestore/runtests.bluestore_example.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | user: 'ubuntu' 3 | head: "incerta01.front.sepia.ceph.com" 4 | clients: ["incerta01.front.sepia.ceph.com", "incerta02.front.sepia.ceph.com", "incerta03.front.sepia.ceph.com", "incerta04.front.sepia.ceph.com"] 5 | osds: ["incerta01.front.sepia.ceph.com", "incerta02.front.sepia.ceph.com", "incerta03.front.sepia.ceph.com", "incerta04.front.sepia.ceph.com"] 6 | mons: 7 | incerta01.front.sepia.ceph.com: 8 | a: "10.0.10.101:6789" 9 | osds_per_node: 4 10 | fs: 'xfs' 11 | mkfs_opts: '-f -i size=2048' 12 | mount_opts: '-o inode64,noatime' 13 | conf_file: '/home/nhm/incerta/ceph.conf.bluestore_example' 14 | iterations: 1 15 | use_existing: False 16 | clusterid: "ceph" 17 | tmp_dir: "/tmp/cbt" 18 | ceph-osd_cmd: "env -i TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728 /usr/local/bin/ceph-osd" 19 | ceph-mon_cmd: "/usr/local/bin/ceph-mon" 20 | ceph-run_cmd: "/usr/local/bin/ceph-run" 21 | rados_cmd: "/usr/local/bin/rados" 22 | ceph_cmd: "/usr/local/bin/ceph" 23 | rbd_cmd: "/usr/local/bin/rbd" 24 | 25 | pool_profiles: 26 | rbd: 27 | pg_size: 8192 28 | pgp_size: 8192 29 | replication: 3 30 | 31 | benchmarks: 32 | librbdfio: 33 | time: 300 34 | vol_size: 32768 35 | mode: ['read', 'write', 'randread', 'randwrite', 'rw', 'randrw'] 36 | rwmixread: 50 37 | op_size: [4194304, 2097152, 1048576, 524288, 262144, 131072, 65536, 32768, 16384, 8192, 4096] 38 | procs_per_volume: [1] 39 | volumes_per_client: [2] 40 | iodepth: [32] 41 | osd_ra: [4096] 42 | cmd_path: '/home/ubuntu/src/fio/fio' 43 | pool_profile: 'rbd' 44 | log_avg_msec: 100 45 | # Optionally disable fine-grained logging by fio 46 | log_iops: False 47 | log_bw: False 48 | log_lat: False 49 | # Optionally, set the rbd and pool names 50 | poolname: 'rbd' 51 | rbdname: 'img01' 52 | # use_existing_volumes needs to be true to set the pool and rbd names 53 | use_existing_volumes: True 54 | -------------------------------------------------------------------------------- /tools/fio-parse-jsons/README.md: -------------------------------------------------------------------------------- 1 | # fio-parse-jsons.py - a FIO post processing tool. 
2 | 3 | ## Description: 4 | 5 | This is a standalone tool to assist in the post-processing of JSON output files from CBT when running the FIO benchmark. 6 | 7 | The execution of the script produces as output: 8 | 9 | 1. a gnuplot script, 10 | 2. a .dat file with the data to plot, 11 | 3. a summary table of FIO results in wiki format, printed to stdout. 12 | 13 | This is especially useful to produce a response graph from a set of executions ranging over the number of FIO jobs and the iodepth values. 14 | The script was written before knowledge of CBT was gained, so in a way it is independent of the script driving the tests. 15 | A future PR would integrate the functionality of this standalone script with that of CBT. 16 | 17 | ## Requirements: 18 | 19 | Besides the yaml and xml Python modules that CBT already depends upon, you need to install the gnuplot package according to your Linux distro. 20 | 21 | ## Usage: 22 | 23 | The following is an example of the execution of the script: 24 | 25 | ```bash 26 | # python3 /cbt/tools/fio-parse-jsons.py -c crimson200gb_1procs_randwrite_list -t 'Crimson 200GB RBD 4k rw' -a crimson4cores_200gb_1img_4k_1procs_randwrite_avg.json 27 | ``` 28 | 29 | The arguments are: 30 | 31 | - `-c config_file:` a text file containing the list of FIO output JSON files to process, 32 | - `-t title:` the string to use as the title for the gnuplot chart, 33 | - `-a cpu_avg.json:` a .json file containing the avg CPU utilisation, normally produced by the script parse-top.pl. 34 | 35 | The following are the .dat and gnuplot files produced: 36 | 37 | ```bash 38 | crimson200gb_1procs_randwrite.dat 39 | crimson200gb_1procs_randwrite.plot 40 | ``` 41 | 42 | To produce the chart, simply execute 43 | 44 | ```bash 45 | gnuplot crimson200gb_1procs_randwrite.plot 46 | ``` 47 | 48 | The IOPS vs latency chart result is shown below: 49 | 50 | ![crimson200gb_1procs_randwrite](https://github.com/ceph/cbt/assets/23522684/44aeeb17-b99f-48c7-bd0e-3a443c2e5d90) 51 | 52 | -------------------------------------------------------------------------------- /example/bluestore/mkpart_hdd_nvme_bs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | JPD=8 4 | 5 | failed() 6 | { 7 | sleep 2 # Wait for the kernel to stop whining 8 | echo "Hrm, that didn't work. Calling for help." 9 | # sudo ipmitool chassis identify force 10 | echo "RAID Config failed: ${1}" 11 | while [ 1 ]; do sleep 10; done 12 | exit 1; 13 | } 14 | 15 | # First, look for the system disk so we avoid touching it. 16 | SYSPART=`df | grep "/$" | cut -d" " -f1 | cut -d"/" -f3` 17 | #SYSPART=`sudo pvs | grep "/dev/" | cut -f3 -d" " | sed -e 's/[0-9]*$//g'` 18 | echo "System on $SYSPART" 19 | 20 | # Remove the partition label symlinks 21 | sudo rm /dev/disk/by-partlabel/osd-device* 22 | 23 | echo "Making label on OSD devices" 24 | i=0 25 | 26 | # Next, make the OSD data partitions. In this case we search for the Seagate disks in the node. 27 | for DEV in `ls -al /dev/disk/by-id/ata-ST9* | grep -v "part" | cut -f7 -d"/" | tr '\n' ' '` 28 | do 29 | if [[ !
$SYSPART =~ $DEV ]] && [ $i -lt 37 ] 30 | then 31 | sudo parted -s -a optimal /dev/$DEV mklabel gpt || failed "mklabel $DEV" 32 | echo "Creating osd device $i data label" 33 | sudo parted -s -a optimal /dev/$DEV mkpart osd-device-$i-data 0G 10G || failed "mkpart $i-data" 34 | sudo parted -s -a optimal /dev/$DEV mkpart osd-device-$i-block 10G 100% || failed "mkpart $i-block" 35 | let "i++" 36 | fi 37 | done 38 | 39 | j=0; 40 | for DEV in `ls -al /dev/nvme*n1 | cut -f3 -d"/" | tr '\n' ' '` 41 | do 42 | sudo parted -s -a optimal /dev/$DEV mklabel gpt || failed "mklabel $DEV" 43 | for ((k=0; k < $JPD; k++ )) 44 | do 45 | if [[ ! $SYSPART =~ $DEV ]] && [ $j -lt $i ] 46 | then 47 | echo "Creating osd device $j journal label" 48 | sudo parted -s -a optimal /dev/$DEV mkpart osd-device-$j-wal $(( 10 * $k ))G $(( 10 * $(($k)) + 2))G || failed "mkpart $j-wal" 49 | sudo parted -s -a optimal /dev/$DEV mkpart osd-device-$j-db $(( 10 * $(($k)) + 2 ))G $(( 10 * $(($k + 1)) ))G || failed "mkpart $j-db" 50 | let "j++" 51 | fi 52 | done 53 | done 54 | -------------------------------------------------------------------------------- /tools/invariant.yaml: -------------------------------------------------------------------------------- 1 | # Dummy yaml for the generator serialiser --DO NOT CHANGE! 2 | cluster: 3 | user: 'user' 4 | head: "localhost" 5 | clients: ["localhost"] 6 | osds: ["localhost"] 7 | archive_dir: '/tmp' 8 | rgws: 9 | localhost: 10 | client.radosgw.gateway: 11 | host: "127.0.0.1" 12 | osds_per_node: 1 13 | fs: 'xfs' 14 | mkfs_opts: '-f -i size=2048' 15 | mount_opts: '-o inode64,noatime,logbsize=256k' 16 | conf_file: '/etc/ceph/ceph.conf' 17 | tmp_conf: '/etc/ceph/ceph.conf' # used by hsbench 18 | iterations: 1 19 | use_existing: False 20 | clusterid: "ceph" 21 | tmp_dir: "/tmp/cbt" 22 | ceph-authtool_cmd: "/usr/local/bin/ceph-authtool" 23 | ceph-osd_cmd: "env -i TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728 /usr/local/bin/ceph-osd" 24 | ceph-rgw_cmd: "env -i TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728 /usr/local/bin/radosgw" 25 | ceph-mon_cmd: "/usr/local/bin/ceph-mon" 26 | ceph-run_cmd: "/usr/local/bin/ceph-run" 27 | rados_cmd: "/usr/local/bin/rados" 28 | ceph_cmd: "/usr/local/bin/ceph" 29 | rbd_cmd: "/usr/local/bin/rbd" 30 | ceph-mgr_cmd: "/usr/local/bin/ceph-mgr" 31 | radosgw-admin_cmd: "/usr/local/bin/radosgw-admin" 32 | 33 | osd_valgrind: "massif" 34 | pool_profiles: 35 | rgw: 36 | pg_size: 128 37 | pgp_size: 128 38 | replication: 1 39 | rgw_pools: 40 | control: rgw 41 | meta: rgw 42 | log: rgw 43 | buckets: rgw 44 | buckets_data: rgw 45 | buckets_index: rgw 46 | 47 | client_endpoints: 48 | hsbench: 49 | driver: 's3' 50 | endpoints_per_client: 1 51 | 52 | benchmarks: 53 | hsbench: 54 | cmd_path: '/home//go/src/github.com/markhpc/hsbench/hsbench' 55 | client_endpoints: 'hsbench' 56 | objects: 10000 57 | duration: -1 58 | buckets: 1 59 | threads: 16 60 | size: '4K' 61 | report_interval: 10 62 | osd_ra: 0 63 | rbdfio: 64 | osd_ra: 0 65 | tmp_conf: '/etc/ceph/ceph.conf' 66 | radosbench: 67 | rados_cmd: '/home/rzarzynski/ceph-1/build/bin/rados' 68 | -------------------------------------------------------------------------------- /docs/Workloads.md: -------------------------------------------------------------------------------- 1 | # Workloads 2 | 3 | A workload is the specification of a sequence of tests to be executed in the order given. 4 | Typically this involves a *range* of values for a specific benchmark argument. The most used is 5 | the *queue depth*. 
Depending on the benchmark, this can be expressed as a function of the number 6 | of jobs (or threads, or processes), such that increasing the number of these causes a proportional 7 | increase in the I/O. Specifying workloads in this way makes it possible to generate *response latency curves* 8 | from the results. 9 | 10 | The workload feature is currently supported for `librbdfio` only. 11 | 12 | ![workloads](./workloads.png) 13 | 14 | * A `workloads` section is composed of a non-empty collection. Each item in the workload has a free-form name, 15 | and contains in turn a collection of valid options with values for the benchmark. 16 | * For each of the `iodepth` and `numjobs` options, a range of integer values is permitted. 17 | 18 | During execution, any of the given values for the benchmark options in the global section are overwritten 19 | by the given values within the current test workload. The global values are restored once the workload test 20 | completes. 21 | 22 | As an example, the following specifies two workloads: 23 | 24 | * the first is named `precondition` and consists of executing a random write over a queue depth of 4 25 | (that is, the product of numjobs and iodepth), and indicates that monitoring should be disabled during the 26 | execution of the workload, 27 | * the second is named `test1`, and specifies a random read over the cross product of the provided sequences for 28 | numjobs and iodepth, respectively; that is, (1,1), (1,4), (1,8) .. (8,8). 29 | 30 | 31 | ```yaml 32 | 33 | workloads: 34 | precondition: 35 | jobname: 'precond1rw' 36 | mode: 'randwrite' 37 | numjobs: [ 1 ] 38 | iodepth: [ 4 ] 39 | monitor: False # whether to run the monitors alongside the test 40 | test1: 41 | jobname: 'rr' 42 | mode: 'randread' 43 | numjobs: [ 1, 4, 8 ] 44 | iodepth: [ 1, 4, 8 ] 45 | 46 | ``` 47 | -------------------------------------------------------------------------------- /example/example-client_endpoints.yaml: -------------------------------------------------------------------------------- 1 | cluster: 2 | user: 'perf' 3 | head: "incerta01" 4 | clients: ["incerta01"] 5 | osds: ["incerta02"] 6 | mons: 7 | incerta01: 8 | a: "10.0.10.101:6789" 9 | mgrs: 10 | incerta01: 11 | a: ~ 12 | mdss: 13 | incerta01: 14 | a: ~ 15 | osds_per_node: 1 16 | fs: 'xfs' 17 | mkfs_opts: '-f -i size=2048' 18 | mount_opts: '-o inode64,noatime,logbsize=256k' 19 | conf_file: '/home/perf/ceph_tests/ceph.conf.64.async' 20 | iterations: 1 21 | use_existing: False 22 | clusterid: "ceph" 23 | tmp_dir: "/tmp/cbt" 24 | ceph-osd_cmd: "env -i TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728 /usr/local/bin/ceph-osd" 25 | ceph-mon_cmd: "/usr/local/bin/ceph-mon" 26 | ceph-run_cmd: "/usr/local/bin/ceph-run" 27 | rados_cmd: "/usr/local/bin/rados" 28 | ceph_cmd: "/usr/local/bin/ceph" 29 | ceph-fuse_cmd: "/usr/local/bin/ceph-fuse" 30 | rbd_cmd: "/usr/local/bin/rbd" 31 | rbd-nbd_cmd: "/usr/local/bin/rbd-nbd" 32 | rbd-fuse_cmd: "/usr/local/bin/rbd-fuse" 33 | ceph-mgr_cmd: "/usr/local/bin/ceph-mgr" 34 | ceph-mds_cmd: "/usr/local/bin/ceph-mds" 35 | osd_valgrind: "massif" 36 | pool_profiles: 37 | replication: 38 | pg_size: 256 39 | pgp_size: 256 40 | replication: 1 41 | ec21: 42 | pg_size: 2048 43 | pgp_size: 2048 44 | replication: 'erasure' 45 | erasure_profile: 'ec21' 46 | erasure_profiles: 47 | ec21: 48 | erasure_k: 2 49 | erasure_m: 1 50 | cephfs_pools: 51 | cephfs_data: replication 52 | cephfs_metadata: replication 53 | 54 | client_endpoints: 55 | fiotest: 56 | driver: 'librbd' 57 | # driver: 'rbd-kernel' 58 | # driver: 'rbd-nbd' 59 | # 
driver: 'rbd-fuse' 60 | # driver: 'rbd-tcmu' 61 | # driver: 'cephfs-kernel' 62 | # driver: 'cephfs-fuse' 63 | endpoints_per_client: 1 64 | endpoint_size: 524288 65 | pool_profile: replication 66 | 67 | benchmarks: 68 | fio: 69 | client_endpoints: 'fiotest' 70 | time: 300 71 | time_based: True 72 | norandommap: True 73 | size: 262144 74 | mode: ['read', 'write', 'randread','randwrite'] 75 | rwmixread: 50 76 | op_size: [4194304, 131072, 4096] 77 | procs_per_endpoint: [1] 78 | iodepth: [32] 79 | osd_ra: [4096] 80 | cmd_path: '/home/perf/src/fio/fio' 81 | log_avg_msec: 100 82 | 83 | -------------------------------------------------------------------------------- /client_endpoints_factory.py: -------------------------------------------------------------------------------- 1 | import settings 2 | 3 | from cluster.ceph import Ceph 4 | 5 | from client_endpoints.librbd_client_endpoints import LibrbdClientEndpoints 6 | from client_endpoints.rbdkernel_client_endpoints import RbdKernelClientEndpoints 7 | from client_endpoints.rbdnbd_client_endpoints import RbdNbdClientEndpoints 8 | from client_endpoints.rbdfuse_client_endpoints import RbdFuseClientEndpoints 9 | from client_endpoints.rbdtcmu_client_endpoints import RbdTcmuClientEndpoints 10 | from client_endpoints.cephfskernel_client_endpoints import CephfsKernelClientEndpoints 11 | from client_endpoints.cephfsfuse_client_endpoints import CephfsFuseClientEndpoints 12 | from client_endpoints.rgws3_client_endpoints import RgwS3ClientEndpoints 13 | ce_objects = {} 14 | 15 | def get(cluster, name): 16 | if isinstance(cluster, Ceph): 17 | return get_ceph(cluster, name) 18 | 19 | def get_ceph(cluster, name): 20 | ce_config = settings.client_endpoints.get(name, None) 21 | 22 | if ce_config == None: 23 | raise ValueError('No client_endpoints with name "%s" found.' % name) 24 | 25 | cclass = cluster.getclass() 26 | key = "%s-%s" % (cclass, name) 27 | 28 | if key in ce_objects: 29 | return ce_objects[key] 30 | 31 | driver = ce_config.get('driver', None) 32 | if driver is None: 33 | raise ValueError('No driver defined in the "%s" client_endpoints.' % name) 34 | elif driver == "librbd": 35 | ce_objects[key] = LibrbdClientEndpoints(cluster, ce_config) 36 | elif driver == "rbd-kernel": 37 | ce_objects[key] = RbdKernelClientEndpoints(cluster, ce_config) 38 | elif driver == "rbd-nbd": 39 | ce_objects[key] = RbdNbdClientEndpoints(cluster, ce_config) 40 | elif driver == "rbd-fuse": 41 | ce_objects[key] = RbdFuseClientEndpoints(cluster, ce_config) 42 | elif driver == "rbd-tcmu": 43 | ce_objects[key] = RbdTcmuClientEndpoints(cluster, ce_config) 44 | elif driver == "cephfs-kernel": 45 | ce_objects[key] = CephfsKernelClientEndpoints(cluster, ce_config) 46 | elif driver == "cephfs-fuse": 47 | ce_objects[key] = CephfsFuseClientEndpoints(cluster, ce_config) 48 | elif driver == "s3": 49 | ce_objects[key] = RgwS3ClientEndpoints(cluster, ce_config) 50 | else: 51 | raise ValueError('%s clusters do not support "%s" client_endpoints.' 
% (cclass, driver)) 52 | return ce_objects[key] 53 | -------------------------------------------------------------------------------- /benchmarkfactory.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import itertools 3 | 4 | import settings 5 | from benchmark.radosbench import Radosbench 6 | from benchmark.fio import Fio 7 | from benchmark.hsbench import Hsbench 8 | from benchmark.rbdfio import RbdFio 9 | from benchmark.rawfio import RawFio 10 | from benchmark.kvmrbdfio import KvmRbdFio 11 | from benchmark.librbdfio import LibrbdFio 12 | from benchmark.nullbench import Nullbench 13 | from benchmark.cosbench import Cosbench 14 | from benchmark.cephtestrados import CephTestRados 15 | from benchmark.getput import Getput 16 | 17 | def get_all(archive, cluster, iteration): 18 | for benchmark, config in sorted(settings.benchmarks.items()): 19 | default = {"benchmark": benchmark, 20 | "iteration": iteration} 21 | for current in all_configs(config): 22 | current.update(default) 23 | yield get_object(archive, cluster, benchmark, current) 24 | 25 | 26 | def all_configs(config): 27 | """ 28 | return all parameter combinations for config 29 | config: dict - list of params 30 | iterate over all top-level lists in config 31 | """ 32 | cycle_over_lists = [] 33 | cycle_over_names = [] 34 | default = {} 35 | 36 | for param, value in list(config.items()): 37 | # 'acceptable' applies to the benchmark as a whole, so there is no need 38 | # to add it to the set for permutation 39 | if param == 'acceptable': 40 | default[param] = value 41 | elif isinstance(value, list): 42 | cycle_over_lists.append(value) 43 | cycle_over_names.append(param) 44 | else: 45 | default[param] = value 46 | 47 | for permutation in itertools.product(*cycle_over_lists): 48 | current = copy.deepcopy(default) 49 | current.update(list(zip(cycle_over_names, permutation))) 50 | yield current 51 | 52 | def get_object(archive, cluster, benchmark, bconfig): 53 | benchmarks = { 54 | 'nullbench': Nullbench, 55 | 'radosbench': Radosbench, 56 | 'fio': Fio, 57 | 'hsbench': Hsbench, 58 | 'rbdfio': RbdFio, 59 | 'kvmrbdfio': KvmRbdFio, 60 | 'rawfio': RawFio, 61 | 'librbdfio': LibrbdFio, 62 | 'cosbench': Cosbench, 63 | 'cephtestrados': CephTestRados, 64 | 'getput': Getput} 65 | try: 66 | return benchmarks[benchmark](archive, cluster, bconfig) 67 | except KeyError: 68 | return None 69 | -------------------------------------------------------------------------------- /tools/fio_common_output_wrapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Usage: 5 | fio_common_output_wrapper.py --archive=<archive_directory> 6 | --results_file_root=<results_file_root> 7 | 8 | Input: 9 | --archive [Required] The archive directory that contains the 10 | results files from an fio run of cbt 11 | 12 | --results_file_root [Optional] The base name for the json output files 13 | produced from an fio run in cbt.
14 | Default: "json_output" 15 | 16 | Examples: 17 | fio_common_output_wrapper.py --archive="/tmp/ch_cbt_run" 18 | 19 | fio_common_output_wrapper.py --archive="/tmp/ch_cbt_run" --results_file_root="ch_json_result" 20 | """ 21 | 22 | import subprocess 23 | from argparse import ArgumentParser, Namespace 24 | from logging import Logger, getLogger 25 | 26 | from post_processing.formatter.common_output_formatter import CommonOutputFormatter 27 | 28 | log: Logger = getLogger() 29 | 30 | 31 | def main() -> int: 32 | """ 33 | Main routine for the script 34 | """ 35 | 36 | result: int = 0 37 | 38 | parser: ArgumentParser = ArgumentParser(description="Parse cbt json output into a common format") 39 | parser.add_argument("--archive", type=str, required=True, help="The archive directory used for the CBT results") 40 | parser.add_argument( 41 | "--results_file_root", 42 | type=str, 43 | required=False, 44 | default="json_output*", 45 | help="The filename root of all the CBT output json files", 46 | ) 47 | 48 | args: Namespace = parser.parse_args() 49 | 50 | output_directory: str = f"{args.archive}/visualisation/" 51 | subprocess.run(f"mkdir -p -m0755 {output_directory}", shell=True) 52 | 53 | formatter: CommonOutputFormatter = CommonOutputFormatter( 54 | archive_directory=args.archive, filename_root=args.results_file_root 55 | ) 56 | 57 | try: 58 | formatter.convert_all_files() 59 | formatter.write_output_file() 60 | except Exception as e: 61 | log.error( 62 | "Encountered an error parsing results in directory %s with name %s" 63 | % (args.archive, args.results_file_root) 64 | ) 65 | log.exception(e) 66 | result = 1 67 | 68 | return result 69 | 70 | 71 | if __name__ == "__main__": 72 | main() 73 | -------------------------------------------------------------------------------- /include/js/table.js: -------------------------------------------------------------------------------- 1 | var keys = d3.keys(dataSet[0]); 2 | 3 | var mins = {} 4 | var maxes = {} 5 | dataSet.forEach(function(item) { 6 | var mean = d3.mean(d3.values(item).slice(3)); 7 | var deviation = d3.deviation(d3.values(item).slice(3)); 8 | var minmax_key = d3.values(item).slice(0,3).join(""); 9 | // console.log(minmax_key); 10 | mins[minmax_key] = mean-deviation; 11 | maxes[minmax_key] = mean+deviation; 12 | }); 13 | //console.log(mins); 14 | //console.log(maxes); 15 | 16 | var thead = d3.select("#view > thead") 17 | var th = thead.selectAll("th") 18 | .data(keys) 19 | .enter() 20 | .append('th') 21 | .text(function(d){ return d }) 22 | 23 | var tbody = d3.select("#view > tbody"); 24 | 25 | var tr = tbody.selectAll("tr") 26 | .data(dataSet) 27 | .enter() 28 | .append('tr') 29 | .selectAll('td') 30 | .data(function (row) { 31 | key = d3.values(row).slice(0,3).join("") 32 | dataArray = d3.entries(row); 33 | dataArray.forEach(function(data) { 34 | data["min"] = mins[key]; 35 | data["max"] = maxes[key]; 36 | }); 37 | // console.log(dataArray); 38 | return dataArray; 39 | 40 | }) 41 | .enter() 42 | .append('td') 43 | .append('div') 44 | .style({ 45 | "background-color": function(d, i){ 46 | if(i < 3) return "lightblue"; 47 | console.log(d); 48 | if (d.min === 0 && d.max === 0) { 49 | return "lightgrey"; 50 | } 51 | return makecolor(d.value, d.min, d.max); 52 | }, 53 | }) 54 | .text(function(d){ 55 | return d.value 56 | }); 57 | 58 | function makecolor(val, min, max) { 59 | var red = 255; 60 | var green = 255; 61 | if(val < min) { 62 | green = 0; 63 | } else if(val < min+((max-min)/2.0)) { 64 | green = Math.round(((val-min)/((max-min)/2.0)) * 
255); 65 | } else if(val < max) { 66 | red = Math.round(((max-val)/((max-min)/2.0)) * 255); 67 | } else { 68 | red = 0; 69 | } 70 | return "#" + rgb2hex(red,green,0); 71 | } 72 | 73 | function rgb2hex(r,g,b) { 74 | if (g !== undefined) 75 | return Number(0x1000000 + r*0x10000 + g*0x100 + b).toString(16).substring(1); 76 | else 77 | return Number(0x1000000 + r[0]*0x10000 + r[1]*0x100 + r[2]).toString(16).substring(1); 78 | } 79 | 80 | -------------------------------------------------------------------------------- /log_support.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import yaml 4 | 5 | has_a_tty = os.isatty(1) # test stdout 6 | 7 | 8 | def load_run_params(run_params_file): 9 | with open(run_params_file) as fd: 10 | dt = yaml.load(fd) 11 | 12 | return dict(run_uuid=dt['run_uuid'], 13 | comment=dt.get('comment')) 14 | 15 | 16 | def color_me(color): 17 | RESET_SEQ = "\033[0m" 18 | COLOR_SEQ = "\033[1;%dm" 19 | 20 | color_seq = COLOR_SEQ % (30 + color) 21 | 22 | def closure(msg): 23 | return color_seq + msg + RESET_SEQ 24 | return closure 25 | 26 | 27 | class ColoredFormatter(logging.Formatter): 28 | BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = list(range(8)) 29 | 30 | colors = { 31 | 'WARNING': color_me(YELLOW), 32 | 'DEBUG': color_me(BLUE), 33 | 'CRITICAL': color_me(RED), 34 | 'ERROR': color_me(RED), 35 | 'INFO': color_me(GREEN) 36 | } 37 | 38 | def __init__(self, msg, use_color=True, datefmt=None): 39 | logging.Formatter.__init__(self, msg, datefmt=datefmt) 40 | self.use_color = use_color 41 | 42 | def format(self, record): 43 | orig = record.__dict__ 44 | record.__dict__ = record.__dict__.copy() 45 | levelname = record.levelname 46 | 47 | prn_name = levelname + ' ' * (8 - len(levelname)) 48 | if (levelname in self.colors) and has_a_tty: 49 | record.levelname = self.colors[levelname](prn_name) 50 | else: 51 | record.levelname = prn_name 52 | 53 | # super doesn't work here in 2.6 O_o 54 | res = logging.Formatter.format(self, record) 55 | # res = super(ColoredFormatter, self).format(record) 56 | 57 | # restore record, as it will be used by other formatters 58 | record.__dict__ = orig 59 | return res 60 | 61 | 62 | def setup_loggers(def_level=logging.DEBUG, log_fname=None): 63 | logger = logging.getLogger('cbt') 64 | logger.setLevel(logging.DEBUG) 65 | sh = logging.StreamHandler() 66 | sh.setLevel(def_level) 67 | 68 | log_format = '%(asctime)s - %(levelname)s - %(name)-8s - %(message)s' 69 | colored_formatter = ColoredFormatter(log_format, datefmt="%H:%M:%S") 70 | 71 | sh.setFormatter(colored_formatter) 72 | logger.addHandler(sh) 73 | 74 | if log_fname is not None: 75 | fh = logging.FileHandler(log_fname) 76 | formatter = logging.Formatter(log_format, datefmt="%H:%M:%S") 77 | fh.setFormatter(formatter) 78 | fh.setLevel(logging.DEBUG) 79 | logger.addHandler(fh) 80 | else: 81 | fh = None 82 | -------------------------------------------------------------------------------- /benchmark/lis.py: -------------------------------------------------------------------------------- 1 | import operator as op 2 | 3 | # a mini s-expr interpreter 4 | # inspired by https://norvig.com/lispy.html 5 | 6 | Symbol = str 7 | List = list 8 | 9 | 10 | class Lispy: 11 | @staticmethod 12 | def _tokenize(s): 13 | return s.replace('(', ' ( ').replace(')', ' ) ').split() 14 | 15 | @staticmethod 16 | def _atom(token): 17 | try: 18 | return int(token) 19 | except ValueError: 20 | try: 21 | return float(token) 22 | except ValueError: 23 
| return Symbol(token) 24 | 25 | def _read_from_tokens(self, tokens): 26 | if len(tokens) == 0: 27 | raise SyntaxError('unexpected EOF while reading') 28 | token = tokens.pop(0) 29 | if token == '(': 30 | stmt = [] 31 | while tokens[0] != ')': 32 | stmt.append(self._read_from_tokens(tokens)) 33 | tokens.pop(0) # pop off ')' 34 | return stmt 35 | elif token == ')': 36 | raise SyntaxError('unexpected ")"') 37 | else: 38 | return self._atom(token) 39 | 40 | def parse(self, s): 41 | return self._read_from_tokens(self._tokenize(s)) 42 | 43 | def eval(self, stmt, env): 44 | if isinstance(stmt, Symbol): 45 | return env.eval(stmt) 46 | elif isinstance(stmt, List): 47 | func = self.eval(stmt[0], env) 48 | args = [self.eval(exp, env) for exp in stmt[1:]] 49 | return func(*args) 50 | else: 51 | return stmt 52 | 53 | 54 | class Env(dict): 55 | @staticmethod 56 | def near(lhs, rhs, abs_error): 57 | if rhs == 0: 58 | return lhs == rhs 59 | else: 60 | return (abs(lhs - rhs) / float(rhs)) <= abs_error 61 | 62 | def __init__(self, outer, **locals): 63 | if locals: 64 | self.update(locals) 65 | self.outer = outer 66 | # pass 'result' and 'baseline' to some functions 67 | # TODO: return "goodness" instead of a boolean 68 | self.update({ 69 | 'less': lambda: self.eval('result') < self.eval('baseline'), 70 | 'greater': lambda: self.eval('result') > self.eval('baseline'), 71 | 'near': lambda abs_error: self.near(self.eval('result'), 72 | self.eval('baseline'), 73 | abs_error), 74 | 'or': op.or_}) 75 | 76 | def find(self, var): 77 | if var in self: 78 | return self 79 | elif self.outer: 80 | return self.outer.find(var) 81 | else: 82 | raise NameError(var) 83 | 84 | def eval(self, var): 85 | return self.find(var)[var] 86 | -------------------------------------------------------------------------------- /parsing/database.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | conn = sqlite3.connect(':memory:') 4 | 5 | FORMAT = ['hash', 'testname', 'iteration', 'benchmark', 'osdra', 'opsize', 'cprocs', 'iodepth', 'testtype', 'writebw', 'readbw'] 6 | TYPES = {'hash': 'text primary key', 'testname': 'text', 'iteration': 'integer', 'benchmark': 'text', 'osdra': 'integer', 'opsize': 'integer', 'cprocs': 'integer', 'iodepth': 'integer', 'testtype': 'text', 'writebw': 'real', 'readbw': 'real'} 7 | 8 | 9 | def create_db(): 10 | c = conn.cursor() 11 | q = 'CREATE TABLE if not exists results (' 12 | values = [] 13 | for key in FORMAT: 14 | values.append("%s %s" % (key, TYPES[key])) 15 | q += ', '.join(values) + ')' 16 | print(q) 17 | c.execute(q) 18 | conn.commit() 19 | 20 | 21 | def insert(values): 22 | c = conn.cursor() 23 | c.execute('INSERT INTO results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ? ,?, ?)', values) 24 | conn.commit() 25 | 26 | 27 | def update_readbw(hashval, bw): 28 | c = conn.cursor() 29 | c.execute('UPDATE results SET readbw = readbw + ? WHERE hash = ?', (bw, hashval)) 30 | conn.commit() 31 | 32 | 33 | def update_writebw(hashval, bw): 34 | c = conn.cursor() 35 | c.execute('UPDATE results SET writebw = writebw + ? WHERE hash = ?', (bw, hashval)) 36 | conn.commit() 37 | 38 | 39 | def get_values(column): 40 | c = conn.cursor() 41 | # Careful here, this could lead to an SQL injection but appears necessary 42 | # since bindings can't be used for column names. 
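    # A possible hardening sketch (not part of the original code): since the
    # callers only ever pass known column names, the identifier could be
    # validated against the FORMAT whitelist before being interpolated, e.g.:
    #   if column not in FORMAT:
    #       raise ValueError('unknown column: %s' % column)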
43 | c.execute('SELECT distinct %s FROM results ORDER BY %s' % (column, column)) 44 | return [item[0] for item in c.fetchall()] 45 | 46 | 47 | def fetch_table(params): 48 | c = conn.cursor() 49 | distincts = {} 50 | 51 | for param in params: 52 | distincts[param] = get_values(param) 53 | 54 | c.execute('SELECT testname,%s,readbw,writebw FROM results ORDER BY %s,testname' % (','.join(params), ','.join(params))) 55 | testnames = get_values('testname') 56 | 57 | table = [] 58 | writerow = [] 59 | readrow = [] 60 | for row in c.fetchall(): 61 | # Check to make sure we aren't missing a test 62 | while row[0] != testnames[len(writerow)]: 63 | blank = ['%s' % testnames[len(writerow)], ''] 64 | writerow.append(blank) 65 | readrow.append(blank) 66 | writerow.append([row[0], row[-1]]) 67 | readrow.append([row[0], row[-2]]) 68 | if len(writerow) == len(testnames): 69 | pre = [] 70 | for i in range(0, len(params)): 71 | pre.append([params[i], row[i + 1]]) 72 | table.append(pre + [['optype', 'write']] + writerow) 73 | table.append(pre + [['optype', 'read']] + readrow) 74 | writerow = [] 75 | readrow = [] 76 | return table 77 | -------------------------------------------------------------------------------- /docs/TestPlanSchema.md: -------------------------------------------------------------------------------- 1 | # Test plan schema 2 | 3 | A valid test plan .yaml consists of the following compulsory sections at the top level (the level is 4 | indicated by the indentation in .yaml: the top level has 0 indentation): 5 | 6 | * `cluster` 7 | * `benchmarks`. 8 | 9 | It may also have the following optional sections at the same level: 10 | 11 | * `monitoring_profiles` 12 | * `client_endpoints`. 13 | 14 | ![top_level](./toplevel.png) 15 | 16 | ## `cluster` 17 | 18 | The cluster section enumerates the components of the Ceph cluster relevant to CBT. There are two 19 | general classes of components: 20 | 21 | * scalars: for example, names whose value is a string, a number or a boolean; 22 | * collections: components that in turn contain further information, for example a profile of pool 23 | replication. 24 | 25 | The following are compulsory scalar entities: 26 | * a head node: this is a string indicating the node that starts the cluster. 27 | * a list of clients, each a string, representing an ssh-reachable host that has a benchmark 28 | executable installed, 29 | * a list of OSD nodes, each of which has at least one running OSD process. 30 | 31 | ![cluster](./cluster.png) 32 | 33 | 34 | ## `benchmarks` 35 | 36 | The benchmarks section consists of a non-empty list of collections, each describing a benchmark 37 | entity. 38 | 39 | * A benchmark entity starts with its *name* (second level indentation); valid names are, for example: 40 | `radosbench`, `hsbench`, `kvmrbdfio`, `librbdfio`, etc. 41 | 42 | * The contents of the benchmark entity (third level indentation) consist of a collection of items 43 | (either scalars or collections themselves). Most of these entities represent options for the 44 | command line invocation of the benchmark when executed by the clients. 45 | 46 | ![benchmarks](./benchmarks.png) 47 | 48 | 49 | ## `monitoring_profiles` 50 | 51 | 52 | The monitoring_profiles section consists of a non-empty list of collections, each describing a 53 | monitoring tool. 54 | 55 | A monitoring entity starts with its name (at second level indentation). Currently supported are `perf`, 56 | `collectl` and `top`.
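As a sketch, a minimal monitoring section might look like the following (the host names and the `args` string are illustrative only, not defaults; the fields are described next):

```yaml
monitoring_profiles:
  collectl:
    nodes: ["osd01", "osd02"]   # hosts to monitor; defaults to the osd nodes
    args: '-sCD'                # illustrative arguments passed to the tool
  top:
    nodes: ["osd01"]
```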
57 | 58 | The contents of the monitoring entity consist of: 59 | * a `nodes` (third level indentation) list of nodes to monitor (by default the osd nodes), and 60 | * an optional string `args` (third level indentation) to indicate the arguments to the monitoring tool. 61 | 62 | 63 | ## `client_endpoints` 64 | 65 | The client_endpoints section consists of a non-empty list of collections, each associated with a 66 | benchmark entity, and typically indicating the driver for the benchmark. The client_endpoints, if 67 | specified in a test plan, must be cross-referenced by the benchmark section, and as such the 68 | client_endpoints section normally precedes the benchmarks section in the test plan. 69 | 70 | See the dir `example/` for a number of test plan examples. 71 | -------------------------------------------------------------------------------- /tools/fio_objectstore_tools/bluestore_throttle_tuning.rst: -------------------------------------------------------------------------------- 1 | ========================= 2 | BlueStore Throttle Tuning 3 | ========================= 4 | 5 | Motivation 6 | ========== 7 | 8 | BlueStore has a throttling mechanism in order to ensure that queued IO doesn't 9 | increase without bound. If this throttle is set too low, OSD throughput will 10 | suffer. If it's set too high, we'll see unnecessary increases in latency at 11 | the objectstore level, preventing the OSD queues from performing QoS. If 12 | latency at the objectstore level is 1s due to the current queue length, the 13 | best possible latency for an incoming high priority IO would be 1s. 14 | 15 | Generally, we'd expect the relationship between latency and throttle value (or 16 | queue depth) to have two behavior types. When the store is sub-saturated, we'd 17 | expect increases in queued IO to increase throughput with little corresponding 18 | increase in latency. As the store saturates, we'd expect throughput to become 19 | relatively insensitive to throttle, but latency would begin to increase 20 | linearly. 21 | 22 | In choosing these throttle limits, a user would first want to understand the 23 | latency/throughput/throttle relationships for their hardware as well as their 24 | workload/application's preference for latency vs throughput. One could choose 25 | to deliberately sacrifice some amount of max throughput in exchange for better 26 | QoS, or one might choose to capture as much throughput as possible at the 27 | expense of higher average and especially tail latency. 28 | 29 | Usage 30 | ===== 31 | 32 | There is a backend for fio (src/test/fio/fio_ceph_objectstore.cc) which backs 33 | fio with a single objectstore instance. This instance has an option which will, 34 | at configurable intervals, alter the throttle values online among the configured 35 | options as the fio test runs. By capturing a trace of IOs performed via 36 | LTTng, we can get an idea of the throttle/latency/throughput relationship for a 37 | particular workload and device. 38 | 39 | First, Ceph needs to be built with fio and LTTng support: 40 | 41 | :: 42 | ./do_cmake.sh --verbose -DWITH_FIO=on -DWITH_LTTNG=on -DCMAKE_BUILD_TYPE=RelWithDebInfo 43 | 44 | Next, there are a few scripts under fio_objectstore_tools/ in the cbt.git 45 | repository to ease running fio with the right backend and graphing the results. 46 | Create a copy of runs.json, updating configs as needed (particularly device 47 | paths).
You can then do a run by running: 48 | 49 | :: 50 | ./run.py --initialize runs 51 | ./run.py --run 52 | 53 | Results will appear in dated subdirs under ~/output by default. 54 | 55 | In order to generate graphs from these results, run: 56 | 57 | :: 58 | ./analyze.py --generate-graphs --output 59 | 60 | The resulting graphs will plot latency and throughput for each traced IO (with 61 | curves for median (green) and 99pct (red)) against the kv throttle and deferred 62 | throttle values when the IO was released from the throttle. 63 | -------------------------------------------------------------------------------- /tools/fio-parse-json.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # 3 | # fio-json-prs.py - example script to parse distributed workload generation result 4 | # produced by fio in JSON format 5 | # 6 | # input parameters: 7 | # 1 - JSON file - file containing fio JSON output 8 | # 2 - JSON path - path through JSON tree to a leaf node 9 | # 10 | # assumption: json output of non-leaf nodes consists of either 11 | # - dictionary - key field selects sub-value 12 | # - sequence - key field syntax is name=value, where 13 | # name is a dictionary key of sequence elements, and 14 | # value is the desired value to select a sequence element 15 | # example: 16 | # python3 fio-parse-json.py r.fiojob.json.log 'jobs/jobname=randread/read/iops' 17 | # 18 | 19 | import os, sys 20 | from pprint import pprint 21 | import json 22 | 23 | NOTOK=1 24 | 25 | if len(sys.argv) < 3: 26 | print('usage: fio-parse-json.py fio-json.log path-to-leaf') 27 | print('path-to-leaf is a slash-separated list of key names in JSON tree') 28 | print('see instructions at top of this script') 29 | sys.exit(NOTOK) 30 | 31 | 32 | def filter_json_node(next_branch, node_list_in): 33 | #print next_branch, json.dumps(node, indent=4) 34 | #print '' 35 | #sys.stdout.flush() 36 | next_node_list = [] 37 | for n in node_list_in: 38 | dotlist = next_branch.split('=') 39 | if len(dotlist) > 2: 40 | print('unrecognized syntax at %s'%str(node)) 41 | sys.exit(NOTOK) 42 | elif len(dotlist) == 1: 43 | next_node_list.append(n[next_branch]) 44 | assert(isinstance(n, dict)) 45 | else: # must be a sequence, take any element with key matching value 46 | select_key = dotlist[0] 47 | select_value = dotlist[1] 48 | for e in n: # node is a seq 49 | #print 'select with key %s value %s sequence element %s'%(select_key, select_value, e) 50 | if select_value == '*': 51 | next_node_list.append(e) 52 | else: 53 | v = e[select_key] 54 | if v == select_value: 55 | next_node_list.append(e) 56 | 57 | if len(next_node_list) == 0: 58 | print('no list member in %s has key %s value %s'%(str(node), select_key, select_value)) 59 | sys.exit(NOTOK) 60 | return next_node_list 61 | 62 | 63 | fn = sys.argv[1] 64 | json_tree_path = sys.argv[2].split('/') 65 | with open(fn, 'r') as json_data: 66 | 67 | # check for empty file 68 | 69 | f_info = os.fstat(json_data.fileno()) 70 | if f_info.st_size == 0: 71 | print('JSON input file %s is empty'%fn) 72 | sys.exit(NOTOK) 73 | 74 | # find start of JSON object and position file handle right before that 75 | 76 | lines = json_data.readlines() 77 | start_of_json_data=0 78 | for l in lines: 79 | if l[0] == '{': break 80 | start_of_json_data += 1 81 | json_data.seek(0, os.SEEK_SET) 82 | for j in range(0,start_of_json_data): 83 | l = json_data.readline() 84 | 85 | # parse the JSON object 86 | 87 | node = json.load(json_data) 88 | current_branch = None 89 | 
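    # Walk the requested path one component at a time: the candidate list starts
    # at the JSON root, and each branch either descends into a dict key or
    # filters a sequence by name=value (a '*' value keeps every element).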
next_node_list = [node] 90 | for next_branch in json_tree_path: 91 | next_node_list = filter_json_node(next_branch, next_node_list) 92 | for n in next_node_list: print(n) 93 | 94 | -------------------------------------------------------------------------------- /post_processing/plotter/directory_comparison_plotter.py: -------------------------------------------------------------------------------- 1 | """ 2 | A file containing the classes and code required to read two files stored in the common 3 | intermediate format introduced in CBT PR #319 (https://github.com/ceph/cbt/pull/319) 4 | and produce a plot of both the files on the same axes. 5 | """ 6 | 7 | from logging import Logger, getLogger 8 | from pathlib import Path 9 | 10 | import matplotlib.pyplot as plotter 11 | 12 | from post_processing.common import ( 13 | PLOT_FILE_EXTENSION_WITH_DOT, 14 | find_common_data_file_names, 15 | read_intermediate_file, 16 | ) 17 | from post_processing.plotter.common_format_plotter import CommonFormatPlotter 18 | from post_processing.types import COMMON_FORMAT_FILE_DATA_TYPE 19 | 20 | log: Logger = getLogger("cbt") 21 | 22 | 23 | class DirectoryComparisonPlotter(CommonFormatPlotter): 24 | """ 25 | Read the intermediate data files in the common json format and produce a 26 | curve plot of both sets of data on the same axes. Error bars are not included 27 | as they seem to make the plot harder to read and compare. 28 | """ 29 | 30 | def __init__(self, output_directory: str, directories: list[str]) -> None: 31 | self._output_directory: str = f"{output_directory}" 32 | self._comparison_directories: list[Path] = [Path(f"{directory}/visualisation") for directory in directories] 33 | 34 | def draw_and_save(self) -> None: 35 | # output_file_path: str = self._generate_output_file_name(files=self._comparison_directories) 36 | 37 | # We will only compare data for files with the same name, so find all 38 | # the file names that are common across all directories. 
Not sure this 39 | # is the right way though 40 | common_file_names: list[str] = find_common_data_file_names(self._comparison_directories) 41 | 42 | for file_name in common_file_names: 43 | output_file_path: str = self._generate_output_file_name(files=[Path(file_name)]) 44 | for directory in self._comparison_directories: 45 | file_data: COMMON_FORMAT_FILE_DATA_TYPE = read_intermediate_file(f"{directory}/{file_name}") 46 | # we choose the last directory name for the label to apply to the data 47 | self._add_single_file_data( 48 | plotter=plotter, 49 | file_data=file_data, 50 | label=f"{directory.parts[-2]}", 51 | ) 52 | 53 | self._add_title(plotter=plotter, source_files=[Path(file_name)]) 54 | self._set_axis(plotter=plotter) 55 | 56 | # make sure we add the legend to the plot 57 | plotter.legend() # pyright: ignore[reportUnknownMemberType] 58 | 59 | self._save_plot(plotter=plotter, file_path=output_file_path) 60 | self._clear_plot(plotter=plotter) 61 | 62 | def _generate_output_file_name(self, files: list[Path]) -> str: 63 | # we know we will only ever be passed a single file name 64 | output_file: str = f"{self._output_directory}/Comparison_{files[0].stem}{PLOT_FILE_EXTENSION_WITH_DOT}" 65 | 66 | return output_file 67 | -------------------------------------------------------------------------------- /post_processing/reports/README.md: -------------------------------------------------------------------------------- 1 | # Reports 2 | 3 | Produces a report in github markdown, and optionally pdf format that includes a summary table and the relevant 4 | plots from the CBT run. 5 | 6 | ## Output 7 | A report in github markdown format with a plots directory containing the required plots. The report and plots directory 8 | can be uploaded directly to github as-is and the links will be maintained. 9 | 10 | Optionally a report in pdf format can also be created. 11 | 12 | Due to the tools used there are only 6 unique colours available for the plot lines, so it is recommended to limit the 13 | comparison to 6 or less files or directories. During testing we found that more than four directories can start rendering 14 | the pdf report unreadable, so it is not recommended to create a pdf report to compare data from more than four 15 | benchmark runs. 16 | 17 | ## Standalone scripts 18 | There are actually 2 scripts provided as wrappers for the report generation: 19 | * generate_performance_report.py 20 | * generate_comparison_performance_report.py 21 | 22 | ### generate_performance_report 23 | Creates a performance report for a single benchmark run. The results must first have had the formatter run on them. 24 | 25 | ``` 26 | generate_performance_report.py --archive= 27 | --output_directory= 28 | --create_pdf 29 | ``` 30 | 31 | where: 32 | - `--archive` Required. The archive directory containing the files from the formatter 33 | - `--output_directory` Required. The directory to store the markdown report file and relevant plots. 34 | - `--create_pdf` Optional. Create a pdf report 35 | 36 | Full help text is provided by using `--help` with the scripts 37 | 38 | #### Example 39 | ```bash 40 | PYTHONPATH=/cbt /cbt/tools/generate_performance_report.py --archive="/tmp/ch_cbt_main_run" --output_directory="/tmp/reports/main" --create_pdf 41 | ``` 42 | 43 | ### generate_comparison_performance_report.py 44 | Creates a report comparing 2 or more benchmark runs. The report will only include plots and results for formatted files 45 | that are common in all the directories. 
46 | 47 | ``` 48 | generate_comparison_performance_report.py --baseline= 49 | --archives= 50 | --output_directory= 51 | --create_pdf 52 | ``` 53 | where 54 | - `--baseline` Required. The full path to the baseline results for the comparison 55 | - `--archives` Required. A comma-separated list of directories containing results to compare to the baseline 56 | - `--output_directory` Required. The directory to store the markdown report file and relevant plots. 57 | - `--create_pdf` Optional. Create a pdf report 58 | 59 | #### Examples 60 | ```bash 61 | PYTHONPATH=/cbt /cbt/tools/generate_comparison_performance_report.py --baseline="/tmp/ch_cbt_main_run" --archives="/tmp/ch_sandbox/" --output_directory="/tmp/reports/main" --create_pdf 62 | ``` -------------------------------------------------------------------------------- /example/wip-mark-testing/ceph.conf: -------------------------------------------------------------------------------- 1 | [global] 2 | osd pool default size = 1 3 | 4 | osd crush chooseleaf type = 0 5 | 6 | keyring = /tmp/cbt/ceph/keyring 7 | osd pg bits = 8 8 | osd pgp bits = 8 9 | auth supported = none 10 | log to syslog = false 11 | log file = /tmp/cbt/ceph/log/$name.log 12 | filestore xattr use omap = true 13 | auth cluster required = none 14 | auth service required = none 15 | auth client required = none 16 | 17 | public network = 192.168.10.0/24 18 | cluster network = 192.168.10.0/24 19 | rbd cache = true 20 | osd scrub load threshold = 0.01 21 | osd scrub min interval = 137438953472 22 | osd scrub max interval = 137438953472 23 | osd deep scrub interval = 137438953472 24 | osd max scrubs = 16 25 | 26 | filestore merge threshold = 40 27 | filestore split multiple = 8 28 | osd op threads = 8 29 | 30 | debug_lockdep = "0/0" 31 | debug_context = "0/0" 32 | debug_crush = "0/0" 33 | debug_mds = "0/0" 34 | debug_mds_balancer = "0/0" 35 | debug_mds_locker = "0/0" 36 | debug_mds_log = "0/0" 37 | debug_mds_log_expire = "0/0" 38 | debug_mds_migrator = "0/0" 39 | debug_buffer = "0/0" 40 | debug_timer = "0/0" 41 | debug_filer = "0/0" 42 | debug_objecter = "0/0" 43 | debug_rados = "0/0" 44 | debug_rbd = "0/0" 45 | debug_journaler = "0/0" 46 | debug_objectcacher = "0/0" 47 | debug_client = "0/0" 48 | debug_osd = "0/0" 49 | debug_optracker = "0/0" 50 | debug_objclass = "0/0" 51 | debug_filestore = "0/0" 52 | debug_journal = "0/0" 53 | debug_ms = "0/0" 54 | debug_mon = "0/0" 55 | debug_monc = "0/0" 56 | debug_paxos = "0/0" 57 | debug_tp = "0/0" 58 | debug_auth = "0/0" 59 | debug_finisher = "0/0" 60 | debug_heartbeatmap = "0/0" 61 | debug_perfcounter = "0/0" 62 | debug_rgw = "0/0" 63 | debug_hadoop = "0/0" 64 | debug_asok = "0/0" 65 | debug_throttle = "0/0" 66 | 67 | mon pg warn max object skew = 100000 68 | mon pg warn min per osd = 0 69 | mon pg warn max per osd = 32768 70 | 71 | 72 | [mon] 73 | mon data = /tmp/cbt/ceph/mon.$id 74 | 75 | [mon.a] 76 | host = burnupiY 77 | mon addr = 192.168.10.2:6789 78 | 79 | [osd.0] 80 | host = burnupiX 81 | osd data = /tmp/cbt/mnt/osd-device-0-data 82 | osd journal = /dev/disk/by-partlabel/osd-device-0-journal 83 | 84 | [osd.1] 85 | host = burnupiX 86 | osd data = /tmp/cbt/mnt/osd-device-1-data 87 | osd journal = /dev/disk/by-partlabel/osd-device-1-journal 88 | 89 | [osd.2] 90 | host = burnupiX 91 | osd data = /tmp/cbt/mnt/osd-device-2-data 92 | osd journal = /dev/disk/by-partlabel/osd-device-2-journal 93 | 94 | [osd.3] 95 | host = burnupiX 96 | osd data = /tmp/cbt/mnt/osd-device-3-data 97 | osd journal = /dev/disk/by-partlabel/osd-device-3-journal 
98 | 99 | -------------------------------------------------------------------------------- /tests/test_common.py: -------------------------------------------------------------------------------- 1 | """ Unit tests for the Common class """ 2 | 3 | import uuid 4 | import shutil 5 | import warnings 6 | import os 7 | import tempfile 8 | import unittest 9 | import unittest.mock 10 | import common 11 | 12 | VAR_NAME = "CBT_TEST_NODES" 13 | MSG = f"No test VM provided. Set {VAR_NAME} env var" 14 | 15 | def iter_nodes(nodes): 16 | """ 17 | Iterator to produce each individual node 18 | """ 19 | for node in nodes.split(","): 20 | if '@' in node: 21 | node = node.split("@", 1)[1] 22 | yield node 23 | 24 | 25 | class TestCommon(unittest.TestCase): 26 | """ Sanity tests for common.py """ 27 | def test_mkdirp(self): 28 | """ 29 | Can create a directory 30 | """ 31 | with warnings.catch_warnings(): 32 | warnings.simplefilter("ignore") 33 | with tempfile.TemporaryDirectory() as tmp: 34 | fname = os.path.join(tmp, 'a', 'b12', 'zasdasd') 35 | common.mkdir_p(fname) 36 | self.assertTrue(os.path.isdir(fname)) 37 | shutil.rmtree(fname) 38 | 39 | @unittest.skipIf(VAR_NAME not in os.environ, MSG) 40 | def test_pdsh(self): 41 | """ 42 | Can issue a valid cli to the nodes 43 | """ 44 | nodes = os.environ[VAR_NAME] 45 | out, _err = common.pdsh(nodes, "ls /").communicate() 46 | # output from the first node in the list, so we are interested 47 | # ib the contents 48 | for _node in iter_nodes(nodes): 49 | self.assertIn("etc\n", out) 50 | 51 | @unittest.skipIf(VAR_NAME not in os.environ, MSG) 52 | def test_pdsh_no_cmd(self): 53 | """ 54 | Can issue an invalid cli to the node, get rc not 0 55 | """ 56 | nodes = os.environ[VAR_NAME] 57 | proc = common.pdsh(nodes, "unknown_cmd_131321") 58 | proc.communicate() 59 | # log(proc) 60 | #self.assertNotEqual(proc.myrtncode, 0) 61 | self.assertEqual(proc.myrtncode, 0) 62 | 63 | @unittest.skipIf(VAR_NAME not in os.environ, MSG) 64 | def test_pdcp_rpdcp(self): 65 | """ 66 | Can copy a file to the nodes 67 | """ 68 | nodes = os.environ[VAR_NAME] 69 | with warnings.catch_warnings(): 70 | warnings.simplefilter("ignore") 71 | tmp = uuid.uuid4().hex 72 | fname = os.path.join('/tmp/',tmp) 73 | val = str(uuid.uuid1()) 74 | with open(fname, "w", encoding='UTF-8') as fd: 75 | fd.write(val) 76 | try: 77 | common.pdcp(nodes, None, fname, fname).communicate() 78 | out, _err = common.pdsh(nodes, "cat " + fname).communicate() 79 | for _node in iter_nodes(nodes): 80 | #self.assertIn(f"{node}: {val}\n", out) 81 | self.assertIn(out,f"{val}\n") 82 | finally: 83 | pass 84 | 85 | common.rpdcp(nodes, None, fname, os.path.dirname(fname)).communicate() 86 | try: 87 | with open(fname,encoding='UTF-8') as fd: 88 | self.assertEqual(fd.read(), val) 89 | finally: 90 | try: 91 | os.remove(fname) 92 | except OSError: 93 | pass 94 | common.pdsh(nodes, "rm " + fname).communicate() 95 | -------------------------------------------------------------------------------- /parsing/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import argparse 4 | import os 5 | import fnmatch 6 | import hashlib 7 | import database 8 | from htmlgenerator import HTMLGenerator 9 | 10 | 11 | def mkhash(values): 12 | value_string = ''.join([str(i) for i in values]) 13 | return hashlib.sha256(value_string).hexdigest() 14 | 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(description='get fio averages.') 18 | parser.add_argument( 19 | 'input_directory', 20 
| help='Directory to search.', 21 | ) 22 | 23 | args = parser.parse_args() 24 | return args 25 | 26 | 27 | def find(pattern, path): 28 | result = [] 29 | for root, dirs, files in os.walk(path): 30 | for name in files: 31 | if fnmatch.fnmatch(name, pattern): 32 | result.append(os.path.join(root, name)) 33 | return result 34 | 35 | 36 | def splits(s, d1, d2): 37 | l, _, r = s.partition(d1) 38 | m, _, r = r.partition(d2) 39 | return m 40 | 41 | 42 | def getbw(s): 43 | if "GB/s" in s: 44 | return float(s[:-4]) * 1024 45 | if "MB/s" in s: 46 | return float(s[:-4]) 47 | if "KB/s" in s: 48 | return float(s[:-4]) / 1024 49 | 50 | 51 | if __name__ == '__main__': 52 | ctx = parse_args() 53 | database.create_db() 54 | 55 | files = find('output.*', ctx.input_directory) 56 | totals = {} 57 | for inputname in files: 58 | # strip off the input directory 59 | params = inputname[len(ctx.input_directory):].split("/")[3:-1] 60 | # make readahead into an int 61 | params[3] = int(params[3][7:]) 62 | 63 | # Make op_size into an int 64 | params[4] = int(params[4][8:]) 65 | 66 | # Make cprocs into an int 67 | params[5] = int(params[5][17:]) 68 | 69 | # Make io_depth int an int 70 | params[6] = int(params[6][9:]) 71 | 72 | params_hash = mkhash(params) 73 | params = [params_hash] + params 74 | params.extend([0, 0]) 75 | database.insert(params) 76 | 77 | for line in open(inputname): 78 | if "aggrb" in line: 79 | bw = getbw(splits(line, 'aggrb=', ',')) 80 | if "READ" in line: 81 | database.update_readbw(params_hash, bw) 82 | elif "WRITE" in line: 83 | database.update_writebw(params_hash, bw) 84 | html = HTMLGenerator() 85 | html.add_html(html.read_file('/home/nhm/src/cbt/include/html/table.html')) 86 | html.add_style(html.read_file('/home/nhm/src/cbt/include/css/table.css')) 87 | html.add_script(html.read_file('/home/nhm/src/cbt/include/js/jsxcompressor.min.js')) 88 | html.add_script(html.read_file('/home/nhm/src/cbt/include/js/d3.js')) 89 | html.add_script(html.read_file('/home/nhm/src/cbt/include/js/d3var.js')) 90 | html.add_script(html.format_data(database.fetch_table(['opsize', 'testtype']))) 91 | html.add_script(html.read_file('/home/nhm/src/cbt/include/js/table.js')) 92 | 93 | print('') 94 | print('D3 Table Test ') 95 | print('') 96 | print(html.to_string()) 97 | print('') 98 | # print database.fetch_table(['opsize', 'testtype']) 99 | 100 | # get_section(['opsize', 'testtype']) 101 | 102 | # write_html() 103 | # write_data(['opsize', 'testtype']) 104 | # write_style() 105 | # write_js() 106 | -------------------------------------------------------------------------------- /post_processing/plotter/file_comparison_plotter.py: -------------------------------------------------------------------------------- 1 | """ 2 | A file containing the classes and code required to read two files stored in the common 3 | intermediate format introduced in CBT PR #319 (https://github.com/ceph/cbt/pull/319) 4 | and produce a plot of both the files on the same axes. 
5 | """ 6 | 7 | from logging import Logger, getLogger 8 | from pathlib import Path 9 | from typing import Optional 10 | 11 | import matplotlib.pyplot as plotter 12 | 13 | from post_processing.common import ( 14 | DATA_FILE_EXTENSION_WITH_DOT, 15 | PLOT_FILE_EXTENSION_WITH_DOT, 16 | get_blocksize_percentage_operation_from_file_name, 17 | read_intermediate_file, 18 | ) 19 | from post_processing.plotter.common_format_plotter import CommonFormatPlotter 20 | from post_processing.types import COMMON_FORMAT_FILE_DATA_TYPE 21 | 22 | log: Logger = getLogger("cbt") 23 | 24 | 25 | class FileComparisonPlotter(CommonFormatPlotter): 26 | """ 27 | Read the intermediate data files in the common json format and produce a 28 | curve plot of both sets of data on the same axes. Error bars are not included 29 | as they seem to make the plot harder to read and compare. 30 | """ 31 | 32 | def __init__(self, output_directory: str, files: list[str], labels: Optional[list[str]] = None) -> None: 33 | self._output_directory: str = f"{output_directory}" 34 | self._comparison_files: list[Path] = [Path(file) for file in files] 35 | self._labels: Optional[list[str]] = None 36 | 37 | def draw_and_save(self) -> None: 38 | output_file_path: str = self._generate_output_file_name(files=self._comparison_files) 39 | 40 | for file_path in self._comparison_files: 41 | index: int = self._comparison_files.index(file_path) 42 | file_data: COMMON_FORMAT_FILE_DATA_TYPE = read_intermediate_file(f"{file_path}") 43 | 44 | operation_details: tuple[str, str, str] = get_blocksize_percentage_operation_from_file_name( 45 | file_name=file_path.stem 46 | ) 47 | 48 | # If we have a label use it, otherwise set the label from the 49 | # filename. We can reliably do this as we create the file name when 50 | # we save the intermediate file. 
51 | label: str = "" 52 | if self._labels is not None: 53 | label = self._labels[index] 54 | 55 | if label == "": 56 | label = " ".join(operation_details) 57 | 58 | self._add_single_file_data(plotter=plotter, file_data=file_data, label=label) 59 | 60 | # make sure we add the legend to the plot 61 | plotter.legend() # pyright: ignore[reportUnknownMemberType] 62 | 63 | self._add_title(plotter=plotter, source_files=self._comparison_files) 64 | self._set_axis(plotter=plotter) 65 | self._save_plot(plotter=plotter, file_path=output_file_path) 66 | self._clear_plot(plotter=plotter) 67 | 68 | def set_labels(self, labels: list[str]) -> None: 69 | """ 70 | Set the labels for the plot lines 71 | """ 72 | self._labels = labels 73 | 74 | def _generate_output_file_name(self, files: list[Path]) -> str: 75 | output_file: str = f"{self._output_directory}/Comparison" 76 | 77 | for file_path in files: 78 | # get the actual file name - this will be the last part of the path 79 | file_name = file_path.parts[-1] 80 | # strip off the .json extension from each file 81 | file: str = file_name[: -len(DATA_FILE_EXTENSION_WITH_DOT)] 82 | 83 | output_file += f"_{file}" 84 | 85 | return f"{output_file}{PLOT_FILE_EXTENSION_WITH_DOT}" 86 | -------------------------------------------------------------------------------- /cbt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import argparse 3 | import collections 4 | import logging 5 | import pprint 6 | import sys 7 | 8 | import settings 9 | import benchmarkfactory 10 | from cluster.ceph import Ceph 11 | from log_support import setup_loggers 12 | 13 | logger = logging.getLogger("cbt") 14 | # Uncomment this if further debug detail (module, funcname) is needed 15 | #FORMAT = "%(asctime)s] [%(levelname)s] [%(name)s] [%(funcName)s():%(lineno)s] %(message)s" 16 | #logging.basicConfig(format=FORMAT, force=True) 17 | #logger.setLevel(logging.DEBUG) 18 | 19 | 20 | def parse_args(args): 21 | parser = argparse.ArgumentParser(description='Continuously run ceph tests.') 22 | parser.add_argument( 23 | '-a', '--archive', 24 | required=True, 25 | help='Directory where the results should be archived.', 26 | ) 27 | 28 | parser.add_argument( 29 | '-c', '--conf', 30 | required=False, 31 | help='The ceph.conf file to use.', 32 | ) 33 | 34 | parser.add_argument( 35 | 'config_file', 36 | help='YAML config file.', 37 | ) 38 | 39 | return parser.parse_args(args[1:]) 40 | 41 | 42 | def main(argv): 43 | setup_loggers() 44 | ctx = parse_args(argv) 45 | settings.initialize(ctx) 46 | 47 | logger.debug("Settings.cluster:\n %s", 48 | pprint.pformat(settings.cluster).replace("\n", "\n ")) 49 | 50 | global_init = collections.OrderedDict() 51 | rebuild_every_test = settings.cluster.get('rebuild_every_test', False) 52 | archive_dir = settings.cluster.get('archive_dir') 53 | 54 | 55 | # FIXME: Create ClusterFactory and parametrically match benchmarks and clusters. 56 | cluster = Ceph(settings.cluster) 57 | 58 | # Only initialize and prefill upfront if we aren't rebuilding for each test. 59 | if not rebuild_every_test: 60 | if not cluster.use_existing: 61 | cluster.initialize() 62 | # Why does it need to iterate for the creation of benchmarks? 
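# (Answering the question above: benchmarkfactory.get_all() is iteration-scoped -- the archive paths embed the iteration -- so benchmark objects have to be rebuilt for every iteration, even though initialize()/prefill() run only once per benchmark class via the global_init bookkeeping below.)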
63 | for iteration in range(settings.cluster.get("iterations", 0)): 64 | benchmarks = benchmarkfactory.get_all(archive_dir, cluster, iteration) 65 | for b in benchmarks: 66 | if b.exists(): 67 | continue 68 | if b.getclass() not in global_init: 69 | b.initialize() 70 | b.initialize_endpoints() 71 | b.prefill() 72 | b.cleanup() 73 | # Only initialize once per class. 74 | global_init[b.getclass()] = b 75 | 76 | #logger.debug("Settings.cluster.is_teuthology:%s",settings.cluster.get('is_teuthology', False)) 77 | # Run the benchmarks 78 | return_code = 0 79 | try: 80 | for iteration in range(settings.cluster.get("iterations", 0)): 81 | benchmarks = benchmarkfactory.get_all(archive_dir, cluster, iteration) 82 | for b in benchmarks: 83 | if not b.exists() and not settings.cluster.get('is_teuthology', False): 84 | continue 85 | 86 | if rebuild_every_test: 87 | cluster.initialize() 88 | b.initialize() 89 | # Always try to initialize endpoints before running the test 90 | b.initialize_endpoints() 91 | logger.info("Running benchmark %s == iteration %d ==", b, iteration) 92 | b.run() 93 | except Exception: 94 | return_code = 1 # FAIL 95 | logger.exception("During tests") 96 | 97 | return return_code 98 | 99 | if __name__ == '__main__': 100 | exit(main(sys.argv)) 101 | -------------------------------------------------------------------------------- /tools/crimson/seastore_metrics_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | TOP_DIR=$(cd $(dirname "$0") && pwd) 4 | 5 | # configurations 6 | RESULT_DIR="$TOP_DIR/results" 7 | BUILD_DIR="$HOME/ceph/build/" 8 | 9 | TOTAL_ROUND=10 10 | ROUND_SECONDS=1 11 | 12 | WITH_RADOS_BENCH=true 13 | BENCH_POOL="pool-name" 14 | BENCH_IODEPTH=64 15 | BENCH_TIME=$(( ($TOTAL_ROUND - 1) * $ROUND_SECONDS - 16 | ($ROUND_SECONDS > 120 ? 120 : $ROUND_SECONDS) )) 17 | 18 | METRICS_ENABLE=true 19 | 20 | # require nvme and iostat, interval > 180s 21 | STATS_ENABLE=true 22 | STATS_DEV="/dev/dev-name" 23 | 24 | collect_metrics() { 25 | if ! $METRICS_ENABLE; then 26 | return 27 | fi 28 | local current_round=$1 29 | local current_ms=$2 30 | local file_name=result_${current_round}_metrics_${current_ms}.log 31 | echo "start collect metrics to $file_name ..." 32 | CEPH_DEV=1 ./bin/ceph tell osd.0 dump_metrics 2>&1 | tee $RESULT_DIR/$file_name > /dev/null 33 | echo "finish collect metrics" 34 | } 35 | 36 | collect_stats() { 37 | if ! $STATS_ENABLE; then 38 | return 39 | fi 40 | local current_round=$1 41 | local current_ms=$2 42 | local file_name=result_${current_round}_stats_${current_ms}.log 43 | echo "start collect stats to $file_name ..." 44 | if [ `iostat -k -d $STATS_DEV | awk 'NR == 3 {print $5}'` = "kB_dscd/s" ]; then 45 | local read_wrtn_dscd_kb=( `iostat -k -d $STATS_DEV | awk 'NR == 4 {print $6, $7, $8}'` ) 46 | elif [ `iostat -k -d $STATS_DEV | awk 'NR == 3 {print $5}'` = "kB_read" ]; then 47 | local read_wrtn_dscd_kb=( `iostat -k -d $STATS_DEV | awk 'NR == 4 {print $5, $6}'` ) 48 | read_wrtn_dscd_kb[2]=0 49 | else 50 | echo "Warning! Unexpected iostat output format. Modify the parsing according to the actual output of the iostat command" 51 | exit 1 52 | fi 53 | local nand_host_sectors=( `nvme intel smart-log-add $STATS_DEV | awk 'NR == 14 || NR == 15 {print $5}'` ) 54 | if [ ${#nand_host_sectors[@]} -lt 2 ]; then 55 | echo "Error 
retrieving the nvme smart-log parameters, please try running: nvme intel smart-log-add /dev/dev-name" 56 | exit 1 57 | fi 58 | tee $RESULT_DIR/$file_name > /dev/null << EOT 59 | { 60 | "read_kb": { 61 | "value": ${read_wrtn_dscd_kb[0]} 62 | }, 63 | "wrtn_kb": { 64 | "value": ${read_wrtn_dscd_kb[1]} 65 | }, 66 | "dscd_kb": { 67 | "value": ${read_wrtn_dscd_kb[2]} 68 | }, 69 | "nand_sect": { 70 | "value": ${nand_host_sectors[0]} 71 | }, 72 | "host_sect": { 73 | "value": ${nand_host_sectors[1]} 74 | } 75 | } 76 | EOT 77 | echo "finish collect stats" 78 | } 79 | 80 | run_rados_bench() { 81 | if ! $WITH_RADOS_BENCH; then 82 | return 83 | fi 84 | local bench_cmd="CEPH_DEV=1 ./bin/rados bench -p $BENCH_POOL $BENCH_TIME write -b 4096 --concurrent-ios=$BENCH_IODEPTH --no-cleanup" 85 | local file_name=result_0_radosbench.log 86 | echo "start rados bench $BENCH_TIME seconds to $file_name ..." 87 | eval "$bench_cmd" | tee $RESULT_DIR/$file_name & 88 | } 89 | 90 | # Note: currently only supports a single OSD to measure write amplification 91 | # correctly. 92 | if [ -e $RESULT_DIR ]; then 93 | echo "'$RESULT_DIR' dir already exists, remove it or select a different one" 94 | exit 1 95 | fi 96 | 97 | mkdir -p $RESULT_DIR 98 | cd $BUILD_DIR 99 | CURRENT_ROUND=0 100 | TARGET_ROUND=$(( CURRENT_ROUND + TOTAL_ROUND )) 101 | CURRENT_MS=$(($(date +%s%N)/1000000)) 102 | collect_metrics $CURRENT_ROUND $CURRENT_MS 103 | collect_stats $CURRENT_ROUND $CURRENT_MS 104 | while [ $CURRENT_ROUND -lt $TARGET_ROUND ] 105 | do 106 | (( ++CURRENT_ROUND )) 107 | echo "start round $CURRENT_ROUND of $TARGET_ROUND for ${ROUND_SECONDS}s ..." 108 | sleep $ROUND_SECONDS 109 | CURRENT_MS=$(($(date +%s%N)/1000000)) 110 | collect_metrics $CURRENT_ROUND $CURRENT_MS 111 | collect_stats $CURRENT_ROUND $CURRENT_MS 112 | echo "finish round $CURRENT_ROUND" 113 | echo 114 | if [ $CURRENT_ROUND -eq 1 ]; then 115 | run_rados_bench 116 | fi 117 | done 118 | echo "done!" 119 | cd $TOP_DIR 120 | -------------------------------------------------------------------------------- /docs/AutomaticUnitTestGeneration.md: -------------------------------------------------------------------------------- 1 | # tools/serialise_benchmark.py -- Automatic Unit Test Generation 2 | 3 | ## Description: 4 | 5 | This is a standalone tool to generate unit tests for CBT. 6 | 7 | The execution of the script produces as output: 8 | 9 | 1. a new baseline, tools/baseline.json, which is a serialisation of each of the Benchmark class instances, 10 | 2. a new set of tests/test_bm{benchmark}.py, each consisting of a set of sanity unit tests. 11 | 12 | ## Requirements: 13 | 14 | The Python modules pytest and pytest-xdist should be installed on the machine that will run the tool; this can be the same as the one that drives CBT. 15 | 16 | ## Usage: 17 | 18 | The following is an example of the execution of the script: 19 | 20 | ```bash 21 | # python3 tools/serialise_benchmark.py 22 | ``` 23 | An example of the expected normal output is shown below. 24 | 25 | ![cbt_utests_gen](./cbt_utests_gen.png) 26 | 27 | This creates (or updates, if they already exist) the set of unit tests for the supported benchmarks; a sketch of one such test is shown below. 
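As a sketch, each generated unit test asserts that one attribute of a freshly constructed benchmark object matches the value recorded in the baseline; the real generated output can be seen in tests/test_bm_nullbench.py (the benchmark and attribute names below are just one example):

```python
def test_valid_cmd_path(self):
    """ Basic sanity attribute identity cmd_path check """
    b = benchmarkfactory.get_object(self.archive_dir,
                                    self.cluster, 'nullbench', self.iteration)
    # The freshly built object must match the serialised baseline value.
    self.assertEqual(self.bl_json['nullbench']['cmd_path'], b.__dict__['cmd_path'])
```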
28 | 29 | ## Execution of unit tests: 30 | 31 | The unit tests can be executed from the command line as follows: 32 | 33 | ```bash 34 | # python3 -m pytest -p no:cacheprovider tests/ 35 | ``` 36 | An example output showing a successful execution: 37 | 38 | ![cbt_utests_run](./cbt_utests_run.png) 39 | 40 | Note: the tests skipped above require an environment variable to be defined to identify the target nodes 41 | for exercising pdsh. 42 | 43 | The following is an example to execute the pdsh tests: 44 | 45 | ```bash 46 | # export CBT_TEST_NODES=root@ceph2,root@ceph4 47 | # export PDSH_SSH_ARGS_APPEND="-p 8023 -o StrictHostKeyChecking=no -v -E /tmp/ssh.out" 48 | ``` 49 | 50 | ## Generation of Unit tests 51 | 52 | The main idea is the concept of **referential transparency** (see for example [ref_transparency](https://stackoverflow.com/questions/210835/what-is-referential-transparency)). Basically, in the functional programming 53 | paradigm, it means that given a function and an input value, you will always receive the same output. The test 54 | generator takes advantage of this since the constructors of the Benchmark classes should always produce instances 55 | with the same initial state. The class Benchmark in CBT expects as an argument an object from a .yaml file (the test plan, which includes a Cluster type object). If we always provide a fixed minimal cluster object to the 56 | constructor of the Benchmark class, we have an _invariant_ that we can use to test that each of the attributes 57 | of the Benchmark classes has the same value across runs. 58 | 59 | In other words, each class constructor of the CBT Benchmark class behaves like a function and always produces 60 | object instances initialised with the same values, provided the same fixed cluster instance as argument. 61 | 62 | 63 | * For each Benchmark class supported, the tool constructs a serialisation of the object instance, and saves them 64 | in tools/baseline.json. 65 | * To prevent tampering, an md5sum of the contents of the .json file is calculated (see the sketch at the end of this document). 66 | * For each Benchmark class supported, the tool uses a boilerplate code template to produce unit tests. Each unit test verifies that a supported attribute of the benchmark class is initialised as recorded by the baseline.json. 67 | * When executed, the unit tests first perform a sanity check to ensure that the baseline.json has not changed since the creation of the unit tests, and then proceed to verify each attribute of each Benchmark class. This detects 68 | whether some attributes have been changed, replaced or deleted, which is especially useful for catching regressions 69 | during code refactoring. 70 | 71 | 72 | ## Recommended workflow 73 | 74 | 75 | * Before starting a code refactoring effort, run the unit tests: they should all pass as shown above. 76 | * Make the intended code change -- for example, remove a benchmark.py class module, or refine with new attributes, 77 | or delete some existing attributes. 78 | * Run the unit tests: some should fail accordingly to indicate the missing attributes that existed in the past but are no longer present in the current benchmark class module. 79 | * Run the tool serialise_benchmark.py. This will regenerate the baseline.json and the unit tests. 80 | * Run the unit tests: they should now all pass. 81 | * Iterate if required. 
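To make the tamper check concrete, the following is a minimal sketch of the md5 fingerprinting described above (the helper name is illustrative; the actual check is embedded in the generated test modules):

```python
import hashlib


def baseline_md5(path: str = "tools/baseline.json") -> str:
    """Return the md5 hex digest of the serialised baseline file."""
    with open(path, "rb") as f:
        return hashlib.md5(f.read()).hexdigest()


# Each generated test module records this digest at generation time and
# re-computes it when the tests run; a mismatch means baseline.json was
# modified after the unit tests were generated, so the results cannot be
# trusted.
```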
82 | -------------------------------------------------------------------------------- /example/rbd_fio_test.yml: -------------------------------------------------------------------------------- 1 | cluster: 2 | use_existing: True 3 | osds_per_node: 1 4 | user: 'root' 5 | head: "sv1-ceph3.ssd.hursley.ibm.com" 6 | #head: "ceph3" 7 | #clients: [localhost] 8 | clients: ["sv1-ceph3.ssd.hursley.ibm.com"] 9 | #osds: ["localhost"] 10 | osds: ["sv1-ceph3.ssd.hursley.ibm.com"] 11 | #mons: ["localhost"] 12 | iterations: 1 13 | conf_file: '/ceph/build/ceph.conf' 14 | ceph-osd_cmd: '/ceph/build/bin/ceph-osd' 15 | ceph-mon_cmd: '/ceph/build/bin/ceph-mon' 16 | ceph-run_cmd: '/ceph/build/bin/ceph-run' 17 | ceph-rgw_cmd: '/ceph/build/bin/radosgw' 18 | ceph-mgr_cmd: '/ceph/build/bin/ceph-mgr' 19 | ceph-mds_cmd: '/ceph/build/bin/ceph-mds' 20 | ceph-authtool_cmd: '/ceph/build/bin/ceph-authtool' 21 | radosgw-admin_cmd: '/ceph/build/bin/radosgw-admin' 22 | ceph_cmd: '/ceph/build/bin/ceph' 23 | ceph-fuse_cmd: '/ceph/build/bin/ceph-fuse' 24 | rados_cmd: '/ceph/build/bin/rados' 25 | rbd_cmd: '/ceph/build/bin/rbd' 26 | rbd-nbd_cmd: '/ceph/build/bin/rbd-nbd' 27 | rbd-fuse_cmd: '/ceph/build/bin/rbd-fuse' 28 | tmp_dir: "/tmp/cbt" 29 | pid_dir: "/ceph/build/out/" 30 | pdsh_ssh_args: "-a -p 8023 -x -l%u %h" 31 | pool_profiles: 32 | rbd: 33 | pg_size: 256 34 | pgp_size: 256 35 | #replication: 1 36 | monitoring_profiles: 37 | # These monitor only the OSD node/process 38 | collectl: 39 | # These options indicate: 40 | # collect 30 samples, summary mem and CPU util, each 5 secs for CPU samples, 10 sec for all others, 41 | # process options: threads utilisation 42 | # filter samples for CPUs 0 to 3 (e.g. Crimson IO reactor cores), 43 | # filter samples for process osd (redundant), 44 | # produce gnuplot file data format (aka csv) in output file (gzipped) 45 | # 46 | args: '-c 30 -sZC -i 5:10 --procopts t --cpufilt 0-3 --procfilt cosd -P -f {collectl_dir}' 47 | perf: 48 | perf_cmd: 'perf' 49 | # This collects 10 secs of data to produce flame graphs 50 | args: 'record -e cycles:u --call-graph dwarf -i -p {pid} -o {perf_dir}/{pid}_osd_perf.out sleep 10' 51 | top: 52 | top_cmd: 'top' 53 | # This collects 30 samples, Core and thread CPU utilisation 54 | args: '-b -H -1 -p {pid} -n 30 > {top_dir}/{pid}_osd_top.out' 55 | benchmarks: 56 | librbdfio: 57 | cmd_path: '/usr/local/bin/fio' 58 | # ToDo: consider a subdict that defines the global FIO options 59 | # but we need to ensure backwards compatibility with existing .yaml 60 | wait_pgautoscaler_timeout: 2 # in secs 61 | use_existing_volumes: False 62 | no_sudo: True 63 | ## Global FIO options 64 | pool_profile: 'rbd' 65 | vol_size: 1024 # Volume size in Megabytes 66 | #vol_name: 'fio_test_%d' # TBC. 
valid python format string 67 | #rbdname: 'fio_test_%d' 68 | idle_monitor_sleep: 5 # in seconds 69 | fio_out_format: 'json' 70 | iterations: 3 71 | time: 300 # length of run 72 | ramp: 30 # ramp up time 73 | log_avg_msec: 100 74 | log_iops: True 75 | log_bw: True 76 | log_lat: True 77 | poolname: 'rbd' 78 | mode: 'randwrite' 79 | iodepth: [1, 4, 16] 80 | numjobs: [1, 4, 8] 81 | op_size: [4096] # block IO size in bytes 82 | procs_per_client: [1] 83 | volumes_per_client: [1] # volumes per ceph node 84 | # for tests involving specific CPU cores: 85 | fio_cpu_set: '15-15' 86 | # Optional FIO options for the prefill stage 87 | prefill: 88 | blocksize: '4M' 89 | numjobs: 1 90 | # Each block below uses its own local options during its execution 91 | workloads: 92 | precondition: 93 | jobname: 'precond1rw' 94 | mode: 'randwrite' 95 | numjobs: [ 1 ] 96 | iodepth: [ 4 ] 97 | monitor: False # whether to run the monitors alongside the test 98 | test1: 99 | jobname: 'rr' 100 | mode: 'randread' 101 | numjobs: [ 1 ] 102 | iodepth: [ 1 ] 103 | #numjobs: [ 1, 4, 8 ] 104 | #iodepth: [ 1, 4, 8 ] 105 | # ToDo: can we add a list of the monitoring subset we are interested in for this workload? 106 | test2: 107 | jobname: 'rw' 108 | mode: 'randwrite' 109 | numjobs: [ 1 ] 110 | iodepth: [ 1 ] 111 | #numjobs: [ 1, 4, 8 ] 112 | #iodepth: [ 1, 4, 8 ] 113 | -------------------------------------------------------------------------------- /compare.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import argparse 4 | import os 5 | import logging 6 | import sys 7 | import yaml 8 | 9 | import settings 10 | import benchmarkfactory 11 | from cluster.ceph import Ceph 12 | from log_support import setup_loggers 13 | 14 | logger = logging.getLogger("cbt") 15 | 16 | 17 | # Github Flavored Markdown elements 18 | class Table: 19 | def __init__(self): 20 | self.text = '' 21 | self.cols = 0 22 | 23 | def add_headers(self, *headers): 24 | text = ' | '.join(headers) + '\n' 25 | text += ' | '.join('-' * len(h) for h in headers) + '\n' 26 | self.text += text 27 | self.cols = len(headers) 28 | 29 | def add_cells(self, *cells): 30 | assert(self.cols == len(cells)) 31 | text = ' | '.join(str(c) for c in cells) + '\n' 32 | self.text += text 33 | 34 | def __str__(self): 35 | return self.text 36 | 37 | 38 | class Heading: 39 | def __init__(self, level, text): 40 | self.text = '#' * level + ' ' + text + '\n' 41 | 42 | def __str__(self): 43 | return self.text 44 | 45 | 46 | class Heading3(Heading): 47 | def __init__(self, text): 48 | super().__init__(3, text) 49 | 50 | 51 | def main(): 52 | setup_loggers() 53 | parser = argparse.ArgumentParser(description='query and compare CBT test results') 54 | parser.add_argument( 55 | '-a', '--archive', 56 | required=True, 57 | help='Directory where the results to be compared are archived.') 58 | parser.add_argument( 59 | '-b', '--baseline', 60 | required=True, 61 | help='Directory where the baseline results are archived.') 62 | parser.add_argument( 63 | '-v', '--verbose', 64 | action='store_true', 65 | help='be chatty') 66 | parser.add_argument( 67 | '--output', 68 | help='write result in markdown to specified file', 69 | type=argparse.FileType('w')) 70 | ctx = parser.parse_args(sys.argv[1:]) 71 | # settings.initialize() expects ctx.config_file and ctx.conf 72 | ctx.config_file = os.path.join(ctx.archive, 'results', 'cbt_config.yaml') 73 | ctx.conf = None 74 | settings.initialize(ctx) 75 | 76 | results = [] 77 | for iteration in 
range(settings.cluster.get('iterations', 0)): 78 | cluster = Ceph(settings.cluster) 79 | benchmarks = list(zip(benchmarkfactory.get_all(ctx.archive, cluster, iteration), 80 | benchmarkfactory.get_all(ctx.baseline, cluster, iteration))) 81 | for current, baseline in benchmarks: 82 | if not current.exists(True): 83 | logger.error("tested: %s result does not exist in %s", 84 | current, ctx.archive) 85 | break 86 | if not baseline.exists(True): 87 | logger.error("baseline: %s result does not exist in %s", 88 | baseline, ctx.baseline) 89 | break 90 | results.extend(current.evaluate(baseline)) 91 | 92 | nr_accepted = sum(result.accepted for result in results) 93 | if ctx.verbose: 94 | for result in results: 95 | if result.accepted: 96 | logger.info(result) 97 | else: 98 | logger.warning(result) 99 | 100 | nr_tests = len(results) 101 | nr_rejected = nr_tests - nr_accepted 102 | 103 | if ctx.output: 104 | heading = None 105 | if nr_rejected: 106 | heading = Heading3(f'{nr_rejected} out of {nr_tests} failed') 107 | else: 108 | heading = Heading3(f'all {nr_tests} tests passed') 109 | ctx.output.write(str(heading)) 110 | 111 | table = Table() 112 | table.add_headers('run', 'metric', 'baseline', 'result', 'accepted') 113 | for r in results: 114 | table.add_cells(r.run, r.alias, r.baseline, r.result, 115 | ' ' if r.accepted else ':x:') 116 | ctx.output.write(str(table)) 117 | 118 | if nr_rejected > 0: 119 | logger.warning("%d tests failed out of %d", nr_rejected, len(results)) 120 | sys.exit(1) 121 | else: 122 | logger.info("All %d tests passed.", len(results)) 123 | 124 | 125 | if __name__ == '__main__': 126 | main() 127 | -------------------------------------------------------------------------------- /tools/fio_objectstore_tools/analyze.py: -------------------------------------------------------------------------------- 1 | #!env python3 2 | 3 | import json 4 | import os 5 | import sys 6 | import itertools 7 | import argparse 8 | import sys 9 | import subprocess 10 | import numpy as np 11 | 12 | from summarize import dump_target, generate_summary 13 | from traces import open_trace, iterate_structured_trace 14 | from graph import graph, Scatter, Histogram 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument( 18 | 'target', metavar='T', type=str, help='target results directory') 19 | parser.add_argument( 20 | '--match', type=str, help='json for matching', default='{}') 21 | parser.add_argument( 22 | '--output', type=str, help='output directory') 23 | parser.add_argument( 24 | '--generate-graphs', action='store_true', help='generate graphs') 25 | parser.add_argument( 26 | '--detailed', action='store_true', 27 | help='generate more detailed graphs') 28 | parser.add_argument( 29 | '--drop-first', type=float, 30 | help='drop', default=10.0) 31 | parser.add_argument( 32 | '--drop-after', type=float, 33 | help='drop') 34 | parser.add_argument( 35 | '--filter-latency-above', type=float, 36 | help='filter out latency above given percentile') 37 | parser.add_argument( 38 | '--filter-latency-below', type=float, 39 | help='filter out latency below given percentile') 40 | 41 | 42 | def get_targets(directory): 43 | contents = os.listdir(directory) 44 | if 'ceph.conf' in contents: 45 | return [(os.path.basename(directory), directory)] 46 | else: 47 | return [(x, os.path.join(directory, x)) for x in contents] 48 | 49 | 50 | args = parser.parse_args() 51 | 52 | match = json.loads(args.match) 53 | targets = get_targets(args.target) 54 | projected = [dump_target(name, target) for name, target in 
targets] 55 | 56 | def do_filter(match, input): 57 | def cond(x): 58 | return all(x[1]['config'].get(k) == v for k, v in list(match.items())) 59 | return list(filter(cond, input)) 60 | 61 | filtered_targets, filtered = list(zip(*do_filter(match, list(zip(targets, projected))))) 62 | 63 | summary = generate_summary(filtered, match) 64 | 65 | graph_filename = lambda x: None 66 | if args.output: 67 | subprocess.run(['mkdir', '-p', args.output], check=False) 68 | graph_filename = lambda x: os.path.join(args.output, x + '.png') 69 | 70 | def do_mask(above, below): 71 | def f(lat): 72 | l, u = np.percentile( 73 | lat, 74 | [below if below else 0.0, 75 | above if above else 100.0], 76 | interpolation='linear') 77 | 78 | return (lat > l) & (lat < u) 79 | return f 80 | 81 | masker = None 82 | mask_params = None 83 | 84 | if args.filter_latency_above or args.filter_latency_below: 85 | mask_params = ['latency'] 86 | masker = do_mask(args.filter_latency_above, args.filter_latency_below) 87 | 88 | TO_GRAPH_SMALL = [ 89 | [Scatter(x, 'commit_latency_no_throttle', ymax=0.99) for x in 90 | ['current_kv_throttle_cost', 'current_deferred_throttle_cost']], 91 | [Scatter(x, 'throughput', ymax=0.99) for x in 92 | ['current_kv_throttle_cost', 'current_deferred_throttle_cost']] 93 | ] 94 | 95 | TO_GRAPH_LARGE = [ 96 | [Scatter(*x, ymax=0.99) for x in 97 | [('current_kv_throttle_cost', 'commit_latency_no_throttle'), 98 | ('current_deferred_throttle_cost', 'commit_latency_no_throttle'), 99 | ('total_throttle', 'commit_latency_no_throttle')]], 100 | [Scatter(*x, ymax=0.99) for x in 101 | [('current_kv_throttle_cost', 'throughput'), 102 | ('current_deferred_throttle_cost', 'throughput'), 103 | ('total_throttle', 'throughput')]], 104 | [Histogram(x) for x in 105 | ['current_kv_throttle_cost', 'current_deferred_throttle_cost', 'total_throttle']] 106 | ] 107 | 108 | if args.generate_graphs: 109 | for name, path in filtered_targets: 110 | print("Generating graph for {}, path: {}".format(name, path)) 111 | events = iterate_structured_trace(open_trace(path)) 112 | if args.drop_first: 113 | events = itertools.dropwhile( 114 | lambda x: x.get_start() < args.drop_first, events) 115 | if args.drop_after: 116 | events = itertools.takewhile( 117 | lambda x: x.get_start() < args.drop_after, events) 118 | 119 | graph( 120 | events, name, graph_filename(name), 121 | TO_GRAPH_LARGE if args.detailed else TO_GRAPH_SMALL, 122 | mask_params, masker) 123 | 124 | json.dump(summary, sys.stdout, sort_keys=True, indent=2) 125 | -------------------------------------------------------------------------------- /tools/makecephconf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import argparse 4 | import os 5 | import subprocess 6 | import sys 7 | import yaml 8 | import time 9 | import copy 10 | 11 | def read_config(config_file): 12 | config = {} 13 | try: 14 | with open(config_file) as f: 15 | g = yaml.safe_load_all(f) 16 | for new in g: 17 | config.update(new) 18 | except IOError as e: 19 | raise argparse.ArgumentTypeError(str(e)) 20 | return config 21 | 22 | def parse_args(): 23 | parser = argparse.ArgumentParser(description='Continuously run ceph tests.') 24 | parser.add_argument( 25 | '--target', 26 | required = True, 27 | help = 'Directory where the config files should go.', 28 | ) 29 | parser.add_argument( 30 | 'config_file', 31 | help = 'YAML config file.', 32 | ) 33 | args = parser.parse_args() 34 | return args 35 | 36 | def populate(l, name, value): 37 | name = 
name.replace("_", " ") 38 | l.append(" %s = %s" % (name, value)) 39 | 40 | def mkosds(lists, yaml): 41 | i = 0 42 | for server in yaml.get('osd_servers', []): 43 | for j in range(0, yaml.get('osds_per_server', 0)): 44 | name = "osd.%d" % i 45 | lists[name] = [] 46 | lists[name].append(" host = %s" % server) 47 | lists[name].append(" osd data = /srv/osd-device-%d-data" % j) 48 | lists[name].append(" osd journal = /srv/osd-device-%d-data/journal" % j) 49 | # lists[name].append(" osd journal = /dev/disk/by-partlabel/osd-device-%d-journal" % j) 50 | i += 1 51 | 52 | def writescript(f, param, value, conf): 53 | for fs,rtconf in sorted(runtests_conf.items()): 54 | pdir = param 55 | if value: 56 | pdir = "%s_%s" % (param, value) 57 | f.write("%s --conf %s --archive %s/%s/%s %s\n" % (runtests_exec, conf, outdir, fs, pdir, rtconf)) 58 | 59 | def parametric(lists, yaml): 60 | if "global" not in lists: 61 | lists["global"] = [] 62 | scriptname = "%s/runme.sh" % target 63 | f = open(scriptname,'w') 64 | f.write("#!/bin/bash\n") 65 | 66 | # the default 67 | filename = "%s/default.ceph.conf" % target 68 | writefile(lists, filename) 69 | writescript(f, "default", "", filename) 70 | 71 | for param,value in sorted(yaml.items()): 72 | if (isinstance(value, dict)): 73 | lc = copy.deepcopy(lists) 74 | for k,v in sorted(value.items()): 75 | populate(lc.get("global"), k, v) 76 | filename = "%s/%s.ceph.conf" % (target, param) 77 | writefile(lc, filename) 78 | writescript(f, param, "", filename) 79 | elif (isinstance(value, list)): 80 | for vi in value: 81 | lc = copy.deepcopy(lists) 82 | populate(lc.get("global"), param, vi) 83 | filename = "%s/%s_%s.ceph.conf" % (target, param, vi) 84 | writefile(lc, filename) 85 | writescript(f, param, vi, filename) 86 | else: 87 | lc = copy.deepcopy(lists) 88 | populate(lc.get("global"), param, value) 89 | filename = "%s/%s_%s.ceph.conf" % (target, param, value) 90 | writefile(lc, filename) 91 | writescript(f, param, value, filename) 92 | f.close() 93 | os.chmod(scriptname, 0o755) 94 | 95 | def writefile(lists, out): 96 | f = open(out,'w') 97 | # print out 98 | for k,v in sorted(lists.items()): 99 | f.write("[%s]\n" % k) 100 | for line in v: f.write("%s\n" % line) 101 | f.write("\n") 102 | f.close() 103 | 104 | target = "" 105 | outdir = "" 106 | runtests_exec = "" 107 | runtests_conf = {} 108 | 109 | if __name__ == '__main__': 110 | ctx = parse_args() 111 | config = read_config(ctx.config_file) 112 | 113 | target = os.path.abspath(ctx.target) 114 | os.system("mkdir -p -m0755 -- %s" % target) 115 | 116 | settings = config.get("settings", {}) 117 | runtests_exec = settings.get("runtests_exec", "") 118 | runtests_conf = settings.get("runtests_conf", {}) 119 | outdir = settings.get("outdir", "") 120 | 121 | default = config.get("default", {}) 122 | lists = {} 123 | for section in default: 124 | lists[section] = [] 125 | for k,v in default.get(section).items(): 126 | populate(lists.get(section), k, v) 127 | mkosds(lists, config.get("settings", {})) 128 | parametric(lists, config.get("parametric", {})) 129 | -------------------------------------------------------------------------------- /tools/fio_objectstore_tools/summarize.py: -------------------------------------------------------------------------------- 1 | #!env python3 2 | 3 | import os 4 | import json 5 | from run import get_fio_output, get_base_config 6 | 7 | def populate_args(parser): 8 | parser.add_argument('target', metavar='T', type=str, help='target results directory') 9 | parser.add_argument('--match', type=str, 
help='json for matching', default='{}') 10 | parser.add_argument('--output', type=str, help='output directory') 11 | parser.add_argument('--generate-graphs', action='store_true', help='generate graphs') 12 | parser.set_defaults(func=summarize) 13 | 14 | def project(name, config, fio_stats, perf_stats): 15 | def f(op): 16 | return { 17 | 'iops_min': op['iops_min'], 18 | 'iops_max': op['iops_max'], 19 | 'iops': op['iops'], 20 | 'clat_min_ns': op['clat_ns']['min'], 21 | 'clat_max_ns': op['clat_ns']['max'], 22 | 'clat_mean_ns': op['clat_ns']['mean'], 23 | 'clat_median_ns': op['clat_ns']['percentile']['50.000000'], 24 | 'clat_99.9_ns': op['clat_ns']['percentile']['99.900000'], 25 | 'slat_min_ns': op['slat_ns']['min'], 26 | 'slat_max_ns': op['slat_ns']['max'], 27 | 'slat_mean_ns': op['slat_ns']['mean'], 28 | } 29 | fio = dict(((op, f(fio_stats['jobs'][0][op])) for op in ['read', 'write'])) 30 | 31 | wanted_perf = [  # NOTE: currently unused; the filter below keeps every '_lat' counter instead 32 | 'commit_lat', 33 | 'kv_commit_lat', 34 | 'kv_final_lat', 35 | 'kv_flush_lat', 36 | 'kv_sync_lat', 37 | 'state_deferred_aio_wait_lat', 38 | 'state_deferred_cleanup_lat', 39 | 'state_deferred_queued_lat', 40 | 'state_kv_committing_lat' 41 | ] 42 | 43 | perf = { 44 | k: v['avgtime'] for k, v in 45 | [x for x in list(perf_stats['perfcounter_collection']['bluestore'].items()) if '_lat' in x[0]] 46 | } 47 | 48 | return { 49 | 'fio': fio, 50 | 'config': config, 51 | 'name': name, 52 | 'perf': perf, 53 | } 54 | 55 | def dump_target(name, directory): 56 | fio_output = {} 57 | with open(get_fio_output(directory)) as f: 58 | decoder = json.JSONDecoder() 59 | fio_output, _ = decoder.raw_decode(f.read()) 60 | #fio_output = json.load(f) 61 | perf_output = {} 62 | with open(os.path.join(directory, 'perf_counters.json')) as f: 63 | perf_output = json.load(f) 64 | with open(get_base_config(directory)) as f: 65 | base_config = json.load(f) 66 | return project(name, base_config, fio_output, perf_output) 67 | 68 | def generate_summary(filtered, match): 69 | def config_to_frozen(config, match): 70 | ret = dict([x for x in list(config.items()) if x[0] not in match]) 71 | if 'run' in ret: 72 | del ret['run'] 73 | return frozenset(sorted(ret.items())) 74 | 75 | def group_by_config(input): 76 | grouped = {} 77 | for run in filtered: 78 | key = config_to_frozen(run['config'], match) 79 | if key not in grouped: 80 | grouped[key] = [] 81 | grouped[key].append(run) 82 | return [{'config': dict(list(k)), 'runs': v} for k, v in list(grouped.items())] 83 | 84 | grouped = group_by_config(filtered) 85 | 86 | def union_top_n(group): 87 | ret = set() 88 | for run in group: 89 | ret = ret.union( 90 | [k for v, k in sorted(((a, b) for b, a in list(run['perf'].items())))][::-1][:5] 91 | ) 92 | return ret 93 | 94 | def project_run(perfs): 95 | def ret(run): 96 | return { 97 | 'tp': run['fio']['write']['iops'], 98 | 'lat': run['fio']['write']['clat_mean_ns'] / 1000000000.0, 99 | 'slat': run['fio']['write']['slat_mean_ns'] / 1000000000.0, 100 | 'perf': dict([x for x in list(run['perf'].items()) if x[0] in perfs]) 101 | } 102 | return ret 103 | 104 | def sort_by(f, input): 105 | return [v for (_, _, v) in sorted([(f(x[0]), x[1], x[0]) for x in zip(input, list(range(len(input))))])] 106 | 107 | def project_group(group): 108 | perfs = union_top_n(group['runs']) 109 | return { 110 | 'config': group['config'], 111 | 'runs': sort_by( 112 | lambda x: x['tp'], 113 | list(map(project_run(perfs), group['runs']))) 114 | } 115 | 116 | return sort_by( 117 | lambda x: (x['config'].get('bs', 0), x['config'].get('size', 0)), 118 | 
list(map(project_group, grouped))) 119 | -------------------------------------------------------------------------------- /tools/generate_performance_report.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S python3 -B 2 | """ 3 | A script to automatically generate a report from a set of performance run data 4 | in the common intermediate format described in CBT PR 319. 5 | The archive should contain the 'visualisation' sub directory where all 6 | the .json and plot files reside. 7 | 8 | tools/fio_common_output_wrapper.py will generate the .json files and the SimplePlotter 9 | module in CBT PR 321 can be used to generate the plot files 10 | 11 | Usage: 12 | generate_performance_report.py --archive=<archive_directory> 13 | --output_directory=<output_directory> 14 | --create_pdf 15 | 16 | 17 | Input: 18 | --output_directory [Required] The directory to write the comparison plot 19 | to. If this does not exist it will be created. 20 | 21 | --archive [Required] The directory that contains the common 22 | format .json files and plot files to include 23 | in the report. 24 | 25 | --create_pdf [Optional] Create a pdf file of the report markdown 26 | file. 27 | This requires pandoc to be installed, 28 | and be on the path. 29 | 30 | Examples: 31 | 32 | Generate a markdown report file for the results in the '/tmp/squid_main' directory 33 | and save it in the '/tmp/main_results' directory: 34 | 35 | generate_performance_report.py --archive=/tmp/squid_main 36 | --output_directory=/tmp/main_results 37 | 38 | Additionally generate a pdf report file for the example above: 39 | 40 | generate_performance_report.py --archive=/tmp/squid_main 41 | --output_directory=/tmp/main_results 42 | --create_pdf 43 | """ 44 | 45 | import os 46 | import subprocess 47 | from argparse import ArgumentParser, Namespace 48 | from logging import INFO, Logger, basicConfig, getLogger 49 | 50 | from post_processing.reports.simple_report_generator import SimpleReportGenerator 51 | 52 | log: Logger = getLogger(f"{os.path.basename(__file__)}") 53 | 54 | 55 | def main() -> int: 56 | """ 57 | Main routine for the script 58 | """ 59 | 60 | result: int = 0 61 | 62 | description: str = "Produces a performance report in markdown format \n" 63 | description += "from the json and png files stored in the visualisation\n" 64 | description += "subdirectory of the directory given by --archive\n" 65 | description += "The resulting report(s) are saved in the specified output directory.\n" 66 | description += "The json files must be in the correct format, as described by CBT PR 319\n" 67 | description += "(https://github.com/ceph/cbt/pull/319)" 68 | 69 | parser: ArgumentParser = ArgumentParser(description=description) 70 | 71 | parser.add_argument( 72 | "--output_directory", 73 | type=str, 74 | required=True, 75 | help="The directory to store the comparison plot file(s)", 76 | ) 77 | parser.add_argument( 78 | "--archive", 79 | type=str, 80 | required=False, 81 | help="The directory that contains the set of json results files and generated plot files" 82 | + "for a particular test run", 83 | ) 84 | parser.add_argument( 85 | "--create_pdf", 86 | action="store_true", 87 | help="Generate a pdf report file in addition to the markdown report", 88 | ) 89 | 90 | arguments: Namespace = parser.parse_args() 91 | 92 | # will only create the output directory if it does not already exist 93 | subprocess.run(f"mkdir -p -m0755 {arguments.output_directory}", shell=True) 94 | 95 | report_generator = SimpleReportGenerator( 96 | 
archive_directories=arguments.archive, output_directory=arguments.output_directory 97 | ) 98 | 99 | try: 100 | report_generator.create_report() 101 | 102 | if arguments.create_pdf: 103 | report_generator.save_as_pdf() 104 | 105 | except Exception: 106 | log.exception("Encountered an error plotting results") 107 | result = 1 108 | 109 | return result 110 | 111 | 112 | def initialise_logging() -> None: 113 | """ 114 | Set up the logging for the sub-modules 115 | """ 116 | basicConfig(level=INFO, format="%(name)-20s: %(levelname)-8s %(message)s") 117 | 118 | 119 | if __name__ == "__main__": 120 | initialise_logging() 121 | main() 122 | -------------------------------------------------------------------------------- /tests/test_bm_nullbench.py: -------------------------------------------------------------------------------- 1 | """ Unit tests for the Benchmarknullbench class """ 2 | 3 | import unittest 4 | import hashlib 5 | import json 6 | import benchmarkfactory 7 | import settings 8 | from cluster.ceph import Ceph 9 | 10 | 11 | class TestBenchmarknullbench(unittest.TestCase): 12 | """ Sanity tests for Benchmarknullbench """ 13 | archive_dir = "/tmp" 14 | iteration = {'acceptable': [1,2,3], 'iteration': 0} 15 | cluster = {} 16 | cl_name = "tools/invariant.yaml" 17 | bl_name = "tools/baseline.json" 18 | bl_json = {} 19 | bl_md5 = 'e6b6fcd2be74bd08939c64a249ab2125' 20 | md5_returned = None 21 | 22 | @classmethod 23 | def setUpClass(cls): 24 | with open(cls.bl_name, 'rb') as f: 25 | data = f.read() 26 | f.close() 27 | cls.md5_returned = hashlib.md5(data).hexdigest() 28 | settings.mock_initialize(config_file=cls.cl_name) 29 | cls.cluster = Ceph.mockinit(settings.cluster) 30 | with open(cls.bl_name, 'r') as f: 31 | cls.bl_json = json.load(f) 32 | f.close() 33 | 34 | @classmethod 35 | def tearDownClass(cls): 36 | cls.cluster = None 37 | cls.bl_json = None 38 | 39 | def test_valid_baseline(self): 40 | """ Verify the baseline has not been compromised """ 41 | self.assertEqual( self.bl_md5, str(self.md5_returned) ) 42 | 43 | def test_valid_archive_dir(self): 44 | """ Basic sanity attribute identity archive_dir check""" 45 | b = benchmarkfactory.get_object(self.archive_dir, 46 | self.cluster, 'nullbench', self.iteration) 47 | self.assertEqual(self.bl_json['nullbench']['archive_dir'], b.__dict__['archive_dir']) 48 | 49 | def test_valid_cmd_path(self): 50 | """ Basic sanity attribute identity cmd_path check""" 51 | b = benchmarkfactory.get_object(self.archive_dir, 52 | self.cluster, 'nullbench', self.iteration) 53 | self.assertEqual(self.bl_json['nullbench']['cmd_path'], b.__dict__['cmd_path']) 54 | 55 | def test_valid_cmd_path_full(self): 56 | """ Basic sanity attribute identity cmd_path_full check""" 57 | b = benchmarkfactory.get_object(self.archive_dir, 58 | self.cluster, 'nullbench', self.iteration) 59 | self.assertEqual(self.bl_json['nullbench']['cmd_path_full'], b.__dict__['cmd_path_full']) 60 | 61 | def test_valid_config(self): 62 | """ Basic sanity attribute identity config check""" 63 | b = benchmarkfactory.get_object(self.archive_dir, 64 | self.cluster, 'nullbench', self.iteration) 65 | self.assertEqual(self.bl_json['nullbench']['config'], b.__dict__['config']) 66 | 67 | def test_valid_log_bw(self): 68 | """ Basic sanity attribute identity log_bw check""" 69 | b = benchmarkfactory.get_object(self.archive_dir, 70 | self.cluster, 'nullbench', self.iteration) 71 | self.assertEqual(self.bl_json['nullbench']['log_bw'], b.__dict__['log_bw']) 72 | 73 | def test_valid_log_iops(self): 74 | """ Basic 
sanity attribute identity log_iops check""" 75 | b = benchmarkfactory.get_object(self.archive_dir, 76 | self.cluster, 'nullbench', self.iteration) 77 | self.assertEqual(self.bl_json['nullbench']['log_iops'], b.__dict__['log_iops']) 78 | 79 | def test_valid_log_lat(self): 80 | """ Basic sanity attribute identity log_lat check""" 81 | b = benchmarkfactory.get_object(self.archive_dir, 82 | self.cluster, 'nullbench', self.iteration) 83 | self.assertEqual(self.bl_json['nullbench']['log_lat'], b.__dict__['log_lat']) 84 | 85 | def test_valid_osd_ra(self): 86 | """ Basic sanity attribute identity osd_ra check""" 87 | b = benchmarkfactory.get_object(self.archive_dir, 88 | self.cluster, 'nullbench', self.iteration) 89 | self.assertEqual(self.bl_json['nullbench']['osd_ra'], b.__dict__['osd_ra']) 90 | 91 | def test_valid_osd_ra_changed(self): 92 | """ Basic sanity attribute identity osd_ra_changed check""" 93 | b = benchmarkfactory.get_object(self.archive_dir, 94 | self.cluster, 'nullbench', self.iteration) 95 | self.assertEqual(self.bl_json['nullbench']['osd_ra_changed'], b.__dict__['osd_ra_changed']) 96 | 97 | def test_valid_run_dir(self): 98 | """ Basic sanity attribute identity run_dir check""" 99 | b = benchmarkfactory.get_object(self.archive_dir, 100 | self.cluster, 'nullbench', self.iteration) 101 | self.assertEqual(self.bl_json['nullbench']['run_dir'], b.__dict__['run_dir']) 102 | 103 | def test_valid_valgrind(self): 104 | """ Basic sanity attribute identity valgrind check""" 105 | b = benchmarkfactory.get_object(self.archive_dir, 106 | self.cluster, 'nullbench', self.iteration) 107 | self.assertEqual(self.bl_json['nullbench']['valgrind'], b.__dict__['valgrind']) 108 | 109 | if __name__ == '__main__': 110 | unittest.main() 111 | -------------------------------------------------------------------------------- /tools/generate_comparison_performance_report.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env -S python3 -B 2 | """ 3 | A script to automatically generate a report from a set of performance run data 4 | in the common intermediate format described in CBT PR 319. 5 | The archive should contain the 'visualisation' sub directory where all 6 | the .json and plot files reside. 7 | 8 | tools/fio_common_output_wrapper.py will generate the .json files and the SimplePlotter 9 | module in CBT PR 321 can be used to generate the plot files 10 | 11 | Usage: 12 | generate_comparison_performance_report.py 13 | --baseline=<baseline_directory> 14 | --archives=<comma_separated_list_of_directories> 15 | --output_directory=<output_directory> 16 | --create_pdf 17 | 18 | 19 | Input: 20 | --output_directory [Required] The directory to write the comparison plot 21 | to. If this does not exist it will be created. 22 | 23 | --baseline [Required] The directory containing the common 24 | format .json files to use as the baseline 25 | for the report 26 | 27 | --archives [Required] The directories that contain the common 28 | format .json files to compare to the baseline 29 | 30 | --create_pdf [Optional] Create a pdf file of the report markdown 31 | file. 32 | This requires pandoc to be installed, 33 | and be on the path. 
34 | 35 | Examples: 36 | 37 | Generate a markdown report file comparing the results in '/tmp/my_build' against 38 | the baseline in '/tmp/squid_main', and save it in the '/tmp/main_results' directory: 39 | 40 | generate_comparison_performance_report.py --baseline=/tmp/squid_main 41 | --archives=/tmp/my_build 42 | --output_directory=/tmp/main_results 43 | 44 | Additionally generate a pdf report file for the example above: 45 | 46 | generate_comparison_performance_report.py --baseline=/tmp/squid_main 47 | --archives=/tmp/my_build 48 | --output_directory=/tmp/main_results 49 | --create_pdf 50 | """ 51 | 52 | import os 53 | import subprocess 54 | from argparse import ArgumentParser, Namespace 55 | from logging import INFO, Logger, basicConfig, getLogger 56 | 57 | from post_processing.reports.comparison_report_generator import ComparisonReportGenerator 58 | 59 | log: Logger = getLogger(f"{os.path.basename(__file__)}") 60 | 61 | 62 | def main() -> int: 63 | """ 64 | Main routine for the script 65 | """ 66 | 67 | result: int = 0 68 | 69 | description: str = "Produces a performance report in markdown format \n" 70 | description += "from the json and png files stored in the visualisation\n" 71 | description += "subdirectory of the directory given by --archive\n" 72 | description += "The resulting report(s) are saved in the specified output directory.\n" 73 | description += "The json files must be in the correct format, as described by CBT PR 319\n" 74 | description += "(https://github.com/ceph/cbt/pull/319)" 75 | 76 | parser: ArgumentParser = ArgumentParser(description=description) 77 | 78 | parser.add_argument( 79 | "--output_directory", 80 | type=str, 81 | required=True, 82 | help="The directory to store the comparison plot file(s)", 83 | ) 84 | parser.add_argument( 85 | "--baseline", 86 | type=str, 87 | required=True, 88 | help="The full path to the directory that contains the set " 89 | + "of json results files to be used as the baseline for this " 90 | + "comparison", 91 | ) 92 | parser.add_argument( 93 | "--archives", 94 | type=str, 95 | required=True, 96 | help="A comma separated list of the directories that contain the set " 97 | + "of json results files to be compared to the baseline", 98 | ) 99 | parser.add_argument( 100 | "--create_pdf", 101 | action="store_true", 102 | help="Generate a pdf report file in addition to the markdown report", 103 | ) 104 | 105 | arguments: Namespace = parser.parse_args() 106 | 107 | # will only create the output directory if it does not already exist 108 | subprocess.run(f"mkdir -p -m0755 {arguments.output_directory}", shell=True) 109 | 110 | report_generator = ComparisonReportGenerator( 111 | archive_directories=f"{arguments.baseline},{arguments.archives}", output_directory=arguments.output_directory 112 | ) 113 | 114 | try: 115 | report_generator.create_report() 116 | 117 | if arguments.create_pdf: 118 | report_generator.save_as_pdf() 119 | 120 | except Exception: 121 | log.exception("Encountered an error creating the report") 122 | result = 1 123 | 124 | return result 125 | 126 | 127 | def initialise_logging() -> None: 128 | """ 129 | Set up the logging for the sub-modules 130 | """ 131 | basicConfig(level=INFO, format="%(name)-20s: %(levelname)-8s %(message)s") 132 | 133 | 134 | if __name__ == "__main__": 135 | initialise_logging() 136 | main() 137 | -------------------------------------------------------------------------------- /plot_results.py: -------------------------------------------------------------------------------- 1 | """ 2 | A file containing the classes and code required to 
read a file stored in the common 3 | intermediate format introduced in PR 319 (https://github.com/ceph/cbt/pull/319) and produce a hockey-stick curve graph 4 | """ 5 | 6 | import json 7 | from logging import Logger, getLogger 8 | from pathlib import Path 9 | from typing import Dict, List, Union 10 | 11 | import matplotlib.pyplot as plotter 12 | 13 | log: Logger = getLogger("cbt") 14 | 15 | 16 | class PlotResults: 17 | """ 18 | Read the intermediate data file in the common json format and produce a hockey-stick 19 | curve plot that includes standard deviation error bars. 20 | """ 21 | 22 | # A conversion between the operation type in the intermediate file format 23 | # and a human-readable string that can be used in the title for the plot. 24 | TITLE_CONVERSION: Dict[str, str] = { 25 | "read": "Sequential Read", 26 | "write": "Sequential Write", 27 | "randread": "Random Read", 28 | "randwrite": "Random Write", 29 | "readwrite": "Sequential Read/Write", 30 | "randrw": "Random Read/Write", 31 | } 32 | 33 | def __init__(self, archive_directory: str) -> None: 34 | self._data_directory: str = f"{archive_directory}/visualisation" 35 | 36 | self._path: Path = Path(self._data_directory) 37 | 38 | def draw_and_save(self) -> None: 39 | """ 40 | Produce the plot files for each of the intermediate data files in the given directory. 41 | """ 42 | 43 | for file_path in self._path.glob("*.json"): 44 | file_data: Dict[str, Union[str, Dict[str, str]]] = self._read_intermediate_file(f"{file_path}") 45 | output_file: str = f"{str(file_path)[:-4]}png" 46 | plot_title: str = self._generate_plot_title(file_path.parts[-1]) 47 | 48 | keys: List[str] = [key for key in file_data.keys() if isinstance(file_data[key], dict)] 49 | plot_data: Dict[str, Dict[str, str]] = {} 50 | sorted_plot_data: Dict[str, Dict[str, str]] = {} 51 | for key, data in file_data.items(): 52 | if isinstance(data, dict): 53 | plot_data[key] = data 54 | 55 | sorted_keys: List[str] = sorted(keys, key=int) 56 | for key in sorted_keys: 57 | sorted_plot_data[key] = plot_data[key] 58 | 59 | x_axis: List[Union[int, float]] = [] 60 | y_axis: List[Union[int, float]] = [] 61 | error_bars: List[float] = [] 62 | 63 | log.info("converting file %s", f"{file_path}") 64 | 65 | for _, data in sorted_plot_data.items(): 66 | # for blocksize less than 64K we want to use the bandwidth to plot the graphs, 67 | # otherwise we should use iops. 
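# (Worked example: data["blocksize"] == 4096 gives 4 KiB, which is < 64, so the x-axis is bandwidth; data["blocksize"] == 65536 gives 64 KiB, so the x-axis is IOps.)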
68 | blocksize: int = int(int(data["blocksize"]) / 1024) 69 | if blocksize < 64: 70 | # convert bytes to MB, not MiB, so use 1000s rather than 1024 71 | x_axis.append(float(data["bandwidth_bytes"]) / (1000 * 1000)) 72 | plotter.xlabel("Bandwidth (MB/s)") # pyright: ignore[reportUnknownMemberType] 73 | else: 74 | x_axis.append(float(data["iops"])) 75 | plotter.xlabel("IOps") # pyright: ignore[reportUnknownMemberType] 76 | # The stored values are in ns; we want to convert to ms 77 | y_axis.append(float(data["latency"]) / (1000 * 1000)) 78 | plotter.ylabel("Latency (ms)") # pyright: ignore[reportUnknownMemberType] 79 | error_bars.append(float(data["std_deviation"]) / (1000 * 1000)) 80 | 81 | plotter.title(plot_title) # pyright: ignore[reportUnknownMemberType] 82 | plotter.errorbar(x_axis, y_axis, error_bars, capsize=3, ecolor="red") # pyright: ignore[reportUnknownMemberType] 83 | plotter.savefig(output_file, format="png") # pyright: ignore[reportUnknownMemberType] 84 | # Now we have saved the file, clear the plot for the next file 85 | plotter.clf() 86 | 87 | def _read_intermediate_file(self, file_path: str) -> Dict[str, Union[str, Dict[str, str]]]: 88 | """ 89 | Read the json data from the intermediate file and store it for processing. 90 | """ 91 | data: Dict[str, Union[str, Dict[str, str]]] = {} 92 | # We know the file encoding as we wrote it ourselves as part of 93 | # common_output_format.py, so it is safe to specify here 94 | with open(f"{file_path}", "r", encoding="utf8") as file_data: 95 | data = json.load(file_data) 96 | 97 | return data 98 | 99 | def _generate_plot_title(self, source_file: str) -> str: 100 | """ 101 | Given the name of the input file, generate the title for the 102 | data plot 103 | """ 104 | # Strip the .json from the file name as we don't need it 105 | title_with_underscores: str = f"{source_file[:-5]}" 106 | parts: List[str] = title_with_underscores.split("_") 107 | 108 | # The filename is in one of 2 formats: 109 | # BLOCKSIZE_OPERATION.json 110 | # BLOCKSIZE_READ_WRITE_OPERATION.json 111 | # 112 | # The split on _ will mean that the last element [-1] will always be 113 | # the operation, and the first part [0] will be the blocksize 114 | title: str = f"{int(int(parts[0][:-1]) / 1024)}K " 115 | if len(parts) > 2: 116 | title += f"{parts[1]}/{parts[2]} " 117 | 118 | title += f"{self.TITLE_CONVERSION[parts[-1]]}" 119 | return title 120 | -------------------------------------------------------------------------------- /settings.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import yaml 3 | import sys 4 | import os 5 | import socket 6 | import logging 7 | 8 | 9 | logger = logging.getLogger("cbt") 10 | 11 | common = {} 12 | cluster = {} 13 | client_endpoints = {} 14 | benchmarks = {} 15 | monitoring_profiles = {} 16 | 17 | 18 | def _handle_monitoring_legacy(): 19 | """ 20 | Inject collectl even if the config says nothing about it to preserve 21 | compatibility with current CBT's configuration files. 
22 | """ 23 | global monitoring_profiles 24 | if 'collectl' not in monitoring_profiles: 25 | monitoring_profiles['collectl'] = {} 26 | 27 | 28 | def initialize(ctx): 29 | global common, cluster, client_endpoints, benchmarks, monitoring_profiles 30 | 31 | config = {} 32 | try: 33 | with open(ctx.config_file) as f: 34 | config = yaml.safe_load(f) 35 | except IOError as e: 36 | raise argparse.ArgumentTypeError(str(e)) 37 | 38 | common = config.get('common', {}) 39 | cluster = config.get('cluster', {}) 40 | client_endpoints = config.get('client_endpoints', {}) 41 | benchmarks = config.get('benchmarks', {}) 42 | monitoring_profiles = config.get('monitoring_profiles', dict(collectl={})) 43 | 44 | if not cluster: 45 | shutdown('No cluster section found in config file, bailing.') 46 | 47 | if not benchmarks: 48 | shutdown('No benchmarks section found in config file, bailing.') 49 | 50 | # set the archive_dir from the commandline if present 51 | if ctx.archive: 52 | cluster['archive_dir'] = ctx.archive 53 | if 'archive_dir' not in cluster: 54 | shutdown('No archive dir has been set.') 55 | 56 | _handle_monitoring_legacy() 57 | 58 | # store cbt configuration in the archive directory 59 | cbt_results = os.path.join(cluster['archive_dir'], 'results') 60 | config_file = os.path.join(cbt_results, 'cbt_config.yaml') 61 | if not os.path.exists(cluster['archive_dir']): 62 | os.makedirs(cluster['archive_dir']) 63 | if not os.path.exists(cbt_results): 64 | os.makedirs(cbt_results) 65 | if not os.path.exists(config_file): 66 | config_dict = dict(cluster=cluster, benchmarks=benchmarks, monitoring_profiles=monitoring_profiles) 67 | with open(config_file, 'w') as fd: 68 | yaml.dump(config_dict, fd, default_flow_style=False) 69 | 70 | # set the tmp_dir if not set. 71 | if 'tmp_dir' not in cluster: 72 | cluster['tmp_dir'] = '/tmp/cbt.%s' % os.getpid() 73 | 74 | # set the ceph.conf file from the commandline if present 75 | if ctx.conf: 76 | cluster['conf_file'] = ctx.conf 77 | # If no conf file is set, default to /etc/ceph/ceph.conf 78 | # FIXME: We shouldn't have cluster specific defaults in settings. 79 | # Eventually make a base class with specific cluster implementations. 
80 | if 'conf_file' not in cluster: 81 | cluster['conf_file'] = '/etc/ceph/ceph.conf' 82 | try: 83 | f = open(cluster['conf_file']) 84 | f.close() 85 | except IOError: 86 | shutdown('Was not able to access conf file: %s' % cluster['conf_file']) 87 | 88 | 89 | def host_info(host): 90 | ret = {} 91 | user = cluster.get('user') 92 | 93 | if '@' in host: 94 | user, host = host.split('@') 95 | ret['user'] = user 96 | if ':' in host: 97 | host, port = host.split(':') 98 | ret['port'] = port 99 | if user: 100 | ret['user'] = user 101 | ret['host'] = host 102 | # Follow-up: add support for socket.getaddrinfo 103 | try: 104 | ret['addr'] = socket.gethostbyname(host) 105 | except socket.gaierror: 106 | shutdown(f'Was not able to gethostbyname: {host}') 107 | return ret 108 | 109 | 110 | def getnodes(*nodelists): 111 | nodes = [] 112 | 113 | for nodelist in nodelists: 114 | cur = cluster.get(nodelist, []) 115 | if isinstance(cur, str): 116 | nodes.append(cur) 117 | elif isinstance(cur, dict): 118 | nodes.extend(list(cur.keys())) 119 | elif isinstance(cur, list): 120 | nodes.extend(cur) 121 | else: 122 | raise ValueError("Can't process nodes of type %s - unknown set type: %r" 123 | % (nodelist, cur)) 124 | 125 | str_nodes = ','.join(uniquenodes(nodes)) 126 | #logger.debug("Nodes : %s", str_nodes) 127 | return str_nodes 128 | 129 | 130 | def uniquenodes(nodes): 131 | unique = [node for node in nodes if node] 132 | ret = [] 133 | 134 | for host in unique: 135 | info = host_info(host) 136 | host_str = info['host'] 137 | if 'user' in info: 138 | host_str = "%s@%s" % (info['user'], host_str) 139 | ret.append(host_str) 140 | return set(ret) 141 | 142 | 143 | def shutdown(message): 144 | sys.exit(message) 145 | 146 | 147 | def mock_initialize(config_file="tools/invariant.yaml"): 148 | """ Auxiliary method only to be used from serialise_benchmark.py""" 149 | global common, cluster, client_endpoints, benchmarks, monitoring_profiles 150 | config = {} 151 | try: 152 | with open(config_file) as f: 153 | config = yaml.safe_load(f) 154 | except IOError as e: 155 | raise argparse.ArgumentTypeError(str(e)) 156 | 157 | common = config.get('common', {}) 158 | cluster = config.get('cluster', {}) 159 | client_endpoints = config.get('client_endpoints', {}) 160 | benchmarks = config.get('benchmarks', {}) 161 | monitoring_profiles = config.get('monitoring_profiles', dict(collectl={})) 162 | # Set some required defaults 163 | cluster['tmp_dir'] = '/tmp/cbt.XYZ' 164 | cluster['osd_ra'] = '0' 165 | -------------------------------------------------------------------------------- /tools/fio_visualize_data/fio-plot-stats-usage.rst: -------------------------------------------------------------------------------- 1 | ==================== 2 | Visualize Fio Output 3 | ==================== 4 | 5 | Motivation 6 | ========== 7 | 8 | Fio generates quite a bit of output that is sometimes hard to decipher 9 | and understand. This problem is exacerbated further if one is running 10 | multiple tests with different ceph options to tune ceph performance. 11 | It would be good to have a tool that decodes the data from the log files 12 | created by Fio and generates meaningful graphs that provide insight into 13 | ceph performance. 14 | 15 | The attempt here is to start with some basic scripts that parse Fio 16 | output and generate plots like average client latencies and completion 17 | latency percentiles. 


def getnodes(*nodelists):
    nodes = []

    for nodelist in nodelists:
        cur = cluster.get(nodelist, [])
        if isinstance(cur, str):
            nodes.append(cur)
        elif isinstance(cur, dict):
            nodes.extend(list(cur.keys()))
        elif isinstance(cur, list):
            nodes.extend(cur)
        else:
            raise ValueError("Can't process nodes of type %s - unknown set type: %r"
                             % (nodelist, cur))

    str_nodes = ','.join(uniquenodes(nodes))
    # logger.debug("Nodes : %s", str_nodes)
    return str_nodes


def uniquenodes(nodes):
    unique = [node for node in nodes if node]
    ret = []

    for host in unique:
        info = host_info(host)
        host_str = info['host']
        if 'user' in info:
            host_str = "%s@%s" % (info['user'], host_str)
        ret.append(host_str)
    return set(ret)


def shutdown(message):
    sys.exit(message)


def mock_initialize(config_file="tools/invariant.yaml"):
    """ Auxiliary method only to be used from serialise_benchmark.py """
    global common, cluster, client_endpoints, benchmarks, monitoring_profiles
    config = {}
    try:
        with open(config_file) as f:
            config = yaml.safe_load(f)
    except IOError as e:
        raise argparse.ArgumentTypeError(str(e))

    common = config.get('common', {})
    cluster = config.get('cluster', {})
    client_endpoints = config.get('client_endpoints', {})
    benchmarks = config.get('benchmarks', {})
    monitoring_profiles = config.get('monitoring_profiles', dict(collectl={}))
    # Set some required defaults
    cluster['tmp_dir'] = '/tmp/cbt.XYZ'
    cluster['osd_ra'] = '0'
--------------------------------------------------------------------------------
/tools/fio_visualize_data/fio-plot-stats-usage.rst:
--------------------------------------------------------------------------------
====================
Visualize Fio Output
====================

Motivation
==========

Fio generates quite a bit of output that is sometimes hard to decipher
and understand. The problem is exacerbated further when running
multiple tests with different ceph options to tune ceph performance.
It would be good to have a tool that decodes the data in the log files
created by Fio and generates meaningful graphs that provide insight into
ceph performance.

The attempt here is to start with some basic scripts that parse Fio
output and generate plots such as average client latencies and completion
latency percentiles.

Going further, the idea is to enhance the scripts to generate more
meaningful graphs and to integrate more tightly with cbt, so that graphs
can be generated via the yaml specification as part of the test itself.

Usage
=====
.. code-block:: console

   $ ./fio-plot-stats.py -h
   usage: fio-plot-stats.py [-h] -f {json,csv} -s SRCDIR -d DESTDIR -o
                            {read,write} -m {bw,lat,pct} [-i {pdf,png}]
                            [-n FILENAME] [-r TIMERANGE TIMERANGE] [-p]

   Generate plots from fio output

   optional arguments:
     -h, --help            show this help message and exit
     -f {json,csv}, --filetype {json,csv}
                           type of file to parse
     -s SRCDIR, --source SRCDIR
                           source directory containing fio output files
     -d DESTDIR, --destination DESTDIR
                           destination directory to save generated plots
     -o {read,write}, --optype {read,write}
                           plot read or write stats
     -m {bw,lat,pct}, --metric {bw,lat,pct}
                           metric to analyze/plot
     -i {pdf,png}, --imgformat {pdf,png}
                           plot image format
     -n FILENAME, --filename FILENAME
                           source file containing CSV data to analyze/plot
     -r TIMERANGE TIMERANGE, --timerange TIMERANGE TIMERANGE
                           time range to plot/calculate stats for CSV data
     -p, --subplot         create a subplot with provided timerange

Working Details
===============
The input file format option ``-f/--filetype`` is mandatory. Depending on
it, additional options may be provided to override the default behavior.
For the JSON file type, the tool scans for files in the source directory
and generates graphs. For the CSV file type, an additional parameter,
``-n/--filename``, needs to be specified.

The option ``-o/--optype`` tells the script whether to scan read or write
statistics in the Fio files when generating the graphs.

An additional artifact (apart from the charts) of parsing JSON data is a
CSV file containing the stats from the parsed files.

NOTE: All fio files in the source directory with the string 'json' in their
filename are treated as JSON files and are scanned automatically. It is
therefore important to include 'json' in the filename if JSON data is to be
visualized.

Examples
========
**Example 1**

The following commands scan the source directory for files with the string
'json' in their filenames and parse the specified stats (lat, bw or pct)
from the files to generate comparison graphs in the destination folder:

.. code-block:: console

   $ python3 fio-plot-stats.py -s ~/cbt_logs/json_logs -f json -o write -d ~/cbt_logs/json_logs -m lat
   $ python3 fio-plot-stats.py -s ~/cbt_logs/json_logs -f json -o write -d ~/cbt_logs/json_logs -m bw
   $ python3 fio-plot-stats.py -s ~/cbt_logs/json_logs -f json -o write -d ~/cbt_logs/json_logs -m pct

**Example 2**

The following command uses the specified CSV file containing write latency
stats generated by fio and generates a chart of the latency distribution
across the entire duration of the test:

.. code-block:: console

   $ python3 fio-plot-stats.py -f csv -s ~/cbt_logs -d ~/cbt_logs -o write -n wpq_clat_Run7 -m lat

**Example 3**

The following command is similar to Example 2, except that a subplot is
additionally generated in the same chart showing the latency distribution
in the specified time range:

.. code-block:: console

   $ python3 fio-plot-stats.py -f csv -s ~/cbt_logs -d ~/cbt_logs -o write -n wpq_clat_Run7 -m lat -r 0 160 -p

Note that if the ``-p/--subplot`` option is not specified in Example 3, a
chart with a single graph is generated for the time range specified.

Plots may be generated for the bandwidth metric by specifying ``bw`` for
the ``-m`` parameter in the above examples.

Additionally, percentile data and charts may be generated by specifying
``pct`` for the ``-m`` parameter. The raw clat latency data captured by fio
must be provided as input using the ``-n`` option. This uses the pandas and
numpy modules to generate a percentile table and charts for the average,
50th, 95th, 99th and 99.5th percentiles. Given a time range, samples are
analyzed for each second in between, and the above percentiles are saved
into a new pandas dataframe. A csv file is generated in addition to the
chart for the time range specified.

NOTE: Logging the histogram data generated by fio and running the analysis
on it would provide more accurate information about the percentile
distribution. Please see the fio source repository for more information.
--------------------------------------------------------------------------------
/tools/config_wizard.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# NOTE: Be sure to run this script on the main ceph monitor as the desired
# CBT user if running the script automatically (-a).

import argparse
import os
import socket
import sys

from config_class import Config, KvmRbdFio, Radosbench, RbdFio

BENCHMARKS = ["radosbench", "kvmrbdfio", "rbdfio"]
TMP_DIR = "/dev/null"


def parse_arguments():
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--automate",
                        help="Automatically create a config file with default"
                             " values for Radosbench, RBDFIO and KVMRBDFIO.",
                        action="store_true")
    parser.add_argument("-o", "--output_file",
                        help="Specify the filename for the output config file."
                             " Defaults to 'cbt_config.xfs.yaml'.",
                        type=str, nargs="?", default="cbt_config.xfs.yaml")
    return parser.parse_args()
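
# Example invocation (illustrative, not from the original file; flags as
# defined in parse_arguments() above):
#
#   ./config_wizard.py -a -o cbt_config.xfs.yaml
#
# With -a the script probes the local cluster ("hostname -s", "ceph osd tree")
# and writes defaults for all three benchmarks; without it, the script
# prompts interactively.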


def get_hosts(auto):
    if auto:
        clients = []
        monitor = os.popen("hostname -s").read().rstrip()
        hosts = os.popen("ceph osd tree | grep host").read().split("\n")
        for host in hosts:
            if host != "":
                clients.append(host.rstrip().split(" ")[-1])
        return (monitor, clients)

    try:
        monitor = input("Enter the hostname of the monitor: ")
        clients = input("Enter the hostname(s) of the OSD(s) separated by"
                        " comma: ").replace(" ", "").split(",")
    except KeyboardInterrupt:
        print("Aborting script. No data will be saved.")
        sys.exit(1)
    return (monitor, clients)


def get_user(auto):
    if auto:
        return os.getlogin()

    try:
        user = input("Enter the username for CBT: ")
    except KeyboardInterrupt:
        print("Aborting script. No data will be saved.")
        sys.exit(1)
    return user


def get_tmp_dir(auto):
    if auto:
        return TMP_DIR

    try:
        directory = input("Enter the temporary directory for CBT results: ")
    except KeyboardInterrupt:
        print("Aborting script. No data will be saved.")
        sys.exit(1)
    return directory


def select_tests():
    while True:
        valid = True
        print("Which of the following tests would you like to run?\n"
              "radosbench, kvmrbdfio, rbdfio")
        try:
            tests = input("Enter the test names separated by comma: ")
            tests = tests.replace(" ", "").split(",")
        except KeyboardInterrupt:
            print("Aborting script. No data will be saved.")
            sys.exit(1)
        for test in tests:
            if test.lower() not in BENCHMARKS:
                print("Unknown test: %s" % (test))
                print("Please specify only valid tests from the list above\n")
                valid = False
                break
        if valid:
            return [x.lower() for x in tests]


def generate_test_values(test, default, config):
    if test == "rbdfio":
        rbdfio = RbdFio(default, config)
        config.add_benchmark_settings(rbdfio.output)
    elif test == "kvmrbdfio":
        kvmrbdfio = KvmRbdFio(default, config)
        config.add_benchmark_settings(kvmrbdfio.output)
    else:
        radosbench = Radosbench(default, config)
        config.add_benchmark_settings(radosbench.output)


def main():
    args = parse_arguments()
    hosts = get_hosts(args.automate)
    user = get_user(args.automate)
    tmp_dir = get_tmp_dir(args.automate)
    conf = Config(args.output_file, hosts, user, tmp_dir)
    if args.automate:
        rbdfio = RbdFio(True, conf)
        kvmrbdfio = KvmRbdFio(True, conf)
        radosbench = Radosbench(True, conf)
        conf.add_benchmark_settings(rbdfio.output)
        conf.add_benchmark_settings(kvmrbdfio.output)
        conf.add_benchmark_settings(radosbench.output)
    else:
        tests = select_tests()
        for test in tests:
            use_default = False
            print("\nEntering settings for %s:" % (test))
            while True:
                try:
                    default = input("Would you like to use default"
                                    " settings for %s [y/n]? " % (test))
                except KeyboardInterrupt:
                    print("Aborting script. No data will be saved.")
                    sys.exit(1)
                if default.lower() == "y":
                    print("Using default values for %s" % (test))
                    use_default = True
                    break
                elif default.lower() == "n":
                    use_default = False
                    break
            generate_test_values(test, use_default, conf)
    conf.save_file()
    print("Output saved to: %s" % (conf.out_file))


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/benchmark/cephtestrados.py:
--------------------------------------------------------------------------------
from .benchmark import Benchmark
import common
import settings
import monitoring
import os
import time
import logging

logger = logging.getLogger('cbt')


class CephTestRados(Benchmark):

    def __init__(self, archive_dir, cluster, config):
        super(CephTestRados, self).__init__(archive_dir, cluster, config)

        self.tmp_conf = self.cluster.tmp_conf

        self.bools = {}
        if config.get('ec_pool', False):
            self.bools['ec_pool'] = True
        if config.get('write_fadvise_dontneed', False):
            self.bools['write_fadvise_dontneed'] = True
        if config.get('pool_snaps', False):
            self.bools['pool_snaps'] = True
        if config.get('write_append_excl', True):
            self.bools['write_append_excl'] = True

        self.variables = {}
        self.variables['object_size'] = int(config.get('object_size', 4000000))
        self.variables['max_ops'] = str(config.get('ops', 10000))
        self.variables['objects'] = str(config.get('objects', 500))
        self.variables['max_in_flight'] = str(config.get('max_in_flight', 16))
        self.variables['size'] = int(config.get('object_size', 4000000))
        # use integer division so the default stride sizes stay integers under Python 3
        self.variables['min_stride_size'] = str(config.get('min_stride_size', self.variables['object_size'] // 10))
        self.variables['max_stride_size'] = str(config.get('max_stride_size', self.variables['object_size'] // 5))
        self.variables['max_seconds'] = str(config.get('max_seconds', 0))

        self.weights = {'read': 100, 'write': 100, 'delete': 10}
        for weight in ['snap_create', 'snap_remove', 'rollback', 'setattr',
                       'rmattr', 'watch', 'copy_from', 'hit_set_list',
                       'is_dirty', 'cache_flush', 'cache_try_flush',
                       'cache_evict', 'append', 'write', 'read', 'delete']:
            self.addweight(weight)
        if 'write_append_excl' in self.bools and 'append' in self.weights:
            self.weights['append'] = self.weights['write'] // 2
            self.weights['append_excl'] = self.weights['write']

        if 'write_append_excl' in self.bools and 'write' in self.weights:
            self.weights['write'] = self.weights['write'] // 2
            self.weights['write_excl'] = self.weights['write']

        self.run_dir = '%s/osd_ra-%08d/object_size-%08d' % (self.run_dir, int(self.osd_ra), int(self.variables['object_size']))
        self.out_dir = '%s/osd_ra-%08d/object_size-%08d' % (self.archive_dir, int(self.osd_ra), int(self.variables['object_size']))
        self.pool_profile = config.get('pool_profile', 'default')
        self.cmd_path = config.get('cmd_path', '/usr/bin/ceph_test_rados')

    def addweight(self, weight):
        value = self.config.get("%s_weight" % weight, None)
        if value is not None:
            self.weights[weight] = int(value)

    def exists(self):
        if os.path.exists(self.out_dir):
            print('Skipping existing test in %s.' % self.out_dir)
            return True
        return False

    # Initialize may only be called once depending on the rebuild_every_test setting
    def initialize(self):
        super(CephTestRados, self).initialize()

    def run(self):
        super(CephTestRados, self).run()

        # Remake the pool
        self.mkpool()
        self.dropcaches()
        self.cluster.dump_config(self.run_dir)
        monitoring.start(self.run_dir)
        time.sleep(5)
        # Run the backfill testing thread if requested
        if 'recovery_test' in self.cluster.config:
            recovery_callback = self.recovery_callback
            self.cluster.create_recovery_test(self.run_dir, recovery_callback)

        logger.info('Running ceph_test_rados.')
        ps = []
        for i in range(1):
            p = common.pdsh(settings.getnodes('clients'), self.mkcmd())
            ps.append(p)
        for p in ps:
            p.wait()
        # If we were doing recovery, wait until it's done.
        if 'recovery_test' in self.cluster.config:
            self.cluster.wait_recovery_done()

        monitoring.stop(self.run_dir)

        # Finally, get the historic ops
        self.cluster.dump_historic_ops(self.run_dir)
        common.sync_files('%s/*' % self.run_dir, self.out_dir)

    def mkcmd(self):
        cmd = [self.cmd_path]
        out_file = '%s/output' % self.run_dir

        for flag in ['ec_pool', 'write_fadvise_dontneed', 'pool_snaps']:
            if flag in self.bools:
                cmd.append('--%s' % flag.replace('_', '-'))
        for variable in ['max_ops', 'objects', 'max_in_flight', 'size',
                         'min_stride_size', 'max_stride_size', 'max_seconds']:
            value = self.variables[variable]
            if value:
                cmd.extend(['--%s' % variable.replace('_', '-'), str(value)])
        for op, weight in self.weights.items():
            cmd.extend(['--op', op, str(weight)])
        cmd.extend(['--pool', 'ceph_test_rados'])
        cmd.extend(['|', 'awk \'{ print strftime("%Y-%m-%d %H:%M:%S"), $0; fflush(); }\'', '>', out_file])
        logger.debug("%s", cmd)
        return ' '.join(cmd)
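
    # Illustration only (not in the original file): with the default config
    # above, mkcmd() produces a command line roughly like
    #
    #   /usr/bin/ceph_test_rados --max-ops 10000 --objects 500
    #       --max-in-flight 16 --size 4000000 --min-stride-size 400000
    #       --max-stride-size 800000 --max-seconds 0 --op read 100
    #       --op write 50 --op delete 10 --op write_excl 50
    #       --pool ceph_test_rados
    #       | awk '{ print strftime("%Y-%m-%d %H:%M:%S"), $0; fflush(); }' > <run_dir>/output
    #
    # (shown wrapped here; the generated string is a single line).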

    def mkpool(self):
        monitoring.start("%s/pool_monitoring" % self.run_dir)
        self.cluster.rmpool('ceph_test_rados', self.pool_profile)
        self.cluster.mkpool('ceph_test_rados', self.pool_profile, 'ceph_test_rados')
        monitoring.stop()

    def recovery_callback(self):
        common.pdsh(settings.getnodes('clients'), 'sudo pkill -f ceph_test_rados').communicate()

    def __str__(self):
        return "%s\n%s\n%s" % (self.run_dir, self.out_dir, super(CephTestRados, self).__str__())
--------------------------------------------------------------------------------
/statistic.py:
--------------------------------------------------------------------------------
import math

try:
    from scipy import stats
    from numpy import array, linalg
    from scipy.optimize import leastsq
    from numpy.polynomial.chebyshev import chebfit, chebval
    no_numpy = False
except ImportError:
    no_numpy = True

__doc__ = """
This module contains functions for processing test results.
The main function is data_property.
"""


def average(data):
    return sum(data) / len(data)


def mediana(vals):
    # NOTE: despite the name, this returns the arithmetic mean;
    # deviation() below relies on that.
    return sum(vals) / len(vals)


def deviation(vals):
    med = mediana(vals)
    squares_sum = sum(abs(med - i) ** 2.0 for i in vals)
    return (squares_sum / len(vals)) ** 0.5


def round_3_digit(val):
    return round_val_and_deviation(val, val / 10.0)[0]


def round_val_and_deviation(val, dev):
    if dev < 1E-7:
        return val, dev

    dev_div = 10.0 ** (math.floor(math.log10(dev)) - 1)

    dev1 = int(dev / dev_div) * dev_div
    val1 = int(val / dev_div) * dev_div

    return type(val)(val1), type(dev)(dev1)


def approximate_curve(x, y, xnew, curved_coef):
    """returns ynew - y values of some curve approximation"""
    if no_numpy:
        raise ValueError("No numpy found")

    return chebval(xnew, chebfit(x, y, curved_coef))


def approximate_line(x, y, xnew, relative_dist=False):
    """ x, y - test data, xnew - dots where we want to find the approximation
    if not relative_dist, distance = y - newy
    returns ynew - y values of the linear approximation"""

    if no_numpy:
        raise ValueError("No numpy found")

    # convert to numpy.array (doesn't work without it)
    ox = array(x)
    oy = array(y)

    # set the approximation function
    def func_line(tpl, x):
        return tpl[0] * x + tpl[1]

    def error_func_rel(tpl, x, y):
        return 1.0 - y / func_line(tpl, x)

    def error_func_abs(tpl, x, y):
        return y - func_line(tpl, x)

    # choose the distance mode
    error_func = error_func_rel if relative_dist else error_func_abs

    tpl_initial = tuple(linalg.solve([[ox[0], 1.0], [ox[1], 1.0]],
                                     oy[:2]))

    # find the line
    tpl_final, success = leastsq(error_func,
                                 tpl_initial[:],
                                 args=(ox, oy))

    # if error
    if success not in list(range(1, 5)):
        raise ValueError("No line for these dots")

    # return new dots
    return func_line(tpl_final, array(xnew))
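
# Illustrative usage (not in the original module; values invented): fit a
# line to noisy samples of y = 2x + 1 and evaluate it at new points.
#
#   x = [0.0, 1.0, 2.0, 3.0]
#   y = [1.1, 2.9, 5.2, 6.9]
#   ynew = approximate_line(x, y, [4.0, 5.0])   # roughly [9.0, 11.0]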


def difference(y, ynew):
    """returns the average and maximum relative and
    absolute differences between y and ynew
    result may contain None values for y = 0
    return value - tuple:
        [(abs dif, rel dif) * len(y)],
        (abs average, abs max),
        (rel average, rel max)"""

    abs_dlist = []
    rel_dlist = []

    for y1, y2 in zip(y, ynew):
        # absolute
        abs_dlist.append(y1 - y2)

        if y1 > 1E-6:
            rel_dlist.append(abs(abs_dlist[-1] / y1))
        else:
            raise ZeroDivisionError("{0!r} is too small".format(y1))

    da_avg = sum(abs_dlist) / len(abs_dlist)
    dr_avg = sum(rel_dlist) / len(rel_dlist)

    return (list(zip(abs_dlist, rel_dlist)),
            (da_avg, max(abs_dlist)), (dr_avg, max(rel_dlist))
            )


class StatProperties(object):
    """
    Statistical properties of an array of data:
        average
        mediana
        perc_95 - 95th percentile
        perc_05 - 5th percentile
        deviation
        confidence - 95% confidence interval for the average
        min
        max
        raw - original data
    """
    def __init__(self):
        # average value
        self.average = None

        # mediana value
        self.mediana = None

        # 95th percentile
        self.perc_95 = None

        # 5th percentile
        self.perc_05 = None

        # deviation
        self.deviation = None

        # 95% confidence interval for the average
        self.confidence = None

        # minimal and maximum values
        self.min = None
        self.max = None

        # array of raw values
        self.raw = None

    def rounded_average_conf(self):
        return round_val_and_deviation(self.average, self.confidence)

    def rounded_average_dev(self):
        return round_val_and_deviation(self.average, self.deviation)

    def __str__(self):
        return "{0}({1} ~ {2})".format(self.__class__.__name__,
                                       round_3_digit(self.average),
                                       round_3_digit(self.deviation))

    def __repr__(self):
        return str(self)


def data_property(data, confidence=0.95):
    """
    calculate StatProperties for data
    """
    res = StatProperties()
    if len(data) == 0:
        return res

    data = sorted(data)
    res.average, res.deviation = round_val_and_deviation(average(data),
                                                         deviation(data))
    res.max = data[-1]
    res.min = data[0]

    ln = len(data)
    # integer division keeps the indices valid under Python 3
    if ln % 2 == 0:
        res.mediana = (data[ln // 2] + data[ln // 2 - 1]) / 2
    else:
        res.mediana = data[ln // 2]

    res.perc_95 = data[int((ln - 1) * 0.95)]
    res.perc_05 = data[int((ln - 1) * 0.05)]

    if not no_numpy and ln >= 3:
        res.confidence = stats.sem(data) * \
            stats.t.ppf((1 + confidence) / 2, ln - 1)
    else:
        res.confidence = res.deviation

    res.raw = data[:]
    return res
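
# Illustrative usage (not in the original module; values invented):
#
#   >>> props = data_property([10.0, 12.0, 11.0, 13.0, 11.5])
#   >>> props.min, props.max
#   (10.0, 13.0)
#   >>> props.rounded_average_conf()   # (average, 95% confidence interval)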
--------------------------------------------------------------------------------
/tools/is-regression.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3
#
# is-regression.py - statistical test for performance throughput regression
#                    based on the python scipy.stats.ttest_ind() function
#
# we input two sets of samples:
#   the baseline sample set -- used as an indication of a previously achieved level of performance
#   the current sample set -- used as an indication of the system currently being tested for performance regression
#
# command line inputs:
#   sample_type -- 'throughput' or 'response-time'
#   confidence_threshold -- min probability that the two sample sets have a different mean
#                           (e.g. 95 means that results differ with 95% probability)
#   max_pct_dev -- maximum percent deviation of either sample set, 100.0 x std.dev/mean
#   base_sample -- file containing baseline performance throughput samples, 1 per line
#   current_sample -- file containing current performance throughput samples, 1 per line
#
# return status codes:
#   0 -- no regression, PASS
#   10 -- regression, FAIL
#   11 -- either sample set's variance is too large
#         reject if the percent deviation of either the baseline or the current samples is > max_pct_dev
#   12 -- not enough samples (fewer than 3 in either set)
#
# we declare a performance regression if the base_set mean is worse than the current_set mean and a
# T-test determines that the probability that the two sample sets have a different mean is greater
# than confidence_threshold
#
# the base sample set mean is "worse" than the current sample set mean if and only if:
#   the sample_type is 'throughput' and the base mean > current mean
#   the sample_type is 'response-time' and the base mean < current mean
#
# References: The Art of Computer Systems Performance Analysis, Raj Jain
#             see the documentation for the python scipy.stats.ttest_ind() function
#

import os
import sys
from sys import argv, exit
import math
import numpy
import scipy
from scipy.stats import ttest_ind
from numpy import array

# process status codes returned to the shell
NOTOK = -1
PASS = 0
FAIL = 10
VARIANCE_TOO_HIGH = 11
NOT_ENOUGH_SAMPLES = 12


def usage(msg):
    print('\nERROR: ' + msg)
    print('usage: is-regression.py sample_type confidence_threshold max_pct_dev base_samples_file test_samples_file')
    print('sample_type is either "throughput" or "response-time"')
    print('confidence_threshold is the probability that the sample means differ, expressed as a percentage')
    print('max_pct_dev is the maximum percent deviation allowed for either sample set')
    print('samples files are text files with one floating-point sample value per line')
    sys.exit(NOTOK)


def read_samples_from_file(sample_filepath):
    with open(sample_filepath, "r") as sample_file:
        samples = [float(r.strip()) for r in sample_file.readlines()]
    print('%d samples read from file %s' % (len(samples), sample_filepath))
    return array(samples)


def print_sample_stats(samples_name, samples_array):
    s = samples_array
    print('sample stats for %s: min = %f, max = %f, mean = %f, sd = %f, pct.dev. = %5.2f' %
          (samples_name, s.min(), s.max(), s.mean(), s.std(ddof=1), 100.0 * s.std(ddof=1) / s.mean()))


if len(argv) < 6:
    usage('not enough command line arguments')

sample_type = argv[1]
confidence_threshold = float(argv[2])
max_pct_dev = float(argv[3])

# read in and acknowledge command line arguments

print('sample type = %s , confidence_threshold = %6.2f %%, max. pct. deviation = %6.2f %%' %
      (sample_type, confidence_threshold, max_pct_dev))

baseline_sample_array = read_samples_from_file(argv[4])
print_sample_stats('baseline', baseline_sample_array)

current_sample_array = read_samples_from_file(argv[5])
print_sample_stats('current', current_sample_array)

# reject invalid inputs

if len(current_sample_array) < 3:
    print('ERROR: not enough current samples')
    exit(NOT_ENOUGH_SAMPLES)

if len(baseline_sample_array) < 3:
    print('ERROR: not enough baseline samples')
    exit(NOT_ENOUGH_SAMPLES)

# flunk the test if the standard deviation is too high for either sample set

baseline_pct_dev = 100.0 * baseline_sample_array.std(ddof=1) / baseline_sample_array.mean()
current_pct_dev = 100.0 * current_sample_array.std(ddof=1) / current_sample_array.mean()

if baseline_pct_dev > max_pct_dev:
    print('ERROR: pct. deviation of %5.2f is too high for baseline samples' % baseline_pct_dev)
    exit(VARIANCE_TOO_HIGH)
if current_pct_dev > max_pct_dev:
    print('ERROR: pct. deviation of %5.2f is too high for current samples' % current_pct_dev)
    exit(VARIANCE_TOO_HIGH)

# FAIL the test if the sample sets are accurate enough and the
# current sample set is statistically worse than the baseline sample set

(t, same_mean_probability) = ttest_ind(baseline_sample_array, current_sample_array)
print('t-test t-statistic = %f probability = %f' % (t, same_mean_probability))
print('t-test says that the means of the two sample sets differ with probability %6.2f%%' %
      ((1.0 - same_mean_probability) * 100.0))

pb_threshold = (100.0 - confidence_threshold) / 100.0
print('same_mean_prob %f pb_threshold %f' % (same_mean_probability, pb_threshold))
if same_mean_probability < pb_threshold:
    # the two samples do not have the same mean
    # fail if the current sample is worse than the baseline sample as defined above
    if sample_type == 'throughput':
        if baseline_sample_array.mean() > current_sample_array.mean():
            print('declaring a performance regression test FAILURE because of lower throughput')
            exit(FAIL)
    elif sample_type == 'response-time':
        if baseline_sample_array.mean() < current_sample_array.mean():
            print('declaring a performance regression test FAILURE because of higher response time')
            exit(FAIL)
    else:
        usage('sample_type must be either "throughput" or "response-time"')
    print('current sample set is statistically better than baseline sample set')
else:
    print('sample sets are statistically indistinguishable for the specified confidence level')
exit(PASS)  # no regression found
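
# Example invocation (illustrative; file names and contents invented):
#
#   $ ./is-regression.py throughput 95 10.0 baseline_mbps.txt current_mbps.txt
#
# exits 0 when the sample sets are statistically indistinguishable at the
# 95% level, 10 on a regression, and 11/12 for noisy or insufficient samples.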
--------------------------------------------------------------------------------
/tools/serialise_benchmark.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3
#
# serialise_benchmark.py - serialise all the benchmark classes into a baseline
# JSON file (tools/baseline.json) and automate the creation of unit tests
# for them.
#
import argparse
import os, sys
import pprint
import json
from json import JSONEncoder
import yaml
import hashlib
import benchmarkfactory
import settings
from cluster.ceph import Ceph
from log_support import setup_loggers

log_fname = '/tmp/cbt-utest.log'


class BenchGenerator(object):
    """
    Class used for the serialisation of the benchmark classes
    and the automated generation of unit tests
    """
    all_benchmarks = [
        'nullbench',
        'fio',
        'hsbench',
        'radosbench',
        'kvmrbdfio',
        'rawfio',
        'librbdfio',
        'cephtestrados',
        'rbdfio',
        'getput'
    ]
    archive_dir = "/tmp"
    iteration = {'acceptable': [1, 2, 3], 'iteration': 0}
    cluster = {}
    bl_name = "tools/baseline.json"
    bl_md5 = None
    cl_name = "tools/invariant.yaml"
    ut_name = "tests/test_bm.py"
    djson = {}
    current = {}

    def __init__(self):
        """ Init using mock constructors for a fixed cluster """
        settings.mock_initialize(config_file=BenchGenerator.cl_name)
        BenchGenerator.cluster = Ceph.mockinit(settings.cluster)

    def get_md5_bl(self):
        """ Calculate the MD5 sum of the baseline contents """
        with open(self.bl_name, 'rb') as f:
            data = f.read()
        return hashlib.md5(data).hexdigest()

    def gen_json(self):
        """ Serialise the benchmark objects into a json file """
        result = {}
        for bm in self.all_benchmarks:
            b = benchmarkfactory.get_object(self.archive_dir,
                                            self.cluster, bm, self.iteration)
            result[bm] = b.__dict__
        with open(self.bl_name, 'w', encoding='utf-8') as f:
            json.dump(result, f, sort_keys=True, indent=4, cls=BenchJSONEncoder)
        # data from json.dump() does not support the buffer API,
        # so hash the written file instead
        self.bl_md5 = self.get_md5_bl()

    def verify_md5(self):
        """ Verify that the MD5 sum of baseline.json is correct """
        md5_returned = self.get_md5_bl()
        if self.bl_md5 == md5_returned:
            print("MD5 verified.")
            return True
        else:
            print(f"MD5 verification failed! {self.bl_md5} vs. {md5_returned}")
            return False

    def verify_json(self):
        """ Verify the baseline json against the current benchmark classes """
        with open(self.bl_name, 'r') as f:
            self.djson = json.load(f)
        for bm in self.all_benchmarks:
            b = benchmarkfactory.get_object(self.archive_dir,
                                            self.cluster, bm, self.iteration)
            self.current[bm] = b.__dict__
        # This loop verifies that the active classes have the same attributes
        # as the baseline: no complaints happen if new attributes have been
        # added, but a difference will show for each old attribute removed
        for bm in self.djson.keys():
            if isinstance(self.djson[bm], dict):
                for k in self.djson[bm].keys():
                    # Skip cluster since it's a Ceph object, and acceptable was removed.
                    # We also need to skip _iodepth_per_volume here as the json file
                    # format cannot cope with a dictionary that does not use a str as
                    # the key: _iodepth_per_volume is intentionally a dict[int, int].
                    if k in ("cluster", "acceptable", "_iodepth_per_volume"):
                        continue
                    if not self.djson[bm][k] == self.current[bm][k]:
                        if isinstance(self.djson[bm][k], dict):
                            set1 = set(self.djson[bm][k].items())
                            set2 = set(self.current[bm][k].items())
                            print(set2 ^ set1)
                        else:
                            print(f"{bm}[{k}]: diff type {type(self.djson[bm][k])}")
                            print(f"{bm}[{k}]: {self.djson[bm][k]} vs {self.current[bm][k]}")

    def gen_utests(self):
        """ Generate the unit tests from the baseline json against the current benchmark classes """
        djson = self.djson
        for bm in djson.keys():
            if isinstance(djson[bm], dict):
                subst = f"sed -e 's/BenchmarkX/Benchmark{bm}/g' -e 's/MD5SUMNone/{self.bl_md5}/g' "
                input = "tools/test_bm_template.py"
                out = f"tests/test_bm_{bm}.py"
                cmd = f"{subst} {input} > {out}"
                # print(cmd)
                os.system(cmd)
                with open(out, "a") as f:
                    for k in djson[bm].keys():
                        # Skip cluster since it's a Ceph object, and acceptable is removed
                        if k == "cluster" or k == "acceptable":
                            continue
                        ut = f"""
    def test_valid_{k}(self):
        \"\"\" Basic sanity attribute identity {k} check \"\"\"
        b = benchmarkfactory.get_object(self.archive_dir,
                                        self.cluster, '{bm}', self.iteration)
        self.assertEqual(self.bl_json['{bm}']['{k}'], b.__dict__['{k}'])
"""
                        f.write(ut)
                    tail = """
if __name__ == '__main__':
    unittest.main()
"""
                    f.write(tail)


class BenchJSONEncoder(JSONEncoder):
    def default(self, obj):
        return obj.__dict__


def main(argv):
    setup_loggers(log_fname=log_fname)
    bg = BenchGenerator()
    bg.gen_json()
    bg.verify_json()
    bg.verify_md5()
    bg.gen_utests()
    return 0


if __name__ == '__main__':
    exit(main(sys.argv))
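
# Example invocation (illustrative; run from the top of the cbt tree):
#
#   $ python3 tools/serialise_benchmark.py
#
# regenerates tools/baseline.json and one tests/test_bm_<benchmark>.py per
# entry in BenchGenerator.all_benchmarks, logging to /tmp/cbt-utest.log.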
--------------------------------------------------------------------------------
/client_endpoints/ceph_client_endpoints.py:
--------------------------------------------------------------------------------
import common
import settings
import logging
import time

from .client_endpoints import ClientEndpoints

logger = logging.getLogger("cbt")


class CephClientEndpoints(ClientEndpoints):
    def __init__(self, cluster, config):
        super(CephClientEndpoints, self).__init__(cluster, config)
        self.ceph_cmd = cluster.ceph_cmd
        self.ceph_fuse_cmd = cluster.ceph_fuse_cmd
        self.rbd_cmd = cluster.rbd_cmd
        self.rbd_nbd_cmd = cluster.rbd_nbd_cmd
        self.rbd_fuse_cmd = cluster.rbd_fuse_cmd
        self.tmp_conf = cluster.tmp_conf
        self.mount_cmd = cluster.mount_cmd
        self.client_keyring = cluster.client_keyring
        self.client_secret = cluster.client_secret
        self.pool = None
        self.pool_profile = config.get('pool_profile', 'default')
        self.data_pool = None
        self.data_pool_profile = config.get('data_pool_profile', None)
        self.recov_pool = None
        self.recov_pool_profile = config.get('recov_pool_profile', 'default')
        self.order = config.get('order', 22)
        self.disabled_features = config.get('disabled_features', None)

        # get the list of mons
        self.mon_addrs = []
        mon_hosts = self.cluster.get_mon_hosts()
        for mon_host, mons in mon_hosts.items():
            for mon, addr in mons.items():
                self.mon_addrs.append(addr)

    def get_rbd_name(self, node, ep_num):
        node_part = node.rpartition("@")[2]
        return '%s-%d' % (node_part, ep_num)

    def get_local_rbd_name(self, ep_num):
        return '`%s`-%d' % (common.get_fqdn_cmd(), ep_num)
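
    # Illustration only (not in the original file): for node 'ubuntu@client0'
    # and endpoint 2, get_rbd_name() returns 'client0-2'; get_local_rbd_name()
    # instead embeds a backquoted command (whatever common.get_fqdn_cmd()
    # returns), so the name is resolved on the remote shell at run time.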

    def get_dir_name(self, ep_num):
        return '%s/%s/%s' % (self.mnt_dir, self.name, ep_num)

    def create_fs(self):
        self.pool = self.name
        self.data_pool = self.name
        self.cluster.rmpool(self.pool, self.pool_profile)
        self.cluster.mkpool(self.pool, self.pool_profile, 'cephfs')
        if self.data_pool_profile:
            self.data_pool = '%s-data' % self.name
            self.cluster.rmpool(self.data_pool, self.data_pool_profile)
            self.cluster.mkpool(self.data_pool, self.data_pool_profile, 'cephfs')
        else:
            self.data_pool = self.pool
        fs_new_cmd = 'sudo %s -c %s fs new %s %s %s' % (self.ceph_cmd,
                                                        self.tmp_conf,
                                                        self.name,
                                                        self.pool,
                                                        self.data_pool)
        common.pdsh(settings.getnodes('head'), fs_new_cmd, continue_if_error=False).communicate()

    def mount_fs(self):
        for ep_num in range(0, self.endpoints_per_client):
            dir_name = self.get_dir_name(ep_num)
            for node in common.get_fqdn_list('clients'):
                common.pdsh(node, 'sudo mkdir -p -m0755 -- %s' % dir_name, continue_if_error=False).communicate()
                # FIXME: Apparently something is racy, because without the pause
                # we can get: "mount error 2 = No such file or directory".
                time.sleep(1)
                self.mount_fs_helper(node, dir_name)
            self.endpoints.append(dir_name)
        self.endpoint_type = "directory"
        return self.get_endpoints()

    def mount_fs_helper(self, node, dir_name):
        # overridden by the subclasses (e.g. the kernel/fuse endpoint classes)
        pass

    def create_rbd(self):
        self.pool = self.name
        dp_option = ''

        self.cluster.rmpool(self.pool, self.pool_profile)
        self.cluster.mkpool(self.pool, self.pool_profile, 'rbd')
        if self.data_pool_profile:
            self.data_pool = '%s-data' % self.name
            dp_option = '--data-pool %s' % self.data_pool
            self.cluster.rmpool(self.data_pool, self.data_pool_profile)
            self.cluster.mkpool(self.data_pool, self.data_pool_profile, 'rbd')

        for node in common.get_fqdn_list('clients'):
            for ep_num in range(0, self.endpoints_per_client):
                rbd_name = self.get_rbd_name(node, ep_num)

                # Make the RBD Image
                cmd = '%s -c %s create %s --pool %s --size %s %s --order %s' % (self.rbd_cmd, self.tmp_conf, rbd_name, self.pool, self.endpoint_size, dp_option, self.order)
                common.pdsh(settings.getnodes('head'), cmd, continue_if_error=False).communicate()

                # Disable Features
                if self.disabled_features:
                    cmd = 'sudo %s feature disable %s/%s %s' % (self.rbd_cmd, self.pool, rbd_name, self.disabled_features)
                    common.pdsh(settings.getnodes('head'), cmd, continue_if_error=False).communicate()

    def create_rbd_recovery(self):
        self.pool = '%s-recov' % self.name
        self.cluster.rmpool(self.pool, self.recov_pool_profile)
        self.cluster.mkpool(self.pool, self.recov_pool_profile, 'rbd')
        for node in common.get_fqdn_list('clients'):
            for ep_num in range(0, self.endpoints_per_client):
                rbd_name = '%s-%s' % (self.pool, self.get_rbd_name(node, ep_num))
                self.cluster.mkimage(rbd_name, self.endpoint_size, self.pool, self.data_pool, self.order)

    def mount_rbd(self):
        for ep_num in range(0, self.endpoints_per_client):
            dir_name = self.get_dir_name(ep_num)
            for node in common.get_fqdn_list('clients'):
                rbd_name = self.get_rbd_name(node, ep_num)
                rbd_device = self.map_rbd(node, rbd_name)

                logger.info(rbd_device)

                # mkfs
                common.pdsh(node, 'sudo mkfs.xfs %s' % rbd_device, continue_if_error=False).communicate()

                # mkdir
                common.pdsh(node, 'sudo mkdir -p -m0755 -- %s' % dir_name, continue_if_error=False).communicate()

                # mount
                common.pdsh(node, 'sudo mount -t xfs %s %s' % (rbd_device, dir_name),
                            continue_if_error=False).communicate()
            self.endpoints.append(dir_name)
        self.endpoint_type = "directory"
        return self.get_endpoints()

    def map_rbd(self, node, rbd_name):
        # overridden by the subclasses (e.g. the kernel/nbd/fuse/tcmu endpoint classes)
        pass
--------------------------------------------------------------------------------