├── benchmark
│   ├── __init__.py
│   ├── nullbench.py
│   ├── lis.py
│   └── cephtestrados.py
├── cluster
│   ├── __init__.py
│   └── cluster.py
├── client_endpoints
│   ├── __init__.py
│   ├── cephfsfuse_client_endpoints.py
│   ├── rbdnbd_client_endpoints.py
│   ├── cephfskernel_client_endpoints.py
│   ├── librbd_client_endpoints.py
│   ├── rbdkernel_client_endpoints.py
│   ├── rgws3_client_endpoints.py
│   ├── rbdfuse_client_endpoints.py
│   ├── client_endpoints.py
│   ├── rbdtcmu_client_endpoints.py
│   └── ceph_client_endpoints.py
├── .gitignore
├── post_processing
│   ├── ___init___.py
│   ├── plotter
│   │   ├── __init__.py
│   │   ├── README.md
│   │   ├── simple_plotter.py
│   │   ├── directory_comparison_plotter.py
│   │   └── file_comparison_plotter.py
│   ├── reports
│   │   ├── __init__.py
│   │   └── README.md
│   ├── types.py
│   ├── README.md
│   └── formatter
│       └── README.md
├── requirements.txt
├── run_tests.sh
├── docs
│   ├── cluster.png
│   ├── toplevel.png
│   ├── workloads.png
│   ├── benchmarks.png
│   ├── cbt_utests_gen.png
│   ├── cbt_utests_run.png
│   ├── Workloads.md
│   ├── TestPlanSchema.md
│   └── AutomaticUnitTestGeneration.md
├── example
│   ├── runme
│   ├── wip-mark-testing
│   │   ├── README
│   │   ├── runtests.xfs.yaml
│   │   └── ceph.conf
│   ├── example-3x-radosbench.yaml
│   ├── example-ec-radosbench.yaml
│   ├── example-kvmrbdfio.yaml
│   ├── example-raw.yaml
│   ├── wip-cosbench
│   │   ├── cosbench_ex.yaml
│   │   └── cosbench_ex_ceph.conf
│   ├── example-3x-radosbench-crimson.yaml
│   ├── example-hsbench.yaml
│   ├── bluestore
│   │   ├── runtests.bluestore_example.yaml
│   │   └── mkpart_hdd_nvme_bs.sh
│   ├── example-client_endpoints.yaml
│   └── rbd_fio_test.yml
├── tools
│   ├── crimson
│   │   ├── example_picture.png
│   │   ├── crimson_auto_bench_example.png
│   │   ├── fio_config.yaml
│   │   ├── bench_config.yaml
│   │   ├── seastore_radosbench_run.sh
│   │   ├── seastore_fio_run.sh
│   │   └── seastore_metrics_run.sh
│   ├── fio_visualize_data
│   │   ├── fioplotcommon.py
│   │   └── fio-plot-stats-usage.rst
│   ├── compare_sysctl.py
│   ├── test_bm_template.py
│   ├── mkpartmagna.sh
│   ├── fio_objectstore_tools
│   │   ├── hdd-runs.json
│   │   ├── nvme-runs.json
│   │   ├── bluestore_throttle_tuning.rst
│   │   ├── analyze.py
│   │   └── summarize.py
│   ├── fio-parse-jsons
│   │   └── README.md
│   ├── invariant.yaml
│   ├── fio_common_output_wrapper.py
│   ├── fio-parse-json.py
│   ├── makecephconf.py
│   ├── generate_performance_report.py
│   ├── generate_comparison_performance_report.py
│   ├── config_wizard.py
│   ├── is-regression.py
│   └── serialise_benchmark.py
├── include
│   ├── html
│   │   └── table.html
│   ├── css
│   │   └── table.css
│   ├── performance_report.tex
│   └── js
│       └── table.js
├── COPYING
├── tox.ini
├── setup.sh
├── parsing
│   ├── htmlgenerator.py
│   ├── database.py
│   └── test.py
├── tests
│   ├── test_benchmarkfactory.py
│   ├── test_common.py
│   └── test_bm_nullbench.py
├── client_endpoints_factory.py
├── benchmarkfactory.py
├── log_support.py
├── cbt.py
├── compare.py
├── plot_results.py
├── settings.py
└── statistic.py
/benchmark/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/cluster/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/client_endpoints/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.pyo
3 |
--------------------------------------------------------------------------------
/post_processing/___init___.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/post_processing/plotter/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/post_processing/reports/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pyyaml
2 | lxml
3 | matplotlib
4 |
--------------------------------------------------------------------------------
/run_tests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | set -o pipefail
4 |
5 | tox
6 |
--------------------------------------------------------------------------------
/docs/cluster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceph/cbt/master/docs/cluster.png
--------------------------------------------------------------------------------
/docs/toplevel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceph/cbt/master/docs/toplevel.png
--------------------------------------------------------------------------------
/docs/workloads.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceph/cbt/master/docs/workloads.png
--------------------------------------------------------------------------------
/docs/benchmarks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceph/cbt/master/docs/benchmarks.png
--------------------------------------------------------------------------------
/example/runme:
--------------------------------------------------------------------------------
1 | ../cbt.py --archive=/home/ubuntu/data/foo ./runtests.xfs.yaml
2 |
3 |
--------------------------------------------------------------------------------
/docs/cbt_utests_gen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceph/cbt/master/docs/cbt_utests_gen.png
--------------------------------------------------------------------------------
/docs/cbt_utests_run.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceph/cbt/master/docs/cbt_utests_run.png
--------------------------------------------------------------------------------
/tools/crimson/example_picture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceph/cbt/master/tools/crimson/example_picture.png
--------------------------------------------------------------------------------
/tools/crimson/crimson_auto_bench_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceph/cbt/master/tools/crimson/crimson_auto_bench_example.png
--------------------------------------------------------------------------------
/include/html/table.html:
--------------------------------------------------------------------------------
1 |
2 |
8 |
9 |
--------------------------------------------------------------------------------
/include/css/table.css:
--------------------------------------------------------------------------------
1 | #view > tbody div {
2 | /* height:50; */
3 | }
4 |
5 | th {
6 | background-color:blue;
7 | color:white
8 | }
9 |
10 | td {
11 | border: 1px solid lightgrey;
12 | background-color:#ccc;
13 | width:200px;
14 | }
15 |
--------------------------------------------------------------------------------
/example/wip-mark-testing/README:
--------------------------------------------------------------------------------
1 | This directory contains the current set of tests that are run to verify
2 | that new PRs don't break CBT. It's very incomplete at the moment, but
3 | a start! It also may serve as one of the examples for how to run basic
4 | tests with CBT.
5 |
--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
1 | Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
2 | Upstream-Contact: mnelson@redhat.com
3 | License: LGPL2.1 or later (see COPYING-LGPL2.1)
4 |
5 | Files: *
6 | Copyright: (c) 2013-2015 Red Hat, Inc.
7 | License: LGPL2.1 (see COPYING-LGPL2.1)
8 |
--------------------------------------------------------------------------------
/tools/crimson/fio_config.yaml:
--------------------------------------------------------------------------------
1 | alias: eg.0.classic
2 | fio_rbd_rand_write: 1
3 | client: 1 1
4 | block_size: 4K
5 | osd_cores: 1 2
6 | time: 5
7 | tolerance_time: 5
8 | retry_limit: 2
9 | store: bluestore
10 | iostat: True
11 | ---
12 | alias: eg.1.crimson
13 | fio_rbd_rand_write: 1
14 | client: 1 1
15 | block_size: 4K
16 | osd_cores: 1 2
17 | time: 5
18 | tolerance_time: 5
19 | retry_limit: 2
20 | crimson: True
21 | store: bluestore
22 | iostat: True
23 |
--------------------------------------------------------------------------------
/include/performance_report.tex:
--------------------------------------------------------------------------------
1 | \usepackage[margin=1.5cm]{geometry}
2 | \usepackage{titlesec}
3 | \usepackage{fancyhdr}
4 | \newcommand{\sectionbreak}{\newpage}
5 | \fancypagestyle{plain}{
6 | \fancyhf{}
7 | \renewcommand{\headrulewidth}{0pt}
8 | \renewcommand{\footrulewidth}{0pt}
9 | \fancyhead[R]{\nouppercase{\rightmark}}
10 | \fancyhead[L]{\nouppercase{\leftmark}}
11 | \fancyfoot[C]{\thepage}
12 | \fancyfoot[L]{BUILD}
13 | \fancyfoot[R]{\today}
14 | }
15 | \pagestyle{plain}
--------------------------------------------------------------------------------
/tools/crimson/bench_config.yaml:
--------------------------------------------------------------------------------
1 | # This is an example config file for crimson_auto_bench
2 |
3 | alias: eg.0.classic
4 | rand_write: 1
5 | client: 1 2
6 | block_size: 4K
7 | osd_cores: 1 2
8 | time: 5
9 | tolerance_time: 10
10 | retry_limit: 2
11 | iostat: True
12 | ---
13 | alias: eg.1.crimson
14 | rand_write: 1
15 | client: 1 2
16 | block_size: 4K
17 | osd_cores: 1 2
18 | time: 5
19 | tolerance_time: 10
20 | retry_limit: 2
21 | crimson: True
22 | store: bluestore
23 | iostat: True
24 |
--------------------------------------------------------------------------------
/tools/fio_visualize_data/fioplotcommon.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | import re
4 |
5 | def sort_map_data_by_key(data):
6 | sorteddata = {}
7 | # Sort data dictionary based on key
8 | convert = lambda text: int(text) if text.isdigit() else text
9 | alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
10 | sorted_keys = sorted(data.keys(), key=alphanum_key)
11 | for key in sorted_keys:
12 | sorteddata[key] = data[key]
13 | return sorteddata
14 |
15 |
--------------------------------------------------------------------------------
/benchmark/nullbench.py:
--------------------------------------------------------------------------------
1 | from .benchmark import Benchmark
2 |
3 |
4 | class Nullbench(Benchmark):
5 |
6 | def __init__(self, archive_dir, cluster, config):
7 | super(Nullbench, self).__init__(archive_dir, cluster, config)
8 |
9 | def initialize(self):
10 | super(Nullbench, self).initialize()
11 |
12 | def run(self):
13 | super(Nullbench, self).run()
14 |
15 | def recovery_callback(self):
16 | pass
17 |
18 | def __str__(self):
19 |         return super(Nullbench, self).__str__()
20 |
--------------------------------------------------------------------------------
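Nullbench is the do-nothing benchmark and doubles as a template for writing new ones. Below is a minimal sketch (not part of the repo) of a custom benchmark following the same pattern; the class and its `time` config key are hypothetical, and only the base-class hooks visible above are assumed.

```python
import time

from .benchmark import Benchmark


class Sleepbench(Benchmark):
    """Hypothetical benchmark that just sleeps for a configured duration."""

    def __init__(self, archive_dir, cluster, config):
        super(Sleepbench, self).__init__(archive_dir, cluster, config)
        # 'time' is a hypothetical config key used only in this sketch.
        self.time = config.get('time', 10)

    def run(self):
        super(Sleepbench, self).run()
        time.sleep(self.time)

    def recovery_callback(self):
        pass
```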
/client_endpoints/cephfsfuse_client_endpoints.py:
--------------------------------------------------------------------------------
1 | import common
2 |
3 | from .ceph_client_endpoints import CephClientEndpoints
4 |
5 | class CephfsFuseClientEndpoints(CephClientEndpoints):
6 | def create(self):
7 | self.create_fs()
8 |
9 | def mount(self):
10 | self.mount_fs()
11 |
12 | def mount_fs_helper(self, node, dir_name):
13 | cmd = 'sudo %s -c %s --client_mds_namespace=%s %s' % (self.ceph_fuse_cmd, self.tmp_conf, self.name, dir_name)
14 | common.pdsh(node, cmd, continue_if_error=False).communicate()
15 |
16 | def create_recovery_image(self):
17 | self.create_rbd_recovery()
18 |
--------------------------------------------------------------------------------
/client_endpoints/rbdnbd_client_endpoints.py:
--------------------------------------------------------------------------------
1 | import common
2 |
3 | from .ceph_client_endpoints import CephClientEndpoints
4 |
5 | class RbdNbdClientEndpoints(CephClientEndpoints):
6 | def create(self):
7 | self.create_rbd()
8 |
9 | def mount(self):
10 | self.mount_rbd()
11 |
12 | def map_rbd(self, node, rbd_name):
13 | cmd = 'sudo %s map %s/%s' % (self.rbd_nbd_cmd, self.pool, rbd_name)
14 | stdout, stderr = common.pdsh(node, cmd, continue_if_error=False).communicate()
15 | return stdout.rstrip().rpartition(": ")[2]
16 |
17 | def create_recovery_image(self):
18 | self.create_rbd_recovery()
19 |
--------------------------------------------------------------------------------
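The `rpartition(": ")` idiom in `map_rbd` above (also used by the kernel and tcmu endpoints) strips the `host: ` prefix that pdsh adds to every line of remote output, leaving only the device path printed by `rbd-nbd map`. A minimal illustration, with a hypothetical output line:

```python
# pdsh prefixes each remote output line with "<host>: "; rpartition(": ")
# keeps only the text after the last such prefix.
line = "client01: /dev/nbd0"  # hypothetical pdsh output line
device = line.rstrip().rpartition(": ")[2]
assert device == "/dev/nbd0"
```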
/client_endpoints/cephfskernel_client_endpoints.py:
--------------------------------------------------------------------------------
1 | import common
2 |
3 | from .ceph_client_endpoints import CephClientEndpoints
4 |
5 | class CephfsKernelClientEndpoints(CephClientEndpoints):
6 | def create(self):
7 | self.create_fs()
8 |
9 | def mount(self):
10 | self.mount_fs()
11 |
12 | def mount_fs_helper(self, node, dir_name):
13 | cmd = 'sudo %s %s:/ %s -o name=admin,secretfile=%s,mds_namespace=%s' % (self.mount_cmd, ','.join(self.mon_addrs), dir_name, self.client_secret, self.name)
14 | common.pdsh(node, cmd, continue_if_error=False).communicate()
15 |
16 | def create_recovery_image(self):
17 | self.create_rbd_recovery()
18 |
--------------------------------------------------------------------------------
/post_processing/types.py:
--------------------------------------------------------------------------------
1 | """
2 | A file to contain common type definitions for use in the post-processing
3 | """
4 |
5 | from typing import Union
6 |
7 | # FIO json file data types
8 | JOBS_DATA_TYPE = list[dict[str, Union[str, dict[str, Union[int, float, dict[str, Union[int, float]]]]]]]
9 |
10 | # Common formatter data types
11 | IODEPTH_DETAILS_TYPE = dict[str, str]
12 | COMMON_FORMAT_FILE_DATA_TYPE = dict[str, Union[str, IODEPTH_DETAILS_TYPE]]
13 |
14 | # Common formatter internal data types
15 | INTERNAL_BLOCKSIZE_DATA_TYPE = dict[str, COMMON_FORMAT_FILE_DATA_TYPE]
16 | INTERNAL_FORMATTED_OUTPUT_TYPE = dict[str, INTERNAL_BLOCKSIZE_DATA_TYPE]
17 |
18 | # Plotter types
19 | PLOT_DATA_TYPE = dict[str, dict[str, str]]
20 |
--------------------------------------------------------------------------------
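A hypothetical value that satisfies the aliases above, to make the nesting concrete; the keys shown here are illustrative, not the formatter's schema:

```python
from typing import Union

IODEPTH_DETAILS_TYPE = dict[str, str]

details: IODEPTH_DETAILS_TYPE = {"iops": "8200", "latency": "0.0039"}
file_data: dict[str, Union[str, IODEPTH_DETAILS_TYPE]] = {
    "blocksize": "4096",  # top-level string entry
    "16": details,        # per-queue-depth details
}
```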
/cluster/cluster.py:
--------------------------------------------------------------------------------
1 | class Cluster(object):
2 | def __init__(self, config):
3 | self.config = config
4 | base_tmp = config.get('tmp_dir', '/tmp/cbt')
5 | self.mnt_dir = config.get('mnt_dir', "%s/%s" % (base_tmp, 'mnt'))
6 | self.tmp_dir = "%s/%s" % (base_tmp, config.get('clusterid'))
7 | self.archive_dir = "%s/%s" % (config.get('archive_dir'), config.get('clusterid'))
8 | self.tmp_conf = config.get('tmp_conf', '/tmp/cbt')
9 |
10 | def get_mnt_dir(self):
11 | return self.mnt_dir
12 |
13 | def getclass(self):
14 | return self.__class__.__name__
15 |
16 | def initialize(self):
17 | pass
18 |
19 | def cleanup(self):
20 | pass
21 |
22 | def __str__(self):
23 | return str(self.config)
24 |
--------------------------------------------------------------------------------
/client_endpoints/librbd_client_endpoints.py:
--------------------------------------------------------------------------------
1 | import common
2 |
3 | from .ceph_client_endpoints import CephClientEndpoints
4 |
5 | class LibrbdClientEndpoints(CephClientEndpoints):
6 | def __init__(self, cluster, config):
7 | super(LibrbdClientEndpoints, self).__init__(cluster, config)
8 |
9 | def create(self):
10 | self.create_rbd()
11 |
12 | def mount(self):
13 | # Don't mount anything, just set the endpoints to the pool/rbd names
14 | for ep_num in range(0, self.endpoints_per_client):
15 | rbd_name = self.get_local_rbd_name(ep_num)
16 | self.endpoints.append("%s/%s" % (self.pool, rbd_name))
17 | self.endpoint_type = "rbd"
18 | return self.get_endpoints()
19 |
20 | def create_recovery_image(self):
21 | self.create_rbd_recovery()
22 |
--------------------------------------------------------------------------------
/example/example-3x-radosbench.yaml:
--------------------------------------------------------------------------------
1 | cluster:
2 | head: "ceph@head"
3 | clients: ["ceph@client"]
4 | osds: ["ceph@osd"]
5 | mons: ["ceph@mon"]
6 | osds_per_node: 1
7 | fs: xfs
8 | mkfs_opts: -f -i size=2048
9 | mount_opts: -o inode64,noatime,logbsize=256k
10 | conf_file: /home/ceph/ceph-tools/cbt/example/ceph.conf
11 | ceph.conf: /home/ceph/ceph-tools/cbt/example/ceph.conf
12 | iterations: 3
13 | rebuild_every_test: False
14 | tmp_dir: "/tmp/cbt"
15 | pool_profiles:
16 | replicated:
17 | pg_size: 4096
18 | pgp_size: 4096
19 | replication: 'replicated'
20 | benchmarks:
21 | radosbench:
22 | op_size: [ 4194304, 524288, 4096 ]
23 | write_only: False
24 | time: 300
25 | concurrent_ops: [ 128 ]
26 | concurrent_procs: 1
27 | use_existing: True
28 | pool_profile: replicated
29 |
--------------------------------------------------------------------------------
/example/example-ec-radosbench.yaml:
--------------------------------------------------------------------------------
1 | cluster:
2 | head: "ceph@head"
3 | clients: ["ceph@client"]
4 | osds: ["ceph@osd"]
5 | mons: ["ceph@mon"]
6 | osds_per_node: 1
7 | fs: xfs
8 | mkfs_opts: -f -i size=2048
9 | mount_opts: -o inode64,noatime,logbsize=256k
10 | conf_file: /home/ceph/ceph-tools/cbt/example/ceph.conf
11 | ceph.conf: /home/ceph/ceph-tools/cbt/example/ceph.conf
12 | iterations: 3
13 | rebuild_every_test: False
14 | tmp_dir: "/tmp/cbt"
15 | pool_profiles:
16 | erasure:
17 | pg_size: 4096
18 | pgp_size: 4096
19 | replication: 'erasure'
20 | erasure_profile: 'myec'
21 | benchmarks:
22 | radosbench:
23 | op_size: [ 4194304, 524288, 4096 ]
24 | write_only: False
25 | time: 300
26 | concurrent_ops: [ 128 ]
27 | concurrent_procs: 1
28 | use_existing: True
29 | pool_profile: erasure
30 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | minversion = 1.6
3 | skipsdist = True
4 | envlist = py3,pep8
5 |
6 | [testenv]
7 | usedevelop = True
8 | install_command = pip install -U {opts} {packages}
9 | setenv = VIRTUAL_ENV={envdir}
10 | # deps = -r{toxinidir}/test-requirements.txt
11 | commands =
12 | py.test {posargs:tests}
13 |
14 | [tox:jenkins]
15 | downloadcache = ~/cache/pip
16 |
17 | [testenv:pep8]
18 | usedevelop = False
19 | deps = flake8
20 | commands =
21 | flake8 --config=tox.ini {posargs:.}
22 |
23 | [testenv:cover]
24 | setenv = NOSE_WITH_COVERAGE=1
25 |
26 | [testenv:venv]
27 | deps = -r{toxinidir}/requirements.txt
28 | commands = {posargs:}
29 |
30 | [testenv:devenv]
31 | envdir = devenv
32 | usedevelop = True
33 |
34 | [flake8]
35 | ignore = E501
36 | exclude = .venv,.git,.tox,dist,doc,*lib/python*,*egg,build,tools,__init__.py,docs
37 | show-pep8 = True
38 | # show-source = True
39 | statistics = True
40 |
41 | [hacking]
42 | import_exceptions = testtools.matchers
43 |
--------------------------------------------------------------------------------
/client_endpoints/rbdkernel_client_endpoints.py:
--------------------------------------------------------------------------------
1 | import common
2 |
3 | from .ceph_client_endpoints import CephClientEndpoints
4 |
5 | class RbdKernelClientEndpoints(CephClientEndpoints):
6 |     def __init__(self, cluster, config):
7 | super(RbdKernelClientEndpoints, self).__init__(cluster, config)
8 |
9 | # Kernel RBD breaks if certain features are disabled
10 | self.disabled_features = config.get('disabled_features', 'deep-flatten,fast-diff,object-map')
11 |
12 | def create(self):
13 | self.create_rbd()
14 |
15 | def mount(self):
16 | self.mount_rbd()
17 |
18 | def map_rbd(self, node, rbd_name):
19 | cmd = 'sudo %s map %s/%s --id admin --options noshare' % (self.rbd_cmd, self.pool, rbd_name)
20 | stdout, stderr = common.pdsh(node, cmd, continue_if_error=False).communicate()
21 | return stdout.rstrip().rpartition(": ")[2]
22 |
23 | def create_recovery_image(self):
24 | self.create_rbd_recovery()
25 |
--------------------------------------------------------------------------------
/client_endpoints/rgws3_client_endpoints.py:
--------------------------------------------------------------------------------
1 | from .ceph_client_endpoints import CephClientEndpoints
2 |
3 |
4 | class RgwS3ClientEndpoints(CephClientEndpoints):
5 | def __init__(self, cluster, config):
6 | super(RgwS3ClientEndpoints, self).__init__(cluster, config)
7 |
8 | def create(self):
9 | self.access_key = self.config.get('access_key', '03VIHOWDVK3Z0VSCXBNH')
10 | self.secret_key = self.config.get('secret_key', 'KTTxQIIJV3uNox21vcqxWIpHMUOApWVWsJKdHwgG')
11 | self.user = self.config.get('user', 'cbt')
12 | self.cluster.add_s3_user(self.user, self.access_key, self.secret_key)
13 |
14 | def mount(self):
15 | # Don't actually mount anything, just set the endpoints
16 | urls = self.config.get('urls', self.cluster.get_urls())
17 | for ep_num in range(0, self.endpoints_per_client):
18 | url = urls[ep_num % len(urls)]
19 | self.endpoints.append({"url": url, "access_key": self.access_key, "secret_key": self.secret_key})
20 | self.endpoint_type = "s3"
21 | return self.get_endpoints()
22 |
--------------------------------------------------------------------------------
/example/example-kvmrbdfio.yaml:
--------------------------------------------------------------------------------
1 | # This example runs the kvmrbdfio benchmark against a
2 | # single-host Ceph cluster on a virtual machine, using a
3 | # kernel RBD device as a simulated virtual disk.
4 | # The storage pool backing /dev/rbd1 must be
5 | # replicated using a crush rule such as:
6 | # # ceph osd crush rule create-simple too-few-hosts myvm osd
7 | # and the storage pool is then created with:
8 | # # ceph osd pool create mypool 32 32 too-few-hosts
9 |
10 | cluster:
11 | use_existing: True
12 | head: "myvm"
13 | clients: [ "^../vms.list" ]
14 | osds: ["myvm"]
15 | mons: ["myvm"]
16 | iterations: 2
17 | rebuild_every_test: False
18 | tmp_dir: "/tmp/cbt"
19 | pool_profiles:
20 | replicated:
21 | pg_size: 64
22 | pgp_size: 64
23 | replication: 3
24 | crush_profile: 1
25 | benchmarks:
26 | kvmrbdfio:
27 | fio_cmd: /usr/local/bin/fio
28 | time: 60
29 | ramp: 20
30 | startdelay: 10
31 | rate_iops: 2
32 | iodepth: [2]
33 | numjobs: 1
34 | block_devices: /dev/rbd1
35 | mode: randwrite
36 | # rwmixread: 20
37 | op_size: 4096
38 | vol_size: 64
39 |
40 |
--------------------------------------------------------------------------------
/tools/crimson/seastore_radosbench_run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | TOP_DIR=$(cd $(dirname "$0") && pwd)
4 |
5 | # configurations
6 | RESULT_DIR="$TOP_DIR/results"
7 | BUILD_DIR="~/ceph/build/"
8 | POOL_NAME="test-pool"
9 | TOTAL_ROUND=10
10 | BENCH_SECONDS=1
11 |
12 | # Note: currently only support single OSD to measure write amplification
13 | # correctly.
14 | if [ -e $RESULT_DIR ]; then
15 | echo "'$RESULT_DIR' dir already exists, remove it or select a different one"
16 | exit 1
17 | fi
18 |
19 | mkdir -p $RESULT_DIR
20 | cd $BUILD_DIR
21 | CURRENT_ROUND=0
22 | TARGET_ROUND=$(( CURRENT_ROUND + TOTAL_ROUND ))
23 |
24 | CEPH_DEV=1 ./bin/ceph tell osd.0 dump_metrics 2>&1 | tee $RESULT_DIR/result_${CURRENT_ROUND}_metrics.log
25 | while [ $CURRENT_ROUND -lt $TARGET_ROUND ]
26 | do
27 | (( ++CURRENT_ROUND ))
28 | echo "start round $CURRENT_ROUND ..."
29 | CEPH_DEV=1 ./bin/rados bench -p $POOL_NAME $BENCH_SECONDS write -b 4096 --no-cleanup 2>&1 | tee $RESULT_DIR/result_${CURRENT_ROUND}_bench.log
30 | CEPH_DEV=1 ./bin/ceph tell osd.0 dump_metrics 2>&1 | tee $RESULT_DIR/result_${CURRENT_ROUND}_metrics.log
31 | echo "finish round $CURRENT_ROUND"
32 | echo
33 | sleep 2
34 | done
35 | echo "done!"
36 | cd $TOP_DIR
37 |
--------------------------------------------------------------------------------
/example/example-raw.yaml:
--------------------------------------------------------------------------------
1 | cluster:
2 | use_existing: True
3 | osds_per_node: 24
4 | clients: [192.168.122.249]
5 | iterations: 2
6 |
7 | benchmarks:
8 | rawfio:
9 | iterations: 2
10 | time: 60
11 | ramp: '0'
12 | # iodepth: [16, 32, 64]
13 | iodepth: [16]
14 | numjobs: 1
15 | mode: [ write]
16 | ioengine: libaio
17 | # Block Size
18 | op_size: [4096000]
19 | # size o volume test
20 | vol_size: 1024
21 | direct: 0
22 | # Readahead settings
23 | client_ra: 128
24 | # Use directory from / if you set to False the script will format client_dev
25 | use_dir: False
26 | # When use_dir is true, we'r using the directory to make tests
27 | client_dir: '/mnt'
28 | # When use_dir is False we need a device to format and mount before make tests
29 | client_dev: '/dev/vdb'
30 | # Make filesyste when we use client_dev and use_dir is False
31 | client_mkfs: True
32 | # What is the filesystem
33 | client_fs: xfs
34 | concurrent_procs: 1
35 | fio_cmd: '/usr/bin/fio'
36 | block_devices: [/dev/vdb]
37 | # block_devices: [/dev/sda, /dev/sdb, /dev/sdc, /dev/sdd, /dev/sde, /dev/sdf, /dev/sdg, /dev/sdh, /dev/sdi, /dev/sdj, /dev/sdk, /dev/sdl, /dev/sdm, /dev/sdn]
38 |
39 |
--------------------------------------------------------------------------------
/tools/compare_sysctl.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | import argparse
4 |
5 | def parse_args():
6 | parser = argparse.ArgumentParser()
7 | parser.add_argument("FILE", help="sysctl output files to parse", nargs="+")
8 | args = parser.parse_args()
9 |
10 | return args
11 |
12 | def compare_items(foo, files):
13 | # Write the header
14 | print('"Attribute",', end=' ')
15 | for fn in files:
16 | print(('"%s",' % fn), end=' ')
17 | print()
18 |
19 | for attribute,items in sorted(foo.items()):
20 | if len(items) < len(files) or not all_same(list(items.values())):
21 | print('"%s",' % attribute, end=' ')
22 | for fn in files:
23 | if fn in items:
24 | print(('"%s",' % items[fn]), end=' ')
25 | else:
26 | print('"",', end=' ')
27 | print()
28 |
29 | def all_same(items):
30 | return all(x == items[0] for x in items)
31 |
32 | if __name__ == '__main__':
33 | kvdict = {}
34 | ctx = parse_args()
35 | for fn in ctx.FILE:
36 |         with open(fn, 'r') as f:
37 |             for line in f:
38 |                 (key, value) = line.rstrip('\r\n').split(' = ', 1)
39 |                 kvdict.setdefault(key, {}).update({fn: value})
40 | compare_items(kvdict, ctx.FILE)
41 |
42 |
--------------------------------------------------------------------------------
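In miniature, the filter in `compare_items` above only reports attributes that differ between files or are missing from one of them; identical values are suppressed. The file names and values here are hypothetical:

```python
def all_same(items):
    return all(x == items[0] for x in items)

kvdict = {
    "vm.swappiness": {"a.txt": "60", "b.txt": "10"},         # differs: reported
    "kernel.pid_max": {"a.txt": "32768", "b.txt": "32768"},  # same: suppressed
}
files = ["a.txt", "b.txt"]
for attribute, items in sorted(kvdict.items()):
    if len(items) < len(files) or not all_same(list(items.values())):
        print(attribute, items)  # prints only vm.swappiness
```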
/tools/test_bm_template.py:
--------------------------------------------------------------------------------
1 | """ Unit tests for the BenchmarkX class """
2 |
3 | import unittest
4 | import hashlib
5 | import json
6 | import benchmarkfactory
7 | import settings
8 | from cluster.ceph import Ceph
9 |
10 |
11 | class TestBenchmarkX(unittest.TestCase):
12 | """ Sanity tests for BenchmarkX """
13 | archive_dir = "/tmp"
14 | iteration = {'acceptable': [1,2,3], 'iteration': 0}
15 | cluster = {}
16 | cl_name = "tools/invariant.yaml"
17 | bl_name = "tools/baseline.json"
18 | bl_json = {}
19 | bl_md5 = 'MD5SUMNone'
20 | md5_returned = None
21 |
22 | @classmethod
23 | def setUpClass(cls):
24 | with open(cls.bl_name, 'rb') as f:
25 | data = f.read()
26 |
27 | cls.md5_returned = hashlib.md5(data).hexdigest()
28 | settings.mock_initialize(config_file=cls.cl_name)
29 | cls.cluster = Ceph.mockinit(settings.cluster)
30 | with open(cls.bl_name, 'r') as f:
31 | cls.bl_json = json.load(f)
32 |
33 |
34 | @classmethod
35 | def tearDownClass(cls):
36 | cls.cluster = None
37 | cls.bl_json = None
38 |
39 | def test_valid_baseline(self):
40 | """ Verify the baseline has not been compromised """
41 | self.assertEqual( self.bl_md5, str(self.md5_returned) )
42 |
--------------------------------------------------------------------------------
/client_endpoints/rbdfuse_client_endpoints.py:
--------------------------------------------------------------------------------
1 | import common
2 | import logging
3 |
4 | from .ceph_client_endpoints import CephClientEndpoints
5 |
6 | logger = logging.getLogger("cbt")
7 |
8 |
9 | class RbdFuseClientEndpoints(CephClientEndpoints):
10 |     def __init__(self, cluster, config):
11 | super().__init__(cluster, config)
12 |
13 | def create(self):
14 | self.create_rbd()
15 |
16 | def mount(self):
17 | self.mount_rbd()
18 |
19 | def map_rbd(self, node, rbd_name):
20 | fuse_dir = '%s/%s-fuse' % (self.mnt_dir, self.name)
21 |
22 | # Check to make sure that fuse is not already mapped.
23 | stdout, stderr = common.pdsh(node, 'sudo ps aux | grep %s' % self.rbd_fuse_cmd, continue_if_error=False).communicate()
24 | if fuse_dir in stdout:
25 | raise ValueError('RBD-Fuse was already mapped at %s!' % fuse_dir)
26 | common.pdsh(node, 'sudo mkdir -p -m0755 -- %s' % fuse_dir, continue_if_error=False).communicate()
27 | common.pdsh(node, 'sudo %s %s -p %s' % (self.rbd_fuse_cmd, fuse_dir, self.pool), continue_if_error=False).communicate()
28 | logger.info('Mapped RBD-Fuse pool %s to %s' % (self.pool, fuse_dir))
29 |
30 | return '%s/%s' % (fuse_dir, rbd_name)
31 |
32 | def create_recovery_image(self):
33 | self.create_rbd_recovery()
34 |
--------------------------------------------------------------------------------
/example/wip-cosbench/cosbench_ex.yaml:
--------------------------------------------------------------------------------
1 | cluster:
2 | user: 'cbt'
3 | head: "cadmin"
4 | clients: ["cadmin"]
5 | osds: ["inf1", "inf2", "inf3"]
6 | mons:
7 | inf1:
8 | a: "192.168.110.51:6789"
9 | inf2:
10 | b: "192.168.110.52:6789"
11 | inf3:
12 | c: "192.168.110.53:6789"
13 | rgws: ["inf1", "inf2", "inf3"]
14 | osds_per_node: 1
15 | fs: 'xfs'
16 | mkfs_opts: '-f -i size=2048'
17 | mount_opts: '-o inode64,noatime,logbsize=256k'
18 | conf_file: '/home/cbt/cbt/runs/test2.ceph.conf'
19 | iterations: 1
20 | use_existing: True
21 | clusterid: "ceph"
22 | tmp_dir: "/tmp/cbt"
23 | pool_profiles:
24 | rbd:
25 | pg_size: 64
26 | pgp_size: 64
27 | replication: 2
28 | benchmarks:
29 | cosbench:
30 | obj_size: [64KB]
31 | osd_ra: [4096]
32 | workers: 1
33 | containers_max: 10
34 | objects_max: 100
35 | mode: [write]
36 | cosbench_dir: '/home/cbt/cb/0.4.1.0'
37 | cosbench_xml_dir: '/home/cbt/cb/xml_dir'
38 | controller: "cadmin"
39 | auth:
40 | config: username=cosbench:operator;password=intel2012;url=http://192.168.110.51:7480/auth/v1.0;retry=9
41 | template: [default]
42 | rampup: 10
43 | runtime: 100
44 | rampdown: 10
45 | containers: ["u(1,100)"]
46 | objects: ["u(1,100)"]
47 | ratio: [100]
48 |
--------------------------------------------------------------------------------
/client_endpoints/client_endpoints.py:
--------------------------------------------------------------------------------
1 | class ClientEndpoints(object):
2 | def __init__(self, cluster, config):
3 | self.config = config
4 | self.cluster = cluster
5 | self.driver = self.config.get('driver', None)
6 | self.name = 'cbt-%s' % self.driver
7 | self.mnt_dir = cluster.mnt_dir
8 | self.endpoint_size = self.config.get('endpoint_size', 4096)
9 | self.endpoint_type = None
10 | self.endpoints_per_client = self.config.get('endpoints_per_client', 1)
11 | self.endpoints = []
12 | self.initialized = False
13 |
14 | def initialize(self):
15 | self.create()
16 | self.mount()
17 | self.initialized = True
18 |
19 | def get_initialized(self):
20 | return self.initialized
21 |
22 | def get_endpoints(self):
23 | return self.endpoints
24 |
25 | def get_endpoint_type(self):
26 | return self.endpoint_type
27 |
28 | def get_endpoints_per_client(self):
29 | return self.endpoints_per_client
30 |
31 | def get_endpoint_size(self):
32 | return self.endpoint_size
33 |
34 | def create(self):
35 | pass
36 |
37 | def mount(self):
38 | pass
39 |
40 | def umount(self):
41 | pass
42 |
43 | def remove(self):
44 | pass
45 |
46 | def create_recovery_image(self):
47 | pass
48 |
--------------------------------------------------------------------------------
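A sketch of the driver lifecycle this base class defines: a subclass fills in `create()` and `mount()`, and callers read the endpoint list after `initialize()`. The subclass and cluster stub below are hypothetical, not CBT drivers; only the `ClientEndpoints` class above is assumed.

```python
class FakeCluster:
    mnt_dir = '/tmp/cbt/mnt'  # ClientEndpoints only reads cluster.mnt_dir


class DummyEndpoints(ClientEndpoints):
    def create(self):
        pass  # a real driver would provision images or filesystems here

    def mount(self):
        self.endpoints = ['%s/ep0' % self.mnt_dir]
        self.endpoint_type = 'directory'


ep = DummyEndpoints(FakeCluster(), {'driver': 'dummy'})
ep.initialize()  # runs create() then mount()
assert ep.get_endpoints() == ['/tmp/cbt/mnt/ep0']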
/client_endpoints/rbdtcmu_client_endpoints.py:
--------------------------------------------------------------------------------
1 | import common
2 |
3 | from .ceph_client_endpoints import CephClientEndpoints
4 |
5 |
6 | class RbdTcmuClientEndpoints(CephClientEndpoints):
7 | def create(self):
8 | self.create_rbd()
9 |
10 | def mount(self):
11 | self.mount_rbd()
12 |
13 | def map_rbd(self, node, rbd_name):
14 | common.pdsh(node, f'sudo targetcli /backstores/user:rbd create cfgstring={self.pool}/{rbd_name} name={rbd_name} size={self.endpoint_size}M',
15 | continue_if_error=False).communicate()
16 | stdout, stderr = common.pdsh(node, f'sudo targetcli /loopback create', continue_if_error=False).communicate()
17 | wwn = stdout.rstrip().rpartition(": ")[2].rpartition(" ")[2][:-1]
18 | common.pdsh(node, f'sudo targetcli /loopback/{wwn}/luns create /backstores/user:rbd/{rbd_name}', continue_if_error=False).communicate()
19 | stdout, stderr = common.pdsh(node, f'cat /sys/kernel/config/target/loopback/{wwn}/tpgt_1/address', continue_if_error=False).communicate()
20 | address = stdout.rstrip().rpartition(": ")[2]
21 | stdout, stderr = common.pdsh(node, f'ls /sys/class/scsi_disk/{address}:0/device/block', continue_if_error=False).communicate()
22 | return '/dev/%s' % stdout.rstrip().rpartition(": ")[2]
23 |
24 | def create_recovery_image(self):
25 | self.create_rbd_recovery()
26 |
--------------------------------------------------------------------------------
/tools/mkpartmagna.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | SYSPART=`df | grep "/$" | cut -d" " -f1 | cut -d"/" -f3`
4 | if [[ $SYSPART == "mapper" ]]
5 | then
6 | echo "System disk is on an LVM - determining underlying block device..."
7 | SYSPART=`pvscan | grep -i root | awk -F " " '{print $2}' | awk -F "/" '{print $3}' | cut -c1,2,3`
8 | fi
9 | diskid='wwn'
10 | echo "System on $SYSPART"
11 |
12 | failed()
13 | {
14 | sleep 2 # Wait for the kernel to stop whining
15 | echo "Hrm, that didn't work. Calling for help."
16 | # sudo ipmitool chassis identify force
17 | echo "RAID Config failed: ${1}"
18 | while [ 1 ]; do sleep 10; done
19 | exit 1;
20 | }
21 |
22 | fakefailed()
23 | {
24 | echo "ignoring megacli errors and forging on: ${1}"
25 | }
26 |
27 | echo "Making label on OSD devices"
28 |
29 | # Data
30 | i=0
31 | for DEV in `ls -al /dev/disk/by-id | grep $diskid | grep -v part | cut -f3 -d"/" | tr '\n' ' '`
32 | do
33 | if [[ ! $SYSPART =~ $DEV ]]
34 | then
35 | sudo parted -s -a optimal /dev/$DEV mklabel gpt || failed "mklabel $DEV"
36 | echo "Creating osd device $i data label"
37 |         echo "sudo parted -s -a optimal /dev/$DEV mkpart osd-device-$i-data 1000M 100%"
38 | sudo parted -s -a optimal /dev/$DEV mkpart osd-device-$i-journal 0% 1000M || failed "mkpart $i-journal"
39 | sudo parted -s -a optimal /dev/$DEV mkpart osd-device-$i-data 1000M 100% || failed "mkpart $i-data"
40 | let "i++"
41 | fi
42 | done
43 |
--------------------------------------------------------------------------------
/tools/crimson/seastore_fio_run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | TOP_DIR=$(cd $(dirname "$0") && pwd)
4 |
5 | # configurations
6 | RESULT_DIR="$TOP_DIR/results"
7 | BUILD_DIR="$HOME/ceph/build/"
8 | FIO_CONF="$HOME/ceph/rbd_write.fio"
9 | POOL_NAME="rbd"
10 | POOL_NUM=128
11 | TOTAL_ROUND=3
12 | IMAGE_NAME="fio_test"
13 |
14 | # Note: currently only a single OSD is supported, so that write
15 | # amplification is measured correctly.
16 | if [ -e $RESULT_DIR ]; then
17 | echo "'$RESULT_DIR' dir already exists, remove it or select a different one"
18 | exit 1
19 | fi
20 |
21 | mkdir -p $RESULT_DIR
22 | cd $BUILD_DIR
23 | CURRENT_ROUND=0
24 | TARGET_ROUND=$(( CURRENT_ROUND + TOTAL_ROUND ))
25 |
26 | CEPH_DEV=1 ./bin/ceph osd pool create $POOL_NAME $POOL_NUM $POOL_NUM
27 | CEPH_DEV=1 ./bin/ceph osd pool set --yes-i-really-mean-it $POOL_NAME size 1 && ./bin/ceph osd pool set --yes-i-really-mean-it $POOL_NAME min_size 1
28 | CEPH_DEV=1 ./bin/rbd create $IMAGE_NAME --size 2G --image-format=2 --rbd_default_features=3
29 |
30 | CEPH_DEV=1 ./bin/ceph tell osd.0 dump_metrics 2>&1 | tee $RESULT_DIR/result_${CURRENT_ROUND}_metrics.log
31 | while [ $CURRENT_ROUND -lt $TARGET_ROUND ]
32 | do
33 | (( ++CURRENT_ROUND ))
34 | echo "start round $CURRENT_ROUND ..."
35 | CEPH_DEV=1 fio $FIO_CONF --output=$RESULT_DIR/result_${CURRENT_ROUND}_bench.log
36 | CEPH_DEV=1 ./bin/ceph tell osd.0 dump_metrics 2>&1 | tee $RESULT_DIR/result_${CURRENT_ROUND}_metrics.log
37 | echo "finish round $CURRENT_ROUND"
38 | echo
39 | sleep 2
40 | done
41 | echo "done!"
42 | cd $TOP_DIR
43 |
--------------------------------------------------------------------------------
/example/example-3x-radosbench-crimson.yaml:
--------------------------------------------------------------------------------
1 | cluster:
2 | head: "ceph@ubulap"
3 | clients: ["ceph@ubulap"]
4 | osds: ["ceph@ubulap"]
5 | mons: ["ceph@ubulap"]
6 | osds_per_node: 1
7 | conf_file: /home/rzarzynski/ceph-1/build/ceph.conf
8 | # The PID file is important for perf stat. crimson-osd should use the
9 | # default location.
10 | pid_dir: /home/rzarzynski/ceph-1/build/out
11 | iterations: 3
12 | rebuild_every_test: False
13 | tmp_dir: "/tmp/cbt"
14 | ceph_cmd: /home/rzarzynski/ceph-1/build/bin/ceph
15 | rados_cmd: /home/rzarzynski/ceph-1/build/bin/rados
16 | pool_profiles:
17 | replicated:
18 | pg_size: 128
19 | pgp_size: 128
20 | replication: 'replicated'
21 | benchmarks:
22 | radosbench:
23 | op_size: [ 4096, 8192 ]
24 | write_only: True
25 | time: 3
26 | concurrent_ops: [ 16 ]
27 | concurrent_procs: 1
28 | # crimson must be already deployed. It can be done with vstart.sh:
29 | # MDS=0 MGR=1 OSD=1 MON=1 ../src/vstart.sh -n --without-dashboard \
30 | # --memstore -X -o "memstore_device_bytes=34359738368" --crimson \
31 | # --nodaemon --redirect-output
32 | use_existing: True
33 | pool_profile: replicated
34 | acceptable:
35 | bandwidth: '(or (greater) (near 0.05))'
36 | iops_avg: '(or (greater) (near 0.05))'
37 | iops_stddev: '(or (less) (near 0.05))'
38 | latency_avg: '(or (less) (near 0.05))'
39 | monitoring_profiles:
40 | perf:
41 | nodes:
42 | - osds
43 | args: 'stat -p {pid} -o {perf_dir}/perf_stat.{pid}'
44 |
--------------------------------------------------------------------------------
/tools/fio_objectstore_tools/hdd-runs.json:
--------------------------------------------------------------------------------
1 | {
2 | "base": {
3 | "runtime": 3600,
4 | "devices": {
5 | "nvme": {
6 | "device_type": "ssd",
7 | "block_wal_path": "/dev/nvme0n1p2",
8 | "block_db_path": "/dev/nvme0n1p3",
9 | "block_path": "/dev/nvme0n1p4",
10 | "target_dir": "/mnt/sjust/bluestore-nvme"
11 | },
12 | "nvme_plain": {
13 | "device_type": "ssd",
14 | "block_path": "/dev/nvme3n1p4",
15 | "target_dir": "/mnt/sjust/bluestore-nvme-plain"
16 | },
17 | "hdd": {
18 | "device_type": "hdd",
19 | "block_path": "/dev/sdh2",
20 | "target_dir": "/mnt/sjust/bluestore-hdd"
21 | },
22 | "hdd_nvme_db": {
23 | "device_type": "hdd",
24 | "block_wal_path": "/dev/nvme2n1p2",
25 | "block_db_path": "/dev/nvme2n1p3",
26 | "block_path": "/dev/sdg2",
27 | "target_dir": "/mnt/sjust/bluestore-hdd-nvme-db"
28 | }
29 | },
30 | "size": 512,
31 | "filesize": 4,
32 | "preextend": "true",
33 | "qd": 1024,
34 | "numjobs": 32,
35 | "tcio_hdd": 1048576,
36 | "bluestore_deferred_throttle": [
37 | 2,
38 | 4,
39 | 8,
40 | 12
41 | ],
42 | "bluestore_throttle": [
43 | 8,
44 | 12,
45 | 16,
46 | 20,
47 | 24
48 | ],
49 | "vary_bluestore_throttle_period": 30
50 | },
51 | "runs": {
52 | "bs": [
53 | 4,
54 | 512
55 | ],
56 | "target_device": [
57 | "nvme",
58 | "nvme_plain"
59 | ],
60 | "run": [
61 | 0,
62 | 1
63 | ]
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/tools/fio_objectstore_tools/nvme-runs.json:
--------------------------------------------------------------------------------
1 | {
2 | "base": {
3 | "runtime": 3600,
4 | "devices": {
5 | "nvme": {
6 | "device_type": "ssd",
7 | "block_wal_path": "/dev/nvme0n1p2",
8 | "block_db_path": "/dev/nvme0n1p3",
9 | "block_path": "/dev/nvme0n1p4",
10 | "target_dir": "/mnt/sjust/bluestore-nvme"
11 | },
12 | "nvme_plain": {
13 | "device_type": "ssd",
14 | "block_path": "/dev/nvme3n1p4",
15 | "target_dir": "/mnt/sjust/bluestore-nvme-plain"
16 | },
17 | "hdd": {
18 | "device_type": "hdd",
19 | "block_path": "/dev/sdh2",
20 | "target_dir": "/mnt/sjust/bluestore-hdd"
21 | },
22 | "hdd_nvme_db": {
23 | "device_type": "hdd",
24 | "block_wal_path": "/dev/nvme2n1p2",
25 | "block_db_path": "/dev/nvme2n1p3",
26 | "block_path": "/dev/sdg2",
27 | "target_dir": "/mnt/sjust/bluestore-hdd-nvme-db"
28 | }
29 | },
30 | "size": 512,
31 | "filesize": 4,
32 | "preextend": "true",
33 | "qd": 1024,
34 | "numjobs": 32,
35 | "tcio_hdd": 1048576,
36 | "tcio_ssd": 8192,
37 | "bluestore_deferred_throttle": [
38 | 0.25,
39 | 0.5,
40 | 1,
41 | 2,
42 | 4
43 | ],
44 | "bluestore_throttle": [
45 | 0.25,
46 | 0.5,
47 | 1,
48 | 2,
49 | 4,
50 | 6,
51 | 8
52 | ],
53 | "vary_bluestore_throttle_period": 30
54 | },
55 | "runs": {
56 | "bs": [
57 | 4,
58 | 512
59 | ],
60 | "target_device": [
61 | "nvme"
62 | ],
63 | "run": [
64 | 0
65 | ]
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/post_processing/README.md:
--------------------------------------------------------------------------------
1 | # Post Processing of CBT results
2 |
3 | ## Description
4 | A set of tools that can be used to post-process the data from any run of CBT. It provides a report in GitHub markdown,
5 | and optionally pdf, format that contains a set of hockey-stick curves generated from the CBT run.
6 | The tool set consists of three separate tools that can be run stand-alone. The eventual aim is to integrate the post
7 | processing into CBT once more benchmark types are supported.
8 |
9 | There are three components to the post processing which are:
10 |
11 | * [formatter](formatter/README.md)
12 | * [plotter](plotter/README.md)
13 | * [reports](reports/README.md)
14 |
15 |
16 | ## Supported benchmark tools
17 | This list will be added to as extra benchmark tools are supported.
18 | * fio
19 |
20 | ## Dependencies
21 | The post-processing tools introduce some new dependencies that must be installed for them to run correctly
22 |
23 | ### python dependencies
24 | The following python modules are dependencies for this work:
25 | * matplotlib
26 | * mdutils
27 |
28 | Both have been added to the requirements.txt file in the CBT project.
29 |
30 | ### Dependencies for pdf report generation
31 | To generate a report in pdf format there are two additional requirements:
32 |
33 | A working install of tex is required on the base operating system, which can be installed using the package manager.
34 | For Red Hat based OSes this can be achieved by running `yum install texlive`
35 |
36 | [Pandoc](https://pandoc.org/), which can be installed on most Linux distributions using the included package manager.
37 | For Red Hat based OSes use `yum install pandoc`
38 |
39 | The minimum pandoc level tested is `2.14.0.3` which is available for RHEL 9
40 |
--------------------------------------------------------------------------------
/post_processing/plotter/README.md:
--------------------------------------------------------------------------------
1 | # Plotter
2 | Draws the hockey stick plots for a benchmark run from the data produced by the formatter. These are png files, with one
3 | plot produced per block size used.
4 |
5 | There is also a python class that will produce comparison plots of two or more different CBT runs for one or more block
6 | sizes.
7 | Due to the tools used there are only 6 unique colours available for the plot lines, so it is recommended to limit the
8 | comparison to 6 or fewer files or directories.
9 |
10 | ## Standalone script
11 | A wrapper script is provided only for producing comparison plots.
12 | ```
13 | plot_comparison.py --files=
14 | --directories=
15 | --output_directory=
16 | --labels=
17 | ```
18 | where
19 | - `--output_directory` Required. The full path to a directory to store the plots. Will be created if it doesn't exist
20 | - `--files` Optional. A comma separated list of files to plot on a single axis
21 | - `--directories` Optional. A comma separated list of directories to plot. A single plot will be produced per blocksize
22 | - `--labels` Optional. Comma separated list of labels to use for the lines on the comparison plot, in the same order as
23 | --files or --directories.
24 |
25 | One of `--files` or `--directories` must be provided.
26 |
27 | Full help text is provided by using `--help` with the script
28 |
29 | ## Example
30 |
31 | ```bash
32 | PYTHONPATH=/cbt /cbt/tools/plot_comparison.py --directories="/tmp/ch_cbt_main_run,/tmp/ch_cbt_sandbox_run" --output_directory="/tmp/main_sb_comparisons"
33 | ```
--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # script to install CBT dependencies and tools for active benchmarking
4 |
5 | sudo yum -y install deltarpm
6 | sudo yum check-update
7 | sudo yum -y update
8 | sudo yum install -y psmisc util-linux coreutils xfsprogs e2fsprogs findutils \
9 | git wget bzip2 make automake gcc gcc-c++ kernel-devel perf blktrace lsof \
10 | redhat-lsb sysstat screen python3-yaml ipmitool dstat zlib-devel ntp
11 |
12 | MIRROR="http://mirror.hmc.edu/fedora/linux/releases/22/Everything/x86_64/os/Packages"
13 |
14 | wget ${MIRROR}/p/pdsh-2.31-3.fc22.x86_64.rpm
16 | wget ${MIRROR}/p/pdsh-rcmd-ssh-2.31-3.fc22.x86_64.rpm
17 | wget ${MIRROR}/c/collectl-4.0.0-1.fc22.noarch.rpm
18 | wget ${MIRROR}/i/iftop-1.0-0.9.pre4.fc22.x86_64.rpm
19 | wget ${MIRROR}/i/iperf3-3.0.10-1.fc22.x86_64.rpm
20 |
21 | sudo yum localinstall -y *.rpm
22 |
23 | git clone https://github.com/axboe/fio.git
24 | git clone https://github.com/andikleen/pmu-tools.git
25 | git clone https://github.com/brendangregg/FlameGraph
26 |
27 | cd ${HOME}/fio
28 | ./configure
29 | make
30 |
31 | # wget < Red Hat Ceph Storage ISO URL >
32 | # sudo mount -o loop Ceph-*-dvd.iso /mnt
33 | sudo yum localinstall -y /mnt/{MON,OSD}/*.rpm
34 | sudo yum localinstall -y /mnt/Installer/ceph-deploy-*.rpm
35 |
36 | sudo sed -i 's/Defaults requiretty/#Defaults requiretty/g' /etc/sudoers
37 | sudo setenforce 0
38 | ( awk '!/SELINUX=/' /etc/selinux/config ; echo "SELINUX=disabled" ) > /tmp/x
39 | sudo mv /tmp/x /etc/selinux/config
40 | rpm -qa firewalld | grep firewalld && sudo systemctl stop firewalld && sudo systemctl disable firewalld
41 | sudo systemctl stop irqbalance
42 | sudo systemctl disable irqbalance
43 | sudo systemctl start ntpd.service
44 | sudo systemctl enable ntpd.service
45 |
--------------------------------------------------------------------------------
/example/wip-cosbench/cosbench_ex_ceph.conf:
--------------------------------------------------------------------------------
1 | [global]
2 | osd pool default size = 1
3 | auth cluster required = none
4 | auth service required = none
5 | auth client required = none
6 | keyring = /tmp/cbt/ceph/keyring
7 | osd pg bits = 8
8 | osd pgp bits = 8
9 | log to syslog = false
10 | log file = /tmp/cbt/ceph/log/$name.log
11 | public network = 192.168.110.0/24
12 | cluster network = 192.168.110.0/24
13 | rbd cache = true
14 | osd scrub load threshold = 0.01
15 | osd scrub min interval = 137438953472
16 | osd scrub max interval = 137438953472
17 | osd deep scrub interval = 137438953472
18 | osd max scrubs = 16
19 | filestore merge threshold = 40
20 | filestore split multiple = 8
21 | osd op threads = 8
22 | mon pg warn max object skew = 100000
23 | mon pg warn min per osd = 0
24 | mon pg warn max per osd = 32768
25 |
26 | [mon]
27 | mon data = /tmp/cbt/ceph/mon.$id
28 |
29 | [mon.a]
30 | host = inf1
31 | mon addr = 192.168.110.51:6789
32 |
33 | [mon.b]
34 | host = inf2
35 | mon addr = 192.168.110.52:6789
36 |
37 | [mon.c]
38 | host = inf3
39 | mon addr = 192.168.110.53:6789
40 |
41 | [osd.0]
42 | host = inf1
43 | osd data = /tmp/cbt/mnt/osd-device-0-data
44 | osd journal = /dev/disk/by-partlabel/osd-device-0-journal
45 |
46 | [osd.1]
47 | host = inf2
48 | osd data = /tmp/cbt/mnt/osd-device-0-data
49 | osd journal = /dev/disk/by-partlabel/osd-device-0-journal
50 |
51 | [osd.2]
52 | host = inf3
53 | osd data = /tmp/cbt/mnt/osd-device-0-data
54 | osd journal = /dev/disk/by-partlabel/osd-device-0-journal
55 |
--------------------------------------------------------------------------------
/example/wip-mark-testing/runtests.xfs.yaml:
--------------------------------------------------------------------------------
1 | cluster:
2 | user: 'nhm'
3 | head: "burnupiX"
4 | clients: ["burnupiY"]
5 | osds: ["burnupiX"]
6 | mons:
7 | burnupiY:
8 | a: "192.168.10.2:6789"
9 | osds_per_node: 4
10 | fs: 'xfs'
11 | mkfs_opts: '-f -i size=2048'
12 | mount_opts: '-o inode64,noatime,logbsize=256k'
13 | conf_file: '/home/nhm/src/cbt/example/wip-mark-testing/ceph.conf'
14 | iterations: 1
15 | use_existing: False
16 | clusterid: "ceph"
17 | tmp_dir: "/tmp/cbt"
18 | pool_profiles:
19 | radosbench:
20 | pg_size: 1024
21 | pgp_size: 1024
22 | replication: 3
23 | rbd:
24 | pg_size: 4096
25 | pgp_size: 4096
26 | replication: 3
27 | benchmarks:
28 | radosbench:
29 | op_size: [4194304, 131072, 4096]
30 | write_only: False
31 | time: 300
32 | concurrent_ops: [32]
33 | concurrent_procs: 4
34 | osd_ra: [4096]
35 | pool_profile: 'radosbench'
36 | librbdfio:
37 | time: 10
38 | vol_size: 2048
39 | mode: ['read', 'write', 'randread', 'randwrite', 'rw', 'randrw']
40 | rwmixread: 50
41 | op_size: [4194304, 131072, 4096]
42 | procs_per_volume: [1]
43 | volumes_per_client: [1]
44 | iodepth: [32]
45 | osd_ra: [4096]
46 | cmd_path: '/home/nhm/src/fio/fio'
47 | pool_profile: 'rbd'
48 | log_avg_msec: 100
49 | rbdfio:
50 | time: 10
51 | vol_size: 2048
52 | mode: ['read', 'write', 'randread', 'randwrite', 'rw', 'randrw']
53 | rwmixread: 50
54 | op_size: [4194304, 131072, 4096]
55 | concurrent_procs: [1]
56 | iodepth: [32]
57 | osd_ra: [4096]
58 | cmd_path: '/home/nhm/src/fio/fio'
59 | pool_profile: 'rbd'
60 | log_avg_msec: 100
61 | # Optionally disable fine-grained logging by fio
62 | log_iops: False
63 | log_bw: False
64 | log_lat: False
65 |
--------------------------------------------------------------------------------
/example/example-hsbench.yaml:
--------------------------------------------------------------------------------
1 | cluster:
2 | user: 'user'
3 | head: "localhost"
4 | clients: ["localhost"]
5 | osds: ["localhost"]
6 | mons:
7 | localhost:
8 | a: "127.0.0.1:6789"
9 | mgrs:
10 | localhost:
11 | a: ~
12 | rgws:
13 | localhost:
14 | client.radosgw.gateway:
15 | host: "127.0.0.1"
16 | osds_per_node: 1
17 | fs: 'xfs'
18 | mkfs_opts: '-f -i size=2048'
19 | mount_opts: '-o inode64,noatime,logbsize=256k'
20 | conf_file: '/home/user/ceph_tests/ceph.conf'
21 | iterations: 1
22 | use_existing: False
23 | clusterid: "ceph"
24 | tmp_dir: "/tmp/cbt"
25 | ceph-authtool_cmd: "/usr/local/bin/ceph-authtool"
26 | ceph-osd_cmd: "env -i TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728 /usr/local/bin/ceph-osd"
27 | ceph-rgw_cmd: "env -i TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728 /usr/local/bin/radosgw"
28 | ceph-mon_cmd: "/usr/local/bin/ceph-mon"
29 | ceph-run_cmd: "/usr/local/bin/ceph-run"
30 | rados_cmd: "/usr/local/bin/rados"
31 | ceph_cmd: "/usr/local/bin/ceph"
32 | rbd_cmd: "/usr/local/bin/rbd"
33 | ceph-mgr_cmd: "/usr/local/bin/ceph-mgr"
34 | radosgw-admin_cmd: "/usr/local/bin/radosgw-admin"
35 |
36 | osd_valgrind: "massif"
37 | pool_profiles:
38 | rgw:
39 | pg_size: 128
40 | pgp_size: 128
41 | replication: 1
42 | rgw_pools:
43 | control: rgw
44 | meta: rgw
45 | log: rgw
46 | buckets: rgw
47 | buckets_data: rgw
48 | buckets_index: rgw
49 |
50 | client_endpoints:
51 | hsbench:
52 | driver: 's3'
53 | endpoints_per_client: 1
54 |
55 | benchmarks:
56 | hsbench:
57 | cmd_path: '/home//go/src/github.com/markhpc/hsbench/hsbench'
58 | client_endpoints: 'hsbench'
59 | objects: 10000
60 | duration: -1
61 | buckets: 1
62 | threads: 16
63 | size: '4K'
64 | report_interval: 10
65 |
--------------------------------------------------------------------------------
/parsing/htmlgenerator.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import zlib
3 | import urllib.request
4 | import urllib.parse
5 | import urllib.error
6 |
7 |
8 | class HTMLGenerator(object):
9 | def __init__(self):
10 | self.styles = []
11 | self.html = []
12 | self.scripts = []
13 |
14 | def encode(self, text):
15 |         return base64.b64encode(zlib.compress(urllib.parse.quote(text).encode('utf-8'), 9)).decode('ascii')
16 |
17 | def read_file(self, filename):
18 | f = open(filename, "r")
19 | text = f.read()
20 | f.close()
21 | return text
22 |
23 | def add_html(self, text):
24 | self.html.append(text)
25 |
26 |     def add_style(self, text):
27 |         self.styles.append('<style>' + text + '</style>')
28 |
29 |     def add_script(self, text):
30 |         self.scripts.append('<script>' + text + '</script>')
31 |
32 |     def add_encoded_script(self, text):
33 |         self.scripts.append('<script>' + self.encode(text) + '</script>')
34 |
35 | def to_string(self):
36 | return '\n'.join(self.html + self.styles + self.scripts)
37 |
38 | def format_data(self, data):
39 | lines = []
40 | for row in data:
41 | tmprow = []
42 | for pair in row:
43 | first = '"' + pair[0] + '"'
44 | second = ''
45 | if isinstance(pair[1], float):
46 | second = "%.2f" % pair[1]
47 |         elif isinstance(pair[1], str):
48 | second = '"' + pair[1] + '"'
49 | else:
50 | second = pair[1]
51 | tmprow.append(str(first) + ':' + str(second))
52 | lines.append('{' + ', '.join(tmprow) + '}')
53 | return 'var dataSet = [' + ',\n'.join(lines) + '];\n'
54 |
--------------------------------------------------------------------------------
/tests/test_benchmarkfactory.py:
--------------------------------------------------------------------------------
1 | """ Unit tests for the Benchmarkfactory class """
2 |
3 | import unittest
4 | import benchmarkfactory
5 | from log_support import setup_loggers
6 |
7 |
8 | class TestBenchmarkFactory(unittest.TestCase):
9 | """ Sanity tests for Benchmarkfactory """
10 | def test_permutations_1(self):
11 | """ Basic sanity permutations """
12 | config = {"x": 12, "y": True, "z": {1: 2}, "t": [1, 2, 4]}
13 | cfgs = list(benchmarkfactory.all_configs(config))
14 | self.assertEqual(len(cfgs), 3)
15 | self.assertEqual([dict] * 3, list(map(type, cfgs)))
16 | tvals = []
17 |
18 | for cfg in cfgs:
19 | for field in 'xyz':
20 | self.assertEqual(cfg[field], config[field])
21 | tvals.append(cfg['t'])
22 |
23 | self.assertEqual(sorted(tvals), sorted(config['t']))
24 |
25 | def test_permutations_2(self):
26 | """ Basic sanity permutations """
27 | config = {"x": 12, "y": True, "z": {1: 2}, "t": [1, 2, 4], "j": [7, True, 'gg']}
28 | cfgs = list(benchmarkfactory.all_configs(config))
29 | self.assertEqual(len(cfgs), 9)
30 | self.assertEqual([dict] * 9, list(map(type, cfgs)))
31 |
32 | tjvals = []
33 |
34 | for cfg in cfgs:
35 | for field in 'xyz':
36 | self.assertEqual(cfg[field], config[field])
37 | tjvals.append((cfg['t'], cfg['j']))
38 |
39 | for tval in config['t']:
40 | for jval in config['j']:
41 | self.assertEqual(1, tjvals.count((tval, jval)))
42 |
43 | def test_permutations_0(self):
44 | """ Basic sanity permutations """
45 | config = {"x": 12, "y": True, "z": {1: 2}}
46 | cfgs = list(benchmarkfactory.all_configs(config))
47 | self.assertEqual(len(cfgs), 1)
48 | self.assertEqual(cfgs[0], config)
49 |
50 | if __name__ == '__main__':
51 | setup_loggers(log_fname='/tmp/cbt-utest.log')
52 | unittest.main()
53 |
--------------------------------------------------------------------------------
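A minimal sketch of the expansion behaviour these tests assert, not the actual `benchmarkfactory.all_configs` implementation: scalar values are copied into every config, and the cartesian product over list-valued keys yields one config per combination.

```python
import itertools


def all_configs_sketch(config):
    list_keys = [k for k, v in config.items() if isinstance(v, list)]
    scalars = {k: v for k, v in config.items() if k not in list_keys}
    for combo in itertools.product(*(config[k] for k in list_keys)):
        yield {**scalars, **dict(zip(list_keys, combo))}


# {"t": [1, 2, 4], "j": [7, True, 'gg']} expands to 3 * 3 = 9 configs,
# matching test_permutations_2; with no list keys a single config is yielded.
```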
/post_processing/plotter/simple_plotter.py:
--------------------------------------------------------------------------------
1 | """
2 | A file containing the classes and code required to read a file stored in the common
3 | intermediate format introduced in PR 319 (https://github.com/ceph/cbt/pull/319) and
4 | produce a hockey-stick curve graph
5 | """
6 |
7 | from pathlib import Path
8 |
9 | import matplotlib.pyplot as plotter
10 |
11 | from post_processing.common import (
12 | DATA_FILE_EXTENSION,
13 | DATA_FILE_EXTENSION_WITH_DOT,
14 | PLOT_FILE_EXTENSION,
15 | read_intermediate_file,
16 | )
17 | from post_processing.plotter.common_format_plotter import CommonFormatPlotter
18 | from post_processing.types import COMMON_FORMAT_FILE_DATA_TYPE
19 |
20 |
21 | class SimplePlotter(CommonFormatPlotter):
22 | """
23 | Read the intermediate data file in the common json format and produce a hockey-stick
24 | curve plot that includes standard deviation error bars.
25 | """
26 |
27 | def __init__(self, archive_directory: str) -> None:
28 | # A Path object for the directory where the data files are stored
29 | self._path: Path = Path(f"{archive_directory}/visualisation")
30 |
31 | def draw_and_save(self) -> None:
32 | for file_path in self._path.glob(f"*{DATA_FILE_EXTENSION_WITH_DOT}"):
33 | file_data: COMMON_FORMAT_FILE_DATA_TYPE = read_intermediate_file(f"{file_path}")
34 | output_file_path: str = self._generate_output_file_name(files=[file_path])
35 | self._add_single_file_data_with_errorbars(plotter=plotter, file_data=file_data)
36 | self._add_title(plotter=plotter, source_files=[file_path])
37 | self._set_axis(plotter=plotter)
38 | self._save_plot(plotter=plotter, file_path=output_file_path)
39 | self._clear_plot(plotter=plotter)
40 |
41 | def _generate_output_file_name(self, files: list[Path]) -> str:
42 | # we know we will only ever be passed a single file name
43 | return f"{str(files[0])[:-len(DATA_FILE_EXTENSION)]}{PLOT_FILE_EXTENSION}"
44 |
--------------------------------------------------------------------------------
/post_processing/formatter/README.md:
--------------------------------------------------------------------------------
1 | # Formatter
2 |
3 | The formatter converts CBT output json files into the correct format for the rest of the post processing. The output
4 | is a json file of the format:
5 |
6 | ```
7 | {
8 |     <queue_depth>: {
9 |         bandwidth_bytes: <value>
10 |         blocksize: <value>
11 |         io_bytes: <value>
12 |         iops: <value>
13 |         latency: <value>
14 |         number_of_jobs: <value>
15 |         percentage_reads: <value>
16 |         percentage_writes: <value>
17 |         runtime_seconds: <value>
18 |         std_deviation: <value>
19 |         total_ios: <value>
20 |     }
21 |     ...
22 |     <queue_depth>: {
23 |         ...
24 |     }
25 |     maximum_bandwidth: <value>
26 |     latency_at_max_bandwidth: <value>
27 |     maximum_iops: <value>
28 |     latency_at_max_iops: <value>
29 | }
30 | ```
31 | A single file will be produced per block size used for the benchmark run.
32 |
33 | ## Standalone script
34 | A wrapper script has been provided for the formatter
35 | ```
36 | fio_common_output_wrapper.py --archive=<archive_directory>
37 |                              --results_file_root=<results_file_root>
38 | ```
39 | where
40 | - `--archive` Required. The archive directory given to CBT for the benchmark run.
41 | - `--results_file_root` Optional. The name of the results file to process, without the extension. If not specified it
42 | defaults to `json_output`, which is the default for CBT runs.
43 |
44 | Full help text is provided by using `--help` with the script
45 |
46 | ## Output
47 | A directory called `visualisation` will be created in the directory specified by `--archive` that contains all the processed files.
48 | There will be one file per blocksize used for the benchmark run.
49 |
50 | ## Example
51 |
52 | ```bash
53 | PYTHONPATH=/cbt /cbt/tools/fio_common_output_wrapper.py --archive="/tmp/ch_cbt_run" --results_file_root="ch_json_result"
54 | ```
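55 |
56 | The formatted files are plain json, so they are easy to inspect or post-process
57 | directly. A minimal sketch (the file name here is hypothetical; actual names
58 | depend on the blocksize and operation of the run):
59 |
60 | ```python
61 | import json
62 |
63 | # Hypothetical output file name, for illustration only.
64 | with open("/tmp/ch_cbt_run/visualisation/4096B_randwrite.json") as f:
65 |     data = json.load(f)
66 |
67 | # Summary keys from the format described above.
68 | print(data["maximum_bandwidth"], data["latency_at_max_bandwidth"])
69 | ```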
--------------------------------------------------------------------------------
/example/bluestore/runtests.bluestore_example.yaml:
--------------------------------------------------------------------------------
1 | cluster:
2 | user: 'ubuntu'
3 | head: "incerta01.front.sepia.ceph.com"
4 | clients: ["incerta01.front.sepia.ceph.com", "incerta02.front.sepia.ceph.com", "incerta03.front.sepia.ceph.com", "incerta04.front.sepia.ceph.com"]
5 | osds: ["incerta01.front.sepia.ceph.com", "incerta02.front.sepia.ceph.com", "incerta03.front.sepia.ceph.com", "incerta04.front.sepia.ceph.com"]
6 | mons:
7 | incerta01.front.sepia.ceph.com:
8 | a: "10.0.10.101:6789"
9 | osds_per_node: 4
10 | fs: 'xfs'
11 | mkfs_opts: '-f -i size=2048'
12 | mount_opts: '-o inode64,noatime'
13 | conf_file: '/home/nhm/incerta/ceph.conf.bluestore_example'
14 | iterations: 1
15 | use_existing: False
16 | clusterid: "ceph"
17 | tmp_dir: "/tmp/cbt"
18 | ceph-osd_cmd: "env -i TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728 /usr/local/bin/ceph-osd"
19 | ceph-mon_cmd: "/usr/local/bin/ceph-mon"
20 | ceph-run_cmd: "/usr/local/bin/ceph-run"
21 | rados_cmd: "/usr/local/bin/rados"
22 | ceph_cmd: "/usr/local/bin/ceph"
23 | rbd_cmd: "/usr/local/bin/rbd"
24 |
25 | pool_profiles:
26 | rbd:
27 | pg_size: 8192
28 | pgp_size: 8192
29 | replication: 3
30 |
31 | benchmarks:
32 | librbdfio:
33 | time: 300
34 | vol_size: 32768
35 | mode: ['read', 'write', 'randread', 'randwrite', 'rw', 'randrw']
36 | rwmixread: 50
37 | op_size: [4194304, 2097152, 1048576, 524288, 262144, 131072, 65536, 32768, 16384, 8192, 4096]
38 | procs_per_volume: [1]
39 | volumes_per_client: [2]
40 | iodepth: [32]
41 | osd_ra: [4096]
42 | cmd_path: '/home/ubuntu/src/fio/fio'
43 | pool_profile: 'rbd'
44 | log_avg_msec: 100
45 | # Optionally disable fine-grained logging by fio
46 | log_iops: False
47 | log_bw: False
48 | log_lat: False
49 | # Optionally, set the rbd and pool names
50 | poolname: 'rbd'
51 | rbdname: 'img01'
52 | # use_existing_volumes needs to be true to set the pool and rbd names
53 | use_existing_volumes: True
54 |
--------------------------------------------------------------------------------
/tools/fio-parse-jsons/README.md:
--------------------------------------------------------------------------------
1 | # fio-parse-jsons.py - a FIO post processing tool.
2 |
3 | ## Description:
4 |
5 | This is a standalone tool to assist the post processing of JSON output files from CBT when running the FIO benchmark.
6 |
7 | The execution of the script produces as output:
8 |
9 | 1. a gnuplot script,
10 | 2. a .dat file with the data to plot,
11 | 3. a summary table of FIO results in wiki format, printed to stdout.
12 |
13 | This is especially useful to produce a response curve from a set of executions that range over the number of FIO jobs and the iodepth values.
14 | The script predates the author's knowledge of CBT, so it is largely independent of the script driving the tests.
15 | A future PR would integrate the functionality of this standalone script into CBT.
16 |
17 | ## Requirements:
18 |
19 | Besides the yaml and xml Python modules that CBT already depends upon, you need to install the package gnuplot according to your Linux distro.
20 |
21 | ## Usage:
22 |
23 | The following is an example of the execution of the script:
24 |
25 | ```bash
26 | # python3 /cbt/tools/fio-parse-jsons.py -c crimson200gb_1procs_randwrite_list -t 'Crimson 200GB RBD 4k rw' -a crimson4cores_200gb_1img_4k_1procs_randwrite_avg.json
27 | ```
28 |
29 | The arguments are:
30 |
31 | - `-c config_file:` a text file containing the list of FIO output JSON files to process,
32 | - `-t title:` the string to use as the title for the gnuplot chart,
33 | - `-a cpu_avg.json:` a .json file containing the average CPU utilisation, normally produced by the script parse-top.pl.
34 |
35 | The following are the .dat and gnuplot files produced:
36 |
37 | ```bash
38 | crimson200gb_1procs_randwrite.dat
39 | crimson200gb_1procs_randwrite.plot
40 | ```
41 |
42 | To produce the chart, simply execute
43 |
44 | ```bash
45 | gnuplot crimson200gb_1procs_randwrite.plot
46 | ```
47 |
48 | The resulting IOPS vs latency chart is shown below:
49 |
50 | 
51 |
52 |
--------------------------------------------------------------------------------
/example/bluestore/mkpart_hdd_nvme_bs.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | JPD=8
4 |
5 | failed()
6 | {
7 | sleep 2 # Wait for the kernel to stop whining
8 | echo "Hrm, that didn't work. Calling for help."
9 | # sudo ipmitool chassis identify force
10 | echo "RAID Config failed: ${1}"
11 | while [ 1 ]; do sleep 10; done
12 | exit 1;
13 | }
14 |
15 | # First, look for the system disk so we avoid touching it.
16 | SYSPART=`df | grep "/$" | cut -d" " -f1 | cut -d"/" -f3`
17 | #SYSPART=`sudo pvs | grep "/dev/" | cut -f3 -d" " | sed -e 's/[0-9]*$//g'`
18 | echo "System on $SYSPART"
19 |
20 | # Remove the partition label symlinks
21 | sudo rm /dev/disk/by-partlabel/osd-device*
22 |
23 | echo "Making label on OSD devices"
24 | i=0
25 |
26 | # Next, Make the OSD data partitions. In this case we search for the seagate disks in the node.
27 | for DEV in `ls -al /dev/disk/by-id/ata-ST9* | grep -v "part" | cut -f7 -d"/" | tr '\n' ' '`
28 | do
29 | if [[ ! $SYSPART =~ $DEV ]] && [ $i -lt 37 ]
30 | then
31 | sudo parted -s -a optimal /dev/$DEV mklabel gpt || failed "mklabel $DEV"
32 | echo "Creating osd device $i data label"
33 | sudo parted -s -a optimal /dev/$DEV mkpart osd-device-$i-data 0G 10G || failed "mkpart $i-data"
34 | sudo parted -s -a optimal /dev/$DEV mkpart osd-device-$i-block 10G 100% || failed "mkpart $i-block"
35 | let "i++"
36 | fi
37 | done
38 |
39 | j=0;
40 | for DEV in `ls -al /dev/nvme*n1 | cut -f3 -d"/" | tr '\n' ' '`
41 | do
42 | sudo parted -s -a optimal /dev/$DEV mklabel gpt || failed "mklabel $DEV"
43 | for ((k=0; k < $JPD; k++ ))
44 | do
45 | if [[ ! $SYSPART =~ $DEV ]] && [ $j -lt $i ]
46 | then
47 | echo "Creating osd device $j journal label"
48 |             sudo parted -s -a optimal /dev/$DEV mkpart osd-device-$j-wal $(( 10 * $k ))G $(( 10 * $k + 2 ))G || failed "mkpart $j-wal"
49 |             sudo parted -s -a optimal /dev/$DEV mkpart osd-device-$j-db $(( 10 * $k + 2 ))G $(( 10 * ($k + 1) ))G || failed "mkpart $j-db"
50 | let "j++"
51 | fi
52 | done
53 | done
54 |
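55 | # Layout summary (as created above): each data disk gets a 10G "data"
56 | # partition plus a "block" partition spanning the rest; each NVMe hosts up
57 | # to JPD=8 journal sets, one 10G window per OSD: a 2G wal and an 8G db.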
--------------------------------------------------------------------------------
/tools/invariant.yaml:
--------------------------------------------------------------------------------
1 | # Dummy yaml for the generator serialiser --DO NOT CHANGE!
2 | cluster:
3 | user: 'user'
4 | head: "localhost"
5 | clients: ["localhost"]
6 | osds: ["localhost"]
7 | archive_dir: '/tmp'
8 | rgws:
9 | localhost:
10 | client.radosgw.gateway:
11 | host: "127.0.0.1"
12 | osds_per_node: 1
13 | fs: 'xfs'
14 | mkfs_opts: '-f -i size=2048'
15 | mount_opts: '-o inode64,noatime,logbsize=256k'
16 | conf_file: '/etc/ceph/ceph.conf'
17 | tmp_conf: '/etc/ceph/ceph.conf' # used by hsbench
18 | iterations: 1
19 | use_existing: False
20 | clusterid: "ceph"
21 | tmp_dir: "/tmp/cbt"
22 | ceph-authtool_cmd: "/usr/local/bin/ceph-authtool"
23 | ceph-osd_cmd: "env -i TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728 /usr/local/bin/ceph-osd"
24 | ceph-rgw_cmd: "env -i TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728 /usr/local/bin/radosgw"
25 | ceph-mon_cmd: "/usr/local/bin/ceph-mon"
26 | ceph-run_cmd: "/usr/local/bin/ceph-run"
27 | rados_cmd: "/usr/local/bin/rados"
28 | ceph_cmd: "/usr/local/bin/ceph"
29 | rbd_cmd: "/usr/local/bin/rbd"
30 | ceph-mgr_cmd: "/usr/local/bin/ceph-mgr"
31 | radosgw-admin_cmd: "/usr/local/bin/radosgw-admin"
32 |
33 | osd_valgrind: "massif"
34 | pool_profiles:
35 | rgw:
36 | pg_size: 128
37 | pgp_size: 128
38 | replication: 1
39 | rgw_pools:
40 | control: rgw
41 | meta: rgw
42 | log: rgw
43 | buckets: rgw
44 | buckets_data: rgw
45 | buckets_index: rgw
46 |
47 | client_endpoints:
48 | hsbench:
49 | driver: 's3'
50 | endpoints_per_client: 1
51 |
52 | benchmarks:
53 | hsbench:
54 | cmd_path: '/home//go/src/github.com/markhpc/hsbench/hsbench'
55 | client_endpoints: 'hsbench'
56 | objects: 10000
57 | duration: -1
58 | buckets: 1
59 | threads: 16
60 | size: '4K'
61 | report_interval: 10
62 | osd_ra: 0
63 | rbdfio:
64 | osd_ra: 0
65 | tmp_conf: '/etc/ceph/ceph.conf'
66 | radosbench:
67 | rados_cmd: '/home/rzarzynski/ceph-1/build/bin/rados'
68 |
--------------------------------------------------------------------------------
/docs/Workloads.md:
--------------------------------------------------------------------------------
1 | # Workloads
2 |
3 | A workload is the specification of a sequence of tests to be executed in the order given.
4 | Typically this involves a *range* of values for a specific benchmark argument. The most common is
5 | the *queue depth*. Depending on the benchmark, this can be expressed as a function of the number
6 | of jobs (or threads, or processes), such that increasing their number causes a proportional
7 | increase in the I/O. Specifying workloads in this way makes it possible to generate *response latency curves*
8 | from the results.
9 |
10 | The workload feature is currently supported for `librbdfio` only.
11 |
12 | 
13 |
14 | * A `workloads` section consists of a non-empty collection. Each item in the workload has a free-form name,
15 | and in turn contains a collection of valid options with values for the benchmark.
16 | * For each of the `iodepth` and `numjobs` options, a range of integer values is permitted.
17 |
18 | During execution, any of the given values for the benchmark options in the global section are overwritten
19 | by the given values within the current test workload. The global values are restored once the workload test
20 | completes.
21 |
22 | As an example, the following specifies two workloads:
23 |
24 | * the first is named `precondition` and consists of executing a random write over a queue depth of 4
25 | (that is, the product of numjobs and iodepth); it also indicates that monitoring should be disabled during the
26 | execution of the workload,
27 | * the second is named `test1`, and specifies a random read over the cartesian product of the provided sequences for
28 | numjobs and iodepth respectively; that is, (1,1), (1,4), (1,8), ..., (8,8).
29 |
30 |
31 | ```yaml
32 |
33 | workloads:
34 | precondition:
35 | jobname: 'precond1rw'
36 | mode: 'randwrite'
37 | numjobs: [ 1 ]
38 | iodepth: [ 4 ]
39 | monitor: False # whether to run the monitors along the test
40 | test1:
41 | jobname: 'rr'
42 | mode: 'randread'
43 | numjobs: [ 1, 4, 8 ]
44 | iodepth: [ 1, 4, 8 ]
45 |
46 | ```
47 |
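48 | As an illustration of how the ranges expand, the following minimal sketch
49 | mirrors the expansion performed by `all_configs()` in `benchmarkfactory.py`
50 | (a sketch for intuition, not the actual CBT entry point):
51 |
52 | ```python
53 | import itertools
54 |
55 | # The test1 workload from the example above.
56 | workload = {'mode': 'randread', 'numjobs': [1, 4, 8], 'iodepth': [1, 4, 8]}
57 |
58 | ranged = {k: v for k, v in workload.items() if isinstance(v, list)}
59 | fixed = {k: v for k, v in workload.items() if not isinstance(v, list)}
60 |
61 | # One test per element of the cartesian product of the ranged options.
62 | for combo in itertools.product(*ranged.values()):
63 |     test = dict(fixed)
64 |     test.update(zip(ranged.keys(), combo))
65 |     print(test)  # 9 tests: (numjobs, iodepth) = (1,1), (1,4), ... (8,8)
66 | ```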
--------------------------------------------------------------------------------
/example/example-client_endpoints.yaml:
--------------------------------------------------------------------------------
1 | cluster:
2 | user: 'perf'
3 | head: "incerta01"
4 | clients: ["incerta01"]
5 | osds: ["incerta02"]
6 | mons:
7 | incerta01:
8 | a: "10.0.10.101:6789"
9 | mgrs:
10 | incerta01:
11 | a: ~
12 | mdss:
13 | incerta01:
14 | a: ~
15 | osds_per_node: 1
16 | fs: 'xfs'
17 | mkfs_opts: '-f -i size=2048'
18 | mount_opts: '-o inode64,noatime,logbsize=256k'
19 | conf_file: '/home/perf/ceph_tests/ceph.conf.64.async'
20 | iterations: 1
21 | use_existing: False
22 | clusterid: "ceph"
23 | tmp_dir: "/tmp/cbt"
24 | ceph-osd_cmd: "env -i TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES=134217728 /usr/local/bin/ceph-osd"
25 | ceph-mon_cmd: "/usr/local/bin/ceph-mon"
26 | ceph-run_cmd: "/usr/local/bin/ceph-run"
27 | rados_cmd: "/usr/local/bin/rados"
28 | ceph_cmd: "/usr/local/bin/ceph"
29 | ceph-fuse_cmd: "/usr/local/bin/ceph-fuse"
30 | rbd_cmd: "/usr/local/bin/rbd"
31 | rbd-nbd_cmd: "/usr/local/bin/rbd-nbd"
32 | rbd-fuse_cmd: "/usr/local/bin/rbd-fuse"
33 | ceph-mgr_cmd: "/usr/local/bin/ceph-mgr"
34 | ceph-mds_cmd: "/usr/local/bin/ceph-mds"
35 | osd_valgrind: "massif"
36 | pool_profiles:
37 | replication:
38 | pg_size: 256
39 | pgp_size: 256
40 | replication: 1
41 | ec21:
42 | pg_size: 2048
43 | pgp_size: 2048
44 | replication: 'erasure'
45 | erasure_profile: 'ec21'
46 | erasure_profiles:
47 | ec21:
48 | erasure_k: 2
49 | erasure_m: 1
50 | cephfs_pools:
51 | cephfs_data: replication
52 | cephfs_metadata: replication
53 |
54 | client_endpoints:
55 | fiotest:
56 | driver: 'librbd'
57 | # driver: 'rbd-kernel'
58 | # driver: 'rbd-nbd'
59 | # driver: 'rbd-fuse'
60 | # driver: 'rbd-tcmu'
61 | # driver: 'cephfs-kernel'
62 | # driver: 'cephfs-fuse'
63 | endpoints_per_client: 1
64 | endpoint_size: 524288
65 | pool_profile: replication
66 |
67 | benchmarks:
68 | fio:
69 | client_endpoints: 'fiotest'
70 | time: 300
71 | time_based: True
72 | norandommap: True
73 | size: 262144
74 | mode: ['read', 'write', 'randread','randwrite']
75 | rwmixread: 50
76 | op_size: [4194304, 131072, 4096]
77 | procs_per_endpoint: [1]
78 | iodepth: [32]
79 | osd_ra: [4096]
80 | cmd_path: '/home/perf/src/fio/fio'
81 | log_avg_msec: 100
82 |
83 |
--------------------------------------------------------------------------------
/client_endpoints_factory.py:
--------------------------------------------------------------------------------
1 | import settings
2 |
3 | from cluster.ceph import Ceph
4 |
5 | from client_endpoints.librbd_client_endpoints import LibrbdClientEndpoints
6 | from client_endpoints.rbdkernel_client_endpoints import RbdKernelClientEndpoints
7 | from client_endpoints.rbdnbd_client_endpoints import RbdNbdClientEndpoints
8 | from client_endpoints.rbdfuse_client_endpoints import RbdFuseClientEndpoints
9 | from client_endpoints.rbdtcmu_client_endpoints import RbdTcmuClientEndpoints
10 | from client_endpoints.cephfskernel_client_endpoints import CephfsKernelClientEndpoints
11 | from client_endpoints.cephfsfuse_client_endpoints import CephfsFuseClientEndpoints
12 | from client_endpoints.rgws3_client_endpoints import RgwS3ClientEndpoints
13 | ce_objects = {}
14 |
15 | def get(cluster, name):
16 | if isinstance(cluster, Ceph):
17 | return get_ceph(cluster, name)
18 |
19 | def get_ceph(cluster, name):
20 | ce_config = settings.client_endpoints.get(name, None)
21 |
22 |     if ce_config is None:
23 | raise ValueError('No client_endpoints with name "%s" found.' % name)
24 |
25 | cclass = cluster.getclass()
26 | key = "%s-%s" % (cclass, name)
27 |
28 | if key in ce_objects:
29 | return ce_objects[key]
30 |
31 | driver = ce_config.get('driver', None)
32 | if driver is None:
33 | raise ValueError('No driver defined in the "%s" client_endpoints.' % name)
34 | elif driver == "librbd":
35 | ce_objects[key] = LibrbdClientEndpoints(cluster, ce_config)
36 | elif driver == "rbd-kernel":
37 | ce_objects[key] = RbdKernelClientEndpoints(cluster, ce_config)
38 | elif driver == "rbd-nbd":
39 | ce_objects[key] = RbdNbdClientEndpoints(cluster, ce_config)
40 | elif driver == "rbd-fuse":
41 | ce_objects[key] = RbdFuseClientEndpoints(cluster, ce_config)
42 | elif driver == "rbd-tcmu":
43 | ce_objects[key] = RbdTcmuClientEndpoints(cluster, ce_config)
44 | elif driver == "cephfs-kernel":
45 | ce_objects[key] = CephfsKernelClientEndpoints(cluster, ce_config)
46 | elif driver == "cephfs-fuse":
47 | ce_objects[key] = CephfsFuseClientEndpoints(cluster, ce_config)
48 | elif driver == "s3":
49 | ce_objects[key] = RgwS3ClientEndpoints(cluster, ce_config)
50 | else:
51 | raise ValueError('%s clusters do not support "%s" client_endpoints.' % (cclass, driver))
52 | return ce_objects[key]
53 |
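54 | # Illustrative usage: resolve the endpoints object for a named test plan
55 | # entry, e.g. the 'fiotest' client_endpoints defined in
56 | # example/example-client_endpoints.yaml:
57 | #   endpoints = get(cluster, 'fiotest')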
--------------------------------------------------------------------------------
/benchmarkfactory.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import itertools
3 |
4 | import settings
5 | from benchmark.radosbench import Radosbench
6 | from benchmark.fio import Fio
7 | from benchmark.hsbench import Hsbench
8 | from benchmark.rbdfio import RbdFio
9 | from benchmark.rawfio import RawFio
10 | from benchmark.kvmrbdfio import KvmRbdFio
11 | from benchmark.librbdfio import LibrbdFio
12 | from benchmark.nullbench import Nullbench
13 | from benchmark.cosbench import Cosbench
14 | from benchmark.cephtestrados import CephTestRados
15 | from benchmark.getput import Getput
16 |
17 | def get_all(archive, cluster, iteration):
18 | for benchmark, config in sorted(settings.benchmarks.items()):
19 | default = {"benchmark": benchmark,
20 | "iteration": iteration}
21 | for current in all_configs(config):
22 | current.update(default)
23 | yield get_object(archive, cluster, benchmark, current)
24 |
25 |
26 | def all_configs(config):
27 | """
28 | return all parameter combinations for config
29 | config: dict - list of params
30 | iterate over all top-level lists in config
31 | """
32 | cycle_over_lists = []
33 | cycle_over_names = []
34 | default = {}
35 |
36 | for param, value in list(config.items()):
37 |         # 'acceptable' applies to the benchmark as a whole; no need to add it
38 |         # to the set for permutation
39 | if param == 'acceptable':
40 | default[param] = value
41 | elif isinstance(value, list):
42 | cycle_over_lists.append(value)
43 | cycle_over_names.append(param)
44 | else:
45 | default[param] = value
46 |
47 | for permutation in itertools.product(*cycle_over_lists):
48 | current = copy.deepcopy(default)
49 | current.update(list(zip(cycle_over_names, permutation)))
50 | yield current
51 |
52 | def get_object(archive, cluster, benchmark, bconfig):
53 | benchmarks = {
54 | 'nullbench': Nullbench,
55 | 'radosbench': Radosbench,
56 | 'fio': Fio,
57 | 'hsbench': Hsbench,
58 | 'rbdfio': RbdFio,
59 | 'kvmrbdfio': KvmRbdFio,
60 | 'rawfio': RawFio,
61 | 'librbdfio': LibrbdFio,
62 | 'cosbench': Cosbench,
63 | 'cephtestrados': CephTestRados,
64 | 'getput': Getput}
65 | try:
66 | return benchmarks[benchmark](archive, cluster, bconfig)
67 | except KeyError:
68 | return None
69 |
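70 | # Example: all_configs({"x": 12, "t": [1, 2, 4]}) yields three configs
71 | # (cf. tests/test_benchmarkfactory.py):
72 | #   {"x": 12, "t": 1}, {"x": 12, "t": 2}, {"x": 12, "t": 4}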
--------------------------------------------------------------------------------
/tools/fio_common_output_wrapper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | """
4 | Usage:
5 |     fio_common_output_wrapper.py --archive=<archive_directory>
6 |                                  --results_file_root=<results_file_root>
7 |
8 | Input:
9 |     --archive           [Required]  The archive directory that contains the
10 |                                     results files from an fio run of cbt
11 |
12 | --results_file_root [Optional] The base name for the json output files
13 | produced from an fio run in cbt.
14 | Default: "json_output"
15 |
16 | Examples:
17 | fio_common_output_wrapper.py --archive="/tmp/ch_cbt_run"
18 |
19 | fio_common_output_wrapper.py --archive="/tmp/ch_cbt_run" --results_file_root="ch_json_result"
20 | """
21 |
22 | import subprocess
23 | from argparse import ArgumentParser, Namespace
24 | from logging import Logger, getLogger
25 |
26 | from post_processing.formatter.common_output_formatter import CommonOutputFormatter
27 |
28 | log: Logger = getLogger()
29 |
30 |
31 | def main() -> int:
32 | """
33 | Main routine for the script
34 | """
35 |
36 | result: int = 0
37 |
38 | parser: ArgumentParser = ArgumentParser(description="Parse cbt json output into a common format")
39 | parser.add_argument("--archive", type=str, required=True, help="The archive directory used for the CBT results")
40 | parser.add_argument(
41 | "--results_file_root",
42 | type=str,
43 | required=False,
44 | default="json_output*",
45 | help="The filename root of all the CBT output json files",
46 | )
47 |
48 | args: Namespace = parser.parse_args()
49 |
50 | output_directory: str = f"{args.archive}/visualisation/"
51 | subprocess.run(f"mkdir -p -m0755 {output_directory}", shell=True)
52 |
53 | formatter: CommonOutputFormatter = CommonOutputFormatter(
54 | archive_directory=args.archive, filename_root=args.results_file_root
55 | )
56 |
57 | try:
58 | formatter.convert_all_files()
59 | formatter.write_output_file()
60 | except Exception as e:
61 | log.error(
62 | "Encountered an error parsing results in directory %s with name %s"
63 | % (args.archive, args.results_file_root)
64 | )
65 | log.exception(e)
66 | result = 1
67 |
68 | return result
69 |
70 |
71 | if __name__ == "__main__":
72 |     raise SystemExit(main())
73 |
--------------------------------------------------------------------------------
/include/js/table.js:
--------------------------------------------------------------------------------
1 | var keys = d3.keys(dataSet[0]);
2 |
3 | var mins = {}
4 | var maxes = {}
5 | dataSet.forEach(function(item) {
6 | var mean = d3.mean(d3.values(item).slice(3));
7 | var deviation = d3.deviation(d3.values(item).slice(3));
8 | var minmax_key = d3.values(item).slice(0,3).join("");
9 | // console.log(minmax_key);
10 | mins[minmax_key] = mean-deviation;
11 | maxes[minmax_key] = mean+deviation;
12 | });
13 | //console.log(mins);
14 | //console.log(maxes);
15 |
16 | var thead = d3.select("#view > thead")
17 | var th = thead.selectAll("th")
18 | .data(keys)
19 | .enter()
20 | .append('th')
21 | .text(function(d){ return d })
22 |
23 | var tbody = d3.select("#view > tbody");
24 |
25 | var tr = tbody.selectAll("tr")
26 | .data(dataSet)
27 | .enter()
28 | .append('tr')
29 | .selectAll('td')
30 | .data(function (row) {
31 |         var key = d3.values(row).slice(0,3).join("");
32 |         var dataArray = d3.entries(row);
33 | dataArray.forEach(function(data) {
34 | data["min"] = mins[key];
35 | data["max"] = maxes[key];
36 | });
37 | // console.log(dataArray);
38 | return dataArray;
39 |
40 | })
41 | .enter()
42 | .append('td')
43 | .append('div')
44 | .style({
45 | "background-color": function(d, i){
46 | if(i < 3) return "lightblue";
47 | console.log(d);
48 | if (d.min === 0 && d.max === 0) {
49 | return "lightgrey";
50 | }
51 | return makecolor(d.value, d.min, d.max);
52 | },
53 | })
54 | .text(function(d){
55 | return d.value
56 | });
57 |
58 | function makecolor(val, min, max) {
59 | var red = 255;
60 | var green = 255;
61 | if(val < min) {
62 | green = 0;
63 | } else if(val < min+((max-min)/2.0)) {
64 | green = Math.round(((val-min)/((max-min)/2.0)) * 255);
65 | } else if(val < max) {
66 | red = Math.round(((max-val)/((max-min)/2.0)) * 255);
67 | } else {
68 | red = 0;
69 | }
70 | return "#" + rgb2hex(red,green,0);
71 | }
72 |
73 | function rgb2hex(r,g,b) {
74 | if (g !== undefined)
75 | return Number(0x1000000 + r*0x10000 + g*0x100 + b).toString(16).substring(1);
76 | else
77 | return Number(0x1000000 + r[0]*0x10000 + r[1]*0x100 + r[2]).toString(16).substring(1);
78 | }
79 |
80 |
--------------------------------------------------------------------------------
/log_support.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import yaml
4 |
5 | has_a_tty = os.isatty(1) # test stdout
6 |
7 |
8 | def load_run_params(run_params_file):
9 | with open(run_params_file) as fd:
10 |         dt = yaml.safe_load(fd)
11 |
12 | return dict(run_uuid=dt['run_uuid'],
13 | comment=dt.get('comment'))
14 |
15 |
16 | def color_me(color):
17 | RESET_SEQ = "\033[0m"
18 | COLOR_SEQ = "\033[1;%dm"
19 |
20 | color_seq = COLOR_SEQ % (30 + color)
21 |
22 | def closure(msg):
23 | return color_seq + msg + RESET_SEQ
24 | return closure
25 |
26 |
27 | class ColoredFormatter(logging.Formatter):
28 | BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = list(range(8))
29 |
30 | colors = {
31 | 'WARNING': color_me(YELLOW),
32 | 'DEBUG': color_me(BLUE),
33 | 'CRITICAL': color_me(RED),
34 | 'ERROR': color_me(RED),
35 | 'INFO': color_me(GREEN)
36 | }
37 |
38 | def __init__(self, msg, use_color=True, datefmt=None):
39 | logging.Formatter.__init__(self, msg, datefmt=datefmt)
40 | self.use_color = use_color
41 |
42 | def format(self, record):
43 | orig = record.__dict__
44 | record.__dict__ = record.__dict__.copy()
45 | levelname = record.levelname
46 |
47 | prn_name = levelname + ' ' * (8 - len(levelname))
48 | if (levelname in self.colors) and has_a_tty:
49 | record.levelname = self.colors[levelname](prn_name)
50 | else:
51 | record.levelname = prn_name
52 |
53 | # super doesn't work here in 2.6 O_o
54 | res = logging.Formatter.format(self, record)
55 | # res = super(ColoredFormatter, self).format(record)
56 |
57 | # restore record, as it will be used by other formatters
58 | record.__dict__ = orig
59 | return res
60 |
61 |
62 | def setup_loggers(def_level=logging.DEBUG, log_fname=None):
63 | logger = logging.getLogger('cbt')
64 | logger.setLevel(logging.DEBUG)
65 | sh = logging.StreamHandler()
66 | sh.setLevel(def_level)
67 |
68 | log_format = '%(asctime)s - %(levelname)s - %(name)-8s - %(message)s'
69 | colored_formatter = ColoredFormatter(log_format, datefmt="%H:%M:%S")
70 |
71 | sh.setFormatter(colored_formatter)
72 | logger.addHandler(sh)
73 |
74 | if log_fname is not None:
75 | fh = logging.FileHandler(log_fname)
76 | formatter = logging.Formatter(log_format, datefmt="%H:%M:%S")
77 | fh.setFormatter(formatter)
78 | fh.setLevel(logging.DEBUG)
79 | logger.addHandler(fh)
80 | else:
81 | fh = None
82 |
--------------------------------------------------------------------------------
/benchmark/lis.py:
--------------------------------------------------------------------------------
1 | import operator as op
2 |
3 | # a mini s-expr interpreter
4 | # inspired by https://norvig.com/lispy.html
5 |
6 | Symbol = str
7 | List = list
8 |
9 |
10 | class Lispy:
11 | @staticmethod
12 | def _tokenize(s):
13 | return s.replace('(', ' ( ').replace(')', ' ) ').split()
14 |
15 | @staticmethod
16 | def _atom(token):
17 | try:
18 | return int(token)
19 | except ValueError:
20 | try:
21 | return float(token)
22 | except ValueError:
23 | return Symbol(token)
24 |
25 | def _read_from_tokens(self, tokens):
26 | if len(tokens) == 0:
27 | raise SyntaxError('unexpected EOF while reading')
28 | token = tokens.pop(0)
29 | if token == '(':
30 | stmt = []
31 | while tokens[0] != ')':
32 | stmt.append(self._read_from_tokens(tokens))
33 | tokens.pop(0) # pop off ')'
34 | return stmt
35 | elif token == ')':
36 | raise SyntaxError('unexpected ")"')
37 | else:
38 | return self._atom(token)
39 |
40 | def parse(self, s):
41 | return self._read_from_tokens(self._tokenize(s))
42 |
43 | def eval(self, stmt, env):
44 | if isinstance(stmt, Symbol):
45 | return env.eval(stmt)
46 | elif isinstance(stmt, List):
47 | func = self.eval(stmt[0], env)
48 | args = [self.eval(exp, env) for exp in stmt[1:]]
49 | return func(*args)
50 | else:
51 | return stmt
52 |
53 |
54 | class Env(dict):
55 | @staticmethod
56 | def near(lhs, rhs, abs_error):
57 | if rhs == 0:
58 | return lhs == rhs
59 | else:
60 | return (abs(lhs - rhs) / float(rhs)) <= abs_error
61 |
62 | def __init__(self, outer, **locals):
63 | if locals:
64 | self.update(locals)
65 | self.outer = outer
66 | # pass 'result' and 'baseline' to some functions
67 | # TODO: return "goodness" instead of a boolean
68 | self.update({
69 | 'less': lambda: self.eval('result') < self.eval('baseline'),
70 | 'greater': lambda: self.eval('result') > self.eval('baseline'),
71 | 'near': lambda abs_error: self.near(self.eval('result'),
72 | self.eval('baseline'),
73 | abs_error),
74 | 'or': op.or_})
75 |
76 | def find(self, var):
77 | if var in self:
78 | return self
79 | elif self.outer:
80 | return self.outer.find(var)
81 | else:
82 | raise NameError(var)
83 |
84 | def eval(self, var):
85 | return self.find(var)[var]
86 |
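87 | # Illustrative usage (values assumed), evaluating an acceptance expression
88 | # against a result/baseline pair:
89 | #   lispy = Lispy()
90 | #   env = Env(None, result=105.0, baseline=100.0)
91 | #   lispy.eval(lispy.parse('(near 0.1)'), env)  # True: within 10% of baseline
92 | #   lispy.eval(lispy.parse('(greater)'), env)   # True: 105.0 > 100.0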
--------------------------------------------------------------------------------
/parsing/database.py:
--------------------------------------------------------------------------------
1 | import sqlite3
2 |
3 | conn = sqlite3.connect(':memory:')
4 |
5 | FORMAT = ['hash', 'testname', 'iteration', 'benchmark', 'osdra', 'opsize', 'cprocs', 'iodepth', 'testtype', 'writebw', 'readbw']
6 | TYPES = {'hash': 'text primary key', 'testname': 'text', 'iteration': 'integer', 'benchmark': 'text', 'osdra': 'integer', 'opsize': 'integer', 'cprocs': 'integer', 'iodepth': 'integer', 'testtype': 'text', 'writebw': 'real', 'readbw': 'real'}
7 |
8 |
9 | def create_db():
10 | c = conn.cursor()
11 | q = 'CREATE TABLE if not exists results ('
12 | values = []
13 | for key in FORMAT:
14 | values.append("%s %s" % (key, TYPES[key]))
15 | q += ', '.join(values) + ')'
16 | print(q)
17 | c.execute(q)
18 | conn.commit()
19 |
20 |
21 | def insert(values):
22 | c = conn.cursor()
23 |     c.execute('INSERT INTO results VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', values)
24 | conn.commit()
25 |
26 |
27 | def update_readbw(hashval, bw):
28 | c = conn.cursor()
29 | c.execute('UPDATE results SET readbw = readbw + ? WHERE hash = ?', (bw, hashval))
30 | conn.commit()
31 |
32 |
33 | def update_writebw(hashval, bw):
34 | c = conn.cursor()
35 | c.execute('UPDATE results SET writebw = writebw + ? WHERE hash = ?', (bw, hashval))
36 | conn.commit()
37 |
38 |
39 | def get_values(column):
40 | c = conn.cursor()
41 | # Careful here, this could lead to an SQL injection but appears necessary
42 | # since bindings can't be used for column names.
43 | c.execute('SELECT distinct %s FROM results ORDER BY %s' % (column, column))
44 | return [item[0] for item in c.fetchall()]
45 |
46 |
47 | def fetch_table(params):
48 | c = conn.cursor()
49 | distincts = {}
50 |
51 | for param in params:
52 | distincts[param] = get_values(param)
53 |
54 | c.execute('SELECT testname,%s,readbw,writebw FROM results ORDER BY %s,testname' % (','.join(params), ','.join(params)))
55 | testnames = get_values('testname')
56 |
57 | table = []
58 | writerow = []
59 | readrow = []
60 | for row in c.fetchall():
61 | # Check to make sure we aren't missing a test
62 | while row[0] != testnames[len(writerow)]:
63 | blank = ['%s' % testnames[len(writerow)], '']
64 | writerow.append(blank)
65 | readrow.append(blank)
66 | writerow.append([row[0], row[-1]])
67 | readrow.append([row[0], row[-2]])
68 | if len(writerow) == len(testnames):
69 | pre = []
70 | for i in range(0, len(params)):
71 | pre.append([params[i], row[i + 1]])
72 | table.append(pre + [['optype', 'write']] + writerow)
73 | table.append(pre + [['optype', 'read']] + readrow)
74 | writerow = []
75 | readrow = []
76 | return table
77 |
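78 | # Illustrative usage (see parsing/test.py for the real driver); the hash and
79 | # values are hypothetical, ordered as in FORMAT:
80 | #   create_db()
81 | #   insert(['<hash>', 'run1', 0, 'fio', 128, 4096, 1, 32, 'randwrite', 0, 0])
82 | #   update_writebw('<hash>', 250.0)
83 | #   fetch_table(['opsize', 'testtype'])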
--------------------------------------------------------------------------------
/docs/TestPlanSchema.md:
--------------------------------------------------------------------------------
1 | # Test plan schema
2 |
3 | A valid test plan .yaml consists of the following compulsory sections at the top level (the level is
4 | indicated by the indentation in .yaml: the top level has 0 indentation):
5 |
6 | * `cluster`
7 | * `benchmarks`.
8 |
9 | It may also have the following optional sections at the same level:
10 |
11 | * `monitoring_profiles`
12 | * `client_endpoints`.
13 |
14 | 
15 |
16 | ## `cluster`
17 |
18 | The cluster section enumerates the components of the Ceph cluster relevant to CBT. There are two
19 | general classes of components:
20 |
21 | * scalars: named entries whose value is a string, a number or a boolean;
22 | * collections: components that in turn contain further information, for example profile of pool
23 | replication.
24 |
25 | The following are compulsory scalar entities:
26 | * a head node: a string naming the node that starts the cluster,
27 | * a list of clients, each a string naming an ssh-reachable host that has a benchmark
28 | executable installed,
29 | * a list of OSD nodes, each of which runs at least one OSD process.
30 |
31 | 
32 |
33 |
34 | ## `benchmarks`
35 |
36 | The benchmarks section consists of a non-empty list of collections, each describing a benchmark
37 | entity.
38 |
39 | * A benchmark entity starts with its *name* (second level indentation), valid names are for example:
40 | `radosbench`, `hsbench`, `kvmrbdfio`, `librbdfio`, etc.
41 |
42 | * The contents of the benchmark entity (third level indentation) consist of a collection of items
43 | (either scalars or collections themselves). Most of these entities represent options for the
44 | command line invocation of the benchmark when executed by the clients.
45 |
46 | 
47 |
48 |
49 | ## `monitoring_profiles`
50 |
51 |
52 | The monitoring_profiles section consists of a non-empty list of collections, each describing a
53 | monitoring tool.
54 |
55 | A monitoring entity starts with its name (at second level indentation). Currently supported are `perf`,
56 | `collectl` and `top`.
57 |
58 | The contents of the monitoring entity consist of:
59 | * a `nodes` (third level indentation) list of processes to monitor (by default the osd nodes), and
60 | * an optional string `args` (third level indentation) to indicate the arguments to the monitoring tool.
61 |
62 |
63 | ## `client_endpoints`
64 |
65 | The client_endpoints section consists of a non-empty list of collections, each associated with a
66 | benchmark entity and typically indicating the driver for the benchmark. The client_endpoints, if
67 | specified in a test plan, must be cross-referenced by the benchmarks section, and as such the
68 | client_endpoints section normally precedes the benchmarks section in the test plan.
69 |
70 | See the dir `example/` for a number of test plan examples.
71 |
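72 | For illustration, the following is a minimal sketch of a test plan containing
73 | only the two compulsory top-level sections. The host names and option values
74 | are placeholders, not a working configuration:
75 |
76 | ```python
77 | import yaml
78 |
79 | MINIMAL_PLAN = """
80 | cluster:
81 |   head: "node1"
82 |   clients: ["node1"]
83 |   osds: ["node2"]
84 | benchmarks:
85 |   radosbench:
86 |     time: 60
87 | """
88 |
89 | # Both compulsory sections must be present at the top level.
90 | plan = yaml.safe_load(MINIMAL_PLAN)
91 | assert {"cluster", "benchmarks"} <= plan.keys()
92 | ```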
--------------------------------------------------------------------------------
/tools/fio_objectstore_tools/bluestore_throttle_tuning.rst:
--------------------------------------------------------------------------------
1 | =========================
2 | BlueStore Throttle Tuning
3 | =========================
4 |
5 | Motivation
6 | ==========
7 |
8 | BlueStore has a throttling mechanism in order to ensure that queued IO doesn't
9 | increase without bound. If this throttle is set too low, osd throughput will
10 | suffer. If it's set too high, we'll see unnecessary increases in latency at
11 | the objectstore level preventing the OSD queues from performing QoS. If
12 | the objectstore level, preventing the OSD queues from performing QoS. If
13 | best possible latency for an incoming high priority IO would be 1s.
14 |
15 | Generally, we'd expect the relationship between latency and throttle value (or
16 | queue depth) to have two behavior types. When the store is sub-saturated, we'd
17 | expect increases in queued IO to increase throughput with little corresponding
18 | increase in latency. As the store saturates, we'd expect throughput to become
19 | relatively insensitive to throttle, but latency would begin to increase
20 | linearly.
21 |
22 | In choosing these throttle limits, a user would want first to understand the
23 | latency/throughput/throttle relationships for their hardware as well as their
24 | workload/application's preference for latency vs throughput. One could choose
25 | to deliberately sacrafice some amount of max throughput in exchange for better
26 | qos, or one might choose to capture as much throughput as possible at the
27 | expense of higher average and especially tail latency.
28 |
29 | Usage
30 | =====
31 |
32 | There is a backend for fio (src/test/fio/fio_ceph_objectstore.cc) which backs
33 | fio with a single objectstore instance. This instance has an option which will
34 | at configurable intervals alter the throttle values among the configured
35 | options online as the fio test runs. By capturing a trace of ios performed via
36 | lttng, we can get an idea of the throttle/latency/throughput relationship for a
37 | particular workload and device.
38 |
39 | First, ceph needs to be built with fio and lttng:
40 |
41 | ::
42 |    ./do_cmake.sh --verbose -DWITH_FIO=on -DWITH_LTTNG=on -DCMAKE_BUILD_TYPE=RelWithDebInfo
43 |
44 | Next, there are a few scripts under fio_objectstore_tools/ in the cbt.git
45 | repository to ease running fio with the right backend and graphing the results.
46 | Create a copy of runs.json, updating configs as needed (particularly device
47 | paths). You can then start a run with:
48 |
49 | ::
50 |    ./run.py --initialize runs
51 |    ./run.py --run
52 |
53 | Results will appear in dated subdirs under ~/output by default.
54 |
55 | In order to generate graphs from these results, run:
56 |
57 | ::
58 |    ./analyze.py --generate-graphs --output
59 |
60 | The resulting graphs will plot latency and throughput for each traced IO (with
61 | curves for median (green) and 99pct (red)) against the kv throttle and deferred
62 | throttle values when the IO was released from the throttle.
63 |
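64 | As a rough illustration of the latency model described in the Motivation
65 | section (a toy queueing sketch based on Little's law; an illustrative
66 | assumption, not CBT code): once the store saturates, throughput stops
67 | growing with the throttle, so the latency implied by
68 | latency = queue_depth / throughput grows linearly::
69 |
70 |     def throughput(queue_depth, peak=500.0, knee=8.0):
71 |         # Toy saturation curve (IOPS): rises quickly, then flattens near 'peak'.
72 |         return peak * queue_depth / (queue_depth + knee)
73 |
74 |     for qd in (1, 4, 16, 64, 256):
75 |         # Little's law: mean latency ~= queue depth / throughput.
76 |         print(qd, qd / throughput(qd))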
--------------------------------------------------------------------------------
/tools/fio-parse-json.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | #
3 | # fio-parse-json.py - example script to parse distributed workload generation results
4 | # produced by fio in JSON format
5 | #
6 | # input parameters:
7 | # 1 - JSON file - file containing fio JSON output
8 | # 2 - JSON path - path through JSON tree to a leaf node
9 | #
10 | # assumption: json output of non-leaf nodes consists of either
11 | # - dictionary - key field selects sub-value
12 | # - sequence - key field syntax is name=value, where
13 | # name is a dictionary key of sequence elements, and
14 | # value is the desired value to select a sequence element
15 | # example:
16 | # python3 fio-parse-json.py r.fiojob.json.log 'jobs/jobname=randread/read/iops'
17 | #
18 |
19 | import os, sys
20 | from pprint import pprint
21 | import json
22 |
23 | NOTOK=1
24 |
25 | if len(sys.argv) < 3:
26 | print('usage: fio-parse-json.py fio-json.log path-to-leaf')
27 | print('path-to-leaf is a slash-separated list of key names in JSON tree')
28 | print('see instructions at top of this script')
29 | sys.exit(NOTOK)
30 |
31 |
32 | def filter_json_node(next_branch, node_list_in):
33 | #print next_branch, json.dumps(node, indent=4)
34 | #print ''
35 | #sys.stdout.flush()
36 | next_node_list = []
37 | for n in node_list_in:
38 | dotlist = next_branch.split('=')
39 | if len(dotlist) > 2:
40 |             print('unrecognized syntax at %s' % next_branch)
41 |             sys.exit(NOTOK)
42 |         elif len(dotlist) == 1:
43 |             assert isinstance(n, dict)
44 |             next_node_list.append(n[next_branch])
45 | else: # must be a sequence, take any element with key matching value
46 | select_key = dotlist[0]
47 | select_value = dotlist[1]
48 | for e in n: # node is a seq
49 | #print 'select with key %s value %s sequence element %s'%(select_key, select_value, e)
50 | if select_value == '*':
51 | next_node_list.append(e)
52 | else:
53 | v = e[select_key]
54 | if v == select_value:
55 | next_node_list.append(e)
56 |
57 | if len(next_node_list) == 0:
58 |         print('no list member in %s has key %s value %s' % (str(node_list_in), select_key, select_value))
59 | sys.exit(NOTOK)
60 | return next_node_list
61 |
62 |
63 | fn = sys.argv[1]
64 | json_tree_path = sys.argv[2].split('/')
65 | with open(fn, 'r') as json_data:
66 |
67 | # check for empty file
68 |
69 | f_info = os.fstat(json_data.fileno())
70 | if f_info.st_size == 0:
71 | print('JSON input file %s is empty'%fn)
72 | sys.exit(NOTOK)
73 |
74 | # find start of JSON object and position file handle right before that
75 |
76 | lines = json_data.readlines()
77 | start_of_json_data=0
78 | for l in lines:
79 | if l[0] == '{': break
80 | start_of_json_data += 1
81 | json_data.seek(0, os.SEEK_SET)
82 | for j in range(0,start_of_json_data):
83 | l = json_data.readline()
84 |
85 | # parse the JSON object
86 |
87 | node = json.load(json_data)
88 | current_branch = None
89 | next_node_list = [node]
90 | for next_branch in json_tree_path:
91 | next_node_list = filter_json_node(next_branch, next_node_list)
92 | for n in next_node_list: print(n)
93 |
94 |
--------------------------------------------------------------------------------
/post_processing/plotter/directory_comparison_plotter.py:
--------------------------------------------------------------------------------
1 | """
2 | A file containing the classes and code required to read two files stored in the common
3 | intermediate format introduced in CBT PR #319 (https://github.com/ceph/cbt/pull/319)
4 | and produce a plot of both the files on the same axes.
5 | """
6 |
7 | from logging import Logger, getLogger
8 | from pathlib import Path
9 |
10 | import matplotlib.pyplot as plotter
11 |
12 | from post_processing.common import (
13 | PLOT_FILE_EXTENSION_WITH_DOT,
14 | find_common_data_file_names,
15 | read_intermediate_file,
16 | )
17 | from post_processing.plotter.common_format_plotter import CommonFormatPlotter
18 | from post_processing.types import COMMON_FORMAT_FILE_DATA_TYPE
19 |
20 | log: Logger = getLogger("cbt")
21 |
22 |
23 | class DirectoryComparisonPlotter(CommonFormatPlotter):
24 | """
25 | Read the intermediate data files in the common json format and produce a
26 | curve plot of both sets of data on the same axes. Error bars are not included
27 | as they seem to make the plot harder to read and compare.
28 | """
29 |
30 | def __init__(self, output_directory: str, directories: list[str]) -> None:
31 | self._output_directory: str = f"{output_directory}"
32 | self._comparison_directories: list[Path] = [Path(f"{directory}/visualisation") for directory in directories]
33 |
34 | def draw_and_save(self) -> None:
35 | # output_file_path: str = self._generate_output_file_name(files=self._comparison_directories)
36 |
37 | # We will only compare data for files with the same name, so find all
38 | # the file names that are common across all directories. Not sure this
39 | # is the right way though
40 | common_file_names: list[str] = find_common_data_file_names(self._comparison_directories)
41 |
42 | for file_name in common_file_names:
43 | output_file_path: str = self._generate_output_file_name(files=[Path(file_name)])
44 | for directory in self._comparison_directories:
45 | file_data: COMMON_FORMAT_FILE_DATA_TYPE = read_intermediate_file(f"{directory}/{file_name}")
46 |                 # label with the archive directory name: parts[-2] is the parent of 'visualisation'
47 | self._add_single_file_data(
48 | plotter=plotter,
49 | file_data=file_data,
50 | label=f"{directory.parts[-2]}",
51 | )
52 |
53 | self._add_title(plotter=plotter, source_files=[Path(file_name)])
54 | self._set_axis(plotter=plotter)
55 |
56 | # make sure we add the legend to the plot
57 | plotter.legend() # pyright: ignore[reportUnknownMemberType]
58 |
59 | self._save_plot(plotter=plotter, file_path=output_file_path)
60 | self._clear_plot(plotter=plotter)
61 |
62 | def _generate_output_file_name(self, files: list[Path]) -> str:
63 | # we know we will only ever be passed a single file name
64 | output_file: str = f"{self._output_directory}/Comparison_{files[0].stem}{PLOT_FILE_EXTENSION_WITH_DOT}"
65 |
66 | return output_file
67 |
--------------------------------------------------------------------------------
/post_processing/reports/README.md:
--------------------------------------------------------------------------------
1 | # Reports
2 |
3 | Produces a report in github markdown, and optionally pdf format that includes a summary table and the relevant
4 | plots from the CBT run.
5 |
6 | ## Output
7 | A report in github markdown format with a plots directory containing the required plots. The report and plots directory
8 | can be uploaded directly to github as-is and the links will be maintained.
9 |
10 | Optionally a report in pdf format can also be created.
11 |
12 | Due to the tools used there are only 6 unique colours available for the plot lines, so it is recommended to limit the
13 | comparison to 6 or fewer files or directories. During testing we found that comparing more than four directories can
14 | render the pdf report unreadable, so it is not recommended to create a pdf report that compares data from more than
15 | four benchmark runs.
16 |
17 | ## Standalone scripts
18 | There are two scripts provided as wrappers for the report generation:
19 | * generate_performance_report.py
20 | * generate_comparison_performance_report.py
21 |
22 | ### generate_performance_report
23 | Creates a performance report for a single benchmark run. The results must first have had the formatter run on them.
24 |
25 | ```
26 | generate_performance_report.py --archive=<archive_directory>
27 |                                --output_directory=<output_directory>
28 | --create_pdf
29 | ```
30 |
31 | where:
32 | - `--archive` Required. The archive directory containing the files from the formatter
33 | - `--output_directory` Required. The directory to store the markdown report file and relevant plots.
34 | - `--create_pdf` Optional. Create a pdf report
35 |
36 | Full help text is provided by using `--help` with the scripts
37 |
38 | #### Example
39 | ```bash
40 | PYTHONPATH=/cbt /cbt/tools/generate_performance_report.py --archive="/tmp/ch_cbt_main_run" --output_directory="/tmp/reports/main" --create_pdf
41 | ```
42 |
43 | ### generate_comparison_performance_report.py
44 | Creates a report comparing 2 or more benchmark runs. The report will only include plots and results for formatted files
45 | that are common in all the directories.
46 |
47 | ```
48 | generate_comparison_performance_report.py --baseline=<baseline_directory>
49 |                                           --archives=<directory1,directory2,...>
50 |                                           --output_directory=<output_directory>
51 | --create_pdf
52 | ```
53 | where
54 | - `--baseline` Required. The full path to the baseline results for the comparison
55 | - `--archives` Required. A comma-separated list of directories containing results to compare to the baseline
56 | - `--output_directory` Required. The directory to store the markdown report file and relevant plots.
57 | - `--create_pdf` Optional. Create a pdf report
58 |
59 | #### Examples
60 | ```bash
61 | PYTHONPATH=/cbt /cbt/tools/generate_comparison_performance_report.py --baseline="/tmp/ch_cbt_main_run" --archives="/tmp/ch_sandbox/" --output_directory="/tmp/reports/main" --create_pdf
62 | ```
--------------------------------------------------------------------------------
/example/wip-mark-testing/ceph.conf:
--------------------------------------------------------------------------------
1 | [global]
2 | osd pool default size = 1
3 |
4 | osd crush chooseleaf type = 0
5 |
6 | keyring = /tmp/cbt/ceph/keyring
7 | osd pg bits = 8
8 | osd pgp bits = 8
9 | auth supported = none
10 | log to syslog = false
11 | log file = /tmp/cbt/ceph/log/$name.log
12 | filestore xattr use omap = true
13 | auth cluster required = none
14 | auth service required = none
15 | auth client required = none
16 |
17 | public network = 192.168.10.0/24
18 | cluster network = 192.168.10.0/24
19 | rbd cache = true
20 | osd scrub load threshold = 0.01
21 | osd scrub min interval = 137438953472
22 | osd scrub max interval = 137438953472
23 | osd deep scrub interval = 137438953472
24 | osd max scrubs = 16
25 |
26 | filestore merge threshold = 40
27 | filestore split multiple = 8
28 | osd op threads = 8
29 |
30 | debug_lockdep = "0/0"
31 | debug_context = "0/0"
32 | debug_crush = "0/0"
33 | debug_mds = "0/0"
34 | debug_mds_balancer = "0/0"
35 | debug_mds_locker = "0/0"
36 | debug_mds_log = "0/0"
37 | debug_mds_log_expire = "0/0"
38 | debug_mds_migrator = "0/0"
39 | debug_buffer = "0/0"
40 | debug_timer = "0/0"
41 | debug_filer = "0/0"
42 | debug_objecter = "0/0"
43 | debug_rados = "0/0"
44 | debug_rbd = "0/0"
45 | debug_journaler = "0/0"
46 | debug_objectcacher = "0/0"
47 | debug_client = "0/0"
48 | debug_osd = "0/0"
49 | debug_optracker = "0/0"
50 | debug_objclass = "0/0"
51 | debug_filestore = "0/0"
52 | debug_journal = "0/0"
53 | debug_ms = "0/0"
54 | debug_mon = "0/0"
55 | debug_monc = "0/0"
56 | debug_paxos = "0/0"
57 | debug_tp = "0/0"
58 | debug_auth = "0/0"
59 | debug_finisher = "0/0"
60 | debug_heartbeatmap = "0/0"
61 | debug_perfcounter = "0/0"
62 | debug_rgw = "0/0"
63 | debug_hadoop = "0/0"
64 | debug_asok = "0/0"
65 | debug_throttle = "0/0"
66 |
67 | mon pg warn max object skew = 100000
68 | mon pg warn min per osd = 0
69 | mon pg warn max per osd = 32768
70 |
71 |
72 | [mon]
73 | mon data = /tmp/cbt/ceph/mon.$id
74 |
75 | [mon.a]
76 | host = burnupiY
77 | mon addr = 192.168.10.2:6789
78 |
79 | [osd.0]
80 | host = burnupiX
81 | osd data = /tmp/cbt/mnt/osd-device-0-data
82 | osd journal = /dev/disk/by-partlabel/osd-device-0-journal
83 |
84 | [osd.1]
85 | host = burnupiX
86 | osd data = /tmp/cbt/mnt/osd-device-1-data
87 | osd journal = /dev/disk/by-partlabel/osd-device-1-journal
88 |
89 | [osd.2]
90 | host = burnupiX
91 | osd data = /tmp/cbt/mnt/osd-device-2-data
92 | osd journal = /dev/disk/by-partlabel/osd-device-2-journal
93 |
94 | [osd.3]
95 | host = burnupiX
96 | osd data = /tmp/cbt/mnt/osd-device-3-data
97 | osd journal = /dev/disk/by-partlabel/osd-device-3-journal
98 |
99 |
--------------------------------------------------------------------------------
/tests/test_common.py:
--------------------------------------------------------------------------------
1 | """ Unit tests for the Common class """
2 |
3 | import uuid
4 | import shutil
5 | import warnings
6 | import os
7 | import tempfile
8 | import unittest
9 | import unittest.mock
10 | import common
11 |
12 | VAR_NAME = "CBT_TEST_NODES"
13 | MSG = f"No test VM provided. Set {VAR_NAME} env var"
14 |
15 | def iter_nodes(nodes):
16 | """
17 | Iterator to produce each individual node
18 | """
19 | for node in nodes.split(","):
20 | if '@' in node:
21 | node = node.split("@", 1)[1]
22 | yield node
23 |
24 |
25 | class TestCommon(unittest.TestCase):
26 | """ Sanity tests for common.py """
27 | def test_mkdirp(self):
28 | """
29 | Can create a directory
30 | """
31 | with warnings.catch_warnings():
32 | warnings.simplefilter("ignore")
33 | with tempfile.TemporaryDirectory() as tmp:
34 | fname = os.path.join(tmp, 'a', 'b12', 'zasdasd')
35 | common.mkdir_p(fname)
36 | self.assertTrue(os.path.isdir(fname))
37 | shutil.rmtree(fname)
38 |
39 | @unittest.skipIf(VAR_NAME not in os.environ, MSG)
40 | def test_pdsh(self):
41 | """
42 | Can issue a valid cli to the nodes
43 | """
44 | nodes = os.environ[VAR_NAME]
45 | out, _err = common.pdsh(nodes, "ls /").communicate()
46 | # output from the first node in the list, so we are interested
47 |         # in the contents
48 | for _node in iter_nodes(nodes):
49 | self.assertIn("etc\n", out)
50 |
51 | @unittest.skipIf(VAR_NAME not in os.environ, MSG)
52 | def test_pdsh_no_cmd(self):
53 | """
54 | Can issue an invalid cli to the node, get rc not 0
55 | """
56 | nodes = os.environ[VAR_NAME]
57 | proc = common.pdsh(nodes, "unknown_cmd_131321")
58 | proc.communicate()
59 | # log(proc)
60 | #self.assertNotEqual(proc.myrtncode, 0)
61 | self.assertEqual(proc.myrtncode, 0)
62 |
63 | @unittest.skipIf(VAR_NAME not in os.environ, MSG)
64 | def test_pdcp_rpdcp(self):
65 | """
66 | Can copy a file to the nodes
67 | """
68 | nodes = os.environ[VAR_NAME]
69 | with warnings.catch_warnings():
70 | warnings.simplefilter("ignore")
71 | tmp = uuid.uuid4().hex
72 | fname = os.path.join('/tmp/',tmp)
73 | val = str(uuid.uuid1())
74 | with open(fname, "w", encoding='UTF-8') as fd:
75 | fd.write(val)
76 | try:
77 | common.pdcp(nodes, None, fname, fname).communicate()
78 | out, _err = common.pdsh(nodes, "cat " + fname).communicate()
79 | for _node in iter_nodes(nodes):
80 | #self.assertIn(f"{node}: {val}\n", out)
81 | self.assertIn(out,f"{val}\n")
82 | finally:
83 | pass
84 |
85 | common.rpdcp(nodes, None, fname, os.path.dirname(fname)).communicate()
86 | try:
87 | with open(fname,encoding='UTF-8') as fd:
88 | self.assertEqual(fd.read(), val)
89 | finally:
90 | try:
91 | os.remove(fname)
92 | except OSError:
93 | pass
94 | common.pdsh(nodes, "rm " + fname).communicate()
95 |
--------------------------------------------------------------------------------
/parsing/test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | import argparse
4 | import os
5 | import fnmatch
6 | import hashlib
7 | import database
8 | from htmlgenerator import HTMLGenerator
9 |
10 |
11 | def mkhash(values):
12 | value_string = ''.join([str(i) for i in values])
13 |     return hashlib.sha256(value_string.encode()).hexdigest()
14 |
15 |
16 | def parse_args():
17 | parser = argparse.ArgumentParser(description='get fio averages.')
18 | parser.add_argument(
19 | 'input_directory',
20 | help='Directory to search.',
21 | )
22 |
23 | args = parser.parse_args()
24 | return args
25 |
26 |
27 | def find(pattern, path):
28 | result = []
29 | for root, dirs, files in os.walk(path):
30 | for name in files:
31 | if fnmatch.fnmatch(name, pattern):
32 | result.append(os.path.join(root, name))
33 | return result
34 |
35 |
36 | def splits(s, d1, d2):
37 | l, _, r = s.partition(d1)
38 | m, _, r = r.partition(d2)
39 | return m
40 |
41 |
42 | def getbw(s):
43 | if "GB/s" in s:
44 | return float(s[:-4]) * 1024
45 | if "MB/s" in s:
46 | return float(s[:-4])
47 | if "KB/s" in s:
48 | return float(s[:-4]) / 1024
49 |
50 |
51 | if __name__ == '__main__':
52 | ctx = parse_args()
53 | database.create_db()
54 |
55 | files = find('output.*', ctx.input_directory)
56 | totals = {}
57 | for inputname in files:
58 | # strip off the input directory
59 | params = inputname[len(ctx.input_directory):].split("/")[3:-1]
60 | # make readahead into an int
61 | params[3] = int(params[3][7:])
62 |
63 | # Make op_size into an int
64 | params[4] = int(params[4][8:])
65 |
66 | # Make cprocs into an int
67 | params[5] = int(params[5][17:])
68 |
69 | # Make io_depth int an int
70 | params[6] = int(params[6][9:])
71 |
72 | params_hash = mkhash(params)
73 | params = [params_hash] + params
74 | params.extend([0, 0])
75 | database.insert(params)
76 |
77 | for line in open(inputname):
78 | if "aggrb" in line:
79 | bw = getbw(splits(line, 'aggrb=', ','))
80 | if "READ" in line:
81 | database.update_readbw(params_hash, bw)
82 | elif "WRITE" in line:
83 | database.update_writebw(params_hash, bw)
84 | html = HTMLGenerator()
85 | html.add_html(html.read_file('/home/nhm/src/cbt/include/html/table.html'))
86 | html.add_style(html.read_file('/home/nhm/src/cbt/include/css/table.css'))
87 | html.add_script(html.read_file('/home/nhm/src/cbt/include/js/jsxcompressor.min.js'))
88 | html.add_script(html.read_file('/home/nhm/src/cbt/include/js/d3.js'))
89 | html.add_script(html.read_file('/home/nhm/src/cbt/include/js/d3var.js'))
90 | html.add_script(html.format_data(database.fetch_table(['opsize', 'testtype'])))
91 | html.add_script(html.read_file('/home/nhm/src/cbt/include/js/table.js'))
92 |
93 | print('<html>')
94 | print('<title>D3 Table Test </title>')
95 | print('<body>')
96 | print(html.to_string())
97 | print('</body></html>')
98 | # print database.fetch_table(['opsize', 'testtype'])
99 |
100 | # get_section(['opsize', 'testtype'])
101 |
102 | # write_html()
103 | # write_data(['opsize', 'testtype'])
104 | # write_style()
105 | # write_js()
106 |
--------------------------------------------------------------------------------
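To make the parsing above concrete, here is a small worked example of what `splits()` and `getbw()` extract from a typical fio summary line; the sample line is illustrative, not taken from a real run:

```python
line = "  READ: io=1024MB, aggrb=513.2MB/s, minb=513.2MB/s, maxb=513.2MB/s"

# splits(line, 'aggrb=', ',') isolates the text between the two delimiters:
assert line.partition("aggrb=")[2].partition(",")[0] == "513.2MB/s"

# getbw() then normalises the unit suffix to MB/s:
#   getbw("513.2MB/s") -> 513.2
#   getbw("1.5GB/s")   -> 1536.0
#   getbw("512KB/s")   -> 0.5
```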
/post_processing/plotter/file_comparison_plotter.py:
--------------------------------------------------------------------------------
1 | """
2 | A file containing the classes and code required to read two files stored in the common
3 | intermediate format introduced in CBT PR #319 (https://github.com/ceph/cbt/pull/319)
4 | and produce a plot of both the files on the same axes.
5 | """
6 |
7 | from logging import Logger, getLogger
8 | from pathlib import Path
9 | from typing import Optional
10 |
11 | import matplotlib.pyplot as plotter
12 |
13 | from post_processing.common import (
14 | DATA_FILE_EXTENSION_WITH_DOT,
15 | PLOT_FILE_EXTENSION_WITH_DOT,
16 | get_blocksize_percentage_operation_from_file_name,
17 | read_intermediate_file,
18 | )
19 | from post_processing.plotter.common_format_plotter import CommonFormatPlotter
20 | from post_processing.types import COMMON_FORMAT_FILE_DATA_TYPE
21 |
22 | log: Logger = getLogger("cbt")
23 |
24 |
25 | class FileComparisonPlotter(CommonFormatPlotter):
26 | """
27 | Read the intermediate data files in the common json format and produce a
28 | curve plot of both sets of data on the same axes. Error bars are not included
29 | as they seem to make the plot harder to read and compare.
30 | """
31 |
32 | def __init__(self, output_directory: str, files: list[str], labels: Optional[list[str]] = None) -> None:
33 | self._output_directory: str = f"{output_directory}"
34 | self._comparison_files: list[Path] = [Path(file) for file in files]
35 |         self._labels: Optional[list[str]] = labels
36 |
37 | def draw_and_save(self) -> None:
38 | output_file_path: str = self._generate_output_file_name(files=self._comparison_files)
39 |
40 |         for index, file_path in enumerate(self._comparison_files):
41 |             # enumerate avoids a repeated list.index() lookup for every file
42 | file_data: COMMON_FORMAT_FILE_DATA_TYPE = read_intermediate_file(f"{file_path}")
43 |
44 | operation_details: tuple[str, str, str] = get_blocksize_percentage_operation_from_file_name(
45 | file_name=file_path.stem
46 | )
47 |
48 | # If we have a label use it, otherwise set the label from the
49 | # filename. We can reliably do this as we create the file name when
50 | # we save the intermediate file.
51 | label: str = ""
52 | if self._labels is not None:
53 | label = self._labels[index]
54 |
55 | if label == "":
56 | label = " ".join(operation_details)
57 |
58 | self._add_single_file_data(plotter=plotter, file_data=file_data, label=label)
59 |
60 | # make sure we add the legend to the plot
61 | plotter.legend() # pyright: ignore[reportUnknownMemberType]
62 |
63 | self._add_title(plotter=plotter, source_files=self._comparison_files)
64 | self._set_axis(plotter=plotter)
65 | self._save_plot(plotter=plotter, file_path=output_file_path)
66 | self._clear_plot(plotter=plotter)
67 |
68 | def set_labels(self, labels: list[str]) -> None:
69 | """
70 | Set the labels for the plot lines
71 | """
72 | self._labels = labels
73 |
74 | def _generate_output_file_name(self, files: list[Path]) -> str:
75 | output_file: str = f"{self._output_directory}/Comparison"
76 |
77 | for file_path in files:
78 | # get the actual file name - this will be the last part of the path
79 | file_name = file_path.parts[-1]
80 | # strip off the .json extension from each file
81 | file: str = file_name[: -len(DATA_FILE_EXTENSION_WITH_DOT)]
82 |
83 | output_file += f"_{file}"
84 |
85 | return f"{output_file}{PLOT_FILE_EXTENSION_WITH_DOT}"
86 |
--------------------------------------------------------------------------------
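A hypothetical usage sketch of the class above; both input paths are placeholders for intermediate-format files produced by CBT post-processing (see CBT PR #319):

```python
from post_processing.plotter.file_comparison_plotter import FileComparisonPlotter

comparison = FileComparisonPlotter(
    output_directory="/tmp/plots",
    files=["/tmp/baseline/4096B_randwrite.json", "/tmp/candidate/4096B_randwrite.json"],
)
comparison.set_labels(["baseline", "candidate"])
comparison.draw_and_save()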
/cbt.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | import argparse
3 | import collections
4 | import logging
5 | import pprint
6 | import sys
7 |
8 | import settings
9 | import benchmarkfactory
10 | from cluster.ceph import Ceph
11 | from log_support import setup_loggers
12 |
13 | logger = logging.getLogger("cbt")
14 | # Uncomment this if further debug detail (module, funcname) are needed
15 | #FORMAT = "%(asctime)s] [%(levelname)s] [%(name)s] [%(funcName)s():%(lineno)s] %(message)s"
16 | #logging.basicConfig(format=FORMAT, force=True)
17 | #logger.setLevel(logging.DEBUG)
18 |
19 |
20 | def parse_args(args):
21 | parser = argparse.ArgumentParser(description='Continuously run ceph tests.')
22 | parser.add_argument(
23 | '-a', '--archive',
24 | required=True,
25 | help='Directory where the results should be archived.',
26 | )
27 |
28 | parser.add_argument(
29 | '-c', '--conf',
30 | required=False,
31 | help='The ceph.conf file to use.',
32 | )
33 |
34 | parser.add_argument(
35 | 'config_file',
36 | help='YAML config file.',
37 | )
38 |
39 | return parser.parse_args(args[1:])
40 |
41 |
42 | def main(argv):
43 | setup_loggers()
44 | ctx = parse_args(argv)
45 | settings.initialize(ctx)
46 |
47 | logger.debug("Settings.cluster:\n %s",
48 | pprint.pformat(settings.cluster).replace("\n", "\n "))
49 |
50 | global_init = collections.OrderedDict()
51 | rebuild_every_test = settings.cluster.get('rebuild_every_test', False)
52 | archive_dir = settings.cluster.get('archive_dir')
53 |
54 |
55 | # FIXME: Create ClusterFactory and parametrically match benchmarks and clusters.
56 | cluster = Ceph(settings.cluster)
57 |
58 | # Only initialize and prefill upfront if we aren't rebuilding for each test.
59 | if not rebuild_every_test:
60 | if not cluster.use_existing:
61 |             cluster.initialize()
62 | # Why does it need to iterate for the creation of benchmarks?
63 | for iteration in range(settings.cluster.get("iterations", 0)):
64 | benchmarks = benchmarkfactory.get_all(archive_dir, cluster, iteration)
65 | for b in benchmarks:
66 | if b.exists():
67 | continue
68 | if b.getclass() not in global_init:
69 | b.initialize()
70 | b.initialize_endpoints()
71 | b.prefill()
72 | b.cleanup()
73 | # Only initialize once per class.
74 | global_init[b.getclass()] = b
75 |
76 | #logger.debug("Settings.cluster.is_teuthology:%s",settings.cluster.get('is_teuthology', False))
77 | # Run the benchmarks
78 | return_code = 0
79 | try:
80 | for iteration in range(settings.cluster.get("iterations", 0)):
81 | benchmarks = benchmarkfactory.get_all(archive_dir, cluster, iteration)
82 | for b in benchmarks:
83 | if not b.exists() and not settings.cluster.get('is_teuthology', False):
84 | continue
85 |
86 | if rebuild_every_test:
87 | cluster.initialize()
88 | b.initialize()
89 | # Always try to initialize endpoints before running the test
90 | b.initialize_endpoints()
91 |                 logger.info("Running benchmark %s == iteration %d ==", b, iteration)
92 | b.run()
93 |     except Exception:
94 | return_code = 1 # FAIL
95 | logger.exception("During tests")
96 |
97 | return return_code
98 |
99 | if __name__ == '__main__':
100 |     sys.exit(main(sys.argv))
101 |
--------------------------------------------------------------------------------
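A minimal sketch of invoking cbt.py programmatically, equivalent to the command line `python3 cbt.py --archive /tmp/cbt-archive example/rbd_fio_test.yml`; the archive path is a placeholder:

```python
import sys

import cbt

# main() expects a full argv list, including the program name at index 0.
sys.exit(cbt.main(["cbt.py", "--archive", "/tmp/cbt-archive",
                   "example/rbd_fio_test.yml"]))
```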
/tools/crimson/seastore_metrics_run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | TOP_DIR=$(cd $(dirname "$0") && pwd)
4 |
5 | # configurations
6 | RESULT_DIR="$TOP_DIR/results"
7 | BUILD_DIR="~/ceph/build/"
8 |
9 | TOTAL_ROUND=10
10 | ROUND_SECONDS=1
11 |
12 | WITH_RADOS_BENCH=true
13 | BENCH_POOL="pool-name"
14 | BENCH_IODEPTH=64
15 | BENCH_TIME=$(( ($TOTAL_ROUND - 1) * $ROUND_SECONDS -
16 | ($ROUND_SECONDS > 120 ? 120 : $ROUND_SECONDS) ))
17 |
18 | METRICS_ENABLE=true
19 |
20 | # require nvme and iostat, interval > 180s
21 | STATS_ENABLE=true
22 | STATS_DEV="/dev/dev-name"
23 |
24 | collect_metrics() {
25 | if ! $METRICS_ENABLE; then
26 | return
27 | fi
28 | local current_round=$1
29 | local current_ms=$2
30 | local file_name=result_${current_round}_metrics_${current_ms}.log
31 | echo "start collect metrics to $file_name ..."
32 | CEPH_DEV=1 ./bin/ceph tell osd.0 dump_metrics 2>&1 | tee $RESULT_DIR/$file_name > /dev/null
33 | echo "finish collect metrics"
34 | }
35 |
36 | collect_stats() {
37 | if ! $STATS_ENABLE; then
38 | return
39 | fi
40 | local current_round=$1
41 | local current_ms=$2
42 | local file_name=result_${current_round}_stats_${current_ms}.log
43 | echo "start collect stats to $file_name ..."
44 | if [ `iostat -k -d $STATS_DEV | awk 'NR == 3 {print $5}'` = "kB_dscd/s" ]; then
45 | local read_wrtn_dscd_kb=( `iostat -k -d $STATS_DEV | awk 'NR == 4 {print $6, $7, $8}'` )
46 | elif [ `iostat -k -d $STATS_DEV | awk 'NR == 3 {print $5}'` = "kB_read" ]; then
47 | local read_wrtn_dscd_kb=( `iostat -k -d $STATS_DEV | awk 'NR == 4 {print $5, $6}'` )
48 | read_wrtn_dscd_kb[2]=0
49 | else
50 | echo "Warning! The parameter is incorrect. Modify the parameter according to the actual output of the iostat commmand"
51 | exit 1
52 | fi
53 | local nand_host_sectors=( `nvme intel smart-log-add $STATS_DEV | awk 'NR == 14 || NR == 15 {print $5}'` )
54 |     if [ ${#nand_host_sectors[@]} -lt 2 ]; then
55 |         echo "Error! Failed to read smart-log values; try running: nvme intel smart-log-add /dev/dev-name"
56 | exit 1
57 | fi
58 | tee $RESULT_DIR/$file_name > /dev/null << EOT
59 | {
60 | "read_kb": {
61 | "value": ${read_wrtn_dscd_kb[0]}
62 | },
63 | "wrtn_kb": {
64 | "value": ${read_wrtn_dscd_kb[1]}
65 | },
66 | "dscd_kb": {
67 | "value": ${read_wrtn_dscd_kb[2]}
68 | },
69 | "nand_sect": {
70 | "value": ${nand_host_sectors[0]}
71 | },
72 | "host_sect": {
73 | "value": ${nand_host_sectors[1]}
74 | }
75 | }
76 | EOT
77 | echo "finish collect stats"
78 | }
79 |
80 | run_rados_bench() {
81 | if ! $WITH_RADOS_BENCH; then
82 | return
83 | fi
84 |     # run the bench in the background so metric/stat collection continues
85 | local file_name=result_0_radosbench.log
86 | echo "start rados bench $BENCH_TIME seconds to $file_name ..."
87 | CEPH_DEV=1 ./bin/rados bench -p $BENCH_POOL $BENCH_TIME write -b 4096 --concurrent-ios=$BENCH_IODEPTH --no-cleanup | tee $RESULT_DIR/$file_name &
88 | }
89 |
90 | # Note: currently only a single OSD is supported, so that write
91 | # amplification is measured correctly.
92 | if [ -e $RESULT_DIR ]; then
93 | echo "'$RESULT_DIR' dir already exists, remove it or select a different one"
94 | exit 1
95 | fi
96 |
97 | mkdir -p $RESULT_DIR
98 | cd $BUILD_DIR
99 | CURRENT_ROUND=0
100 | TARGET_ROUND=$(( CURRENT_ROUND + TOTAL_ROUND ))
101 | CURRENT_MS=$(($(date +%s%N)/1000000))
102 | collect_metrics $CURRENT_ROUND $CURRENT_MS
103 | collect_stats $CURRENT_ROUND $CURRENT_MS
104 | while [ $CURRENT_ROUND -lt $TARGET_ROUND ]
105 | do
106 | (( ++CURRENT_ROUND ))
107 | echo "start round $CURRENT_ROUND of $TARGET_ROUND for ${ROUND_SECONDS}s ..."
108 | sleep $ROUND_SECONDS
109 | CURRENT_MS=$(($(date +%s%N)/1000000))
110 | collect_metrics $CURRENT_ROUND $CURRENT_MS
111 | collect_stats $CURRENT_ROUND $CURRENT_MS
112 | echo "finish round $CURRENT_ROUND"
113 | echo
114 | if [ $CURRENT_ROUND -eq 1 ]; then
115 | run_rados_bench
116 | fi
117 | done
118 | echo "done!"
119 | cd $TOP_DIR
120 |
--------------------------------------------------------------------------------
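Since the script's stated purpose is measuring write amplification on a single OSD, here is a hedged sketch of how the recorded snapshots might be post-processed. The WA definition (delta of NAND writes over delta of host writes) and the assumption that the smart-log counters are monotonic are mine, not the script's:

```python
import json

def write_amplification(first_snapshot: str, last_snapshot: str) -> float:
    """Derive NAND write amplification from two result_<round>_stats_<ms>.log files."""
    with open(first_snapshot) as f:
        first = json.load(f)
    with open(last_snapshot) as f:
        last = json.load(f)
    nand_delta = last["nand_sect"]["value"] - first["nand_sect"]["value"]
    host_delta = last["host_sect"]["value"] - first["host_sect"]["value"]
    # assumes some host writes occurred between the two snapshots
    return nand_delta / host_delta
```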
/docs/AutomaticUnitTestGeneration.md:
--------------------------------------------------------------------------------
1 | # tools/serialise_benchmark.py -- Automatic Unit Test Generation
2 |
3 | ## Description:
4 |
5 | This is a standalone tool to generate unit tests for CBT.
6 |
7 | The execution of the script produces as output:
8 |
9 | 1. a new baseline tools/baseline.json, which is a serialisation of each of the Benchmark class instances,
10 | 2. a new set of tests/test_bm_{benchmark}.py, each consisting of a set of sanity unit tests.
11 |
12 | ## Requirements:
13 |
14 | The Python modules pytest and pytest-xdist should be installed on the machine that will run the tool; this can be the same machine that drives CBT.
15 |
16 | ## Usage:
17 |
18 | The following is an example of the execution of the script:
19 |
20 | ```bash
21 | # python3 tools/serialise_benchmark.py
22 | ```
23 | An example of the expected normal output is shown below.
24 |
25 | 
26 |
27 | This creates (or updates, if they already exist) the set of unit tests for the supported benchmarks.
28 |
29 | ## Execution of unit tests:
30 |
31 | The unit tests can be executed from the command line as follows:
32 |
33 | ```bash
34 | # python3 -m pytest -p no:cacheprovider tests/
35 | ```
36 | An example output showing a successful execution:
37 |
38 | 
39 |
40 | Note: the tests skipped above require an environment variable to be defined to identify the target nodes
41 | for exercising pdsh.
42 |
43 | The following is an example of configuring the environment for the pdsh tests:
44 |
45 | ```bash
46 | # export CBT_TEST_NODES=root@ceph2,root@ceph4
47 | # export PDSH_SSH_ARGS_APPEND="-p 8023 -o StrictHostKeyChecking=no -v -E /tmp/ssh.out"
48 | ```
49 |
50 | ## Generation of Unit tests
51 |
52 | The main idea is the concept of **referential transparency** (see for example [ref_transparency](https://stackoverflow.com/questions/210835/what-is-referential-transparency)). In the functional programming
53 | paradigm, it means that a function applied to the same input value always produces the same output. The test
54 | generator takes advantage of this since the constructors of the Benchmark classes should always produce instances
55 | with the same initial state. The Benchmark class in CBT expects as an argument an object from a .yaml file (the test plan, which includes a Cluster type object). If we ensure we provide a fixed, minimal cluster object to the
56 | constructor of the Benchmark class, we have an _invariant_ that we can use to test that each of the attributes
57 | of the Benchmark classes has the same value across runs.
58 |
59 | In other words, each class constructor of the CBT Benchmark class behaves like a function and always produces
60 | object instances initialised with the same values, provided the same fixed cluster instance as argument.
61 |
62 |
63 | * For each supported Benchmark class, the tool constructs a serialisation of the object instance and saves it
64 | in tools/baseline.json.
65 | * To detect tampering, an md5sum of the contents of the .json file is calculated (a minimal sketch of this check appears after this file).
66 | * For each supported Benchmark class, the tool uses a boilerplate code template to produce unit tests. Each unit test verifies that a supported attribute of the benchmark class is initialised as recorded by baseline.json.
67 | * When executed, the unit tests first perform a sanity check that baseline.json has not changed since the unit tests were created, and then verify each attribute of each Benchmark class. This is useful to detect
68 | whether some attribute has been changed, replaced or deleted, and especially to catch regressions
69 | during code refactoring.
70 |
71 |
72 | ## Recommended workflow
73 |
74 |
75 | * Before starting a code refactoring effort, run the unit tests: they should all pass as shown above.
76 | * Make the intended code change -- for example, remove a benchmark.py class module, or refine with new attributes,
77 | or delete some existing attributes.
78 | * Run the unit tests: some should fail, indicating attributes that existed previously but are no longer present in the current benchmark class module.
79 | * Run the tool serialise_benchmark.py. This will regenerate the baseline.json and the unit tests.
80 | * Run the unit tests: they should now all pass.
81 | * Iterate if required.
82 |
--------------------------------------------------------------------------------
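The baseline tamper check described above amounts to only a few lines. A minimal sketch, assuming the repository layout described in the document and using the md5 value recorded in tests/test_bm_nullbench.py (shown later in this listing):

```python
import hashlib

# The expected md5 is the value recorded at generation time; compare with
# the generated equivalent in tests/test_bm_nullbench.py.
expected_md5 = "e6b6fcd2be74bd08939c64a249ab2125"
with open("tools/baseline.json", "rb") as f:
    assert hashlib.md5(f.read()).hexdigest() == expected_md5
```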
/example/rbd_fio_test.yml:
--------------------------------------------------------------------------------
1 | cluster:
2 | use_existing: True
3 | osds_per_node: 1
4 | user: 'root'
5 | head: "sv1-ceph3.ssd.hursley.ibm.com"
6 | #head: "ceph3"
7 | #clients: [localhost]
8 | clients: ["sv1-ceph3.ssd.hursley.ibm.com"]
9 | #osds: ["localhost"]
10 | osds: ["sv1-ceph3.ssd.hursley.ibm.com"]
11 | #mons: ["localhost"]
12 | iterations: 1
13 | conf_file: '/ceph/build/ceph.conf'
14 | ceph-osd_cmd: '/ceph/build/bin/ceph-osd'
15 | ceph-mon_cmd: '/ceph/build/bin/ceph-mon'
16 | ceph-run_cmd: '/ceph/build/bin/ceph-run'
17 | ceph-rgw_cmd: '/ceph/build/bin/radosgw'
18 | ceph-mgr_cmd: '/ceph/build/bin/ceph-mgr'
19 | ceph-mds_cmd: '/ceph/build/bin/ceph-mds'
20 | ceph-authtool_cmd: '/ceph/build/bin/ceph-authtool'
21 | radosgw-admin_cmd: '/ceph/build/bin/radosgw-admin'
22 | ceph_cmd: '/ceph/build/bin/ceph'
23 | ceph-fuse_cmd: '/ceph/build/bin/ceph-fuse'
24 | rados_cmd: '/ceph/build/bin/rados'
25 | rbd_cmd: '/ceph/build/bin/rbd'
26 | rbd-nbd_cmd: '/ceph/build/bin/rbd-nbd_cmd'
27 | rbd-fuse_cmd: '/ceph/build/bin/rbd-fuse_cmd'
28 | tmp_dir: "/tmp/cbt"
29 | pid_dir: "/ceph/build/out/"
30 | pdsh_ssh_args: "-a -p 8023 -x -l%u %h"
31 | pool_profiles:
32 | rbd:
33 | pg_size: 256
34 | pgp_size: 256
35 | #replication: 1
36 | monitoring_profiles:
37 | # These monitor only the OSD node/process
38 | collectl:
39 | # These options indicate:
40 | # collect 30 samples, summary mem and CPU util, each 5 secs for CPU samples, 10 sec for all other,
41 | # process options: threads utilisation
42 | # filter samples for CPUs 0 to 3 (e.g. Crimson IO reactor cores),
43 | # filter samples for process osd (redundant),
44 |       # produce gnuplot file data format (aka csv) in output file (gzipped)
45 | #
46 | args: '-c 30 -sZC -i 5:10 --procopts t --cpufilt 0-3 --procfilt cosd -P -f {collectl_dir}'
47 | perf:
48 | perf_cmd: 'perf'
49 | # This collects 10 secs of data to produce flame graphs
50 | args: 'record -e cycles:u --call-graph dwarf -i -p {pid} -o {perf_dir}/{pid}_osd_perf.out sleep 10'
51 | top:
52 | top_cmd: 'top'
53 | # This collects 30 samples, Core and thread CPU utilisation
54 | args: '-b -H -1 -p {pid} -n 30 > {top_dir}/{pid}_osd_top.out'
55 | benchmarks:
56 | librbdfio:
57 | cmd_path: '/usr/local/bin/fio'
58 | # ToDo: consider a subdict that defines the global FIO options
59 | # but we need to ensure backwards compatibility with existing .yaml
60 | wait_pgautoscaler_timeout: 2 # in secs
61 | use_existing_volumes: False
62 | no_sudo: True
63 | ## Global FIO options
64 | pool_profile: 'rbd'
65 | vol_size: 1024 # Volume size in Megabytes
66 | #vol_name: 'fio_test_%d' # TBC. valid python format string
67 | #rbdname: 'fio_test_%d'
68 | idle_monitor_sleep: 5 # in seconds
69 | fio_out_format: 'json'
70 | iterations: 3
71 | time: 300 # length of run
72 | ramp: 30 # ramp up time
73 | log_avg_msec: 100
74 | log_iops: True
75 | log_bw: True
76 | log_lat: True
77 | poolname: 'rbd'
78 | mode: 'randwrite'
79 |     iodepth: [1, 4, 16]
80 | numjobs: [1, 4, 8]
81 | op_size: [4096] # block IO size in bytes
82 | procs_per_client: [1]
83 | volumes_per_client: [1] # volumes per ceph node
84 | # for tests involving specific CPU cores:
85 | fio_cpu_set: '15-15'
86 | # Optional FIO options for the /prefill stage
87 | prefill:
88 | blocksize: '4M'
89 | numjobs: 1
90 | # Each block below uses its own local options during its execution
91 | workloads:
92 | precondition:
93 | jobname: 'precond1rw'
94 | mode: 'randwrite'
95 | numjobs: [ 1 ]
96 | iodepth: [ 4 ]
97 | monitor: False # whether to run the monitors along the test
98 | test1:
99 | jobname: 'rr'
100 | mode: 'randread'
101 |       numjobs: [ 1 ]
102 |       iodepth: [ 1 ]
103 | #numjobs: [ 1, 4, 8 ]
104 | #iodepth: [ 1, 4, 8 ]
105 | # ToDo: can we add a list of the monitoring subset we are interested for this workload?
106 | test2:
107 | jobname: 'rw'
108 | mode: 'randwrite'
109 | numjobs: [ 1 ]
110 | iodepth: [ 1 ]
111 | #numjobs: [ 1, 4, 8 ]
112 | #iodepth: [ 1, 4, 8 ]
113 |
--------------------------------------------------------------------------------
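A hedged illustration only: the list-valued options above (`iodepth`, `numjobs`, `op_size`) suggest a sweep over their cartesian product; whether librbdfio nests the loops exactly this way is an assumption, not taken from the CBT source:

```python
from itertools import product

# One run per iodepth x numjobs x op_size combination implied by the yaml above.
for iodepth, numjobs, op_size in product([1, 4, 16], [1, 4, 8], [4096]):
    print(f"run: iodepth={iodepth} numjobs={numjobs} op_size={op_size}")
```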
/compare.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | import argparse
4 | import os
5 | import logging
6 | import sys
7 | import yaml
8 |
9 | import settings
10 | import benchmarkfactory
11 | from cluster.ceph import Ceph
12 | from log_support import setup_loggers
13 |
14 | logger = logging.getLogger("cbt")
15 |
16 |
17 | # Github Flavored Markdown elements
18 | class Table:
19 | def __init__(self):
20 | self.text = ''
21 | self.cols = 0
22 |
23 | def add_headers(self, *headers):
24 | text = ' | '.join(headers) + '\n'
25 | text += ' | '.join('-' * len(h) for h in headers) + '\n'
26 | self.text += text
27 | self.cols = len(headers)
28 |
29 | def add_cells(self, *cells):
30 |         assert self.cols == len(cells)
31 | text = ' | '.join(str(c) for c in cells) + '\n'
32 | self.text += text
33 |
34 | def __str__(self):
35 | return self.text
36 |
37 |
38 | class Heading:
39 | def __init__(self, level, text):
40 | self.text = '#' * level + ' ' + text + '\n'
41 |
42 | def __str__(self):
43 | return self.text
44 |
45 |
46 | class Heading3(Heading):
47 | def __init__(self, text):
48 | super().__init__(3, text)
49 |
50 |
51 | def main():
52 | setup_loggers()
53 | parser = argparse.ArgumentParser(description='query and compare CBT test results')
54 | parser.add_argument(
55 | '-a', '--archive',
56 | required=True,
57 | help='Directory where the results to be compared are archived.')
58 | parser.add_argument(
59 | '-b', '--baseline',
60 | required=True,
61 | help='Directory where the baseline results are archived.')
62 | parser.add_argument(
63 | '-v', '--verbose',
64 | action='store_true',
65 | help='be chatty')
66 | parser.add_argument(
67 | '--output',
68 | help='write result in markdown to specified file',
69 | type=argparse.FileType('w'))
70 | ctx = parser.parse_args(sys.argv[1:])
71 | # settings.initialize() expects ctx.config_file and ctx.conf
72 | ctx.config_file = os.path.join(ctx.archive, 'results', 'cbt_config.yaml')
73 | ctx.conf = None
74 | settings.initialize(ctx)
75 |
76 | results = []
77 | for iteration in range(settings.cluster.get('iterations', 0)):
78 | cluster = Ceph(settings.cluster)
79 | benchmarks = list(zip(benchmarkfactory.get_all(ctx.archive, cluster, iteration),
80 | benchmarkfactory.get_all(ctx.baseline, cluster, iteration)))
81 | for current, baseline in benchmarks:
82 | if not current.exists(True):
83 | logger.error("tested: %s result does not exist in %s",
84 | current, ctx.archive)
85 | break
86 | if not baseline.exists(True):
87 | logger.error("baseline: %s result does not exist in %s",
88 | baseline, ctx.baseline)
89 | break
90 | results.extend(current.evaluate(baseline))
91 |
92 | nr_accepted = sum(result.accepted for result in results)
93 | if ctx.verbose:
94 | for result in results:
95 | if result.accepted:
96 | logger.info(result)
97 | else:
98 | logger.warning(result)
99 |
100 | nr_tests = len(results)
101 | nr_rejected = nr_tests - nr_accepted
102 |
103 | if ctx.output:
104 | heading = None
105 | if nr_rejected:
106 | heading = Heading3(f'{nr_rejected} out of {nr_tests} failed')
107 | else:
108 | heading = Heading3(f'all {nr_tests} tests passed')
109 | ctx.output.write(str(heading))
110 |
111 | table = Table()
112 | table.add_headers('run', 'metric', 'baseline', 'result', 'accepted')
113 | for r in results:
114 | table.add_cells(r.run, r.alias, r.baseline, r.result,
115 | ' ' if r.accepted else ':x:')
116 | ctx.output.write(str(table))
117 |
118 | if nr_rejected > 0:
119 | logger.warning("%d tests failed out of %d", nr_rejected, len(results))
120 | sys.exit(1)
121 | else:
122 | logger.info("All %d tests passed.", len(results))
123 |
124 |
125 | if __name__ == '__main__':
126 | main()
127 |
--------------------------------------------------------------------------------
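An illustration of the GitHub Flavored Markdown the `Table` class emits when used from within compare.py; the run data is made up:

```python
table = Table()
table.add_headers('run', 'metric', 'baseline', 'result', 'accepted')
table.add_cells('librbdfio-0', 'iops_avg', 1000, 1100, ':x:')
print(table)
# Output:
# run | metric | baseline | result | accepted
# --- | ------ | -------- | ------ | --------
# librbdfio-0 | iops_avg | 1000 | 1100 | :x:
```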
/tools/fio_objectstore_tools/analyze.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import json
4 | import os
5 | import sys
6 | import itertools
7 | import argparse
8 | import subprocess
9 | import numpy as np
11 |
12 | from summarize import dump_target, generate_summary
13 | from traces import open_trace, iterate_structured_trace
14 | from graph import graph, Scatter, Histogram
15 |
16 | parser = argparse.ArgumentParser()
17 | parser.add_argument(
18 | 'target', metavar='T', type=str, help='target results directory')
19 | parser.add_argument(
20 | '--match', type=str, help='json for matching', default='{}')
21 | parser.add_argument(
22 | '--output', type=str, help='output directory')
23 | parser.add_argument(
24 | '--generate-graphs', action='store_true', help='generate graphs')
25 | parser.add_argument(
26 | '--detailed', action='store_true',
27 | help='generate more detailed graphs')
28 | parser.add_argument(
29 | '--drop-first', type=float,
30 |     help='drop events that start before this time', default=10.0)
31 | parser.add_argument(
32 | '--drop-after', type=float,
33 |     help='drop events that start after this time')
34 | parser.add_argument(
35 | '--filter-latency-above', type=float,
36 | help='filter out latency above given percentile')
37 | parser.add_argument(
38 | '--filter-latency-below', type=float,
39 | help='filter out latency below given percentile')
40 |
41 |
42 | def get_targets(directory):
43 | contents = os.listdir(directory)
44 | if 'ceph.conf' in contents:
45 | return [(os.path.basename(directory), directory)]
46 | else:
47 | return [(x, os.path.join(directory, x)) for x in contents]
48 |
49 |
50 | args = parser.parse_args()
51 |
52 | match = json.loads(args.match)
53 | targets = get_targets(args.target)
54 | projected = [dump_target(name, target) for name, target in targets]
55 |
56 | def do_filter(match, input):
57 | def cond(x):
58 | return all(x[1]['config'].get(k) == v for k, v in list(match.items()))
59 | return list(filter(cond, input))
60 |
61 | filtered_targets, filtered = list(zip(*do_filter(match, list(zip(targets, projected)))))
62 |
63 | summary = generate_summary(filtered, match)
64 |
65 | graph_filename = lambda x: None
66 | if args.output:
67 | subprocess.run(['mkdir', '-p', args.output], check=False)
68 | graph_filename = lambda x: os.path.join(args.output, x + '.png')
69 |
70 | def do_mask(above, below):
71 | def f(lat):
72 | l, u = np.percentile(
73 | lat,
74 | [below if below else 0.0,
75 | above if above else 100.0],
76 | interpolation='linear')
77 |
78 | return (lat > l) & (lat < u)
79 | return f
80 |
81 | masker = None
82 | mask_params = None
83 |
84 | if args.filter_latency_above or args.filter_latency_below:
85 | mask_params = ['latency']
86 | masker = do_mask(args.filter_latency_above, args.filter_latency_below)
87 |
88 | TO_GRAPH_SMALL = [
89 | [Scatter(x, 'commit_latency_no_throttle', ymax=0.99) for x in
90 | ['current_kv_throttle_cost', 'current_deferred_throttle_cost']],
91 | [Scatter(x, 'throughput', ymax=0.99) for x in
92 | ['current_kv_throttle_cost', 'current_deferred_throttle_cost']]
93 | ]
94 |
95 | TO_GRAPH_LARGE = [
96 | [Scatter(*x, ymax=0.99) for x in
97 | [('current_kv_throttle_cost', 'commit_latency_no_throttle'),
98 | ('current_deferred_throttle_cost', 'commit_latency_no_throttle'),
99 | ('total_throttle', 'commit_latency_no_throttle')]],
100 | [Scatter(*x, ymax=0.99) for x in
101 | [('current_kv_throttle_cost', 'throughput'),
102 | ('current_deferred_throttle_cost', 'throughput'),
103 | ('total_throttle', 'throughput')]],
104 | [Histogram(x) for x in
105 | ['current_kv_throttle_cost', 'current_deferred_throttle_cost', 'total_throttle']]
106 | ]
107 |
108 | if args.generate_graphs:
109 | for name, path in filtered_targets:
110 | print("Generating graph for {}, path: {}".format(name, path))
111 | events = iterate_structured_trace(open_trace(path))
112 | if args.drop_first:
113 | events = itertools.dropwhile(
114 | lambda x: x.get_start() < args.drop_first, events)
115 | if args.drop_after:
116 | events = itertools.takewhile(
117 | lambda x: x.get_start() < args.drop_after, events)
118 |
119 | graph(
120 | events, name, graph_filename(name),
121 | TO_GRAPH_LARGE if args.detailed else TO_GRAPH_SMALL,
122 | mask_params, masker)
123 |
124 | json.dump(summary, sys.stdout, sort_keys=True, indent=2)
125 |
--------------------------------------------------------------------------------
/tools/makecephconf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 |
3 | import argparse
4 | import os
5 | import subprocess
6 | import sys
7 | import yaml
8 | import time
9 | import copy
10 |
11 | def read_config(config_file):
12 | config = {}
13 | try:
14 |         with open(config_file) as f:
15 | g = yaml.safe_load_all(f)
16 | for new in g:
17 | config.update(new)
18 | except IOError as e:
19 | raise argparse.ArgumentTypeError(str(e))
20 | return config
21 |
22 | def parse_args():
23 | parser = argparse.ArgumentParser(description='Continuously run ceph tests.')
24 | parser.add_argument(
25 | '--target',
26 | required = True,
27 | help = 'Directory where the config files should go.',
28 | )
29 | parser.add_argument(
30 | 'config_file',
31 | help = 'YAML config file.',
32 | )
33 | args = parser.parse_args()
34 | return args
35 |
36 | def populate(l, name, value):
37 | name = name.replace("_", " ")
38 | l.append(" %s = %s" % (name, value))
39 |
40 | def mkosds(lists, yaml):
41 | i = 0
42 | for server in yaml.get('osd_servers', []):
43 | for j in range(0, yaml.get('osds_per_server', 0)):
44 | name = "osd.%d" % i
45 | lists[name] = []
46 | lists[name].append(" host = %s" % server)
47 | lists[name].append(" osd data = /srv/osd-device-%d-data" % j)
48 | lists[name].append(" osd journal = /srv/osd-device-%d-data/journal" % j)
49 | # lists[name].append(" osd journal = /dev/disk/by-partlabel/osd-device-%d-journal" % j)
50 | i += 1
51 |
52 | def writescript(f, param, value, conf):
53 | for fs,rtconf in sorted(runtests_conf.items()):
54 | pdir = param
55 | if value:
56 | pdir = "%s_%s" % (param, value)
57 | f.write("%s --conf %s --archive %s/%s/%s %s\n" % (runtests_exec, conf, outdir, fs, pdir, rtconf))
58 |
59 | def parametric(lists, yaml):
60 | if "global" not in lists:
61 | lists["global"] = []
62 | scriptname = "%s/runme.sh" % target
63 | f = open(scriptname,'w')
64 | f.write("#!/bin/bash\n")
65 |
66 | # the default
67 | filename = "%s/default.ceph.conf" % target
68 | writefile(lists, filename)
69 | writescript(f, "default", "", filename)
70 |
71 | for param,value in sorted(yaml.items()):
72 | if (isinstance(value, dict)):
73 | lc = copy.deepcopy(lists)
74 | for k,v in sorted(value.items()):
75 | populate(lc.get("global"), k, v)
76 | filename = "%s/%s.ceph.conf" % (target, param)
77 | writefile(lc, filename)
78 | writescript(f, param, "", filename)
79 | elif (isinstance(value, list)):
80 | for vi in value:
81 | lc = copy.deepcopy(lists)
82 | populate(lc.get("global"), param, vi)
83 | filename = "%s/%s_%s.ceph.conf" % (target, param, vi)
84 | writefile(lc, filename)
85 | writescript(f, param, vi, filename)
86 | else:
87 | lc = copy.deepcopy(lists)
88 | populate(lc.get("global"), param, value)
89 | filename = "%s/%s_%s.ceph.conf" % (target, param, value)
90 | writefile(lc, filename)
91 | writescript(f, param, value, filename)
92 | f.close()
93 | os.chmod(scriptname, 0o755)
94 |
95 | def writefile(lists, out):
96 | f = open(out,'w')
97 | # print out
98 | for k,v in sorted(lists.items()):
99 | f.write("[%s]\n" % k)
100 | for line in v: f.write("%s\n" % line)
101 | f.write("\n")
102 | f.close()
103 |
104 | target = ""
105 | outdir = ""
106 | runtests_exec = ""
107 | runtests_conf = {}
108 |
109 | if __name__ == '__main__':
110 | ctx = parse_args()
111 | config = read_config(ctx.config_file)
112 |
113 | target = os.path.abspath(ctx.target)
114 | os.system("mkdir -p -m0755 -- %s" % target)
115 |
116 | settings = config.get("settings", {})
117 | runtests_exec = settings.get("runtests_exec", "")
118 | runtests_conf = settings.get("runtests_conf", {})
119 | outdir = settings.get("outdir", "")
120 |
121 | default = config.get("default", {})
122 | lists = {}
123 | for section in default:
124 | lists[section] = []
125 | for k,v in default.get(section).items():
126 | populate(lists.get(section), k, v)
127 | mkosds(lists, config.get("settings", {}))
128 | parametric(lists, config.get("parametric", {}))
129 |
--------------------------------------------------------------------------------
/tools/fio_objectstore_tools/summarize.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import os
4 | import json
5 | from run import get_fio_output, get_base_config
6 |
7 | def populate_args(parser):
8 | parser.add_argument('target', metavar='T', type=str, help='target results directory')
9 | parser.add_argument('--match', type=str, help='json for matching', default='{}')
10 | parser.add_argument('--output', type=str, help='output directory')
11 |     parser.add_argument('--generate-graphs', action='store_true', help='generate graphs')
12 |     parser.set_defaults(func=summarize)
13 |
14 | def project(name, config, fio_stats, perf_stats):
15 | def f(op):
16 | return {
17 | 'iops_min': op['iops_min'],
18 | 'iops_max': op['iops_max'],
19 | 'iops': op['iops'],
20 | 'clat_min_ns': op['clat_ns']['min'],
21 | 'clat_max_ns': op['clat_ns']['max'],
22 | 'clat_mean_ns': op['clat_ns']['mean'],
23 | 'clat_median_ns': op['clat_ns']['percentile']['50.000000'],
24 | 'clat_99.9_ns': op['clat_ns']['percentile']['99.900000'],
25 | 'slat_min_ns': op['slat_ns']['min'],
26 | 'slat_max_ns': op['slat_ns']['max'],
27 | 'slat_mean_ns': op['slat_ns']['mean'],
28 | }
29 | fio = dict(((op, f(fio_stats['jobs'][0][op])) for op in ['read', 'write']))
30 |
31 | wanted_perf = [
32 | 'commit_lat',
33 | 'kv_commit_lat',
34 | 'kv_final_lat',
35 | 'kv_flush_lat',
36 | 'kv_sync_lat',
37 | 'state_deferred_aio_wait_lat',
38 | 'state_deferred_cleanup_lat',
39 | 'state_deferred_queued_lat',
40 | 'state_kv_committing_lat'
41 | ]
42 |
43 | perf = {
44 | k: v['avgtime'] for k, v in
45 | [x for x in list(perf_stats['perfcounter_collection']['bluestore'].items()) if '_lat' in x[0]]
46 | }
47 |
48 | return {
49 | 'fio': fio,
50 | 'config': config,
51 | 'name': name,
52 | 'perf': perf,
53 | }
54 |
55 | def dump_target(name, directory):
56 | fio_output = {}
57 | with open(get_fio_output(directory)) as f:
58 | decoder = json.JSONDecoder()
59 | fio_output, _ = decoder.raw_decode(f.read())
60 | #fio_output = json.load(f)
61 | perf_output = {}
62 | with open(os.path.join(directory, 'perf_counters.json')) as f:
63 | perf_output = json.load(f)
64 | with open(get_base_config(directory)) as f:
65 | base_config = json.load(f)
66 | return project(name, base_config, fio_output, perf_output)
67 |
68 | def generate_summary(filtered, match):
69 | def config_to_frozen(config, match):
70 | ret = dict([x for x in list(config.items()) if x[0] not in match])
71 | if 'run' in ret:
72 | del ret['run']
73 | return frozenset(sorted(ret.items()))
74 |
75 | def group_by_config(input):
76 | grouped = {}
77 | for run in filtered:
78 | key = config_to_frozen(run['config'], match)
79 | if key not in grouped:
80 | grouped[key] = []
81 | grouped[key].append(run)
82 | return [{'config': dict(list(k)), 'runs': v} for k, v in list(grouped.items())]
83 |
84 | grouped = group_by_config(filtered)
85 |
86 | def union_top_n(group):
87 | ret = set()
88 | for run in group:
89 | ret = ret.union(
90 | [k for v, k in sorted(((a, b) for b, a in list(run['perf'].items())))][::-1][:5]
91 | )
92 | return ret
93 |
94 | def project_run(perfs):
95 | def ret(run):
96 | return {
97 | 'tp': run['fio']['write']['iops'],
98 | 'lat': run['fio']['write']['clat_mean_ns'] / 1000000000.0,
99 | 'slat': run['fio']['write']['slat_mean_ns'] / 1000000000.0,
100 | 'perf': dict([x for x in list(run['perf'].items()) if x[0] in perfs])
101 | }
102 | return ret
103 |
104 | def sort_by(f, input):
105 | return [v for (_, _, v) in sorted([(f(x[0]), x[1], x[0]) for x in zip(input, list(range(len(input))))])]
106 |
107 | def project_group(group):
108 | perfs = union_top_n(group['runs'])
109 | return {
110 | 'config': group['config'],
111 | 'runs': sort_by(
112 | lambda x: x['tp'],
113 | list(map(project_run(perfs), group['runs'])))
114 | }
115 |
116 | return sort_by(
117 | lambda x: (x['config'].get('bs', 0), x['config'].get('size', 0)),
118 | list(map(project_group, grouped)))
119 |
--------------------------------------------------------------------------------
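The `sort_by()` helper above is local to `generate_summary()` and is dense enough to deserve a worked example. Here is a standalone reproduction of its logic: order items by a key while using original position as a tiebreak, so unorderable values such as dicts are never compared directly:

```python
def sort_by(f, items):
    # (key, original index, value) tuples: ties on key fall back to index,
    # so the dict values themselves never need to support comparison.
    return [v for (_, _, v) in sorted((f(x), i, x) for i, x in enumerate(items))]

runs = [{'tp': 30}, {'tp': 10}, {'tp': 20}]
assert sort_by(lambda x: x['tp'], runs) == [{'tp': 10}, {'tp': 20}, {'tp': 30}]
```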
/tools/generate_performance_report.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env -S python3 -B
2 | """
3 | A script to automatically generate a report from a set of performance run data
4 | in the common intermediate format described in CBT PR 319.
5 | The archive should contain the 'visualisation' sub directory where all
6 | the .json and plot files reside.
7 |
8 | tools/fio_common_output_wrapper.py will generate the .json files and the SimplePlotter
9 | module in CBT PR 321 can be used to generate the plot files
10 |
11 | Usage:
12 |     generate_performance_report.py --archive=<archive_dir>
13 |                                    --output_directory=<output_dir>
14 |                                    --create_pdf
15 |
16 |
17 | Input:
18 | --output_directory [Required] The directory to write the comparison plot
19 | to. If this does not exists it will be created.
20 |
21 | --archive [Required] The directory that contains the common
22 | format .json files and plot files to include
23 | in the report.
24 |
25 | --create_pdf [Optional] Create a pdf file of the report markdown
26 | file.
27 | This requires pandoc to be installed,
28 | and be on the path.
29 |
30 | Examples:
31 |
32 | Generate a markdown report file for the results in the '/tmp/squid_main' directory
33 | and save it in the '/tmp/main_results' directory:
34 |
35 | generate_performance_report.py --archive=/tmp/squid_main
36 |                                --output_directory=/tmp/main_results
37 |
38 | Additionally generate a pdf report file for the example above:
39 |
40 | generate_performance_report.py --archive=/tmp/squid_main
41 |                                --output_directory=/tmp/main_results
42 | --create_pdf
43 | """
44 |
45 | import os
46 | import subprocess
47 | from argparse import ArgumentParser, Namespace
48 | from logging import INFO, Logger, basicConfig, getLogger
49 |
50 | from post_processing.reports.simple_report_generator import SimpleReportGenerator
51 |
52 | log: Logger = getLogger(f"{os.path.basename(__file__)}")
53 |
54 |
55 | def main() -> int:
56 | """
57 | Main routine for the script
58 | """
59 |
60 | result: int = 0
61 |
62 | description: str = "Produces a performance report in markdown format \n"
63 | description += "from the json and png files stored in the visualisation\n"
64 | description += "subdirectory of the directory given by --archive\n"
65 | description += "The resulting report(s) are saved in the specified output directory.\n"
66 | description += "The json files must be in the correct format, as described by CBT PR 319\n"
67 | description += "(https://github.com/ceph/cbt/pull/319)"
68 |
69 | parser: ArgumentParser = ArgumentParser(description=description)
70 |
71 | parser.add_argument(
72 | "--output_directory",
73 | type=str,
74 | required=True,
75 | help="The directory to store the comparison plot file(s)",
76 | )
77 | parser.add_argument(
78 | "--archive",
79 | type=str,
80 | required=False,
81 | help="The directory that contains the set of json results files and generated plot files"
82 | + "for a particular test run",
83 | )
84 | parser.add_argument(
85 | "--create_pdf",
86 | action="store_true",
87 | help="Generate a pdf report file in addition to the markdown report",
88 | )
89 |
90 | arguments: Namespace = parser.parse_args()
91 |
92 | # will only create the output directory if it does not already exist
93 | subprocess.run(f"mkdir -p -m0755 {arguments.output_directory}", shell=True)
94 |
95 | report_generator = SimpleReportGenerator(
96 | archive_directories=arguments.archive, output_directory=arguments.output_directory
97 | )
98 |
99 | try:
100 | report_generator.create_report()
101 |
102 | if arguments.create_pdf:
103 | report_generator.save_as_pdf()
104 |
105 | except Exception:
106 | log.exception("Encountered an error plotting results")
107 | result = 1
108 |
109 | return result
110 |
111 |
112 | def initialise_logging() -> None:
113 | """
114 | Set up the logging for the sub-modules
115 | """
116 | basicConfig(level=INFO, format="%(name)-20s: %(levelname)-8s %(message)s")
117 |
118 |
119 | if __name__ == "__main__":
120 |     initialise_logging()
121 |     raise SystemExit(main())
122 |
--------------------------------------------------------------------------------
/tests/test_bm_nullbench.py:
--------------------------------------------------------------------------------
1 | """ Unit tests for the Benchmarknullbench class """
2 |
3 | import unittest
4 | import hashlib
5 | import json
6 | import benchmarkfactory
7 | import settings
8 | from cluster.ceph import Ceph
9 |
10 |
11 | class TestBenchmarknullbench(unittest.TestCase):
12 | """ Sanity tests for Benchmarknullbench """
13 | archive_dir = "/tmp"
14 | iteration = {'acceptable': [1,2,3], 'iteration': 0}
15 | cluster = {}
16 | cl_name = "tools/invariant.yaml"
17 | bl_name = "tools/baseline.json"
18 | bl_json = {}
19 | bl_md5 = 'e6b6fcd2be74bd08939c64a249ab2125'
20 | md5_returned = None
21 |
22 | @classmethod
23 | def setUpClass(cls):
24 |         with open(cls.bl_name, 'rb') as f:
25 |             data = f.read()
26 |         cls.md5_returned = hashlib.md5(data).hexdigest()
27 |         settings.mock_initialize(config_file=cls.cl_name)
28 |         cls.cluster = Ceph.mockinit(settings.cluster)
29 |         with open(cls.bl_name, 'r') as f:
30 |             cls.bl_json = json.load(f)
33 |
34 | @classmethod
35 | def tearDownClass(cls):
36 | cls.cluster = None
37 | cls.bl_json = None
38 |
39 | def test_valid_baseline(self):
40 | """ Verify the baseline has not been compromised """
41 |         self.assertEqual(self.bl_md5, str(self.md5_returned))
42 |
43 | def test_valid_archive_dir(self):
44 | """ Basic sanity attribute identity archive_dir check"""
45 | b = benchmarkfactory.get_object(self.archive_dir,
46 | self.cluster, 'nullbench', self.iteration)
47 | self.assertEqual(self.bl_json['nullbench']['archive_dir'], b.__dict__['archive_dir'])
48 |
49 | def test_valid_cmd_path(self):
50 | """ Basic sanity attribute identity cmd_path check"""
51 | b = benchmarkfactory.get_object(self.archive_dir,
52 | self.cluster, 'nullbench', self.iteration)
53 | self.assertEqual(self.bl_json['nullbench']['cmd_path'], b.__dict__['cmd_path'])
54 |
55 | def test_valid_cmd_path_full(self):
56 | """ Basic sanity attribute identity cmd_path_full check"""
57 | b = benchmarkfactory.get_object(self.archive_dir,
58 | self.cluster, 'nullbench', self.iteration)
59 | self.assertEqual(self.bl_json['nullbench']['cmd_path_full'], b.__dict__['cmd_path_full'])
60 |
61 | def test_valid_config(self):
62 | """ Basic sanity attribute identity config check"""
63 | b = benchmarkfactory.get_object(self.archive_dir,
64 | self.cluster, 'nullbench', self.iteration)
65 | self.assertEqual(self.bl_json['nullbench']['config'], b.__dict__['config'])
66 |
67 | def test_valid_log_bw(self):
68 | """ Basic sanity attribute identity log_bw check"""
69 | b = benchmarkfactory.get_object(self.archive_dir,
70 | self.cluster, 'nullbench', self.iteration)
71 | self.assertEqual(self.bl_json['nullbench']['log_bw'], b.__dict__['log_bw'])
72 |
73 | def test_valid_log_iops(self):
74 | """ Basic sanity attribute identity log_iops check"""
75 | b = benchmarkfactory.get_object(self.archive_dir,
76 | self.cluster, 'nullbench', self.iteration)
77 | self.assertEqual(self.bl_json['nullbench']['log_iops'], b.__dict__['log_iops'])
78 |
79 | def test_valid_log_lat(self):
80 | """ Basic sanity attribute identity log_lat check"""
81 | b = benchmarkfactory.get_object(self.archive_dir,
82 | self.cluster, 'nullbench', self.iteration)
83 | self.assertEqual(self.bl_json['nullbench']['log_lat'], b.__dict__['log_lat'])
84 |
85 | def test_valid_osd_ra(self):
86 | """ Basic sanity attribute identity osd_ra check"""
87 | b = benchmarkfactory.get_object(self.archive_dir,
88 | self.cluster, 'nullbench', self.iteration)
89 | self.assertEqual(self.bl_json['nullbench']['osd_ra'], b.__dict__['osd_ra'])
90 |
91 | def test_valid_osd_ra_changed(self):
92 | """ Basic sanity attribute identity osd_ra_changed check"""
93 | b = benchmarkfactory.get_object(self.archive_dir,
94 | self.cluster, 'nullbench', self.iteration)
95 | self.assertEqual(self.bl_json['nullbench']['osd_ra_changed'], b.__dict__['osd_ra_changed'])
96 |
97 | def test_valid_run_dir(self):
98 | """ Basic sanity attribute identity run_dir check"""
99 | b = benchmarkfactory.get_object(self.archive_dir,
100 | self.cluster, 'nullbench', self.iteration)
101 | self.assertEqual(self.bl_json['nullbench']['run_dir'], b.__dict__['run_dir'])
102 |
103 | def test_valid_valgrind(self):
104 | """ Basic sanity attribute identity valgrind check"""
105 | b = benchmarkfactory.get_object(self.archive_dir,
106 | self.cluster, 'nullbench', self.iteration)
107 | self.assertEqual(self.bl_json['nullbench']['valgrind'], b.__dict__['valgrind'])
108 |
109 | if __name__ == '__main__':
110 | unittest.main()
111 |
--------------------------------------------------------------------------------
/tools/generate_comparison_performance_report.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env -S python3 -B
2 | """
3 | A script to automatically generate a report from a set of performance run data
4 | in the common intermediate format described in CBT PR 319.
5 | The archive should contain the 'visualisation' sub directory where all
6 | the .json and plot files reside.
7 |
8 | tools/fio_common_output_wrapper.py will generate the .json files and the SimplePlotter
9 | module in CBT PR 321 can be used to generate the plot files
10 |
11 | Usage:
12 |     generate_comparison_performance_report.py
13 |         --baseline=<baseline_dir>
14 |         --archives=<archive_dirs>
15 |         --output_directory=<output_dir>
16 |         --create_pdf
17 |
18 |
19 | Input:
20 |     --output_directory [Required] The directory to write the comparison report
21 |                                   to. If it does not exist it will be created.
22 |
23 | --baseline [Required] The directory containing the common
24 | format .json files to use as the baseline
25 | for the report
26 |
27 | --archives [Required] The directories that contain the common
28 | format .json files to compare to the baseline
29 |
30 | --create_pdf [Optional] Create a pdf file of the report markdown
31 | file.
32 | This requires pandoc to be installed,
33 | and be on the path.
34 |
35 | Examples:
36 |
37 | Generate a markdown report comparing the results in '/tmp/my_build' against the
38 | baseline in '/tmp/squid_main', and save it in the '/tmp/main_results' directory:
39 |
40 | generate_comparison_performance_report.py --baseline=/tmp/squid_main
41 | --archives=/tmp/my_build
42 |                                           --output_directory=/tmp/main_results
43 |
44 | Additionally generate a pdf report file for the example above:
45 |
46 | generate_comparison_performance_report.py --baseline=/tmp/squid_main
47 |                                           --archives=/tmp/my_build
48 |                                           --output_directory=/tmp/main_results
49 | --create_pdf
50 | """
51 |
52 | import os
53 | import subprocess
54 | from argparse import ArgumentParser, Namespace
55 | from logging import INFO, Logger, basicConfig, getLogger
56 |
57 | from post_processing.reports.comparison_report_generator import ComparisonReportGenerator
58 |
59 | log: Logger = getLogger(f"{os.path.basename(__file__)}")
60 |
61 |
62 | def main() -> int:
63 | """
64 | Main routine for the script
65 | """
66 |
67 | result: int = 0
68 |
69 | description: str = "Produces a performance report in markdown format \n"
70 | description += "from the json and png files stored in the visualisation\n"
71 | description += "subdirectory of the directory given by --archive\n"
72 | description += "The resulting report(s) are saved in the specified output directory.\n"
73 | description += "The json files must be in the correct format, as described by CBT PR 319\n"
74 | description += "(https://github.com/ceph/cbt/pull/319)"
75 |
76 | parser: ArgumentParser = ArgumentParser(description=description)
77 |
78 | parser.add_argument(
79 | "--output_directory",
80 | type=str,
81 | required=True,
82 | help="The directory to store the comparison plot file(s)",
83 | )
84 | parser.add_argument(
85 | "--baseline",
86 | type=str,
87 | required=True,
88 | help="The full path to the directory that contain the set "
89 | + "of json results files to be used as the baseline for this "
90 | + "comparion",
91 | )
92 | parser.add_argument(
93 | "--archives",
94 | type=str,
95 | required=True,
96 | help="A comma separated list of the directories that contain the set "
97 | + "of json results files to be compared to the baseline",
98 | )
99 | parser.add_argument(
100 | "--create_pdf",
101 | action="store_true",
102 | help="Generate a pdf report file in addition to the markdown report",
103 | )
104 |
105 | arguments: Namespace = parser.parse_args()
106 |
107 | # will only create the output directory if it does not already exist
108 | subprocess.run(f"mkdir -p -m0755 {arguments.output_directory}", shell=True)
109 |
110 | report_generator = ComparisonReportGenerator(
111 | archive_directories=f"{arguments.baseline},{arguments.archives}", output_directory=arguments.output_directory
112 | )
113 |
114 | try:
115 | report_generator.create_report()
116 |
117 | if arguments.create_pdf:
118 | report_generator.save_as_pdf()
119 |
120 | except Exception:
121 | log.exception("Encountered an error creating the report")
122 | result = 1
123 |
124 | return result
125 |
126 |
127 | def initialise_logging() -> None:
128 | """
129 | Set up the logging for the sub-modules
130 | """
131 | basicConfig(level=INFO, format="%(name)-20s: %(levelname)-8s %(message)s")
132 |
133 |
134 | if __name__ == "__main__":
135 |     initialise_logging()
136 |     raise SystemExit(main())
137 |
--------------------------------------------------------------------------------
/plot_results.py:
--------------------------------------------------------------------------------
1 | """
2 | A file containing the classes and code required to read a file stored in the common
3 | intermediate format introduced in PR 319 (https://github.com/ceph/cbt/pull/319) and produce a hockey-stick curve graph
4 | """
5 |
6 | import json
7 | from logging import Logger, getLogger
8 | from pathlib import Path
9 | from typing import Dict, List, Union
10 |
11 | import matplotlib.pyplot as plotter
12 |
13 | log: Logger = getLogger("cbt")
14 |
15 |
16 | class PlotResults:
17 | """
18 | Read the intermediate data file in the common json format and produce a hockey-stick
19 | curve plot that includes standard deviation error bars.
20 | """
21 |
22 |     # A converter between the operation type in the intermediate file format
23 |     # and a human-readable string that can be used in the title for the plot.
24 | TITLE_CONVERSION: Dict[str, str] = {
25 | "read": "Sequential Read",
26 | "write": "Sequential Write",
27 | "randread": "Random Read",
28 | "randwrite": "Random Write",
29 | "readwrite": "Sequential Read/Write",
30 | "randrw": "Random Read/Write",
31 | }
32 |
33 | def __init__(self, archive_directory: str) -> None:
34 | self._data_directory: str = f"{archive_directory}/visualisation"
35 |
36 | self._path: Path = Path(self._data_directory)
37 |
38 | def draw_and_save(self) -> None:
39 | """
40 | Produce the plot files for each of the intermediate data files in the given directory.
41 | """
42 |
43 | for file_path in self._path.glob("*.json"):
44 | file_data: Dict[str, Union[str, Dict[str, str]]] = self._read_intermediate_file(f"{file_path}")
45 |             output_file: str = f"{str(file_path)[:-len('json')]}png"
46 | plot_title: str = self._generate_plot_title(file_path.parts[-1])
47 |
48 | keys: List[str] = [key for key in file_data.keys() if isinstance(file_data[key], dict)]
49 | plot_data: Dict[str, Dict[str, str]] = {}
50 | sorted_plot_data: Dict[str, Dict[str, str]] = {}
51 | for key, data in file_data.items():
52 | if isinstance(data, dict):
53 | plot_data[key] = data
54 |
55 | sorted_keys: List[str] = sorted(keys, key=int)
56 | for key in sorted_keys:
57 | sorted_plot_data[key] = plot_data[key]
58 |
59 | x_axis: List[Union[int, float]] = []
60 | y_axis: List[Union[int, float]] = []
61 | error_bars: List[float] = []
62 |
63 | log.info("converting file %s", f"{file_path}")
64 |
65 | for _, data in sorted_plot_data.items():
66 | # for blocksize less than 64K we want to use the bandwidth to plot the graphs,
67 | # otherwise we should use iops.
68 | blocksize: int = int(int(data["blocksize"]) / 1024)
69 | if blocksize < 64:
70 |                 # convert bytes to MB, not MiB, so divide by 1000s rather than 1024s
71 | x_axis.append(float(data["bandwidth_bytes"]) / (1000 * 1000))
72 | plotter.xlabel("Bandwidth (MB)") # pyright: ignore[reportUnknownMemberType]
73 | else:
74 | x_axis.append(float(data["iops"]))
75 | plotter.xlabel("IOps") # pyright: ignore[reportUnknownMemberType]
76 | # The stored values are in ns, we want to convert to ms
77 | y_axis.append(float(data["latency"]) / (1000 * 1000))
78 | plotter.ylabel("Latency (ms)") # pyright: ignore[reportUnknownMemberType]
79 | error_bars.append(float(data["std_deviation"]) / (1000 * 1000))
80 |
81 | plotter.title(plot_title) # pyright: ignore[reportUnknownMemberType]
82 | plotter.errorbar(x_axis, y_axis, error_bars, capsize=3, ecolor="red") # pyright: ignore[reportUnknownMemberType]
83 | plotter.savefig(output_file, format="png") # pyright: ignore[reportUnknownMemberType]
84 | # Now we have saved the file, clear the plot for the next file
85 | plotter.clf()
86 |
87 | def _read_intermediate_file(self, file_path: str) -> Dict[str, Union[str, Dict[str, str]]]:
88 | """
89 | Read the json data from the intermediate file and store it for processing.
90 | """
91 | data: Dict[str, Union[str, Dict[str, str]]] = {}
92 | # We know the file encoding as we wrote it ourselves as part of
93 | # common_output_format.py, so it is safe to specify here
94 | with open(f"{file_path}", "r", encoding="utf8") as file_data:
95 | data = json.load(file_data)
96 |
97 | return data
98 |
99 | def _generate_plot_title(self, source_file: str) -> str:
100 | """
101 | Given the Path object for the input file, generate the title for the
102 | data plot
103 | """
104 | # Strip the .json from the file name as we don't need it
105 | title_with_underscores: str = f"{source_file[:-5]}"
106 | parts: List[str] = title_with_underscores.split("_")
107 |
108 | # The filename is in one of 2 formats:
109 | # BLOCKSIZE_OPERATION.json
110 | # BLOCKSIZE_READ_WRITE_OPERATION.json
111 | #
112 | # The split on _ will mean that the last element [-1] will always be
113 | # the operation, and the first part [0] will be the blocksize
114 | title: str = f"{int(int(parts[0][:-1]) / 1024)}K "
115 | if len(parts) > 2:
116 | title += f"{parts[1]}/{parts[2]} "
117 |
118 | title += f"{self.TITLE_CONVERSION[parts[-1]]}"
119 | return title
120 |
--------------------------------------------------------------------------------
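Worked examples of `_generate_plot_title()` above; the file names are illustrative but follow the two formats documented in the method:

```python
plots = PlotResults("/tmp/archive")  # reads /tmp/archive/visualisation/*.json

# "4096B" -> 4096 bytes -> 4K; "randwrite" -> "Random Write"
assert plots._generate_plot_title("4096B_randwrite.json") == "4K Random Write"

# the longer format carries a read/write percentage split in the middle
assert plots._generate_plot_title("65536B_70_30_randrw.json") == "64K 70/30 Random Read/Write"
```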
/settings.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import yaml
3 | import sys
4 | import os
5 | import socket
6 | import logging
7 |
8 |
9 | logger = logging.getLogger("cbt")
10 |
11 | common = {}
12 | cluster = {}
13 | client_endpoints = {}
14 | benchmarks = {}
15 | monitoring_profiles = {}
16 |
17 |
18 | def _handle_monitoring_legacy():
19 | """
20 | Inject collectl even if the config says nothing about it to preserve
21 | compatibility with current CBT's configuration files.
22 | """
23 | global monitoring_profiles
24 | if 'collectl' not in monitoring_profiles:
25 | monitoring_profiles['collectl'] = {}
26 |
27 |
28 | def initialize(ctx):
29 | global common, cluster, client_endpoints, benchmarks, monitoring_profiles
30 |
31 | config = {}
32 | try:
33 | with open(ctx.config_file) as f:
34 | config = yaml.safe_load(f)
35 | except IOError as e:
36 | raise argparse.ArgumentTypeError(str(e))
37 |
38 | common = config.get('common', {})
39 | cluster = config.get('cluster', {})
40 | client_endpoints = config.get('client_endpoints', {})
41 | benchmarks = config.get('benchmarks', {})
42 | monitoring_profiles = config.get('monitoring_profiles', dict(collectl={}))
43 |
44 | if not cluster:
45 | shutdown('No cluster section found in config file, bailing.')
46 |
47 | if not benchmarks:
48 | shutdown('No benchmarks section found in config file, bailing.')
49 |
50 | # set the archive_dir from the commandline if present
51 | if ctx.archive:
52 | cluster['archive_dir'] = ctx.archive
53 | if 'archive_dir' not in cluster:
54 | shutdown('No archive dir has been set.')
55 |
56 | _handle_monitoring_legacy()
57 |
58 | # store cbt configuration in the archive directory
59 | cbt_results = os.path.join(cluster['archive_dir'], 'results')
60 | config_file = os.path.join(cbt_results, 'cbt_config.yaml')
61 | if not os.path.exists(cluster['archive_dir']):
62 | os.makedirs(cluster['archive_dir'])
63 | if not os.path.exists(cbt_results):
64 | os.makedirs(cbt_results)
65 | if not os.path.exists(config_file):
66 | config_dict = dict(cluster=cluster, benchmarks=benchmarks, monitoring_profiles=monitoring_profiles)
67 | with open(config_file, 'w') as fd:
68 | yaml.dump(config_dict, fd, default_flow_style=False)
69 |
70 | # set the tmp_dir if not set.
71 | if 'tmp_dir' not in cluster:
72 | cluster['tmp_dir'] = '/tmp/cbt.%s' % os.getpid()
73 |
74 | # set the ceph.conf file from the commandline if present
75 | if ctx.conf:
76 | cluster['conf_file'] = ctx.conf
77 | # If no conf file is set, default to /etc/ceph/ceph.conf
78 | # FIXME: We shouldn't have cluster specific defaults in settings.
79 | # Eventually make a base class with specific cluster implementations.
80 | if 'conf_file' not in cluster:
81 | cluster['conf_file'] = '/etc/ceph/ceph.conf'
82 | try:
83 | f = open(cluster['conf_file'])
84 | f.close()
85 | except IOError as e:
86 |         shutdown('Was not able to access conf file: %s (%s)' % (cluster['conf_file'], e))
87 |
88 |
89 | def host_info(host):
90 | ret = {}
91 | user = cluster.get('user')
92 |
93 | if '@' in host:
94 | user, host = host.split('@')
95 | ret['user'] = user
96 | if ':' in host:
97 | host, port = host.split(':')
98 | ret['port'] = port
99 | if user:
100 | ret['user'] = user
101 | ret['host'] = host
102 | # Follow-up: add support for socket.getaddrinfo
103 | try:
104 | ret['addr'] = socket.gethostbyname(host)
105 | except socket.gaierror as e:
106 |         shutdown(f'Was not able to gethostbyname: {host} ({e})')
107 | return ret
108 |
109 |
110 | def getnodes(*nodelists):
111 | nodes = []
112 |
113 | for nodelist in nodelists:
114 | cur = cluster.get(nodelist, [])
115 | if isinstance(cur, str):
116 | nodes.append(cur)
117 | elif isinstance(cur, dict):
118 | nodes.extend(list(cur.keys()))
119 | elif isinstance(cur, list):
120 | nodes.extend(cur)
121 | else:
122 |             raise ValueError("Can't process nodes of type %s - unknown set type: %r"
123 |                              % (nodelist, cur))
124 |
125 | str_nodes = ','.join(uniquenodes(nodes))
126 | #logger.debug("Nodes : %s", str_nodes)
127 | return str_nodes
128 |
129 |
130 | def uniquenodes(nodes):
131 | unique = [node for node in nodes if node]
132 | ret = []
133 |
134 | for host in unique:
135 | info = host_info(host)
136 | host_str = info['host']
137 | if 'user' in info:
138 | host_str = "%s@%s" % (info['user'], host_str)
139 | ret.append(host_str)
140 | return set(ret)
141 |
142 |
143 | def shutdown(message):
144 | sys.exit(message)
145 |
146 |
147 | def mock_initialize(config_file="tools/invariant.yaml"):
148 |     """ Auxiliary method only to be used from serialise_benchmark.py"""
149 | global common, cluster, client_endpoints, benchmarks, monitoring_profiles
150 | config = {}
151 | try:
152 | with open(config_file) as f:
153 | config = yaml.safe_load(f)
154 | except IOError as e:
155 | raise argparse.ArgumentTypeError(str(e))
156 |
157 | common = config.get('common', {})
158 | cluster = config.get('cluster', {})
159 | client_endpoints = config.get('client_endpoints', {})
160 | benchmarks = config.get('benchmarks', {})
161 | monitoring_profiles = config.get('monitoring_profiles', dict(collectl={}))
162 | # Set some defaults required
163 | cluster['tmp_dir'] = '/tmp/cbt.XYZ'
164 | cluster['osd_ra'] = '0'
165 |
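For orientation, a small sketch exercising the host-string parsing in host_info() and getnodes() above; run from the repo root, with 'localhost' so the gethostbyname() lookup succeeds:

    import settings

    # Hand-rolled cluster dict instead of a parsed YAML file.
    settings.cluster = {"head": "admin@localhost", "clients": ["localhost"]}

    print(settings.host_info("admin@localhost:2222"))
    # -> {'user': 'admin', 'port': '2222', 'host': 'localhost', 'addr': '127.0.0.1'}

    print(settings.getnodes("head", "clients"))
    # -> "admin@localhost,localhost" (comma-joined; set order not guaranteed)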
--------------------------------------------------------------------------------
/tools/fio_visualize_data/fio-plot-stats-usage.rst:
--------------------------------------------------------------------------------
1 | ====================
2 | Visualize Fio Output
3 | ====================
4 |
5 | Motivation
6 | ==========
7 |
8 | Fio generates quite a bit of output that is sometimes hard to decipher
9 | and understand. This problem is exacerbated further if one is running
10 | multiple tests with different ceph options to tune ceph performance.
11 | It would be good to have a tool that decodes the data from the log files
12 | created by Fio and generates meaningful graphs that provide insight into
13 | ceph performance.
14 |
15 | The attempt here is to start with some basic scripts that parse Fio
16 | output and generate plots like average client latencies and completion
17 | latency percentiles.
18 |
19 | Going further, the idea is to enhance the scripts to generate more meaningful
20 | graphs and to integrate more tightly with cbt, so that graphs can be generated
21 | via the yaml specification as part of the test itself.
22 |
23 | Usage
24 | =====
25 | .. code-block:: console
26 |
27 |
28 | $ ./fio-plot-stats.py -h
29 | usage: fio-plot-stats.py [-h] -f {json,csv} -s SRCDIR -d DESTDIR -o
30 | {read,write} -m {bw,lat,pct} [-i {pdf,png}]
31 | [-n FILENAME] [-r TIMERANGE TIMERANGE] [-p]
32 |
33 | Generate plots from fio output
34 |
35 | optional arguments:
36 | -h, --help show this help message and exit
37 | -f {json,csv}, --filetype {json,csv}
38 | type of file to parse
39 | -s SRCDIR, --source SRCDIR
40 | source directory containing fio output files
41 | -d DESTDIR, --destination DESTDIR
42 | destination directory to save generated plots
43 | -o {read,write}, --optype {read,write}
44 | plot read or write stats
45 | -m {bw,lat,pct}, --metric {bw,lat,pct}
46 | metric to analyze/plot
47 | -i {pdf,png}, --imgformat {pdf,png}
48 | plot image format
49 | -n FILENAME, --filename FILENAME
50 | source file containing CSV data to analyze/plot
51 | -r TIMERANGE TIMERANGE, --timerange TIMERANGE TIMERANGE
52 | time range to plot/calculate stats for CSV data
53 | -p, --subplot create a subplot with provided timerange
54 |
55 | Working Details
56 | ===============
57 | The input file format option ``-f/--filetype`` is mandatory. Depending on
58 | the file type, additional options may be provided to override the default
59 | behavior. For JSON file type, the tool scans for the files in the source
60 | directory and generates graphs. For CSV file type, an additional
61 | parameter called ``-n/--filename`` needs to be specified.
62 |
63 | The option ``-o/--optype`` tells the script to scan read or write statistics in
64 | the Fio files and generate the graphs.
65 |
66 | An additional artifact (apart from charts) of parsing JSON data is a
67 | CSV file containing the stats from the parsed files.
68 |
69 | NOTE: All fio files in the source directory with the string 'json' in the
70 | filename are treated as JSON files and are scanned automatically. Therefore, it
71 | is important to have 'json' in the filename if JSON data is to be visualized.
72 |
73 | Examples
74 | ========
75 | **Example 1**
76 |
77 | The following commands scan the source directory for files having the
78 | string 'json' in their filenames and parse the specified stats (lat, bw or pct)
79 | from the files to generate comparison graphs in the destination folder:
80 |
81 | .. code-block:: console
82 |
83 |     $ python3 fio-plot-stats.py -s ~/cbt_logs/json_logs -f json -o write -d ~/cbt_logs/json_logs -m lat
84 |     $ python3 fio-plot-stats.py -s ~/cbt_logs/json_logs -f json -o write -d ~/cbt_logs/json_logs -m bw
85 |     $ python3 fio-plot-stats.py -s ~/cbt_logs/json_logs -f json -o write -d ~/cbt_logs/json_logs -m pct
86 |
87 | **Example 2**
88 |
89 | The following command uses the specified CSV file containing write latency
90 | stats generated by fio and generates a chart of latency distribution across
91 | the entire duration of the test:
92 |
93 | .. code-block:: console
94 |
95 |     $ python3 fio-plot-stats.py -f csv -s ~/cbt_logs -d ~/cbt_logs -o write -n wpq_clat_Run7 -m lat
96 |
97 | **Example 3**
98 |
99 | The following command is similar to Example 2, except that additionally a
100 | subplot is generated in the same chart showing the latency distribution
101 | in the specified time range:
102 |
103 | .. code-block:: console
104 |
105 |     $ python3 fio-plot-stats.py -f csv -s ~/cbt_logs -d ~/cbt_logs -o write -n wpq_clat_Run7 -m lat -r 0 160 -p
106 |
107 | Note that if the '-p/--subplot' option is not specified in example 3, a
108 | chart with a single graph is generated for the time range specified.
109 |
110 | Plots may be generated for the 'bandwidth' metric by specifying 'bw' for the
111 | '-m' parameter in the above examples.
112 |
113 | Additionally, percentile data and charts may be generated by specifying 'pct'
114 | for the '-m' parameter. The raw clat latency data captured by fio must be
115 | provided as an input using the '-n' option. This uses the pandas and numpy
116 | modules to generate a percentile table and charts for the average, 50th, 95th, 99th and
117 | 99.5th percentiles. Given a time range, samples are analyzed for each second in
118 | between and the above percentiles are saved into a new pandas dataframe. A csv
119 | file is generated in addition to the chart for the time range specified.
120 |
121 | NOTE: Logging the histogram data generated by fio and running the analysis on
122 | it would provide more accurate information about percentile distribution.
123 | Please see fio source repository for more information on this.
124 |
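For reference, a minimal sketch of the per-second percentile aggregation
described above. The column layout is an assumption (fio clat logs are
typically "time(msec), value, direction, block size"), so check your log
format before reusing it:

.. code-block:: python

    import pandas as pd

    # Column names below are assumptions for a typical fio clat log.
    df = pd.read_csv("wpq_clat_Run7", names=["time_ms", "clat", "ddir", "bs"])
    df["second"] = df["time_ms"] // 1000
    pcts = df.groupby("second")["clat"].agg(
        avg="mean",
        p50=lambda s: s.quantile(0.50),
        p95=lambda s: s.quantile(0.95),
        p99=lambda s: s.quantile(0.99),
        p995=lambda s: s.quantile(0.995),
    )
    pcts.to_csv("clat_percentiles.csv")  # CSV artifact alongside the chart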
--------------------------------------------------------------------------------
/tools/config_wizard.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # NOTE: Be sure to run script on the main ceph monitor as the desired
3 | # CBT user if running the script automatically (-a).
4 |
5 | import argparse
6 | import os
7 | import socket
8 | import sys
9 |
10 | from config_class import Config, KvmRbdFio, Radosbench, RbdFio
11 |
12 | BENCHMARKS = ["radosbench", "kvmrbdfio", "rbdfio"]
13 | TMP_DIR = "/dev/null"
14 |
15 |
16 | def parse_arguments():
17 | parser = argparse.ArgumentParser()
18 | parser.add_argument("-a", "--automate", help="Automatically create a config"
19 | " file with default values for"
20 | " Radosbench, RBDFIO and "
21 | "KVMRBDFIO.",
22 | action="store_true")
23 | parser.add_argument("-o", "--output_file", help="Specify filename for "
24 | "output config file. "
25 | "Defaults to 'cbt_config"
26 | ".xfs.yaml'", type=str,
27 | nargs="?",
28 | default="cbt_config"
29 | ".xfs.yaml")
30 | return parser.parse_args()
31 |
32 |
33 | def get_hosts(auto):
34 | if auto:
35 | clients = []
36 | monitor = os.popen("hostname -s").read().rstrip()
37 | hosts = os.popen("ceph osd tree | grep host").read().split("\n")
38 | for host in hosts:
39 | if host != "":
40 | clients.append(host.rstrip().split(" ")[-1])
41 | return (monitor, clients)
42 |
43 | try:
44 | monitor = input("Enter the hostname of the monitor: ")
45 |         clients = input("Enter the hostname(s) of the OSD(s) separated by"
46 | " comma: ").replace(" ", "").split(",")
47 | except KeyboardInterrupt:
48 | print("Aborting script. No data will be saved.")
49 | sys.exit(1)
50 | return (monitor, clients)
51 |
52 |
53 | def get_user(auto):
54 | if auto:
55 | return os.getlogin()
56 |
57 | try:
58 | user = input("Enter the username for CBT: ")
59 | except KeyboardInterrupt:
60 | print("Aborting script. No data will be saved.")
61 | sys.exit(1)
62 | return user
63 |
64 |
65 | def get_tmp_dir(auto):
66 | if auto:
67 | return TMP_DIR
68 |
69 | try:
70 | directory = input("Enter the temporary directory for CBT results: ")
71 | except KeyboardInterrupt:
72 | print("Aborting script. No data will be saved.")
73 | sys.exit(1)
74 | return directory
75 |
76 |
77 | def select_tests():
78 | while True:
79 | valid = True
80 | print("Which of the following tests would you like to run?\nradosbench"\
81 | ", kvmrbdfio, rbdfio")
82 | try:
83 |             tests = input("Enter the test names separated by comma: ")
84 | tests = tests.replace(" ", "").split(",")
85 | except KeyboardInterrupt:
86 | print("Aborting script. No data will be saved.")
87 | sys.exit(1)
88 | for test in tests:
89 | if test.lower() not in BENCHMARKS:
90 | print("Unknown test: %s" % (test))
91 | print("Please specify only valid tests from the list above\n")
92 | valid = False
93 | break
94 | if valid:
95 | return [x.lower() for x in tests]
96 |
97 |
98 | def generate_test_values(test, default, config):
99 | if test == "rbdfio":
100 | rbdfio = RbdFio(default, config)
101 | config.add_benchmark_settings(rbdfio.output)
102 | elif test == "kvmrbdfio":
103 | kvmrbdfio = KvmRbdFio(default, config)
104 | config.add_benchmark_settings(kvmrbdfio.output)
105 | else:
106 | radosbench = Radosbench(default, config)
107 | config.add_benchmark_settings(radosbench.output)
108 |
109 |
110 | def main():
111 | args = parse_arguments()
112 | hosts = get_hosts(args.automate)
113 | user = get_user(args.automate)
114 | tmp_dir = get_tmp_dir(args.automate)
115 | conf = Config(args.output_file, hosts, user, tmp_dir)
116 | if args.automate:
117 | rbdfio = RbdFio(True, conf)
118 | kvmrbdfio = KvmRbdFio(True, conf)
119 | radosbench = Radosbench(True, conf)
120 | conf.add_benchmark_settings(rbdfio.output)
121 | conf.add_benchmark_settings(kvmrbdfio.output)
122 | conf.add_benchmark_settings(radosbench.output)
123 | else:
124 | tests = select_tests()
125 | for test in tests:
126 | use_default = False
127 | print("\nEntering settings for %s:" % (test))
128 | while True:
129 | try:
130 | default = input("Would you like to use default"
131 | " settings for %s [y/n]? " % (test))
132 | except KeyboardInterrupt:
133 | print("Aborting script. No data will be saved.")
134 | sys.exit(1)
135 | if default.lower() == "y":
136 | print("Using default values for %s" % (test))
137 | use_default = True
138 | break
139 | elif default.lower() == "n":
140 | use_default = False
141 | break
142 | generate_test_values(test, use_default, conf)
143 | conf.save_file()
144 | print("Output saved to: %s" % (conf.out_file))
145 |
146 | if __name__ == "__main__":
147 | main()
148 |
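The -a path in main() above can also be driven programmatically; a hedged sketch (hostnames and user are placeholders, signatures taken from this file's imports and main()):

    from config_class import Config, KvmRbdFio, Radosbench, RbdFio

    hosts = ("mon0", ["osd0", "osd1"])  # (monitor, clients) placeholders
    conf = Config("cbt_config.xfs.yaml", hosts, "cbtuser", "/dev/null")
    for bench in (RbdFio(True, conf), KvmRbdFio(True, conf), Radosbench(True, conf)):
        conf.add_benchmark_settings(bench.output)
    conf.save_file()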
--------------------------------------------------------------------------------
/benchmark/cephtestrados.py:
--------------------------------------------------------------------------------
1 | from .benchmark import Benchmark
2 | import common
3 | import settings
4 | import monitoring
5 | import os
6 | import time
7 | import logging
8 |
9 | logger = logging.getLogger('cbt')
10 |
11 |
12 | class CephTestRados(Benchmark):
13 |
14 | def __init__(self, archive_dir, cluster, config):
15 | super(CephTestRados, self).__init__(archive_dir, cluster, config)
16 |
17 | self.tmp_conf = self.cluster.tmp_conf
18 |
19 | self.bools = {}
20 | if config.get('ec_pool', False):
21 | self.bools['ec_pool'] = True
22 | if config.get('write_fadvise_dontneed', False):
23 | self.bools['write_fadvise_dontneed'] = True
24 | if config.get('pool_snaps', False):
25 | self.bools['pool_snaps'] = True
26 | if config.get('write_append_excl', True):
27 | self.bools['write_append_excl'] = True
28 |
29 | self.variables = {}
30 | self.variables['object_size'] = int(config.get('object_size', 4000000))
31 | self.variables['max_ops'] = str(config.get('ops', 10000))
32 | self.variables['objects'] = str(config.get('objects', 500))
33 | self.variables['max_in_flight'] = str(config.get('max_in_flight', 16))
34 | self.variables['size'] = int(config.get('object_size', 4000000))
35 |         self.variables['min_stride_size'] = str(config.get('min_stride_size', self.variables['object_size'] // 10))
36 |         self.variables['max_stride_size'] = str(config.get('max_stride_size', self.variables['object_size'] // 5))
37 | self.variables['max_seconds'] = str(config.get('max_seconds', 0))
38 |
39 | self.weights = {'read': 100, 'write': 100, 'delete': 10}
40 |         for weight in ['snap_create', 'snap_remove', 'rollback', 'setattr', 'rmattr', 'watch', 'copy_from', 'hit_set_list', 'is_dirty', 'cache_flush', 'cache_try_flush', 'cache_evict', 'append', 'write', 'read', 'delete']:
41 | self.addweight(weight)
42 | if 'write_append_excl' in self.bools and 'append' in self.weights:
43 |             self.weights['append'] = self.weights['write'] // 2
44 | self.weights['append_excl'] = self.weights['write']
45 |
46 | if 'write_append_excl' in self.bools and 'write' in self.weights:
47 |             self.weights['write'] = self.weights['write'] // 2
48 | self.weights['write_excl'] = self.weights['write']
49 |
50 | self.run_dir = '%s/osd_ra-%08d/object_size-%08d' % (self.run_dir, int(self.osd_ra), int(self.variables['object_size']))
51 | self.out_dir = '%s/osd_ra-%08d/object_size-%08d' % (self.archive_dir, int(self.osd_ra), int(self.variables['object_size']))
52 | self.pool_profile = config.get('pool_profile', 'default')
53 | self.cmd_path = config.get('cmd_path', '/usr/bin/ceph_test_rados')
54 |
55 | def addweight(self, weight):
56 | value = self.config.get("%s_weight" % weight, None)
57 | if value is not None:
58 | self.weights[weight] = int(value)
59 |
60 | def exists(self):
61 | if os.path.exists(self.out_dir):
62 | print('Skipping existing test in %s.' % self.out_dir)
63 | return True
64 | return False
65 |
66 | # Initialize may only be called once depending on rebuild_every_test setting
67 | def initialize(self):
68 | super(CephTestRados, self).initialize()
69 |
70 | def run(self):
71 | super(CephTestRados, self).run()
72 |
73 | # Remake the pool
74 | self.mkpool()
75 | self.dropcaches()
76 | self.cluster.dump_config(self.run_dir)
77 | monitoring.start(self.run_dir)
78 | time.sleep(5)
79 | # Run the backfill testing thread if requested
80 | if 'recovery_test' in self.cluster.config:
81 | recovery_callback = self.recovery_callback
82 | self.cluster.create_recovery_test(self.run_dir, recovery_callback)
83 |
84 | logger.info('Running ceph_test_rados.')
85 | ps = []
86 | for i in range(1):
87 | p = common.pdsh(settings.getnodes('clients'), self.mkcmd())
88 | ps.append(p)
89 | for p in ps:
90 | p.wait()
91 | # If we were doing recovery, wait until it's done.
92 | if 'recovery_test' in self.cluster.config:
93 | self.cluster.wait_recovery_done()
94 |
95 | monitoring.stop(self.run_dir)
96 |
97 | # Finally, get the historic ops
98 | self.cluster.dump_historic_ops(self.run_dir)
99 | common.sync_files('%s/*' % self.run_dir, self.out_dir)
100 |
101 | def mkcmd(self):
102 | cmd = [self.cmd_path]
103 | out_file = '%s/output' % self.run_dir
104 |
105 | for flag in ['ec_pool', 'write_fadvise_dontneed', 'pool_snaps']:
106 | if flag in self.bools:
107 | cmd.append('--%s' % flag.replace('_', '-'))
108 | for variable in ['max_ops', 'objects', 'max_in_flight', 'size', 'min_stride_size', 'max_stride_size', 'max_seconds']:
109 | value = self.variables[variable]
110 | if value:
111 | cmd.extend(['--%s' % variable.replace('_', '-'), str(value)])
112 | for op, weight in self.weights.items():
113 | cmd.extend(['--op', op, str(weight)])
114 | cmd.extend(['--pool', 'ceph_test_rados'])
115 |         cmd.extend(['|', 'awk \'{ print strftime("%Y-%m-%d %H:%M:%S"), $0; fflush(); }\'', '>', out_file])
116 | logger.debug("%s", cmd)
117 | return ' '.join(cmd)
118 |
119 | def mkpool(self):
120 | monitoring.start("%s/pool_monitoring" % self.run_dir)
121 | self.cluster.rmpool('ceph_test_rados', self.pool_profile)
122 | self.cluster.mkpool('ceph_test_rados', self.pool_profile, 'ceph_test_rados')
123 | monitoring.stop()
124 |
125 | def recovery_callback(self):
126 | common.pdsh(settings.getnodes('clients'), 'sudo pkill -f ceph_test_rados').communicate()
127 |
128 | def __str__(self):
129 | return "%s\n%s\n%s" % (self.run_dir, self.out_dir, super(CephTestRados, self).__str__())
130 |
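The op-weight wiring above is easy to misread; a condensed sketch of the default case (write_append_excl defaults to True, weights from the constructor, and 'append' is only present when an append_weight is configured):

    # Hedged sketch of the default weight splitting in __init__ above.
    weights = {'read': 100, 'write': 100, 'delete': 10}
    # write_append_excl is set by default, so 'write' is split in two:
    weights['write'] = weights['write'] // 2   # 50
    weights['write_excl'] = weights['write']   # 50
    # mkcmd() then emits: --op read 100 --op write 50 --op delete 10
    #                     --op write_excl 50 --pool ceph_test_rados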
--------------------------------------------------------------------------------
/statistic.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | try:
4 | from scipy import stats
5 | from numpy import array, linalg
6 | from scipy.optimize import leastsq
7 | from numpy.polynomial.chebyshev import chebfit, chebval
8 | no_numpy = False
9 | except ImportError:
10 | no_numpy = True
11 |
12 | __doc__ = """
13 | This module contains functions for processing test results.
14 | Main function is data_property.
15 | """
16 |
17 |
18 | def average(data):
19 | return sum(data) / len(data)
20 |
21 |
22 | def mediana(vals):
23 |     return sum(vals) / len(vals)  # NOTE: despite the name, this computes the arithmetic mean
24 |
25 |
26 | def deviation(vals):
27 | med = mediana(vals)
28 | squares_sum = sum(abs(med - i) ** 2.0 for i in vals)
29 | return ((squares_sum / len(vals)) ** 0.5)
30 |
31 |
32 | def round_3_digit(val):
33 |     return round_val_and_deviation(val, val / 10.0)[0]
34 |
35 |
36 | def round_val_and_deviation(val, dev):
37 | if dev < 1E-7:
38 | return val, dev
39 |
40 | dev_div = 10.0 ** (math.floor(math.log10(dev)) - 1)
41 |
42 | dev1 = int(dev / dev_div) * dev_div
43 | val1 = int(val / dev_div) * dev_div
44 |
45 | return [type(val)(val1), type(dev)(dev1)]
46 |
47 |
48 | def approximate_curve(x, y, xnew, curved_coef):
49 | """returns ynew - y values of some curve approximation"""
50 | if no_numpy:
51 | raise ValueError("No numpy found")
52 |
53 | return chebval(xnew, chebfit(x, y, curved_coef))
54 |
55 |
56 | def approximate_line(x, y, xnew, relative_dist=False):
57 |     """ x, y - test data, xnew - points where we want the approximation
58 |     if relative_dist is False, distance = y - ynew
59 |     returns ynew - y values of the linear approximation"""
60 |
61 | if no_numpy:
62 | raise ValueError("No numpy found")
63 |
64 |     # convert to numpy.array (leastsq doesn't work without it)
65 | ox = array(x)
66 | oy = array(y)
67 |
68 | # set approximation function
69 | def func_line(tpl, x):
70 | return tpl[0] * x + tpl[1]
71 |
72 | def error_func_rel(tpl, x, y):
73 | return 1.0 - y / func_line(tpl, x)
74 |
75 | def error_func_abs(tpl, x, y):
76 | return y - func_line(tpl, x)
77 |
78 | # choose distance mode
79 | error_func = error_func_rel if relative_dist else error_func_abs
80 |
81 | tpl_initial = tuple(linalg.solve([[ox[0], 1.0], [ox[1], 1.0]],
82 | oy[:2]))
83 |
84 | # find line
85 | tpl_final, success = leastsq(error_func,
86 | tpl_initial[:],
87 | args=(ox, oy))
88 |
89 | # if error
90 | if success not in list(range(1, 5)):
91 |         raise ValueError("No line fits these points")
92 |
93 | # return new dots
94 | return func_line(tpl_final, array(xnew))
95 |
96 |
97 | def difference(y, ynew):
98 | """returns average and maximum relative and
99 | absolute differences between y and ynew
100 | result may contain None values for y = 0
101 | return value - tuple:
102 | [(abs dif, rel dif) * len(y)],
103 | (abs average, abs max),
104 | (rel average, rel max)"""
105 |
106 | abs_dlist = []
107 | rel_dlist = []
108 |
109 | for y1, y2 in zip(y, ynew):
110 | # absolute
111 | abs_dlist.append(y1 - y2)
112 |
113 | if y1 > 1E-6:
114 | rel_dlist.append(abs(abs_dlist[-1] / y1))
115 | else:
116 | raise ZeroDivisionError("{0!r} is too small".format(y1))
117 |
118 | da_avg = sum(abs_dlist) / len(abs_dlist)
119 | dr_avg = sum(rel_dlist) / len(rel_dlist)
120 |
121 | return (list(zip(abs_dlist, rel_dlist)),
122 | (da_avg, max(abs_dlist)), (dr_avg, max(rel_dlist))
123 | )
124 |
125 |
126 | class StatProperties(object):
127 | """
128 |     Statistical properties of an array of data
129 | average
130 | mediana
131 | perc_95 - 95 percentile
132 | perc_05 - 5 percentile
133 | deviation
134 | confidence - 95% confidence interval for average
135 | min
136 | max
137 | raw - original data
138 | """
139 | def __init__(self):
140 | # average value
141 | self.average = None
142 |
143 | # mediana value
144 | self.mediana = None
145 |
146 | # 95 percentile
147 | self.perc_95 = None
148 |
149 | # 5 percentile
150 | self.perc_05 = None
151 |
152 | # deviation
153 | self.deviation = None
154 |
155 | # 95% confidence interval for average
156 | self.confidence = None
157 |
158 | # minimal and maximum value
159 | self.min = None
160 | self.max = None
161 |
162 | # array of raw values
163 | self.raw = None
164 |
165 | def rounded_average_conf(self):
166 | return round_val_and_deviation(self.average, self.confidence)
167 |
168 | def rounded_average_dev(self):
169 | return round_val_and_deviation(self.average, self.deviation)
170 |
171 | def __str__(self):
172 | return "{0}({1} ~ {2})".format(self.__class__.__name__,
173 | round_3_digit(self.average),
174 | round_3_digit(self.deviation))
175 |
176 | def __repr__(self):
177 | return str(self)
178 |
179 |
180 | def data_property(data, confidence=0.95):
181 | """
182 | calculate StatProperties for data
183 | """
184 | res = StatProperties()
185 | if len(data) == 0:
186 | return res
187 |
188 | data = sorted(data)
189 |     res.average, res.deviation = round_val_and_deviation(average(data), deviation(data))
190 | res.max = data[-1]
191 | res.min = data[0]
192 |
193 | ln = len(data)
194 | if ln % 2 == 0:
195 |         res.mediana = (data[ln // 2] + data[ln // 2 - 1]) / 2
196 |     else:
197 |         res.mediana = data[ln // 2]
198 |
199 | res.perc_95 = data[int((ln - 1) * 0.95)]
200 | res.perc_05 = data[int((ln - 1) * 0.05)]
201 |
202 | if not no_numpy and ln >= 3:
203 | res.confidence = stats.sem(data) * \
204 | stats.t.ppf((1 + confidence) / 2, ln - 1)
205 | else:
206 | res.confidence = res.deviation
207 |
208 | res.raw = data[:]
209 | return res
210 |
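A short usage sketch of data_property() with invented samples (assumes the integer-division and argument fixes above):

    from statistic import data_property

    samples = [10.1, 9.8, 10.4, 10.0, 9.9, 10.2]
    props = data_property(samples)
    print(props.min, props.max)   # 9.8 10.4
    print(props.mediana)          # 10.05, the mean of the two middle values
    # With scipy available, confidence is a t-based 95% interval,
    # otherwise it falls back to the deviation.
    avg, conf = props.rounded_average_conf()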
--------------------------------------------------------------------------------
/tools/is-regression.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | #
3 | # is_regression.py - statistical test for performance throughput regression
4 | # based on python scipy.stats.ttest_ind() function
5 | #
6 | # we input two sets of samples:
7 | # the baseline sample set -- used as an indication of previously achieved level of performance
8 | # the current sample set -- used as an indication of the system currently being tested for performance regression
9 | #
10 | # command line inputs:
11 | # sample_type -- 'throughput' or 'response-time'
12 | # confidence_threshold -- min probability that two sample sets have a different mean
13 | # (e.g. 95 means that results differ with 95% probability)
14 | # max_pct_dev -- maximum percent deviation of either sample set, 100.0 x std.dev/mean
15 | # base_sample -- file containing baseline performance throughput samples, 1 per line
16 | # current_sample -- file containing current performance throughput samples, 1 per line
17 | #
18 | # return status codes:
19 | # 0 -- no regression, PASS
20 | # 10 -- regression, FAIL
21 | # 11 -- either sample set's variance too large
22 | # reject if the percent deviation for either baseline or current samples is > max_pct_dev
23 | #
24 | # we declare a performance regression if base_set mean is worse than current_set mean and a T-test determines
25 | # that the probability that the two sample sets have a different mean is greater than confidence_threshold
26 | #
27 | # the base sample set mean is "worse" than the current sample set mean if and only if:
28 | # the sample_type is 'throughput' and the base mean > current mean
29 | # the sample type is 'response-time' and the base mean < current mean
30 | #
31 | # References: The Art of Computer Systems Perf. Analysis, Raj Jain
32 | # see documentation for python scipy.stats.ttest_ind() function
33 | #
34 |
35 | import os
36 | import sys
37 | from sys import argv, exit
38 | import math
39 | import numpy
40 | import scipy
41 | from scipy.stats import ttest_ind
42 | from numpy import array
43 |
44 | # process status codes returned to shell
45 | NOTOK = -1
46 | PASS = 0
47 | FAIL = 10
48 | VARIANCE_TOO_HIGH = 11
49 | NOT_ENOUGH_SAMPLES = 12
50 |
51 | def usage(msg):
52 | print('\nERROR: ' + msg)
53 | print('usage: is_regression.py sample_type confidence_threshold max_pct_dev base_samples_file test_samples_file')
54 | print('sample_type is either "throughput" or "response-time"')
55 | print('confidence_threshold is probability that sample means differ expressed as a percentage')
56 | print('max_pct_dev is maximum percent deviation allowed for either sample set')
57 | print('samples files are text files with one floating-point sample value per line')
58 | sys.exit(NOTOK)
59 |
60 | def read_samples_from_file( sample_filepath ):
61 | with open(sample_filepath, "r") as sample_file:
62 | samples = [ float(r.strip()) for r in sample_file.readlines() ]
63 | print('%d samples read from file %s'%(len(samples), sample_filepath))
64 | return array(samples)
65 |
66 | def print_sample_stats(samples_name, samples_array):
67 | s = samples_array
68 | print('sample stats for %s: min = %f, max = %f, mean = %f, sd = %f, pct.dev. = %5.2f'%\
69 | (samples_name, s.min(), s.max(), s.mean(), s.std(ddof=1), 100.0*s.std(ddof=1)/s.mean()))
70 |
71 | if len(argv) < 6:
72 | usage('not enough command line arguments')
73 |
74 | sample_type = argv[1]
75 | confidence_threshold = float(argv[2])
76 | max_pct_dev = float(argv[3])
77 |
78 | # read in and acknowledge command line arguments
79 |
80 | print('sample type = %s , confidence_threshold = %6.2f %%, max. pct. deviation = %6.2f %%'%\
81 | (sample_type, confidence_threshold, max_pct_dev))
82 |
83 | baseline_sample_array = read_samples_from_file(argv[4])
84 | print_sample_stats('baseline', baseline_sample_array)
85 |
86 | current_sample_array = read_samples_from_file(argv[5])
87 | print_sample_stats('current', current_sample_array)
88 |
89 | # reject invalid inputs
90 |
91 | if len(current_sample_array) < 3:
92 | print('ERROR: not enough current samples')
93 | exit(NOT_ENOUGH_SAMPLES)
94 |
95 | if len(baseline_sample_array) < 3:
96 | print('ERROR: not enough baseline samples')
97 | exit(NOT_ENOUGH_SAMPLES)
98 |
99 | # flunk the test if standard deviation is too high for either sample set
100 |
101 | baseline_pct_dev = 100.0 * baseline_sample_array.std(ddof=1) / baseline_sample_array.mean()
102 | current_pct_dev = 100.0 * current_sample_array.std(ddof=1) / current_sample_array.mean()
103 |
104 | if baseline_pct_dev > max_pct_dev:
105 | print('ERROR: pct. deviation of %5.2f is too high for baseline samples'%baseline_pct_dev)
106 | exit(VARIANCE_TOO_HIGH)
107 | if current_pct_dev > max_pct_dev:
108 | print('ERROR: pct. deviation of %5.2f is too high for current samples'%current_pct_dev)
109 | exit(VARIANCE_TOO_HIGH)
110 |
111 | # FAIL the test if sample sets are accurate enough and
112 | # current sample set is statistically worse than baseline sample set
113 |
114 | (t, same_mean_probability) = ttest_ind(baseline_sample_array, current_sample_array)
115 | print('t-test t-statistic = %f probability = %f'%(t,same_mean_probability))
116 | print('t-test says that mean of two sample sets differs with probability %6.2f%%'%\
117 | ((1.0-same_mean_probability)*100.0))
118 |
119 | pb_threshold = (100.0 - confidence_threshold)/100.0
120 | print('same_mean_prob %f pb_threshold %f'%(same_mean_probability, pb_threshold))
121 | if same_mean_probability < pb_threshold:
122 | # the two samples do not have the same mean
123 | # fail if current sample is worse than baseline sample as defined above
124 | if (sample_type == 'throughput'):
125 | if (baseline_sample_array.mean() > current_sample_array.mean()):
126 | print('declaring a performance regression test FAILURE because of lower throughput')
127 | exit(FAIL)
128 | elif (sample_type == 'response-time'):
129 | if (baseline_sample_array.mean() < current_sample_array.mean()):
130 | print('declaring a performance regression test FAILURE because of higher response time')
131 | exit(FAIL)
132 | else: usage('sample_type must either be "throughput" or "response-time"')
133 | print('current sample set is statistically better than baseline sample set')
134 | else:
135 | print('sample sets are statistically indistinguishable for specified confidence level')
136 | exit(PASS) # no regression found
137 |
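A self-contained illustration of the decision rule implemented above; the sample values are invented:

    from numpy import array
    from scipy.stats import ttest_ind

    base = array([100.0, 101.0, 99.5, 100.5])  # invented baseline MB/s
    curr = array([90.0, 91.0, 89.5, 90.5])     # invented current MB/s
    t, p = ttest_ind(base, curr)
    # With confidence_threshold = 95, a regression is flagged when the
    # p-value drops below (100 - 95) / 100 and the baseline mean is higher.
    if p < 0.05 and base.mean() > curr.mean():
        print('throughput regression detected')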
--------------------------------------------------------------------------------
/tools/serialise_benchmark.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | #
3 | # serialise_benchmark.py - generate a serialised baseline.json from all the benchmark classes,
4 | # and automated creation of unit tests for them.
5 | #
6 | import argparse
7 | import os, sys
8 | import pprint
9 | import json
10 | from json import JSONEncoder
11 | import yaml
12 | import hashlib
13 | import benchmarkfactory
14 | import settings
15 | from cluster.ceph import Ceph
16 | from log_support import setup_loggers
17 |
18 | log_fname = '/tmp/cbt-utest.log'
19 |
20 | class BenchGenerator(object):
21 | """
22 | Class used for the serialisation of the benchmark classes
23 |     and the automated generation of unit tests
24 | """
25 | all_benchmarks = [
26 | 'nullbench',
27 | 'fio',
28 | 'hsbench',
29 | 'radosbench',
30 | 'kvmrbdfio',
31 | 'rawfio',
32 | 'librbdfio',
33 | 'cephtestrados',
34 | 'rbdfio',
35 | 'getput'
36 | ]
37 | archive_dir = "/tmp"
38 | iteration = {'acceptable': [1,2,3], 'iteration': 0}
39 | cluster = {}
40 | bl_name = "tools/baseline.json"
41 | bl_md5 = None
42 | cl_name = "tools/invariant.yaml"
43 | ut_name = "tests/test_bm.py"
44 | djson = {}
45 | current = {}
46 |
47 | def __init__(self):
48 | """ Init using mock constructors for a fixed cluster """
49 | settings.mock_initialize(config_file=BenchGenerator.cl_name)
50 | BenchGenerator.cluster = Ceph.mockinit(settings.cluster)
51 |
52 | def get_md5_bl(self):
53 | """ Calculate the MD5sum from baseline contents """
54 | with open(self.bl_name, 'rb') as f:
55 | data = f.read()
56 | f.close()
57 | return hashlib.md5(data).hexdigest()
58 | #bl_md5 = hashlib.md5(data.encode("utf-8")).hexdigest()
59 |
60 | def gen_json(self):
61 | """ Serialise the object into a json file"""
62 | result = {}
63 | for bm in self.all_benchmarks:
64 | b = benchmarkfactory.get_object(self.archive_dir,
65 | self.cluster, bm, self.iteration)
66 | result[bm] = b.__dict__
67 | with open(self.bl_name, 'w', encoding='utf-8') as f:
68 | json.dump(result, f, sort_keys=True, indent=4, cls=BenchJSONEncoder)
69 | f.close()
70 | # data from json.dump() does not support buffer API
71 | self.bl_md5 = self.get_md5_bl()
72 |
73 | def verify_md5(self):
74 | """ Verify the MD5SUM of the baseline.json is correct """
75 | md5_returned = self.get_md5_bl()
76 | if self.bl_md5 == md5_returned:
77 | print("MD5 verified.")
78 | return True
79 | else:
80 | print(f"MD5 verification failed! {self.bl_md5} vs. {md5_returned}")
81 | return False
82 |
83 | def verify_json(self):
84 | """ Verify the baseline json against the current benchmark classes """
85 | with open(self.bl_name, 'r') as f:
86 | self.djson = json.load(f)
87 | f.close()
88 | for bm in self.all_benchmarks:
89 | b = benchmarkfactory.get_object(self.archive_dir,
90 | self.cluster, bm, self.iteration)
91 | self.current[bm] = b.__dict__
92 | # This loop verifies that the active classes have the same attributes
93 |         # as the baseline: no complaint is raised if new attributes have been
94 | # added, but a difference will show for each old attribute removed
95 | for bm in self.djson.keys():
96 | if isinstance(self.djson[bm], dict):
97 | for k in self.djson[bm].keys():
98 |                 # Skip Cluster since it's a Ceph object, and acceptable was removed
99 | # We need to skip _iodepth_per_volume here as the json file format
100 | # cannot cope with a dictionary that does not use a str as the key.
101 | # _iodepth_per_volume is intentionally a dict[int,int]
102 | if k == "cluster" or k == "acceptable" or k == "_iodepth_per_volume":
103 | continue
104 | if not self.djson[bm][k] == self.current[bm][k]:
105 | if isinstance(self.djson[bm][k], dict):
106 |                         set1 = set(self.djson[bm][k].items())
107 |                         set2 = set(self.current[bm][k].items())
108 |                         print(set2 ^ set1)
109 | else:
110 | print(f"{bm}[{k}]: diff type {type(self.djson[bm][k])}")
111 | print(f"{bm}[{k}]: {self.djson[bm][k]} vs {self.current[bm][k]}")
112 |
113 | def gen_utests(self):
114 | """ Generate the unit tests from baseline json against the self.current benchmark classes """
115 | djson = self.djson
116 | for bm in djson.keys():
117 | if isinstance(djson[bm], dict):
118 | subst = f"sed -e 's/BenchmarkX/Benchmark{bm}/g' -e 's/MD5SUMNone/{self.bl_md5}/g' "
119 |                 template = "tools/test_bm_template.py"
120 |                 out = f"tests/test_bm_{bm}.py"
121 |                 cmd = f"{subst} {template} > {out}"
122 | #print(cmd)
123 | os.system(cmd)
124 | with open(out, "a") as f:
125 | for k in djson[bm].keys():
126 |                     # Skip Cluster since it's a Ceph object, and acceptable is removed
127 | if k == "cluster" or k == "acceptable":
128 | continue
129 | ut = f"""
130 | def test_valid_{k}(self):
131 | \"\"\" Basic sanity attribute identity {k} check\"\"\"
132 | b = benchmarkfactory.get_object(self.archive_dir,
133 | self.cluster, '{bm}', self.iteration)
134 | self.assertEqual(self.bl_json['{bm}']['{k}'], b.__dict__['{k}'])
135 | """
136 | f.write(ut)
137 | tail = f"""
138 | if __name__ == '__main__':
139 | unittest.main()
140 | """
141 | f.write(tail)
142 | f.close()
143 |
144 |
145 | class BenchJSONEncoder(JSONEncoder):
146 | def default(self, obj):
147 | return obj.__dict__
148 |
149 | def main(argv):
150 | setup_loggers(log_fname='/tmp/cbt-utest.log')
151 | bg = BenchGenerator()
152 | bg.gen_json()
153 | bg.verify_json()
154 | bg.verify_md5()
155 | bg.gen_utests()
156 | return 0
157 |
158 | if __name__ == '__main__':
159 | exit(main(sys.argv))
160 |
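For orientation, gen_utests() above leaves each tests/test_bm_<bm>.py as the sed-substituted template plus one method per serialised attribute, shaped like this (the 'radosbench'/'time' pair is illustrative, not a guaranteed attribute):

    # Illustrative shape of one generated check.
    def test_valid_time(self):
        """ Basic sanity attribute identity time check"""
        b = benchmarkfactory.get_object(self.archive_dir,
                                        self.cluster, 'radosbench', self.iteration)
        self.assertEqual(self.bl_json['radosbench']['time'], b.__dict__['time'])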
--------------------------------------------------------------------------------
/client_endpoints/ceph_client_endpoints.py:
--------------------------------------------------------------------------------
1 | import common
2 | import settings
3 | import logging
4 | import time
5 |
6 | from .client_endpoints import ClientEndpoints
7 |
8 | logger = logging.getLogger("cbt")
9 |
10 |
11 | class CephClientEndpoints(ClientEndpoints):
12 | def __init__(self, cluster, config):
13 | super(CephClientEndpoints, self).__init__(cluster, config)
14 | self.ceph_cmd = cluster.ceph_cmd
15 | self.ceph_fuse_cmd = cluster.ceph_fuse_cmd
16 | self.rbd_cmd = cluster.rbd_cmd
17 | self.rbd_nbd_cmd = cluster.rbd_nbd_cmd
18 | self.rbd_fuse_cmd = cluster.rbd_fuse_cmd
19 | self.tmp_conf = cluster.tmp_conf
20 | self.mount_cmd = cluster.mount_cmd
21 | self.client_keyring = cluster.client_keyring
22 | self.client_secret = cluster.client_secret
23 | self.pool = None
24 | self.pool_profile = config.get('pool_profile', 'default')
25 | self.data_pool = None
26 | self.data_pool_profile = config.get('data_pool_profile', None)
27 | self.recov_pool = None
28 | self.recov_pool_profile = config.get('recov_pool_profile', 'default')
29 | self.order = config.get('order', 22)
30 | self.disabled_features = config.get('disabled_features', None)
31 |
32 | # get the list of mons
33 | self.mon_addrs = []
34 | mon_hosts = self.cluster.get_mon_hosts()
35 | for mon_host, mons in mon_hosts.items():
36 | for mon, addr in mons.items():
37 | self.mon_addrs.append(addr)
38 |
39 | def get_rbd_name(self, node, ep_num):
40 | node_part = node.rpartition("@")[2]
41 | return '%s-%d' % (node_part, ep_num)
42 |
43 | def get_local_rbd_name(self, ep_num):
44 | return '`%s`-%d' % (common.get_fqdn_cmd(), ep_num)
45 |
46 | def get_dir_name(self, ep_num):
47 | return '%s/%s/%s' % (self.mnt_dir, self.name, ep_num)
48 |
49 | def create_fs(self):
50 | self.pool = self.name
51 | self.data_pool = self.name
52 | self.cluster.rmpool(self.pool, self.pool_profile)
53 | self.cluster.mkpool(self.pool, self.pool_profile, 'cephfs')
54 | if self.data_pool_profile:
55 | self.data_pool = '%s-data' % self.name
56 | self.cluster.rmpool(self.data_pool, self.data_pool_profile)
57 | self.cluster.mkpool(self.data_pool, self.data_pool_profile, 'cephfs')
58 | else:
59 | self.data_pool = self.pool
60 | fs_new_cmd = 'sudo %s -c %s fs new %s %s %s' % (self.ceph_cmd,
61 | self.tmp_conf,
62 | self.name,
63 | self.pool,
64 | self.data_pool)
65 | common.pdsh(settings.getnodes('head'), fs_new_cmd, continue_if_error=False).communicate()
66 |
67 | def mount_fs(self):
68 | for ep_num in range(0, self.endpoints_per_client):
69 | dir_name = self.get_dir_name(ep_num)
70 | for node in common.get_fqdn_list('clients'):
71 | common.pdsh(node, 'sudo mkdir -p -m0755 -- %s' % dir_name, continue_if_error=False).communicate()
72 |                 # FIXME: Apparently something is racy because we can get:
73 | # "mount error 2 = No such file or directory" without the pause.
74 | time.sleep(1)
75 | self.mount_fs_helper(node, dir_name)
76 | self.endpoints.append(dir_name)
77 | self.endpoint_type = "directory"
78 | return self.get_endpoints()
79 |
80 | def mount_fs_helper(self, node, dir_name):
81 | pass
82 |
83 | def create_rbd(self):
84 | self.pool = self.name
85 | dp_option = ''
86 |
87 | self.cluster.rmpool(self.pool, self.pool_profile)
88 | self.cluster.mkpool(self.pool, self.pool_profile, 'rbd')
89 | if self.data_pool_profile:
90 | self.data_pool = '%s-data' % self.name
91 | dp_option = '--data-pool %s' % self.data_pool
92 | self.cluster.rmpool(self.data_pool, self.data_pool_profile)
93 | self.cluster.mkpool(self.data_pool, self.data_pool_profile, 'rbd')
94 |
95 | for node in common.get_fqdn_list('clients'):
96 | for ep_num in range(0, self.endpoints_per_client):
97 | rbd_name = self.get_rbd_name(node, ep_num)
98 |
99 | # Make the RBD Image
100 | cmd = '%s -c %s create %s --pool %s --size %s %s --order %s' % (self.rbd_cmd, self.tmp_conf, rbd_name, self.pool, self.endpoint_size, dp_option, self.order)
101 | common.pdsh(settings.getnodes('head'), cmd, continue_if_error=False).communicate()
102 |
103 | # Disable Features
104 | if self.disabled_features:
105 | cmd = 'sudo %s feature disable %s/%s %s' % (self.rbd_cmd, self.pool, rbd_name, self.disabled_features)
106 | common.pdsh(settings.getnodes('head'), cmd, continue_if_error=False).communicate()
107 |
108 | def create_rbd_recovery(self):
109 | self.pool = '%s-recov' % self.name
110 | self.cluster.rmpool(self.pool, self.recov_pool_profile)
111 | self.cluster.mkpool(self.pool, self.recov_pool_profile, 'rbd')
112 | for node in common.get_fqdn_list('clients'):
113 | for ep_num in range(0, self.endpoints_per_client):
114 | rbd_name = '%s-%s' % (self.pool, self.get_rbd_name(node, ep_num))
115 | self.cluster.mkimage(rbd_name, self.endpoint_size, self.pool, self.data_pool, self.order)
116 |
117 | def mount_rbd(self):
118 | for ep_num in range(0, self.endpoints_per_client):
119 | dir_name = self.get_dir_name(ep_num)
120 | for node in common.get_fqdn_list('clients'):
121 | rbd_name = self.get_rbd_name(node, ep_num)
122 | rbd_device = self.map_rbd(node, rbd_name)
123 |
124 | logger.info(rbd_device)
125 |
126 | # mkfs
127 | common.pdsh(node, 'sudo mkfs.xfs %s' % rbd_device, continue_if_error=False).communicate()
128 |
129 | # mkdir
130 | common.pdsh(node, 'sudo mkdir -p -m0755 -- %s' % dir_name, continue_if_error=False).communicate()
131 |
132 | # mount
133 | common.pdsh(node, 'sudo mount -t xfs %s %s' % (rbd_device, dir_name),
134 | continue_if_error=False).communicate()
135 | self.endpoints.append(dir_name)
136 | self.endpoint_type = "directory"
137 | return self.get_endpoints()
138 |
139 | def map_rbd(self, node, rbd_name):
140 | pass
141 |
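mount_fs_helper() and map_rbd() are deliberate no-ops here, to be overridden by the concrete endpoint classes. A hedged sketch of a kernel-RBD-style map_rbd(); the real command and flags live in rbdkernel_client_endpoints.py and may differ:

    import common
    from client_endpoints.ceph_client_endpoints import CephClientEndpoints

    class KernelRbdEndpointsSketch(CephClientEndpoints):
        def map_rbd(self, node, rbd_name):
            # Map the image on the client node, then return the udev-created
            # block device path for mkfs/mount in mount_rbd() above.
            cmd = 'sudo %s -c %s map %s --pool %s' % (
                self.rbd_cmd, self.tmp_conf, rbd_name, self.pool)
            common.pdsh(node, cmd, continue_if_error=False).communicate()
            return '/dev/rbd/%s/%s' % (self.pool, rbd_name)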
--------------------------------------------------------------------------------