├── debian
│   ├── compat
│   ├── source
│   │   └── format
│   ├── ceph-medic.lintian-overrides
│   ├── rules
│   ├── changelog
│   ├── control
│   └── copyright
├── ceph_medic
│   ├── checks
│   │   ├── clients.py
│   │   ├── mdss.py
│   │   ├── mgrs.py
│   │   ├── rgws.py
│   │   ├── __init__.py
│   │   ├── cluster.py
│   │   ├── osds.py
│   │   ├── mons.py
│   │   └── common.py
│   ├── rules
│   │   ├── __init__.py
│   │   ├── jewel.py
│   │   └── kraken.py
│   ├── tests
│   │   ├── util
│   │   │   ├── __init__.py
│   │   │   ├── test_configuration.py
│   │   │   └── test_hosts.py
│   │   ├── checks
│   │   │   ├── __init__.py
│   │   │   ├── test_cluster.py
│   │   │   ├── test_osds.py
│   │   │   └── test_mons.py
│   │   ├── remote
│   │   │   ├── __init__.py
│   │   │   ├── test_commands.py
│   │   │   └── test_functions.py
│   │   ├── __init__.py
│   │   ├── test_terminal.py
│   │   ├── test_main.py
│   │   ├── test_log.py
│   │   ├── test_collector.py
│   │   ├── conftest.py
│   │   └── test_runner.py
│   ├── remote
│   │   ├── __init__.py
│   │   ├── util.py
│   │   ├── commands.py
│   │   └── functions.py
│   ├── util
│   │   ├── net.py
│   │   ├── __init__.py
│   │   ├── mon.py
│   │   └── hosts.py
│   ├── compat.py
│   ├── log.py
│   ├── __init__.py
│   ├── loader.py
│   ├── decorators.py
│   ├── check.py
│   ├── generate.py
│   ├── connection.py
│   ├── terminal.py
│   ├── main.py
│   ├── runner.py
│   └── collector.py
├── docs
│   ├── .gitignore
│   ├── source
│   │   ├── _static
│   │   │   └── .empty
│   │   ├── _themes
│   │   │   └── ceph
│   │   │       ├── theme.conf
│   │   │       └── static
│   │   │           ├── font
│   │   │           │   ├── ApexSans-Book.eot
│   │   │           │   ├── ApexSans-Book.ttf
│   │   │           │   ├── ApexSans-Book.woff
│   │   │           │   ├── ApexSans-Medium.eot
│   │   │           │   ├── ApexSans-Medium.ttf
│   │   │           │   └── ApexSans-Medium.woff
│   │   │           └── nature.css_t
│   │   ├── contents.rst
│   │   ├── codes
│   │   │   ├── cluster.rst
│   │   │   ├── mons.rst
│   │   │   ├── osds.rst
│   │   │   └── common.rst
│   │   ├── codes.rst
│   │   ├── _templates
│   │   │   └── smarttoc.html
│   │   ├── changelog.rst
│   │   ├── installation.rst
│   │   ├── facts.rst
│   │   ├── conf.py
│   │   └── index.rst
│   └── Makefile
├── requirements-dev.txt
├── tests
│   └── functional
│       ├── centos7
│       │   ├── Vagrantfile
│       │   ├── hosts
│       │   ├── test.yml
│       │   ├── group_vars
│       │   │   └── all
│       │   └── vagrant_variables.yml
│       ├── .gitignore
│       ├── scripts
│       │   └── generate_ssh_config.sh
│       ├── tox.ini
│       ├── playbooks
│       │   └── setup.yml
│       └── Vagrantfile
├── setup.cfg
├── MANIFEST.in
├── bin
│   └── ceph-medic
├── .gitignore
├── README.rst
├── tox.ini
├── LICENSE
├── ceph-medic.spec.in
├── CONTRIBUTING.rst
├── Makefile
└── setup.py
/debian/compat:
--------------------------------------------------------------------------------
1 | 7
2 |
--------------------------------------------------------------------------------
/ceph_medic/checks/clients.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ceph_medic/checks/mdss.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ceph_medic/checks/mgrs.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ceph_medic/checks/rgws.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ceph_medic/rules/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 |
--------------------------------------------------------------------------------
/docs/source/_static/.empty:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ceph_medic/tests/util/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ceph_medic/tests/checks/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/ceph_medic/tests/remote/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/debian/source/format:
--------------------------------------------------------------------------------
1 | 3.0 (quilt)
2 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | pytest >=2.1.3
2 | tox >=1.2
3 |
--------------------------------------------------------------------------------
/tests/functional/centos7/Vagrantfile:
--------------------------------------------------------------------------------
1 | ../Vagrantfile
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [tool:pytest]
2 | norecursedirs = .* _* virtualenv
3 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.rst
2 | include LICENSE
3 | include tox.ini
4 |
--------------------------------------------------------------------------------
/tests/functional/.gitignore:
--------------------------------------------------------------------------------
1 | *.vdi
2 | .vagrant/
3 | vagrant_ssh_config
4 |
--------------------------------------------------------------------------------
/ceph_medic/remote/__init__.py:
--------------------------------------------------------------------------------
1 | from . import functions # noqa
2 | from . import commands # noqa
3 |
--------------------------------------------------------------------------------
/ceph_medic/checks/__init__.py:
--------------------------------------------------------------------------------
1 | from . import osds, mons, clients, rgws, mdss, common, mgrs, cluster # noqa
2 |
--------------------------------------------------------------------------------
/docs/source/_themes/ceph/theme.conf:
--------------------------------------------------------------------------------
1 | [theme]
2 | inherit = basic
3 | stylesheet = nature.css
4 | pygments_style = tango
5 |
--------------------------------------------------------------------------------
/debian/ceph-medic.lintian-overrides:
--------------------------------------------------------------------------------
1 | # Package has not yet been submitted to Debian.
2 | new-package-should-close-itp-bug
3 |
--------------------------------------------------------------------------------
/bin/ceph-medic:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from ceph_medic import main
4 |
5 | if __name__ == '__main__':
6 | main.Medic()
7 |
--------------------------------------------------------------------------------
/docs/source/_themes/ceph/static/font/ApexSans-Book.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceph/ceph-medic/HEAD/docs/source/_themes/ceph/static/font/ApexSans-Book.eot
--------------------------------------------------------------------------------
/docs/source/_themes/ceph/static/font/ApexSans-Book.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceph/ceph-medic/HEAD/docs/source/_themes/ceph/static/font/ApexSans-Book.ttf
--------------------------------------------------------------------------------
/docs/source/_themes/ceph/static/font/ApexSans-Book.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceph/ceph-medic/HEAD/docs/source/_themes/ceph/static/font/ApexSans-Book.woff
--------------------------------------------------------------------------------
/docs/source/_themes/ceph/static/font/ApexSans-Medium.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceph/ceph-medic/HEAD/docs/source/_themes/ceph/static/font/ApexSans-Medium.eot
--------------------------------------------------------------------------------
/docs/source/_themes/ceph/static/font/ApexSans-Medium.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceph/ceph-medic/HEAD/docs/source/_themes/ceph/static/font/ApexSans-Medium.ttf
--------------------------------------------------------------------------------
/docs/source/_themes/ceph/static/font/ApexSans-Medium.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ceph/ceph-medic/HEAD/docs/source/_themes/ceph/static/font/ApexSans-Medium.woff
--------------------------------------------------------------------------------
/tests/functional/centos7/hosts:
--------------------------------------------------------------------------------
1 | [mons]
2 | mon0 address=192.168.3.10
3 |
4 | [osds]
5 | osd0 address=192.168.3.100
6 |
7 | [medic]
8 | client0 address=192.168.3.40
9 |
--------------------------------------------------------------------------------
/docs/source/contents.rst:
--------------------------------------------------------------------------------
1 | ceph-medic contents
2 | ===================
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 |
7 | index.rst
8 | installation.rst
9 | codes.rst
10 | facts.rst
11 | changelog.rst
12 |
--------------------------------------------------------------------------------
/ceph_medic/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | # helps reset altered metadata in tests
3 | base_metadata = {'rgws': {}, 'mgrs': {}, 'mdss': {}, 'clients': {},
4 |                  'osds': {}, 'mons': {}, 'nodes': {}, 'cluster_name': 'ceph', 'failed_nodes': {}}
5 |
6 |
--------------------------------------------------------------------------------
/debian/rules:
--------------------------------------------------------------------------------
1 | #!/usr/bin/make -f
2 |
3 | # Uncomment this to turn on verbose mode.
4 | export DH_VERBOSE=1
5 |
6 | export PYBUILD_NAME=ceph-medic
7 |
8 | export PYBUILD_TEST_ARGS=ceph_medic/tests
9 |
10 | %:
11 | dh $@ --buildsystem pybuild --with python2
12 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[co]
2 |
3 | # Packages
4 | *.egg
5 | *.egg-info
6 | .cache
7 | dist
8 | build
9 | eggs
10 | parts
11 | bin
12 | var
13 | sdist
14 | develop-eggs
15 | .installed.cfg
16 |
17 | # Installer logs
18 | pip-log.txt
19 |
20 | # Unit test / coverage reports
21 | .coverage
22 | .tox
23 |
24 | #Translations
25 | *.mo
26 |
--------------------------------------------------------------------------------
/tests/functional/centos7/test.yml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | - hosts: medic
4 | become: no
5 | tasks:
6 |
7 | - name: copy hosts file to vagrant home dir
8 | command: cp /vagrant/hosts /home/vagrant
9 | become: yes
10 |
11 | - name: use ceph-medic to check ceph cluster
12 | command: ceph-medic --inventory /home/vagrant/hosts check
13 |
--------------------------------------------------------------------------------
/ceph_medic/util/net.py:
--------------------------------------------------------------------------------
1 | import socket
2 |
3 |
4 | def host_is_resolvable(hostname, _socket=None):
5 | _socket = _socket or socket # just used for testing
6 | try:
7 | _socket.getaddrinfo(hostname, 0)
8 | except _socket.gaierror:
9 | msg = "hostname: %s is not resolvable" % hostname
10 | raise RuntimeError(msg)
11 | return True
12 |
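13 | # Illustrative usage (sketch only, not part of the module): callers can treat
14 | # the RuntimeError as "hostname is not resolvable", e.g.
15 | #
16 | #   try:
17 | #       host_is_resolvable('mon0.example.com')  # hypothetical hostname
18 | #   except RuntimeError as error:
19 | #       print(error)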
--------------------------------------------------------------------------------
/docs/source/codes/cluster.rst:
--------------------------------------------------------------------------------
1 | Cluster
2 | =======
3 | Cluster checks run once against cluster-wide information and are
4 | not specific to any daemon.
5 |
6 |
7 | Errors
8 | ------
9 |
10 | .. _ECLS1:
11 |
12 | ECLS1
13 | ^^^^^
14 | No OSD nodes exist as part of the cluster.
15 |
16 | .. _ECLS2:
17 |
18 | ECLS2
19 | ^^^^^
20 | The cluster is nearfull.
21 |
22 |
23 |
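24 | Both checks above live in ``ceph_medic/checks/cluster.py``. For reference, the
25 | ECLS1 check is implemented as a function that returns a ``(code, message)``
26 | pair when it fails and returns nothing when it passes:
27 | 
28 | .. code-block:: python
29 | 
30 |     from ceph_medic import metadata
31 | 
32 |     def check_osds_exist():
33 |         code = 'ECLS1'
34 |         msg = 'There are no OSDs available'
35 |         osd_count = len(metadata['osds'].keys())
36 |         if not osd_count:
37 |             return code, msg
38 | 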
--------------------------------------------------------------------------------
/tests/functional/scripts/generate_ssh_config.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Generate a custom ssh config from Vagrant so that it can then be used by
3 | # ansible.cfg
4 |
5 | path=$1
6 |
7 | if [ $# -eq 0 ]
8 | then
9 | echo "A path to the scenario is required as an argument and it wasn't provided"
10 | exit 1
11 | fi
12 |
13 | cd "$path"
14 | vagrant ssh-config > vagrant_ssh_config
15 |
--------------------------------------------------------------------------------
/ceph_medic/compat.py:
--------------------------------------------------------------------------------
1 | # flake8: noqa
2 |
3 | try:
4 | import ConfigParser as configparser
5 | except ImportError:
6 | import configparser
7 |
8 | try:
9 | from ConfigParser import SafeConfigParser as BaseConfigParser
10 | except ImportError:
11 | from configparser import ConfigParser as BaseConfigParser
12 |
13 | try:
14 | from StringIO import StringIO
15 | except ImportError:
16 | from io import StringIO
17 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | ==========
2 | ceph-medic
3 | ==========
4 |
5 | ``ceph-medic`` is a tool that performs checks against Ceph clusters to identify common issues preventing proper functionality. It supports Kubernetes and OpenShift, using ``kubectl`` and ``oc``, respectively. It requires non-interactive SSH access to accounts that can ``sudo`` without a password prompt.
6 |
7 | Full usage documentation can be found at: http://docs.ceph.com/ceph-medic
8 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py27, py36, py37, flake8
3 |
4 | [testenv]
5 | deps=
6 | pytest
7 | mock
8 | commands=py.test -v {posargs:ceph_medic/tests}
9 |
10 | [testenv:docs]
11 | basepython=python
12 | changedir=docs/source
13 | deps=sphinx
14 | commands=
15 | sphinx-build -W -b html -d {envtmpdir}/doctrees . {envtmpdir}/html
16 |
17 | [testenv:flake8]
18 | deps=flake8
19 | commands=flake8 --select=F,E9 --exclude=vendor {posargs:ceph_medic}
20 |
--------------------------------------------------------------------------------
/tests/functional/centos7/group_vars/all:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | ceph_origin: repository
4 | ceph_repository: community
5 | ceph_stable_release: luminous
6 | cluster: test
7 | public_network: "192.168.3.0/24"
8 | cluster_network: "192.168.4.0/24"
9 | monitor_interface: eth1
10 | journal_size: 100
11 | osd_objectstore: "filestore"
12 | devices:
13 | - '/dev/sda'
14 | - '/dev/sdb'
15 | osd_scenario: collocated
16 | os_tuning_params:
17 | - { name: kernel.pid_max, value: 4194303 }
18 | - { name: fs.file-max, value: 26234859 }
19 |
--------------------------------------------------------------------------------
/ceph_medic/rules/jewel.py:
--------------------------------------------------------------------------------
1 | # Rules to apply for Jewel releases.
2 |
3 | # All checks are applied, but overrides to defaults can
4 | # be specified here.
5 | # overrides = {
6 | # # overrides the check called 'check_name' with a different expected value
7 | # # and changes # the level of this check to 'error'.
8 | # "check_name": {"expected": "value", "level": "error"},
9 | #}
10 |
11 | # Exclude the following checks:
12 | # excludes = ["check_name"]
13 |
14 | # Include the following checks:
15 | # includes = ["check_name"]
16 |
--------------------------------------------------------------------------------
/ceph_medic/rules/kraken.py:
--------------------------------------------------------------------------------
1 | # Rules to apply for Kraken releases.
2 |
3 | # All checks are applied, but overrides to defaults can
4 | # be specified here.
5 | # overrides = {
6 | # # overrides the check called 'check_name' with a different expected value
7 | # # and changes # the level of this check to 'error'.
8 | # "check_name": {"expected": "value", "level": "error"},
9 | #}
10 |
11 | # Exclude the following checks:
12 | # excludes = ["check_name"]
13 |
14 | # Include the following checks:
15 | # includes = ["check_name"]
16 |
--------------------------------------------------------------------------------
/docs/source/codes.rst:
--------------------------------------------------------------------------------
1 | ===========
2 | Error Codes
3 | ===========
4 |
5 | When performing checks, ``ceph-medic`` returns an error code and a message for every check that failed. Each check
6 | is classified as either a ``warning`` or an ``error``, and pertains either to a common issue or to a daemon-specific issue. Any
7 | code starting with ``E`` is an error, and any starting with ``W`` is a warning.
8 |
9 | Below you'll find a list of checks that are performed with the ``check`` subcommand.
10 |
11 |
12 | .. toctree::
13 | :maxdepth: 2
14 |
15 | codes/common.rst
16 | codes/mons.rst
17 | codes/osds.rst
18 | codes/cluster.rst
19 |
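20 | The checks themselves live under ``ceph_medic/checks/``. As a rough sketch of
21 | the convention, a check is a plain function whose name starts with ``check_``;
22 | it returns a ``(code, message)`` tuple when it fails and returns nothing when
23 | it passes. The function below is purely illustrative and is not an actual
24 | ceph-medic check:
25 | 
26 | .. code-block:: python
27 | 
28 |     def check_example_warning():
29 |         # Hypothetical example only, following the (code, message) convention.
30 |         code = 'WXYZ1'
31 |         msg = 'an example warning message'
32 |         condition_detected = False  # stand-in for real inspection logic
33 |         if condition_detected:
34 |             return code, msg
35 | 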
--------------------------------------------------------------------------------
/ceph_medic/checks/cluster.py:
--------------------------------------------------------------------------------
1 | from ceph_medic import metadata
2 |
3 |
4 | #
5 | # Error checks
6 | #
7 |
8 | def check_osds_exist():
9 | code = 'ECLS1'
10 | msg = 'There are no OSDs available'
11 | osd_count = len(metadata['osds'].keys())
12 | if not osd_count:
13 | return code, msg
14 |
15 |
16 | def check_nearfull():
17 | """
18 | Checks if the osd capacity is at nearfull
19 | """
20 | code = 'ECLS2'
21 | msg = 'Cluster is nearfull'
22 | try:
23 | osd_map = metadata['cluster']['status']['osdmap']['osdmap']
24 | except KeyError:
25 | return
26 | if osd_map.get('nearfull'):
27 | return code, msg
28 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = python -msphinx
7 | SPHINXPROJ = ceph-medic
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/ceph_medic/util/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | def str_to_int(string):
3 | """
4 | Parses a string number into an integer, optionally converting to a float
5 | and rounding down.
6 |
7 | Some LVM values may come with a comma instead of a dot to define decimals.
8 | This function normalizes a comma into a dot
9 | """
10 | try:
11 | integer = float(string.replace(',', '.'))
12 | except AttributeError:
13 | # this might be a integer already, so try to use it, otherwise raise
14 | # the original exception
15 | if isinstance(string, (int, float)):
16 | integer = string
17 | else:
18 | raise
19 |
20 | return int(integer)
21 |
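22 | # Illustrative usage of str_to_int (sketch only, not part of the module's docs):
23 | #
24 | #   str_to_int('4,2')   # -> 4; the comma decimal is normalized to '4.2' first
25 | #   str_to_int('4.2')   # -> 4
26 | #   str_to_int(7.9)     # -> 7; non-strings hit the AttributeError fallback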
--------------------------------------------------------------------------------
/docs/source/_templates/smarttoc.html:
--------------------------------------------------------------------------------
1 | {#
2 | Sphinx sidebar template: smart table of contents.
3 |
4 | Shows a sidebar ToC that gives you a more global view of the
5 | documentation, and not the confusing cur/prev/next which is the
6 | default sidebar.
7 |
8 | The ToC will open and collapse automatically to show the part of the
9 | hierarchy you are in. Top-level items will always be visible.
10 |
11 | #}
12 |
13 | {{ toctree(maxdepth=-1) }}
14 |
15 |
--------------------------------------------------------------------------------
/ceph_medic/log.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | import logging
3 | import os
4 |
5 | BASE_FORMAT = "[%(name)s][%(levelname)-6s] %(message)s"
6 | FILE_FORMAT = "[%(asctime)s]" + BASE_FORMAT
7 |
8 |
9 | def setup(config=None):
10 | root_logger = logging.getLogger()
11 | log_path = config.get_safe('global', '--log-path', '.')
12 | if not os.path.exists(log_path):
13 | raise RuntimeError('configured ``--log-path`` value does not exist: %s' % log_path)
14 | date = datetime.strftime(datetime.utcnow(), '%Y-%m-%d')
15 | log_file = os.path.join(log_path, 'ceph-medic-%s.log' % date)
16 |
17 | root_logger.setLevel(logging.DEBUG)
18 |
19 | # File Logger
20 | fh = logging.FileHandler(log_file)
21 | fh.setLevel(logging.DEBUG)
22 | fh.setFormatter(logging.Formatter(FILE_FORMAT))
23 |
24 | root_logger.addHandler(fh)
25 |
--------------------------------------------------------------------------------
/debian/changelog:
--------------------------------------------------------------------------------
1 | ceph-medic (1.0.8) stable; urgency=medium
2 |
3 | * New upstream release
4 |
5 | -- Ceph Release Team Wed, 17 Jun 2020 16:15:00 -0600
6 |
7 | ceph-medic (1.0.7) stable; urgency=medium
8 |
9 | * New upstream release
10 |
11 | -- Ceph Release Team Tue, 24 Mar 2020 17:29:00 -0600
12 |
13 | ceph-medic (1.0.6) stable; urgency=medium
14 |
15 | * New upstream release
16 |
17 | -- Ceph Release Team Tue, 11 Feb 2020 16:41:07 -0600
18 |
19 | ceph-medic (1.0.4) stable; urgency=medium
20 |
21 | * New upstream release
22 |
23 | -- Ceph Release Team Tue, 27 Mar 2018 20:19:38 +0000
24 |
25 | ceph-medic (0.0.1-1) unstable; urgency=medium
26 |
27 | * Initial release.
28 |
29 | -- Ken Dreyer Wed, 28 Jun 2017 13:20:07 -0600
30 |
--------------------------------------------------------------------------------
/debian/control:
--------------------------------------------------------------------------------
1 | Source: ceph-medic
2 | Maintainer: Alfredo Deza
3 | Section: admin
4 | Priority: optional
5 | Build-Depends:
6 | debhelper (>= 7),
7 | dh-python,
8 | python,
9 | python-mock,
10 | python-pytest,
11 | python-remoto,
12 | python-setuptools,
13 | python-tambo
14 | X-Python-Version: >= 2.7
15 | Standards-Version: 3.9.7
16 | Homepage: http://ceph.com/
17 | Vcs-Git: git://github.com/ceph/ceph-medic.git
18 | Vcs-Browser: https://github.com/ceph/ceph-medic
19 |
20 | Package: ceph-medic
21 | Architecture: all
22 | Depends: ${misc:Depends}, ${python:Depends}
23 | Description: determine common issues on Ceph storage clusters
24 | ceph-medic is a very simple tool to run against a Ceph cluster to detect
25 | common issues that might prevent correct functionality. It requires
26 | non-interactive SSH access to accounts that can sudo without a password
27 | prompt.
28 |
--------------------------------------------------------------------------------
/ceph_medic/remote/util.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 |
4 |
5 | def which(executable):
6 | """find the location of an executable"""
7 | locations = (
8 | '/usr/local/bin',
9 | '/bin',
10 | '/usr/bin',
11 | '/usr/local/sbin',
12 | '/usr/sbin',
13 | '/sbin',
14 | )
15 |
16 | for location in locations:
17 | executable_path = os.path.join(location, executable)
18 | if os.path.exists(executable_path):
19 | return executable_path
20 |
21 |
22 | def run(command):
23 | """
24 | run a command, return stdout, stderr, and exit code.
25 | """
26 | process = subprocess.Popen(
27 | command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True
28 | )
29 | stdout = process.stdout.read().splitlines()
30 | stderr = process.stderr.read().splitlines()
31 | returncode = process.wait()
32 |
33 | return stdout, stderr, returncode
34 |
--------------------------------------------------------------------------------
/ceph_medic/__init__.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 |
3 |
4 | class UnloadedConfig(object):
5 | """
6 | This class is used as the default value for config.ceph so that if
7 | a configuration file is not successfully loaded then it will give
8 | a nice error message when values from the config are used.
9 | """
10 | def __init__(self, error=None):
11 | self.error = error
12 |
13 | def __getattr__(self, *a):
14 | raise RuntimeError(self.error)
15 |
16 |
17 | config = namedtuple('config', ['verbosity', 'nodes', 'hosts_file', 'file', 'cluster_name'])
18 | config.file = UnloadedConfig("No valid ceph-medic configuration file was loaded")
19 | config.nodes = {}
20 |
21 | metadata = {'failed_nodes': {}, 'rgws': {}, 'mgrs': {}, 'mdss': {}, 'clients': {}, 'osds': {}, 'mons': {}, 'nodes': {}, 'cluster': {}}
22 |
23 | daemon_types = [i for i in metadata.keys() if i not in ('nodes', 'failed_nodes', 'cluster')]
24 |
25 | __version__ = '1.0.8'
26 |
--------------------------------------------------------------------------------
/docs/source/codes/mons.rst:
--------------------------------------------------------------------------------
1 | Monitors
2 | ========
3 |
4 | The following checks indicate issues with monitor nodes.
5 |
6 | Errors
7 | ------
8 |
9 | .. _EMON1:
10 |
11 | EMON1
12 | _____
13 | The secret key used in the keyring differs from other nodes in the cluster.
14 |
15 | Warnings
16 | --------
17 |
18 |
19 | .. _WMON1:
20 |
21 | WMON1
22 | _____
23 | Multiple monitor directories are found on the same host.
24 |
25 | .. _WMON2:
26 |
27 | WMON2
28 | _____
29 | OSDs were found collocated with a monitor daemon on the same host.
30 |
31 | .. _WMON3:
32 |
33 | WMON3
34 | _____
35 | The recommended number of Monitor nodes is 3 for a high availability setup.
36 |
37 | .. _WMON4:
38 |
39 | WMON4
40 | _____
41 | It is recommended to have an odd number of monitors so that failures can be
42 | tolerated.
43 |
44 |
45 | .. _WMON5:
46 |
47 | WMON5
48 | _____
49 | Having a single monitor is not recommended, as a failure would cause data loss.
50 | For high availability, at least 3 monitors are recommended.
51 |
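52 | The monitor checks read from the same global metadata structure used by the
53 | cluster checks, where ``metadata['mons']`` maps monitor host names to their
54 | collected data. The snippet below is an illustrative sketch only, not the
55 | actual code in ``ceph_medic/checks/mons.py``, showing how a WMON5-style
56 | warning could be expressed with the project's ``(code, message)`` convention:
57 | 
58 | .. code-block:: python
59 | 
60 |     from ceph_medic import metadata
61 | 
62 |     def check_single_monitor():
63 |         # Hypothetical sketch, not the shipped implementation.
64 |         code = 'WMON5'
65 |         msg = 'a single monitor was detected; at least 3 are recommended'
66 |         if len(metadata['mons']) == 1:
67 |             return code, msg
68 | 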
--------------------------------------------------------------------------------
/ceph_medic/tests/test_terminal.py:
--------------------------------------------------------------------------------
1 | from ceph_medic import terminal
2 |
3 |
4 | class FakeWriter(object):
5 |
6 | def __init__(self):
7 | self.calls = []
8 |
9 | def write(self, string):
10 | self.calls.append(string)
11 |
12 | def flush(self):
13 | pass
14 |
15 |
16 | class TestWriteClearLine(object):
17 |
18 | def setup(self):
19 | self.fake_writer = FakeWriter()
20 | self.loader = terminal._Write(
21 | _writer=self.fake_writer,
22 | prefix='\r',
23 | clear_line=True
24 | )
25 |
26 | def test_adds_padding_for_81_chars(self):
27 | self.loader.write('1234567890')
28 | assert len(self.fake_writer.calls[0]) == 81
29 |
30 | def test_remaining_padding_is_whitespace(self):
31 | self.loader.write('1234567890')
32 | assert self.fake_writer.calls[0][11:] == ' ' * 70
33 |
34 | def test_long_line_adds_only_ten_chars(self):
35 | self.loader.write('1'*81)
36 | assert self.fake_writer.calls[0][82:] == ' ' * 10
37 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2016, Red Hat, Inc.
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/ceph_medic/tests/checks/test_cluster.py:
--------------------------------------------------------------------------------
1 | from ceph_medic.checks import cluster
2 | from ceph_medic import metadata
3 |
4 |
5 | class TestCheckOSDs(object):
6 |
7 | def setup(self):
8 | metadata['cluster_name'] = 'ceph'
9 | metadata['osds'] = {}
10 |
11 | def teardown(self):
12 | metadata['osds'] = {}
13 |
14 | def test_no_osds(self):
15 | assert cluster.check_osds_exist() == ('ECLS1', 'There are no OSDs available')
16 |
17 | def test_osds_are_found(self):
18 | metadata['osds'] = {'osd1': {}}
19 | assert cluster.check_osds_exist() is None
20 |
21 | class TestNearfull(object):
22 |
23 | def setup(self):
24 | metadata['cluster'] = {}
25 |
26 | def teardown(self):
27 | metadata['cluster'] = {}
28 |
29 |     def test_key_error_is_ignored(self):
30 |         assert cluster.check_nearfull() is None
31 | 
32 |     def test_osd_map_is_nearfull(self):
33 |         metadata['cluster'] = {'status': {'osdmap': {'osdmap': {'nearfull': True}}}}
34 |         assert cluster.check_nearfull() == ('ECLS2', 'Cluster is nearfull')
35 | 
36 |     def test_osd_map_is_not_nearfull(self):
37 |         metadata['cluster'] = {'status': {'osdmap': {'osdmap': {'nearfull': False}}}}
38 |         assert cluster.check_nearfull() is None
39 | 
--------------------------------------------------------------------------------
/ceph-medic.spec.in:
--------------------------------------------------------------------------------
1 | #
2 | # spec file for package ceph-medic
3 | #
4 | %global commit @COMMIT@
5 | %global shortcommit %(c=%{commit}; echo ${c:0:7})
6 |
7 | Name: ceph-medic
8 | Version: @VERSION@
9 | Release: @RELEASE@%{?dist}
10 | Summary: Find common issues on Ceph clusters
11 | License: MIT
12 | URL: https://github.com/ceph/ceph-medic
13 | Source0: %{name}-%{version}-%{shortcommit}.tar.gz
14 | BuildRequires: python-devel
15 | BuildRequires: python-setuptools
16 | BuildRequires: pytest
17 | BuildRequires: python-remoto
18 | BuildRequires: python-mock
19 | BuildRequires: python-tambo
20 | Requires: python-remoto
21 | Requires: python-tambo
22 | Requires: python-execnet
23 |
24 | BuildArch: noarch
25 |
26 |
27 | %description
28 | An admin tool to determine common issues on Ceph storage clusters.
29 |
30 | %prep
31 | %autosetup -p1
32 |
33 | %build
34 | python setup.py build
35 |
36 | %install
37 | python setup.py install -O1 --skip-build --root %{buildroot}
38 |
39 | %check
40 | export PYTHONPATH=$(pwd)
41 |
42 | py.test-%{python_version} -v ceph_medic/tests
43 |
44 | %files
45 | %license LICENSE
46 | %doc README.rst
47 | %{_bindir}/ceph-medic
48 | %{python_sitelib}/*
49 |
50 | %changelog
51 |
--------------------------------------------------------------------------------
/ceph_medic/tests/test_main.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import ceph_medic.main
3 |
4 | from mock import patch
5 |
6 |
7 | class TestMain(object):
8 | def test_main(self):
9 | assert ceph_medic.main
10 |
11 | def test_invalid_ssh_config(self, capsys):
12 | argv = ["ceph-medic", "--ssh-config", "/does/not/exist"]
13 | with pytest.raises(SystemExit):
14 | ceph_medic.main.Medic(argv)
15 | out, _ = capsys.readouterr()
16 | assert 'the given ssh config path does not exist' in out
17 |
18 | def test_valid_ssh_config(self, capsys):
19 | ssh_config = '/etc/ssh/ssh_config'
20 | argv = ["ceph-medic", "--ssh-config", ssh_config]
21 |
22 | def fake_exists(path):
23 | if path == ssh_config:
24 | return True
25 | if path.endswith('cephmedic.conf'):
26 | return False
27 | return True
28 |
29 | with patch.object(ceph_medic.main.os.path, 'exists') as m_exists:
30 | m_exists.side_effect = fake_exists
31 | ceph_medic.main.Medic(argv)
32 | out, _ = capsys.readouterr()
33 | assert 'ssh config path does not exist' not in out
34 | assert ssh_config == ceph_medic.main.ceph_medic.config.ssh_config
35 |
--------------------------------------------------------------------------------
/docs/source/codes/osds.rst:
--------------------------------------------------------------------------------
1 | OSDs
2 | ====
3 |
4 | The following checks indicate issues with OSD nodes.
5 |
6 | Warnings
7 | --------
8 |
9 |
10 | .. _WOSD1:
11 |
12 | WOSD1
13 | ^^^^^
14 | Multiple ceph_fsid values found in /var/lib/ceph/osd.
15 |
16 | This might mean you are hosting OSDs for many clusters on
17 | this node or that some OSDs are misconfigured to join the
18 | clusters you expect.
19 |
20 | .. _WOSD2:
21 |
22 | WOSD2
23 | ^^^^^
24 | Setting ``osd pool default min size = 1`` can lead to data loss because if the
25 | minimum is not met, Ceph will not acknowledge the write to the client.
26 |
27 | .. _WOSD3:
28 |
29 | WOSD3
30 | ^^^^^
31 | The default value of 3 OSD nodes for a healthy cluster must be met. If
32 | ``ceph.conf`` is configured to a different number, that setting will take
33 | precedence. The number of OSD nodes is calculated by adding
34 | ``osd_pool_default_size`` and ``osd_pool_default_min_size`` + 1. By default,
35 | this adds to 3.
36 |
37 | .. _WOSD4:
38 |
39 | WOSD4
40 | ^^^^^
41 | If ratios have been modified from their defaults, a warning is raised pointing to
42 | any ratio that diverges; a short illustrative sketch follows the list below. The ratios observed, with their defaults, are:
43 |
44 | * ``backfillfull_ratio``: 0.9
45 | * ``nearfull_ratio``: 0.85
46 | * ``full_ratio``: 0.95
47 |
48 |
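49 | The snippet below is an illustrative sketch only, not the code in
50 | ``ceph_medic/checks/osds.py``; it shows one way to compare reported ratios
51 | against the defaults listed above and collect any that diverge:
52 | 
53 | .. code-block:: python
54 | 
55 |     # Defaults as documented for WOSD4 (hypothetical helper, for illustration).
56 |     DEFAULT_RATIOS = {
57 |         'backfillfull_ratio': 0.9,
58 |         'nearfull_ratio': 0.85,
59 |         'full_ratio': 0.95,
60 |     }
61 | 
62 |     def changed_ratios(reported):
63 |         """Return the subset of reported ratios that differ from the defaults."""
64 |         return dict(
65 |             (name, value) for name, value in reported.items()
66 |             if name in DEFAULT_RATIOS and value != DEFAULT_RATIOS[name]
67 |         )
68 | 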
--------------------------------------------------------------------------------
/ceph_medic/loader.py:
--------------------------------------------------------------------------------
1 | """
2 | Configuration file loading utilities
3 | """
4 | import os
5 | import imp
6 |
7 |
8 | def load_config(filepath, **kw):
9 | '''
10 | Creates a configuration dictionary from a file.
11 |
12 | :param filepath: The path to the file.
13 | '''
14 |
15 | abspath = os.path.abspath(os.path.expanduser(filepath))
16 | conf_dict = {}
17 | if not os.path.isfile(abspath):
18 | raise RuntimeError('`%s` is not a file.' % abspath)
19 |
20 | # First, make sure the code will actually compile (and has no SyntaxErrors)
21 | with open(abspath, 'rb') as f:
22 | compiled = compile(f.read(), abspath, 'exec')
23 |
24 | # Next, attempt to actually import the file as a module.
25 | # This provides more verbose import-related error reporting than exec()
26 | absname, _ = os.path.splitext(abspath)
27 | basepath, module_name = absname.rsplit(os.sep, 1)
28 | try:
29 | imp.load_module(
30 | module_name,
31 | *imp.find_module(module_name, [basepath])
32 | )
33 | except ImportError:
34 | pass
35 |
36 | # If we were able to import as a module, actually exec the compiled code
37 | exec(compiled, globals(), conf_dict)
38 | conf_dict['__file__'] = abspath
39 | return conf_dict
40 |
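41 | 
42 | # Usage sketch (illustrative only): ``load_config`` turns the top-level
43 | # assignments of a Python-syntax file (for example, a file shaped like
44 | # ceph_medic/rules/jewel.py) into a plain dictionary, e.g.
45 | #
46 | #   conf = load_config('ceph_medic/rules/jewel.py')
47 | #   conf.get('overrides', {})   # -> {} unless the file defines ``overrides``
48 | #   conf['__file__']            # absolute path to the loaded file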
--------------------------------------------------------------------------------
/debian/copyright:
--------------------------------------------------------------------------------
1 | Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0
2 | Upstream-Name: ceph-medic
3 | Source: https://github.com/ceph/ceph-medic
4 |
5 | Files: *
6 | Copyright: (c) 2016 by Red Hat Inc.
7 | License: Expat
8 | Permission is hereby granted, free of charge, to any person obtaining a copy
9 | of this software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | .
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | .
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 |
--------------------------------------------------------------------------------
/ceph_medic/tests/test_log.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | from ceph_medic.util import configuration
4 | from ceph_medic import log
5 | import logging
6 |
7 |
8 | class TestLogSetup(object):
9 |
10 | def teardown(self):
11 | logger = logging.getLogger()
12 | logger.handlers = []
13 |
14 | def test_barf_when_config_path_does_not_exist(self, tmpdir):
15 | location = os.path.join(str(tmpdir), 'ceph-medic.conf')
16 | with open(location, 'w') as _f:
17 | _f.write("""\n[global]\n--log-path=/bogus/path""")
18 | config = configuration.load(location)
19 | with pytest.raises(RuntimeError) as error:
20 | log.setup(config)
21 | assert 'value does not exist' in str(error.value)
22 |
23 | def test_create_log_config_correctly(self, tmpdir):
24 | tmp_log_path = str(tmpdir)
25 | location = os.path.join(tmp_log_path, 'ceph-medic.conf')
26 | with open(location, 'w') as _f:
27 | _f.write("""\n[global]\n--log-path=%s""" % tmp_log_path)
28 | config = configuration.load(location)
29 | log.setup(config)
30 | logger = logging.getLogger()
31 | # tox has its own logger now, we need to make sure we are talking about the
32 | # actual configured ones by ceph-medic
33 | ceph_medic_loggers = [
34 | i for i in logger.handlers if 'ceph-medic' in getattr(i, 'baseFilename', '')
35 | ]
36 | assert len(ceph_medic_loggers) == 1
37 |
--------------------------------------------------------------------------------
/tests/functional/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = {ansible2.2,ansible2.3,ansible2.4}-{nightly_centos7}
3 | skipsdist = True
4 |
5 | [testenv]
6 | whitelist_externals =
7 | vagrant
8 | bash
9 | git
10 | passenv=*
11 | setenv=
12 | ANSIBLE_SSH_ARGS = -F {changedir}/vagrant_ssh_config
13 | ansible2.2: ANSIBLE_STDOUT_CALLBACK = debug
14 | ANSIBLE_RETRY_FILES_ENABLED = False
15 | ANSIBLE_SSH_RETRIES = 5
16 | ANSIBLE_ACTION_PLUGINS = {envdir}/tmp/ceph-ansible/plugins/actions
17 | deps=
18 | ansible1.9: ansible==1.9.4
19 | ansible2.1: ansible==2.1
20 | ansible2.2: ansible==2.2.3
21 | ansible2.3: ansible==2.3.1
22 | ansible2.4: ansible==2.4.2
23 | notario>=0.0.13
24 | changedir=
25 | nightly_centos7: {toxinidir}/centos7
26 | commands=
27 | git clone -b {env:CEPH_ANSIBLE_BRANCH:master} --single-branch https://github.com/ceph/ceph-ansible.git {envdir}/tmp/ceph-ansible
28 |
29 | vagrant up --no-provision {posargs:--provider=virtualbox}
30 | bash {toxinidir}/scripts/generate_ssh_config.sh {changedir}
31 |
32 | # install ceph-medic on 'client0' vm and setup nodes for testing
33 | ansible-playbook -vv -i {changedir}/hosts {toxinidir}/playbooks/setup.yml --extra-vars="ceph_medic_branch={env:CEPH_MEDIC_DEV_BRANCH:master}"
34 | # use ceph-ansible to deploy a ceph cluster on the rest of the vms
35 | ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/site.yml.sample
36 | # use ceph-medic to check the cluster we just created
37 | ansible-playbook -vv -i {changedir}/hosts {changedir}/test.yml
38 |
39 | vagrant destroy --force
40 |
--------------------------------------------------------------------------------
/docs/source/changelog.rst:
--------------------------------------------------------------------------------
1 | 1.0.8
2 | -----
3 | 17-Jun-2020
4 |
5 | * Fix issues with podman support
6 |
7 | 1.0.7
8 | -----
9 | 24-Mar-2020
10 |
11 | * Fix test bugs that were breaking rpm builds
12 |
13 | 1.0.6
14 | -----
15 | 11-Feb-2020
16 |
17 | * Docker, podman container support
18 | * Fix broken SSH config option
19 | * Fix querying the Ceph version via admin socket on newer Ceph versions
20 |
21 | 1.0.5
22 | -----
23 | 27-Jun-2019
24 |
25 | * Add check for minimum OSD node count
26 | * Add check for minimum MON node count
27 | * Remove reporting of nodes that can't connect, report them separately
28 | * Kubernetes, Openshift, container support
29 | * Fix unidentifiable user/group ID issues
30 | * Rook support
31 | * Report on failed nodes
32 | * When there are errors, set a non-zero exit status
33 | * Add separate "cluster wide" checks, which run once
34 | * Be able to retrieve socket configuration
35 | * Fix issue with trying to run ``whoami`` to test remote connections, use
36 | ``true`` instead
37 | * Add check for missing FSID
38 | * Skip OSD validation when there isn't any ceph.conf
39 | * Skip tmp directories in /var/lib/ceph scanning to prevent blowing up
40 | * Detect collocated daemons
41 | * Allow overriding ignores in the CLI, fallback to the config file
42 | * Break documentation up to have installation away from getting started
43 |
44 |
45 | 1.0.4
46 | -----
47 | 20-Aug-2018
48 |
49 | * Add checks for parity between installed and socket versions
50 | * Fix issues with loading configuration with whitespace
51 | * Add check for min_pool_size
52 | * Collect versions from running daemons
53 |
--------------------------------------------------------------------------------
/CONTRIBUTING.rst:
--------------------------------------------------------------------------------
1 | Contributing to ceph-medic
2 | ===========================
3 | Before any contributions, a reference ticket *must* exist. To open a new
4 | issue, requests can go to:
5 |
6 | https://github.com/ceph/ceph-medic/issues/new
7 |
8 | commits
9 | -------
10 | Once a ticket exists, commits should be prefaced by the ticket ID. This makes
11 | it easier for maintainers to keep track of why a given line changed, mapping
12 | directly to work done on a ticket.
13 |
14 | For tickets coming from tracker.ceph.com, we expect the following format::
15 |
16 | [RM-0000] this is a commit message for tracker.ceph.com
17 |
18 | ``RM`` stands for Redmine which is the software running tracker.ceph.com.
19 | Similarly, if a ticket was created in bugzilla.redhat.com, we expect the
20 | following format::
21 |
22 | [BZ-0000] this is a commit message for bugzilla.redhat.com
23 |
24 |
25 | To automate this process, you can create a branch with the tracker identifier
26 | and id (replace "0000" with the ticket number)::
27 |
28 | git checkout -b RM-0000
29 |
30 | And then use the following prepare-commit-msg hook:
31 | https://gist.github.com/alfredodeza/6d62d99a95c9a7975fbe
32 |
33 | Copy that file to ``$GITREPOSITORY/.git/hooks/prepare-commit-msg``
34 | and mark it executable.
35 |
36 | Your commit messages should then be automatically prefixed with the branch name
37 | based off of the issue tracker.
38 |
39 | tests and documentation
40 | -----------------------
41 | Wherever it is feasible, tests must exist and documentation must be added or
42 | improved depending on the change.
43 |
44 | The build process not only runs tests but ensures that docs can be built from
45 | the proposed changes as well.
46 |
--------------------------------------------------------------------------------
/tests/functional/playbooks/setup.yml:
--------------------------------------------------------------------------------
1 | ---
2 | - hosts: all
3 | gather_facts: True
4 | tasks:
5 | - name: write all nodes to /etc/hosts
6 | sudo: yes
7 | blockinfile:
8 | dest: /etc/hosts
9 | block: |
10 | {{ hostvars[item]["address"] }} {{ item }}
11 | marker: "# {mark} ANSIBLE MANAGED BLOCK {{ item }}"
12 | with_inventory_hostnames: all
13 |
14 | - hosts: medic
15 | become: yes
16 | tasks:
17 |
18 | - name: fetch shaman ceph-medic repo
19 | get_url:
20 | url: https://shaman.ceph.com/api/repos/ceph-medic/{{ ceph_medic_branch }}/latest/centos/7/repo
21 | dest: /etc/yum.repos.d/ceph-medic.repo
22 |
23 | - name: print contents of /etc/yum.repos.d/ceph-medic.repo
24 | command: cat /etc/yum.repos.d/ceph-medic.repo
25 |
26 | - name: install epel-release
27 | yum:
28 | name: epel-release
29 | state: present
30 |
31 | - name: install python-tambo
32 | yum:
33 | name: python-tambo
34 | state: present
35 | enablerepo: epel-testing
36 |
37 | - name: install ceph-medic
38 | yum:
39 | name: ceph-medic
40 | state: present
41 |
42 | - name: test ceph-medic install
43 | become: no
44 | command: ceph-medic --help
45 |
46 | - name: copy vagrant insecure private ssh key
47 | copy:
48 | src: ~/.vagrant.d/insecure_private_key
49 | dest: /home/vagrant/.ssh/id_dsa
50 | mode: 0600
51 | owner: vagrant
52 | group: vagrant
53 |
54 | - name: turn off StrictHostKeyChecking
55 | blockinfile:
56 | dest: /home/vagrant/.ssh/config
57 | create: yes
58 | mode: 0400
59 | owner: vagrant
60 | group: vagrant
61 | block: |
62 | Host *
63 | StrictHostKeyChecking no
64 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for constructing RPMs.
2 | # Try "make" (for SRPMS) or "make rpm"
3 |
4 | NAME = ceph-medic
5 |
6 | # Set the RPM package NVR from "git describe".
7 | # Examples:
8 | #
9 | # A "git describe" value of "v2.2.0rc1" would create an NVR
10 | # "ceph-medic-2.2.0-0.rc1.1.el7"
11 | #
12 | # A "git describe" value of "v2.2.0rc1-1-gc465f85" would create an NVR
13 | # "ceph-medic-2.2.0-0.rc1.1.gc465f85.el7"
14 | #
15 | # A "git describe" value of "v2.2.0" creates an NVR
16 | # "ceph-medic-2.2.0-1.el7"
17 |
18 | VERSION := $(shell git describe --tags --abbrev=0 --match 'v*' | sed 's/^v//')
19 | COMMIT := $(shell git rev-parse HEAD)
20 | SHORTCOMMIT := $(shell echo $(COMMIT) | cut -c1-7)
21 | RELEASE := $(shell git describe --tags --match 'v*' \
22 | | sed 's/^v//' \
23 | | sed 's/^[^-]*-//' \
24 | | sed 's/-/./')
25 | ifeq ($(VERSION),$(RELEASE))
26 | RELEASE = 1
27 | endif
28 | ifneq (,$(findstring rc,$(VERSION)))
29 | RC := $(shell echo $(VERSION) | sed 's/.*rc/rc/')
30 | RELEASE := 0.$(RC).$(RELEASE)
31 | VERSION := $(subst $(RC),,$(VERSION))
32 | endif
33 | NVR := $(NAME)-$(VERSION)-$(RELEASE).el7
34 |
35 | all: srpm
36 |
37 | # Testing only
38 | echo:
39 | echo COMMIT $(COMMIT)
40 | echo VERSION $(VERSION)
41 | echo RELEASE $(RELEASE)
42 | echo NVR $(NVR)
43 |
44 | clean:
45 | rm -rf dist/
46 | rm -rf ceph-medic-$(VERSION)-$(SHORTCOMMIT).tar.gz
47 | rm -rf $(NVR).src.rpm
48 |
49 | dist:
50 | git archive --format=tar.gz --prefix=ceph-medic-$(VERSION)/ HEAD > ceph-medic-$(VERSION)-$(SHORTCOMMIT).tar.gz
51 |
52 | spec:
53 | sed ceph-medic.spec.in \
54 | -e 's/@COMMIT@/$(COMMIT)/' \
55 | -e 's/@VERSION@/$(VERSION)/' \
56 | -e 's/@RELEASE@/$(RELEASE)/' \
57 | > ceph-medic.spec
58 |
59 | srpm: dist spec
60 | fedpkg -v --dist epel7 srpm
61 |
62 | rpm: dist srpm
63 | mock -r epel-7-x86_64 rebuild $(NVR).src.rpm \
64 | --resultdir=. \
65 | --define "dist .el7"
66 |
67 | .PHONY: dist rpm srpm
68 |
--------------------------------------------------------------------------------
/docs/source/codes/common.rst:
--------------------------------------------------------------------------------
1 | Common
2 | ======
3 | The following checks indicate general issues with the cluster that are not specific to any daemon type.
4 |
5 | Warnings
6 | --------
7 |
8 | .. _WCOM1:
9 |
10 | WCOM1
11 | ^^^^^
12 | Running OSD and MON daemons were detected on the same node. Collocating OSDs and MONs is highly discouraged.
13 |
14 |
15 | Errors
16 | ------
17 |
18 | .. _ECOM1:
19 |
20 | ECOM1
21 | ^^^^^
22 | A ceph configuration file cannot be found at ``/etc/ceph/$cluster-name.conf``.
23 |
24 | .. _ECOM2:
25 |
26 | ECOM2
27 | ^^^^^
28 | The ``ceph`` executable was not found.
29 |
30 | .. _ECOM3:
31 |
32 | ECOM3
33 | ^^^^^
34 | The ``/var/lib/ceph`` directory does not exist or could not be collected.
35 |
36 | .. _ECOM4:
37 |
38 | ECOM4
39 | ^^^^^
40 | The ``/var/lib/ceph`` directory was not owned by the ``ceph`` user.
41 |
42 | .. _ECOM5:
43 |
44 | ECOM5
45 | ^^^^^
46 | The ``fsid`` defined in the configuration differs from other nodes in the cluster. The ``fsid`` must be
47 | the same for all nodes in the cluster.
48 |
49 | .. _ECOM6:
50 |
51 | ECOM6
52 | ^^^^^
53 | The installed version of ``ceph`` is not the same for all nodes in the cluster. The ``ceph`` version should be
54 | the same for all nodes in the cluster.
55 |
56 | .. _ECOM7:
57 |
58 | ECOM7
59 | ^^^^^
60 | The installed version of ``ceph`` does not match the version reported by a running ceph daemon. The installed ``ceph`` version should match the version of every running ceph daemon; if they do not match, the daemons most likely were not restarted correctly after a version change.
61 |
62 | .. _ECOM8:
63 |
64 | ECOM8
65 | ^^^^^
66 | The ``fsid`` field must exist in the configuration for each node.
67 |
68 |
69 | .. _ECOM9:
70 |
71 | ECOM9
72 | ^^^^^
73 | All running daemons in a cluster should report the same cluster ``fsid``. A daemon reporting a different ``fsid`` means that more than one cluster identifier is in use, which should not be the case.
74 |
75 |
76 | .. _ECOM10:
77 |
78 | ECOM10
79 | ^^^^^^
80 | Only a single monitor daemon should be running per host; having more than one monitor on the same host reduces the cluster's resilience if that node goes down.
81 |
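82 | Collected data is stored in the global ``metadata`` dictionary defined in
83 | ``ceph_medic/__init__.py``. The snippet below is a purely illustrative sketch,
84 | not the actual implementation in ``ceph_medic/checks/common.py``, of how a
85 | WCOM1-style collocation warning could be written with the same
86 | ``(code, message)`` convention:
87 | 
88 | .. code-block:: python
89 | 
90 |     from ceph_medic import metadata
91 | 
92 |     def check_collocated_mon_and_osd():
93 |         # Hypothetical sketch: hosts that appear both as MONs and as OSDs.
94 |         code = 'WCOM1'
95 |         msg = 'collocated OSD and MON daemons were detected on the same node'
96 |         collocated = set(metadata['mons']) & set(metadata['osds'])
97 |         if collocated:
98 |             return code, msg
99 | 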
--------------------------------------------------------------------------------
/ceph_medic/decorators.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from ceph_medic import terminal
4 | from functools import wraps
5 |
6 |
7 | def catches(catch=None, handler=None, exit=True):
8 | """
9 |     Very simple decorator that catches the exception(s) passed in, either as
10 |     a single exception class or a tuple (containing multiple ones), prints the
11 |     exception message, and optionally handles the problem with the handler if
12 |     one is provided.
13 |
14 |     So instead of doing something like this::
15 |
16 | def bar():
17 | try:
18 | some_call()
19 | print "Success!"
20 | except TypeError, exc:
21 | print "Error while handling some call: %s" % exc
22 | sys.exit(1)
23 |
24 | You would need to decorate it like this to have the same effect::
25 |
26 | @catches(TypeError)
27 | def bar():
28 | some_call()
29 | print "Success!"
30 |
31 |     If multiple exceptions need to be caught they need to be provided as a
32 | tuple::
33 |
34 | @catches((TypeError, AttributeError))
35 | def bar():
36 | some_call()
37 | print "Success!"
38 | """
39 | catch = catch or Exception
40 |
41 | def decorate(f):
42 |
43 | @wraps(f)
44 | def newfunc(*a, **kw):
45 | try:
46 | return f(*a, **kw)
47 | except catch as e:
48 | if os.environ.get('CEPH_MEDIC_DEBUG'):
49 | raise
50 | if handler:
51 | return handler(e)
52 | else:
53 | sys.stderr.write(make_exception_message(e))
54 | if exit:
55 | sys.exit(1)
56 | return newfunc
57 |
58 | return decorate
59 |
60 | #
61 | # Decorator helpers
62 | #
63 |
64 |
65 | def make_exception_message(exc):
66 | """
67 | An exception is passed in and this function
68 | returns the proper string depending on the result
69 | so it is readable enough.
70 | """
71 | if str(exc):
72 | return '%s %s: %s\n' % (terminal.red_arrow, exc.__class__.__name__, exc)
73 | else:
74 | return '%s %s\n' % (terminal.red_arrow, exc.__class__.__name__)
75 |
--------------------------------------------------------------------------------
/ceph_medic/tests/remote/test_commands.py:
--------------------------------------------------------------------------------
1 | from mock import Mock
2 | from ceph_medic.remote import commands
3 |
4 |
5 | class TestCephSocketVersion(object):
6 |
7 | def test_gets_socket_version(self, monkeypatch):
8 | def mock_check(conn, cmd):
9 | return (['{"version":"12.2.0"}'], [], 0)
10 | monkeypatch.setattr(commands, 'check', mock_check)
11 | result = commands.ceph_socket_version(Mock(), '/var/run/ceph/osd.asok')
12 | assert 'version' in result
13 |
14 | def test_handles_invalid_json(self, monkeypatch):
15 | def mock_check(conn, cmd):
16 | return (['version=12.2.0'], [], 0)
17 | monkeypatch.setattr(commands, 'check', mock_check)
18 | result = commands.ceph_socket_version(Mock(), '/var/run/ceph/osd.asok')
19 | assert result == {}
20 |
21 | def test_handles_non_zero_code(self, monkeypatch):
22 | def mock_check(conn, cmd):
23 | return (['version=12.2.0'], [], 1)
24 | monkeypatch.setattr(commands, 'check', mock_check)
25 | result = commands.ceph_socket_version(Mock(), '/var/run/ceph/osd.asok')
26 | assert result == {}
27 |
28 |
29 | class TestCephVersion(object):
30 |
31 | def test_gets_ceph_version(self, stub_check):
32 | stub_check(
33 | (['ceph version 14.1.1 (nautilus)', ''], [], 0),
34 | commands, 'check')
35 | result = commands.ceph_version(None)
36 | assert result == 'ceph version 14.1.1 (nautilus)'
37 |
38 | def test_handles_non_zero_status(self, stub_check, conn):
39 | stub_check(
40 | (['error mr. robinson', ''], [], 1),
41 | commands, 'check')
42 | result = commands.ceph_version(conn)
43 | assert result is None
44 |
45 |
46 | class TestDaemonSocketConfig(object):
47 |
48 | def test_loadable_json(self, stub_check, conn):
49 | stub_check(
50 | (['{"config": true}'], [], 0),
51 | commands, 'check')
52 | result = commands.daemon_socket_config(conn, '/')
53 | assert result == {'config': True}
54 |
55 | def test_unloadable_json(self, stub_check, conn):
56 | stub_check(
57 | (['{config: []}'], [], 0),
58 | commands, 'check')
59 | result = commands.daemon_socket_config(conn, '/')
60 | assert result == {}
61 |
62 |
--------------------------------------------------------------------------------
/tests/functional/centos7/vagrant_variables.yml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | # DEPLOY CONTAINERIZED DAEMONS
4 | docker: false
5 |
6 | # DEFINE THE NUMBER OF VMS TO RUN
7 | mon_vms: 1
8 | osd_vms: 1
9 | mds_vms: 0
10 | rgw_vms: 0
11 | nfs_vms: 0
12 | rbd_mirror_vms: 0
13 | client_vms: 1
14 | iscsi_gw_vms: 0
15 |
16 | # SUBNETS TO USE FOR THE VMS
17 | public_subnet: 192.168.3
18 | cluster_subnet: 192.168.4
19 |
20 | # MEMORY
21 | # set 1024 for CentOS
22 | memory: 512
23 |
24 | # Ethernet interface name
25 | # use eth1 for libvirt and ubuntu precise, enp0s8 for CentOS and ubuntu xenial
26 | eth: 'eth1'
27 |
28 | # VAGRANT BOX
29 | # Ceph boxes are *strongly* suggested. They are under better control and will
30 | # not get updated frequently unless required for build systems. These are (for
31 | # now):
32 | #
33 | # * ceph/ubuntu-xenial
34 | #
35 | # Ubuntu: ceph/ubuntu-xenial bento/ubuntu-16.04 or ubuntu/trusty64 or ubuntu/wily64
36 | # CentOS: bento/centos-7.1 or puppetlabs/centos-7.0-64-puppet
37 | # libvirt CentOS: centos/7
38 | # parallels Ubuntu: parallels/ubuntu-14.04
39 | # Debian: deb/jessie-amd64 - be careful the storage controller is named 'SATA Controller'
40 | # For more boxes have a look at:
41 | # - https://atlas.hashicorp.com/boxes/search?utf8=✓&sort=&provider=virtualbox&q=
42 | # - https://download.gluster.org/pub/gluster/purpleidea/vagrant/
43 | vagrant_box: centos/7
44 | client_vagrant_box: centos/7
45 | #ssh_private_key_path: "~/.ssh/id_rsa"
46 | # The sync directory changes based on vagrant box
47 | # Set to /home/vagrant/sync for Centos/7, /home/{ user }/vagrant for openstack and defaults to /vagrant
48 | #vagrant_sync_dir: /home/vagrant/sync
49 | #vagrant_sync_dir: /
50 | # Disables synced folder creation. Not needed for testing, will skip mounting
51 | # the vagrant directory on the remote box regardless of the provider.
52 | vagrant_disable_synced_folder: true
53 | # VAGRANT URL
54 | # This is a URL to download an image from an alternate location. vagrant_box
55 | # above should be set to the filename of the image.
56 | # Fedora virtualbox: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
57 | # Fedora libvirt: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-libvirt.box
58 | # vagrant_box_url: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
59 |
--------------------------------------------------------------------------------
/ceph_medic/util/mon.py:
--------------------------------------------------------------------------------
1 | import remoto
2 | import json
3 | import ceph_medic
4 | from ceph_medic import terminal
5 |
6 |
7 | def get_mon_report(conn):
8 | command = [
9 | 'ceph',
10 | '--cluster=%s' % ceph_medic.metadata['cluster_name'],
11 | 'report'
12 | ]
13 | out, err, code = remoto.process.check(
14 | conn,
15 | command
16 | )
17 |
18 | if code > 0:
19 | terminal.error('failed to connect to the cluster to fetch a report from the monitor')
20 | terminal.error('command: %s' % ' '.join(command))
21 | for line in err:
22 | terminal.error(line)
23 | raise RuntimeError()
24 |
25 | try:
26 | return json.loads(b''.join(out).decode('utf-8'))
27 | except ValueError:
28 | return {}
29 |
30 |
31 | def get_cluster_nodes(conn):
32 | """
33 | Ask a monitor (with a pre-made connection) about all the nodes in
34 | a cluster. This will be able to get us all known MONs and OSDs.
35 |
36 | It returns a dictionary with a mapping that looks like::
37 |
38 | {
39 | 'mons': [
40 | {
41 | 'host': 'node1',
42 | 'public_ip': '192.168.1.100',
43 | },
44 | ],
45 | 'osds': [
46 | {
47 | 'host': 'node2',
48 | 'public_ip': '192.168.1.101',
49 | },
50 | {
51 | 'host': 'node3',
52 | 'public_ip': '192.168.1.102',
53 | },
54 | ]
55 | }
56 |
57 | """
58 | report = get_mon_report(conn)
59 | nodes = {'mons': [], 'osds': []}
60 | try:
61 | # XXX Is this really needed? in what case would we not have a monmap
62 | # with mons?
63 | mons = report['monmap']['mons']
64 | except KeyError:
65 | raise SystemExit(report)
66 | for i in mons:
67 | nodes['mons'].append({
68 | 'host': i['name'],
69 | 'public_ip': _extract_ip_address(i['public_addr'])
70 | })
71 |
72 | osds = report['osd_metadata']
73 | for i in osds:
74 | nodes['osds'].append({
75 | 'host': i['hostname'],
76 | 'public_ip': _extract_ip_address(i['front_addr'])
77 | })
78 |
79 | return nodes
80 |
81 |
82 | # XXX does not support IPv6
83 |
84 | def _extract_ip_address(string):
85 | """
86 | Addresses from Ceph reports can come up with subnets and ports using ':'
87 | and '/' to identify them properly. Parse those types of strings to extract
88 | just the IP.
89 | """
90 | port_removed = string.split(':')[0]
91 | return port_removed.split('/')[0]
92 |
--------------------------------------------------------------------------------
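A quick illustration of the address parsing done by ``_extract_ip_address`` above: the helper drops anything after a ``:`` (the port) and then anything after a ``/`` (the nonce), leaving just the IP. A minimal sketch, using a made-up address in the form Ceph reports use:

    from ceph_medic.util.mon import _extract_ip_address

    # a made-up "IP:port/nonce" address as found in a monitor report
    addr = '192.168.1.100:6789/0'
    assert _extract_ip_address(addr) == '192.168.1.100'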
/ceph_medic/checks/osds.py:
--------------------------------------------------------------------------------
1 | from ceph_medic import metadata
2 | from ceph_medic.util import configuration
3 |
4 |
5 | #
6 | # Utilities
7 | #
8 |
9 | def get_osd_ceph_fsids(data):
10 | fsids = []
11 | for file_path in data['paths']['/var/lib/ceph']['files'].keys():
12 | if "ceph_fsid" in file_path:
13 | fsids.append(data['paths']['/var/lib/ceph']['files'][file_path]['contents'].strip())
14 | return set(fsids)
15 |
16 |
17 | # XXX move out to a central utility module for other checks
18 | def get_ceph_conf(data):
19 | path = '/etc/ceph/%s.conf' % metadata['cluster_name']
20 | try:
21 | conf_file = data['paths']['/etc/ceph']['files'][path]
22 | except KeyError:
23 | return None
24 | return configuration.load_string(conf_file['contents'])
25 |
26 |
27 | def check_osd_ceph_fsid(host, data):
28 | code = 'WOSD1'
29 | msg = "Multiple ceph_fsid values found: %s"
30 |
31 | current_fsids = get_osd_ceph_fsids(data)
32 |
33 | if len(current_fsids) > 1:
34 | return code, msg % ", ".join(current_fsids)
35 |
36 |
37 | def check_min_pool_size(host, data):
38 | code = 'WOSD2'
39 | msg = 'osd default pool min_size is set to 1, can potentially lose data'
40 | conf = get_ceph_conf(data)
41 | if not conf: # no ceph.conf found!
42 | return
43 | size = conf.get_safe('global', 'osd_pool_default_min_size', '0')
44 | if int(size) == 1:
45 | return code, msg
46 |
47 |
48 | def check_min_osd_nodes(host, data):
49 | code = 'WOSD3'
50 | msg = 'OSD nodes might not be enough for a healthy cluster (%s needed, %s found)'
51 | conf = get_ceph_conf(data)
52 | if not conf: # no ceph.conf found!
53 | return
54 | default_size = int(conf.get_safe('global', 'osd_pool_default_size', '3'))
55 | min_size = int(conf.get_safe('global', 'osd_pool_default_min_size', '0'))
56 | magical_number = default_size + min_size + 1
57 | osd_nodes = len(metadata['osds'])
58 | if magical_number > osd_nodes:
59 | return code, msg % (magical_number, osd_nodes)
60 |
61 |
62 | def check_reasonable_ratios(host, data):
63 | code = 'WOSD4'
64 | msg = 'Ratios have been modified to unreasonable values: %s'
65 | unreasonable_ratios = []
66 | reasonable_ratios = {
67 | "backfillfull_ratio": 0.9,
68 | "nearfull_ratio": 0.85,
69 | "full_ratio": 0.95
70 | }
71 |
72 | dump = data['ceph']['osd'].get('dump', {})
73 | for name, value in reasonable_ratios.items():
74 | ratio = dump.get(name)
75 | if not ratio:
76 | continue
77 | if ratio != reasonable_ratios[name]:
78 | unreasonable_ratios.append(name)
79 | if unreasonable_ratios:
80 | msg = msg % ', '.join(sorted(unreasonable_ratios))
81 | return code, msg
82 |
--------------------------------------------------------------------------------
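Each of these OSD checks follows the same contract the runner expects: return nothing when the check passes, or a ``(code, message)`` tuple when it fails. A minimal sketch of driving ``check_min_pool_size`` directly, with a fabricated data dictionary shaped like the one the collector produces (the same shape the unit tests further below use):

    from ceph_medic import metadata
    from ceph_medic.checks.osds import check_min_pool_size

    metadata['cluster_name'] = 'ceph'
    data = {'paths': {'/etc/ceph': {'files': {
        '/etc/ceph/ceph.conf': {'contents': "[global]\nosd_pool_default_min_size = 1\n"}
    }}}}
    code, msg = check_min_pool_size(None, data)  # a passing check would return None instead
    assert code == 'WOSD2'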
/docs/source/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 |
4 | ``ceph-medic`` supports a few different installation methods, including system
5 | packages for RPM distros via EPEL. From PyPI, it can be installed with::
6 |
7 | pip install ceph-medic
8 |
9 |
10 | Official Upstream Repos
11 | -----------------------
12 |
13 | Download official releases of ``ceph-medic`` at https://download.ceph.com/ceph-medic
14 |
15 | Currently, only RPM repos built for CentOS 7 are supported.
16 |
17 | ``ceph-medic`` has dependencies on packages found in EPEL, so EPEL will need to be enabled.
18 |
19 | Follow these steps to install a CentOS 7 repo from download.ceph.com:
20 |
21 | - Install the latest RPM repo from download.ceph.com::
22 |
23 | wget http://download.ceph.com/ceph-medic/latest/rpm/el7/ceph-medic.repo -O /etc/yum.repos.d/ceph-medic.repo
24 |
25 | - Install ``epel-release``::
26 |
27 |
28 | yum install epel-release
29 |
30 | - Install the GPG key for ``ceph-medic``::
31 |
32 | wget https://download.ceph.com/keys/release.asc
33 | rpm --import release.asc
34 |
35 | - Install ``ceph-medic``::
36 |
37 | yum install ceph-medic
38 |
39 | - Verify your install::
40 |
41 | ceph-medic --help
42 |
43 | Shaman Repos
44 | ------------
45 |
46 | Every branch pushed to ceph-medic.git gets an RPM repo created and stored at
47 | shaman.ceph.com. Currently, only RPM repos built for CentOS 7 are supported.
48 |
49 | Browse https://shaman.ceph.com/repos/ceph-medic to find the available repos.
50 |
51 | .. Note::
52 | Shaman repos are available for 2 weeks before they are automatically deleted.
53 | However, there should always be a repo available for the master branch of ``ceph-medic``.
54 |
55 | ``ceph-medic`` has dependencies on packages found in EPEL, so EPEL will need to be enabled.
56 |
57 | Follow these steps to install a CentOS 7 repo from shaman.ceph.com:
58 |
59 | - Install the latest master shaman repo::
60 |
61 | wget https://shaman.ceph.com/api/repos/ceph-medic/master/latest/centos/7/repo -O /etc/yum.repos.d/ceph-medic.repo
62 |
63 | - Install ``epel-release``::
64 |
65 | yum install epel-release
66 |
67 | - Install ``ceph-medic``::
68 |
69 | yum install ceph-medic
70 |
71 | - Verify your install::
72 |
73 | ceph-medic --help
74 |
75 | GitHub
76 | ------
77 | You can install directly from the source on GitHub by following these steps:
78 |
79 | - Clone the repository::
80 |
81 | git clone https://github.com/ceph/ceph-medic.git
82 |
83 |
84 | - Change to the ``ceph-medic`` directory::
85 |
86 | cd ceph-medic
87 |
88 | - Create and activate a Python Virtual Environment::
89 |
90 | virtualenv venv
91 | source venv/bin/activate
92 |
93 | - Install ceph-medic into the Virtual Environment::
94 |
95 | python setup.py install
96 |
97 | ``ceph-medic`` should now be installed and available in the created virtualenv.
98 | Check your installation by running: ``ceph-medic --help``
99 |
--------------------------------------------------------------------------------
/ceph_medic/check.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import ceph_medic
3 | import logging
4 | from ceph_medic import runner, collector
5 | from tambo import Transport
6 |
7 | logger = logging.getLogger(__name__)
8 |
9 |
10 | def as_list(string):
11 | if not string:
12 | return []
13 | string = string.strip(',')
14 |
15 | # split on commas
16 | string = string.split(',')
17 |
18 | # strip spaces
19 | return [x.strip() for x in string]
20 |
21 |
22 | class Check(object):
23 | help = "Run checks for all the configured nodes in a cluster or hosts file"
24 | long_help = """
25 | check: Run checks for all the configured nodes in the configuration
26 |
27 | Options:
28 | --ignore Comma-separated list of errors and warnings to ignore.
29 |
30 |
31 | Loaded Config Path: {config_path}
32 |
33 | Configured Nodes:
34 | {configured_nodes}
35 | """
36 |
37 | def __init__(self, argv=None, parse=True):
38 | self.argv = argv or sys.argv
39 |
40 | @property
41 | def subcommand_args(self):
42 | # find where `check` is
43 | index = self.argv.index('check')
44 | # slice the args
45 | return self.argv[index:]
46 |
47 | def _help(self):
48 | node_section = []
49 | for daemon, node in ceph_medic.config.nodes.items():
50 | header = "\n* %s:\n" % daemon
51 | body = '\n'.join([" %s" % n for n in ceph_medic.config.nodes[daemon]])
52 | node_section.append(header+body+'\n')
53 | return self.long_help.format(
54 | configured_nodes=''.join(node_section),
55 | config_path=ceph_medic.config.config_path
56 | )
57 |
58 | def main(self):
59 | options = ['--ignore']
60 | config_ignores = ceph_medic.config.file.get_list('check', '--ignore')
61 | parser = Transport(
62 | self.argv, options=options,
63 | check_version=False
64 | )
65 | parser.catch_help = self._help()
66 | parser.parse_args()
67 | ignored_codes = as_list(parser.get('--ignore', ''))
68 | # fallback to the configuration if nothing is defined in the CLI
69 | if not ignored_codes:
70 | ignored_codes = config_ignores
71 |
72 | if len(self.argv) < 1:
73 | return parser.print_help()
74 |
75 | # populate the nodes metadata with the configured nodes
76 | for daemon in ceph_medic.config.nodes.keys():
77 | ceph_medic.metadata['nodes'][daemon] = []
78 | for daemon, nodes in ceph_medic.config.nodes.items():
79 | for node in nodes:
80 | node_metadata = {'host': node['host']}
81 | if 'container' in node:
82 | node_metadata['container'] = node['container']
83 | ceph_medic.metadata['nodes'][daemon].append(node_metadata)
84 |
85 | collector.collect()
86 | test = runner.Runner()
87 | test.ignore = ignored_codes
88 | results = test.run()
89 | runner.report(results)
90 | # XXX might want to make this configurable so it doesn't bark on warnings,
91 | # for example; forcing a failure exit for now, but the results object doesn't
92 | # make a distinction between error and warning (!)
93 | if results.errors or results.warnings:
94 | sys.exit(1)
95 |
--------------------------------------------------------------------------------
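The ``--ignore`` flag above takes a comma-separated list of check codes which is cleaned up by ``as_list``; a couple of examples of what it tolerates (the codes shown are warning codes defined elsewhere in this repository):

    from ceph_medic.check import as_list

    assert as_list('WOSD1, WMON4,') == ['WOSD1', 'WMON4']  # stray spaces and commas are dropped
    assert as_list('') == []                               # an empty value means nothing is ignored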
/docs/source/facts.rst:
--------------------------------------------------------------------------------
1 | Cluster node facts
2 | ==================
3 | Fact collection happens per node and creates a mapping of hosts and data
4 | gathered. Each daemon 'type' is the primary key::
5 |
6 | ...
7 | 'osd': {
8 | 'node1': {...},
9 | 'node2': {...},
10 | }
11 | 'mon': {
12 | 'node3': {...},
13 | }
14 |
15 |
16 | There are other top-level keys that make it easier to deal with fact metadata, for example a full list of all hosts discovered::
17 |
18 | 'hosts': ['node1', 'node2', 'node3'],
19 | 'osds': ['node1', 'node2'],
20 | 'mons': ['node3']
21 |
22 |
23 | Each host has distinct metadata that gets collected. If any errors are
24 | detected, the ``exception`` key is populated with all information pertaining
25 | to the error generated when trying to execute the call. For example, a failed call to ``stat`` on a path might be::
26 |
27 | 'osd': {
28 | 'node1': {
29 | 'paths': {
30 | '/var/lib/osd': {
31 | 'exception': {
32 | 'traceback': "Traceback (most recent call last):\n File \"remote.py\", line 3, in <module>\n os.stat('/var/lib/osd')\n OSError: [Errno 2] No such file or directory: '/var/lib/osd'\n",
33 | 'name': 'OSError',
34 | 'repr': "[Errno 2] No such file or directory: '/var/lib/osd'",
35 | 'attributes': {
36 | 'args': "(2, 'No such file or directory')",
37 | 'errno': 2,
38 | 'filename': '/var/lib/osd',
39 | 'message': '',
40 | 'strerror': 'No such file or directory'
41 | }
42 | }
43 | }
44 | }
45 | }
46 |
47 | Note that objects will not get pickled, so data structures and objects will be
48 | sent back as plain text.
49 |
50 | Path contents are optionally enabled by the fact engine and will contain the
51 | raw representation of the full file contents. Here is an example of how
52 | a ``ceph.conf`` file would be represented on a monitor node::
53 |
54 |
55 | 'mon': {
56 | 'node3': {
57 | 'paths': {
58 | '/etc/ceph/': {
59 | 'dirs': [],
60 | 'files': {
61 | '/etc/ceph/ceph.conf': {
62 | 'contents': "[global]\nfsid = f05294bd-6e9d-4883-9819-c2800d4d7962\nmon_initial_members = node3\nmon_host = 192.168.111.102\nauth_cluster_required = cephx\nauth_service_required = cephx\nauth_client_required = cephx\n",
63 | 'owner': 'ceph',
64 | 'group': 'ceph',
65 | 'n_fields': 19,
66 | 'n_sequence_fields': 10,
67 | 'n_unnamed_fields': 3,
68 | 'st_atime': 1490714187.0,
69 | 'st_birthtime': 1463607160.0,
70 | 'st_blksize': 4096,
71 | 'st_blocks': 0,
72 | 'st_ctime': 1490295294.0,
73 | 'st_dev': 16777220,
74 | 'st_flags': 1048576,
75 | 'st_gen': 0,
76 | 'st_gid': 0,
77 | 'st_ino': 62858421,
78 | 'st_mode': 16877,
79 | 'st_mtime': 1490295294.0,
80 | 'st_nlink': 26,
81 | 'st_rdev': 0,
82 | 'st_size': 884,
83 | 'st_uid': 0,
84 | 'exception': {},
85 | }
86 | }
87 | }
88 | }
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
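As a minimal sketch of navigating that mapping from a check, following the shape of the hypothetical examples above (the node and path names are the made-up ones used there), a path entry is only trusted once its ``exception`` key is empty:

    from ceph_medic import metadata

    node_data = metadata['mon']['node3']
    conf_entry = node_data['paths']['/etc/ceph/']['files']['/etc/ceph/ceph.conf']
    if not conf_entry['exception']:
        print(conf_entry['contents'])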
/ceph_medic/generate.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import sys
3 | import ceph_medic
4 | from ceph_medic.connection import get_connection
5 | import remoto
6 | import json
7 | from tambo import Transport
8 |
9 |
10 | def generate_inventory(inventory, to_stdout=False, tmp_dir=None):
11 | """
12 | Generates a host file to use with an ansible-playbook call.
13 |
14 | The first argument is a dictionary that contains the group name as
15 | the key and a list of hostnames as the value.
16 |
17 | For example:
18 |
19 | {'mons': ['mon.host'], 'osds': ['osd1.host', 'osd2.host']}
20 | """
21 | result = []
22 | for section, hosts in inventory.items():
23 | group_name = section
24 | result.append("[{0}]".format(group_name))
25 | if not isinstance(hosts, list):
26 | hosts = [hosts]
27 | result.extend(hosts)
28 | result_str = "\n".join(result) + "\n"
29 | # tmp_dir is currently unused; unless to_stdout is set, the inventory is written to 'hosts_file'
30 | if to_stdout:
31 | print(result_str)
32 | return
33 | with open('hosts_file', 'w') as hosts_file:
34 | hosts_file.write(result_str)
35 |
36 |
37 | def get_mon_report(conn):
38 | out, err, code = remoto.process.check(
39 | conn,
40 | [
41 | 'ceph',
42 | 'report'
43 | ],
44 | )
45 |
46 | if code > 0:
47 | for line in err:
48 | print(line)
49 |
50 | try:
51 | return json.loads(b''.join(out).decode('utf-8'))
52 | except ValueError:
53 | return {}
54 |
55 |
56 | class Generate(object):
57 | help = "Create a hosts file (Ansible compatible) from the information on a running Ceph cluster"
58 | long_help = """
59 | Create a hosts file (Ansible compatible) from the information on a running Ceph
60 | cluster.
61 |
62 | Usage:
63 |
64 | ceph-medic generate [/path/to/ceph.conf]
65 | ceph-medic generate [MONITOR HOST]
66 |
67 | Loaded Config Path: {config_path}
68 |
69 | """
70 |
71 | def __init__(self, argv=None, parse=True):
72 | self.argv = argv or sys.argv
73 |
74 | def _help(self):
75 | skip_internal = ['__file__', 'config_path', 'verbosity']
76 | node_section = []
77 | for daemon, node in ceph_medic.config['nodes'].items():
78 | if daemon in skip_internal or not node:
79 | continue
80 | header = "\n* %s:\n" % daemon
81 | body = '\n'.join([" %s" % n for n in ceph_medic.config['nodes'][daemon].keys()])
82 | node_section.append(header+body+'\n')
83 | return self.long_help.format(
84 | config_path=ceph_medic.config['config_path']
85 | )
86 |
87 | def main(self):
88 | options = ['--stdout']
89 | parser = Transport(
90 | self.argv, options=options,
91 | check_version=False
92 | )
93 | parser.catch_help = self._help()
94 |
95 | parser.parse_args()
96 |
97 | if len(self.argv) == 1:
98 | raise SystemExit("A monitor hostname or a ceph.conf file is required as an argument")
99 |
100 | node = self.argv[-1]
101 | inventory = {}
102 |
103 | with get_connection(node) as conn:
104 | report = get_mon_report(conn)
105 | try:
106 | mons = report['monmap']['mons']
107 | except KeyError:
108 | raise SystemExit(report)
109 | inventory['mons'] = [i['name'] for i in mons]
110 | osds = report['osd_metadata']
111 | inventory['osds'] = [i['hostname'] for i in osds]
112 |
113 | if not inventory:
114 | raise SystemExit('no hosts were found from remote monitor node: %s' % node)
115 |
116 | generate_inventory(inventory, to_stdout=parser.get('--stdout'))
117 | conn.exit()
118 | return
119 |
--------------------------------------------------------------------------------
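Using the docstring's own example mapping, ``generate_inventory`` with ``to_stdout=True`` prints the Ansible-style hosts file instead of writing a ``hosts_file`` to the current directory; a minimal sketch:

    from ceph_medic.generate import generate_inventory

    # prints:
    #   [mons]
    #   mon.host
    #   [osds]
    #   osd1.host
    generate_inventory({'mons': ['mon.host'], 'osds': ['osd1.host']}, to_stdout=True)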
/ceph_medic/tests/util/test_configuration.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pytest
3 | from textwrap import dedent
4 | from ceph_medic.util import configuration
5 |
6 |
7 | def make_hosts_file(filename, contents=None):
8 | contents = contents or "[mons]\nmon0\n[osds]\nosd0\n"
9 | with open(filename, 'w') as f:
10 | f.write(contents)
11 |
12 |
13 | class TestFlatInventory(object):
14 |
15 | def test_parses_both_sections(self, tmpdir):
16 | filename = os.path.join(str(tmpdir), 'hosts')
17 | make_hosts_file(filename)
18 | result = configuration.AnsibleInventoryParser(filename)
19 | assert sorted(result.nodes.keys()) == sorted(['mons', 'osds'])
20 |
21 | def test_populates_hosts(self, tmpdir):
22 | filename = os.path.join(str(tmpdir), 'hosts')
23 | make_hosts_file(filename)
24 | result = configuration.AnsibleInventoryParser(filename).nodes
25 | assert result['mons'][0]['host'] == 'mon0'
26 | assert result['osds'][0]['host'] == 'osd0'
27 |
28 | def test_hosts_do_not_get_mixed(self, tmpdir):
29 | filename = os.path.join(str(tmpdir), 'hosts')
30 | make_hosts_file(filename)
31 | result = configuration.AnsibleInventoryParser(filename).nodes
32 | assert len(result['mons']) == 1
33 | assert len(result['osds']) == 1
34 |
35 | def test_ignores_unknown_groups(self, tmpdir):
36 | filename = os.path.join(str(tmpdir), 'hosts')
37 | contents = """
38 | [mons]
39 | mon0
40 |
41 | [test]
42 | node1
43 | """
44 | make_hosts_file(filename, contents)
45 | result = configuration.AnsibleInventoryParser(filename).nodes
46 | assert 'test' not in result
47 |
48 | def test_hosts_file_does_not_exist(self):
49 | with pytest.raises(SystemExit):
50 | configuration.load_hosts(_path="/fake/path")
51 |
52 |
53 | class TestNestedInventory(object):
54 |
55 | def test_nested_one_level(self, tmpdir):
56 | filename = os.path.join(str(tmpdir), 'hosts')
57 | contents = """
58 | [mons:children]
59 | atlanta
60 |
61 | [atlanta]
62 | mon0
63 | """
64 | make_hosts_file(filename, contents)
65 | result = configuration.AnsibleInventoryParser(filename).nodes
66 | assert result['mons'][0]['host'] == 'mon0'
67 |
68 | def test_nested_one_level_populates_other_groups(self, tmpdir):
69 | filename = os.path.join(str(tmpdir), 'hosts')
70 | contents = """
71 | [mons:children]
72 | atlanta
73 |
74 | [atlanta]
75 | mon0
76 | """
77 | make_hosts_file(filename, contents)
78 | result = configuration.AnsibleInventoryParser(filename).nodes
79 | assert result['mons'][0]['host'] == 'mon0'
80 |
81 | def test_nested_levels_populates(self, tmpdir):
82 | filename = os.path.join(str(tmpdir), 'hosts')
83 | contents = """
84 | [mons:children]
85 | us
86 |
87 | [atlanta]
88 | mon0
89 |
90 | [us:children]
91 | atlanta
92 | """
93 | make_hosts_file(filename, contents)
94 | result = configuration.AnsibleInventoryParser(filename).nodes
95 | assert result['mons'][0]['host'] == 'mon0'
96 |
97 |
98 | class TestLoadString(object):
99 |
100 | def test_loads_valid_ceph_key(self):
101 | contents = dedent("""
102 | [global]
103 | cluster = ceph
104 | """)
105 | conf = configuration.load_string(contents)
106 | assert conf.get_safe('global', 'cluster') == 'ceph'
107 |
108 | def test_loads_key_with_spaces_converted(self):
109 | contents = dedent("""
110 | [global]
111 | some key here = ceph
112 | """)
113 | conf = configuration.load_string(contents)
114 | assert conf.get_safe('global', 'some_key_here') == 'ceph'
115 |
--------------------------------------------------------------------------------
/ceph_medic/remote/commands.py:
--------------------------------------------------------------------------------
1 | """
2 | A collection of helpers that connect to a remote node and run a system
3 | command to return a specific value. Instead of shipping a module and executing
4 | functions remotely, these helpers just use the current connection to execute Popen.
5 | """
6 | import json
7 | from remoto.process import check
8 |
9 |
10 | def ceph_version(conn):
11 | try:
12 | output, _, exit_code = check(conn, ['ceph', '--version'])
13 | if exit_code != 0:
14 | conn.logger.error('Non-zero exit status received, unable to retrieve information')
15 | return
16 | return output[0]
17 | except RuntimeError:
18 | conn.logger.exception('failed to fetch ceph version')
19 |
20 |
21 | def ceph_socket_version(conn, socket):
22 | try:
23 | result = dict()
24 | output, _, exit_code = check(
25 | conn,
26 | ['ceph', '--admin-daemon', socket, '--format', 'json', 'version']
27 | )
28 | if exit_code != 0:
29 | conn.logger.error('Non-zero exit status received, unable to retrieve information')
30 | return result
31 | try:
32 | result = json.loads(output[0])
33 | except ValueError:
34 | conn.logger.exception(
35 | "failed to fetch ceph socket version, invalid json: %s" % output[0]
36 | )
37 | return result
38 | except RuntimeError:
39 | conn.logger.exception('failed to fetch ceph socket version')
40 |
41 |
42 | def ceph_status(conn):
43 | try: # collects information using ceph -s
44 | stdout, stderr, exit_code = check(conn, ['ceph', '-s', '--format', 'json'])
45 | result = dict()
46 | try:
47 | result = json.loads(''.join(stdout))
48 | except ValueError:
49 | conn.logger.exception("failed to fetch ceph status, invalid json: %s" % ''.join(stdout))
50 |
51 | if exit_code == 0:
52 | return result
53 | else:
54 | return {}
55 |
56 | except RuntimeError:
57 | conn.logger.exception('failed to fetch ceph status')
58 |
59 |
60 | def ceph_osd_dump(conn):
61 | try:
62 | stdout, stderr, exit_code = check(conn, ['ceph', 'osd', 'dump', '--format', 'json'])
63 | result = dict()
64 | if exit_code != 0:
65 | conn.logger.error('could not get osd dump from ceph')
66 | if stderr:
67 | for line in stderr:
68 | conn.logger.error(line)
69 | return result
70 | try:
71 | result = json.loads(''.join(stdout))
72 | except ValueError:
73 | conn.logger.exception("failed to fetch osd dump, invalid json: %s" % ''.join(stdout))
74 |
75 | return result
76 |
77 | except RuntimeError:
78 | conn.logger.exception('failed to fetch ceph osd dump')
79 |
80 |
81 | def daemon_socket_config(conn, socket):
82 | """
83 | Capture daemon-based config from the socket
84 | """
85 | try:
86 | output, _, exit_code = check(
87 | conn,
88 | ['ceph', '--admin-daemon', socket, 'config', 'show', '--format', 'json']
89 | )
90 | if exit_code != 0:
91 | conn.logger.error('Non-zero exit status received, unable to retrieve information')
92 | return
93 | result = dict()
94 | try:
95 | result = json.loads(output[0])
96 | except ValueError:
97 | conn.logger.exception(
98 | "failed to fetch ceph configuration via socket, invalid json: %s" % output[0]
99 | )
100 | return result
101 | except RuntimeError:
102 | conn.logger.exception('failed to fetch ceph configuration via socket')
103 |
104 |
105 | def ceph_is_installed(conn):
106 | try:
107 | stdout, stderr, exit_code = check(conn, ['which', 'ceph'])
108 | except RuntimeError:
109 | conn.logger.exception('failed to check if ceph is available in the path')
110 | # XXX this might be incorrect
111 | return False
112 | if exit_code != 0:
113 | return False
114 | return True
115 |
--------------------------------------------------------------------------------
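All of these helpers are meant to degrade gracefully: on a non-zero exit status or unparsable JSON they log the problem and return ``None`` or an empty dict rather than raising. A minimal usage sketch, assuming a reachable monitor host named ``mon0`` (a hypothetical hostname) configured for plain SSH:

    from ceph_medic.connection import get_connection
    from ceph_medic.remote import commands

    conn = get_connection('mon0')
    if commands.ceph_is_installed(conn):
        print(commands.ceph_version(conn))         # e.g. 'ceph version 14.1.1 (nautilus)'
        status = commands.ceph_status(conn) or {}  # {} when the call fails
        print(status.get('health'))
    conn.exit()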
/ceph_medic/tests/remote/test_functions.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from ceph_medic.remote import functions
4 |
5 |
6 | def make_test_file(filename, contents=None):
7 | contents = contents or "foo"
8 | with open(filename, 'w') as f:
9 | f.write(contents)
10 |
11 |
12 | def make_test_tree(path, contents=None, tree=None):
13 | file1 = os.path.join(path, "file1.txt")
14 | dir1 = os.path.join(path, "dir1")
15 | file2 = os.path.join(path, "dir1/file2.txt")
16 | make_test_file(file1)
17 | os.mkdir(dir1)
18 | make_test_file(file2)
19 |
20 |
21 | class TestStatPath(object):
22 |
23 | def test_stat_file_includes_owner(self, tmpdir):
24 | filename = os.path.join(str(tmpdir), 'file')
25 | make_test_file(filename)
26 |
27 | result = functions.stat_path(filename)
28 | assert "owner" in result
29 |
30 | def test_stat_file_includes_group(self, tmpdir):
31 | filename = os.path.join(str(tmpdir), 'file')
32 | make_test_file(filename)
33 |
34 | result = functions.stat_path(filename)
35 | assert "group" in result
36 |
37 | def test_includes_file_content(self, tmpdir):
38 | filename = os.path.join(str(tmpdir), 'file')
39 | make_test_file(filename, contents="foo")
40 |
41 | result = functions.stat_path(filename, get_contents=True)
42 | assert result["contents"] == "foo"
43 |
44 | def test_exception_is_empty_on_success(self, tmpdir):
45 | filename = os.path.join(str(tmpdir), 'file')
46 | make_test_file(filename)
47 |
48 | result = functions.stat_path(filename)
49 | assert not result["exception"]
50 |
51 | def test_stat_dir(self, tmpdir):
52 | result = functions.stat_path(str(tmpdir))
53 | assert result != {}
54 |
55 | def test_no_callables(self, tmpdir):
56 | result = functions.stat_path(str(tmpdir))
57 | for value in result.values():
58 | assert callable(value) is False
59 |
60 |
61 | class TestStatPathErrors(object):
62 |
63 | def test_captures_exceptions(self):
64 | result = functions.stat_path('/does/not/exist')
65 | assert result['exception']['attributes']['errno'] == '2'
66 | assert result['exception']['name'] in ['FileNotFoundError', 'OSError']
67 |
68 |
69 | class AttributeLandMine(object):
70 |
71 | @property
72 | def explode(self):
73 | raise ValueError('Raising on attribute access')
74 |
75 |
76 | class TestCaptureException(object):
77 |
78 | def test_exceptions_in_errors_are_ignored(self):
79 | result = functions.capture_exception(AttributeLandMine())
80 | assert result['attributes'] == {'explode': None}
81 |
82 | def test_unserializable_attributes(self, factory):
83 | error = factory(unserial=lambda: True)
84 | result = functions.capture_exception(error)
85 | assert '')
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
41 | except KeyboardInterrupt:
42 | raise SystemExit("\nNot proceeding")
43 |
44 | old = "__version__ = '%s'" % metadata['version']
45 | new = "__version__ = '%s'" % self.version
46 |
47 | module_file = read_module_contents()
48 | with open('ceph_medic/__init__.py', 'w') as fileh:
49 | fileh.write(module_file.replace(old, new))
50 |
51 | # Commit everything with a standard commit message
52 | cmd = ['git', 'commit', '-a', '-m', 'version %s' % self.version]
53 | print(' '.join(cmd))
54 | subprocess.check_call(cmd)
55 |
56 |
57 | class ReleaseCommand(Command):
58 | """ Tag and push a new release. """
59 |
60 | user_options = [('sign', 's', 'GPG-sign the Git tag and release files')]
61 |
62 | def initialize_options(self):
63 | self.sign = False
64 |
65 | def finalize_options(self):
66 | pass
67 |
68 | def run(self):
69 | # Create Git tag
70 | tag_name = 'v%s' % version
71 | cmd = ['git', 'tag', '-a', tag_name, '-m', 'version %s' % version]
72 | if self.sign:
73 | cmd.append('-s')
74 | print(' '.join(cmd))
75 | subprocess.check_call(cmd)
76 |
77 | # Push Git tag to origin remote
78 | cmd = ['git', 'push', 'origin', tag_name]
79 | print(' '.join(cmd))
80 | subprocess.check_call(cmd)
81 |
82 | # Push package to pypi
83 | cmd = ['python', 'setup.py', 'sdist', 'upload']
84 | if self.sign:
85 | cmd.append('--sign')
86 | print(' '.join(cmd))
87 | #subprocess.check_call(cmd)
88 |
89 | # Push master to the remote
90 | cmd = ['git', 'push', 'origin', 'master']
91 | print(' '.join(cmd))
92 | subprocess.check_call(cmd)
93 |
94 |
95 | setup(
96 | name='ceph-medic',
97 | version=version,
98 | packages=find_packages(),
99 |
100 | author='Alfredo Deza',
101 | author_email='contact@redhat.com',
102 | description='detect common issues with ceph clusters',
103 | long_description=long_description,
104 | license='MIT',
105 | keywords='ceph doctor',
106 | url="https://github.com/ceph/ceph-medic",
107 | zip_safe=False,
108 |
109 | install_requires=[
110 | 'execnet',
111 | 'tambo',
112 | 'remoto>=1.1.2',
113 | ] + install_requires,
114 |
115 | tests_require=[
116 | 'pytest >=2.1.3',
117 | 'tox',
118 | 'mock',
119 | ],
120 |
121 | scripts=['bin/ceph-medic'],
122 | cmdclass={'bump': BumpCommand, 'release': ReleaseCommand},
123 | classifiers=[
124 | 'Development Status :: 4 - Beta',
125 | 'Intended Audience :: Developers',
126 | 'License :: OSI Approved :: MIT License',
127 | 'Topic :: Software Development :: Build Tools',
128 | 'Topic :: Utilities',
129 | 'Operating System :: MacOS :: MacOS X',
130 | 'Operating System :: POSIX',
131 | 'Programming Language :: Python :: 2.7',
132 | 'Programming Language :: Python :: 3.4',
133 | ]
134 |
135 | )
136 |
--------------------------------------------------------------------------------
/ceph_medic/tests/checks/test_osds.py:
--------------------------------------------------------------------------------
1 | from textwrap import dedent
2 | from ceph_medic.checks import osds
3 | from ceph_medic import metadata
4 |
5 |
6 | class TestOSDS(object):
7 |
8 | def test_fails_check_ceph_fsid(self):
9 | data = {'paths': {'/var/lib/ceph': {'files': {
10 | '/var/lib/ceph/osd/ceph-0/ceph_fsid': {'contents': "fsid1"},
11 | '/var/lib/ceph/osd/ceph-1/ceph_fsid': {'contents': "fsid2"},
12 | }}}}
13 | result = osds.check_osd_ceph_fsid(None, data)
14 | assert "WOSD1" in result
15 |
16 | def test_min_pool_size_fails(self, data):
17 | metadata['cluster_name'] = 'ceph'
18 | contents = dedent("""
19 | [global]
20 | cluster = foo
21 | osd_pool_default_min_size = 1
22 | """)
23 | osd_data = data()
24 | osd_data['paths']['/etc/ceph']['files']['/etc/ceph/ceph.conf'] = {'contents': contents}
25 | code, error = osds.check_min_pool_size(None, osd_data)
26 | assert error == 'osd default pool min_size is set to 1, can potentially lose data'
27 |
28 | def test_min_pool_size_is_correct(self, data):
29 | metadata['cluster_name'] = 'ceph'
30 | contents = dedent("""
31 | [global]
32 | cluster = foo
33 | osd_pool_default_min_size = 2
34 | """)
35 | osd_data = data()
36 | osd_data['paths']['/etc/ceph']['files']['/etc/ceph/ceph.conf'] = {'contents': contents}
37 | result = osds.check_min_pool_size(None, osd_data)
38 | assert result is None
39 |
40 |
41 | class TestMinOSDS(object):
42 |
43 | def test_min_osd_nodes_is_not_met(self, data):
44 | metadata['osds'] = {'osd1': []}
45 | metadata['cluster_name'] = 'ceph'
46 | osd_data = data()
47 | contents = dedent("""
48 | [global]
49 | cluster = foo
50 | osd_pool_default_min_size = 2
51 | """)
52 | osd_data['paths']['/etc/ceph']['files']['/etc/ceph/ceph.conf'] = {'contents': contents}
53 | code, error = osds.check_min_osd_nodes(None, osd_data)
54 | assert code == 'WOSD3'
55 | assert '6 needed, 1 found' in error
56 |
57 | def test_min_osd_nodes_is_met(self, data):
58 | metadata['osds'] = dict(('osd%s' % count, []) for count in range(6))
59 | metadata['cluster_name'] = 'ceph'
60 | osd_data = data()
61 | contents = dedent("""
62 | [global]
63 | cluster = foo
64 | osd_pool_default_min_size = 2
65 | """)
66 | osd_data['paths']['/etc/ceph']['files']['/etc/ceph/ceph.conf'] = {'contents': contents}
67 | result = osds.check_min_osd_nodes(None, osd_data)
68 | assert result is None
69 |
70 |
71 | class TestReasonableRatios(object):
72 |
73 | def setup(self):
74 | self.data = {'ceph': {'osd': {'dump': {}}}}
75 |
76 | def test_osd_is_empty(self):
77 | data = {'ceph': {'osd': {}}}
78 | assert osds.check_reasonable_ratios('node1', data) is None
79 |
80 | def test_ratios_are_all_very_reasonable(self):
81 | self.data['ceph']['osd']['dump'] = {
82 | "backfillfull_ratio": 0.9,
83 | "nearfull_ratio": 0.85,
84 | "full_ratio": 0.95
85 | }
86 | assert osds.check_reasonable_ratios('node1', self.data) is None
87 |
88 | def test_all_ratios_are_messed_up(self):
89 | self.data['ceph']['osd']['dump'] = {
90 | "backfillfull_ratio": 0.91,
91 | "nearfull_ratio": 0.84,
92 | "full_ratio": 0.92
93 | }
94 | code, msg = osds.check_reasonable_ratios('node1', self.data)
95 | assert code == 'WOSD4'
96 | assert 'backfillfull_ratio, full_ratio, nearfull_ratio' in msg
97 |
98 | def test_backfillfull_is_messed_up(self):
99 | self.data['ceph']['osd']['dump'] = {
100 | "backfillfull_ratio": 0.91,
101 | "nearfull_ratio": 0.85,
102 | "full_ratio": 0.95
103 | }
104 | code, msg = osds.check_reasonable_ratios('node1', self.data)
105 | assert msg.endswith('backfillfull_ratio')
106 |
107 | def test_nearfull_is_messed_up(self):
108 | self.data['ceph']['osd']['dump'] = {
109 | "backfillfull_ratio": 0.9,
110 | "nearfull_ratio": 0.88,
111 | "full_ratio": 0.95
112 | }
113 | code, msg = osds.check_reasonable_ratios('node1', self.data)
114 | assert msg.endswith('nearfull_ratio')
115 |
116 | def test_full_is_messed_up(self):
117 | self.data['ceph']['osd']['dump'] = {
118 | "backfillfull_ratio": 0.9,
119 | "nearfull_ratio": 0.89,
120 | "full_ratio": 0.95
121 | }
122 | code, msg = osds.check_reasonable_ratios('node1', self.data)
123 | assert msg.endswith('full_ratio')
124 |
--------------------------------------------------------------------------------
/ceph_medic/connection.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import socket
3 | import remoto
4 | import ceph_medic
5 | from execnet.gateway_bootstrap import HostNotFound
6 |
7 | logger = logging.getLogger(__name__)
8 |
9 |
10 | def get_connection(hostname, username=None, threads=5, use_sudo=None, detect_sudo=True, **kw):
11 | """
12 | A very simple helper, meant to return a connection
13 | that will know about the need to use sudo.
14 | """
15 | if kw.get('logger') is False: # explicitly disable remote logging
16 | remote_logger = None
17 | else:
18 | remote_logger = logging.getLogger(hostname)
19 |
20 | if username:
21 | hostname = "%s@%s" % (username, hostname)
22 |
23 | if ceph_medic.config.ssh_config:
24 | hostname = "-F %s %s" % (ceph_medic.config.ssh_config, hostname)
25 | try:
26 | deployment_type = kw.get(
27 | 'deployment_type',
28 | ceph_medic.config.file.get_safe(
29 | 'global', 'deployment_type', 'baremetal')
30 | )
31 | conn_obj = remoto.connection.get(deployment_type)
32 | if deployment_type in ['k8s', 'kubernetes', 'openshift', 'oc']:
33 | conn = container_platform_conn(hostname, conn_obj, deployment_type)
34 | # check if conn is ok
35 | stdout, stderr, code = remoto.process.check(conn, ['true'])
36 | if code:
37 | raise HostNotFound(
38 | 'Remote connection failed while testing connection:\n %s' % '\n'.join(stderr))
39 | elif deployment_type in ['docker', 'podman']:
40 | if kw.get('logger', True):
41 | remote_logger = logging.getLogger(kw['container'])
42 | conn = conn_obj(
43 | hostname,
44 | container_name=kw['container'],
45 | logger=remote_logger,
46 | detect_sudo=detect_sudo,
47 | )
48 | elif deployment_type in ['ssh', 'baremetal']:
49 | conn = conn_obj(
50 | hostname,
51 | logger=remote_logger,
52 | threads=threads,
53 | detect_sudo=detect_sudo,
54 | )
55 | else:
56 | raise RuntimeError(
57 | 'Invalid deployment_type: %s' % deployment_type)
58 | # Set a timeout value in seconds to disconnect and move on
59 | # if no data is sent back.
60 | conn.global_timeout = 300
61 | # XXX put this somewhere else
62 | if not ceph_medic.config.cluster_name:
63 | cluster_conf_files, stderr, exit_code = remoto.process.check(conn, ['ls', '/etc/ceph/'])
64 | cluster_name = 'ceph'
65 | if 'ceph.conf' not in cluster_conf_files:
66 | logger.warning('/etc/ceph/ceph.conf was not found, will try to infer the cluster name')
67 | for i in cluster_conf_files:
68 | if i.endswith('conf'):
69 | cluster_name = i.split('.conf')[0]
70 | logger.warning('inferred %s as the cluster name', cluster_name)
71 | ceph_medic.metadata['cluster_name'] = cluster_name
72 | else:
73 | ceph_medic.metadata['cluster_name'] = ceph_medic.config.cluster_name
74 | return conn
75 | except Exception as error:
76 | msg = "connecting to host: %s " % hostname
77 | errors = "resulted in errors: %s %s" % (error.__class__.__name__, error)
78 | logger.error(msg)
79 | logger.error(errors)
80 | raise error
81 |
82 |
83 | def container_platform_conn(hostname, conn_obj, deployment_type):
84 | """
85 | This helper function is only valid for container platform connections like
86 | OpenShift or Kubernetes. Fetches the configuration needed to properly
87 | configure the connection object, and then returns it.
88 | """
89 | container_platforms = {
90 | 'k8s': 'kubernetes',
91 | 'kubernetes': 'kubernetes',
92 | 'oc': 'openshift',
93 | 'openshift': 'openshift',
94 | }
95 | deployment_type = container_platforms.get(deployment_type, 'kubernetes')
96 | namespace = ceph_medic.config.file.get_safe(deployment_type, 'namespace', 'rook-ceph')
97 | context = ceph_medic.config.file.get_safe(deployment_type, 'context', None)
98 | return conn_obj(hostname, namespace, context=context)
99 |
100 |
101 | def as_bytes(string):
102 | """
103 | Ensure that whatever type of string is incoming, it is returned as bytes,
104 | encoding to utf-8 otherwise
105 | """
106 | if isinstance(string, bytes):
107 | return string
108 | return string.encode('utf-8', errors='ignore')
109 |
110 |
111 | def get_local_connection(logger, use_sudo=False):
112 | """
113 | Helper for local connections that are sometimes needed to operate
114 | on local hosts
115 | """
116 | return get_connection(
117 | socket.gethostname(), # cannot rely on 'localhost' here
118 | None,
119 | logger=logger,
120 | threads=1,
121 | use_sudo=use_sudo,
122 | detect_sudo=False
123 | )
124 |
--------------------------------------------------------------------------------
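The ``deployment_type`` consulted above comes from the ``global`` section of the ceph-medic configuration file (falling back to ``baremetal``), and container platforms additionally read ``namespace`` and ``context`` from their own section. A minimal sketch of how those values resolve, loading made-up contents with ``configuration.load_string``:

    from ceph_medic.util import configuration

    conf = configuration.load_string(
        "[global]\n"
        "deployment_type = kubernetes\n"
        "\n"
        "[kubernetes]\n"
        "namespace = rook-ceph\n"
    )
    assert conf.get_safe('global', 'deployment_type', 'baremetal') == 'kubernetes'
    assert conf.get_safe('kubernetes', 'namespace', 'rook-ceph') == 'rook-ceph'
    assert conf.get_safe('kubernetes', 'context', None) is None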
/ceph_medic/checks/mons.py:
--------------------------------------------------------------------------------
1 | from ceph_medic import metadata
2 | from ceph_medic.util import configuration
3 |
4 | #
5 | # Utilities
6 | #
7 |
8 |
9 | def get_secret(data):
10 | """
11 | keyring files look like::
12 |
13 | [mon.]
14 | key = AQBvaBFZAAAAABAA9VHgwCg3rWn8fMaX8KL01A==
15 | caps mon = "allow *"
16 |
17 | Fetch that keyring file and extract the actual key, no spaces.
18 |
19 | .. warning:: If multiple mon dirs exist, this utility will pick the first
20 | one it finds. There are checks that will complain about multiple mon dirs.
21 | """
22 | file_paths = data['paths']['/var/lib/ceph']['files'].keys()
23 | _path = data['paths']['/var/lib/ceph']['files']
24 | for _file in file_paths:
25 | if _file.startswith('/var/lib/ceph/mon/') and _file.endswith('keyring'):
26 | contents = _path[_file]['contents']
27 | conf = configuration.load_string(contents)
28 | try:
29 | return conf.get_safe('mon.', 'key', '').split('\n')[0]
30 | except IndexError:
31 | # is it really possible to get a keyring file that doesn't
32 | # have a monitor secret?
33 | return ''
34 |
35 |
36 | def get_monitor_dirs(dirs):
37 | """
38 | Find all the /var/lib/ceph/mon/* directories. This is a bit tricky because
39 | we don't know if there are nested directories (the metadata reports them in
40 | a flat list).
41 | We must go through all of them and make sure that by splitting there aren't
42 | any nested ones and we are only reporting actual monitor dirs.
43 | """
44 | # get all the actual monitor dirs
45 | found = []
46 | prefix = '/var/lib/ceph/mon/'
47 | mon_dirs = [d for d in dirs if d.startswith(prefix)]
48 | for _dir in mon_dirs:
49 | # splitting on prefix[-1] will give us:
50 | # 'ceph-mon-1/maybe/nested' or 'ceph-mon-1'
51 | dirs = _dir.split(prefix)[-1].split('/')
52 | # splitting again on '/' and using the first part will ensure we only
53 | # get the dir
54 | found.append(dirs[0])
55 | return set(found)
56 |
57 |
58 | def get_osd_dirs(dirs):
59 | """
60 | Find all the /var/lib/ceph/osd/* directories. This is a bit tricky because
61 | we don't know if there are nested directories (the metadata reports them in
62 | a flat list).
63 | We must go through all of them and make sure that by splitting there aren't
64 | any nested ones and we are only reporting actual OSD dirs.
65 | """
66 | # get all the actual OSD dirs
67 | found = []
68 | prefix = '/var/lib/ceph/osd/'
69 | osd_dirs = [d for d in dirs if d.startswith(prefix)]
70 | for _dir in osd_dirs:
71 | # splitting on prefix[-1] will give us:
72 | # 'ceph-1/maybe/nested' or 'ceph-1'
73 | dirs = _dir.split(prefix)[-1].split('/')
74 | # splitting again on '/' and using the first part will ensure we only
75 | # get the dir
76 | found.append(dirs[0])
77 | return set(found)
78 | #
79 | # Error Checks
80 | #
81 |
82 |
83 | def check_mon_secret(host, data):
84 | code = 'EMON1'
85 | msg = 'secret key "%s" is different than host(s): %s'
86 | mismatched_hosts = []
87 |
88 | current_secret = get_secret(data)
89 | if not current_secret:
90 | # there is no file for the current host, so we can't compare
91 | return
92 |
93 | for host, host_data in metadata['mons'].items():
94 | host_secret = get_secret(host_data)
95 | if not host_secret:
96 | # cannot compare with another host that may not have the secret
97 | continue
98 | if current_secret != host_secret:
99 | mismatched_hosts.append(host)
100 |
101 | if mismatched_hosts:
102 | return code, msg % (current_secret, ','.join(mismatched_hosts))
103 |
104 | #
105 | # Warning Checks
106 | #
107 |
108 |
109 | def check_multiple_mon_dirs(host, data):
110 | code = 'WMON1'
111 | msg = 'multiple /var/lib/ceph/mon/* dirs found: %s'
112 | dirs = data['paths']['/var/lib/ceph']['dirs']
113 | monitor_dirs = get_monitor_dirs(dirs)
114 | if len(monitor_dirs) > 1:
115 | return code, msg % ','.join(monitor_dirs)
116 |
117 |
118 | def check_mon_collocated_with_osd(host, data):
119 | code = 'WMON2'
120 | msg = 'collocated OSDs found: %s'
121 | dirs = data['paths']['/var/lib/ceph']['dirs']
122 | osd_dirs = get_osd_dirs(dirs)
123 | if len(osd_dirs):
124 | return code, msg % ','.join(osd_dirs)
125 |
126 |
127 | def check_mon_recommended_count(host, data):
128 | code = 'WMON3'
129 | msg = 'Recommended number of MONs (3) not met: %s'
130 | mon_count = len(metadata['mons'].keys())
131 | if mon_count < 3:
132 | return code, msg % mon_count
133 |
134 |
135 | def check_mon_count_is_odd(host, data):
136 | code = 'WMON4'
137 | msg = 'Number of MONs is not an odd number: %s'
138 | mon_count = len(metadata['mons'].keys())
139 | if mon_count % 2 == 0:
140 | return code, msg % mon_count
141 |
142 |
143 | def check_for_single_mon(host, data):
144 | code = 'WMON5'
145 | msg = 'A single monitor was detected: %s'
146 | monitors = list(metadata['mons'].keys())
147 | if len(monitors) == 1:
148 | return code, msg % monitors.pop()
149 |
--------------------------------------------------------------------------------
/ceph_medic/tests/util/test_hosts.py:
--------------------------------------------------------------------------------
1 | import json
2 | import pytest
3 | from ceph_medic.util import hosts, configuration
4 | import ceph_medic
5 | from textwrap import dedent
6 |
7 |
8 | def failed_check(raise_=True):
9 | if raise_:
10 | raise RuntimeError('command failed')
11 | else:
12 | return dict(stdout='', stderr='', code=1)
13 |
14 |
15 | class TestContainerPlatform(object):
16 |
17 | def test_oc_executable_fails(self, monkeypatch, capsys):
18 | monkeypatch.setattr(hosts.process, 'check', lambda *a: failed_check())
19 | hosts.container_platform()
20 | stdout, stderr = capsys.readouterr()
21 | assert 'Unable to retrieve the pods using command' in stdout
22 | assert 'oc --request-timeout=5 get -n rook-ceph pods -o json' in stdout
23 |
24 | def test_kubectl_executable_fails(self, monkeypatch, capsys):
25 | monkeypatch.setattr(hosts.process, 'check', lambda *a: failed_check())
26 | hosts.container_platform('kubernetes')
27 | stdout, stderr = capsys.readouterr()
28 | assert 'Unable to retrieve the pods using command' in stdout
29 | assert 'kubectl --request-timeout=5 get -n rook-ceph pods -o json' in stdout
30 |
31 | def test_no_context(self, stub_check):
32 | check = stub_check((['{"items": {}}'], [], 1))
33 | hosts.container_platform('kubernetes')
34 | command = check.calls[0]['args'][1]
35 | assert command == [
36 | 'kubectl', '--request-timeout=5', 'get', '-n',
37 | 'rook-ceph', 'pods', '-o', 'json'
38 | ]
39 |
40 | def test_garbage_stdout(self, stub_check, capsys):
41 | stub_check((['could not contact platform'], [], 1))
42 | with pytest.raises(SystemExit):
43 | hosts.container_platform('kubernetes')
44 | stdout, stderr = capsys.readouterr()
45 | assert 'Unable to load JSON from stdout' in stdout
46 | assert 'could not contact platform' in stdout
47 |
48 | def test_garbage_stderr(self, stub_check, capsys):
49 | stub_check(([], ['could not contact platform'], 1))
50 | with pytest.raises(SystemExit):
51 | hosts.container_platform('kubernetes')
52 | stdout, stderr = capsys.readouterr()
53 | assert 'Unable to load JSON from stdout' in stdout
54 | assert 'could not contact platform' in stdout
55 |
56 | def test_kubectl_with_context(self, stub_check):
57 | contents = dedent("""
58 | [kubernetes]
59 | context = 87
60 | """)
61 | conf = configuration.load_string(contents)
62 | ceph_medic.config.file = conf
63 | check = stub_check((['{"items": {}}'], [], 1))
64 | hosts.container_platform('kubernetes')
65 | command = check.calls[0]['args'][1]
66 | assert command == [
67 | 'kubectl', '--context', '87', '--request-timeout=5', 'get', '-n',
68 | 'rook-ceph', 'pods', '-o', 'json'
69 | ]
70 |
71 | def test_oc_with_context(self, stub_check):
72 | contents = dedent("""
73 | [openshift]
74 | context = 87
75 | """)
76 | conf = configuration.load_string(contents)
77 | ceph_medic.config.file = conf
78 | check = stub_check((['{"items": {}}'], [], 1))
79 | hosts.container_platform()
80 | command = check.calls[0]['args'][1]
81 | assert command == [
82 | 'oc', '--context', '87', '--request-timeout=5', 'get', '-n',
83 | 'rook-ceph', 'pods', '-o', 'json'
84 | ]
85 |
86 |
87 | class TestBasicContainers(object):
88 | binaries = ['docker', 'podman']
89 |
90 | @pytest.mark.parametrize('binary', binaries)
91 | def test_executable_fails(
92 | self, binary, monkeypatch, make_nodes, capsys):
93 | monkeypatch.setattr(hosts.config, 'nodes', make_nodes(mgrs=['mgr0']))
94 | monkeypatch.setattr(
95 | hosts.ceph_medic.connection, 'get_connection',
96 | lambda *a, **k: None)
97 | monkeypatch.setattr(
98 | hosts.process, 'check', lambda *a: failed_check(False))
99 | hosts.basic_containers(binary)
100 | stdout, stderr = capsys.readouterr()
101 | assert 'Unable to list containers on host mgr0' in stdout
102 |
103 | @pytest.mark.parametrize('binary', binaries)
104 | def test_inspection(
105 | self, binary, monkeypatch, make_nodes, stub_check, capsys):
106 | monkeypatch.setattr(ceph_medic.config, 'cluster_name', 'ceph')
107 | monkeypatch.setattr(hosts.config, 'nodes', make_nodes(mgrs=['mgr0']))
108 | monkeypatch.setattr(
109 | hosts.ceph_medic.connection, 'get_connection',
110 | lambda *a, **k: None)
111 | fake_list = '\n'.join(['mgr0-container'])
112 | fake_mgr = json.dumps([{
113 | 'Name': 'mgr0-container',
114 | 'Config': {
115 | 'Env': [
116 | 'CLUSTER=ceph',
117 | 'CEPH_DAEMON=MGR',
118 | ]
119 | }
120 | }])
121 | stub_check([
122 | ([fake_mgr], [''], 0),
123 | ([fake_list], [''], 0),
124 | ])
125 | result = hosts.basic_containers(binary)
126 | assert result['mgrs'][0]['host'] == 'mgr0'
127 | assert result['mgrs'][0]['container'] == 'mgr0-container'
128 |
--------------------------------------------------------------------------------
/ceph_medic/tests/checks/test_mons.py:
--------------------------------------------------------------------------------
1 | from ceph_medic import metadata
2 | from ceph_medic.checks import mons
3 |
4 |
5 | class TestGetSecret(object):
6 |
7 | def setup(self):
8 | self.data = {
9 | 'paths': {
10 | '/var/lib/ceph': {
11 | 'files': {
12 | '/var/lib/ceph/mon/ceph-mon-0/keyring': {
13 | 'contents': '',
14 | }
15 | }
16 | }
17 | }
18 | }
19 |
20 | def set_contents(self, string, file_path=None):
21 | file_path = file_path or '/var/lib/ceph/mon/ceph-mon-0/keyring'
22 | self.data['paths']['/var/lib/ceph']['files'][file_path]['contents'] = string
23 |
24 | def test_get_secret(self):
25 | contents = """
26 | [mon.]
27 | key = AQBvaBFZAAAAABAA9VHgwCg3rWn8fMaX8KL01A==
28 | caps mon = "allow *"
29 | """
30 | self.set_contents(contents)
31 | result = mons.get_secret(self.data)
32 | assert result == 'AQBvaBFZAAAAABAA9VHgwCg3rWn8fMaX8KL01A=='
33 |
34 | def test_get_no_secret_empty_file(self):
35 | result = mons.get_secret(self.data)
36 | assert result == ''
37 |
38 | def test_get_no_secret_wrong_file(self):
39 | contents = """
40 | [mon.]
41 | caps mon = "allow *"
42 | """
43 | self.set_contents(contents)
44 | result = mons.get_secret(self.data)
45 | assert result == ''
46 |
47 |
48 | class TestGetMonitorDirs(object):
49 |
50 | def test_get_monitor_dirs(self):
51 | result = mons.get_monitor_dirs([
52 | '/var/lib/ceph/mon/ceph-mon-1',
53 | '/var/lib/ceph/something'])
54 |
55 | assert result == set(['ceph-mon-1'])
56 |
57 | def test_cannot_get_monitor_dirs(self):
58 | result = mons.get_monitor_dirs([
59 | '/var/lib/ceph/osd/ceph-osd-1',
60 | '/var/lib/ceph/something'])
61 | assert result == set([])
62 |
63 | def test_get_monitor_dirs_multiple(self):
64 | result = mons.get_monitor_dirs([
65 | '/var/lib/ceph/mon/ceph-mon-1',
66 | '/var/lib/ceph/mon/ceph-mon-3',
67 | '/var/lib/ceph/mon/ceph-mon-2',
68 | '/var/lib/ceph/something'])
69 |
70 | assert result == set(['ceph-mon-1', 'ceph-mon-2', 'ceph-mon-3'])
71 |
72 | def test_get_monitor_dirs_nested_multiple(self):
73 | result = mons.get_monitor_dirs([
74 | '/var/lib/ceph/mon/ceph-mon-1',
75 | '/var/lib/ceph/mon/ceph-mon-1/nested/dir/',
76 | '/var/lib/ceph/mon/ceph-mon-1/other/nested',
77 | '/var/lib/ceph/mon/ceph-mon-2',
78 | '/var/lib/ceph/something'])
79 |
80 | assert result == set(['ceph-mon-1', 'ceph-mon-2'])
81 |
82 |
83 | class TestOsdDirs(object):
84 |
85 | def test_get_osd_dirs_nested_multiple(self):
86 | result = mons.get_osd_dirs([
87 | '/var/lib/ceph/osd/ceph-1',
88 | '/var/lib/ceph/osd/ceph-1/nested/dir/',
89 | '/var/lib/ceph/osd/ceph-1/other/nested',
90 | '/var/lib/ceph/osd/ceph-2',
91 | '/var/lib/ceph/something'])
92 |
93 | assert result == set(['ceph-1', 'ceph-2'])
94 |
95 |
96 | class TestMonRecommendedCount(object):
97 |
98 | def test_recommended_count_is_met(self, data):
99 | metadata['mons'] = dict(('mon%s' % count, []) for count in range(6))
100 | metadata['cluster_name'] = 'ceph'
101 | osd_data = data()
102 | result = mons.check_mon_recommended_count(None, osd_data)
103 | assert result is None
104 |
105 | def test_recommended_count_is_unmet(self, data):
106 | metadata['mons'] = dict(('mon%s' % count, []) for count in range(1))
107 | metadata['cluster_name'] = 'ceph'
108 | osd_data = data()
109 | code, message = mons.check_mon_recommended_count(None, osd_data)
110 | assert code == 'WMON3'
111 | assert message == 'Recommended number of MONs (3) not met: 1'
112 |
113 |
114 | class TestMonCountIsOdd(object):
115 |
116 | def test_count_is_odd(self, data):
117 | metadata['mons'] = dict(('mon%s' % count, []) for count in range(3))
118 | metadata['cluster_name'] = 'ceph'
119 | osd_data = data()
120 | result = mons.check_mon_count_is_odd(None, osd_data)
121 | assert result is None
122 |
123 | def test_recommended_count_is_unmet(self, data):
124 | metadata['mons'] = dict(('mon%s' % count, []) for count in range(2))
125 | metadata['cluster_name'] = 'ceph'
126 | osd_data = data()
127 | code, message = mons.check_mon_count_is_odd(None, osd_data)
128 | assert code == 'WMON4'
129 | assert message == 'Number of MONs is not an odd number: 2'
130 |
131 |
132 | class TestSingleMon(object):
133 |
134 | def test_is_single(self, data):
135 | metadata['mons'] = {'mon.0': []}
136 | metadata['cluster_name'] = 'ceph'
137 | code, message = mons.check_for_single_mon(None, data())
138 | assert code == 'WMON5'
139 | assert message == 'A single monitor was detected: mon.0'
140 |
141 | def test_is_not_single(self, data):
142 | metadata['mons'] = dict(('mon%s' % count, []) for count in range(2))
143 | metadata['cluster_name'] = 'ceph'
144 | result = mons.check_for_single_mon(None, data())
145 | assert result is None
146 |
--------------------------------------------------------------------------------
/ceph_medic/util/hosts.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | import ceph_medic.connection
4 | from ceph_medic import config, terminal
5 | from remoto import connection, process
6 |
7 | logger = logging.getLogger(__name__)
8 |
9 |
10 | def _platform_options(platform):
11 | try:
12 | namespace = config.file.get_safe(platform, 'namespace', 'rook-ceph')
13 | context = config.file.get_safe(platform, 'context', None)
14 | except RuntimeError:
15 | namespace = 'rook-ceph'
16 | context = None
17 | return {'namespace': namespace, 'context': context}
18 |
19 |
20 | def container_platform(platform='openshift'):
21 | """
22 | Connect to a container platform (kubernetes or openshift), retrieve all the
23 | available pods that match the namespace (defaults to 'rook-ceph'), and
24 | return a dictionary including them, regardless of state.
25 | """
26 | local_conn = connection.get('local')()
27 | options = _platform_options(platform)
28 | context = options.get('context')
29 | namespace = options.get('namespace')
30 | executable = 'oc' if platform == 'openshift' else 'kubectl'
31 |
32 | if context:
33 | cmd = [executable, '--context', context]
34 | else:
35 | cmd = [executable]
36 |
37 | cmd.extend(['--request-timeout=5', 'get', '-n', namespace, 'pods', '-o', 'json'])
38 |
39 | try:
40 | out, err, code = process.check(local_conn, cmd)
41 | except RuntimeError:
42 | out = "{}"
43 | terminal.error('Unable to retrieve the pods using command: %s' % ' '.join(cmd))
44 | else:
45 | if code:
46 | output = out + err
47 | for line in output:
48 | terminal.error(line)
49 |
50 | try:
51 | pods = json.loads(''.join(out))
52 | except Exception:
53 | # Python3 has JSONDecodeError which doesn't exist in Python2
54 | # Python2 just raises ValueError
55 | stdout = ''.join(out)
56 | stderr = ''.join(err)
57 | logger.exception('Invalid JSON from stdout')
58 | terminal.error('Unable to load JSON from stdout')
59 | if stdout:
60 | logger.error('stdout: %s', stdout)
61 | terminal.error('stdout: %s' % stdout)
62 | if stderr:
63 | logger.error('stderr: %s', stderr)
64 | terminal.error('stderr: %s' % stderr)
65 | raise SystemExit(1)
66 |
67 | base_inventory = {
68 | 'rgws': [], 'mgrs': [], 'mdss': [], 'clients': [], 'osds': [], 'mons': []
69 | }
70 | label_map = {
71 | 'rook-ceph-mgr': 'mgrs',
72 | 'rook-ceph-mon': 'mons',
73 | 'rook-ceph-osd': 'osds',
74 | 'rook-ceph-mds': 'mdss',
75 | 'rook-ceph-rgw': 'rgws',
76 | 'rook-ceph-client': 'clients',
77 | }
78 |
79 | for item in pods.get('items', {}):
80 | label_name = item['metadata'].get('labels', {}).get('app')
81 | if not label_name:
82 | continue
83 | if label_name in label_map:
84 | inventory_key = label_map[label_name]
85 | base_inventory[inventory_key].append(
86 | {'host': item['metadata']['name'], 'group': None}
87 | )
88 | for key, value in dict(base_inventory).items():
89 | if not value:
90 | base_inventory.pop(key)
91 | return base_inventory
92 |
93 |
94 | def basic_containers(deployment_type):
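    """
    Build the inventory for plain containerized deployments (``docker`` or
    ``podman``): list the containers on every configured host, inspect each
    one, and group it by the ``CEPH_DAEMON`` role found in its environment,
    skipping containers that do not belong to the configured cluster.
    """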
95 | base_inventory = {
96 | 'rgws': [], 'mgrs': [], 'mdss': [], 'clients': [], 'osds': [],
97 | 'mons': []
98 | }
99 | label_map = {
100 | 'OSD': 'osds',
101 | 'OSD_CEPH_VOLUME_ACTIVATE': 'osds',
102 | 'MON': 'mons',
103 | 'MGR': 'mgrs',
104 | 'MDS': 'mdss',
105 | 'RGW': 'rgws',
106 | }
107 | metal_hosts = set()
108 | for nodes in config.nodes.values():
109 | for node in nodes:
110 | metal_hosts.add(node['host'])
111 | for host in metal_hosts:
112 | logger.debug("listing containers for host %s", host)
113 | cmd = [deployment_type, 'container', 'ls', '--format',
114 | '"{{ .Names }}"']
115 | conn = ceph_medic.connection.get_connection(
116 | host, deployment_type='ssh')
117 | out, err, code = process.check(conn, cmd)
118 | if code:
119 | terminal.error("Unable to list containers on host %s" % host)
120 | continue
121 |         container_list = [i.strip('"') for i in out]
122 | if not container_list:
123 | terminal.warning("Host %s had no containers" % host)
124 | continue
125 | for container_name in container_list:
126 | cmd = [deployment_type, 'container', 'inspect', container_name]
127 | out, err, code = process.check(conn, cmd)
128 | if code:
129 | terminal.error(
130 | "Unable to inspect container %s on host %s" %
131 | (container_name, host)
132 | )
133 | continue
134 | detail = json.loads(''.join(out))[0]
135 | env = dict(
136 | [s.split('=', 1) for s in detail['Config']['Env']])
137 | if 'CEPH_DAEMON' not in env:
138 | continue
139 | if env.get('CLUSTER') != config.cluster_name:
140 | continue
141 | role = env['CEPH_DAEMON']
142 | if role not in label_map:
143 | continue
144 | base_inventory[label_map[role]].append(
145 | {'host': host, 'container': container_name, 'group': None}
146 | )
147 | return base_inventory
148 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # ceph-medic documentation build configuration file, created by
4 | # sphinx-quickstart on Tue Jun 27 14:32:23 2017.
5 | #
6 | # This file is execfile()d with the current directory set to its
7 | # containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 |
15 | # If extensions (or modules to document with autodoc) are in another directory,
16 | # add these directories to sys.path here. If the directory is relative to the
17 | # documentation root, use os.path.abspath to make it absolute, like shown here.
18 | #
19 | # import os
20 | # import sys
21 | import os
22 | import sys
23 | # sys.path.insert(0, os.path.abspath('.'))
24 | sys.path.append(os.path.abspath('_themes'))
25 |
26 |
27 | # -- General configuration ------------------------------------------------
28 |
29 | # If your documentation needs a minimal Sphinx version, state it here.
30 | #
31 | # needs_sphinx = '1.0'
32 |
33 | # Add any Sphinx extension module names here, as strings. They can be
34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
35 | # ones.
36 | extensions = []
37 |
38 | # Add any paths that contain templates here, relative to this directory.
39 | templates_path = ['_templates']
40 |
41 | # The suffix(es) of source filenames.
42 | # You can specify multiple suffix as a list of string:
43 | #
44 | # source_suffix = ['.rst', '.md']
45 | source_suffix = '.rst'
46 |
47 | # The master toctree document.
48 | master_doc = 'contents'
49 |
50 | # General information about the project.
51 | project = u'ceph-medic'
52 | copyright = u'2017, Andrew Schoen, Alfredo Deza'
53 | author = u'Andrew Schoen, Alfredo Deza'
54 |
55 | # The version info for the project you're documenting, acts as replacement for
56 | # |version| and |release|, also used in various other places throughout the
57 | # built documents.
58 | #
59 | # The short X.Y version.
60 | version = u'0.0.1'
61 | # The full version, including alpha/beta/rc tags.
62 | release = u'0.0.1'
63 |
64 | # The language for content autogenerated by Sphinx. Refer to documentation
65 | # for a list of supported languages.
66 | #
67 | # This is also used if you do content translation via gettext catalogs.
68 | # Usually you set "language" from the command line for these cases.
69 | language = None
70 |
71 | # List of patterns, relative to source directory, that match files and
72 | # directories to ignore when looking for source files.
73 | # This patterns also effect to html_static_path and html_extra_path
74 | exclude_patterns = []
75 |
76 | # The name of the Pygments (syntax highlighting) style to use.
77 | pygments_style = 'sphinx'
78 |
79 | # If true, `todo` and `todoList` produce output, else they produce nothing.
80 | todo_include_todos = False
81 |
82 |
83 | # -- Options for HTML output ----------------------------------------------
84 |
85 | # The theme to use for HTML and HTML Help pages. See the documentation for
86 | # a list of builtin themes.
87 | #
88 | html_theme = 'ceph'
89 | # Add any paths that contain custom themes here, relative to this directory.
90 | html_theme_path = ['_themes']
91 | html_show_sphinx = False
92 | html_sidebars = {
93 | '**': ['smarttoc.html', 'searchbox.html'],
94 | }
95 |
96 | # Theme options are theme-specific and customize the look and feel of a theme
97 | # further. For a list of options available for each theme, see the
98 | # documentation.
99 | #
100 | # html_theme_options = {}
101 |
102 | # Add any paths that contain custom static files (such as style sheets) here,
103 | # relative to this directory. They are copied after the builtin static files,
104 | # so a file named "default.css" will overwrite the builtin "default.css".
105 | html_static_path = ['_static']
106 |
107 |
108 | # -- Options for HTMLHelp output ------------------------------------------
109 |
110 | # Output file base name for HTML help builder.
111 | htmlhelp_basename = 'ceph-medicdoc'
112 |
113 |
114 | # -- Options for LaTeX output ---------------------------------------------
115 |
116 | latex_elements = {
117 | # The paper size ('letterpaper' or 'a4paper').
118 | #
119 | # 'papersize': 'letterpaper',
120 |
121 | # The font size ('10pt', '11pt' or '12pt').
122 | #
123 | # 'pointsize': '10pt',
124 |
125 | # Additional stuff for the LaTeX preamble.
126 | #
127 | # 'preamble': '',
128 |
129 | # Latex figure (float) alignment
130 | #
131 | # 'figure_align': 'htbp',
132 | }
133 |
134 | # Grouping the document tree into LaTeX files. List of tuples
135 | # (source start file, target name, title,
136 | # author, documentclass [howto, manual, or own class]).
137 | latex_documents = [
138 | (master_doc, 'ceph-medic.tex', u'ceph-medic Documentation',
139 | u'Andrew Schoen, Alfredo Deza', 'manual'),
140 | ]
141 |
142 |
143 | # -- Options for manual page output ---------------------------------------
144 |
145 | # One entry per manual page. List of tuples
146 | # (source start file, name, description, authors, manual section).
147 | man_pages = [
148 | (master_doc, 'ceph-medic', u'ceph-medic Documentation',
149 | [author], 1)
150 | ]
151 |
152 |
153 | # -- Options for Texinfo output -------------------------------------------
154 |
155 | # Grouping the document tree into Texinfo files. List of tuples
156 | # (source start file, target name, title, author,
157 | # dir menu entry, description, category)
158 | texinfo_documents = [
159 | (master_doc, 'ceph-medic', u'ceph-medic Documentation',
160 |      author, 'ceph-medic', 'A utility to run system checks on a Ceph cluster.',
161 | 'Miscellaneous'),
162 | ]
163 |
164 |
165 |
166 |
--------------------------------------------------------------------------------
/ceph_medic/terminal.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 |
4 | class colorize(str):
5 | """
6 | Pretty simple to use::
7 |
8 | colorize.make('foo').bold
9 | colorize.make('foo').green
10 | colorize.make('foo').yellow
11 | colorize.make('foo').red
12 | colorize.make('foo').blue
13 |
14 | Otherwise you could go the long way (for example if you are
15 | testing this class)::
16 |
17 | string = colorize('foo')
18 | string._set_attributes()
19 | string.red
20 |
21 | """
22 |
23 | def __init__(self, string):
24 | self.stdout = sys.__stdout__
25 | self.appends = ''
26 | self.prepends = ''
27 | self.isatty = self.stdout.isatty()
28 |
29 | def _set_attributes(self):
30 | """
31 |         Sets the attributes here because the ``str`` class does not
32 |         allow passing in anything other than a string to the constructor,
33 |         so we can't really mess with the other attributes.
34 | """
35 | for k, v in self.__colors__.items():
36 | setattr(self, k, self.make_color(v))
37 |
38 | def make_color(self, color):
39 | if not self.isatty or self.is_windows:
40 | return self
41 | return color + self + '\033[0m' + self.appends
42 |
43 | @property
44 | def __colors__(self):
45 | return dict(
46 | blue = '\033[34m',
47 | green = '\033[92m',
48 | yellow = '\033[33m',
49 | red = '\033[91m',
50 | bold = '\033[1m',
51 | ends = '\033[0m'
52 | )
53 |
54 | @property
55 | def is_windows(self):
56 | if sys.platform == 'win32':
57 | return True
58 | return False
59 |
60 | @classmethod
61 | def make(cls, string):
62 | """
63 |         A helper method to return itself and work around the fact that
64 |         the ``str`` object doesn't allow extra arguments passed in to the
65 |         constructor.
66 | """
67 | obj = cls(string)
68 | obj._set_attributes()
69 | return obj
70 |
71 | #
72 | # Common string manipulations
73 | #
74 | red_arrow = colorize.make('-->').red
75 | blue_arrow = colorize.make('-->').blue
76 | yellow = lambda x: colorize.make(x).yellow
77 | blue = lambda x: colorize.make(x).blue
78 | green = lambda x: colorize.make(x).green
79 | red = lambda x: colorize.make(x).red
80 | bold = lambda x: colorize.make(x).bold
81 |
82 |
83 | CRITICAL = 5
84 | ERROR = 4
85 | WARNING = 3
86 | INFO = 2
87 | DEBUG = 1
88 |
89 | _level_names = {
90 | CRITICAL : 'critical',
91 | WARNING : 'warning',
92 | INFO : 'info',
93 | ERROR : 'error',
94 | DEBUG : 'debug'
95 | }
96 |
97 | _reverse_level_names = dict((v, k) for (k, v) in _level_names.items())
98 |
99 | _level_colors = {
100 | 'remote' : 'bold',
101 | 'critical' : 'red',
102 | 'warning' : 'yellow',
103 | 'info' : 'blue',
104 | 'debug' : 'blue',
105 | 'error' : 'red'
106 | }
107 |
108 |
109 | class _Write(object):
110 |
111 | def __init__(self, _writer=None, prefix='', suffix='', clear_line=False, flush=False):
112 | self._writer = _writer or sys.stdout
113 | self.suffix = suffix
114 | self.prefix = prefix
115 | self.flush = flush
116 | self.clear_line = clear_line
117 |
118 | def bold(self, string):
119 | self.write(bold(string))
120 |
121 | def raw(self, string):
122 | self.write(string + '\n')
123 |
124 | def write(self, line):
125 | padding = ''
126 | if self.clear_line:
127 | if len(line) > 80:
128 | padding = ' ' * 10
129 | else:
130 | padding = ' ' * (80 - len(line))
131 | line = line + padding
132 | self._writer.write(self.prefix + line + self.suffix)
133 | if self.flush:
134 | self._writer.flush()
135 |
136 |
137 | write = _Write()
138 | loader = _Write(prefix='\r', clear_line=True)
139 |
140 |
141 | class LogMessage(object):
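    """
    A single terminal message with a colored ``-->`` header; it is only
    written when its level meets the configured verbosity (see ``skip()``).
    """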
142 |
143 | def __init__(self, level_name, message, writer=None, config_level=None):
144 | self.level_name = level_name
145 | self.message = message
146 | self.writer = writer or sys.stdout
147 | self.config_level = config_level or self.get_config_level()
148 |
149 | def skip(self):
150 | if self.level_int >= self.config_level:
151 | return False
152 | return True
153 |
154 | def header(self):
155 | colored = colorize.make(self.base_string)
156 | return getattr(colored, self.level_color)
157 |
158 | @property
159 | def base_string(self):
160 | if self.config_level < 2:
161 | return "--> [%s]" % self.level_name
162 | return "-->"
163 |
164 | @property
165 | def level_int(self):
166 | if self.level_name == 'remote':
167 | return 2
168 | return _reverse_level_names.get(self.level_name, 4)
169 |
170 | @property
171 | def level_color(self):
172 | return _level_colors.get(self.level_name, 'info')
173 |
174 | def line(self):
175 | msg = self.message.rstrip('\n')
176 | return "%s %s\n" % (self.header(), msg)
177 |
178 | def write(self):
179 | if not self.skip():
180 | self.writer.write(self.line())
181 |
182 | def get_config_level(self):
183 | import ceph_medic
184 | level = ceph_medic.config.verbosity
185 | return _reverse_level_names.get(level, 4)
186 |
187 |
188 | def error(message):
189 | return LogMessage('error', message).write()
190 |
191 |
192 | def debug(message):
193 | return LogMessage('debug', message).write()
194 |
195 |
196 | def info(message):
197 | return LogMessage('info', message).write()
198 |
199 |
200 | def warning(message):
201 | return LogMessage('warning', message).write()
202 |
203 |
204 | def critical(message):
205 | return LogMessage('critical', message).write()
206 |
--------------------------------------------------------------------------------
/ceph_medic/tests/test_collector.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from ceph_medic import collector, metadata
4 | from mock import Mock
5 |
6 |
7 | class FakeConnRemoteModule(object):
8 | """
9 | A fake remote_module class to be used
10 | with Mocked connection objects.
11 |
12 | This class contains stubbed methods for functions
13 | in ceph_medic.remote.functions which get their return
14 | value from the class attribute return_values.
15 |
16 | When creating an instance pass a dictionary that maps
17 | function names to their return values.
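
    For instance, to stub ``path_tree`` on a mocked connection (mirroring
    ``get_mock_connection`` below)::

        conn.remote_module = FakeConnRemoteModule({
            'path_tree': {'files': ['file1.txt'], 'dirs': ['dir1']},
        })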
18 | """
19 |
20 | def __init__(self, return_values):
21 | self.return_values = return_values
22 |
23 | def stat_path(self, *args, **kwargs):
24 | return self.return_values.get('stat_path', {})
25 |
26 | def path_tree(self, *args, **kwargs):
27 | return self.return_values.get('path_tree', {})
28 |
29 |
30 | def get_tree(files=None, dirs=None):
31 | if files is None:
32 | files = ["file1.txt"]
33 | if dirs is None:
34 | dirs = ["dir1"]
35 | tree = dict(
36 | files=files,
37 | dirs=dirs,
38 | )
39 | return tree
40 |
41 |
42 | def get_mock_connection(data=None, files=None, dirs=None):
43 | conn = Mock()
44 | tree = get_tree(files=files, dirs=dirs)
45 | default_data = dict(
46 | path_tree=tree
47 | )
48 | data = data or default_data
49 | conn.remote_module = FakeConnRemoteModule(data)
50 | return conn
51 |
52 |
53 | class TestCollectPathMetadata(object):
54 |
55 | def test_metadata_includes_dirs(self):
56 | conn = get_mock_connection()
57 | result = collector.get_path_metadata(conn, "/some/path")
58 | assert "dirs" in result
59 |
60 | def test_metadata_includes_files(self):
61 | conn = get_mock_connection()
62 | result = collector.get_path_metadata(conn, "/some/path")
63 |         assert "files" in result
64 |
65 | def test_metadata_includes_root_path(self):
66 | conn = get_mock_connection()
67 | result = collector.get_path_metadata(conn, "/some/path")
68 | assert "/some/path" in result["dirs"]
69 |
70 | def test_collects_root_path_when_no_files_or_dirs(self):
71 | conn = get_mock_connection(files=[], dirs=[])
72 | result = collector.get_path_metadata(conn, "/some/path")
73 | assert "/some/path" in result["dirs"]
74 |
75 |
76 | class TestCollectPaths(object):
77 |
78 | @pytest.mark.parametrize(
79 | 'path',
80 | ['/etc/ceph', '/var/lib/ceph', '/var/run/ceph'],
81 | )
82 | def test_includes_paths(self, path, monkeypatch):
83 | def mock_metadata(conn, p, **kw):
84 | return dict()
85 | monkeypatch.setattr(collector, 'get_path_metadata', mock_metadata)
86 | result = collector.collect_paths(Mock())
87 | assert path in result
88 |
89 |
90 | class TestCollectSocketInfo(object):
91 |
92 | def tests_collects_sockets(self, monkeypatch):
93 | monkeypatch.setattr(collector.remote.commands, 'ceph_socket_version', lambda conn, socket: dict())
94 | monkeypatch.setattr(collector.remote.commands, 'daemon_socket_config', lambda conn, socket: dict())
95 | metadata = {
96 | 'paths': {
97 | '/var/run/ceph': {'files': ['/var/run/ceph/osd.asok']},
98 | },
99 | }
100 | result = collector.collect_socket_info(Mock(), metadata)
101 | assert '/var/run/ceph/osd.asok' in result
102 |
103 | def test_ignores_unknown_files(self, monkeypatch):
104 | monkeypatch.setattr(collector.remote.commands, 'ceph_socket_version', lambda conn, socket: dict())
105 | monkeypatch.setattr(collector.remote.commands, 'daemon_socket_config', lambda conn, socket: dict())
106 | metadata = {
107 | 'paths': {
108 | '/var/run/ceph': {'files': ['/var/run/ceph/osd.asok', '/var/run/ceph/osd.log']},
109 | },
110 | }
111 | result = collector.collect_socket_info(Mock(), metadata)
112 | assert '/var/run/ceph/osd.log' not in result
113 |
114 |
115 | class TestCollect(object):
116 |
117 | def test_ignores_unknown_group(self):
118 | metadata["nodes"] = dict(test=[])
119 | # raises a RuntimeError because all nodes fail to connect
120 | with pytest.raises(RuntimeError):
121 | collector.collect()
122 |
123 | def test_collects_node_metadata(self, monkeypatch):
124 | metadata["nodes"] = {
125 | "mons": [{"host": "mon0"}],
126 | "osds": [{"host": "osd0"}],
127 | }
128 | metadata["cluster_name"] = "ceph"
129 | def mock_metadata(conn, hostname, cluster_nodes):
130 | return dict(meta="data")
131 | monkeypatch.setattr(collector, "get_connection",
132 | lambda host, container=None: Mock())
133 | monkeypatch.setattr(collector, "get_node_metadata", mock_metadata)
134 | monkeypatch.setattr(collector, "collect_cluster", lambda x: {})
135 | collector.collect()
136 | assert "mon0" in metadata["mons"]
137 | assert "meta" in metadata["mons"]["mon0"]
138 |
139 |
140 | class TestGetNodeMetadata(object):
141 |
142 | @pytest.mark.parametrize(
143 | 'key',
144 | ['ceph', 'devices', 'paths', 'network',],
145 | )
146 | def test_collects_metadata(self, key, monkeypatch):
147 | def mock_metadata(*args, **kwargs):
148 | return dict(meta="data")
149 | monkeypatch.setattr(collector, "collect_devices", mock_metadata)
150 | monkeypatch.setattr(collector, "collect_paths", mock_metadata)
151 | monkeypatch.setattr(collector, "collect_network", mock_metadata)
152 | monkeypatch.setattr(collector, "collect_ceph_info", mock_metadata)
153 | monkeypatch.setattr(collector, "collect_socket_info", mock_metadata)
154 | monkeypatch.setattr(collector, "collect_ceph_osd_info", mock_metadata)
155 | result = collector.get_node_metadata(Mock(), "mon0", [])
156 | assert key in result
157 |
--------------------------------------------------------------------------------
/ceph_medic/remote/functions.py:
--------------------------------------------------------------------------------
1 | import os
2 | import grp
3 | import pwd
4 | import traceback
5 | import sys
6 | import subprocess
7 |
8 |
9 | # Utilities
10 | #
11 | def capture_exception(error):
12 | details = {'attributes': {}}
13 | details['name'] = error.__class__.__name__
14 | details['repr'] = str(error)
15 | exc_type, exc_value, exc_traceback = sys.exc_info()
16 | details['traceback'] = ''.join(traceback.format_exception(exc_type, exc_value, exc_traceback))
17 | for attr in dir(error):
18 | if not attr.startswith('__'):
19 | try:
20 | details['attributes'][attr] = str(getattr(error, attr))
21 | except Exception:
22 | # getting an exception here is entirely possible, and since
23 | # there is no remote logging there is nothing we can do other
24 | # than eat it up. This section is going through each of the
25 | # attributes of the exception raised so it is mildly acceptable
26 | # to skip if anything is breaking
27 | details['attributes'][attr] = None
28 | return details
29 |
30 |
31 | def decoded(string):
32 | try:
33 | return string.decode('utf-8')
34 | except AttributeError:
35 | return string
36 |
37 |
38 | # Paths
39 | #
40 | def stat_path(path, skip_dirs=None, skip_files=None, get_contents=False):
41 | """stat a path on a remote host"""
42 | # Capture all information about a path, optionally getting the contents of
43 | # the remote path if it is a file. Exceptions get appended to each dictionary
44 | # object associated with the path
45 |
46 | # .. note:: Neither ``skip_dirs`` nor ``skip_files`` is used here, but the
47 |     # remote execution of functions uses name-based arguments, which does not allow
48 | # the use of ``**kw``
49 | metadata = {u'exception': {}}
50 | path = decoded(path)
51 | try:
52 | stat_info = os.stat(path)
53 | if get_contents and os.path.isfile(path):
54 | with open(path, 'r') as opened_file:
55 | metadata[u'contents'] = decoded(opened_file.read())
56 | except Exception as error:
57 | return {'exception': capture_exception(error)}
58 |
59 | allowed_attrs = [
60 | 'n_fields', 'n_sequence_fields', 'n_unnamed_fields', 'st_atime',
61 | 'st_blksize', 'st_blocks', 'st_ctime', 'st_dev', 'st_gid', 'st_ino',
62 | 'st_mode', 'st_mtime', 'st_nlink', 'st_rdev', 'st_size', 'st_uid'
63 | ]
64 |
65 | # get all the stat results back into the metadata
66 | for attr in dir(stat_info):
67 | attr = decoded(attr)
68 | if attr in allowed_attrs:
69 | value = decoded(getattr(stat_info, attr))
70 | metadata[attr] = value
71 |
72 | # translate the owner and group:
73 | try:
74 | metadata[u'owner'] = decoded(pwd.getpwuid(stat_info.st_uid)[0])
75 | except KeyError:
76 | metadata[u'owner'] = stat_info.st_uid
77 | try:
78 | metadata[u'group'] = decoded(grp.getgrgid(stat_info.st_gid)[0])
79 | except KeyError:
80 | metadata[u'group'] = stat_info.st_gid
81 |
82 | return metadata
83 |
84 |
85 | def path_tree(path, skip_dirs=None, skip_files=None, get_contents=None):
86 | """generate a path tree"""
87 | # Generate a tree of paths, including directories and files, recursively, but
88 | # with the ability to exclude dirs and files with ``skip_dirs`` and
89 | # ``skip_files``.
90 | # The tree output groups the files and directories like::
91 |
92 | # {
93 | # 'path': '/etc/ceph',
94 | # 'dirs': ['/etc/ceph/ceph.d/'],
95 | # 'files': ['/etc/ceph/ceph.d/test.conf', '/etc/ceph/rbdmap']
96 | # }
97 |
98 | # .. note:: ``get_contents`` is not used here, but the remote execution of functions
99 |     # uses name-based arguments, which does not allow the use of ``**kw``
100 | try:
101 | path = path.decode('utf-8')
102 | except AttributeError:
103 | pass
104 | skip_files = skip_files or []
105 | skip_dirs = skip_dirs or []
106 | files = []
107 | dirs = []
108 | # traverse for files and directories, topdown allows us to trim the
109 | # directories on the fly
110 | for root, _dirs, _files in os.walk(path, topdown=True):
111 | _dirs[:] = [d for d in _dirs if d not in skip_dirs]
112 | for _file in _files:
113 | absolute_path = os.path.join(root, _file)
114 | if _file in skip_files:
115 | continue
116 | files.append(absolute_path)
117 |
118 | for _dir in _dirs:
119 | absolute_path = os.path.join(root, _dir)
120 | dirs.append(absolute_path)
121 |
122 |     # using the 'u' prefix forces python3<->python2 compatibility, otherwise the
123 |     # keys would be bytes regardless of whether the input is a str, which should
124 |     # have forced 'str' behavior. The prefix is invalid syntax for Python 3.0 to
125 |     # 3.2, so this is valid in Python 3.3 and newer, and in Python 2
126 | return {u'path': path, u'dirs': dirs, u'files': files}
127 |
128 |
129 | def which(executable):
130 | """find the location of an executable"""
131 | locations = (
132 | '/usr/local/bin',
133 | '/bin',
134 | '/usr/bin',
135 | '/usr/local/sbin',
136 | '/usr/sbin',
137 | '/sbin',
138 | )
139 |
140 | for location in locations:
141 | executable_path = os.path.join(location, executable)
142 | if os.path.exists(executable_path):
143 | return executable_path
144 |
145 |
146 | def run(command):
147 | """
148 | run a command, return stdout, stderr, and exit code.
149 | """
150 | process = subprocess.Popen(
151 | command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True
152 | )
153 | stdout = process.stdout.read().splitlines()
154 | stderr = process.stderr.read().splitlines()
155 | returncode = process.wait()
156 |
157 | return stdout, stderr, returncode
158 |
159 |
160 | # remoto magic, needed to execute these functions remotely
161 | if __name__ == '__channelexec__':
162 | for item in channel: # noqa
163 | channel.send(eval(item)) # noqa
164 |
--------------------------------------------------------------------------------
/ceph_medic/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import random
3 | from ceph_medic import runner
4 | import ceph_medic
5 | from ceph_medic.tests import base_metadata
6 |
7 |
8 | class FakeWriter(object):
9 |
10 | def __init__(self):
11 | self.calls = []
12 | self.write = self.raw
13 | self.loader = self
14 |
15 | def raw(self, string):
16 | self.calls.append(string)
17 |
18 | def bold(self, string):
19 | self.calls.append(string)
20 |
21 | def get_output(self):
22 | return '\n'.join(self.calls)
23 |
24 |
25 | @pytest.fixture(scope='class', autouse=True)
26 | def clear_metadata():
27 | ceph_medic.metadata = base_metadata
28 |
29 |
30 | @pytest.fixture
31 | def mon_keyring():
32 | def make_keyring(default=False):
33 | if default:
34 | key = "AQBvaBFZAAAAABAA9VHgwCg3rWn8fMaX8KL01A=="
35 | else:
36 | key = "%032x==" % random.getrandbits(128)
37 |
38 | return """
39 | [mon.]
40 | key = %s
41 | caps mon = "allow *"
42 | """ % key
43 | return make_keyring
44 |
45 |
46 | @pytest.fixture
47 | def terminal(monkeypatch):
48 | fake_writer = FakeWriter()
49 | monkeypatch.setattr(runner.terminal, 'write', fake_writer)
50 | return fake_writer
51 |
52 |
53 | @pytest.fixture
54 | def data():
55 | """
56 | Default data structure for remote nodes
57 | """
58 | def _data():
59 | return {
60 | 'ceph': {'installed': True, 'version': '12.2.1', 'sockets':{}},
61 | 'paths': {
62 | '/etc/ceph': {'files': {}, 'dirs': {}},
63 | '/var/lib/ceph': {'files': {}, 'dirs': {}},
64 | }
65 | }
66 | return _data
67 |
68 |
69 | @pytest.fixture
70 | def make_data(data, **kw):
71 | """
72 | Customize basic data structure on remote nodes
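
    For instance, to replace the default ``ceph`` entry entirely::

        make_data({'ceph': {'installed': False, 'version': None, 'sockets': {}}})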
73 | """
74 | def update(dictionary=None):
75 | base = data()
76 | if not dictionary:
77 | return base
78 | base.update(dictionary)
79 | return base
80 | return update
81 |
82 |
83 | @pytest.fixture
84 | def make_nodes():
85 | """
86 | Helper to generate nodes for daemons
87 | """
88 | def make_data(**kw):
89 | """
90 | ``kw`` is expected to be a mapping between daemon name and hosts for
91 | that daemon, like::
92 |
93 |             make_data(mons=['node1', 'node2'])
94 | """
95 | # default set of nodes
96 | data = dict(
97 | (k, {}) for k in ['rgws', 'mgrs', 'mdss', 'clients', 'osds', 'mons']
98 | )
99 | for daemon, node_names in kw.items():
100 | data[daemon] = [dict(host=node_name) for node_name in node_names]
101 | return data
102 | return make_data
103 |
104 |
105 | class Capture(object):
106 |
107 | def __init__(self, *a, **kw):
108 | self.a = a
109 | self.kw = kw
110 | self.calls = []
111 | self.return_values = kw.get('return_values', False)
112 | self.always_returns = kw.get('always_returns', False)
113 |
114 | def __call__(self, *a, **kw):
115 | self.calls.append({'args': a, 'kwargs': kw})
116 | if self.always_returns:
117 | return self.always_returns
118 | if self.return_values:
119 | return self.return_values.pop()
120 |
121 |
122 | class Factory(object):
123 |
124 | def __init__(self, **kw):
125 | for k, v in kw.items():
126 | setattr(self, k, v)
127 |
128 |
129 | @pytest.fixture
130 | def factory():
131 | return Factory
132 |
133 |
134 | @pytest.fixture
135 | def conn():
136 | """
137 |     Useful when trying to pass a ``conn`` object around that will probably want
138 | to log output
139 | """
140 | log = lambda x: x
141 | logger = Factory(error=log, exception=log)
142 | return Factory(logger=logger)
143 |
144 |
145 | @pytest.fixture
146 | def capture():
147 | return Capture()
148 |
149 |
150 | @pytest.fixture
151 | def fake_run(monkeypatch):
152 | fake_run = Capture()
153 | monkeypatch.setattr('remoto.process.run', fake_run)
154 | return fake_run
155 |
156 |
157 | @pytest.fixture
158 | def fake_check(monkeypatch):
159 | fake_call = Capture(always_returns=([], [], 0))
160 | monkeypatch.setattr('remoto.process.check', fake_call)
161 | return fake_call
162 |
163 |
164 | @pytest.fixture
165 | def stub_check(monkeypatch):
166 | """
167 | Monkeypatches process.check, so that a caller can add behavior to the
168 | response
169 | """
170 | def apply(return_values, module=None, string_module='remoto.process.check'):
171 | """
172 | ``return_values`` should be a tuple of 3 elements: stdout, stderr, and
173 | code. This should mimic the ``check()`` return values. For example::
174 |
175 | (['stdout'], ['stderr'], 0)
176 |
177 | Each item in the stdout or stderr lists represents a line.
178 | Additionally, if more than one response is wanted, a list with multiple
179 | tuples can be provided::
180 |
181 |
182 | [
183 | (['output'], [], 0),
184 | ([], ['error condition'], 1),
185 | (['output'], [], 0),
186 | ]
187 |
188 | When patching, most of the time the default ``string_module`` will be
189 | fine, but if it is required to patch an actual module with the added
190 |         string, then it is possible to use them accordingly: when the module is
191 | set, the call to ``monkeypatch`` will use both like::
192 |
193 | monkeypatch.setattr(module, 'function', value)
194 |
195 | Otherwise it will just patch it like::
196 |
197 | monkeypatch.setattr('remoto.process.check', value)
198 |
199 | """
200 | if isinstance(return_values, tuple):
201 | return_values = [return_values]
202 | stubbed_call = Capture(return_values=return_values)
203 | if module:
204 | monkeypatch.setattr(module, string_module, stubbed_call)
205 | else:
206 | monkeypatch.setattr(string_module, stubbed_call)
207 | return stubbed_call
208 |
209 | return apply
210 |
211 |
212 | @pytest.fixture(autouse=True)
213 | def reset_file_config(request, monkeypatch):
214 | """
215 | The globally available ``ceph_medic.config.file`` might get mangled in
216 |     tests; make sure that after every test it gets reset, preventing pollution
217 |     from leaking into other tests later.
218 | """
219 | def fin():
220 | ceph_medic.config.file = ceph_medic.UnloadedConfig()
221 | request.addfinalizer(fin)
222 |
--------------------------------------------------------------------------------
/ceph_medic/main.py:
--------------------------------------------------------------------------------
1 | from ceph_medic import check, log
2 | import sys
3 | import os
4 | from textwrap import dedent
5 | from tambo import Transport
6 | from execnet.gateway_bootstrap import HostNotFound
7 | import ceph_medic
8 | from ceph_medic.decorators import catches
9 | from ceph_medic.util import configuration, hosts
10 | from ceph_medic import terminal
11 |
12 |
13 | class Medic(object):
14 | _help = """
15 | ceph-medic: A utility to run system checks on a Ceph cluster.
16 |
17 | Version: {version}
18 |
19 | Global Options:
20 | --config Path to a specific configuration file. Overrides the default:
21 | $HOME/.cephmedic.conf.
22 | --cluster Use a specific cluster name (defaults to 'ceph'). Alternatively,
23 | this is inferred from a conf file name in /etc/ceph/
24 | --ssh-config Specify an alternate configuration for SSH
25 | --version, version Shows the current installed version
26 | --inventory Prefer a ceph-ansible inventory (hosts) file instead of default
27 | (cwd, /etc/ansible/hosts) locations
28 | --verbosity Set verbosity level of logging output
29 |
30 | {sub_help}
31 |
32 | {config_path_header}: {config_path}
33 | {hosts_file_header}: {hosts_file}
34 | {configured_nodes}
35 | """
36 | mapper = {
37 | 'check': check.Check,
38 | # TODO: this needs a bit more work, disabling for now
39 | #'generate': generate.Generate,
40 | }
41 |
42 | def __init__(self, argv=None, parse=True):
43 | if argv is None:
44 | argv = sys.argv
45 | if parse:
46 | self.main(argv)
47 |
48 | def help(self, sub_help=None):
49 | if self.hosts_file is None:
50 | hosts_file_header = terminal.red('Loaded Inventory Hosts file')
51 | hosts_file = 'No hosts file found in cwd, /etc/ansible/, or configured'
52 | else:
53 | hosts_file_header = terminal.green('Loaded Inventory Hosts file')
54 | hosts_file = self.hosts_file
55 | return self._help.format(
56 | version=ceph_medic.__version__,
57 | config_path=self.config_path,
58 | config_path_header=terminal.green('Loaded Config Path'),
59 | hosts_file=hosts_file,
60 | hosts_file_header=hosts_file_header,
61 | sub_help=sub_help,
62 | configured_nodes=self.configured_nodes
63 | )
64 |
65 | @property
66 | def configured_nodes(self):
67 | _help = dedent("""
68 | Configured nodes (loaded from inventory hosts file):
69 | OSDs: {osd_node_count}
70 | MONs: {mon_node_count}
71 | MGRs: {mgr_node_count}
72 | MDSs: {mds_node_count}
73 | RGWs: {rgw_node_count}""")
74 | if self.hosts_file: # we have nodes that have been loaded
75 | nodes = ceph_medic.config.nodes
76 | return _help.format(
77 | osd_node_count=len(nodes.get('osds', [])),
78 | mon_node_count=len(nodes.get('mons', [])),
79 | mds_node_count=len(nodes.get('mdss', [])),
80 | mgr_node_count=len(nodes.get('mgrs', [])),
81 | rgw_node_count=len(nodes.get('rgws', []))
82 | )
83 | return ''
84 |
85 | @catches((RuntimeError, KeyboardInterrupt, HostNotFound))
86 | def main(self, argv):
87 | options = [
88 | '--cluster', '--ssh-config', '--inventory',
89 | '--config', '--verbosity',
90 | ]
91 | parser = Transport(
92 | argv, options=options,
93 | check_help=False,
94 | check_version=False
95 | )
96 | parser.parse_args()
97 |
98 | self.config_path = parser.get('--config', configuration.location())
99 |
100 | # load medic configuration
101 | loaded_config = configuration.load(path=parser.get('--config', self.config_path))
102 |
103 | # this is the earliest we can have enough config to setup logging
104 | log.setup(loaded_config)
105 | ceph_medic.config.file = loaded_config
106 | global_options = dict(ceph_medic.config.file._sections['global'])
107 |
108 | # SSH config
109 | ceph_medic.config.ssh_config = parser.get('--ssh-config', global_options.get('--ssh-config'))
110 | if ceph_medic.config.ssh_config:
111 | ssh_config_path = ceph_medic.config.ssh_config
112 | if not os.path.exists(ssh_config_path):
113 | terminal.error("the given ssh config path does not exist: %s" % ssh_config_path)
114 |             sys.exit(1)
115 |
116 | ceph_medic.config.cluster_name = parser.get('--cluster', 'ceph')
117 | ceph_medic.metadata['cluster_name'] = 'ceph'
118 |
119 | # Deployment Type
120 | deployment_type = ceph_medic.config.file.get_safe('global', 'deployment_type', 'baremetal')
121 | if deployment_type in ['kubernetes', 'openshift', 'k8s', 'oc']:
122 | pod_hosts = hosts.container_platform(deployment_type)
123 | ceph_medic.config.nodes = pod_hosts
124 | ceph_medic.config.hosts_file = ':memory:'
125 | self.hosts_file = ':memory:'
126 | else:
127 | # Hosts file
128 | self.hosts_file = parser.get('--inventory', configuration.get_host_file())
129 |
130 | # find the hosts files, by the CLI first, fallback to the configuration
131 | # file, and lastly if none of those are found or defined, try to load
132 | # from well known locations (cwd, and /etc/ansible/)
133 | loaded_hosts = configuration.load_hosts(
134 | parser.get('--inventory',
135 | global_options.get('--inventory', self.hosts_file)))
136 | ceph_medic.config.nodes = loaded_hosts.nodes
137 | ceph_medic.config.hosts_file = loaded_hosts.filename
138 | self.hosts_file = loaded_hosts.filename
139 |
140 | if deployment_type in ['docker', 'podman']:
141 | ceph_medic.config.nodes = hosts.basic_containers(
142 | deployment_type)
143 |
144 | parser.catch_version = ceph_medic.__version__
145 | parser.mapper = self.mapper
146 | parser.catch_help = self.help(parser.subhelp())
147 | if len(argv) <= 1:
148 | return parser.print_help()
149 | ceph_medic.config.config_path = self.config_path
150 | parser.dispatch()
151 | parser.catches_help()
152 | parser.catches_version()
153 |
154 | # Verbosity
155 | verbosity = parser.get('--verbosity', 'debug')
156 | ceph_medic.config.verbosity = verbosity.lower()
157 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. ceph-medic documentation master file, created by
2 | sphinx-quickstart on Tue Jun 27 14:32:23 2017.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | =================================================
7 | Introduction
8 | =================================================
9 |
10 | ``ceph-medic`` is a very simple tool that runs against a Ceph cluster to detect
11 | common issues that might prevent correct functionality. It requires
12 | non-interactive SSH access to accounts that can ``sudo`` without a password
13 | prompt.
14 |
15 | Usage
16 | =====
17 |
18 | The basic usage of ``ceph-medic`` is to perform checks against a ceph cluster
19 | to identify potential issues with its installation or configuration. To do
20 | this, run the following command::
21 |
22 | ceph-medic --inventory /path/to/hosts --ssh-config /path/to/ssh_config check
23 |
24 | Inventory
25 | ---------
26 | ``ceph-medic`` needs to know the nodes that exist in your ceph cluster before
27 | it can perform checks. The inventory (or ``hosts`` file) is a typical Ansible
28 | inventory file and will be used to inform ``ceph-medic`` of the nodes in your
29 | cluster and their respective roles. The following standard host groups are
30 | supported by ``ceph-medic``: ``mons``, ``osds``, ``rgws``, ``mdss``, ``mgrs``
31 | and ``clients``. An example ``hosts`` file would look like::
32 |
33 | [mons]
34 | mon0
35 | mon1
36 |
37 | [osds]
38 | osd0
39 |
40 | [mgrs]
41 | mgr0
42 |
43 | The location of the ``hosts`` file can be passed into ``ceph-medic`` by using
44 | the ``--inventory`` CLI option (e.g. ``ceph-medic --inventory /path/to/hosts``).
45 |
46 | If the ``--inventory`` option is not defined, ``ceph-medic`` will first look in
47 | the current working directory for a file named ``hosts``. If the file does not
48 | exist, it will look for ``/etc/ansible/hosts`` to be used as the inventory.
49 |
50 | .. note:: Defining the inventory location is also possible via the config file
51 | under the ``[global]`` section.
52 |
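For example, using the same flag-style key that ``ceph-medic`` reads from the
``[global]`` section (this mirrors the ``--log-path`` setting shown later in
this document; adjust the path for your environment)::

    [global]

    --inventory = /path/to/hosts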
53 |
54 | Inventory for Containers
55 | ------------------------
56 | Containerized deployments are also supported, via ``docker`` and ``podman``.
57 | As with ``baremetal`` deployments, an inventory file is required. If the
58 | cluster was deployed with ``ceph-ansible``, you may use that existing
59 | inventory.
60 |
61 | To configure ``ceph-medic`` to connect to a containerized cluster, the global section of
62 | the configuration needs to set ``deployment_type`` to either ``docker`` or
63 | ``podman``. For example::
64 |
65 | [global]
66 |
67 | deployment_type = podman
68 |
69 |
70 | Inventory for Container Platforms
71 | ---------------------------------
72 | Both ``kubernetes`` and ``openshift`` platforms host containers remotely, but
73 | they allow ``ceph-medic`` to connect and retrieve information from a central location.
74 | To configure ``ceph-medic`` to connect to a platform, the global section of the
75 | configuration needs to set ``deployment_type`` to either ``kubernetes``, which
76 | uses the ``kubectl`` command, or ``openshift``, which uses the ``oc`` command. For example::
77 |
78 | [global]
79 |
80 | deployment_type = openshift
81 |
82 |
83 | When using ``openshift`` or ``kubernetes`` as a deployment type, there is no
84 | requirement to define a ``hosts`` file. The hosts are generated dynamically by
85 | calling out to the platform and retrieving the pods. When the pods are
86 | identified, they are grouped by daemon type (osd, mgr, rgw, mon, etc.).
87 |
88 | SSH Config
89 | ----------
90 |
91 | All nodes in your ``hosts`` file must be configured to provide non-interactive
92 | SSH access to accounts that can ``sudo`` without a password prompt.
93 |
94 | .. note::
95 | This is the same ssh config required by ansible. If you've used ``ceph-ansible`` to deploy your
96 | cluster then your nodes are most likely already configured for this type of ssh access. If that
97 | is the case, using the same user that performed the initial deployment would be easiest.
98 |
99 | To provide your ssh config you must use the ``--ssh-config`` flag and give it
100 | a path to a file that defines your ssh configuration. For example, a file like
101 | this is used to connect with a cluster comprised of vagrant vms::
102 |
103 | Host mon0
104 | HostName 127.0.0.1
105 | User vagrant
106 | Port 2200
107 | UserKnownHostsFile /dev/null
108 | StrictHostKeyChecking no
109 | PasswordAuthentication no
110 | IdentityFile /Users/andrewschoen/.vagrant.d/insecure_private_key
111 | IdentitiesOnly yes
112 | LogLevel FATAL
113 |
114 | Host osd0
115 | HostName 127.0.0.1
116 | User vagrant
117 | Port 2201
118 | UserKnownHostsFile /dev/null
119 | StrictHostKeyChecking no
120 | PasswordAuthentication no
121 | IdentityFile /Users/andrewschoen/.vagrant.d/insecure_private_key
122 | IdentitiesOnly yes
123 | LogLevel FATAL
124 |
125 |
126 | .. note:: SSH configuration is not needed when using ``kubernetes`` or
127 | ``openshift``
128 |
129 |
130 | Logging
131 | -------
132 |
133 | By default ``ceph-medic`` sends complete logs to the current working directory.
134 | This log file is more verbose than the output displayed on the terminal. To
135 | change where these logs are created, modify the default value for ``--log-path``
136 | in ``~/.cephmedic.conf``.
137 |
138 | Running checks
139 | --------------
140 |
141 | To perform checks against your cluster use the ``check`` subcommand. This will
142 | perform a series of general checks, as well as checks specific to each daemon.
143 | Sample output from this command will look like::
144 |
145 | ceph-medic --ssh-config vagrant_ssh_config check
146 | Host: mgr0 connection: [connected ]
147 | Host: mon0 connection: [connected ]
148 | Host: osd0 connection: [connected ]
149 | Collection completed!
150 |
151 | ======================= Starting remote check session ========================
152 | Version: 0.0.1 Cluster Name: "test"
153 | Total hosts: [3]
154 | OSDs: 1 MONs: 1 Clients: 0
155 | MDSs: 0 RGWs: 0 MGRs: 1
156 |
157 | ================================================================================
158 |
159 | ---------- managers ----------
160 | mgr0
161 |
162 | ------------ osds ------------
163 | osd0
164 |
165 | ------------ mons ------------
166 | mon0
167 |
168 | 17 passed, 0 errors, on 4 hosts
169 |
170 |
171 | The logging can also be configured in the ``cephmedic.conf`` file in the global
172 | section::
173 |
174 | [global]
175 | --log-path = .
176 |
177 | To ensure that cluster checks run properly, at least one monitor node should have administrative privileges.
178 |
--------------------------------------------------------------------------------
/ceph_medic/tests/test_runner.py:
--------------------------------------------------------------------------------
1 | import ceph_medic
2 | from ceph_medic import runner
3 | from ceph_medic.tests import base_metadata
4 | from textwrap import dedent
5 | from ceph_medic.util import configuration
6 |
7 |
8 | class TestRunner(object):
9 |
10 | def setup(self):
11 | runner.metadata = base_metadata
12 |
13 | def teardown(self):
14 | runner.metadata = base_metadata
15 |
16 | def test_calculate_total_hosts_is_0(self):
17 | run = runner.Runner()
18 | assert run.total_hosts == 0
19 |
20 | def test_calculate_hosts_single_daemon_type(self):
21 | ceph_medic.metadata['nodes']['osds'] = [{'host': 'node1'},{'host': 'node2'}]
22 | runner.metadata = ceph_medic.metadata
23 | run = runner.Runner()
24 | assert run.total_hosts == 2
25 |
26 | def test_count_from_different_daemon_types(self):
27 | ceph_medic.metadata['nodes']['osds'] = [{'host': 'node1'},{'host': 'node2'}]
28 | ceph_medic.metadata['nodes']['mons'] = [{'host': 'node3'},{'host': 'node4'}]
29 | runner.metadata = ceph_medic.metadata
30 | run = runner.Runner()
31 | assert run.total_hosts == 4
32 |
33 |
34 | class TestReport(object):
35 |
36 | def setup(self):
37 | runner.metadata = base_metadata
38 | runner.metadata['nodes'] = {}
39 | self.results = runner.Runner()
40 |
41 | def test_reports_unhandled_internal_errors(self, terminal):
42 | self.results.internal_errors = ['I am an error']
43 | runner.report(self.results)
44 | assert 'While running checks, ceph-medic had 1 unhandled errors' in terminal.calls[-1]
45 |
46 | def test_reports_no_errors(self, terminal):
47 | runner.report(self.results)
48 | assert terminal.calls[0] == '\n0 passed, on 0 hosts'
49 |
50 | def test_reports_warning(self, terminal):
51 | self.results.warnings = 1
52 | runner.report(self.results)
53 | assert terminal.calls[0] == '\n0 passed, 1 warning, on 0 hosts'
54 |
55 | def test_reports_warnings(self, terminal):
56 | self.results.warnings = 2
57 | runner.report(self.results)
58 | assert terminal.calls[0] == '\n0 passed, 2 warnings, on 0 hosts'
59 |
60 | def test_reports_error(self, terminal):
61 | self.results.errors = 1
62 | runner.report(self.results)
63 | assert terminal.calls[0] == '\n0 passed, 1 error, on 0 hosts'
64 |
65 | def test_reports_errors(self, terminal):
66 | self.results.errors = 2
67 | runner.report(self.results)
68 | assert terminal.calls[0] == '\n0 passed, 2 errors, on 0 hosts'
69 |
70 | def test_reports_error_and_warning(self, terminal):
71 | self.results.errors = 1
72 | self.results.warnings = 1
73 | runner.report(self.results)
74 | assert terminal.calls[0] == '\n0 passed, 1 error, 1 warning, on 0 hosts'
75 |
76 | def test_reports_errors_and_warnings(self, terminal):
77 | self.results.errors = 2
78 | self.results.warnings = 2
79 | runner.report(self.results)
80 | assert terminal.calls[0] == '\n0 passed, 2 errors, 2 warnings, on 0 hosts'
81 |
82 | def test_reports_internal_errors(self, terminal):
83 | self.results.internal_errors = ['error 1', 'error 2']
84 | self.results.warnings = 2
85 | runner.report(self.results)
86 | assert terminal.calls[0] == '\n0 passed, 2 warnings, 2 internal errors, on 0 hosts'
87 |
88 |
89 | class TestReportBasicOutput(object):
90 |
91 | def setup(self):
92 | contents = dedent("""
93 | [global]
94 | #
95 | """)
96 | conf = configuration.load_string(contents)
97 | ceph_medic.config.file = conf
98 | runner.metadata = base_metadata
99 | runner.metadata['cluster_name'] = 'ceph'
100 | runner.Runner().run()
101 |
102 | def teardown(self):
103 | runner.metadata = base_metadata
104 |
105 | def test_has_version(self, terminal):
106 | assert 'Version: ' in terminal.get_output()
107 |
108 | def test_has_cluster_name(self, terminal):
109 | assert 'Cluster Name: "ceph"' in terminal.get_output()
110 |
111 | def test_has_no_hosts(self, terminal):
112 | assert 'Total hosts: [0]' in terminal.get_output()
113 |
114 | def test_has_a_header(self, terminal):
115 | assert '== Starting remote check session ==' in terminal.get_output()
116 |
117 | def test_has_no_OSDs(self, terminal):
118 | assert 'OSDs: 0' in terminal.get_output()
119 |
120 | def test_has_no_MONs(self, terminal):
121 | assert 'MONs: 0' in terminal.get_output()
122 |
123 | def test_has_no_Clients(self, terminal):
124 | assert 'Clients: 0' in terminal.get_output()
125 |
126 | def test_has_no_MDSs(self, terminal):
127 | assert 'MDSs: 0' in terminal.get_output()
128 |
129 | def test_has_no_MGRs(self, terminal):
130 | assert 'MGRs: 0' in terminal.get_output()
131 |
132 | def test_has_no_RGWs(self, terminal):
133 | assert 'RGWs: 0' in terminal.get_output()
134 |
135 |
136 | class TestReportErrors(object):
137 |
138 | def setup(self):
139 | contents = dedent("""
140 | [global]
141 | #
142 | """)
143 | conf = configuration.load_string(contents)
144 | ceph_medic.config.file = conf
145 | runner.metadata = base_metadata
146 | runner.metadata['cluster_name'] = 'ceph'
147 | runner.Runner().run()
148 |
149 | def teardown(self):
150 | runner.metadata = base_metadata
151 |
152 | def test_get_new_lines_in_errors(self, terminal, mon_keyring, data, monkeypatch):
153 | data_node1 = data()
154 | data_node2 = data()
155 | data_node1['paths']['/var/lib/ceph']['files'] = {
156 | '/var/lib/ceph/mon/ceph-0/keyring': {'contents': mon_keyring()}
157 | }
158 | data_node1['paths']['/var/lib/ceph']['dirs'] = {
159 | '/var/lib/ceph/osd/ceph-10': {},
160 | '/var/lib/ceph/osd/ceph-11': {},
161 | '/var/lib/ceph/osd/ceph-12': {},
162 | '/var/lib/ceph/osd/ceph-13': {},
163 | '/var/lib/ceph/osd/ceph-0': {},
164 | '/var/lib/ceph/osd/ceph-1': {},
165 | '/var/lib/ceph/osd/ceph-2': {},
166 | '/var/lib/ceph/osd/ceph-3': {},
167 | }
168 |
169 | data_node2['paths']['/var/lib/ceph']['files'] = {
170 | '/var/lib/ceph/mon/ceph-1/keyring': {'contents': mon_keyring()},
171 | }
172 | data_node2['paths']['/var/lib/ceph']['dirs'] = {
173 | '/var/lib/ceph/osd/ceph-10': {},
174 | '/var/lib/ceph/osd/ceph-11': {},
175 | '/var/lib/ceph/osd/ceph-12': {},
176 | '/var/lib/ceph/osd/ceph-13': {},
177 | '/var/lib/ceph/osd/ceph-0': {},
178 | '/var/lib/ceph/osd/ceph-1': {},
179 | '/var/lib/ceph/osd/ceph-2': {},
180 | '/var/lib/ceph/osd/ceph-3': {},
181 | }
182 |
183 | # set the data everywhere we need it
184 | ceph_medic.metadata['mons'] = {'node1': data_node1, 'node2': data_node2}
185 | monkeypatch.setattr(ceph_medic.checks.mons, 'metadata', ceph_medic.metadata)
186 |
187 | runner.Runner().run()
188 | # Any line that is an error or a warning *must* end with a newline
189 | for line in terminal.calls:
190 | if line.lstrip().startswith(('E', 'W')):
191 | assert line.endswith('\n')
192 |
--------------------------------------------------------------------------------
/docs/source/_themes/ceph/static/nature.css_t:
--------------------------------------------------------------------------------
1 | /*
2 | * nature.css_t
3 | * ~~~~~~~~~~~~
4 | *
5 | * Sphinx stylesheet -- nature theme.
6 | *
7 | * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS.
8 | * :license: BSD, see LICENSE for details.
9 | *
10 | */
11 |
12 | @import url("basic.css");
13 |
14 | /* -- page layout ----------------------------------------------------------- */
15 |
16 | @font-face {
17 | font-family: 'ApexSansMedium';
18 | src: url('font/ApexSans-Medium.eot');
19 | src: url('font/ApexSans-Medium.eot?#iefix') format('embedded-opentype'),
20 | url('font/ApexSans-Medium.woff') format('woff'),
21 | url('font/ApexSans-Medium.ttf') format('truetype'),
22 | url('font/ApexSans-Medium.svg#FontAwesome') format('svg');
23 | font-weight: normal;
24 | font-style: normal;
25 | }
26 |
27 | @font-face {
28 | font-family: 'ApexSansBook';
29 | src: url('font/ApexSans-Book.eot');
30 | src: url('font/ApexSans-Book.eot?#iefix') format('embedded-opentype'),
31 | url('font/ApexSans-Book.woff') format('woff'),
32 | url('font/ApexSans-Book.ttf') format('truetype'),
33 | url('font/ApexSans-Book.svg#FontAwesome') format('svg');
34 | font-weight: normal;
35 | font-style: normal;
36 | }
37 |
38 | body {
39 | font: 14px/1.4 Helvetica, Arial, sans-serif;
40 | background-color: #E6E8E8;
41 | color: #37424A;
42 | margin: 0;
43 | padding: 0;
44 | border-top: 5px solid #F05C56;
45 | }
46 |
47 | div.documentwrapper {
48 | float: left;
49 | width: 100%;
50 | }
51 |
52 | div.bodywrapper {
53 | margin: 0 0 0 330px;
54 | }
55 |
56 | hr {
57 | border: 1px solid #B1B4B6;
58 | }
59 |
60 | div.document {
61 | background-color: #ffffff;
62 | }
63 |
64 | div.body {
65 | background-color: #ffffff;
66 | color: #3E4349;
67 | padding: 0 30px 30px 30px;
68 | }
69 |
70 | div.footer {
71 | color: #222B31;
72 | width: 100%;
73 | padding: 13px 0;
74 | text-align: center;
75 | font-size: 75%;
76 | }
77 |
78 | div.footer a {
79 | color: #444;
80 | text-decoration: underline;
81 | }
82 |
83 | div.related {
84 | background-color: #80D2DC;
85 | line-height: 32px;
86 | color: #37424A;
87 | // text-shadow: 0px 1px 0 #444;
88 | font-size: 100%;
89 | border-top: #9C4850 5px solid;
90 | }
91 |
92 | div.related a {
93 | color: #37424A;
94 | text-decoration: none;
95 | }
96 |
97 | div.related a:hover {
98 | color: #fff;
99 | // text-decoration: underline;
100 | }
101 |
102 | div.sphinxsidebar {
103 | // font-size: 100%;
104 | line-height: 1.5em;
105 | width: 330px;
106 | }
107 |
108 | div.sphinxsidebarwrapper{
109 | padding: 20px 0;
110 | background-color: #efefef;
111 | }
112 |
113 | div.sphinxsidebar h3,
114 | div.sphinxsidebar h4 {
115 | font-family: ApexSansMedium;
116 | color: #e6e8e8;
117 | font-size: 1.2em;
118 | font-weight: normal;
119 | margin: 0;
120 | padding: 5px 10px;
121 | background-color: #5e6a71;
122 | // text-shadow: 1px 1px 0 white;
123 | text-transform: uppercase;
124 | }
125 |
126 | div.sphinxsidebar h4{
127 | font-size: 1.1em;
128 | }
129 |
130 | div.sphinxsidebar h3 a {
131 | color: #e6e8e8;
132 | }
133 |
134 |
135 | div.sphinxsidebar p {
136 | color: #888;
137 | padding: 5px 20px;
138 | }
139 |
140 | div.sphinxsidebar p.topless {
141 | }
142 |
143 | div.sphinxsidebar ul {
144 | margin: 10px 5px 10px 20px;
145 | padding: 0;
146 | color: #000;
147 | }
148 |
149 | div.sphinxsidebar a {
150 | color: #444;
151 | }
152 |
153 | div.sphinxsidebar input {
154 | border: 1px solid #ccc;
155 | font-family: sans-serif;
156 | font-size: 1em;
157 | }
158 |
159 | div.sphinxsidebar input[type=text]{
160 | margin-left: 20px;
161 | }
162 |
163 | /* -- body styles ----------------------------------------------------------- */
164 |
165 | a {
166 | color: #F05C56;
167 | text-decoration: none;
168 | }
169 |
170 | a:hover {
171 | color: #F05C56;
172 | text-decoration: underline;
173 | }
174 |
175 | div.body h1,
176 | div.body h2,
177 | div.body h3,
178 | div.body h4,
179 | div.body h5,
180 | div.body h6 {
181 | // font-family: ApexSansMedium;
182 | // background-color: #80D2DC;
183 | // font-weight: normal;
184 | // color: #37424a;
185 | margin: 30px 0px 10px 0px;
186 | padding: 5px 0 5px 0px;
187 | // text-shadow: 0px 1px 0 white;
188 | text-transform: uppercase;
189 | }
190 |
191 | div.body h1 { font: 20px/2.0 ApexSansBook; color: #37424A; border-top: 20px solid white; margin-top: 0; }
192 | div.body h2 { font: 18px/1.8 ApexSansMedium; background-color: #5E6A71; color: #E6E8E8; padding: 5px 10px; }
193 | div.body h3 { font: 16px/1.6 ApexSansMedium; color: #37424A; }
194 | div.body h4 { font: 14px/1.4 Helvetica, Arial, sans-serif; color: #37424A; }
195 | div.body h5 { font: 12px/1.2 Helvetica, Arial, sans-serif; color: #37424A; }
196 | div.body h6 { font-size: 100%; color: #37424A; }
197 |
198 | // div.body h2 { font-size: 150%; background-color: #E6E8E8; color: #37424A; }
199 | // div.body h3 { font-size: 120%; background-color: #E6E8E8; color: #37424A; }
200 | // div.body h4 { font-size: 110%; background-color: #E6E8E8; color: #37424A; }
201 | // div.body h5 { font-size: 100%; background-color: #E6E8E8; color: #37424A; }
202 | // div.body h6 { font-size: 100%; background-color: #E6E8E8; color: #37424A; }
203 |
204 | a.headerlink {
205 | color: #c60f0f;
206 | font-size: 0.8em;
207 | padding: 0 4px 0 4px;
208 | text-decoration: none;
209 | }
210 |
211 | a.headerlink:hover {
212 | background-color: #c60f0f;
213 | color: white;
214 | }
215 |
216 | div.body p, div.body dd, div.body li {
217 | line-height: 1.5em;
218 | }
219 |
220 | div.admonition p.admonition-title + p {
221 | display: inline;
222 | }
223 |
224 | div.highlight{
225 | background-color: white;
226 | }
227 |
228 | div.note {
229 | background-color: #e6e8e8;
230 | border: 1px solid #ccc;
231 | }
232 |
233 | div.seealso {
234 | background-color: #ffc;
235 | border: 1px solid #ff6;
236 | }
237 |
238 | div.topic {
239 | background-color: #efefef;
240 | }
241 |
242 | div.warning {
243 | background-color: #F05C56;
244 | border: 1px solid #9C4850;
245 | color: #fff;
246 | }
247 |
248 | p.admonition-title {
249 | display: inline;
250 | }
251 |
252 | p.admonition-title:after {
253 | content: ":";
254 | }
255 |
256 | pre {
257 | padding: 10px;
258 | background-color: White;
259 | color: #222;
260 | line-height: 1.2em;
261 | border: 1px solid #5e6a71;
262 | font-size: 1.1em;
263 | margin: 1.5em;
264 | -webkit-box-shadow: 1px 1px 1px #e6e8e8;
265 | -moz-box-shadow: 1px 1px 1px #e6e8e8;
266 | }
267 |
268 | tt {
269 | background-color: #ecf0f3;
270 | color: #222;
271 | /* padding: 1px 2px; */
272 | font-size: 15px;
273 | font-family: monospace;
274 | }
275 |
276 | .viewcode-back {
277 | font-family: Arial, sans-serif;
278 | }
279 |
280 | div.viewcode-block:target {
281 | background-color: #f4debf;
282 | border-top: 1px solid #ac9;
283 | border-bottom: 1px solid #ac9;
284 | }
285 |
286 | table.docutils {
287 | margin: 1.5em;
288 | }
289 |
290 | div.sidebar {
291 | border: 1px solid #5E6A71;
292 | background-color: #E6E8E8;
293 | }
294 |
295 | div.admonition.tip {
296 | background-color: #80D2DC;
297 | border: 1px solid #55AEBA;
298 | }
299 |
300 | div.admonition.important {
301 | background-color: #F05C56;
302 | border: 1px solid #9C4850;
303 | color: #fff;
304 | }
305 |
306 | div.tip tt.literal {
307 | background-color: #55aeba;
308 | color: #fff;
309 | }
310 |
311 | div.important tt.literal {
312 | background-color: #9C4850;
313 | color: #fff;
314 | }
315 |
316 | h2 .literal {
317 | color: #fff;
318 | background-color: #37424a;
319 | }
320 |
321 | dl.glossary dt {
322 | font-size: 1.0em;
323 | padding-top:20px;
324 |
325 | }
--------------------------------------------------------------------------------
/ceph_medic/checks/common.py:
--------------------------------------------------------------------------------
1 | from collections import Counter
2 | from ceph_medic import metadata, daemon_types
3 | from ceph_medic.util import configuration, str_to_int
4 |
5 |
6 | #
7 | # Utilities
8 | #
9 |
10 | def get_fsid(data):
11 | # FIXME: might want to load this thing into ConfigParser so that we can fetch
12 | # information. ceph-deploy is a good example on how to do this. See:
13 | # https://github.com/ceph/ceph-deploy/blob/master/ceph_deploy/conf/ceph.py
14 | cluster_path = '/etc/ceph/%s.conf' % metadata['cluster_name']
15 | try:
16 | contents = data['paths']['/etc/ceph']['files'][cluster_path]['contents']
17 | except KeyError:
18 | return ''
19 | conf = configuration.load_string(contents)
20 | try:
21 | return conf.get_safe('global', 'fsid', '')
22 | except IndexError:
23 | return ''
24 |
25 |
26 | def get_common_fsid():
27 | """
28 |     Determine the most common cluster FSID. If all FSIDs agree we are fine; if
29 |     there is a mix, this is the baseline to compare against (see the sketch at the end of this module).
30 | """
31 | all_fsids = []
32 |
33 | for daemon_type in daemon_types:
34 | for node_metadata in metadata[daemon_type].values():
35 | fsids = get_host_fsids(node_metadata)
36 | all_fsids.extend(fsids)
37 |
38 | try:
39 | common_fsid = Counter(all_fsids).most_common()[0][0]
40 | except IndexError:
41 | return ''
42 | return common_fsid
43 |
44 |
45 | def get_host_fsids(node_metadata):
46 | """
47 | Return all the cluster FSIDs found for each socket in a host
48 | """
49 | all_fsids = []
50 | for socket_metadata in node_metadata['ceph']['sockets'].values():
51 | config = socket_metadata.get('config', {})
52 | if not config:
53 | continue
54 | fsid = config.get('fsid')
55 | if not fsid:
56 | continue
57 | all_fsids.append(fsid)
58 | return all_fsids
59 |
60 |
61 | #
62 | # Warning checks
63 | #
64 |
65 | def check_colocated_running_mons_osds(host, data):
66 | code = 'WCOM1'
67 |     msg = 'OSDs collocated with running MONs: %s'
68 | sockets = data['ceph']['sockets']
69 | running_mons = []
70 | running_osds = []
71 | for socket_name in sockets.keys():
72 | if "mon." in socket_name:
73 | running_mons.append(socket_name)
74 | elif "osd." in socket_name:
75 | running_osds.append(socket_name)
76 | if running_mons and running_osds:
77 | daemons = "\n %s" % ','.join(running_osds)
78 | return code, msg % daemons
79 |
80 |
81 | #
82 | # Error checks
83 | #
84 |
85 |
86 | def check_ceph_conf_exists(host, data):
87 | cluster_conf = '/etc/ceph/%s.conf' % metadata['cluster_name']
88 |
89 | files = data['paths']['/etc/ceph']['files'].keys()
90 | if cluster_conf not in files:
91 | msg = "%s does not exist" % cluster_conf
92 | return 'ECOM1', msg
93 |
94 |
95 | def check_ceph_executable_exists(host, data):
96 | if data['ceph']['installed'] is False:
97 | return 'ECOM2', 'ceph executable was not found in common paths when running `which`'
98 |
99 |
100 | def check_var_lib_ceph_dir(host, data):
101 | code = 'ECOM3'
102 | exception = data['paths']['/var/lib/ceph']['dirs']['/var/lib/ceph']['exception']
103 | if exception:
104 | msg = '/var/lib/ceph could not be parsed: %s' % exception['repr']
105 | return code, msg
106 |
107 |
108 | def check_var_lib_ceph_permissions(host, data):
109 | code = 'ECOM4'
110 | group = data['paths']['/var/lib/ceph']['dirs']['/var/lib/ceph']['group']
111 | owner = data['paths']['/var/lib/ceph']['dirs']['/var/lib/ceph']['owner']
112 |     if owner != 'ceph' or group != 'ceph':
113 | msg = '/var/lib/ceph has invalid ownership: %s:%s, should be ceph:ceph' % (owner, group)
114 | return code, msg
115 |
116 |
117 | def check_cluster_fsid(host, data):
118 | code = 'ECOM5'
119 | msg = 'fsid "%s" is different than host(s): %s'
120 | mismatched_hosts = []
121 |
122 | current_fsid = get_fsid(data)
123 |
124 |     # no fsid exists for the current host as defined in ceph.conf; let other
125 |     # checks report on that instead of flagging an empty FSID here
126 | if not current_fsid:
127 | return
128 |
129 | for daemon, hosts in metadata['nodes'].items():
130 |         for node in hosts:
131 |             hostname = node['host']
132 | host_fsid = get_fsid(metadata[daemon][hostname])
133 | if host_fsid and current_fsid != host_fsid:
134 | mismatched_hosts.append(hostname)
135 |
136 | if mismatched_hosts:
137 | return code, msg % (current_fsid, ','.join(mismatched_hosts))
138 |
139 |
140 | def check_ceph_version_parity(host, data):
141 | code = 'ECOM6'
142 | msg = '(installed) Ceph version "%s" is different than host(s): %s'
143 | mismatched_hosts = []
144 | host_version = data['ceph']['version']
145 | for daemon, hosts in metadata['nodes'].items():
146 |         for node in hosts:
147 |             hostname = node['host']
148 | version = metadata[daemon][hostname]['ceph']['version']
149 | if host_version != version:
150 | mismatched_hosts.append(hostname)
151 |
152 | if mismatched_hosts:
153 | return code, msg % (host_version, ','.join(mismatched_hosts))
154 |
155 |
156 | def check_ceph_socket_and_installed_version_parity(host, data):
157 | code = 'ECOM7'
158 | msg = '(installed) Ceph version "%s" is different than version from running socket(s): %s'
159 | mismatched_sockets = []
160 | host_version = data['ceph']['version']
161 | sockets = data['ceph']['sockets']
162 | for socket, socket_data in sockets.items():
163 | socket_version = socket_data['version'].get('version')
164 | if socket_version and socket_version not in host_version:
165 | mismatched_sockets.append("%s:%s" % (socket, socket_version))
166 |
167 | if mismatched_sockets:
168 | return code, msg % (host_version, ','.join(mismatched_sockets))
169 |
170 |
171 | def check_rgw_num_rados_handles(host, data):
172 | """
173 |     Although this is an RGW setting, the way Ceph handles configuration means
174 |     the effective value can differ per daemon. Since we already check every
175 |     host and every socket, this check is placed here with the rest of the
176 |     common checks.
177 | """
178 | code = 'WCOM7'
179 | msg = "rgw_num_rados_handles shouldn't be larger than 1, can lead to memory leaks: %s"
180 | sockets = data['ceph']['sockets']
181 | failed = []
182 | for socket, socket_data in sockets.items():
183 | config = socket_data.get('config', {})
184 | if not config:
185 | continue
186 | rgw_num_rados_handles = config.get('rgw_num_rados_handles', 1)
187 | name = socket.split('/var/run/ceph/')[-1]
188 | rgw_num_rados_handles = str_to_int(rgw_num_rados_handles)
189 | if rgw_num_rados_handles > 1:
190 | failed.append(name)
191 |
192 | if failed:
193 | return code, msg % ','.join(failed)
194 |
195 |
196 | def check_fsid_exists(host, data):
197 | code = 'ECOM8'
198 | msg = "'fsid' is missing in the ceph configuration"
199 |
200 | current_fsid = get_fsid(data)
201 | if not current_fsid:
202 | return code, msg
203 |
204 |
205 | def check_fsid_per_daemon(host, data):
206 | """
207 | In certain deployments types (hi rook!) the FSID will not be present in a
208 | ceph conf file - it will be passed in *directly* to the daemon as an
209 | argument. We aren't going to parse arguments, but the admin socket allows
210 | us to poke inside and check what cluster FSID the daemon is associated
211 | with.
212 | """
213 | code = 'ECOM9'
214 | msg = 'Found cluster FSIDs from running sockets different than: %s'
215 | sockets = data['ceph']['sockets']
216 | common_fsid = get_common_fsid()
217 | if not common_fsid: # is this even possible?
218 | return
219 |
220 | msg = msg % common_fsid
221 |
222 | failed = False
223 | for socket, socket_data in sockets.items():
224 | config = socket_data.get('config', {})
225 | if not config:
226 | continue
227 | socket_fsid = config.get('fsid')
228 | if not socket_fsid:
229 | continue
230 | if socket_fsid != common_fsid:
231 | name = socket.split('/var/run/ceph/')[-1]
232 | msg += '\n %s : %s' % (name, socket_fsid)
233 | failed = True
234 | if failed:
235 | return code, msg
236 |
237 |
238 | def check_multiple_running_mons(host, data):
239 | code = 'ECOM10'
240 | msg = 'multiple running mons found: %s'
241 | sockets = data['ceph']['sockets']
242 | running_mons = []
243 | for socket_name in sockets.keys():
244 | if "mon." in socket_name:
245 | running_mons.append(socket_name)
246 | if len(running_mons) > 1:
247 | return code, msg % ','.join(running_mons)
248 |
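249 |
250 | # Illustrative sketch only (the FSID strings are made up): it shows the
251 | # majority-vote behaviour that get_common_fsid() relies on.
252 | # Counter.most_common() returns (value, count) pairs sorted by count, so the
253 | # first element is the FSID reported by most sockets.
254 | def _example_majority_fsid():
255 |     fsids = ['aaaa-1111', 'aaaa-1111', 'bbbb-2222']  # made-up sample data
256 |     # most_common() -> [('aaaa-1111', 2), ('bbbb-2222', 1)]
257 |     return Counter(fsids).most_common()[0][0]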
--------------------------------------------------------------------------------
/ceph_medic/runner.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from ceph_medic import metadata, terminal, daemon_types
3 | from ceph_medic import checks, __version__
4 | from ceph_medic import config
5 |
6 | logger = logging.getLogger(__name__)
7 |
8 |
9 | class Runner(object):
10 |
11 | def __init__(self):
12 | self.passed = 0
13 | self.skipped = 0
14 | self.total = 0
15 | self.errors = 0
16 | self.warnings = 0
17 | self.ignore = []
18 | self.internal_errors = []
19 |
20 | @property
21 | def total_hosts(self):
22 | # XXX does not ensure unique nodes. In collocated scenarios, a single
23 | # node that is a 'mon' and an 'osd' would count as two nodes
24 | count = 0
25 | for daemon in metadata['nodes'].values():
26 | count += len(daemon)
27 | return count
28 |
29 | def run(self):
30 | """
31 | Go through all the daemons, and all checks. Single entrypoint for running
32 | checks everywhere.
33 | """
34 | start_header()
35 | for daemon_type in daemon_types:
36 | self.run_daemons(daemon_type)
37 |
38 | # these are checks that should run once per cluster
39 | nodes_header('cluster')
40 | self.run_cluster(checks.cluster)
41 |
42 | if metadata['failed_nodes']:
43 | terminal.write.bold('\n{daemon:-^30}\n'.format(daemon=' Failed Nodes '))
44 | for host, reason in metadata['failed_nodes'].items():
45 | terminal.loader.write(' %s' % terminal.red(host))
46 | terminal.write.write('\n')
47 | reason_lines = reason.split('\n')
48 | main_reason = reason_lines.pop(0)
49 | terminal.write.write(" %s\n" % main_reason)
50 | for line in reason_lines:
51 | terminal.write.write(" %s\n" % line)
52 | self.total = self.errors + self.warnings + self.passed + len(self.internal_errors)
53 | return self
54 |
55 | def run_daemons(self, daemon_type):
56 | has_nodes = metadata[daemon_type]
57 | is_daemon = daemon_type in metadata['nodes']
58 | if has_nodes and is_daemon: # we have nodes of this type to run
59 | nodes_header(daemon_type)
60 | else:
61 | return
62 |
63 | for host, data in metadata[daemon_type].items():
64 | modules = [checks.common, getattr(checks, daemon_type, None)]
65 | self.run_host(host, data, modules)
66 |
67 | def run_cluster(self, module):
68 | # XXX get the cluster name here
69 | cluster_name = '%s cluster' % metadata.get('cluster_name', 'ceph')
70 | terminal.loader.write(' %s' % terminal.yellow(cluster_name))
71 | has_error = False
72 |         check_names = collect_checks(module)
73 |         for check in check_names:
74 | try:
75 | # TODO: figure out how to skip running a specific check if
76 | # the code is ignored, maybe introspecting the function?
77 | result = getattr(module, check)()
78 | except Exception as error:
79 | result = None
80 | logger.exception('check had an unhandled error: %s', check)
81 | self.internal_errors.append(error)
82 | if result:
83 | code, message = result
84 | # XXX This is not ideal, we shouldn't need to get all the way here
85 | # to make sure this is actually ignored. (Or maybe it doesn't matter?)
86 | if code in self.ignore:
87 | self.skipped += 1
88 | # avoid writing anything else to the terminal, and just
89 | # go to the next check
90 | continue
91 | if not has_error:
92 | # XXX get the cluster name here
93 | terminal.loader.write(' %s' % terminal.red(cluster_name))
94 | terminal.write.write('\n')
95 |
96 | if code.startswith('E'):
97 | code = terminal.red(code)
98 | self.errors += 1
99 | elif code.startswith('W'):
100 | code = terminal.yellow(code)
101 | self.warnings += 1
102 | terminal.write.write(" %s: %s\n" % (code, message))
103 | has_error = True
104 | else:
105 | self.passed += 1
106 |
107 | if not has_error:
108 | terminal.loader.write(' %s\n' % terminal.green(cluster_name))
109 |
110 | def run_host(self, host, data, modules):
111 | terminal.loader.write(' %s' % terminal.yellow(host))
112 | has_error = False
113 | for module in modules:
114 |             check_names = collect_checks(module)
115 |             for check in check_names:
116 | try:
117 | # TODO: figure out how to skip running a specific check if
118 | # the code is ignored, maybe introspecting the function?
119 | result = getattr(module, check)(host, data)
120 | except Exception as error:
121 | result = None
122 | logger.exception('check had an unhandled error: %s', check)
123 | self.internal_errors.append(error)
124 | if result:
125 | code, message = result
126 | # XXX This is not ideal, we shouldn't need to get all the way here
127 | # to make sure this is actually ignored. (Or maybe it doesn't matter?)
128 | if code in self.ignore:
129 | self.skipped += 1
130 | # avoid writing anything else to the terminal, and just
131 | # go to the next check
132 | continue
133 | if not has_error:
134 | terminal.loader.write(' %s' % terminal.red(host))
135 | terminal.write.write('\n')
136 |
137 | if code.startswith('E'):
138 | self.errors += 1
139 | code = terminal.red(code)
140 | elif code.startswith('W'):
141 | self.warnings += 1
142 | code = terminal.yellow(code)
143 | terminal.write.write(" %s: %s\n" % (code, message))
144 | has_error = True
145 | else:
146 | self.passed += 1
147 |
148 | if not has_error:
149 | terminal.loader.write(' %s\n' % terminal.green(host))
150 |
151 |
152 | run_errors = terminal.yellow("""
153 | While running checks, ceph-medic had %s unhandled error(s); please look at the
154 | configured log file and report the issue along with the traceback.
155 | """)
156 |
157 |
158 | def report(results):
159 | msg = "\n{passed}{error}{warning}{skipped}{internal_errors}{hosts}"
160 |
161 | if results.errors:
162 | msg = terminal.red(msg)
163 | elif results.warnings:
164 | msg = terminal.yellow(msg)
165 | else:
166 | msg = terminal.green(msg)
167 |
168 | errors = warnings = internal_errors = ''
169 |
170 | if results.errors:
171 | errors = '%s errors, ' % results.errors if results.errors > 1 else '1 error, '
172 | if results.warnings:
173 | warnings = '%s warnings, ' % results.warnings if results.warnings > 1 else '1 warning, '
174 | if results.internal_errors:
175 | internal_errors = "%s internal errors, " % len(results.internal_errors)
176 |
177 | terminal.write.raw(
178 | msg.format(
179 | passed="%s passed, " % results.passed,
180 | error=errors,
181 | warning=warnings,
182 | skipped="%s skipped, " % results.skipped if results.skipped else '',
183 | internal_errors=internal_errors,
184 | hosts="on %s hosts" % results.total_hosts
185 | )
186 | )
187 | if results.internal_errors:
188 | terminal.write.raw(run_errors % len(results.internal_errors))
189 |
190 |
191 | start_header_tmpl = """
192 | {title:=^80}
193 | Version: {version: >4} Cluster Name: "{cluster_name}"
194 | Connection: {connection_type}
195 | Total hosts: [{total_hosts}]
196 | OSDs: {osds: >4} MONs: {mons: >4} Clients: {clients: >4}
197 | MDSs: {mdss: >4} RGWs: {rgws: >4} MGRs: {mgrs: >7}
198 | """
199 |
200 |
201 | def start_header():
202 | connection_type = config.file.get_safe('global', 'deployment_type', 'ssh')
203 | daemon_totals = dict((daemon, 0) for daemon in daemon_types)
204 | total_hosts = 0
205 | for daemon in daemon_types:
206 | count = len(metadata[daemon].keys())
207 | total_hosts += count
208 | daemon_totals[daemon] = count
209 | terminal.write.raw(start_header_tmpl.format(
210 | title=' Starting remote check session ',
211 | version=__version__,
212 | connection_type=connection_type,
213 | total_hosts=total_hosts,
214 | cluster_name=metadata['cluster_name'],
215 | **daemon_totals))
216 | terminal.write.raw('=' * 80)
217 |
218 |
219 | def nodes_header(daemon_type):
220 | readable_daemons = {
221 | 'rgws': ' rados gateways ',
222 | 'mgrs': ' managers ',
223 | 'mons': ' mons ',
224 | 'osds': ' osds ',
225 |         'clients': ' clients ', 'mdss': ' mdss ',
226 | 'cluster': ' cluster ',
227 | }
228 |
229 | terminal.write.bold('\n{daemon:-^30}\n'.format(
230 | daemon=readable_daemons.get(daemon_type, daemon_type)))
231 |
232 |
233 | def collect_checks(module):
234 | checks = [i for i in dir(module) if i.startswith('check')]
235 | return checks
236 |
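237 |
238 | def _example_session():
239 |     """
240 |     Illustrative sketch only; this is not how the ceph-medic command line
241 |     drives a run. It exists to make the Runner/report contract explicit:
242 |     run() returns the Runner itself with its counters populated, and
243 |     report() renders them as the colored summary line.
244 |     """
245 |     from ceph_medic import collector
246 |     collector.collect()          # fill ceph_medic.metadata with remote data
247 |     session = Runner()
248 |     session.ignore = ['WCOM1']   # optionally skip specific check codes
249 |     results = session.run()      # walks every daemon type, then the cluster
250 |     report(results)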
--------------------------------------------------------------------------------
/ceph_medic/collector.py:
--------------------------------------------------------------------------------
1 | """
2 | Collect remote information on Ceph daemons, store everything in memory, and
3 | make it available globally in the module so that checks can consume it
4 | """
5 | from ceph_medic import metadata, remote, terminal
6 | from ceph_medic.terminal import loader
7 | from ceph_medic.connection import get_connection
8 | from execnet.gateway_bootstrap import HostNotFound
9 | import logging
10 |
11 |
12 | logger = logging.getLogger(__name__)
13 |
14 |
15 | def collect_paths(conn):
16 | """
17 | Gather all the interesting paths from the remote system, stat them, and
18 | capture contents when needed.
19 |
20 |     Generates a path tree, using the "path of interest" as key, and appending
21 | the absolute paths of files in the 'files' key and directories in the
22 | 'dirs' key. A small subset of a tree would look
23 | very similar to::
24 |
25 | {
26 | '/etc/ceph': {
27 | 'dirs': {
28 | '/etc/ceph/ceph.d': {...},
29 | },
30 | 'files': {
31 | '/etc/ceph/ceph.d/ceph.conf': {...},
32 | },
33 | }
34 | }
35 |
36 | Each file and dir in a path tree will contain a set of keys populated
37 | mostly by calling ``stat`` on the remote system for that absolute path, in
38 |     addition to capturing contents when "interesting files" are defined. For
39 |     example, the contents of a ``ceph.conf`` file will always be captured. This
40 |     is how that file would look in a path tree::
41 |
42 |
43 | {
44 | '/etc/ceph/ceph.d/test.conf':
45 | {
46 | 'contents': '[osd]\nosd mkfs type = xfs\nosd mkfs options[...] ',
47 | 'exception': {},
48 | 'group': 'ceph',
49 | 'n_fields': 16,
50 | 'n_sequence_fields': 10,
51 | 'n_unnamed_fields': 3,
52 | 'owner': 'ceph',
53 | 'st_atime': 1492721509.572292,
54 | 'st_blksize': 4096,
55 | 'st_blocks': 8,
56 | 'st_ctime': 1492721507.880156,
57 | 'st_dev': 64768L,
58 | 'st_gid': 167,
59 | 'st_ino': 100704475,
60 | 'st_mode': 33188,
61 | 'st_mtime': 1492721506.1060133,
62 | 'st_nlink': 1,
63 | 'st_rdev': 0,
64 | 'st_size': 650,
65 | 'st_uid': 167
66 | },
67 |
68 | }
69 |
70 |     .. note:: ``contents`` is captured using ``file.read()`` so its value is
71 |         a single string that may contain embedded line breaks. To read or parse
72 |         it line by line, split on the newline character (see the sketch at the end of this module).
73 |
74 | """
75 | path_metadata = {}
76 | paths = {
77 | "/etc/ceph": {'get_contents': True},
78 | "/var/lib/ceph": {
79 | 'get_contents': True,
80 | 'skip_files': ['activate.monmap', 'superblock'],
81 | 'skip_dirs': ['tmp', 'current', 'store.db']
82 | },
83 | "/var/run/ceph": {'get_contents': False},
84 | }
85 | for p, kw in paths.items():
86 | # Collect metadata about the files and dirs for the given path and assign
87 | # it back to the path_metadata for the current node
88 | path_metadata[p] = get_path_metadata(conn, p, **kw)
89 | return path_metadata
90 |
91 |
92 | def get_path_metadata(conn, path, **kw):
93 | # generate the tree
94 | tree = conn.remote_module.path_tree(
95 | path,
96 | kw.get('skip_dirs'),
97 | kw.get('skip_files'),
98 | kw.get('get_contents')
99 | )
100 |
101 | files = {}
102 | dirs = {}
103 |
104 | for i in tree['files']:
105 | files[i] = conn.remote_module.stat_path(i, None, None, kw.get('get_contents'))
106 | for i in tree['dirs']:
107 | dirs[i] = conn.remote_module.stat_path(i, None, None, False)
108 |
109 | # actual root path
110 | dirs[path] = conn.remote_module.stat_path(path, None, None, False)
111 |
112 | return {'dirs': dirs, 'files': files}
113 |
114 |
115 | def get_node_metadata(conn, hostname, cluster_nodes):
116 | # "import" the remote functions so that remote calls using the
117 | # functions can be executed
118 | conn.import_module(remote.functions)
119 |
120 | node_metadata = {'ceph': {}}
121 |
122 | # collect paths and files first
123 | loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.yellow('paths')))
124 | node_metadata['paths'] = collect_paths(conn)
125 | loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.green('paths')))
126 |
127 | # TODO: collect network information, passing all the cluster_nodes
128 | # so that it can check for inter-node connectivity
129 | loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.yellow('network')))
130 | node_metadata['network'] = collect_network(cluster_nodes)
131 | loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.green('network')))
132 |
133 | # TODO: collect device information
134 | loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.yellow('devices')))
135 | node_metadata['devices'] = collect_devices()
136 | loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.green('devices')))
137 |
138 | # collect ceph information
139 | loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.yellow('ceph information')))
140 | node_metadata['ceph'] = collect_ceph_info(conn)
141 | node_metadata['ceph']['sockets'] = collect_socket_info(conn, node_metadata)
142 | node_metadata['ceph']['osd'] = collect_ceph_osd_info(conn)
143 | loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.green('ceph information')))
144 |
145 | return node_metadata
146 |
147 |
148 | def collect():
149 | """
150 | The main collecting entrypoint. This function will call all the pieces
151 | needed to build the complete metadata set of a remote system so that checks
152 | can consume and verify that data.
153 |
154 | After collection is done, the full contents of the metadata are available
155 | at ``ceph_medic.metadata``
156 | """
157 | cluster_nodes = metadata['nodes']
158 | loader.write('collecting remote node information')
159 | total_nodes = 0
160 | failed_nodes = 0
161 | has_cluster_data = False
162 |
163 | for node_type, nodes in cluster_nodes.items():
164 | for node in nodes:
165 | # check if a node type exists for this node before doing any work:
166 | try:
167 | metadata[node_type]
168 | except KeyError:
169 | msg = "Skipping node {} from unknown host group: {}".format(node, node_type)
170 | logger.warning(msg)
171 | continue
172 |
173 | total_nodes += 1
174 | hostname = node['host']
175 | loader.write('Host: %-40s connection: [%-20s]' % (hostname, terminal.yellow('connecting')))
176 | # TODO: make sure that the hostname is resolvable, trying to
177 | # debug SSH issues with execnet is pretty hard/impossible, use
178 | # util.net.host_is_resolvable
179 | try:
180 | logger.debug('attempting connection to host: %s', node['host'])
181 | conn = get_connection(node['host'], container=node.get('container'))
182 | loader.write('Host: %-40s connection: [%-20s]' % (hostname, terminal.green('connected')))
183 | loader.write('\n')
184 | except HostNotFound as err:
185 | logger.exception('connection failed')
186 | loader.write('Host: %-40s connection: [%-20s]' % (hostname, terminal.red('failed')))
187 | loader.write('\n')
188 | failed_nodes += 1
189 | if metadata[node_type].get(hostname):
190 | metadata[node_type].pop(hostname)
191 | metadata['nodes'][node_type] = [i for i in metadata['nodes'][node_type] if i['host'] != hostname]
192 | metadata['failed_nodes'].update({hostname: str(err)})
193 | continue
194 |
195 | # send the full node metadata for global scope so that the checks
196 | # can consume this
197 | metadata[node_type][hostname] = get_node_metadata(conn, hostname, cluster_nodes)
198 |             if node_type == 'mons': # monitor nodes most likely have admin privileges to query the cluster
199 | if not has_cluster_data:
200 | cluster_data = collect_cluster(conn)
201 | if cluster_data:
202 | metadata['cluster'] = cluster_data
203 | has_cluster_data = True
204 | conn.exit()
205 |
206 | if failed_nodes == total_nodes:
207 | loader.write(terminal.red('Collection failed!') + ' ' *70)
208 | # TODO: this helps clear out the 'loader' line so that the error looks
209 | # clean, but this manual clearing should be done automatically
210 | terminal.write.raw('')
211 | raise RuntimeError('All nodes failed to connect. Cannot run any checks')
212 | if failed_nodes:
213 | loader.write(terminal.yellow('Collection completed with some failed connections' + ' ' *70 + '\n'))
214 | else:
215 | loader.write('Collection completed!' + ' ' *70 + '\n')
216 |
217 |
218 | # Network
219 | #
220 | def collect_network(cluster_nodes):
221 | """
222 | Collect node-specific information, but also try to check connectivity to
223 | other hosts that are passed in as ``cluster_nodes``
224 | """
225 | return {}
226 |
227 |
228 | # Devices
229 | #
230 | def collect_devices():
231 | """
232 | Get all the device information from the current node
233 | """
234 | return {}
235 |
236 |
237 | # Ceph
238 | #
239 | def collect_ceph_info(conn):
240 | result = dict()
241 | result['version'] = remote.commands.ceph_version(conn)
242 | result['installed'] = remote.commands.ceph_is_installed(conn)
243 | return result
244 |
245 |
246 | def collect_cluster(conn):
247 | """
248 | Captures useful cluster information like the status
249 | """
250 | result = dict()
251 | result['status'] = remote.commands.ceph_status(conn)
252 | return result
253 |
254 |
255 | # Ceph socket info
256 | #
257 | def collect_socket_info(conn, node_metadata):
258 | sockets = [socket for socket in node_metadata['paths']['/var/run/ceph']['files']
259 | if socket.endswith(".asok")]
260 | result = dict()
261 | for socket in sockets:
262 | result[socket] = {'version': {}, 'config': {}}
263 | result[socket]['version'] = remote.commands.ceph_socket_version(conn, socket)
264 | result[socket]['config'] = remote.commands.daemon_socket_config(conn, socket)
265 | return result
266 |
267 |
268 | # Ceph OSD info
269 | #
270 | def collect_ceph_osd_info(conn):
271 | result = {'dump': {}}
272 | result['dump'] = remote.commands.ceph_osd_dump(conn)
273 | return result
274 |
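275 |
276 | def _example_contents_lines(path_metadata):
277 |     """
278 |     Illustrative sketch only, referenced from the collect_paths docstring:
279 |     ``contents`` is a single string, so a consumer that wants to inspect a
280 |     captured file line by line splits it on the newline character. The
281 |     dictionary layout mirrors the tree documented in collect_paths.
282 |     """
283 |     lines = {}
284 |     for name, stat_data in path_metadata.get('/etc/ceph', {}).get('files', {}).items():
285 |         contents = stat_data.get('contents', '')
286 |         lines[name] = contents.split('\n')
287 |     return lines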
--------------------------------------------------------------------------------
/tests/functional/Vagrantfile:
--------------------------------------------------------------------------------
1 | # -*- mode: ruby -*-
2 | # vi: set ft=ruby :
3 |
4 | require 'yaml'
5 | require 'time'
6 | VAGRANTFILE_API_VERSION = '2'
7 |
8 | DEBUG = false
9 |
10 | config_file=File.expand_path(File.join(File.dirname(__FILE__), 'vagrant_variables.yml'))
11 | settings=YAML.load_file(config_file)
12 |
13 | LABEL_PREFIX = settings['label_prefix'] ? settings['label_prefix'] + "-" : ""
14 | NMONS = settings['mon_vms']
15 | NOSDS = settings['osd_vms']
16 | NMDSS = settings['mds_vms']
17 | NRGWS = settings['rgw_vms']
18 | NNFSS = settings['nfs_vms']
19 | RESTAPI = settings['restapi']
20 | NRBD_MIRRORS = settings['rbd_mirror_vms']
21 | CLIENTS = settings['client_vms']
22 | NISCSI_GWS = settings['iscsi_gw_vms']
23 | PUBLIC_SUBNET = settings['public_subnet']
24 | CLUSTER_SUBNET = settings['cluster_subnet']
25 | BOX = settings['vagrant_box']
26 | CLIENT_BOX = settings['client_vagrant_box']
27 | BOX_URL = settings['vagrant_box_url']
28 | SYNC_DIR = settings['vagrant_sync_dir']
29 | MEMORY = settings['memory']
30 | ETH = settings['eth']
31 | USER = settings['ssh_username']
32 |
33 | ASSIGN_STATIC_IP = settings.fetch('assign_static_ip', true)
34 | DISABLE_SYNCED_FOLDER = settings.fetch('vagrant_disable_synced_folder', false)
35 | DISK_UUID = Time.now.utc.to_i
36 |
37 | def create_vmdk(name, size)
38 | dir = Pathname.new(__FILE__).expand_path.dirname
39 | path = File.join(dir, '.vagrant', name + '.vmdk')
40 | `vmware-vdiskmanager -c -s #{size} -t 0 -a scsi #{path} \
41 | 2>&1 > /dev/null` unless File.exist?(path)
42 | end
43 |
44 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
45 | config.ssh.insert_key = false # workaround for https://github.com/mitchellh/vagrant/issues/5048
46 | config.ssh.private_key_path = settings['ssh_private_key_path']
47 | config.ssh.username = USER
48 |
49 | # When using libvirt, avoid errors like:
50 | # "CPU feature cmt not found"
51 | config.vm.provider :libvirt do |lv|
52 | lv.cpu_mode = 'host-passthrough'
53 | end
54 |
55 | # Faster bootup. Disables mounting the sync folder for libvirt and virtualbox
56 | if DISABLE_SYNCED_FOLDER
57 | config.vm.provider :virtualbox do |v,override|
58 | override.vm.synced_folder '.', SYNC_DIR, disabled: true
59 | end
60 | config.vm.provider :libvirt do |v,override|
61 | override.vm.synced_folder '.', SYNC_DIR, disabled: true
62 | end
63 | end
64 |
65 | (0..CLIENTS - 1).each do |i|
66 | config.vm.define "#{LABEL_PREFIX}client#{i}" do |client|
67 | client.vm.box = CLIENT_BOX
68 | client.vm.hostname = "#{LABEL_PREFIX}ceph-client#{i}"
69 | if ASSIGN_STATIC_IP
70 | client.vm.network :private_network,
71 | ip: "#{PUBLIC_SUBNET}.4#{i}"
72 | end
73 | # Virtualbox
74 | client.vm.provider :virtualbox do |vb|
75 | vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
76 | end
77 |
78 | # VMware
79 | client.vm.provider :vmware_fusion do |v|
80 | v.vmx['memsize'] = "#{MEMORY}"
81 | end
82 |
83 | # Libvirt
84 | client.vm.provider :libvirt do |lv|
85 | lv.memory = MEMORY
86 | lv.random_hostname = true
87 | end
88 |
89 | # Parallels
90 | client.vm.provider "parallels" do |prl|
91 | prl.name = "ceph-client#{i}"
92 | prl.memory = "#{MEMORY}"
93 | end
94 |
95 | client.vm.provider :linode do |provider|
96 | provider.label = client.vm.hostname
97 | end
98 | end
99 | end
100 |
101 | (0..NRGWS - 1).each do |i|
102 | config.vm.define "#{LABEL_PREFIX}rgw#{i}" do |rgw|
103 | rgw.vm.box = BOX
104 | rgw.vm.box_url = BOX_URL
105 | rgw.vm.hostname = "#{LABEL_PREFIX}ceph-rgw#{i}"
106 | if ASSIGN_STATIC_IP
107 | rgw.vm.network :private_network,
108 | ip: "#{PUBLIC_SUBNET}.5#{i}"
109 | end
110 |
111 | # Virtualbox
112 | rgw.vm.provider :virtualbox do |vb|
113 | vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
114 | end
115 |
116 | # VMware
117 | rgw.vm.provider :vmware_fusion do |v|
118 | v.vmx['memsize'] = "#{MEMORY}"
119 | end
120 |
121 | # Libvirt
122 | rgw.vm.provider :libvirt do |lv|
123 | lv.memory = MEMORY
124 | lv.random_hostname = true
125 | end
126 |
127 | # Parallels
128 | rgw.vm.provider "parallels" do |prl|
129 | prl.name = "ceph-rgw#{i}"
130 | prl.memory = "#{MEMORY}"
131 | end
132 |
133 | rgw.vm.provider :linode do |provider|
134 | provider.label = rgw.vm.hostname
135 | end
136 | end
137 | end
138 |
139 | (0..NNFSS - 1).each do |i|
140 | config.vm.define "nfs#{i}" do |nfs|
141 | nfs.vm.box = BOX
142 | nfs.vm.box_url = BOX_URL
143 | nfs.vm.hostname = "ceph-nfs#{i}"
144 | if ASSIGN_STATIC_IP
145 | nfs.vm.network :private_network,
146 | ip: "#{PUBLIC_SUBNET}.6#{i}"
147 | end
148 |
149 | # Virtualbox
150 | nfs.vm.provider :virtualbox do |vb|
151 | vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
152 | end
153 |
154 | # VMware
155 | nfs.vm.provider :vmware_fusion do |v|
156 | v.vmx['memsize'] = "#{MEMORY}"
157 | end
158 |
159 | # Libvirt
160 | nfs.vm.provider :libvirt do |lv|
161 | lv.memory = MEMORY
162 | lv.random_hostname = true
163 | end
164 |
165 | # Parallels
166 | nfs.vm.provider "parallels" do |prl|
167 | prl.name = "ceph-nfs#{i}"
168 | prl.memory = "#{MEMORY}"
169 | end
170 |
171 | nfs.vm.provider :linode do |provider|
172 | provider.label = nfs.vm.hostname
173 | end
174 | end
175 | end
176 |
177 | (0..NMDSS - 1).each do |i|
178 | config.vm.define "#{LABEL_PREFIX}mds#{i}" do |mds|
179 | mds.vm.box = BOX
180 | mds.vm.box_url = BOX_URL
181 | mds.vm.hostname = "#{LABEL_PREFIX}ceph-mds#{i}"
182 | if ASSIGN_STATIC_IP
183 | mds.vm.network :private_network,
184 | ip: "#{PUBLIC_SUBNET}.7#{i}"
185 | end
186 | # Virtualbox
187 | mds.vm.provider :virtualbox do |vb|
188 | vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
189 | end
190 |
191 | # VMware
192 | mds.vm.provider :vmware_fusion do |v|
193 | v.vmx['memsize'] = "#{MEMORY}"
194 | end
195 |
196 | # Libvirt
197 | mds.vm.provider :libvirt do |lv|
198 | lv.memory = MEMORY
199 | lv.random_hostname = true
200 | end
201 | # Parallels
202 | mds.vm.provider "parallels" do |prl|
203 | prl.name = "ceph-mds#{i}"
204 | prl.memory = "#{MEMORY}"
205 | end
206 |
207 | mds.vm.provider :linode do |provider|
208 | provider.label = mds.vm.hostname
209 | end
210 | end
211 | end
212 |
213 | (0..NRBD_MIRRORS - 1).each do |i|
214 | config.vm.define "#{LABEL_PREFIX}rbd_mirror#{i}" do |rbd_mirror|
215 | rbd_mirror.vm.box = BOX
216 | rbd_mirror.vm.box_url = BOX_URL
217 | rbd_mirror.vm.hostname = "#{LABEL_PREFIX}ceph-rbd-mirror#{i}"
218 | if ASSIGN_STATIC_IP
219 | rbd_mirror.vm.network :private_network,
220 | ip: "#{PUBLIC_SUBNET}.8#{i}"
221 | end
222 | # Virtualbox
223 | rbd_mirror.vm.provider :virtualbox do |vb|
224 | vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
225 | end
226 |
227 | # VMware
228 | rbd_mirror.vm.provider :vmware_fusion do |v|
229 | v.vmx['memsize'] = "#{MEMORY}"
230 | end
231 |
232 | # Libvirt
233 | rbd_mirror.vm.provider :libvirt do |lv|
234 | lv.memory = MEMORY
235 | lv.random_hostname = true
236 | end
237 | # Parallels
238 | rbd_mirror.vm.provider "parallels" do |prl|
239 | prl.name = "ceph-rbd-mirror#{i}"
240 | prl.memory = "#{MEMORY}"
241 | end
242 |
243 | rbd_mirror.vm.provider :linode do |provider|
244 | provider.label = rbd_mirror.vm.hostname
245 | end
246 | end
247 | end
248 |
249 | (0..NISCSI_GWS - 1).each do |i|
250 | config.vm.define "#{LABEL_PREFIX}iscsi_gw#{i}" do |iscsi_gw|
251 | iscsi_gw.vm.box = BOX
252 | iscsi_gw.vm.box_url = BOX_URL
253 | iscsi_gw.vm.hostname = "#{LABEL_PREFIX}ceph-iscsi-gw#{i}"
254 | if ASSIGN_STATIC_IP
255 | iscsi_gw.vm.network :private_network,
256 | ip: "#{PUBLIC_SUBNET}.9#{i}"
257 | end
258 | # Virtualbox
259 | iscsi_gw.vm.provider :virtualbox do |vb|
260 | vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
261 | end
262 |
263 | # VMware
264 | iscsi_gw.vm.provider :vmware_fusion do |v|
265 | v.vmx['memsize'] = "#{MEMORY}"
266 | end
267 |
268 | # Libvirt
269 | iscsi_gw.vm.provider :libvirt do |lv|
270 | lv.memory = MEMORY
271 | lv.random_hostname = true
272 | end
273 | # Parallels
274 | iscsi_gw.vm.provider "parallels" do |prl|
275 | prl.name = "ceph-iscsi-gw#{i}"
276 | prl.memory = "#{MEMORY}"
277 | end
278 |
279 | iscsi_gw.vm.provider :linode do |provider|
280 | provider.label = iscsi_gw.vm.hostname
281 | end
282 | end
283 | end
284 |
285 | (0..NMONS - 1).each do |i|
286 | config.vm.define "#{LABEL_PREFIX}mon#{i}" do |mon|
287 | mon.vm.box = BOX
288 | mon.vm.box_url = BOX_URL
289 | mon.vm.hostname = "#{LABEL_PREFIX}ceph-mon#{i}"
290 | if ASSIGN_STATIC_IP
291 | mon.vm.network :private_network,
292 | ip: "#{PUBLIC_SUBNET}.1#{i}"
293 | end
294 | # Virtualbox
295 | mon.vm.provider :virtualbox do |vb|
296 | vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
297 | end
298 |
299 | # VMware
300 | mon.vm.provider :vmware_fusion do |v|
301 | v.vmx['memsize'] = "#{MEMORY}"
302 | end
303 |
304 | # Libvirt
305 | mon.vm.provider :libvirt do |lv|
306 | lv.memory = MEMORY
307 | lv.random_hostname = true
308 | end
309 |
310 | # Parallels
311 | mon.vm.provider "parallels" do |prl|
312 | prl.name = "ceph-mon#{i}"
313 | prl.memory = "#{MEMORY}"
314 | end
315 |
316 | mon.vm.provider :linode do |provider|
317 | provider.label = mon.vm.hostname
318 | end
319 | end
320 | end
321 |
322 | (0..NOSDS - 1).each do |i|
323 | config.vm.define "#{LABEL_PREFIX}osd#{i}" do |osd|
324 | osd.vm.box = BOX
325 | osd.vm.box_url = BOX_URL
326 | osd.vm.hostname = "#{LABEL_PREFIX}ceph-osd#{i}"
327 | if ASSIGN_STATIC_IP
328 | osd.vm.network :private_network,
329 | ip: "#{PUBLIC_SUBNET}.10#{i}"
330 | osd.vm.network :private_network,
331 | ip: "#{CLUSTER_SUBNET}.20#{i}"
332 | end
333 | # Virtualbox
334 | osd.vm.provider :virtualbox do |vb|
335 | # Create our own controller for consistency and to remove VM dependency
336 | vb.customize ['storagectl', :id,
337 | '--name', 'OSD Controller',
338 | '--add', 'scsi']
339 | (0..2).each do |d|
340 | vb.customize ['createhd',
341 | '--filename', "disk-#{i}-#{d}",
342 | '--size', '11000'] unless File.exist?("disk-#{i}-#{d}.vdi")
343 | vb.customize ['storageattach', :id,
344 | '--storagectl', 'OSD Controller',
345 | '--port', 3 + d,
346 | '--device', 0,
347 | '--type', 'hdd',
348 | '--medium', "disk-#{i}-#{d}.vdi"]
349 | end
350 | vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
351 | end
352 |
353 | # VMware
354 | osd.vm.provider :vmware_fusion do |v|
355 | (0..1).each do |d|
356 | v.vmx["scsi0:#{d + 1}.present"] = 'TRUE'
357 | v.vmx["scsi0:#{d + 1}.fileName"] =
358 | create_vmdk("disk-#{i}-#{d}", '11000MB')
359 | end
360 | v.vmx['memsize'] = "#{MEMORY}"
361 | end
362 |
363 | # Libvirt
364 | driverletters = ('a'..'z').to_a
365 | osd.vm.provider :libvirt do |lv|
366 | # always make /dev/sd{a/b/c/d} so that CI can ensure that
367 | # virtualbox and libvirt will have the same devices to use for OSDs
368 | (0..3).each do |d|
369 | lv.storage :file, :device => "hd#{driverletters[d]}", :path => "disk-#{i}-#{d}-#{DISK_UUID}.disk", :size => '12G', :bus => "ide"
370 | end
371 | lv.memory = MEMORY
372 | lv.random_hostname = true
373 | end
374 |
375 | # Parallels
376 | osd.vm.provider "parallels" do |prl|
377 | prl.name = "ceph-osd#{i}"
378 | prl.memory = "#{MEMORY}"
379 | (0..1).each do |d|
380 | prl.customize ["set", :id,
381 | "--device-add",
382 | "hdd",
383 | "--iface",
384 | "sata"]
385 | end
386 | end
387 |
388 | osd.vm.provider :linode do |provider|
389 | provider.label = osd.vm.hostname
390 | end
391 |
392 | end
393 | end
394 | end
395 |
--------------------------------------------------------------------------------