├── debian ├── compat ├── source │ └── format ├── ceph-medic.lintian-overrides ├── rules ├── changelog ├── control └── copyright ├── ceph_medic ├── checks │ ├── clients.py │ ├── mdss.py │ ├── mgrs.py │ ├── rgws.py │ ├── __init__.py │ ├── cluster.py │ ├── osds.py │ ├── mons.py │ └── common.py ├── rules │ ├── __init__.py │ ├── jewel.py │ └── kraken.py ├── tests │ ├── util │ │ ├── __init__.py │ │ ├── test_configuration.py │ │ └── test_hosts.py │ ├── checks │ │ ├── __init__.py │ │ ├── test_cluster.py │ │ ├── test_osds.py │ │ └── test_mons.py │ ├── remote │ │ ├── __init__.py │ │ ├── test_commands.py │ │ └── test_functions.py │ ├── __init__.py │ ├── test_terminal.py │ ├── test_main.py │ ├── test_log.py │ ├── test_collector.py │ ├── conftest.py │ └── test_runner.py ├── remote │ ├── __init__.py │ ├── util.py │ ├── commands.py │ └── functions.py ├── util │ ├── net.py │ ├── __init__.py │ ├── mon.py │ └── hosts.py ├── compat.py ├── log.py ├── __init__.py ├── loader.py ├── decorators.py ├── check.py ├── generate.py ├── connection.py ├── terminal.py ├── main.py ├── runner.py └── collector.py ├── docs ├── .gitignore ├── source │ ├── _static │ │ └── .empty │ ├── _themes │ │ └── ceph │ │ │ ├── theme.conf │ │ │ └── static │ │ │ ├── font │ │ │ ├── ApexSans-Book.eot │ │ │ ├── ApexSans-Book.ttf │ │ │ ├── ApexSans-Book.woff │ │ │ ├── ApexSans-Medium.eot │ │ │ ├── ApexSans-Medium.ttf │ │ │ └── ApexSans-Medium.woff │ │ │ └── nature.css_t │ ├── contents.rst │ ├── codes │ │ ├── cluster.rst │ │ ├── mons.rst │ │ ├── osds.rst │ │ └── common.rst │ ├── codes.rst │ ├── _templates │ │ └── smarttoc.html │ ├── changelog.rst │ ├── installation.rst │ ├── facts.rst │ ├── conf.py │ └── index.rst └── Makefile ├── requirements-dev.txt ├── tests └── functional │ ├── centos7 │ ├── Vagrantfile │ ├── hosts │ ├── test.yml │ ├── group_vars │ │ └── all │ └── vagrant_variables.yml │ ├── .gitignore │ ├── scripts │ └── generate_ssh_config.sh │ ├── tox.ini │ ├── playbooks │ └── setup.yml │ └── Vagrantfile ├── 
setup.cfg ├── MANIFEST.in ├── bin └── ceph-medic ├── .gitignore ├── README.rst ├── tox.ini ├── LICENSE ├── ceph-medic.spec.in ├── CONTRIBUTING.rst ├── Makefile └── setup.py /debian/compat: -------------------------------------------------------------------------------- 1 | 7 2 | -------------------------------------------------------------------------------- /ceph_medic/checks/clients.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ceph_medic/checks/mdss.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ceph_medic/checks/mgrs.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ceph_medic/checks/rgws.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ceph_medic/rules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | -------------------------------------------------------------------------------- /docs/source/_static/.empty: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ceph_medic/tests/util/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ceph_medic/tests/checks/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ceph_medic/tests/remote/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (quilt) 2 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pytest >=2.1.3 2 | tox >=1.2 3 | -------------------------------------------------------------------------------- /tests/functional/centos7/Vagrantfile: -------------------------------------------------------------------------------- 1 | ../Vagrantfile -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 | norecursedirs = .* _* virtualenv 3 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst 2 | include LICENSE 3 | include tox.ini 4 | -------------------------------------------------------------------------------- /tests/functional/.gitignore: -------------------------------------------------------------------------------- 1 | *.vdi 2 | .vagrant/ 3 | vagrant_ssh_config 4 | -------------------------------------------------------------------------------- /ceph_medic/remote/__init__.py: -------------------------------------------------------------------------------- 1 | from . import functions # noqa 2 | from . 
import commands # noqa 3 | -------------------------------------------------------------------------------- /ceph_medic/checks/__init__.py: -------------------------------------------------------------------------------- 1 | from . import osds, mons, clients, rgws, mdss, common, mgrs, cluster # noqa 2 | -------------------------------------------------------------------------------- /docs/source/_themes/ceph/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = basic 3 | stylesheet = nature.css 4 | pygments_style = tango 5 | -------------------------------------------------------------------------------- /debian/ceph-medic.lintian-overrides: -------------------------------------------------------------------------------- 1 | # Package has not yet been submitted to Debian. 2 | new-package-should-close-itp-bug 3 | -------------------------------------------------------------------------------- /bin/ceph-medic: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ceph_medic import main 4 | 5 | if __name__ == '__main__': 6 | main.Medic() 7 | -------------------------------------------------------------------------------- /docs/source/_themes/ceph/static/font/ApexSans-Book.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceph/ceph-medic/HEAD/docs/source/_themes/ceph/static/font/ApexSans-Book.eot -------------------------------------------------------------------------------- /docs/source/_themes/ceph/static/font/ApexSans-Book.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceph/ceph-medic/HEAD/docs/source/_themes/ceph/static/font/ApexSans-Book.ttf -------------------------------------------------------------------------------- /docs/source/_themes/ceph/static/font/ApexSans-Book.woff: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceph/ceph-medic/HEAD/docs/source/_themes/ceph/static/font/ApexSans-Book.woff -------------------------------------------------------------------------------- /docs/source/_themes/ceph/static/font/ApexSans-Medium.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceph/ceph-medic/HEAD/docs/source/_themes/ceph/static/font/ApexSans-Medium.eot -------------------------------------------------------------------------------- /docs/source/_themes/ceph/static/font/ApexSans-Medium.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceph/ceph-medic/HEAD/docs/source/_themes/ceph/static/font/ApexSans-Medium.ttf -------------------------------------------------------------------------------- /docs/source/_themes/ceph/static/font/ApexSans-Medium.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ceph/ceph-medic/HEAD/docs/source/_themes/ceph/static/font/ApexSans-Medium.woff -------------------------------------------------------------------------------- /tests/functional/centos7/hosts: -------------------------------------------------------------------------------- 1 | [mons] 2 | mon0 address=192.168.3.10 3 | 4 | [osds] 5 | osd0 address=192.168.3.100 6 | 7 | [medic] 8 | client0 address=192.168.3.40 9 | -------------------------------------------------------------------------------- /docs/source/contents.rst: -------------------------------------------------------------------------------- 1 | ceph-medic contents 2 | =================== 3 | 4 | .. 
toctree:: 5 | :maxdepth: 2 6 | 7 | index.rst 8 | installation.rst 9 | codes.rst 10 | facts.rst 11 | changelog.rst 12 | -------------------------------------------------------------------------------- /ceph_medic/tests/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | # helps reset altered metadata in tests 3 | base_metadata = {'rgws': {}, 'mgrs': {}, 'mdss':{}, 'clients': {}, 4 | 'osds':{}, 'mons':{}, 'nodes': {}, 'cluster_name': 'ceph', 'failed_nodes': {}} 5 | 6 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # Uncomment this to turn on verbose mode. 4 | export DH_VERBOSE=1 5 | 6 | export PYBUILD_NAME=ceph-medic 7 | 8 | export PYBUILD_TEST_ARGS=ceph_medic/tests 9 | 10 | %: 11 | dh $@ --buildsystem pybuild --with python2 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 2 | 3 | # Packages 4 | *.egg 5 | *.egg-info 6 | .cache 7 | dist 8 | build 9 | eggs 10 | parts 11 | bin 12 | var 13 | sdist 14 | develop-eggs 15 | .installed.cfg 16 | 17 | # Installer logs 18 | pip-log.txt 19 | 20 | # Unit test / coverage reports 21 | .coverage 22 | .tox 23 | 24 | #Translations 25 | *.mo 26 | -------------------------------------------------------------------------------- /tests/functional/centos7/test.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - hosts: medic 4 | become: no 5 | tasks: 6 | 7 | - name: copy hosts file to vagrant home dir 8 | command: cp /vagrant/hosts /home/vagrant 9 | become: yes 10 | 11 | - name: use ceph-medic to check ceph cluster 12 | command: ceph-medic --inventory /home/vagrant/hosts check 13 | 
-------------------------------------------------------------------------------- /ceph_medic/util/net.py: -------------------------------------------------------------------------------- 1 | import socket 2 | 3 | 4 | def host_is_resolvable(hostname, _socket=None): 5 | _socket = _socket or socket # just used for testing 6 | try: 7 | _socket.getaddrinfo(hostname, 0) 8 | except _socket.gaierror: 9 | msg = "hostname: %s is not resolvable" % hostname 10 | raise RuntimeError(msg) 11 | return True 12 | -------------------------------------------------------------------------------- /docs/source/codes/cluster.rst: -------------------------------------------------------------------------------- 1 | Cluster 2 | ======= 3 | Cluster checks run once against the information of a cluster, and are 4 | not specific to any deamon. 5 | 6 | 7 | Errors 8 | ------ 9 | 10 | .. _ECLS1: 11 | 12 | ECLS1 13 | ^^^^^ 14 | No OSD nodes exist as part of the cluster. 15 | 16 | .. _ECLS2: 17 | 18 | ECLS2 19 | ^^^^^ 20 | The cluster is nearfull. 
21 | 22 | 23 | -------------------------------------------------------------------------------- /tests/functional/scripts/generate_ssh_config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Generate a custom ssh config from Vagrant so that it can then be used by 3 | # ansible.cfg 4 | 5 | path=$1 6 | 7 | if [ $# -eq 0 ] 8 | then 9 | echo "A path to the scenario is required as an argument and it wasn't provided" 10 | exit 1 11 | fi 12 | 13 | cd "$path" 14 | vagrant ssh-config > vagrant_ssh_config 15 | -------------------------------------------------------------------------------- /ceph_medic/compat.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | try: 4 | import ConfigParser as configparser 5 | except ImportError: 6 | import configparser 7 | 8 | try: 9 | from ConfigParser import SafeConfigParser as BaseConfigParser 10 | except ImportError: 11 | from configparser import ConfigParser as BaseConfigParser 12 | 13 | try: 14 | from StringIO import StringIO 15 | except ImportError: 16 | from io import StringIO 17 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | ceph-medic 3 | ========== 4 | 5 | ``ceph-medic`` is a tool that performs checks against Ceph clusters to identify common issues preventing proper functionality. It supports Kubernetes and OpenShift, using ``kubectl`` and ``oc``, respectively. It requires non-interactive SSH access to accounts that can ``sudo`` without a password prompt. 
6 | 7 | Full usage documentation can be found at: http://docs.ceph.com/ceph-medic 8 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py36, py37, flake8 3 | 4 | [testenv] 5 | deps= 6 | pytest 7 | mock 8 | commands=py.test -v {posargs:ceph_medic/tests} 9 | 10 | [testenv:docs] 11 | basepython=python 12 | changedir=docs/source 13 | deps=sphinx 14 | commands= 15 | sphinx-build -W -b html -d {envtmpdir}/doctrees . {envtmpdir}/html 16 | 17 | [testenv:flake8] 18 | deps=flake8 19 | commands=flake8 --select=F,E9 --exclude=vendor {posargs:ceph_medic} 20 | -------------------------------------------------------------------------------- /tests/functional/centos7/group_vars/all: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | ceph_origin: repository 4 | ceph_repository: community 5 | ceph_stable_release: luminous 6 | cluster: test 7 | public_network: "192.168.3.0/24" 8 | cluster_network: "192.168.4.0/24" 9 | monitor_interface: eth1 10 | journal_size: 100 11 | osd_objectstore: "filestore" 12 | devices: 13 | - '/dev/sda' 14 | - '/dev/sdb' 15 | osd_scenario: collocated 16 | os_tuning_params: 17 | - { name: kernel.pid_max, value: 4194303 } 18 | - { name: fs.file-max, value: 26234859 } 19 | -------------------------------------------------------------------------------- /ceph_medic/rules/jewel.py: -------------------------------------------------------------------------------- 1 | # Rules to apply for Jewel releases. 2 | 3 | # All checks are applied, but overrides to defaults can 4 | # be specified here. 5 | # overrides = { 6 | # # overrides the check called 'check_name' with a different expected value 7 | # # and changes # the level of this check to 'error'. 
8 | # "check_name": {"expected": "value", "level": "error"}, 9 | #} 10 | 11 | # Exclude the following checks: 12 | # excludes = ["check_name"] 13 | 14 | # Include the following checks: 15 | # includes = ["check_name"] 16 | -------------------------------------------------------------------------------- /ceph_medic/rules/kraken.py: -------------------------------------------------------------------------------- 1 | # Rules to apply for Jewel releases. 2 | 3 | # All checks are applied, but overrides to defaults can 4 | # be specified here. 5 | # overrides = { 6 | # # overrides the check called 'check_name' with a different expected value 7 | # # and changes # the level of this check to 'error'. 8 | # "check_name": {"expected": "value", "level": "error"}, 9 | #} 10 | 11 | # Exclude the following checks: 12 | # excludes = ["check_name"] 13 | 14 | # Include the following checks: 15 | # includes = ["check_name"] 16 | -------------------------------------------------------------------------------- /docs/source/codes.rst: -------------------------------------------------------------------------------- 1 | =========== 2 | Error Codes 3 | =========== 4 | 5 | When performing checks, ``ceph-medic`` will return an error code and message for any that failed. These checks 6 | can either be a ``warning`` or ``error``, and will pertain to common issues or daemon specific issues. Any error 7 | code starting with ``E`` is an error, and any starting with ``W`` is a warning. 8 | 9 | Below you'll find a list of checks that are performed with the ``check`` subcommand. 10 | 11 | 12 | .. 
toctree:: 13 | :maxdepth: 2 14 | 15 | codes/common.rst 16 | codes/mons.rst 17 | codes/osds.rst 18 | codes/cluster.rst 19 | -------------------------------------------------------------------------------- /ceph_medic/checks/cluster.py: -------------------------------------------------------------------------------- 1 | from ceph_medic import metadata 2 | 3 | 4 | # 5 | # Error checks 6 | # 7 | 8 | def check_osds_exist(): 9 | code = 'ECLS1' 10 | msg = 'There are no OSDs available' 11 | osd_count = len(metadata['osds'].keys()) 12 | if not osd_count: 13 | return code, msg 14 | 15 | 16 | def check_nearfull(): 17 | """ 18 | Checks if the osd capacity is at nearfull 19 | """ 20 | code = 'ECLS2' 21 | msg = 'Cluster is nearfull' 22 | try: 23 | osd_map = metadata['cluster']['status']['osdmap']['osdmap'] 24 | except KeyError: 25 | return 26 | if osd_map.get('nearfull'): 27 | return code, msg 28 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = ceph-medic 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /ceph_medic/util/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | def str_to_int(string): 3 | """ 4 | Parses a string number into an integer, optionally converting to a float 5 | and rounding down. 6 | 7 | Some LVM values may come with a comma instead of a dot to define decimals. 8 | This function normalizes a comma into a dot 9 | """ 10 | try: 11 | integer = float(string.replace(',', '.')) 12 | except AttributeError: 13 | # this might be a integer already, so try to use it, otherwise raise 14 | # the original exception 15 | if isinstance(string, (int, float)): 16 | integer = string 17 | else: 18 | raise 19 | 20 | return int(integer) 21 | -------------------------------------------------------------------------------- /docs/source/_templates/smarttoc.html: -------------------------------------------------------------------------------- 1 | {# 2 | Sphinx sidebar template: smart table of contents. 3 | 4 | Shows a sidebar ToC that gives you a more global view of the 5 | documentation, and not the confusing cur/prev/next which is the 6 | default sidebar. 7 | 8 | The ToC will open and collapse automatically to show the part of the 9 | hierarchy you are in. Top-level items will always be visible. 10 | 11 | #} 12 |

{{ _('Table Of Contents') }}

13 | {{ toctree(maxdepth=-1) }} 14 | 15 | 17 | -------------------------------------------------------------------------------- /ceph_medic/log.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import logging 3 | import os 4 | 5 | BASE_FORMAT = "[%(name)s][%(levelname)-6s] %(message)s" 6 | FILE_FORMAT = "[%(asctime)s]" + BASE_FORMAT 7 | 8 | 9 | def setup(config=None): 10 | root_logger = logging.getLogger() 11 | log_path = config.get_safe('global', '--log-path', '.') 12 | if not os.path.exists(log_path): 13 | raise RuntimeError('configured ``--log-path`` value does not exist: %s' % log_path) 14 | date = datetime.strftime(datetime.utcnow(), '%Y-%m-%d') 15 | log_file = os.path.join(log_path, 'ceph-medic-%s.log' % date) 16 | 17 | root_logger.setLevel(logging.DEBUG) 18 | 19 | # File Logger 20 | fh = logging.FileHandler(log_file) 21 | fh.setLevel(logging.DEBUG) 22 | fh.setFormatter(logging.Formatter(FILE_FORMAT)) 23 | 24 | root_logger.addHandler(fh) 25 | -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | ceph-medic (1.0.8) stable; urgency=medium 2 | 3 | * New upstream release 4 | 5 | -- Ceph Release Team Wed, 17 Jun 2020 16:15:00 -0600 6 | 7 | ceph-medic (1.0.7) stable; urgency=medium 8 | 9 | * New upstream release 10 | 11 | -- Ceph Release Team Tue, 24 Mar 2020 17:29:00 -0600 12 | 13 | ceph-medic (1.0.6) stable; urgency=medium 14 | 15 | * New upstream release 16 | 17 | -- Ceph Release Team Tue, 11 Feb 2020 16:41:07 -0600 18 | 19 | ceph-medic (1.0.4) stable; urgency=medium 20 | 21 | * New upstream release 22 | 23 | -- Ceph Release Team Tue, 27 Mar 2018 20:19:38 +0000 24 | 25 | ceph-medic (0.0.1-1) unstable; urgency=medium 26 | 27 | * Initial release. 
28 | 29 | -- Ken Dreyer Wed, 28 Jun 2017 13:20:07 -0600 30 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: ceph-medic 2 | Maintainer: Alfredo Deza 3 | Section: admin 4 | Priority: optional 5 | Build-Depends: 6 | debhelper (>= 7), 7 | dh-python, 8 | python, 9 | python-mock, 10 | python-pytest, 11 | python-remoto, 12 | python-setuptools, 13 | python-tambo 14 | X-Python-Version: >= 2.7 15 | Standards-Version: 3.9.7 16 | Homepage: http://ceph.com/ 17 | Vcs-Git: git://github.com/ceph/ceph-medic.git 18 | Vcs-Browser: https://github.com/ceph/ceph-medic 19 | 20 | Package: ceph-medic 21 | Architecture: all 22 | Depends: ${misc:Depends}, ${python:Depends} 23 | Description: determine common issues on Ceph storage clusters 24 | ceph-medic is a very simple tool to run against a Ceph cluster to detect 25 | common issues that might prevent correct functionality. It requires 26 | non-interactive SSH access to accounts that can sudo without a password 27 | prompt. 28 | -------------------------------------------------------------------------------- /ceph_medic/remote/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | 5 | def which(executable): 6 | """find the location of an executable""" 7 | locations = ( 8 | '/usr/local/bin', 9 | '/bin', 10 | '/usr/bin', 11 | '/usr/local/sbin', 12 | '/usr/sbin', 13 | '/sbin', 14 | ) 15 | 16 | for location in locations: 17 | executable_path = os.path.join(location, executable) 18 | if os.path.exists(executable_path): 19 | return executable_path 20 | 21 | 22 | def run(command): 23 | """ 24 | run a command, return stdout, stderr, and exit code. 
25 | """ 26 | process = subprocess.Popen( 27 | command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True 28 | ) 29 | stdout = process.stdout.read().splitlines() 30 | stderr = process.stderr.read().splitlines() 31 | returncode = process.wait() 32 | 33 | return stdout, stderr, returncode 34 | -------------------------------------------------------------------------------- /ceph_medic/__init__.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | 4 | class UnloadedConfig(object): 5 | """ 6 | This class is used as the default value for config.ceph so that if 7 | a configuration file is not successfully loaded then it will give 8 | a nice error message when values from the config are used. 9 | """ 10 | def __init__(self, error=None): 11 | self.error = error 12 | 13 | def __getattr__(self, *a): 14 | raise RuntimeError(self.error) 15 | 16 | 17 | config = namedtuple('config', ['verbosity', 'nodes', 'hosts_file', 'file', 'cluster_name']) 18 | config.file = UnloadedConfig("No valid ceph-medic configuration file was loaded") 19 | config.nodes = {} 20 | 21 | metadata = {'failed_nodes': {}, 'rgws': {}, 'mgrs': {}, 'mdss': {}, 'clients': {}, 'osds': {}, 'mons': {}, 'nodes': {}, 'cluster': {}} 22 | 23 | daemon_types = [i for i in metadata.keys() if i not in ('nodes', 'failed_nodes', 'cluster')] 24 | 25 | __version__ = '1.0.8' 26 | -------------------------------------------------------------------------------- /docs/source/codes/mons.rst: -------------------------------------------------------------------------------- 1 | Monitors 2 | ======== 3 | 4 | The following checks indicate issues with monitor nodes. 5 | 6 | Errors 7 | ------ 8 | 9 | .. _EMON1: 10 | 11 | EMON1 12 | _____ 13 | The secret key used in the keyring differs from other nodes in the cluster. 14 | 15 | Warnings 16 | -------- 17 | 18 | 19 | .. 
_WMON1: 20 | 21 | WMON1 22 | _____ 23 | Multiple monitor directories are found on the same host. 24 | 25 | .. _WMON2: 26 | 27 | WMON2 28 | _____ 29 | Collocated OSDs in monitor nodes were found on the same host. 30 | 31 | .. _WMON3: 32 | 33 | WMON3 34 | _____ 35 | The recommended number of Monitor nodes is 3 for a high availability setup. 36 | 37 | .. _WMON4: 38 | 39 | WMON4 40 | _____ 41 | It is recommended to have an odd number of monitors so that failures can be 42 | tolerated. 43 | 44 | 45 | .. _WMON5: 46 | 47 | WMON5 48 | _____ 49 | Having a single monitor is not recommneded, as a failure would cause data loss. 50 | For high availability, at least 3 monitors is recommended. 51 | -------------------------------------------------------------------------------- /ceph_medic/tests/test_terminal.py: -------------------------------------------------------------------------------- 1 | from ceph_medic import terminal 2 | 3 | 4 | class FakeWriter(object): 5 | 6 | def __init__(self): 7 | self.calls = [] 8 | 9 | def write(self, string): 10 | self.calls.append(string) 11 | 12 | def flush(self): 13 | pass 14 | 15 | 16 | class TestWriteClearLine(object): 17 | 18 | def setup(self): 19 | self.fake_writer = FakeWriter() 20 | self.loader = terminal._Write( 21 | _writer=self.fake_writer, 22 | prefix='\r', 23 | clear_line=True 24 | ) 25 | 26 | def test_adds_padding_for_81_chars(self): 27 | self.loader.write('1234567890') 28 | assert len(self.fake_writer.calls[0]) == 81 29 | 30 | def test_remaining_padding_is_whitespace(self): 31 | self.loader.write('1234567890') 32 | assert self.fake_writer.calls[0][11:] == ' ' * 70 33 | 34 | def test_long_line_adds_only_ten_chars(self): 35 | self.loader.write('1'*81) 36 | assert self.fake_writer.calls[0][82:] == ' ' * 10 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Red Hat, Inc. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /ceph_medic/tests/checks/test_cluster.py: -------------------------------------------------------------------------------- 1 | from ceph_medic.checks import cluster 2 | from ceph_medic import metadata 3 | 4 | 5 | class TestCheckOSDs(object): 6 | 7 | def setup(self): 8 | metadata['cluster_name'] = 'ceph' 9 | metadata['osds'] = {} 10 | 11 | def teardown(self): 12 | metadata['osds'] = {} 13 | 14 | def test_no_osds(self): 15 | assert cluster.check_osds_exist() == ('ECLS1', 'There are no OSDs available') 16 | 17 | def test_osds_are_found(self): 18 | metadata['osds'] = {'osd1': {}} 19 | assert cluster.check_osds_exist() is None 20 | 21 | class TestNearfull(object): 22 | 23 | def setup(self): 24 | metadata['cluster'] = {} 25 | 26 | def teardown(self): 27 | metadata['cluster'] = {} 28 | 29 | def test_key_error_is_ignored(self): 30 | assert cluster.check_nearfull() is None 31 | def test_osd_map_is_nearfull(self): 32 | metadata['cluster'] = {'status': {'osdmap': {'osdmap': {'nearfull': True}}}} 33 | assert cluster.check_nearfull() == ('ECLS2', 'Cluster is nearfull') 34 | def test_osd_map_is_not_nearfull(self): 35 | metadata['cluster'] = {'status': {'osdmap': {'osdmap': {'nearfull': False}}}} 36 | -------------------------------------------------------------------------------- /ceph-medic.spec.in: -------------------------------------------------------------------------------- 1 | # 2 | # spec file for package ceph-medic 3 | # 4 | %global commit @COMMIT@ 5 | %global shortcommit %(c=%{commit}; echo ${c:0:7}) 6 | 7 | Name: ceph-medic 8 | Version: @VERSION@ 9 | Release: @RELEASE@%{?dist} 10 | Summary: Find common issues on Ceph clusters 11 | License: MIT 12 | URL: https://github.com/ceph/ceph-medic 13 | Source0: %{name}-%{version}-%{shortcommit}.tar.gz 14 | BuildRequires: python-devel 15 | BuildRequires: python-setuptools 16 | BuildRequires: pytest 17 | BuildRequires: python-remoto 18 | 
BuildRequires: python-mock 19 | BuildRequires: python-tambo 20 | Requires: python-remoto 21 | Requires: python-tambo 22 | Requires: python-execnet 23 | 24 | BuildArch: noarch 25 | 26 | 27 | %description 28 | An admin tool to determine common issues on Ceph storage clusters. 29 | 30 | %prep 31 | %autosetup -p1 32 | 33 | %build 34 | python setup.py build 35 | 36 | %install 37 | python setup.py install -O1 --skip-build --root %{buildroot} 38 | 39 | %check 40 | export PYTHONPATH=$(pwd) 41 | 42 | py.test-%{python_version} -v ceph_medic/tests 43 | 44 | %files 45 | %license LICENSE 46 | %doc README.rst 47 | %{_bindir}/ceph-medic 48 | %{python_sitelib}/* 49 | 50 | %changelog 51 | -------------------------------------------------------------------------------- /ceph_medic/tests/test_main.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import ceph_medic.main 3 | 4 | from mock import patch 5 | 6 | 7 | class TestMain(object): 8 | def test_main(self): 9 | assert ceph_medic.main 10 | 11 | def test_invalid_ssh_config(self, capsys): 12 | argv = ["ceph-medic", "--ssh-config", "/does/not/exist"] 13 | with pytest.raises(SystemExit): 14 | ceph_medic.main.Medic(argv) 15 | out, _ = capsys.readouterr() 16 | assert 'the given ssh config path does not exist' in out 17 | 18 | def test_valid_ssh_config(self, capsys): 19 | ssh_config = '/etc/ssh/ssh_config' 20 | argv = ["ceph-medic", "--ssh-config", ssh_config] 21 | 22 | def fake_exists(path): 23 | if path == ssh_config: 24 | return True 25 | if path.endswith('cephmedic.conf'): 26 | return False 27 | return True 28 | 29 | with patch.object(ceph_medic.main.os.path, 'exists') as m_exists: 30 | m_exists.side_effect = fake_exists 31 | ceph_medic.main.Medic(argv) 32 | out, _ = capsys.readouterr() 33 | assert 'ssh config path does not exist' not in out 34 | assert ssh_config == ceph_medic.main.ceph_medic.config.ssh_config 35 | 
def load_config(filepath, **kw):
    '''
    Creates a configuration dictionary from a file.

    The file is expected to contain valid Python. It is compiled first to
    surface SyntaxErrors with accurate file/line information, optionally
    imported as a module (purely for more verbose import-time error
    reporting), and finally exec'd so that its top-level names become the
    keys of the returned dictionary.

    :param filepath: The path to the file. ``~`` is expanded.
    :param kw: Unused; accepted for call-site compatibility.
    :returns: dict of the file's top-level names, plus an ``__file__`` key
        set to the absolute path of the file.
    :raises RuntimeError: if ``filepath`` does not point to a regular file.
    '''

    abspath = os.path.abspath(os.path.expanduser(filepath))
    conf_dict = {}
    if not os.path.isfile(abspath):
        raise RuntimeError('`%s` is not a file.' % abspath)

    # First, make sure the code will actually compile (and has no SyntaxErrors)
    with open(abspath, 'rb') as f:
        compiled = compile(f.read(), abspath, 'exec')

    # Next, attempt to actually import the file as a module.
    # This provides more verbose import-related error reporting than exec()
    # NOTE(review): `imp` is deprecated (removed in Python 3.12); migrating
    # to importlib would need its own change and testing
    absname, _ = os.path.splitext(abspath)
    basepath, module_name = absname.rsplit(os.sep, 1)
    try:
        imp.load_module(
            module_name,
            *imp.find_module(module_name, [basepath])
        )
    except ImportError:
        # best-effort only: the exec below is what actually populates
        # conf_dict, so a failed import is deliberately ignored
        pass

    # If we were able to import as a module, actually exec the compiled code
    exec(compiled, globals(), conf_dict)
    conf_dict['__file__'] = abspath
    return conf_dict
class TestLogSetup(object):
    """Tests for log.setup() behavior around the --log-path option."""

    def teardown(self):
        # drop any handlers added by log.setup so tests stay isolated
        logging.getLogger().handlers = []

    def test_barf_when_config_path_does_not_exist(self, tmpdir):
        location = os.path.join(str(tmpdir), 'ceph-medic.conf')
        with open(location, 'w') as _f:
            _f.write("""\n[global]\n--log-path=/bogus/path""")
        config = configuration.load(location)
        with pytest.raises(RuntimeError) as error:
            log.setup(config)
        assert 'value does not exist' in str(error.value)

    def test_create_log_config_correctly(self, tmpdir):
        tmp_log_path = str(tmpdir)
        location = os.path.join(tmp_log_path, 'ceph-medic.conf')
        with open(location, 'w') as _f:
            _f.write("""\n[global]\n--log-path=%s""" % tmp_log_path)
        log.setup(configuration.load(location))
        # tox has its own logger now, we need to make sure we are talking
        # about the actual configured ones by ceph-medic
        handlers = logging.getLogger().handlers
        ceph_medic_loggers = [
            h for h in handlers if 'ceph-medic' in getattr(h, 'baseFilename', '')
        ]
        assert len(ceph_medic_loggers) == 1
-------------------------------------------------------------------------------- /tests/functional/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = {ansible2.2,ansible2.3,ansible2.4}-{nightly_centos7} 3 | skipsdist = True 4 | 5 | [testenv] 6 | whitelist_externals = 7 | vagrant 8 | bash 9 | git 10 | passenv=* 11 | setenv= 12 | ANSIBLE_SSH_ARGS = -F {changedir}/vagrant_ssh_config 13 | ansible2.2: ANSIBLE_STDOUT_CALLBACK = debug 14 | ANSIBLE_RETRY_FILES_ENABLED = False 15 | ANSIBLE_SSH_RETRIES = 5 16 | ANSIBLE_ACTION_PLUGINS = {envdir}/tmp/ceph-ansible/plugins/actions 17 | deps= 18 | ansible1.9: ansible==1.9.4 19 | ansible2.1: ansible==2.1 20 | ansible2.2: ansible==2.2.3 21 | ansible2.3: ansible==2.3.1 22 | ansible2.4: ansible==2.4.2 23 | notario>=0.0.13 24 | changedir= 25 | nightly_centos7: {toxinidir}/centos7 26 | commands= 27 | git clone -b {env:CEPH_ANSIBLE_BRANCH:master} --single-branch https://github.com/ceph/ceph-ansible.git {envdir}/tmp/ceph-ansible 28 | 29 | vagrant up --no-provision {posargs:--provider=virtualbox} 30 | bash {toxinidir}/scripts/generate_ssh_config.sh {changedir} 31 | 32 | # install ceph-medic on 'client0' vm and setup nodes for testing 33 | ansible-playbook -vv -i {changedir}/hosts {toxinidir}/playbooks/setup.yml --extra-vars="ceph_medic_branch={env:CEPH_MEDIC_DEV_BRANCH:master}" 34 | # use ceph-ansible to deploy a ceph cluster on the rest of the vms 35 | ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/site.yml.sample 36 | # use ceph-medic to check the cluster we just created 37 | ansible-playbook -vv -i {changedir}/hosts {changedir}/test.yml 38 | 39 | vagrant destroy --force 40 | -------------------------------------------------------------------------------- /docs/source/changelog.rst: -------------------------------------------------------------------------------- 1 | 1.0.8 2 | ----- 3 | 17-Jun-2020 4 | 5 | * Fix issues with podman support 6 | 7 | 1.0.7 8 
| ----- 9 | 24-Mar-2020 10 | 11 | * Fix test bugs that were breaking rpm builds 12 | 13 | 1.0.6 14 | ----- 15 | 11-Feb-2020 16 | 17 | * Docker, podman container support 18 | * Fix broken SSH config option 19 | * Fix querying the Ceph version via admin socket on newer Ceph versions 20 | 21 | 1.0.5 22 | ----- 23 | 27-Jun-2019 24 | 25 | * Add check for minimum OSD node count 26 | * Add check for minimum MON node count 27 | * Remove reporting of nodes that can't connect, report them separetely 28 | * Kubernetes, Openshift, container support 29 | * Fix unidentifiable user/group ID issues 30 | * Rook support 31 | * Report on failed nodes 32 | * When there are errors, set a non-zero exit status 33 | * Add separate "cluster wide" checks, which run once 34 | * Be able to retrieve socket configuration 35 | * Fix issue with trying to run ``whoami`` to test remote connections, use 36 | ``true`` instead 37 | * Add check for missing FSID 38 | * Skip OSD validation when there isn't any ceph.conf 39 | * Skip tmp directories in /var/lib/ceph scanning to prevent blowing up 40 | * Detect collocated daemons 41 | * Allow overriding ignores in the CLI, fallback to the config file 42 | * Break documentation up to have installation away from getting started 43 | 44 | 45 | 1.0.4 46 | ----- 47 | 20-Aug-2018 48 | 49 | * Add checks for parity between installed and socket versions 50 | * Fix issues with loading configuration with whitespace 51 | * Add check for min_pool_size 52 | * Collect versions from running daemons 53 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | Contributing to ceph-medic 2 | =========================== 3 | Before any contributions, a reference ticket *must* exist. 
To open a new 4 | issue, requests can go to: 5 | 6 | https://github.com/ceph/ceph-medic/issues/new 7 | 8 | commits 9 | ------- 10 | Once a ticket exists, commits should be prefaced by the ticket ID. This makes 11 | it easier for maintainers to keep track of why a given line changed, mapping 12 | directly to work done on a ticket. 13 | 14 | For tickets coming from tracker.ceph.com, we expect the following format:: 15 | 16 | [RM-0000] this is a commit message for tracker.ceph.com 17 | 18 | ``RM`` stands for Redmine which is the software running tracker.ceph.com. 19 | Similarly, if a ticket was created in bugzilla.redhat.com, we expect the 20 | following format:: 21 | 22 | [BZ-0000] this is a commit message for bugzilla.redhat.com 23 | 24 | 25 | To automate this process, you can create a branch with the tracker identifier 26 | and id (replace "0000" with the ticket number):: 27 | 28 | git checkout -b RM-0000 29 | 30 | And then use the follow prepare-commit-msg: 31 | https://gist.github.com/alfredodeza/6d62d99a95c9a7975fbe 32 | 33 | Copy that file to ``$GITREPOSITORY/.git/hooks/prepare-commit-msg`` 34 | and mark it executable. 35 | 36 | Your commit messages should then be automatically prefixed with the branch name 37 | based off of the issue tracker. 38 | 39 | tests and documentation 40 | ----------------------- 41 | Wherever it is feasible, tests must exist and documentation must be added or 42 | improved depending on the change. 43 | 44 | The build process not only runs tests but ensures that docs can be built from 45 | the proposed changes as well. 
46 | -------------------------------------------------------------------------------- /tests/functional/playbooks/setup.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - hosts: all 3 | gather_facts: True 4 | tasks: 5 | - name: write all nodes to /etc/hosts 6 | sudo: yes 7 | blockinfile: 8 | dest: /etc/hosts 9 | block: | 10 | {{ hostvars[item]["address"] }} {{ item }} 11 | marker: "# {mark} ANSIBLE MANAGED BLOCK {{ item }}" 12 | with_inventory_hostnames: all 13 | 14 | - hosts: medic 15 | become: yes 16 | tasks: 17 | 18 | - name: fetch shaman ceph-medic repo 19 | get_url: 20 | url: https://shaman.ceph.com/api/repos/ceph-medic/{{ ceph_medic_branch }}/latest/centos/7/repo 21 | dest: /etc/yum.repos.d/ceph-medic.repo 22 | 23 | - name: print contents of /etc/yum.repos.d/ceph-medic.repo 24 | command: cat /etc/yum.repos.d/ceph-medic.repo 25 | 26 | - name: install epel-release 27 | yum: 28 | name: epel-release 29 | state: present 30 | 31 | - name: install python-tambo 32 | yum: 33 | name: python-tambo 34 | state: present 35 | enablerepo: epel-testing 36 | 37 | - name: install ceph-medic 38 | yum: 39 | name: ceph-medic 40 | state: present 41 | 42 | - name: test ceph-medic install 43 | become: no 44 | command: ceph-medic --help 45 | 46 | - name: copy vagrant insecure private ssh key 47 | copy: 48 | src: ~/.vagrant.d/insecure_private_key 49 | dest: /home/vagrant/.ssh/id_dsa 50 | mode: 0600 51 | owner: vagrant 52 | group: vagrant 53 | 54 | - name: turn off StrictHostKeyChecking 55 | blockinfile: 56 | dest: /home/vagrant/.ssh/config 57 | create: yes 58 | mode: 0400 59 | owner: vagrant 60 | group: vagrant 61 | block: | 62 | Host * 63 | StrictHostKeyChecking no 64 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for constructing RPMs. 
2 | # Try "make" (for SRPMS) or "make rpm" 3 | 4 | NAME = ceph-medic 5 | 6 | # Set the RPM package NVR from "git describe". 7 | # Examples: 8 | # 9 | # A "git describe" value of "v2.2.0rc1" would create an NVR 10 | # "ceph-medic-2.2.0-0.rc1.1.el7" 11 | # 12 | # A "git describe" value of "v2.2.0rc1-1-gc465f85" would create an NVR 13 | # "ceph-medic-2.2.0-0.rc1.1.gc465f85.el7" 14 | # 15 | # A "git describe" value of "v2.2.0" creates an NVR 16 | # "ceph-medic-2.2.0-1.el7" 17 | 18 | VERSION := $(shell git describe --tags --abbrev=0 --match 'v*' | sed 's/^v//') 19 | COMMIT := $(shell git rev-parse HEAD) 20 | SHORTCOMMIT := $(shell echo $(COMMIT) | cut -c1-7) 21 | RELEASE := $(shell git describe --tags --match 'v*' \ 22 | | sed 's/^v//' \ 23 | | sed 's/^[^-]*-//' \ 24 | | sed 's/-/./') 25 | ifeq ($(VERSION),$(RELEASE)) 26 | RELEASE = 1 27 | endif 28 | ifneq (,$(findstring rc,$(VERSION))) 29 | RC := $(shell echo $(VERSION) | sed 's/.*rc/rc/') 30 | RELEASE := 0.$(RC).$(RELEASE) 31 | VERSION := $(subst $(RC),,$(VERSION)) 32 | endif 33 | NVR := $(NAME)-$(VERSION)-$(RELEASE).el7 34 | 35 | all: srpm 36 | 37 | # Testing only 38 | echo: 39 | echo COMMIT $(COMMIT) 40 | echo VERSION $(VERSION) 41 | echo RELEASE $(RELEASE) 42 | echo NVR $(NVR) 43 | 44 | clean: 45 | rm -rf dist/ 46 | rm -rf ceph-medic-$(VERSION)-$(SHORTCOMMIT).tar.gz 47 | rm -rf $(NVR).src.rpm 48 | 49 | dist: 50 | git archive --format=tar.gz --prefix=ceph-medic-$(VERSION)/ HEAD > ceph-medic-$(VERSION)-$(SHORTCOMMIT).tar.gz 51 | 52 | spec: 53 | sed ceph-medic.spec.in \ 54 | -e 's/@COMMIT@/$(COMMIT)/' \ 55 | -e 's/@VERSION@/$(VERSION)/' \ 56 | -e 's/@RELEASE@/$(RELEASE)/' \ 57 | > ceph-medic.spec 58 | 59 | srpm: dist spec 60 | fedpkg -v --dist epel7 srpm 61 | 62 | rpm: dist srpm 63 | mock -r epel-7-x86_64 rebuild $(NVR).src.rpm \ 64 | --resultdir=. 
\ 65 | --define "dist .el7" 66 | 67 | .PHONY: dist rpm srpm 68 | -------------------------------------------------------------------------------- /docs/source/codes/common.rst: -------------------------------------------------------------------------------- 1 | Common 2 | ====== 3 | The following checks indiciate general issues with the cluster that are not specific to any daemon type. 4 | 5 | Warnings 6 | -------- 7 | 8 | .. _WCOM1: 9 | 10 | WCOM1 11 | ^^^^^ 12 | A running OSD and MON daemon were detected in the same node. Colocating OSDs and MONs is highly discouraged. 13 | 14 | 15 | Errors 16 | ------ 17 | 18 | .. _ECOM1: 19 | 20 | ECOM1 21 | ^^^^^ 22 | A ceph configuration file cannot be found at ``/etc/ceph/$cluster-name.conf``. 23 | 24 | .. _ECOM2: 25 | 26 | ECOM2 27 | ^^^^^ 28 | The ``ceph`` executable was not found. 29 | 30 | .. _ECOM3: 31 | 32 | ECOM3 33 | ^^^^^ 34 | The ``/var/lib/ceph`` directory does not exist or could not be collected. 35 | 36 | .. _ECOM4: 37 | 38 | ECOM4 39 | ^^^^^ 40 | The ``/var/lib/ceph`` directory was not owned by the ``ceph`` user. 41 | 42 | .. _ECOM5: 43 | 44 | ECOM5 45 | ^^^^^ 46 | The ``fsid`` defined in the configuration differs from other nodes in the cluster. The ``fsid`` must be 47 | the same for all nodes in the cluster. 48 | 49 | .. _ECOM6: 50 | 51 | ECOM6 52 | ^^^^^ 53 | The installed version of ``ceph`` is not the same for all nodes in the cluster. The ``ceph`` version should be 54 | the same for all nodes in the cluster. 55 | 56 | .. _ECOM7: 57 | 58 | ECOM7 59 | ^^^^^ 60 | The installed version of ``ceph`` is not the same as the one of a running ceph daemon. The installed ``ceph`` version should be the same as all running ceph daemons. If they do not match, the daemons most likely have not been restarted correctly after a version change. 61 | 62 | .. _ECOM8: 63 | 64 | ECOM8 65 | ^^^^^ 66 | The ``fsid`` field must exist in the configuration for each node. 67 | 68 | 69 | .. 
_ECOM9: 70 | 71 | ECOM9 72 | ^^^^^ 73 | A cluster should not have running daemons with a cluster ``fsid`` that is different from the rest of the daemons in a cluster. This potentially means that different cluster identifiers are being used, and that should not be the case. 74 | 75 | 76 | .. _ECOM10: 77 | 78 | ECOM10 79 | ^^^^^^ 80 | Only a single monitor daemon shuld be running per host, having more than one monitor running on the same host reduces a cluster's resilience if the node goes down. 81 | -------------------------------------------------------------------------------- /ceph_medic/decorators.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from ceph_medic import terminal 4 | from functools import wraps 5 | 6 | 7 | def catches(catch=None, handler=None, exit=True): 8 | """ 9 | Very simple decorator that tries any of the exception(s) passed in as 10 | a single exception class or tuple (containing multiple ones) returning the 11 | exception message and optionally handling the problem if it rises with the 12 | handler if it is provided. 13 | 14 | So instead of douing something like this:: 15 | 16 | def bar(): 17 | try: 18 | some_call() 19 | print "Success!" 20 | except TypeError, exc: 21 | print "Error while handling some call: %s" % exc 22 | sys.exit(1) 23 | 24 | You would need to decorate it like this to have the same effect:: 25 | 26 | @catches(TypeError) 27 | def bar(): 28 | some_call() 29 | print "Success!" 30 | 31 | If multiple exceptions need to be catched they need to be provided as a 32 | tuple:: 33 | 34 | @catches((TypeError, AttributeError)) 35 | def bar(): 36 | some_call() 37 | print "Success!" 
def make_exception_message(exc):
    """
    An exception is passed in and this function returns the proper string
    depending on the result so it is readable enough: a red-arrow prefix,
    the exception class name, and the message (when there is one).
    """
    name = exc.__class__.__name__
    description = str(exc)
    if description:
        return '%s %s: %s\n' % (terminal.red_arrow, name, description)
    return '%s %s\n' % (terminal.red_arrow, name)
class TestCephVersion(object):
    """Tests for commands.ceph_version (installed-package version)."""

    def test_gets_ceph_version(self, stub_check):
        stub_check(
            (['ceph version 14.1.1 (nautilus)', ''], [], 0),
            commands, 'check')
        assert commands.ceph_version(None) == 'ceph version 14.1.1 (nautilus)'

    def test_handles_non_zero_status(self, stub_check, conn):
        # a failing `ceph --version` call must yield None, not the stderr
        stub_check(
            (['error mr. robinson', ''], [], 1),
            commands, 'check')
        assert commands.ceph_version(conn) is None


class TestDaemonSocketConfig(object):
    """Tests for commands.daemon_socket_config (admin-socket config dump)."""

    def test_loadable_json(self, stub_check, conn):
        stub_check((['{"config": true}'], [], 0), commands, 'check')
        assert commands.daemon_socket_config(conn, '/') == {'config': True}

    def test_unloadable_json(self, stub_check, conn):
        # malformed JSON from the socket degrades to an empty mapping
        stub_check((['{config: []}'], [], 0), commands, 'check')
        assert commands.daemon_socket_config(conn, '/') == {}
They are under better control and will 30 | # not get updated frequently unless required for build systems. These are (for 31 | # now): 32 | # 33 | # * ceph/ubuntu-xenial 34 | # 35 | # Ubuntu: ceph/ubuntu-xenial bento/ubuntu-16.04 or ubuntu/trusty64 or ubuntu/wily64 36 | # CentOS: bento/centos-7.1 or puppetlabs/centos-7.0-64-puppet 37 | # libvirt CentOS: centos/7 38 | # parallels Ubuntu: parallels/ubuntu-14.04 39 | # Debian: deb/jessie-amd64 - be careful the storage controller is named 'SATA Controller' 40 | # For more boxes have a look at: 41 | # - https://atlas.hashicorp.com/boxes/search?utf8=✓&sort=&provider=virtualbox&q= 42 | # - https://download.gluster.org/pub/gluster/purpleidea/vagrant/ 43 | vagrant_box: centos/7 44 | client_vagrant_box: centos/7 45 | #ssh_private_key_path: "~/.ssh/id_rsa" 46 | # The sync directory changes based on vagrant box 47 | # Set to /home/vagrant/sync for Centos/7, /home/{ user }/vagrant for openstack and defaults to /vagrant 48 | #vagrant_sync_dir: /home/vagrant/sync 49 | #vagrant_sync_dir: / 50 | # Disables synced folder creation. Not needed for testing, will skip mounting 51 | # the vagrant directory on the remote box regardless of the provider. 52 | vagrant_disable_synced_folder: true 53 | # VAGRANT URL 54 | # This is a URL to download an image from an alternate location. vagrant_box 55 | # above should be set to the filename of the image. 
56 | # Fedora virtualbox: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box 57 | # Fedora libvirt: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-libvirt.box 58 | # vagrant_box_url: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box 59 | -------------------------------------------------------------------------------- /ceph_medic/util/mon.py: -------------------------------------------------------------------------------- 1 | import remoto 2 | import json 3 | import ceph_medic 4 | from ceph_medic import terminal 5 | 6 | 7 | def get_mon_report(conn): 8 | command = [ 9 | 'ceph', 10 | '--cluster=%s' % ceph_medic.metadata['cluster_name'], 11 | 'report' 12 | ] 13 | out, err, code = remoto.process.check( 14 | conn, 15 | command 16 | ) 17 | 18 | if code > 0: 19 | terminal.error('failed to connect to the cluster to fetch a report from the monitor') 20 | terminal.error('command: %s' % ' '.join(command)) 21 | for line in err: 22 | terminal.error(line) 23 | raise RuntimeError() 24 | 25 | try: 26 | return json.loads(b''.join(out).decode('utf-8')) 27 | except ValueError: 28 | return {} 29 | 30 | 31 | def get_cluster_nodes(conn): 32 | """ 33 | Ask a monitor (with a pre-made connection) about all the nodes in 34 | a cluster. This will be able to get us all known MONs and OSDs. 
35 | 36 | It returns a dictionary with a mapping that looks like:: 37 | 38 | { 39 | 'mons': [ 40 | { 41 | 'host': 'node1', 42 | 'public_ip': '192.168.1.100', 43 | }, 44 | ], 45 | 'osds': [ 46 | { 47 | 'host': 'node2', 48 | 'public_ip': '192.168.1.101', 49 | }, 50 | { 51 | 'host': 'node3', 52 | 'public_ip': '192.168.1.102', 53 | }, 54 | ] 55 | } 56 | 57 | """ 58 | report = get_mon_report(conn) 59 | nodes = {'mons': [], 'osds': []} 60 | try: 61 | # XXX Is this really needed? in what case we wouldn't have a monmap 62 | # with mons? 63 | mons = report['monmap']['mons'] 64 | except KeyError: 65 | raise SystemExit(report) 66 | for i in mons: 67 | nodes['mons'].append({ 68 | 'host': i['name'], 69 | 'public_ip': _extract_ip_address(i['public_addr']) 70 | }) 71 | 72 | osds = report['osd_metadata'] 73 | for i in osds: 74 | nodes['osds'].append({ 75 | 'host': i['hostname'], 76 | 'public_ip': _extract_ip_address(i['front_addr']) 77 | }) 78 | 79 | return nodes 80 | 81 | 82 | # XXX does not support IPV6 83 | 84 | def _extract_ip_address(string): 85 | """ 86 | Addresses from Ceph reports can come up with subnets and ports using ':' 87 | and '/' to identify them properly. Parse those types of strings to extract 88 | just the IP. 
89 | """ 90 | port_removed = string.split(':')[0] 91 | return port_removed.split('/')[0] 92 | -------------------------------------------------------------------------------- /ceph_medic/checks/osds.py: -------------------------------------------------------------------------------- 1 | from ceph_medic import metadata 2 | from ceph_medic.util import configuration 3 | 4 | 5 | # 6 | # Utilities 7 | # 8 | 9 | def get_osd_ceph_fsids(data): 10 | fsids = [] 11 | for file_path in data['paths']['/var/lib/ceph']['files'].keys(): 12 | if "ceph_fsid" in file_path: 13 | fsids.append(data['paths']['/var/lib/ceph']['files'][file_path]['contents'].strip()) 14 | return set(fsids) 15 | 16 | 17 | # XXX move out to a central utility module for other checks 18 | def get_ceph_conf(data): 19 | path = '/etc/ceph/%s.conf' % metadata['cluster_name'] 20 | try: 21 | conf_file = data['paths']['/etc/ceph']['files'][path] 22 | except KeyError: 23 | return None 24 | return configuration.load_string(conf_file['contents']) 25 | 26 | 27 | def check_osd_ceph_fsid(host, data): 28 | code = 'WOSD1' 29 | msg = "Multiple ceph_fsid values found: %s" 30 | 31 | current_fsids = get_osd_ceph_fsids(data) 32 | 33 | if len(current_fsids) > 1: 34 | return code, msg % ", ".join(current_fsids) 35 | 36 | 37 | def check_min_pool_size(host, data): 38 | code = 'WOSD2' 39 | msg = 'osd default pool min_size is set to 1, can potentially lose data' 40 | conf = get_ceph_conf(data) 41 | if not conf: # no ceph.conf found! 42 | return 43 | size = conf.get_safe('global', 'osd_pool_default_min_size', '0') 44 | if int(size) == 1: 45 | return code, msg 46 | 47 | 48 | def check_min_osd_nodes(host, data): 49 | code = 'WOSD3' 50 | msg = 'OSD nodes might not be enough for a healthy cluster (%s needed, %s found)' 51 | conf = get_ceph_conf(data) 52 | if not conf: # no ceph.conf found! 
def check_reasonable_ratios(host, data):
    """
    WOSD4: warn when any of the OSD map fullness ratios has been modified
    away from its well-known default value.

    :param host: hostname being checked (unused, kept for the check API)
    :param data: collected facts for the host; the ratios are read from
        ``data['ceph']['osd']['dump']`` when present
    :returns: ``(code, message)`` tuple naming the diverging ratios, or
        ``None`` when every present ratio matches its default
    """
    code = 'WOSD4'
    msg = 'Ratios have been modified to unreasonable values: %s'
    unreasonable_ratios = []
    reasonable_ratios = {
        "backfillfull_ratio": 0.9,
        "nearfull_ratio": 0.85,
        "full_ratio": 0.95
    }

    dump = data['ceph']['osd'].get('dump', {})
    for name, default in reasonable_ratios.items():
        ratio = dump.get(name)
        # BUG FIX: use `is None` instead of truthiness so a ratio of 0.0
        # (a clearly modified value) is flagged rather than skipped, and
        # compare against the unpacked default instead of re-indexing
        if ratio is None:
            continue
        if ratio != default:
            unreasonable_ratios.append(name)
    if unreasonable_ratios:
        msg = msg % ', '.join(sorted(unreasonable_ratios))
        return code, msg
18 | 19 | Follow these steps to install a CentOS 7 repo from download.ceph.com: 20 | 21 | - Install the latest RPM repo from download.ceph.com:: 22 | 23 | wget http://download.ceph.com/ceph-medic/latest/rpm/el7/ceph-medic.repo -O /etc/yum.repos.d/ceph-medic.repo 24 | 25 | - Install ``epel-release``:: 26 | 27 | 28 | yum install epel-release 29 | 30 | - Install the GPG key for ``ceph-medic``:: 31 | 32 | wget https://download.ceph.com/keys/release.asc 33 | rpm --import release.asc 34 | 35 | - Install ``ceph-medic``:: 36 | 37 | yum install ceph-medic 38 | 39 | - Verify your install:: 40 | 41 | ceph-medic --help 42 | 43 | Shaman Repos 44 | ------------ 45 | 46 | Every branch pushed to ceph-medic.git gets a RPM repo created and stored at 47 | shaman.ceph.com. Currently, only RPM repos built for CentOS 7 are supported. 48 | 49 | Browse https://shaman.ceph.com/repos/ceph-medic to find the available repos. 50 | 51 | .. Note:: 52 | Shaman repos are available for 2 weeks before they are automatically deleted. 53 | However, there should always be a repo available for the master branch of ``ceph-medic``. 54 | 55 | ``ceph-medic`` has dependencies on packages found in EPEL, so EPEL will need to be enabled. 
def as_list(string):
    """
    Split a comma-separated string into a list of trimmed items.

    Returns an empty list for ``None``/empty input, and discards empty
    entries produced by stray, leading, trailing, or doubled commas
    (e.g. ``'a,,b'`` or ``',a,'`` both become real items only).

    :param string: comma-separated codes (e.g. ``'ECOM1, WOSD2'``) or None
    :returns: list of stripped, non-empty strings
    """
    if not string:
        return []
    # split on commas, trim whitespace, and drop empty entries so that
    # doubled or dangling commas never produce '' items
    return [x.strip() for x in string.split(',') if x.strip()]
29 | 30 | 31 | Loaded Config Path: {config_path} 32 | 33 | Configured Nodes: 34 | {configured_nodes} 35 | """ 36 | 37 | def __init__(self, argv=None, parse=True): 38 | self.argv = argv or sys.argv 39 | 40 | @property 41 | def subcommand_args(self): 42 | # find where `check` is 43 | index = self.argv.index('check') 44 | # slice the args 45 | return self.argv[index:] 46 | 47 | def _help(self): 48 | node_section = [] 49 | for daemon, node in ceph_medic.config.nodes.items(): 50 | header = "\n* %s:\n" % daemon 51 | body = '\n'.join([" %s" % n for n in ceph_medic.config.nodes[daemon]]) 52 | node_section.append(header+body+'\n') 53 | return self.long_help.format( 54 | configured_nodes=''.join(node_section), 55 | config_path=ceph_medic.config.config_path 56 | ) 57 | 58 | def main(self): 59 | options = ['--ignore'] 60 | config_ignores = ceph_medic.config.file.get_list('check', '--ignore') 61 | parser = Transport( 62 | self.argv, options=options, 63 | check_version=False 64 | ) 65 | parser.catch_help = self._help() 66 | parser.parse_args() 67 | ignored_codes = as_list(parser.get('--ignore', '')) 68 | # fallback to the configuration if nothing is defined in the CLI 69 | if not ignored_codes: 70 | ignored_codes = config_ignores 71 | 72 | if len(self.argv) < 1: 73 | return parser.print_help() 74 | 75 | # populate the nodes metadata with the configured nodes 76 | for daemon in ceph_medic.config.nodes.keys(): 77 | ceph_medic.metadata['nodes'][daemon] = [] 78 | for daemon, nodes in ceph_medic.config.nodes.items(): 79 | for node in nodes: 80 | node_metadata = {'host': node['host']} 81 | if 'container' in node: 82 | node_metadata['container'] = node['container'] 83 | ceph_medic.metadata['nodes'][daemon].append(node_metadata) 84 | 85 | collector.collect() 86 | test = runner.Runner() 87 | test.ignore = ignored_codes 88 | results = test.run() 89 | runner.report(results) 90 | #XXX might want to make this configurable to not bark on warnings for 91 | # example, setting forcefully for 
now, but the results object doesn't 92 | # make a distinction between error and warning (!) 93 | if results.errors or results.warnings: 94 | sys.exit(1) 95 | -------------------------------------------------------------------------------- /docs/source/facts.rst: -------------------------------------------------------------------------------- 1 | Cluster node facts 2 | ================== 3 | Fact collection happens per node and creates a mapping of hosts and data 4 | gathered. Each daemon 'type' is the primary key:: 5 | 6 | ... 7 | 'osd': { 8 | 'node1': {...}, 9 | 'node2': {...}, 10 | } 11 | 'mon': { 12 | 'node3': {...}, 13 | } 14 | 15 | 16 | There are other top-level keys that make it easier to deal with fact metadata, for example a full list of all hosts discovered:: 17 | 18 | 'hosts': ['node1', 'node2', 'node3'], 19 | 'osds': ['node1', 'node2'], 20 | 'mons': ['node3'] 21 | 22 | 23 | Each host has distinct metadata that gets collected. If any errors are 24 | detected, the ``exception`` key is set populated with all information pertaining 25 | to the error generated when trying to execute the call. For example, a failed call to ``stat`` on a path might be:: 26 | 27 | 'osd': { 28 | 'node1': { 29 | 'paths': { 30 | '/var/lib/osd': { 31 | 'exception': { 32 | 'traceback': "Traceback (most recent call last):\n File "remote.py", line 3, in \n os.stat('/var/lib/osd')\n OSError: [Errno 2] No such file or directory: '/var/lib/osd'\n", 33 | 'name': 'OSError', 34 | 'repr': "[Errno 2] No such file or directory: '/root'" 35 | 'attributes': { 36 | args : "(2, 'No such file or directory')", 37 | errno : 2, 38 | filename : '/var/lib/ceph' , 39 | message : '', 40 | strerror : 'No such file or directory' 41 | } 42 | } 43 | } 44 | } 45 | } 46 | 47 | Note that objects will not get pickled, so data structures and objects will be 48 | sent back as plain text. 
def generate_inventory(inventory, to_stdout=False, tmp_dir=None):
    """
    Generates a host file to use with an ansible-playbook call.

    The first argument is a dictionary mapping that contains the group name as
    the key and a list of hostnames as values

    For example:

        {'mons': ['mon.host'], 'osds': ['osd1.host', 'osd1.host']}

    :param to_stdout: when True, print the rendered inventory and write nothing
    :param tmp_dir: optional directory in which ``hosts_file`` is created;
                    defaults to the current working directory. Previously this
                    parameter was accepted but silently ignored.
    """
    import os
    result = []
    for group_name, hosts in inventory.items():
        result.append("[{0}]".format(group_name))
        # a bare string is treated as a single-host group
        if not isinstance(hosts, list):
            hosts = [hosts]
        result.extend(hosts)
    result_str = "\n".join(result) + "\n"
    if to_stdout:
        print(result_str)
        return
    # honor tmp_dir so callers can direct where the file lands
    path = os.path.join(tmp_dir, 'hosts_file') if tmp_dir else 'hosts_file'
    with open(path, 'w') as hosts_file:
        hosts_file.write(result_str)
61 | 62 | Usage: 63 | 64 | ceph-medic generate [/path/to/ceph.conf] 65 | ceph-medic generate [MONITOR HOST] 66 | 67 | Loaded Config Path: {config_path} 68 | 69 | """ 70 | 71 | def __init__(self, argv=None, parse=True): 72 | self.argv = argv or sys.argv 73 | 74 | def _help(self): 75 | skip_internal = ['__file__', 'config_path', 'verbosity'] 76 | node_section = [] 77 | for daemon, node in ceph_medic.config['nodes'].items(): 78 | if daemon in skip_internal or not node: 79 | continue 80 | header = "\n* %s:\n" % daemon 81 | body = '\n'.join([" %s" % n for n in ceph_medic.config['nodes'][daemon].keys()]) 82 | node_section.append(header+body+'\n') 83 | return self.long_help.format( 84 | config_path=ceph_medic.config['config_path'] 85 | ) 86 | 87 | def main(self): 88 | options = ['--stdout'] 89 | parser = Transport( 90 | self.argv, options=options, 91 | check_version=False 92 | ) 93 | parser.catch_help = self._help() 94 | 95 | parser.parse_args() 96 | 97 | if len(self.argv) == 1: 98 | raise SystemExit("A monitor hostname or a ceph.conf file is required as an argument") 99 | 100 | node = self.argv[-1] 101 | inventory = {} 102 | 103 | with get_connection(node) as conn: 104 | report = get_mon_report(conn) 105 | try: 106 | mons = report['monmap']['mons'] 107 | except KeyError: 108 | raise SystemExit(report) 109 | inventory['mons'] = [i['name'] for i in mons] 110 | osds = report['osd_metadata'] 111 | inventory['osds'] = [i['hostname'] for i in osds] 112 | 113 | if not inventory: 114 | raise SystemExit('no hosts where found from remote monitor node: %s' % node) 115 | 116 | generate_inventory(inventory, to_stdout=parser.get('--stdout')) 117 | conn.exit() 118 | return 119 | -------------------------------------------------------------------------------- /ceph_medic/tests/util/test_configuration.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pytest 3 | from textwrap import dedent 4 | from ceph_medic.util import 
def make_hosts_file(filename, contents=None):
    """Create a hosts file at ``filename``; falls back to a minimal mons/osds inventory."""
    default = "[mons]\nmon0\n[osds]\nosd0\n"
    with open(filename, 'w') as f:
        f.write(contents or default)
class TestLoadString(object):
    """Tests for ``configuration.load_string``, which parses INI-style text."""

    def test_loads_valid_ceph_key(self):
        # A plain ceph.conf-style section/key should round-trip via get_safe.
        contents = dedent("""
        [global]
        cluster = ceph
        """)
        conf = configuration.load_string(contents)
        assert conf.get_safe('global', 'cluster') == 'ceph'

    def test_loads_key_with_spaces_converted(self):
        # Keys containing spaces are retrievable with underscores, as the
        # assertion below demonstrates.
        contents = dedent("""
        [global]
        some key here = ceph
        """)
        conf = configuration.load_string(contents)
        assert conf.get_safe('global', 'some_key_here') == 'ceph'
def ceph_status(conn):
    """
    Collect cluster status with ``ceph -s --format json`` over ``conn``.

    Returns the parsed status as a dict, an empty dict when the command
    exits non-zero or emits invalid JSON, and ``None`` when the command
    could not be executed at all (``RuntimeError`` from the remote check).
    """
    try:  # collects information using ceph -s
        stdout, stderr, exit_code = check(conn, ['ceph', '-s', '--format', 'json'])
        if exit_code != 0:
            # mirror ceph_osd_dump: surface the remote stderr instead of
            # attempting to parse output from a failed command
            conn.logger.error('could not get status from ceph')
            for line in stderr:
                conn.logger.error(line)
            return {}
        result = dict()
        try:
            result = json.loads(''.join(stdout))
        except ValueError:
            conn.logger.exception("failed to fetch ceph status, invalid json: %s" % ''.join(stdout))
        return result
    except RuntimeError:
        conn.logger.exception('failed to fetch ceph status')
def make_test_file(filename, contents=None):
    """Write ``contents`` (default: ``'foo'``) to ``filename``."""
    with open(filename, 'w') as f:
        f.write(contents or "foo")


def make_test_tree(path, contents=None, tree=None):
    """Build a small fixture tree under ``path``: file1.txt and dir1/file2.txt."""
    make_test_file(os.path.join(path, "file1.txt"))
    nested_dir = os.path.join(path, "dir1")
    os.mkdir(nested_dir)
    make_test_file(os.path.join(nested_dir, "file2.txt"))
class AttributeLandMine(object):
    """Test helper whose only attribute raises on access."""

    @property
    def explode(self):
        # Any read of .explode blows up, so capture_exception can be
        # exercised against objects with hostile attributes.
        raise ValueError('Raising on attribute access')
class ReleaseCommand(Command):
    """ Tag and push a new release. """

    # setuptools option spec: ``--sign`` / ``-s`` enables GPG signing
    user_options = [('sign', 's', 'GPG-sign the Git tag and release files')]

    def initialize_options(self):
        # signing is opt-in
        self.sign = False

    def finalize_options(self):
        pass

    def run(self):
        # Tag the current tree with v<version>, push the tag, then push master.
        # ``version`` is the module-level version string defined earlier in setup.py.
        # Create Git tag
        tag_name = 'v%s' % version
        cmd = ['git', 'tag', '-a', tag_name, '-m', 'version %s' % version]
        if self.sign:
            cmd.append('-s')
        print(' '.join(cmd))
        subprocess.check_call(cmd)

        # Push Git tag to origin remote
        cmd = ['git', 'push', 'origin', tag_name]
        print(' '.join(cmd))
        subprocess.check_call(cmd)

        # Push package to pypi
        cmd = ['python', 'setup.py', 'sdist', 'upload']
        if self.sign:
            cmd.append('--sign')
        print(' '.join(cmd))
        # NOTE(review): the upload call is commented out — only the command is
        # printed; confirm whether releases are uploaded by another mechanism.
        #subprocess.check_call(cmd)

        # Push master to the remote
        cmd = ['git', 'push', 'origin', 'master']
        print(' '.join(cmd))
        subprocess.check_call(cmd)
class TestOSDS(object):
    """Tests for ceph_medic.checks.osds fsid and pool-size checks."""

    def test_fails_check_ceph_fsid(self):
        # Two OSDs on the same host reporting different cluster fsids
        # should trigger the WOSD1 warning code.
        data = {'paths': {'/var/lib/ceph': {'files': {
            '/var/lib/ceph/osd/ceph-0/ceph_fsid': {'contents': "fsid1"},
            '/var/lib/ceph/osd/ceph-1/ceph_fsid': {'contents': "fsid2"},
        }}}}
        result = osds.check_osd_ceph_fsid(None, data)
        assert "WOSD1" in result

    def test_min_pool_size_fails(self, data):
        # ``data`` is a node-metadata factory fixture — presumably defined
        # in conftest.py; verify against the fixtures if this moves.
        metadata['cluster_name'] = 'ceph'
        contents = dedent("""
        [global]
        cluster = foo
        osd_pool_default_min_size = 1
        """)
        osd_data = data()
        osd_data['paths']['/etc/ceph']['files']['/etc/ceph/ceph.conf'] = {'contents': contents}
        code, error = osds.check_min_pool_size(None, osd_data)
        assert error == 'osd default pool min_size is set to 1, can potentially lose data'

    def test_min_pool_size_is_correct(self, data):
        # min_size of 2 is acceptable, so the check reports nothing (None).
        metadata['cluster_name'] = 'ceph'
        contents = dedent("""
        [global]
        cluster = foo
        osd_pool_default_min_size = 2
        """)
        osd_data = data()
        osd_data['paths']['/etc/ceph']['files']['/etc/ceph/ceph.conf'] = {'contents': contents}
        result = osds.check_min_pool_size(None, osd_data)
        assert result is None
    def test_min_osd_nodes_is_not_met(self, data):
        """A single OSD host is below the recommended minimum and yields WOSD3."""
        # register exactly one OSD host in the shared metadata mapping
        metadata['osds'] = {'osd1': []}
        metadata['cluster_name'] = 'ceph'
        osd_data = data()
        contents = dedent("""
        [global]
        cluster = foo
        osd_pool_default_min_size = 2
        """)
        osd_data['paths']['/etc/ceph']['files']['/etc/ceph/ceph.conf'] = {'contents': contents}
        code, error = osds.check_min_osd_nodes(None, osd_data)
        assert code == 'WOSD3'
        # the message reports how many nodes are needed vs. how many exist
        assert '6 needed, 1 found' in error
"full_ratio": 0.95 103 | } 104 | code, msg = osds.check_reasonable_ratios('node1', self.data) 105 | assert msg.endswith('backfillfull_ratio') 106 | 107 | def test_nearfull_is_messed_up(self): 108 | self.data['ceph']['osd']['dump'] = { 109 | "backfillfull_ratio": 0.9, 110 | "nearfull_ratio": 0.88, 111 | "full_ratio": 0.95 112 | } 113 | code, msg = osds.check_reasonable_ratios('node1', self.data) 114 | assert msg.endswith('nearfull_ratio') 115 | 116 | def test_full_is_messed_up(self): 117 | self.data['ceph']['osd']['dump'] = { 118 | "backfillfull_ratio": 0.9, 119 | "nearfull_ratio": 0.89, 120 | "full_ratio": 0.95 121 | } 122 | code, msg = osds.check_reasonable_ratios('node1', self.data) 123 | assert msg.endswith('full_ratio') 124 | -------------------------------------------------------------------------------- /ceph_medic/connection.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import socket 3 | import remoto 4 | import ceph_medic 5 | from execnet.gateway_bootstrap import HostNotFound 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | def get_connection(hostname, username=None, threads=5, use_sudo=None, detect_sudo=True, **kw): 11 | """ 12 | A very simple helper, meant to return a connection 13 | that will know about the need to use sudo. 
14 | """ 15 | if kw.get('logger') is False: # explicitly disable remote logging 16 | remote_logger = None 17 | else: 18 | remote_logger = logging.getLogger(hostname) 19 | 20 | if username: 21 | hostname = "%s@%s" % (username, hostname) 22 | 23 | if ceph_medic.config.ssh_config: 24 | hostname = "-F %s %s" % (ceph_medic.config.ssh_config, hostname) 25 | try: 26 | deployment_type = kw.get( 27 | 'deployment_type', 28 | ceph_medic.config.file.get_safe( 29 | 'global', 'deployment_type', 'baremetal') 30 | ) 31 | conn_obj = remoto.connection.get(deployment_type) 32 | if deployment_type in ['k8s', 'kubernetes', 'openshift', 'oc']: 33 | conn = container_platform_conn(hostname, conn_obj, deployment_type) 34 | # check if conn is ok 35 | stdout, stderr, code = remoto.process.check(conn, ['true']) 36 | if code: 37 | raise HostNotFound( 38 | 'Remote connection failed while testing connection:\n %s' % '\n'.join(stderr)) 39 | elif deployment_type in ['docker', 'podman']: 40 | if kw.get('logger', True): 41 | remote_logger = logging.getLogger(kw['container']) 42 | conn = conn_obj( 43 | hostname, 44 | container_name=kw['container'], 45 | logger=remote_logger, 46 | detect_sudo=detect_sudo, 47 | ) 48 | elif deployment_type in ['ssh', 'baremetal']: 49 | conn = conn_obj( 50 | hostname, 51 | logger=remote_logger, 52 | threads=threads, 53 | detect_sudo=detect_sudo, 54 | ) 55 | else: 56 | raise RuntimeError( 57 | 'Invalid deployment_type: %s' % deployment_type) 58 | # Set a timeout value in seconds to disconnect and move on 59 | # if no data is sent back. 
60 | conn.global_timeout = 300 61 | # XXX put this somewhere else 62 | if not ceph_medic.config.cluster_name: 63 | cluster_conf_files, stderr, exit_code = remoto.process.check(conn, ['ls', '/etc/ceph/']) 64 | cluster_name = 'ceph' 65 | if 'ceph.conf' not in cluster_conf_files: 66 | logger.warning('/etc/ceph/ceph.conf was not found, will try to infer the cluster name') 67 | for i in cluster_conf_files: 68 | if i.endswith('conf'): 69 | cluster_name = i.split('.conf')[0] 70 | logger.warning('inferred %s as the cluster name', cluster_name) 71 | ceph_medic.metadata['cluster_name'] = cluster_name 72 | else: 73 | ceph_medic.metadata['cluster_name'] = ceph_medic.config.cluster_name 74 | return conn 75 | except Exception as error: 76 | msg = "connecting to host: %s " % hostname 77 | errors = "resulted in errors: %s %s" % (error.__class__.__name__, error) 78 | logger.error(msg) 79 | logger.error(errors) 80 | raise error 81 | 82 | 83 | def container_platform_conn(hostname, conn_obj, deployment_type): 84 | """ 85 | This helper function is only valid for container platform connections like 86 | OpenShift or Kubernetes. Fetches the configuration needed to properly 87 | configure the connection object, and then returns it. 
def as_bytes(string):
    """
    Ensure that whatever type of string is incoming, it is returned as bytes,
    encoding to utf-8 otherwise
    """
    if not isinstance(string, bytes):
        string = string.encode('utf-8', errors='ignore')
    return string
def get_monitor_dirs(dirs):
    """
    Find all the /var/lib/ceph/mon/* directories. The metadata reports paths
    in a flat list, so nested entries like ``.../mon/ceph-a/store`` must be
    collapsed down to their top-level monitor dir (``ceph-a``). Returns the
    set of unique monitor dir names.
    """
    prefix = '/var/lib/ceph/mon/'
    # keep only paths under the mon prefix, then take the first path
    # component after the prefix so nested entries collapse to their dir
    return set(
        path.split(prefix)[-1].split('/')[0]
        for path in dirs
        if path.startswith(prefix)
    )
def check_mon_secret(host, data):
    """
    EMON1: flag monitors whose ``mon.`` secret key differs from this host's.

    Returns a ``(code, message)`` tuple when at least one other monitor
    reports a different secret, or ``None`` when every secret matches or
    the secret could not be read from the current host's keyring.
    """
    code = 'EMON1'
    msg = 'secret key "%s" is different than host(s): %s'
    mismatched_hosts = []

    current_secret = get_secret(data)
    if not current_secret:
        # there is no keyring file for the current host, so we can't compare
        return

    # fixed: the loop variable used to shadow the ``host`` parameter; use
    # distinct names so the current host name stays available
    for other_host, other_data in metadata['mons'].items():
        other_secret = get_secret(other_data)
        if not other_secret:
            # cannot compare with another host that may not have the secret
            continue
        if other_secret != current_secret:
            mismatched_hosts.append(other_host)

    if mismatched_hosts:
        return code, msg % (current_secret, ','.join(mismatched_hosts))
def failed_check(raise_=True):
    """
    Simulate a failing remote check: raise ``RuntimeError`` by default, or
    return a non-zero result dictionary when ``raise_`` is falsy.
    """
    if not raise_:
        return {'stdout': '', 'stderr': '', 'code': 1}
    raise RuntimeError('command failed')
hosts.container_platform('kubernetes') 34 | command = check.calls[0]['args'][1] 35 | assert command == [ 36 | 'kubectl', '--request-timeout=5', 'get', '-n', 37 | 'rook-ceph', 'pods', '-o', 'json' 38 | ] 39 | 40 | def test_garbage_stdout(self, stub_check, capsys): 41 | stub_check((['could not contact platform'], [], 1)) 42 | with pytest.raises(SystemExit): 43 | hosts.container_platform('kubernetes') 44 | stdout, stderr = capsys.readouterr() 45 | assert 'Unable to load JSON from stdout' in stdout 46 | assert 'could not contact platform' in stdout 47 | 48 | def test_garbage_stderr(self, stub_check, capsys): 49 | stub_check(([], ['could not contact platform'], 1)) 50 | with pytest.raises(SystemExit): 51 | hosts.container_platform('kubernetes') 52 | stdout, stderr = capsys.readouterr() 53 | assert 'Unable to load JSON from stdout' in stdout 54 | assert 'could not contact platform' in stdout 55 | 56 | def test_kubectl_with_context(self, stub_check): 57 | contents = dedent(""" 58 | [kubernetes] 59 | context = 87 60 | """) 61 | conf = configuration.load_string(contents) 62 | ceph_medic.config.file = conf 63 | check = stub_check((['{"items": {}}'], [], 1)) 64 | hosts.container_platform('kubernetes') 65 | command = check.calls[0]['args'][1] 66 | assert command == [ 67 | 'kubectl', '--context', '87', '--request-timeout=5', 'get', '-n', 68 | 'rook-ceph', 'pods', '-o', 'json' 69 | ] 70 | 71 | def test_oc_with_context(self, stub_check): 72 | contents = dedent(""" 73 | [openshift] 74 | context = 87 75 | """) 76 | conf = configuration.load_string(contents) 77 | ceph_medic.config.file = conf 78 | check = stub_check((['{"items": {}}'], [], 1)) 79 | hosts.container_platform() 80 | command = check.calls[0]['args'][1] 81 | assert command == [ 82 | 'oc', '--context', '87', '--request-timeout=5', 'get', '-n', 83 | 'rook-ceph', 'pods', '-o', 'json' 84 | ] 85 | 86 | 87 | class TestBasicContainers(object): 88 | binaries = ['docker', 'podman'] 89 | 90 | @pytest.mark.parametrize('binary', 
binaries) 91 | def test_executable_fails( 92 | self, binary, monkeypatch, make_nodes, capsys): 93 | monkeypatch.setattr(hosts.config, 'nodes', make_nodes(mgrs=['mgr0'])) 94 | monkeypatch.setattr( 95 | hosts.ceph_medic.connection, 'get_connection', 96 | lambda *a, **k: None) 97 | monkeypatch.setattr( 98 | hosts.process, 'check', lambda *a: failed_check(False)) 99 | hosts.basic_containers(binary) 100 | stdout, stderr = capsys.readouterr() 101 | assert 'Unable to list containers on host mgr0' in stdout 102 | 103 | @pytest.mark.parametrize('binary', binaries) 104 | def test_inspection( 105 | self, binary, monkeypatch, make_nodes, stub_check, capsys): 106 | monkeypatch.setattr(ceph_medic.config, 'cluster_name', 'ceph') 107 | monkeypatch.setattr(hosts.config, 'nodes', make_nodes(mgrs=['mgr0'])) 108 | monkeypatch.setattr( 109 | hosts.ceph_medic.connection, 'get_connection', 110 | lambda *a, **k: None) 111 | fake_list = '\n'.join(['mgr0-container']) 112 | fake_mgr = json.dumps([{ 113 | 'Name': 'mgr0-container', 114 | 'Config': { 115 | 'Env': [ 116 | 'CLUSTER=ceph', 117 | 'CEPH_DAEMON=MGR', 118 | ] 119 | } 120 | }]) 121 | stub_check([ 122 | ([fake_mgr], [''], 0), 123 | ([fake_list], [''], 0), 124 | ]) 125 | result = hosts.basic_containers(binary) 126 | assert result['mgrs'][0]['host'] == 'mgr0' 127 | assert result['mgrs'][0]['container'] == 'mgr0-container' 128 | -------------------------------------------------------------------------------- /ceph_medic/tests/checks/test_mons.py: -------------------------------------------------------------------------------- 1 | from ceph_medic import metadata 2 | from ceph_medic.checks import mons 3 | 4 | 5 | class TestGetSecret(object): 6 | 7 | def setup(self): 8 | self.data = { 9 | 'paths': { 10 | '/var/lib/ceph': { 11 | 'files': { 12 | '/var/lib/ceph/mon/ceph-mon-0/keyring': { 13 | 'contents': '', 14 | } 15 | } 16 | } 17 | } 18 | } 19 | 20 | def set_contents(self, string, file_path=None): 21 | file_path = file_path or 
class TestGetMonitorDirs(object):
    """Behavior of mons.get_monitor_dirs over flat directory listings."""

    def test_get_monitor_dirs(self):
        dirs = ['/var/lib/ceph/mon/ceph-mon-1', '/var/lib/ceph/something']
        assert mons.get_monitor_dirs(dirs) == {'ceph-mon-1'}

    def test_cannot_get_monitor_dirs(self):
        # osd paths must never be reported as monitor dirs
        dirs = ['/var/lib/ceph/osd/ceph-osd-1', '/var/lib/ceph/something']
        assert mons.get_monitor_dirs(dirs) == set()

    def test_get_monitor_dirs_multiple(self):
        dirs = [
            '/var/lib/ceph/mon/ceph-mon-1',
            '/var/lib/ceph/mon/ceph-mon-3',
            '/var/lib/ceph/mon/ceph-mon-2',
            '/var/lib/ceph/something',
        ]
        assert mons.get_monitor_dirs(dirs) == {
            'ceph-mon-1', 'ceph-mon-2', 'ceph-mon-3'}

    def test_get_monitor_dirs_nested_multiple(self):
        # nested paths collapse onto their top-level monitor dir
        dirs = [
            '/var/lib/ceph/mon/ceph-mon-1',
            '/var/lib/ceph/mon/ceph-mon-1/nested/dir/',
            '/var/lib/ceph/mon/ceph-mon-1/other/nested',
            '/var/lib/ceph/mon/ceph-mon-2',
            '/var/lib/ceph/something',
        ]
        assert mons.get_monitor_dirs(dirs) == {'ceph-mon-1', 'ceph-mon-2'}
class TestMonCountIsOdd(object):
    """WMON4 fires only when the monitor count is even."""

    def test_count_is_odd(self, data):
        metadata['mons'] = {'mon%s' % n: [] for n in range(3)}
        metadata['cluster_name'] = 'ceph'
        assert mons.check_mon_count_is_odd(None, data()) is None

    def test_recommended_count_is_unmet(self, data):
        metadata['mons'] = {'mon%s' % n: [] for n in range(2)}
        metadata['cluster_name'] = 'ceph'
        code, message = mons.check_mon_count_is_odd(None, data())
        assert code == 'WMON4'
        assert message == 'Number of MONs is not an odd number: 2'
def _platform_options(platform):
    """
    Pull the ``namespace`` and ``context`` options for ``platform`` from the
    loaded configuration file, falling back to the defaults ('rook-ceph'
    namespace, no context) when the configuration cannot be read.
    """
    options = {'namespace': 'rook-ceph', 'context': None}
    try:
        options = {
            'namespace': config.file.get_safe(platform, 'namespace', 'rook-ceph'),
            'context': config.file.get_safe(platform, 'context', None),
        }
    except RuntimeError:
        # no usable configuration file was loaded; keep the defaults
        pass
    return options
def container_platform(platform='openshift'):
    """
    Connect to a container platform (kubernetes or openshift), retrieve all
    the available pods that match the namespace (defaults to 'rook-ceph'),
    and return an inventory dictionary grouped by daemon type, regardless
    of pod state.

    Raises ``SystemExit`` when the platform command's output cannot be
    parsed as JSON.
    """
    local_conn = connection.get('local')()
    options = _platform_options(platform)
    context = options.get('context')
    namespace = options.get('namespace')
    # openshift ships its own client binary; everything else uses kubectl
    executable = 'oc' if platform == 'openshift' else 'kubectl'

    if context:
        cmd = [executable, '--context', context]
    else:
        cmd = [executable]

    cmd.extend(['--request-timeout=5', 'get', '-n', namespace, 'pods', '-o', 'json'])

    try:
        out, err, code = process.check(local_conn, cmd)
    except RuntimeError:
        # fixed: bind both streams as *lists of lines* — previously ``out``
        # was the string "{}" (joined only by accident of char iteration)
        # and ``err`` was left undefined, so the JSON-failure reporting
        # below would raise NameError on this path
        out, err, code = ['{}'], [], 0
        terminal.error('Unable to retrieve the pods using command: %s' % ' '.join(cmd))
    else:
        if code:
            # surface whatever the command printed; the JSON parse below
            # decides whether we can actually continue
            for line in out + err:
                terminal.error(line)

    try:
        pods = json.loads(''.join(out))
    except Exception:
        # Python3 raises JSONDecodeError (a ValueError subclass) which
        # doesn't exist in Python2; Python2 raises plain ValueError
        stdout = ''.join(out)
        stderr = ''.join(err)
        logger.exception('Invalid JSON from stdout')
        terminal.error('Unable to load JSON from stdout')
        if stdout:
            logger.error('stdout: %s', stdout)
            terminal.error('stdout: %s' % stdout)
        if stderr:
            logger.error('stderr: %s', stderr)
            terminal.error('stderr: %s' % stderr)
        raise SystemExit(1)

    base_inventory = {
        'rgws': [], 'mgrs': [], 'mdss': [], 'clients': [], 'osds': [], 'mons': []
    }
    # rook 'app' label -> inventory group
    label_map = {
        'rook-ceph-mgr': 'mgrs',
        'rook-ceph-mon': 'mons',
        'rook-ceph-osd': 'osds',
        'rook-ceph-mds': 'mdss',
        'rook-ceph-rgw': 'rgws',
        'rook-ceph-client': 'clients',
    }

    for item in pods.get('items', {}):
        label_name = item['metadata'].get('labels', {}).get('app')
        if not label_name:
            continue
        if label_name in label_map:
            base_inventory[label_map[label_name]].append(
                {'host': item['metadata']['name'], 'group': None}
            )
    # drop groups that ended up with no pods at all
    for key, value in dict(base_inventory).items():
        if not value:
            base_inventory.pop(key)
    return base_inventory
def basic_containers(deployment_type):
    """
    Build a node inventory by listing and inspecting plain containers
    (``docker`` or ``podman``) on every configured bare-metal host.

    Containers are matched to ceph daemon roles through their CEPH_DAEMON
    environment variable, and filtered by the CLUSTER variable so only
    containers belonging to the configured cluster are reported.
    """
    base_inventory = {
        'rgws': [], 'mgrs': [], 'mdss': [], 'clients': [], 'osds': [],
        'mons': []
    }
    # CEPH_DAEMON value -> inventory group
    label_map = {
        'OSD': 'osds',
        'OSD_CEPH_VOLUME_ACTIVATE': 'osds',
        'MON': 'mons',
        'MGR': 'mgrs',
        'MDS': 'mdss',
        'RGW': 'rgws',
    }
    metal_hosts = set()
    for nodes in config.nodes.values():
        for node in nodes:
            metal_hosts.add(node['host'])
    for host in metal_hosts:
        logger.debug("listing containers for host %s", host)
        cmd = [deployment_type, 'container', 'ls', '--format',
               '"{{ .Names }}"']
        conn = ceph_medic.connection.get_connection(
            host, deployment_type='ssh')
        out, err, code = process.check(conn, cmd)
        if code:
            terminal.error("Unable to list containers on host %s" % host)
            continue
        # fixed: ``map`` returns a lazy, always-truthy object on Python 3,
        # which made the "no containers" warning below unreachable; build
        # a real list so the emptiness check works
        container_list = [i.strip('"') for i in out]
        if not container_list:
            terminal.warning("Host %s had no containers" % host)
            continue
        for container_name in container_list:
            cmd = [deployment_type, 'container', 'inspect', container_name]
            out, err, code = process.check(conn, cmd)
            if code:
                terminal.error(
                    "Unable to inspect container %s on host %s" %
                    (container_name, host)
                )
                continue
            detail = json.loads(''.join(out))[0]
            env = dict(
                [s.split('=', 1) for s in detail['Config']['Env']])
            if 'CEPH_DAEMON' not in env:
                continue
            if env.get('CLUSTER') != config.cluster_name:
                continue
            role = env['CEPH_DAEMON']
            if role not in label_map:
                continue
            base_inventory[label_map[role]].append(
                {'host': host, 'container': container_name, 'group': None}
            )
    return base_inventory
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # ceph-medic documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Jun 27 14:32:23 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | # import os 20 | # import sys 21 | import os 22 | import sys 23 | # sys.path.insert(0, os.path.abspath('.')) 24 | sys.path.append(os.path.abspath('_themes')) 25 | 26 | 27 | # -- General configuration ------------------------------------------------ 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | # 31 | # needs_sphinx = '1.0' 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 36 | extensions = [] 37 | 38 | # Add any paths that contain templates here, relative to this directory. 39 | templates_path = ['_templates'] 40 | 41 | # The suffix(es) of source filenames. 42 | # You can specify multiple suffix as a list of string: 43 | # 44 | # source_suffix = ['.rst', '.md'] 45 | source_suffix = '.rst' 46 | 47 | # The master toctree document. 48 | master_doc = 'contents' 49 | 50 | # General information about the project. 
51 | project = u'ceph-medic' 52 | copyright = u'2017, Andrew Schoen, Alfredo Deza' 53 | author = u'Andrew Schoen, Alfredo Deza' 54 | 55 | # The version info for the project you're documenting, acts as replacement for 56 | # |version| and |release|, also used in various other places throughout the 57 | # built documents. 58 | # 59 | # The short X.Y version. 60 | version = u'0.0.1' 61 | # The full version, including alpha/beta/rc tags. 62 | release = u'0.0.1' 63 | 64 | # The language for content autogenerated by Sphinx. Refer to documentation 65 | # for a list of supported languages. 66 | # 67 | # This is also used if you do content translation via gettext catalogs. 68 | # Usually you set "language" from the command line for these cases. 69 | language = None 70 | 71 | # List of patterns, relative to source directory, that match files and 72 | # directories to ignore when looking for source files. 73 | # This patterns also effect to html_static_path and html_extra_path 74 | exclude_patterns = [] 75 | 76 | # The name of the Pygments (syntax highlighting) style to use. 77 | pygments_style = 'sphinx' 78 | 79 | # If true, `todo` and `todoList` produce output, else they produce nothing. 80 | todo_include_todos = False 81 | 82 | 83 | # -- Options for HTML output ---------------------------------------------- 84 | 85 | # The theme to use for HTML and HTML Help pages. See the documentation for 86 | # a list of builtin themes. 87 | # 88 | html_theme = 'ceph' 89 | # Add any paths that contain custom themes here, relative to this directory. 90 | html_theme_path = ['_themes'] 91 | html_show_sphinx = False 92 | html_sidebars = { 93 | '**': ['smarttoc.html', 'searchbox.html'], 94 | } 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 
99 | # 100 | # html_theme_options = {} 101 | 102 | # Add any paths that contain custom static files (such as style sheets) here, 103 | # relative to this directory. They are copied after the builtin static files, 104 | # so a file named "default.css" will overwrite the builtin "default.css". 105 | html_static_path = ['_static'] 106 | 107 | 108 | # -- Options for HTMLHelp output ------------------------------------------ 109 | 110 | # Output file base name for HTML help builder. 111 | htmlhelp_basename = 'ceph-medicdoc' 112 | 113 | 114 | # -- Options for LaTeX output --------------------------------------------- 115 | 116 | latex_elements = { 117 | # The paper size ('letterpaper' or 'a4paper'). 118 | # 119 | # 'papersize': 'letterpaper', 120 | 121 | # The font size ('10pt', '11pt' or '12pt'). 122 | # 123 | # 'pointsize': '10pt', 124 | 125 | # Additional stuff for the LaTeX preamble. 126 | # 127 | # 'preamble': '', 128 | 129 | # Latex figure (float) alignment 130 | # 131 | # 'figure_align': 'htbp', 132 | } 133 | 134 | # Grouping the document tree into LaTeX files. List of tuples 135 | # (source start file, target name, title, 136 | # author, documentclass [howto, manual, or own class]). 137 | latex_documents = [ 138 | (master_doc, 'ceph-medic.tex', u'ceph-medic Documentation', 139 | u'Andrew Schoen, Alfredo Deza', 'manual'), 140 | ] 141 | 142 | 143 | # -- Options for manual page output --------------------------------------- 144 | 145 | # One entry per manual page. List of tuples 146 | # (source start file, name, description, authors, manual section). 147 | man_pages = [ 148 | (master_doc, 'ceph-medic', u'ceph-medic Documentation', 149 | [author], 1) 150 | ] 151 | 152 | 153 | # -- Options for Texinfo output ------------------------------------------- 154 | 155 | # Grouping the document tree into Texinfo files. 
class colorize(str):
    """
    Pretty simple to use::

        colorize.make('foo').bold
        colorize.make('foo').green
        colorize.make('foo').yellow
        colorize.make('foo').red
        colorize.make('foo').blue

    Otherwise you could go the long way (for example if you are
    testing this class)::

        string = colorize('foo')
        string._set_attributes()
        string.red

    """

    # NOTE: because ``str`` is immutable, the text itself is handled by
    # ``str.__new__``; __init__ only records terminal state used to decide
    # whether escape codes should be emitted at all.
    def __init__(self, string):
        self.stdout = sys.__stdout__
        self.appends = ''
        self.prepends = ''
        self.isatty = self.stdout.isatty()

    def _set_attributes(self):
        """
        Sets the attributes here because the str class does not
        allow to pass in anything other than a string to the constructor
        so we can't really mess with the other attributes.
        """
        # the color attributes (.red, .bold, ...) only exist after this runs,
        # which is why callers should go through ``colorize.make``
        for k, v in self.__colors__.items():
            setattr(self, k, self.make_color(v))

    def make_color(self, color):
        # colorizing a non-tty stream (or any stream on Windows) would leak
        # raw escape sequences, so return the plain string unchanged instead
        if not self.isatty or self.is_windows:
            return self
        # '\033[0m' resets the terminal back to the default color
        return color + self + '\033[0m' + self.appends

    @property
    def __colors__(self):
        # ANSI escape sequences for each supported attribute name
        return dict(
            blue = '\033[34m',
            green = '\033[92m',
            yellow = '\033[33m',
            red = '\033[91m',
            bold = '\033[1m',
            ends = '\033[0m'
        )

    @property
    def is_windows(self):
        # classic cmd.exe consoles do not interpret ANSI escapes
        if sys.platform == 'win32':
            return True
        return False

    @classmethod
    def make(cls, string):
        """
        A helper method to return itself and workaround the fact that
        the str object doesn't allow extra arguments passed in to the
        constructor
        """
        obj = cls(string)
        obj._set_attributes()
        return obj
class LogMessage(object):
    """
    A single terminal status line (``--> message``) whose arrow header is
    colored according to the message level, filtered against the
    configured verbosity.
    """

    def __init__(self, level_name, message, writer=None, config_level=None):
        self.level_name = level_name
        self.message = message
        self.writer = writer or sys.stdout
        self.config_level = config_level or self.get_config_level()

    def skip(self):
        # anything below the configured verbosity is silently dropped
        return self.level_int < self.config_level

    def header(self):
        colored = colorize.make(self.base_string)
        return getattr(colored, self.level_color)

    @property
    def base_string(self):
        # verbose configurations get the level name embedded in the arrow
        if self.config_level < 2:
            return "--> [%s]" % self.level_name
        return "-->"

    @property
    def level_int(self):
        # 'remote' output is treated like info-level messages
        if self.level_name == 'remote':
            return 2
        return _reverse_level_names.get(self.level_name, 4)

    @property
    def level_color(self):
        return _level_colors.get(self.level_name, 'info')

    def line(self):
        return "%s %s\n" % (self.header(), self.message.rstrip('\n'))

    def write(self):
        if self.skip():
            return
        self.writer.write(self.line())

    def get_config_level(self):
        # imported lazily to avoid a circular import at module load time
        import ceph_medic
        return _reverse_level_names.get(ceph_medic.config.verbosity, 4)
193 | return LogMessage('debug', message).write() 194 | 195 | 196 | def info(message): 197 | return LogMessage('info', message).write() 198 | 199 | 200 | def warning(message): 201 | return LogMessage('warning', message).write() 202 | 203 | 204 | def critical(message): 205 | return LogMessage('critical', message).write() 206 | -------------------------------------------------------------------------------- /ceph_medic/tests/test_collector.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ceph_medic import collector, metadata 4 | from mock import Mock 5 | 6 | 7 | class FakeConnRemoteModule(object): 8 | """ 9 | A fake remote_module class to be used 10 | with Mocked connection objects. 11 | 12 | This class contains stubbed methods for functions 13 | in ceph_medic.remote.functions which get their return 14 | value from the class attribute return_values. 15 | 16 | When creating an instance pass a dictionary that maps 17 | function names to their return values. 
def get_tree(files=None, dirs=None):
    """
    Build a minimal path-tree mapping with 'files' and 'dirs' keys, using
    small one-entry defaults when either listing is not provided.
    """
    return {
        'files': ["file1.txt"] if files is None else files,
        'dirs': ["dir1"] if dirs is None else dirs,
    }
class TestCollectSocketInfo(object):

    def _patch_socket_commands(self, monkeypatch):
        # the actual socket probing commands are irrelevant here; stub both
        monkeypatch.setattr(
            collector.remote.commands, 'ceph_socket_version',
            lambda conn, socket: dict())
        monkeypatch.setattr(
            collector.remote.commands, 'daemon_socket_config',
            lambda conn, socket: dict())

    def tests_collects_sockets(self, monkeypatch):
        self._patch_socket_commands(monkeypatch)
        metadata = {
            'paths': {
                '/var/run/ceph': {'files': ['/var/run/ceph/osd.asok']},
            },
        }
        result = collector.collect_socket_info(Mock(), metadata)
        assert '/var/run/ceph/osd.asok' in result

    def test_ignores_unknown_files(self, monkeypatch):
        self._patch_socket_commands(monkeypatch)
        metadata = {
            'paths': {
                '/var/run/ceph': {
                    'files': [
                        '/var/run/ceph/osd.asok',
                        '/var/run/ceph/osd.log',
                    ]
                },
            },
        }
        result = collector.collect_socket_info(Mock(), metadata)
        assert '/var/run/ceph/osd.log' not in result
assert "meta" in metadata["mons"]["mon0"] 138 | 139 | 140 | class TestGetNodeMetadata(object): 141 | 142 | @pytest.mark.parametrize( 143 | 'key', 144 | ['ceph', 'devices', 'paths', 'network',], 145 | ) 146 | def test_collects_metadata(self, key, monkeypatch): 147 | def mock_metadata(*args, **kwargs): 148 | return dict(meta="data") 149 | monkeypatch.setattr(collector, "collect_devices", mock_metadata) 150 | monkeypatch.setattr(collector, "collect_paths", mock_metadata) 151 | monkeypatch.setattr(collector, "collect_network", mock_metadata) 152 | monkeypatch.setattr(collector, "collect_ceph_info", mock_metadata) 153 | monkeypatch.setattr(collector, "collect_socket_info", mock_metadata) 154 | monkeypatch.setattr(collector, "collect_ceph_osd_info", mock_metadata) 155 | result = collector.get_node_metadata(Mock(), "mon0", []) 156 | assert key in result 157 | -------------------------------------------------------------------------------- /ceph_medic/remote/functions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import grp 3 | import pwd 4 | import traceback 5 | import sys 6 | import subprocess 7 | 8 | 9 | # Utilities 10 | # 11 | def capture_exception(error): 12 | details = {'attributes': {}} 13 | details['name'] = error.__class__.__name__ 14 | details['repr'] = str(error) 15 | exc_type, exc_value, exc_traceback = sys.exc_info() 16 | details['traceback'] = ''.join(traceback.format_exception(exc_type, exc_value, exc_traceback)) 17 | for attr in dir(error): 18 | if not attr.startswith('__'): 19 | try: 20 | details['attributes'][attr] = str(getattr(error, attr)) 21 | except Exception: 22 | # getting an exception here is entirely possible, and since 23 | # there is no remote logging there is nothing we can do other 24 | # than eat it up. 
This section is going through each of the 25 | # attributes of the exception raised so it is mildly acceptable 26 | # to skip if anything is breaking 27 | details['attributes'][attr] = None 28 | return details 29 | 30 | 31 | def decoded(string): 32 | try: 33 | return string.decode('utf-8') 34 | except AttributeError: 35 | return string 36 | 37 | 38 | # Paths 39 | # 40 | def stat_path(path, skip_dirs=None, skip_files=None, get_contents=False): 41 | """stat a path on a remote host""" 42 | # Capture all information about a path, optionally getting the contents of 43 | # the remote path if it is a file. Exceptions get appended to each dictionary 44 | # object associated with the path 45 | 46 | # .. note:: Neither ``skip_dirs`` nor ``skip_files`` is used here, but the 47 | # remote execution of functions use name-based arguments which does not allow 48 | # the use of ``**kw`` 49 | metadata = {u'exception': {}} 50 | path = decoded(path) 51 | try: 52 | stat_info = os.stat(path) 53 | if get_contents and os.path.isfile(path): 54 | with open(path, 'r') as opened_file: 55 | metadata[u'contents'] = decoded(opened_file.read()) 56 | except Exception as error: 57 | return {'exception': capture_exception(error)} 58 | 59 | allowed_attrs = [ 60 | 'n_fields', 'n_sequence_fields', 'n_unnamed_fields', 'st_atime', 61 | 'st_blksize', 'st_blocks', 'st_ctime', 'st_dev', 'st_gid', 'st_ino', 62 | 'st_mode', 'st_mtime', 'st_nlink', 'st_rdev', 'st_size', 'st_uid' 63 | ] 64 | 65 | # get all the stat results back into the metadata 66 | for attr in dir(stat_info): 67 | attr = decoded(attr) 68 | if attr in allowed_attrs: 69 | value = decoded(getattr(stat_info, attr)) 70 | metadata[attr] = value 71 | 72 | # translate the owner and group: 73 | try: 74 | metadata[u'owner'] = decoded(pwd.getpwuid(stat_info.st_uid)[0]) 75 | except KeyError: 76 | metadata[u'owner'] = stat_info.st_uid 77 | try: 78 | metadata[u'group'] = decoded(grp.getgrgid(stat_info.st_gid)[0]) 79 | except KeyError: 80 | 
metadata[u'group'] = stat_info.st_gid 81 | 82 | return metadata 83 | 84 | 85 | def path_tree(path, skip_dirs=None, skip_files=None, get_contents=None): 86 | """generate a path tree""" 87 | # Generate a tree of paths, including directories and files, recursively, but 88 | # with the ability to exclude dirs and files with ``skip_dirs`` and 89 | # ``skip_files``. 90 | # The tree output groups the files and directories like:: 91 | 92 | # { 93 | # 'path': '/etc/ceph', 94 | # 'dirs': ['/etc/ceph/ceph.d/'], 95 | # 'files': ['/etc/ceph/ceph.d/test.conf', '/etc/ceph/rbdmap'] 96 | # } 97 | 98 | # .. note:: ``get_contents`` is not used here, but the remote execution of functions 99 | # use name-based arguments which does not allow the use of ``**kw`` 100 | try: 101 | path = path.decode('utf-8') 102 | except AttributeError: 103 | pass 104 | skip_files = skip_files or [] 105 | skip_dirs = skip_dirs or [] 106 | files = [] 107 | dirs = [] 108 | # traverse for files and directories, topdown allows us to trim the 109 | # directories on the fly 110 | for root, _dirs, _files in os.walk(path, topdown=True): 111 | _dirs[:] = [d for d in _dirs if d not in skip_dirs] 112 | for _file in _files: 113 | absolute_path = os.path.join(root, _file) 114 | if _file in skip_files: 115 | continue 116 | files.append(absolute_path) 117 | 118 | for _dir in _dirs: 119 | absolute_path = os.path.join(root, _dir) 120 | dirs.append(absolute_path) 121 | 122 | # using the 'u' prefix forces python3<->python2 compatibility otherwise the 123 | # keys would be bytes, regardless if input is a str which should've forced 124 | # a 'str' behavior. 
The prefix is invalid syntax for Python 3.0 to 3.2, so 125 | # this will be valid in Python 3.3 and newer and Python 2 126 | return {u'path': path, u'dirs': dirs, u'files': files} 127 | 128 | 129 | def which(executable): 130 | """find the location of an executable""" 131 | locations = ( 132 | '/usr/local/bin', 133 | '/bin', 134 | '/usr/bin', 135 | '/usr/local/sbin', 136 | '/usr/sbin', 137 | '/sbin', 138 | ) 139 | 140 | for location in locations: 141 | executable_path = os.path.join(location, executable) 142 | if os.path.exists(executable_path): 143 | return executable_path 144 | 145 | 146 | def run(command): 147 | """ 148 | run a command, return stdout, stderr, and exit code. 149 | """ 150 | process = subprocess.Popen( 151 | command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True 152 | ) 153 | stdout = process.stdout.read().splitlines() 154 | stderr = process.stderr.read().splitlines() 155 | returncode = process.wait() 156 | 157 | return stdout, stderr, returncode 158 | 159 | 160 | # remoto magic, needed to execute these functions remotely 161 | if __name__ == '__channelexec__': 162 | for item in channel: # noqa 163 | channel.send(eval(item)) # noqa 164 | -------------------------------------------------------------------------------- /ceph_medic/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import random 3 | from ceph_medic import runner 4 | import ceph_medic 5 | from ceph_medic.tests import base_metadata 6 | 7 | 8 | class FakeWriter(object): 9 | 10 | def __init__(self): 11 | self.calls = [] 12 | self.write = self.raw 13 | self.loader = self 14 | 15 | def raw(self, string): 16 | self.calls.append(string) 17 | 18 | def bold(self, string): 19 | self.calls.append(string) 20 | 21 | def get_output(self): 22 | return '\n'.join(self.calls) 23 | 24 | 25 | @pytest.fixture(scope='class', autouse=True) 26 | def clear_metadata(): 27 | ceph_medic.metadata = base_metadata 28 | 29 | 30 | 
@pytest.fixture 31 | def mon_keyring(): 32 | def make_keyring(default=False): 33 | if default: 34 | key = "AQBvaBFZAAAAABAA9VHgwCg3rWn8fMaX8KL01A==" 35 | else: 36 | key = "%032x==" % random.getrandbits(128) 37 | 38 | return """ 39 | [mon.] 40 | key = %s 41 | caps mon = "allow *" 42 | """ % key 43 | return make_keyring 44 | 45 | 46 | @pytest.fixture 47 | def terminal(monkeypatch): 48 | fake_writer = FakeWriter() 49 | monkeypatch.setattr(runner.terminal, 'write', fake_writer) 50 | return fake_writer 51 | 52 | 53 | @pytest.fixture 54 | def data(): 55 | """ 56 | Default data structure for remote nodes 57 | """ 58 | def _data(): 59 | return { 60 | 'ceph': {'installed': True, 'version': '12.2.1', 'sockets':{}}, 61 | 'paths': { 62 | '/etc/ceph': {'files': {}, 'dirs': {}}, 63 | '/var/lib/ceph': {'files': {}, 'dirs': {}}, 64 | } 65 | } 66 | return _data 67 | 68 | 69 | @pytest.fixture 70 | def make_data(data, **kw): 71 | """ 72 | Customize basic data structure on remote nodes 73 | """ 74 | def update(dictionary=None): 75 | base = data() 76 | if not dictionary: 77 | return base 78 | base.update(dictionary) 79 | return base 80 | return update 81 | 82 | 83 | @pytest.fixture 84 | def make_nodes(): 85 | """ 86 | Helper to generate nodes for daemons 87 | """ 88 | def make_data(**kw): 89 | """ 90 | ``kw`` is expected to be a mapping between daemon name and hosts for 91 | that daemon, like:: 92 | 93 | make_data(mons=['node1', 'node2'] 94 | """ 95 | # default set of nodes 96 | data = dict( 97 | (k, {}) for k in ['rgws', 'mgrs', 'mdss', 'clients', 'osds', 'mons'] 98 | ) 99 | for daemon, node_names in kw.items(): 100 | data[daemon] = [dict(host=node_name) for node_name in node_names] 101 | return data 102 | return make_data 103 | 104 | 105 | class Capture(object): 106 | 107 | def __init__(self, *a, **kw): 108 | self.a = a 109 | self.kw = kw 110 | self.calls = [] 111 | self.return_values = kw.get('return_values', False) 112 | self.always_returns = kw.get('always_returns', False) 113 | 
114 | def __call__(self, *a, **kw): 115 | self.calls.append({'args': a, 'kwargs': kw}) 116 | if self.always_returns: 117 | return self.always_returns 118 | if self.return_values: 119 | return self.return_values.pop() 120 | 121 | 122 | class Factory(object): 123 | 124 | def __init__(self, **kw): 125 | for k, v in kw.items(): 126 | setattr(self, k, v) 127 | 128 | 129 | @pytest.fixture 130 | def factory(): 131 | return Factory 132 | 133 | 134 | @pytest.fixture 135 | def conn(): 136 | """ 137 | Useful when trying to pass a ``conn`` object around that will porbably want 138 | to log output 139 | """ 140 | log = lambda x: x 141 | logger = Factory(error=log, exception=log) 142 | return Factory(logger=logger) 143 | 144 | 145 | @pytest.fixture 146 | def capture(): 147 | return Capture() 148 | 149 | 150 | @pytest.fixture 151 | def fake_run(monkeypatch): 152 | fake_run = Capture() 153 | monkeypatch.setattr('remoto.process.run', fake_run) 154 | return fake_run 155 | 156 | 157 | @pytest.fixture 158 | def fake_check(monkeypatch): 159 | fake_call = Capture(always_returns=([], [], 0)) 160 | monkeypatch.setattr('remoto.process.check', fake_call) 161 | return fake_call 162 | 163 | 164 | @pytest.fixture 165 | def stub_check(monkeypatch): 166 | """ 167 | Monkeypatches process.check, so that a caller can add behavior to the 168 | response 169 | """ 170 | def apply(return_values, module=None, string_module='remoto.process.check'): 171 | """ 172 | ``return_values`` should be a tuple of 3 elements: stdout, stderr, and 173 | code. This should mimic the ``check()`` return values. For example:: 174 | 175 | (['stdout'], ['stderr'], 0) 176 | 177 | Each item in the stdout or stderr lists represents a line. 
178 | Additionally, if more than one response is wanted, a list with multiple 179 | tuples can be provided:: 180 | 181 | 182 | [ 183 | (['output'], [], 0), 184 | ([], ['error condition'], 1), 185 | (['output'], [], 0), 186 | ] 187 | 188 | When patching, most of the time the default ``string_module`` will be 189 | fine, but if it is required to patch an actual module with the added 190 | string, then it is possible to use them accordingly: whne the module is 191 | set, the call to ``monkeypatch`` will use both like:: 192 | 193 | monkeypatch.setattr(module, 'function', value) 194 | 195 | Otherwise it will just patch it like:: 196 | 197 | monkeypatch.setattr('remoto.process.check', value) 198 | 199 | """ 200 | if isinstance(return_values, tuple): 201 | return_values = [return_values] 202 | stubbed_call = Capture(return_values=return_values) 203 | if module: 204 | monkeypatch.setattr(module, string_module, stubbed_call) 205 | else: 206 | monkeypatch.setattr(string_module, stubbed_call) 207 | return stubbed_call 208 | 209 | return apply 210 | 211 | 212 | @pytest.fixture(autouse=True) 213 | def reset_file_config(request, monkeypatch): 214 | """ 215 | The globally available ``ceph_medic.config.file`` might get mangled in 216 | tests, make sure that after evert test, it gets reset, preventing pollution 217 | going into other tests later. 
218 | """ 219 | def fin(): 220 | ceph_medic.config.file = ceph_medic.UnloadedConfig() 221 | request.addfinalizer(fin) 222 | -------------------------------------------------------------------------------- /ceph_medic/main.py: -------------------------------------------------------------------------------- 1 | from ceph_medic import check, log 2 | import sys 3 | import os 4 | from textwrap import dedent 5 | from tambo import Transport 6 | from execnet.gateway_bootstrap import HostNotFound 7 | import ceph_medic 8 | from ceph_medic.decorators import catches 9 | from ceph_medic.util import configuration, hosts 10 | from ceph_medic import terminal 11 | 12 | 13 | class Medic(object): 14 | _help = """ 15 | ceph-medic: A utility to run system checks on a Ceph cluster. 16 | 17 | Version: {version} 18 | 19 | Global Options: 20 | --config Path to a specific configuration file. Overrides the default: 21 | $HOME/.cephmedic.conf. 22 | --cluster Use a specific cluster name (defaults to 'ceph'). Alternatively, 23 | this is inferred from a conf file name in /etc/ceph/ 24 | --ssh-config Specify an alternate configuration for SSH 25 | --version, version Shows the current installed version 26 | --inventory Prefer a ceph-ansible inventory (hosts) file instead of default 27 | (cwd, /etc/ansible/hosts) locations 28 | --verbosity Set verbosity level of logging output 29 | 30 | {sub_help} 31 | 32 | {config_path_header}: {config_path} 33 | {hosts_file_header}: {hosts_file} 34 | {configured_nodes} 35 | """ 36 | mapper = { 37 | 'check': check.Check, 38 | # TODO: this needs a bit more work, disabling for now 39 | #'generate': generate.Generate, 40 | } 41 | 42 | def __init__(self, argv=None, parse=True): 43 | if argv is None: 44 | argv = sys.argv 45 | if parse: 46 | self.main(argv) 47 | 48 | def help(self, sub_help=None): 49 | if self.hosts_file is None: 50 | hosts_file_header = terminal.red('Loaded Inventory Hosts file') 51 | hosts_file = 'No hosts file found in cwd, /etc/ansible/, or 
configured' 52 | else: 53 | hosts_file_header = terminal.green('Loaded Inventory Hosts file') 54 | hosts_file = self.hosts_file 55 | return self._help.format( 56 | version=ceph_medic.__version__, 57 | config_path=self.config_path, 58 | config_path_header=terminal.green('Loaded Config Path'), 59 | hosts_file=hosts_file, 60 | hosts_file_header=hosts_file_header, 61 | sub_help=sub_help, 62 | configured_nodes=self.configured_nodes 63 | ) 64 | 65 | @property 66 | def configured_nodes(self): 67 | _help = dedent(""" 68 | Configured nodes (loaded from inventory hosts file): 69 | OSDs: {osd_node_count} 70 | MONs: {mon_node_count} 71 | MGRs: {mgr_node_count} 72 | MDSs: {mds_node_count} 73 | RGWs: {rgw_node_count}""") 74 | if self.hosts_file: # we have nodes that have been loaded 75 | nodes = ceph_medic.config.nodes 76 | return _help.format( 77 | osd_node_count=len(nodes.get('osds', [])), 78 | mon_node_count=len(nodes.get('mons', [])), 79 | mds_node_count=len(nodes.get('mdss', [])), 80 | mgr_node_count=len(nodes.get('mgrs', [])), 81 | rgw_node_count=len(nodes.get('rgws', [])) 82 | ) 83 | return '' 84 | 85 | @catches((RuntimeError, KeyboardInterrupt, HostNotFound)) 86 | def main(self, argv): 87 | options = [ 88 | '--cluster', '--ssh-config', '--inventory', 89 | '--config', '--verbosity', 90 | ] 91 | parser = Transport( 92 | argv, options=options, 93 | check_help=False, 94 | check_version=False 95 | ) 96 | parser.parse_args() 97 | 98 | self.config_path = parser.get('--config', configuration.location()) 99 | 100 | # load medic configuration 101 | loaded_config = configuration.load(path=parser.get('--config', self.config_path)) 102 | 103 | # this is the earliest we can have enough config to setup logging 104 | log.setup(loaded_config) 105 | ceph_medic.config.file = loaded_config 106 | global_options = dict(ceph_medic.config.file._sections['global']) 107 | 108 | # SSH config 109 | ceph_medic.config.ssh_config = parser.get('--ssh-config', global_options.get('--ssh-config')) 110 | 
if ceph_medic.config.ssh_config: 111 | ssh_config_path = ceph_medic.config.ssh_config 112 | if not os.path.exists(ssh_config_path): 113 | terminal.error("the given ssh config path does not exist: %s" % ssh_config_path) 114 | sys.exit() 115 | 116 | ceph_medic.config.cluster_name = parser.get('--cluster', 'ceph') 117 | ceph_medic.metadata['cluster_name'] = 'ceph' 118 | 119 | # Deployment Type 120 | deployment_type = ceph_medic.config.file.get_safe('global', 'deployment_type', 'baremetal') 121 | if deployment_type in ['kubernetes', 'openshift', 'k8s', 'oc']: 122 | pod_hosts = hosts.container_platform(deployment_type) 123 | ceph_medic.config.nodes = pod_hosts 124 | ceph_medic.config.hosts_file = ':memory:' 125 | self.hosts_file = ':memory:' 126 | else: 127 | # Hosts file 128 | self.hosts_file = parser.get('--inventory', configuration.get_host_file()) 129 | 130 | # find the hosts files, by the CLI first, fallback to the configuration 131 | # file, and lastly if none of those are found or defined, try to load 132 | # from well known locations (cwd, and /etc/ansible/) 133 | loaded_hosts = configuration.load_hosts( 134 | parser.get('--inventory', 135 | global_options.get('--inventory', self.hosts_file))) 136 | ceph_medic.config.nodes = loaded_hosts.nodes 137 | ceph_medic.config.hosts_file = loaded_hosts.filename 138 | self.hosts_file = loaded_hosts.filename 139 | 140 | if deployment_type in ['docker', 'podman']: 141 | ceph_medic.config.nodes = hosts.basic_containers( 142 | deployment_type) 143 | 144 | parser.catch_version = ceph_medic.__version__ 145 | parser.mapper = self.mapper 146 | parser.catch_help = self.help(parser.subhelp()) 147 | if len(argv) <= 1: 148 | return parser.print_help() 149 | ceph_medic.config.config_path = self.config_path 150 | parser.dispatch() 151 | parser.catches_help() 152 | parser.catches_version() 153 | 154 | # Verbosity 155 | verbosity = parser.get('--verbosity', 'debug') 156 | ceph_medic.config.verbosity = verbosity.lower() 157 | 
-------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. ceph-medic documentation master file, created by 2 | sphinx-quickstart on Tue Jun 27 14:32:23 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | ================================================= 7 | Introduction 8 | ================================================= 9 | 10 | ``ceph-medic`` is a very simple tool that runs against a Ceph cluster to detect 11 | common issues that might prevent correct functionality. It requires 12 | non-interactive SSH access to accounts that can ``sudo`` without a password 13 | prompt. 14 | 15 | Usage 16 | ===== 17 | 18 | The basic usage of ``ceph-medic`` is to perform checks against a ceph cluster 19 | to identify potential issues with its installation or configuration. To do 20 | this, run the following command:: 21 | 22 | ceph-medic --inventory /path/to/hosts --ssh-config /path/to/ssh_config check 23 | 24 | Inventory 25 | --------- 26 | ``ceph-medic`` needs to know the nodes that exist in your ceph cluster before 27 | it can perform checks. The inventory (or ``hosts`` file) is a typical Ansible 28 | inventory file and will be used to inform ``ceph-medic`` of the nodes in your 29 | cluster and their respective roles. The following standard host groups are 30 | supported by ``ceph-medic``: ``mons``, ``osds``, ``rgws``, ``mdss``, ``mgrs`` 31 | and ``clients``. An example ``hosts`` file would look like:: 32 | 33 | [mons] 34 | mon0 35 | mon1 36 | 37 | [osds] 38 | osd0 39 | 40 | [mgrs] 41 | mgr0 42 | 43 | The location of the ``hosts`` file can be passed into ``ceph-medic`` by using 44 | the ``--inventory`` cli option (e.g ``ceph-medic --inventory /path/to/hosts``). 
45 | 46 | If the ``--inventory`` option is not defined, ``ceph-medic`` will first look in 47 | the current working directory for a file named ``hosts``. If the file does not 48 | exist, it will look for ``/etc/ansible/hosts`` to be used as the inventory. 49 | 50 | .. note:: Defining the inventory location is also possible via the config file 51 | under the ``[global]`` section. 52 | 53 | 54 | Inventory for Containers 55 | ------------------------ 56 | Containerized deployments are also supported, via ``docker`` and ``podman``. 57 | As with ``baremetal`` deployments, an inventory file is required. If the 58 | cluster was deployed with ``ceph-ansible``, you may use that existing 59 | inventory. 60 | 61 | To configure ceph-medic to connect to a containerized cluster, the glocal section of the 62 | configuration needs to define ``deployment_type`` to either ``docker`` or 63 | ``podman``. For example:: 64 | 65 | [global] 66 | 67 | deployment_type = podman 68 | 69 | 70 | Inventory for Container Platforms 71 | --------------------------------- 72 | Both ``kubernetes`` and ``openshift`` platforms can host containers remotely, 73 | but do allow to connect and retrieve information from a central location. 74 | To configure ceph-medic to connect to a platform, the glocal section of the 75 | configuration needs to define ``deployment_type`` to either ``kubernetes``, which 76 | uses the ``kubectl`` command, or ``openshift``, which uses the ``oc`` command. For example:: 77 | 78 | [global] 79 | 80 | deployment_type = openshift 81 | 82 | 83 | When using ``openshift`` or ``kubernetes`` as a deployment type, there is no 84 | requirement to define a ``hosts`` file. The hosts are generated dynamically by 85 | calling out to the platform and retrieving the pods. When the pods are 86 | identified, they are grouped by deamon type (osd, mgr, rgw, mon, etc...). 
87 | 88 | SSH Config 89 | ---------- 90 | 91 | All nodes in your ``hosts`` file must be configured to provide non-interactive 92 | SSH access to accounts that can ``sudo`` without a password prompt. 93 | 94 | .. note:: 95 | This is the same ssh config required by ansible. If you've used ``ceph-ansible`` to deploy your 96 | cluster then your nodes are most likely already configured for this type of ssh access. If that 97 | is the case, using the same user that performed the initial deployment would be easiest. 98 | 99 | To provide your ssh config you must use the ``--ssh-config`` flag and give it 100 | a path to a file that defines your ssh configuration. For example, a file like 101 | this is used to connect with a cluster comprised of vagrant vms:: 102 | 103 | Host mon0 104 | HostName 127.0.0.1 105 | User vagrant 106 | Port 2200 107 | UserKnownHostsFile /dev/null 108 | StrictHostKeyChecking no 109 | PasswordAuthentication no 110 | IdentityFile /Users/andrewschoen/.vagrant.d/insecure_private_key 111 | IdentitiesOnly yes 112 | LogLevel FATAL 113 | 114 | Host osd0 115 | HostName 127.0.0.1 116 | User vagrant 117 | Port 2201 118 | UserKnownHostsFile /dev/null 119 | StrictHostKeyChecking no 120 | PasswordAuthentication no 121 | IdentityFile /Users/andrewschoen/.vagrant.d/insecure_private_key 122 | IdentitiesOnly yes 123 | LogLevel FATAL 124 | 125 | 126 | .. note:: SSH configuration is not needed when using ``kubernetes`` or 127 | ``openshift`` 128 | 129 | 130 | Logging 131 | ------- 132 | 133 | By default ``ceph-medic`` sends complete logs to the current working directory. 134 | This log file is more verbose than the output displayed on the terminal. To 135 | change where these logs are created, modify the default value for ``--log-path`` 136 | in ``~/.cephmedic.conf``. 137 | 138 | Running checks 139 | -------------- 140 | 141 | To perform checks against your cluster use the ``check`` subcommand. 
This will 142 | perform a series of general checks, as well as checks specific to each daemon. 143 | Sample output from this command will look like:: 144 | 145 | ceph-medic --ssh-config vagrant_ssh_config check 146 | Host: mgr0 connection: [connected ] 147 | Host: mon0 connection: [connected ] 148 | Host: osd0 connection: [connected ] 149 | Collection completed! 150 | 151 | ======================= Starting remote check session ======================== 152 | Version: 0.0.1 Cluster Name: "test" 153 | Total hosts: [3] 154 | OSDs: 1 MONs: 1 Clients: 0 155 | MDSs: 0 RGWs: 0 MGRs: 1 156 | 157 | ================================================================================ 158 | 159 | ---------- managers ---------- 160 | mgr0 161 | 162 | ------------ osds ------------ 163 | osd0 164 | 165 | ------------ mons ------------ 166 | mon0 167 | 168 | 17 passed, 0 errors, on 4 hosts 169 | 170 | 171 | The logging can also be configured in the ``cephmedic.conf`` file in the global 172 | section:: 173 | 174 | [global] 175 | --log-path = . 176 | 177 | To ensure that cluster checks run properly, at least one monitor node should have administrative privileges. 
178 | -------------------------------------------------------------------------------- /ceph_medic/tests/test_runner.py: -------------------------------------------------------------------------------- 1 | import ceph_medic 2 | from ceph_medic import runner 3 | from ceph_medic.tests import base_metadata 4 | from textwrap import dedent 5 | from ceph_medic.util import configuration 6 | 7 | 8 | class TestRunner(object): 9 | 10 | def setup(self): 11 | runner.metadata = base_metadata 12 | 13 | def teardown(self): 14 | runner.metadata = base_metadata 15 | 16 | def test_calculate_total_hosts_is_0(self): 17 | run = runner.Runner() 18 | assert run.total_hosts == 0 19 | 20 | def test_calculate_hosts_single_daemon_type(self): 21 | ceph_medic.metadata['nodes']['osds'] = [{'host': 'node1'},{'host': 'node2'}] 22 | runner.metadata = ceph_medic.metadata 23 | run = runner.Runner() 24 | assert run.total_hosts == 2 25 | 26 | def test_count_from_different_daemon_types(self): 27 | ceph_medic.metadata['nodes']['osds'] = [{'host': 'node1'},{'host': 'node2'}] 28 | ceph_medic.metadata['nodes']['mons'] = [{'host': 'node3'},{'host': 'node4'}] 29 | runner.metadata = ceph_medic.metadata 30 | run = runner.Runner() 31 | assert run.total_hosts == 4 32 | 33 | 34 | class TestReport(object): 35 | 36 | def setup(self): 37 | runner.metadata = base_metadata 38 | runner.metadata['nodes'] = {} 39 | self.results = runner.Runner() 40 | 41 | def test_reports_unhandled_internal_errors(self, terminal): 42 | self.results.internal_errors = ['I am an error'] 43 | runner.report(self.results) 44 | assert 'While running checks, ceph-medic had 1 unhandled errors' in terminal.calls[-1] 45 | 46 | def test_reports_no_errors(self, terminal): 47 | runner.report(self.results) 48 | assert terminal.calls[0] == '\n0 passed, on 0 hosts' 49 | 50 | def test_reports_warning(self, terminal): 51 | self.results.warnings = 1 52 | runner.report(self.results) 53 | assert terminal.calls[0] == '\n0 passed, 1 warning, on 0 hosts' 54 | 55 
| def test_reports_warnings(self, terminal): 56 | self.results.warnings = 2 57 | runner.report(self.results) 58 | assert terminal.calls[0] == '\n0 passed, 2 warnings, on 0 hosts' 59 | 60 | def test_reports_error(self, terminal): 61 | self.results.errors = 1 62 | runner.report(self.results) 63 | assert terminal.calls[0] == '\n0 passed, 1 error, on 0 hosts' 64 | 65 | def test_reports_errors(self, terminal): 66 | self.results.errors = 2 67 | runner.report(self.results) 68 | assert terminal.calls[0] == '\n0 passed, 2 errors, on 0 hosts' 69 | 70 | def test_reports_error_and_warning(self, terminal): 71 | self.results.errors = 1 72 | self.results.warnings = 1 73 | runner.report(self.results) 74 | assert terminal.calls[0] == '\n0 passed, 1 error, 1 warning, on 0 hosts' 75 | 76 | def test_reports_errors_and_warnings(self, terminal): 77 | self.results.errors = 2 78 | self.results.warnings = 2 79 | runner.report(self.results) 80 | assert terminal.calls[0] == '\n0 passed, 2 errors, 2 warnings, on 0 hosts' 81 | 82 | def test_reports_internal_errors(self, terminal): 83 | self.results.internal_errors = ['error 1', 'error 2'] 84 | self.results.warnings = 2 85 | runner.report(self.results) 86 | assert terminal.calls[0] == '\n0 passed, 2 warnings, 2 internal errors, on 0 hosts' 87 | 88 | 89 | class TestReportBasicOutput(object): 90 | 91 | def setup(self): 92 | contents = dedent(""" 93 | [global] 94 | # 95 | """) 96 | conf = configuration.load_string(contents) 97 | ceph_medic.config.file = conf 98 | runner.metadata = base_metadata 99 | runner.metadata['cluster_name'] = 'ceph' 100 | runner.Runner().run() 101 | 102 | def teardown(self): 103 | runner.metadata = base_metadata 104 | 105 | def test_has_version(self, terminal): 106 | assert 'Version: ' in terminal.get_output() 107 | 108 | def test_has_cluster_name(self, terminal): 109 | assert 'Cluster Name: "ceph"' in terminal.get_output() 110 | 111 | def test_has_no_hosts(self, terminal): 112 | assert 'Total hosts: [0]' in 
terminal.get_output() 113 | 114 | def test_has_a_header(self, terminal): 115 | assert '== Starting remote check session ==' in terminal.get_output() 116 | 117 | def test_has_no_OSDs(self, terminal): 118 | assert 'OSDs: 0' in terminal.get_output() 119 | 120 | def test_has_no_MONs(self, terminal): 121 | assert 'MONs: 0' in terminal.get_output() 122 | 123 | def test_has_no_Clients(self, terminal): 124 | assert 'Clients: 0' in terminal.get_output() 125 | 126 | def test_has_no_MDSs(self, terminal): 127 | assert 'MDSs: 0' in terminal.get_output() 128 | 129 | def test_has_no_MGRs(self, terminal): 130 | assert 'MGRs: 0' in terminal.get_output() 131 | 132 | def test_has_no_RGWs(self, terminal): 133 | assert 'RGWs: 0' in terminal.get_output() 134 | 135 | 136 | class TestReportErrors(object): 137 | 138 | def setup(self): 139 | contents = dedent(""" 140 | [global] 141 | # 142 | """) 143 | conf = configuration.load_string(contents) 144 | ceph_medic.config.file = conf 145 | runner.metadata = base_metadata 146 | runner.metadata['cluster_name'] = 'ceph' 147 | runner.Runner().run() 148 | 149 | def teardown(self): 150 | runner.metadata = base_metadata 151 | 152 | def test_get_new_lines_in_errors(self, terminal, mon_keyring, data, monkeypatch): 153 | data_node1 = data() 154 | data_node2 = data() 155 | data_node1['paths']['/var/lib/ceph']['files'] = { 156 | '/var/lib/ceph/mon/ceph-0/keyring': {'contents': mon_keyring()} 157 | } 158 | data_node1['paths']['/var/lib/ceph']['dirs'] = { 159 | '/var/lib/ceph/osd/ceph-10': {}, 160 | '/var/lib/ceph/osd/ceph-11': {}, 161 | '/var/lib/ceph/osd/ceph-12': {}, 162 | '/var/lib/ceph/osd/ceph-13': {}, 163 | '/var/lib/ceph/osd/ceph-0': {}, 164 | '/var/lib/ceph/osd/ceph-1': {}, 165 | '/var/lib/ceph/osd/ceph-2': {}, 166 | '/var/lib/ceph/osd/ceph-3': {}, 167 | } 168 | 169 | data_node2['paths']['/var/lib/ceph']['files'] = { 170 | '/var/lib/ceph/mon/ceph-1/keyring': {'contents': mon_keyring()}, 171 | } 172 | data_node2['paths']['/var/lib/ceph']['dirs'] = { 
173 | '/var/lib/ceph/osd/ceph-10': {}, 174 | '/var/lib/ceph/osd/ceph-11': {}, 175 | '/var/lib/ceph/osd/ceph-12': {}, 176 | '/var/lib/ceph/osd/ceph-13': {}, 177 | '/var/lib/ceph/osd/ceph-0': {}, 178 | '/var/lib/ceph/osd/ceph-1': {}, 179 | '/var/lib/ceph/osd/ceph-2': {}, 180 | '/var/lib/ceph/osd/ceph-3': {}, 181 | } 182 | 183 | # set the data everywhere we need it 184 | ceph_medic.metadata['mons'] = {'node1': data_node1, 'node2': data_node2} 185 | monkeypatch.setattr(ceph_medic.checks.mons, 'metadata', ceph_medic.metadata) 186 | 187 | runner.Runner().run() 188 | # Any line that is an error or a warning *must* end with a newline 189 | for line in terminal.calls: 190 | if line.lstrip().startswith(('E', 'W')): 191 | assert line.endswith('\n') 192 | -------------------------------------------------------------------------------- /docs/source/_themes/ceph/static/nature.css_t: -------------------------------------------------------------------------------- 1 | /* 2 | * nature.css_t 3 | * ~~~~~~~~~~~~ 4 | * 5 | * Sphinx stylesheet -- nature theme. 6 | * 7 | * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details. 
9 | * 10 | */ 11 | 12 | @import url("basic.css"); 13 | 14 | /* -- page layout ----------------------------------------------------------- */ 15 | 16 | @font-face { 17 | font-family: 'ApexSansMedium'; 18 | src: url('font/ApexSans-Medium.eot'); 19 | src: url('font/ApexSans-Medium.eot?#iefix') format('embedded-opentype'), 20 | url('font/ApexSans-Medium.woff') format('woff'), 21 | url('font/ApexSans-Medium.ttf') format('truetype'), 22 | url('font/ApexSans-Medium.svg#FontAwesome') format('svg'); 23 | font-weight: normal; 24 | font-style: normal; 25 | } 26 | 27 | @font-face { 28 | font-family: 'ApexSansBook'; 29 | src: url('font/ApexSans-Book.eot'); 30 | src: url('font/ApexSans-Book.eot?#iefix') format('embedded-opentype'), 31 | url('font/ApexSans-Book.woff') format('woff'), 32 | url('font/ApexSans-Book.ttf') format('truetype'), 33 | url('font/ApexSans-Book.svg#FontAwesome') format('svg'); 34 | font-weight: normal; 35 | font-style: normal; 36 | } 37 | 38 | body { 39 | font: 14px/1.4 Helvetica, Arial, sans-serif; 40 | background-color: #E6E8E8; 41 | color: #37424A; 42 | margin: 0; 43 | padding: 0; 44 | border-top: 5px solid #F05C56; 45 | } 46 | 47 | div.documentwrapper { 48 | float: left; 49 | width: 100%; 50 | } 51 | 52 | div.bodywrapper { 53 | margin: 0 0 0 330px; 54 | } 55 | 56 | hr { 57 | border: 1px solid #B1B4B6; 58 | } 59 | 60 | div.document { 61 | background-color: #ffffff; 62 | } 63 | 64 | div.body { 65 | background-color: #ffffff; 66 | color: #3E4349; 67 | padding: 0 30px 30px 30px; 68 | } 69 | 70 | div.footer { 71 | color: #222B31; 72 | width: 100%; 73 | padding: 13px 0; 74 | text-align: center; 75 | font-size: 75%; 76 | } 77 | 78 | div.footer a { 79 | color: #444; 80 | text-decoration: underline; 81 | } 82 | 83 | div.related { 84 | background-color: #80D2DC; 85 | line-height: 32px; 86 | color: #37424A; 87 | // text-shadow: 0px 1px 0 #444; 88 | font-size: 100%; 89 | border-top: #9C4850 5px solid; 90 | } 91 | 92 | div.related a { 93 | color: #37424A; 94 | 
text-decoration: none; 95 | } 96 | 97 | div.related a:hover { 98 | color: #fff; 99 | // text-decoration: underline; 100 | } 101 | 102 | div.sphinxsidebar { 103 | // font-size: 100%; 104 | line-height: 1.5em; 105 | width: 330px; 106 | } 107 | 108 | div.sphinxsidebarwrapper{ 109 | padding: 20px 0; 110 | background-color: #efefef; 111 | } 112 | 113 | div.sphinxsidebar h3, 114 | div.sphinxsidebar h4 { 115 | font-family: ApexSansMedium; 116 | color: #e6e8e8; 117 | font-size: 1.2em; 118 | font-weight: normal; 119 | margin: 0; 120 | padding: 5px 10px; 121 | background-color: #5e6a71; 122 | // text-shadow: 1px 1px 0 white; 123 | text-transform: uppercase; 124 | } 125 | 126 | div.sphinxsidebar h4{ 127 | font-size: 1.1em; 128 | } 129 | 130 | div.sphinxsidebar h3 a { 131 | color: #e6e8e8; 132 | } 133 | 134 | 135 | div.sphinxsidebar p { 136 | color: #888; 137 | padding: 5px 20px; 138 | } 139 | 140 | div.sphinxsidebar p.topless { 141 | } 142 | 143 | div.sphinxsidebar ul { 144 | margin: 10px 5px 10px 20px; 145 | padding: 0; 146 | color: #000; 147 | } 148 | 149 | div.sphinxsidebar a { 150 | color: #444; 151 | } 152 | 153 | div.sphinxsidebar input { 154 | border: 1px solid #ccc; 155 | font-family: sans-serif; 156 | font-size: 1em; 157 | } 158 | 159 | div.sphinxsidebar input[type=text]{ 160 | margin-left: 20px; 161 | } 162 | 163 | /* -- body styles ----------------------------------------------------------- */ 164 | 165 | a { 166 | color: #F05C56; 167 | text-decoration: none; 168 | } 169 | 170 | a:hover { 171 | color: #F05C56; 172 | text-decoration: underline; 173 | } 174 | 175 | div.body h1, 176 | div.body h2, 177 | div.body h3, 178 | div.body h4, 179 | div.body h5, 180 | div.body h6 { 181 | // font-family: ApexSansMedium; 182 | // background-color: #80D2DC; 183 | // font-weight: normal; 184 | // color: #37424a; 185 | margin: 30px 0px 10px 0px; 186 | padding: 5px 0 5px 0px; 187 | // text-shadow: 0px 1px 0 white; 188 | text-transform: uppercase; 189 | } 190 | 191 | div.body h1 { 
font: 20px/2.0 ApexSansBook; color: #37424A; border-top: 20px solid white; margin-top: 0; } 192 | div.body h2 { font: 18px/1.8 ApexSansMedium; background-color: #5E6A71; color: #E6E8E8; padding: 5px 10px; } 193 | div.body h3 { font: 16px/1.6 ApexSansMedium; color: #37424A; } 194 | div.body h4 { font: 14px/1.4 Helvetica, Arial, sans-serif; color: #37424A; } 195 | div.body h5 { font: 12px/1.2 Helvetica, Arial, sans-serif; color: #37424A; } 196 | div.body h6 { font-size: 100%; color: #37424A; } 197 | 198 | // div.body h2 { font-size: 150%; background-color: #E6E8E8; color: #37424A; } 199 | // div.body h3 { font-size: 120%; background-color: #E6E8E8; color: #37424A; } 200 | // div.body h4 { font-size: 110%; background-color: #E6E8E8; color: #37424A; } 201 | // div.body h5 { font-size: 100%; background-color: #E6E8E8; color: #37424A; } 202 | // div.body h6 { font-size: 100%; background-color: #E6E8E8; color: #37424A; } 203 | 204 | a.headerlink { 205 | color: #c60f0f; 206 | font-size: 0.8em; 207 | padding: 0 4px 0 4px; 208 | text-decoration: none; 209 | } 210 | 211 | a.headerlink:hover { 212 | background-color: #c60f0f; 213 | color: white; 214 | } 215 | 216 | div.body p, div.body dd, div.body li { 217 | line-height: 1.5em; 218 | } 219 | 220 | div.admonition p.admonition-title + p { 221 | display: inline; 222 | } 223 | 224 | div.highlight{ 225 | background-color: white; 226 | } 227 | 228 | div.note { 229 | background-color: #e6e8e8; 230 | border: 1px solid #ccc; 231 | } 232 | 233 | div.seealso { 234 | background-color: #ffc; 235 | border: 1px solid #ff6; 236 | } 237 | 238 | div.topic { 239 | background-color: #efefef; 240 | } 241 | 242 | div.warning { 243 | background-color: #F05C56; 244 | border: 1px solid #9C4850; 245 | color: #fff; 246 | } 247 | 248 | p.admonition-title { 249 | display: inline; 250 | } 251 | 252 | p.admonition-title:after { 253 | content: ":"; 254 | } 255 | 256 | pre { 257 | padding: 10px; 258 | background-color: White; 259 | color: #222; 260 | 
line-height: 1.2em; 261 | border: 1px solid #5e6a71; 262 | font-size: 1.1em; 263 | margin: 1.5em; 264 | -webkit-box-shadow: 1px 1px 1px #e6e8e8; 265 | -moz-box-shadow: 1px 1px 1px #e6e8e8; 266 | } 267 | 268 | tt { 269 | background-color: #ecf0f3; 270 | color: #222; 271 | /* padding: 1px 2px; */ 272 | font-size: 15px; 273 | font-family: monospace; 274 | } 275 | 276 | .viewcode-back { 277 | font-family: Arial, sans-serif; 278 | } 279 | 280 | div.viewcode-block:target { 281 | background-color: #f4debf; 282 | border-top: 1px solid #ac9; 283 | border-bottom: 1px solid #ac9; 284 | } 285 | 286 | table.docutils { 287 | margin: 1.5em; 288 | } 289 | 290 | div.sidebar { 291 | border: 1px solid #5E6A71; 292 | background-color: #E6E8E8; 293 | } 294 | 295 | div.admonition.tip { 296 | background-color: #80D2DC; 297 | border: 1px solid #55AEBA; 298 | } 299 | 300 | div.admonition.important { 301 | background-color: #F05C56; 302 | border: 1px solid #9C4850; 303 | color: #fff; 304 | } 305 | 306 | div.tip tt.literal { 307 | background-color: #55aeba; 308 | color: #fff; 309 | } 310 | 311 | div.important tt.literal { 312 | background-color: #9C4850; 313 | color: #fff; 314 | } 315 | 316 | h2 .literal { 317 | color: #fff; 318 | background-color: #37424a; 319 | } 320 | 321 | dl.glossary dt { 322 | font-size: 1.0em; 323 | padding-top:20px; 324 | 325 | } -------------------------------------------------------------------------------- /ceph_medic/checks/common.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | from ceph_medic import metadata, daemon_types 3 | from ceph_medic.util import configuration, str_to_int 4 | 5 | 6 | # 7 | # Utilities 8 | # 9 | 10 | def get_fsid(data): 11 | # FIXME: might want to load this thing into ConfigParser so that we can fetch 12 | # information. ceph-deploy is a good example on how to do this. 
See: 13 | # https://github.com/ceph/ceph-deploy/blob/master/ceph_deploy/conf/ceph.py 14 | cluster_path = '/etc/ceph/%s.conf' % metadata['cluster_name'] 15 | try: 16 | contents = data['paths']['/etc/ceph']['files'][cluster_path]['contents'] 17 | except KeyError: 18 | return '' 19 | conf = configuration.load_string(contents) 20 | try: 21 | return conf.get_safe('global', 'fsid', '') 22 | except IndexError: 23 | return '' 24 | 25 | 26 | def get_common_fsid(): 27 | """ 28 | Determine what is the most common Cluster FSID. If all of them are the same 29 | then we are fine, but if there is a mix, we need some base to compare to. 30 | """ 31 | all_fsids = [] 32 | 33 | for daemon_type in daemon_types: 34 | for node_metadata in metadata[daemon_type].values(): 35 | fsids = get_host_fsids(node_metadata) 36 | all_fsids.extend(fsids) 37 | 38 | try: 39 | common_fsid = Counter(all_fsids).most_common()[0][0] 40 | except IndexError: 41 | return '' 42 | return common_fsid 43 | 44 | 45 | def get_host_fsids(node_metadata): 46 | """ 47 | Return all the cluster FSIDs found for each socket in a host 48 | """ 49 | all_fsids = [] 50 | for socket_metadata in node_metadata['ceph']['sockets'].values(): 51 | config = socket_metadata.get('config', {}) 52 | if not config: 53 | continue 54 | fsid = config.get('fsid') 55 | if not fsid: 56 | continue 57 | all_fsids.append(fsid) 58 | return all_fsids 59 | 60 | 61 | # 62 | # Warning checks 63 | # 64 | 65 | def check_colocated_running_mons_osds(host, data): 66 | code = 'WCOM1' 67 | msg = 'collocated OSDs with MONs running: %s' 68 | sockets = data['ceph']['sockets'] 69 | running_mons = [] 70 | running_osds = [] 71 | for socket_name in sockets.keys(): 72 | if "mon." in socket_name: 73 | running_mons.append(socket_name) 74 | elif "osd." 
in socket_name: 75 | running_osds.append(socket_name) 76 | if running_mons and running_osds: 77 | daemons = "\n %s" % ','.join(running_osds) 78 | return code, msg % daemons 79 | 80 | 81 | # 82 | # Error checks 83 | # 84 | 85 | 86 | def check_ceph_conf_exists(host, data): 87 | cluster_conf = '/etc/ceph/%s.conf' % metadata['cluster_name'] 88 | 89 | files = data['paths']['/etc/ceph']['files'].keys() 90 | if cluster_conf not in files: 91 | msg = "%s does not exist" % cluster_conf 92 | return 'ECOM1', msg 93 | 94 | 95 | def check_ceph_executable_exists(host, data): 96 | if data['ceph']['installed'] is False: 97 | return 'ECOM2', 'ceph executable was not found in common paths when running `which`' 98 | 99 | 100 | def check_var_lib_ceph_dir(host, data): 101 | code = 'ECOM3' 102 | exception = data['paths']['/var/lib/ceph']['dirs']['/var/lib/ceph']['exception'] 103 | if exception: 104 | msg = '/var/lib/ceph could not be parsed: %s' % exception['repr'] 105 | return code, msg 106 | 107 | 108 | def check_var_lib_ceph_permissions(host, data): 109 | code = 'ECOM4' 110 | group = data['paths']['/var/lib/ceph']['dirs']['/var/lib/ceph']['group'] 111 | owner = data['paths']['/var/lib/ceph']['dirs']['/var/lib/ceph']['owner'] 112 | if group == owner != 'ceph': 113 | msg = '/var/lib/ceph has invalid ownership: %s:%s, should be ceph:ceph' % (owner, group) 114 | return code, msg 115 | 116 | 117 | def check_cluster_fsid(host, data): 118 | code = 'ECOM5' 119 | msg = 'fsid "%s" is different than host(s): %s' 120 | mismatched_hosts = [] 121 | 122 | current_fsid = get_fsid(data) 123 | 124 | # no fsid exists for the current host as defined in ceph.conf, let other 125 | # checks note about this instead of reporting an empty FSID 126 | if not current_fsid: 127 | return 128 | 129 | for daemon, hosts in metadata['nodes'].items(): 130 | for host in hosts: 131 | hostname = host['host'] 132 | host_fsid = get_fsid(metadata[daemon][hostname]) 133 | if host_fsid and current_fsid != host_fsid: 134 | 
mismatched_hosts.append(hostname) 135 | 136 | if mismatched_hosts: 137 | return code, msg % (current_fsid, ','.join(mismatched_hosts)) 138 | 139 | 140 | def check_ceph_version_parity(host, data): 141 | code = 'ECOM6' 142 | msg = '(installed) Ceph version "%s" is different than host(s): %s' 143 | mismatched_hosts = [] 144 | host_version = data['ceph']['version'] 145 | for daemon, hosts in metadata['nodes'].items(): 146 | for host in hosts: 147 | hostname = host['host'] 148 | version = metadata[daemon][hostname]['ceph']['version'] 149 | if host_version != version: 150 | mismatched_hosts.append(hostname) 151 | 152 | if mismatched_hosts: 153 | return code, msg % (host_version, ','.join(mismatched_hosts)) 154 | 155 | 156 | def check_ceph_socket_and_installed_version_parity(host, data): 157 | code = 'ECOM7' 158 | msg = '(installed) Ceph version "%s" is different than version from running socket(s): %s' 159 | mismatched_sockets = [] 160 | host_version = data['ceph']['version'] 161 | sockets = data['ceph']['sockets'] 162 | for socket, socket_data in sockets.items(): 163 | socket_version = socket_data['version'].get('version') 164 | if socket_version and socket_version not in host_version: 165 | mismatched_sockets.append("%s:%s" % (socket, socket_version)) 166 | 167 | if mismatched_sockets: 168 | return code, msg % (host_version, ','.join(mismatched_sockets)) 169 | 170 | 171 | def check_rgw_num_rados_handles(host, data): 172 | """ 173 | Although this is an RGW setting, the way Ceph handles configurations can 174 | have this setting be different depending on the daemon. Since we are 175 | checking on every host and every socket, we are placing this check here 176 | with common checks. 
177 | """ 178 | code = 'WCOM7' 179 | msg = "rgw_num_rados_handles shouldn't be larger than 1, can lead to memory leaks: %s" 180 | sockets = data['ceph']['sockets'] 181 | failed = [] 182 | for socket, socket_data in sockets.items(): 183 | config = socket_data.get('config', {}) 184 | if not config: 185 | continue 186 | rgw_num_rados_handles = config.get('rgw_num_rados_handles', 1) 187 | name = socket.split('/var/run/ceph/')[-1] 188 | rgw_num_rados_handles = str_to_int(rgw_num_rados_handles) 189 | if rgw_num_rados_handles > 1: 190 | failed.append(name) 191 | 192 | if failed: 193 | return code, msg % ','.join(failed) 194 | 195 | 196 | def check_fsid_exists(host, data): 197 | code = 'ECOM8' 198 | msg = "'fsid' is missing in the ceph configuration" 199 | 200 | current_fsid = get_fsid(data) 201 | if not current_fsid: 202 | return code, msg 203 | 204 | 205 | def check_fsid_per_daemon(host, data): 206 | """ 207 | In certain deployments types (hi rook!) the FSID will not be present in a 208 | ceph conf file - it will be passed in *directly* to the daemon as an 209 | argument. We aren't going to parse arguments, but the admin socket allows 210 | us to poke inside and check what cluster FSID the daemon is associated 211 | with. 212 | """ 213 | code = 'ECOM9' 214 | msg = 'Found cluster FSIDs from running sockets different than: %s' 215 | sockets = data['ceph']['sockets'] 216 | common_fsid = get_common_fsid() 217 | if not common_fsid: # is this even possible? 
218 | return 219 | 220 | msg = msg % common_fsid 221 | sockets = data['ceph']['sockets'] 222 | failed = False 223 | for socket, socket_data in sockets.items(): 224 | config = socket_data.get('config', {}) 225 | if not config: 226 | continue 227 | socket_fsid = config.get('fsid') 228 | if not socket_fsid: 229 | continue 230 | if socket_fsid != common_fsid: 231 | name = socket.split('/var/run/ceph/')[-1] 232 | msg += '\n %s : %s' % (name, socket_fsid) 233 | failed = True 234 | if failed: 235 | return code, msg 236 | 237 | 238 | def check_multiple_running_mons(host, data): 239 | code = 'ECOM10' 240 | msg = 'multiple running mons found: %s' 241 | sockets = data['ceph']['sockets'] 242 | running_mons = [] 243 | for socket_name in sockets.keys(): 244 | if "mon." in socket_name: 245 | running_mons.append(socket_name) 246 | if len(running_mons) > 1: 247 | return code, msg % ','.join(running_mons) 248 | -------------------------------------------------------------------------------- /ceph_medic/runner.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from ceph_medic import metadata, terminal, daemon_types 3 | from ceph_medic import checks, __version__ 4 | from ceph_medic import config 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class Runner(object): 10 | 11 | def __init__(self): 12 | self.passed = 0 13 | self.skipped = 0 14 | self.total = 0 15 | self.errors = 0 16 | self.warnings = 0 17 | self.ignore = [] 18 | self.internal_errors = [] 19 | 20 | @property 21 | def total_hosts(self): 22 | # XXX does not ensure unique nodes. In collocated scenarios, a single 23 | # node that is a 'mon' and an 'osd' would count as two nodes 24 | count = 0 25 | for daemon in metadata['nodes'].values(): 26 | count += len(daemon) 27 | return count 28 | 29 | def run(self): 30 | """ 31 | Go through all the daemons, and all checks. Single entrypoint for running 32 | checks everywhere. 
33 | """ 34 | start_header() 35 | for daemon_type in daemon_types: 36 | self.run_daemons(daemon_type) 37 | 38 | # these are checks that should run once per cluster 39 | nodes_header('cluster') 40 | self.run_cluster(checks.cluster) 41 | 42 | if metadata['failed_nodes']: 43 | terminal.write.bold('\n{daemon:-^30}\n'.format(daemon=' Failed Nodes ')) 44 | for host, reason in metadata['failed_nodes'].items(): 45 | terminal.loader.write(' %s' % terminal.red(host)) 46 | terminal.write.write('\n') 47 | reason_lines = reason.split('\n') 48 | main_reason = reason_lines.pop(0) 49 | terminal.write.write(" %s\n" % main_reason) 50 | for line in reason_lines: 51 | terminal.write.write(" %s\n" % line) 52 | self.total = self.errors + self.warnings + self.passed + len(self.internal_errors) 53 | return self 54 | 55 | def run_daemons(self, daemon_type): 56 | has_nodes = metadata[daemon_type] 57 | is_daemon = daemon_type in metadata['nodes'] 58 | if has_nodes and is_daemon: # we have nodes of this type to run 59 | nodes_header(daemon_type) 60 | else: 61 | return 62 | 63 | for host, data in metadata[daemon_type].items(): 64 | modules = [checks.common, getattr(checks, daemon_type, None)] 65 | self.run_host(host, data, modules) 66 | 67 | def run_cluster(self, module): 68 | # XXX get the cluster name here 69 | cluster_name = '%s cluster' % metadata.get('cluster_name', 'ceph') 70 | terminal.loader.write(' %s' % terminal.yellow(cluster_name)) 71 | has_error = False 72 | checks = collect_checks(module) 73 | for check in checks: 74 | try: 75 | # TODO: figure out how to skip running a specific check if 76 | # the code is ignored, maybe introspecting the function? 
77 | result = getattr(module, check)() 78 | except Exception as error: 79 | result = None 80 | logger.exception('check had an unhandled error: %s', check) 81 | self.internal_errors.append(error) 82 | if result: 83 | code, message = result 84 | # XXX This is not ideal, we shouldn't need to get all the way here 85 | # to make sure this is actually ignored. (Or maybe it doesn't matter?) 86 | if code in self.ignore: 87 | self.skipped += 1 88 | # avoid writing anything else to the terminal, and just 89 | # go to the next check 90 | continue 91 | if not has_error: 92 | # XXX get the cluster name here 93 | terminal.loader.write(' %s' % terminal.red(cluster_name)) 94 | terminal.write.write('\n') 95 | 96 | if code.startswith('E'): 97 | code = terminal.red(code) 98 | self.errors += 1 99 | elif code.startswith('W'): 100 | code = terminal.yellow(code) 101 | self.warnings += 1 102 | terminal.write.write(" %s: %s\n" % (code, message)) 103 | has_error = True 104 | else: 105 | self.passed += 1 106 | 107 | if not has_error: 108 | terminal.loader.write(' %s\n' % terminal.green(cluster_name)) 109 | 110 | def run_host(self, host, data, modules): 111 | terminal.loader.write(' %s' % terminal.yellow(host)) 112 | has_error = False 113 | for module in modules: 114 | checks = collect_checks(module) 115 | for check in checks: 116 | try: 117 | # TODO: figure out how to skip running a specific check if 118 | # the code is ignored, maybe introspecting the function? 119 | result = getattr(module, check)(host, data) 120 | except Exception as error: 121 | result = None 122 | logger.exception('check had an unhandled error: %s', check) 123 | self.internal_errors.append(error) 124 | if result: 125 | code, message = result 126 | # XXX This is not ideal, we shouldn't need to get all the way here 127 | # to make sure this is actually ignored. (Or maybe it doesn't matter?) 
128 | if code in self.ignore: 129 | self.skipped += 1 130 | # avoid writing anything else to the terminal, and just 131 | # go to the next check 132 | continue 133 | if not has_error: 134 | terminal.loader.write(' %s' % terminal.red(host)) 135 | terminal.write.write('\n') 136 | 137 | if code.startswith('E'): 138 | self.errors += 1 139 | code = terminal.red(code) 140 | elif code.startswith('W'): 141 | self.warnings += 1 142 | code = terminal.yellow(code) 143 | terminal.write.write(" %s: %s\n" % (code, message)) 144 | has_error = True 145 | else: 146 | self.passed += 1 147 | 148 | if not has_error: 149 | terminal.loader.write(' %s\n' % terminal.green(host)) 150 | 151 | 152 | run_errors = terminal.yellow(""" 153 | While running checks, ceph-medic had %s unhandled errors, please look at the 154 | configured log file and report the issue along with the traceback. 155 | """) 156 | 157 | 158 | def report(results): 159 | msg = "\n{passed}{error}{warning}{skipped}{internal_errors}{hosts}" 160 | 161 | if results.errors: 162 | msg = terminal.red(msg) 163 | elif results.warnings: 164 | msg = terminal.yellow(msg) 165 | else: 166 | msg = terminal.green(msg) 167 | 168 | errors = warnings = internal_errors = '' 169 | 170 | if results.errors: 171 | errors = '%s errors, ' % results.errors if results.errors > 1 else '1 error, ' 172 | if results.warnings: 173 | warnings = '%s warnings, ' % results.warnings if results.warnings > 1 else '1 warning, ' 174 | if results.internal_errors: 175 | internal_errors = "%s internal errors, " % len(results.internal_errors) 176 | 177 | terminal.write.raw( 178 | msg.format( 179 | passed="%s passed, " % results.passed, 180 | error=errors, 181 | warning=warnings, 182 | skipped="%s skipped, " % results.skipped if results.skipped else '', 183 | internal_errors=internal_errors, 184 | hosts="on %s hosts" % results.total_hosts 185 | ) 186 | ) 187 | if results.internal_errors: 188 | terminal.write.raw(run_errors % len(results.internal_errors)) 189 | 190 | 191 
| start_header_tmpl = """ 192 | {title:=^80} 193 | Version: {version: >4} Cluster Name: "{cluster_name}" 194 | Connection: {connection_type} 195 | Total hosts: [{total_hosts}] 196 | OSDs: {osds: >4} MONs: {mons: >4} Clients: {clients: >4} 197 | MDSs: {mdss: >4} RGWs: {rgws: >4} MGRs: {mgrs: >7} 198 | """ 199 | 200 | 201 | def start_header(): 202 | connection_type = config.file.get_safe('global', 'deployment_type', 'ssh') 203 | daemon_totals = dict((daemon, 0) for daemon in daemon_types) 204 | total_hosts = 0 205 | for daemon in daemon_types: 206 | count = len(metadata[daemon].keys()) 207 | total_hosts += count 208 | daemon_totals[daemon] = count 209 | terminal.write.raw(start_header_tmpl.format( 210 | title=' Starting remote check session ', 211 | version=__version__, 212 | connection_type=connection_type, 213 | total_hosts=total_hosts, 214 | cluster_name=metadata['cluster_name'], 215 | **daemon_totals)) 216 | terminal.write.raw('=' * 80) 217 | 218 | 219 | def nodes_header(daemon_type): 220 | readable_daemons = { 221 | 'rgws': ' rados gateways ', 222 | 'mgrs': ' managers ', 223 | 'mons': ' mons ', 224 | 'osds': ' osds ', 225 | 'clients': ' clients ', 226 | 'cluster': ' cluster ', 227 | } 228 | 229 | terminal.write.bold('\n{daemon:-^30}\n'.format( 230 | daemon=readable_daemons.get(daemon_type, daemon_type))) 231 | 232 | 233 | def collect_checks(module): 234 | checks = [i for i in dir(module) if i.startswith('check')] 235 | return checks 236 | -------------------------------------------------------------------------------- /ceph_medic/collector.py: -------------------------------------------------------------------------------- 1 | """ 2 | Collect remote information on Ceph daemons, store everything in memory and make 3 | it available as a global part of the module so that other checks can consume it 4 | """ 5 | from ceph_medic import metadata, remote, terminal 6 | from ceph_medic.terminal import loader 7 | from ceph_medic.connection import get_connection 8 | from 
execnet.gateway_bootstrap import HostNotFound
import logging


logger = logging.getLogger(__name__)


def collect_paths(conn):
    """
    Gather all the interesting paths from the remote system, stat them, and
    capture contents when needed.

    Generates a tree path, using the "path of interest" as key, and appending
    the absolute paths of files in the 'files' key and directories in the
    'dirs' key. A small subset of a tree would look
    very similar to::

        {
            '/etc/ceph': {
                'dirs': {
                    '/etc/ceph/ceph.d': {...},
                },
                'files': {
                    '/etc/ceph/ceph.d/ceph.conf': {...},
                },
            }
        }

    Each file and dir in a path tree will contain a set of keys populated
    mostly by calling ``stat`` on the remote system for that absolute path, in
    addition to capturing contents when "interesting files" are defined. For
    example, the contents of a ``ceph.conf`` file will always be captured. This
    is how that file would look like in a tree path::


        {
            '/etc/ceph/ceph.d/test.conf':
                {
                 'contents': '[osd]\nosd mkfs type = xfs\nosd mkfs options[...] ',
                 'exception': {},
                 'group': 'ceph',
                 'n_fields': 16,
                 'n_sequence_fields': 10,
                 'n_unnamed_fields': 3,
                 'owner': 'ceph',
                 'st_atime': 1492721509.572292,
                 'st_blksize': 4096,
                 'st_blocks': 8,
                 'st_ctime': 1492721507.880156,
                 'st_dev': 64768L,
                 'st_gid': 167,
                 'st_ino': 100704475,
                 'st_mode': 33188,
                 'st_mtime': 1492721506.1060133,
                 'st_nlink': 1,
                 'st_rdev': 0,
                 'st_size': 650,
                 'st_uid': 167
                },

        }

    .. note:: ``contents`` is captured using ``file.read()`` so its value will
        be a single line with possible line breaks (if any). For reading and
        parsing that key on each line a split must be done on the line break.

    """
    path_metadata = {}
    # paths of interest, each with its per-path collection options
    paths = {
        "/etc/ceph": {'get_contents': True},
        "/var/lib/ceph": {
            'get_contents': True,
            'skip_files': ['activate.monmap', 'superblock'],
            'skip_dirs': ['tmp', 'current', 'store.db']
        },
        "/var/run/ceph": {'get_contents': False},
    }
    for p, kw in paths.items():
        # Collect metadata about the files and dirs for the given path and assign
        # it back to the path_metadata for the current node
        path_metadata[p] = get_path_metadata(conn, p, **kw)
    return path_metadata


def get_path_metadata(conn, path, **kw):
    """
    Stat every file and directory found under ``path`` on the remote side,
    returning a ``{'dirs': {...}, 'files': {...}}`` mapping keyed by
    absolute path.
    """
    # generate the tree
    tree = conn.remote_module.path_tree(
        path,
        kw.get('skip_dirs'),
        kw.get('skip_files'),
        kw.get('get_contents')
    )

    files = {}
    dirs = {}

    # contents are only fetched for files, and only when requested
    for i in tree['files']:
        files[i] = conn.remote_module.stat_path(i, None, None, kw.get('get_contents'))
    for i in tree['dirs']:
        dirs[i] = conn.remote_module.stat_path(i, None, None, False)

    # actual root path
    dirs[path] = conn.remote_module.stat_path(path, None, None, False)

    return {'dirs': dirs, 'files': files}


def get_node_metadata(conn, hostname, cluster_nodes):
    """
    Build the full metadata dictionary for one remote host: paths, network,
    devices, and ceph information, updating the terminal loader as each
    section is collected.
    """
    # "import" the remote functions so that remote calls using the
    # functions can be executed
    conn.import_module(remote.functions)

    node_metadata = {'ceph': {}}

    # collect paths and files first
    loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.yellow('paths')))
    node_metadata['paths'] = collect_paths(conn)
    loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.green('paths')))

    # TODO: collect network information, passing all the cluster_nodes
    # so that it can check for inter-node connectivity
    loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.yellow('network')))
    node_metadata['network'] = collect_network(cluster_nodes)
    loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.green('network')))

    # TODO: collect device information
    loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.yellow('devices')))
    node_metadata['devices'] = collect_devices()
    loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.green('devices')))

    # collect ceph information
    loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.yellow('ceph information')))
    node_metadata['ceph'] = collect_ceph_info(conn)
    node_metadata['ceph']['sockets'] = collect_socket_info(conn, node_metadata)
    node_metadata['ceph']['osd'] = collect_ceph_osd_info(conn)
    loader.write('Host: %-*s collecting: [%s]' % (40, hostname, terminal.green('ceph information')))

    return node_metadata


def collect():
    """
    The main collecting entrypoint. This function will call all the pieces
    needed to build the complete metadata set of a remote system so that checks
    can consume and verify that data.

    After collection is done, the full contents of the metadata are available
    at ``ceph_medic.metadata``
    """
    cluster_nodes = metadata['nodes']
    loader.write('collecting remote node information')
    total_nodes = 0
    failed_nodes = 0
    has_cluster_data = False

    for node_type, nodes in cluster_nodes.items():
        for node in nodes:
            # check if a node type exists for this node before doing any work:
            try:
                metadata[node_type]
            except KeyError:
                msg = "Skipping node {} from unknown host group: {}".format(node, node_type)
                logger.warning(msg)
                continue

            total_nodes += 1
            hostname = node['host']
            loader.write('Host: %-40s connection: [%-20s]' % (hostname, terminal.yellow('connecting')))
            # TODO: make sure that the hostname is resolvable, trying to
            # debug SSH issues with execnet is pretty hard/impossible, use
            # util.net.host_is_resolvable
            try:
                logger.debug('attempting connection to host: %s', node['host'])
                conn = get_connection(node['host'], container=node.get('container'))
                loader.write('Host: %-40s connection: [%-20s]' % (hostname, terminal.green('connected')))
                loader.write('\n')
            except HostNotFound as err:
                logger.exception('connection failed')
                loader.write('Host: %-40s connection: [%-20s]' % (hostname, terminal.red('failed')))
                loader.write('\n')
                failed_nodes += 1
                # remove the unreachable host from the metadata so checks do
                # not try to consume it, and record the failure reason
                if metadata[node_type].get(hostname):
                    metadata[node_type].pop(hostname)
                metadata['nodes'][node_type] = [i for i in metadata['nodes'][node_type] if i['host'] != hostname]
                metadata['failed_nodes'].update({hostname: str(err)})
                continue

            # send the full node metadata for global scope so that the checks
            # can consume this
            metadata[node_type][hostname] = get_node_metadata(conn, hostname, cluster_nodes)
            if node_type == 'mons':  # if node type is monitor, admin privileges are most likely authorized
                # cluster-wide data only needs to be captured once
                if not has_cluster_data:
                    cluster_data = collect_cluster(conn)
                    if cluster_data:
                        metadata['cluster'] = cluster_data
                        has_cluster_data = True
            conn.exit()

    if failed_nodes == total_nodes:
        loader.write(terminal.red('Collection failed!') + ' ' *70)
        # TODO: this helps clear out the 'loader' line so that the error looks
        # clean, but this manual clearing should be done automatically
        terminal.write.raw('')
        raise RuntimeError('All nodes failed to connect. Cannot run any checks')
    if failed_nodes:
        loader.write(terminal.yellow('Collection completed with some failed connections' + ' ' *70 + '\n'))
    else:
        loader.write('Collection completed!' + ' ' *70 + '\n')


# Network
#
def collect_network(cluster_nodes):
    """
    Collect node-specific information, but also try to check connectivity to
    other hosts that are passed in as ``cluster_nodes``
    """
    # TODO: not implemented yet, always returns an empty mapping
    return {}


# Devices
#
def collect_devices():
    """
    Get all the device information from the current node
    """
    # TODO: not implemented yet, always returns an empty mapping
    return {}


# Ceph
#
def collect_ceph_info(conn):
    # installed ceph version, and whether the executable exists at all
    result = dict()
    result['version'] = remote.commands.ceph_version(conn)
    result['installed'] = remote.commands.ceph_is_installed(conn)
    return result


def collect_cluster(conn):
    """
    Captures useful cluster information like the status
    """
    result = dict()
    result['status'] = remote.commands.ceph_status(conn)
    return result


# Ceph socket info
#
def collect_socket_info(conn, node_metadata):
    # every admin socket (*.asok) previously found under /var/run/ceph
    sockets = [socket for socket in node_metadata['paths']['/var/run/ceph']['files']
               if socket.endswith(".asok")]
    result = dict()
    for socket in sockets:
        result[socket] = {'version': {}, 'config': {}}
result[socket]['version'] = remote.commands.ceph_socket_version(conn, socket) 264 | result[socket]['config'] = remote.commands.daemon_socket_config(conn, socket) 265 | return result 266 | 267 | 268 | # Ceph OSD info 269 | # 270 | def collect_ceph_osd_info(conn): 271 | result = {'dump': {}} 272 | result['dump'] = remote.commands.ceph_osd_dump(conn) 273 | return result 274 | -------------------------------------------------------------------------------- /tests/functional/Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | require 'yaml' 5 | require 'time' 6 | VAGRANTFILE_API_VERSION = '2' 7 | 8 | DEBUG = false 9 | 10 | config_file=File.expand_path(File.join(File.dirname(__FILE__), 'vagrant_variables.yml')) 11 | settings=YAML.load_file(config_file) 12 | 13 | LABEL_PREFIX = settings['label_prefix'] ? settings['label_prefix'] + "-" : "" 14 | NMONS = settings['mon_vms'] 15 | NOSDS = settings['osd_vms'] 16 | NMDSS = settings['mds_vms'] 17 | NRGWS = settings['rgw_vms'] 18 | NNFSS = settings['nfs_vms'] 19 | RESTAPI = settings['restapi'] 20 | NRBD_MIRRORS = settings['rbd_mirror_vms'] 21 | CLIENTS = settings['client_vms'] 22 | NISCSI_GWS = settings['iscsi_gw_vms'] 23 | PUBLIC_SUBNET = settings['public_subnet'] 24 | CLUSTER_SUBNET = settings['cluster_subnet'] 25 | BOX = settings['vagrant_box'] 26 | CLIENT_BOX = settings['client_vagrant_box'] 27 | BOX_URL = settings['vagrant_box_url'] 28 | SYNC_DIR = settings['vagrant_sync_dir'] 29 | MEMORY = settings['memory'] 30 | ETH = settings['eth'] 31 | USER = settings['ssh_username'] 32 | 33 | ASSIGN_STATIC_IP = settings.fetch('assign_static_ip', true) 34 | DISABLE_SYNCED_FOLDER = settings.fetch('vagrant_disable_synced_folder', false) 35 | DISK_UUID = Time.now.utc.to_i 36 | 37 | def create_vmdk(name, size) 38 | dir = Pathname.new(__FILE__).expand_path.dirname 39 | path = File.join(dir, '.vagrant', name + '.vmdk') 40 | `vmware-vdiskmanager 
# Create a VMware .vmdk disk image under .vagrant/ unless one already exists.
# Called from the OSD definitions below to attach extra data disks.
def create_vmdk(name, size)
  dir = Pathname.new(__FILE__).expand_path.dirname
  path = File.join(dir, '.vagrant', name + '.vmdk')
  `vmware-vdiskmanager -c -s #{size} -t 0 -a scsi #{path} \
   2>&1 > /dev/null` unless File.exist?(path)
end

# Multi-machine cluster definition: one VM group per Ceph role
# (clients, rgws, nfss, mdss, rbd mirrors, iscsi gateways, mons, osds),
# with counts and settings driven by vagrant_variables.yml. Each group
# carries per-provider tuning for virtualbox / vmware_fusion / libvirt /
# parallels / linode.
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.ssh.insert_key = false # workaround for https://github.com/mitchellh/vagrant/issues/5048
  config.ssh.private_key_path = settings['ssh_private_key_path']
  config.ssh.username = USER

  # When using libvirt, avoid errors like:
  # "CPU feature cmt not found"
  config.vm.provider :libvirt do |lv|
    lv.cpu_mode = 'host-passthrough'
  end

  # Faster bootup. Disables mounting the sync folder for libvirt and virtualbox
  if DISABLE_SYNCED_FOLDER
    config.vm.provider :virtualbox do |v,override|
      override.vm.synced_folder '.', SYNC_DIR, disabled: true
    end
    config.vm.provider :libvirt do |v,override|
      override.vm.synced_folder '.', SYNC_DIR, disabled: true
    end
  end

  # Client VMs: use CLIENT_BOX (may differ from the cluster box) and the
  # public subnet's .4x addresses.
  (0..CLIENTS - 1).each do |i|
    config.vm.define "#{LABEL_PREFIX}client#{i}" do |client|
      client.vm.box = CLIENT_BOX
      client.vm.hostname = "#{LABEL_PREFIX}ceph-client#{i}"
      if ASSIGN_STATIC_IP
        client.vm.network :private_network,
          ip: "#{PUBLIC_SUBNET}.4#{i}"
      end
      # Virtualbox
      client.vm.provider :virtualbox do |vb|
        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
      end

      # VMware
      client.vm.provider :vmware_fusion do |v|
        v.vmx['memsize'] = "#{MEMORY}"
      end

      # Libvirt
      client.vm.provider :libvirt do |lv|
        lv.memory = MEMORY
        lv.random_hostname = true
      end

      # Parallels
      client.vm.provider "parallels" do |prl|
        prl.name = "ceph-client#{i}"
        prl.memory = "#{MEMORY}"
      end

      client.vm.provider :linode do |provider|
        provider.label = client.vm.hostname
      end
    end
  end

  # RGW (Rados Gateway) VMs: public subnet .5x addresses.
  (0..NRGWS - 1).each do |i|
    config.vm.define "#{LABEL_PREFIX}rgw#{i}" do |rgw|
      rgw.vm.box = BOX
      rgw.vm.box_url = BOX_URL
      rgw.vm.hostname = "#{LABEL_PREFIX}ceph-rgw#{i}"
      if ASSIGN_STATIC_IP
        rgw.vm.network :private_network,
          ip: "#{PUBLIC_SUBNET}.5#{i}"
      end

      # Virtualbox
      rgw.vm.provider :virtualbox do |vb|
        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
      end

      # VMware
      rgw.vm.provider :vmware_fusion do |v|
        v.vmx['memsize'] = "#{MEMORY}"
      end

      # Libvirt
      rgw.vm.provider :libvirt do |lv|
        lv.memory = MEMORY
        lv.random_hostname = true
      end

      # Parallels
      rgw.vm.provider "parallels" do |prl|
        prl.name = "ceph-rgw#{i}"
        prl.memory = "#{MEMORY}"
      end

      rgw.vm.provider :linode do |provider|
        provider.label = rgw.vm.hostname
      end
    end
  end

  # NFS gateway VMs: public subnet .6x addresses.
  # NOTE(review): unlike every other group, nfs names do not get
  # LABEL_PREFIX applied — presumably intentional, confirm before changing.
  (0..NNFSS - 1).each do |i|
    config.vm.define "nfs#{i}" do |nfs|
      nfs.vm.box = BOX
      nfs.vm.box_url = BOX_URL
      nfs.vm.hostname = "ceph-nfs#{i}"
      if ASSIGN_STATIC_IP
        nfs.vm.network :private_network,
          ip: "#{PUBLIC_SUBNET}.6#{i}"
      end

      # Virtualbox
      nfs.vm.provider :virtualbox do |vb|
        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
      end

      # VMware
      nfs.vm.provider :vmware_fusion do |v|
        v.vmx['memsize'] = "#{MEMORY}"
      end

      # Libvirt
      nfs.vm.provider :libvirt do |lv|
        lv.memory = MEMORY
        lv.random_hostname = true
      end

      # Parallels
      nfs.vm.provider "parallels" do |prl|
        prl.name = "ceph-nfs#{i}"
        prl.memory = "#{MEMORY}"
      end

      nfs.vm.provider :linode do |provider|
        provider.label = nfs.vm.hostname
      end
    end
  end

  # MDS (metadata server) VMs: public subnet .7x addresses.
  (0..NMDSS - 1).each do |i|
    config.vm.define "#{LABEL_PREFIX}mds#{i}" do |mds|
      mds.vm.box = BOX
      mds.vm.box_url = BOX_URL
      mds.vm.hostname = "#{LABEL_PREFIX}ceph-mds#{i}"
      if ASSIGN_STATIC_IP
        mds.vm.network :private_network,
          ip: "#{PUBLIC_SUBNET}.7#{i}"
      end
      # Virtualbox
      mds.vm.provider :virtualbox do |vb|
        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
      end

      # VMware
      mds.vm.provider :vmware_fusion do |v|
        v.vmx['memsize'] = "#{MEMORY}"
      end

      # Libvirt
      mds.vm.provider :libvirt do |lv|
        lv.memory = MEMORY
        lv.random_hostname = true
      end
      # Parallels
      mds.vm.provider "parallels" do |prl|
        prl.name = "ceph-mds#{i}"
        prl.memory = "#{MEMORY}"
      end

      mds.vm.provider :linode do |provider|
        provider.label = mds.vm.hostname
      end
    end
  end

  # RBD mirror VMs: public subnet .8x addresses.
  (0..NRBD_MIRRORS - 1).each do |i|
    config.vm.define "#{LABEL_PREFIX}rbd_mirror#{i}" do |rbd_mirror|
      rbd_mirror.vm.box = BOX
      rbd_mirror.vm.box_url = BOX_URL
      rbd_mirror.vm.hostname = "#{LABEL_PREFIX}ceph-rbd-mirror#{i}"
      if ASSIGN_STATIC_IP
        rbd_mirror.vm.network :private_network,
          ip: "#{PUBLIC_SUBNET}.8#{i}"
      end
      # Virtualbox
      rbd_mirror.vm.provider :virtualbox do |vb|
        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
      end

      # VMware
      rbd_mirror.vm.provider :vmware_fusion do |v|
        v.vmx['memsize'] = "#{MEMORY}"
      end

      # Libvirt
      rbd_mirror.vm.provider :libvirt do |lv|
        lv.memory = MEMORY
        lv.random_hostname = true
      end
      # Parallels
      rbd_mirror.vm.provider "parallels" do |prl|
        prl.name = "ceph-rbd-mirror#{i}"
        prl.memory = "#{MEMORY}"
      end

      rbd_mirror.vm.provider :linode do |provider|
        provider.label = rbd_mirror.vm.hostname
      end
    end
  end

  # iSCSI gateway VMs: public subnet .9x addresses.
  (0..NISCSI_GWS - 1).each do |i|
    config.vm.define "#{LABEL_PREFIX}iscsi_gw#{i}" do |iscsi_gw|
      iscsi_gw.vm.box = BOX
      iscsi_gw.vm.box_url = BOX_URL
      iscsi_gw.vm.hostname = "#{LABEL_PREFIX}ceph-iscsi-gw#{i}"
      if ASSIGN_STATIC_IP
        iscsi_gw.vm.network :private_network,
          ip: "#{PUBLIC_SUBNET}.9#{i}"
      end
      # Virtualbox
      iscsi_gw.vm.provider :virtualbox do |vb|
        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
      end

      # VMware
      iscsi_gw.vm.provider :vmware_fusion do |v|
        v.vmx['memsize'] = "#{MEMORY}"
      end

      # Libvirt
      iscsi_gw.vm.provider :libvirt do |lv|
        lv.memory = MEMORY
        lv.random_hostname = true
      end
      # Parallels
      iscsi_gw.vm.provider "parallels" do |prl|
        prl.name = "ceph-iscsi-gw#{i}"
        prl.memory = "#{MEMORY}"
      end

      iscsi_gw.vm.provider :linode do |provider|
        provider.label = iscsi_gw.vm.hostname
      end
    end
  end

  # Monitor VMs: public subnet .1x addresses.
  (0..NMONS - 1).each do |i|
    config.vm.define "#{LABEL_PREFIX}mon#{i}" do |mon|
      mon.vm.box = BOX
      mon.vm.box_url = BOX_URL
      mon.vm.hostname = "#{LABEL_PREFIX}ceph-mon#{i}"
      if ASSIGN_STATIC_IP
        mon.vm.network :private_network,
          ip: "#{PUBLIC_SUBNET}.1#{i}"
      end
      # Virtualbox
      mon.vm.provider :virtualbox do |vb|
        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
      end

      # VMware
      mon.vm.provider :vmware_fusion do |v|
        v.vmx['memsize'] = "#{MEMORY}"
      end

      # Libvirt
      mon.vm.provider :libvirt do |lv|
        lv.memory = MEMORY
        lv.random_hostname = true
      end

      # Parallels
      mon.vm.provider "parallels" do |prl|
        prl.name = "ceph-mon#{i}"
        prl.memory = "#{MEMORY}"
      end

      mon.vm.provider :linode do |provider|
        provider.label = mon.vm.hostname
      end
    end
  end

  # OSD VMs: dual-homed (public .10x and cluster .20x subnets) and given
  # extra data disks per provider to back the OSD daemons.
  (0..NOSDS - 1).each do |i|
    config.vm.define "#{LABEL_PREFIX}osd#{i}" do |osd|
      osd.vm.box = BOX
      osd.vm.box_url = BOX_URL
      osd.vm.hostname = "#{LABEL_PREFIX}ceph-osd#{i}"
      if ASSIGN_STATIC_IP
        osd.vm.network :private_network,
          ip: "#{PUBLIC_SUBNET}.10#{i}"
        osd.vm.network :private_network,
          ip: "#{CLUSTER_SUBNET}.20#{i}"
      end
      # Virtualbox
      osd.vm.provider :virtualbox do |vb|
        # Create our own controller for consistency and to remove VM dependency
        vb.customize ['storagectl', :id,
                      '--name', 'OSD Controller',
                      '--add', 'scsi']
        # Three 11 GB data disks per OSD VM, attached at SCSI ports 3-5.
        (0..2).each do |d|
          vb.customize ['createhd',
                        '--filename', "disk-#{i}-#{d}",
                        '--size', '11000'] unless File.exist?("disk-#{i}-#{d}.vdi")
          vb.customize ['storageattach', :id,
                        '--storagectl', 'OSD Controller',
                        '--port', 3 + d,
                        '--device', 0,
                        '--type', 'hdd',
                        '--medium', "disk-#{i}-#{d}.vdi"]
        end
        vb.customize ['modifyvm', :id, '--memory', "#{MEMORY}"]
      end

      # VMware
      osd.vm.provider :vmware_fusion do |v|
        # Two extra vmdk disks created via create_vmdk above.
        (0..1).each do |d|
          v.vmx["scsi0:#{d + 1}.present"] = 'TRUE'
          v.vmx["scsi0:#{d + 1}.fileName"] =
            create_vmdk("disk-#{i}-#{d}", '11000MB')
        end
        v.vmx['memsize'] = "#{MEMORY}"
      end

      # Libvirt
      driverletters = ('a'..'z').to_a
      osd.vm.provider :libvirt do |lv|
        # always make /dev/sd{a/b/c/d} so that CI can ensure that
        # virtualbox and libvirt will have the same devices to use for OSDs
        (0..3).each do |d|
          lv.storage :file, :device => "hd#{driverletters[d]}", :path => "disk-#{i}-#{d}-#{DISK_UUID}.disk", :size => '12G', :bus => "ide"
        end
        lv.memory = MEMORY
        lv.random_hostname = true
      end

      # Parallels
      osd.vm.provider "parallels" do |prl|
        prl.name = "ceph-osd#{i}"
        prl.memory = "#{MEMORY}"
        # Two extra SATA disks; loop index is unused because every disk is
        # added with identical options.
        (0..1).each do |d|
          prl.customize ["set", :id,
                         "--device-add",
                         "hdd",
                         "--iface",
                         "sata"]
        end
      end

      osd.vm.provider :linode do |provider|
        provider.label = osd.vm.hostname
      end

    end
  end
end