├── src └── dron │ ├── py.typed │ ├── __main__.py │ ├── notify │ ├── ntfy_telegram.py │ ├── ntfy_desktop.py │ ├── ntfy_common.py │ ├── telegram.py │ ├── email.py │ └── common.py │ ├── conftest.py │ ├── api.py │ ├── launchd_wrapper.py │ ├── common.py │ ├── tests │ └── test_dron.py │ ├── cli.py │ ├── monitor.py │ ├── dron.py │ ├── launchd.py │ └── systemd.py ├── mypy.ini ├── pytest.ini ├── .ci ├── run └── release ├── LICENSE.txt ├── tox.ini ├── pyproject.toml ├── .gitignore ├── .github └── workflows │ └── main.yml ├── README.org └── ruff.toml /src/dron/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/dron/__main__.py: -------------------------------------------------------------------------------- 1 | # NOTE: import needs to be on top level as it's the entry point 2 | from .dron import main 3 | 4 | if __name__ == '__main__': 5 | main() 6 | -------------------------------------------------------------------------------- /src/dron/notify/ntfy_telegram.py: -------------------------------------------------------------------------------- 1 | from .common import get_parser 2 | from .ntfy_common import run_ntfy 3 | 4 | 5 | def main() -> None: 6 | p = get_parser() 7 | args = p.parse_args() 8 | run_ntfy(job=args.job, backend='telegram') 9 | 10 | 11 | if __name__ == '__main__': 12 | main() 13 | -------------------------------------------------------------------------------- /src/dron/notify/ntfy_desktop.py: -------------------------------------------------------------------------------- 1 | from .common import IS_SYSTEMD, get_parser 2 | from .ntfy_common import run_ntfy 3 | 4 | BACKEND = 'linux' if IS_SYSTEMD else 'darwin' 5 | 6 | 7 | def main() -> None: 8 | p = get_parser() 9 | args = p.parse_args() 10 | 11 | run_ntfy(job=args.job, backend=BACKEND) 12 | 13 | 14 | if __name__ == '__main__': 15 | main() 16 | 
-------------------------------------------------------------------------------- /src/dron/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.fixture(scope='session', autouse=True) 5 | def disable_verify_units_if_no_systemd(): 6 | ''' 7 | If we can't use systemd, we need to suppress systemd-specific linting 8 | ''' 9 | from . import common 10 | from .systemd import _is_missing_systemd 11 | 12 | reason = _is_missing_systemd() 13 | if reason is not None: 14 | common.VERIFY_UNITS = False 15 | try: 16 | yield 17 | finally: 18 | common.VERIFY_UNITS = True 19 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | pretty = True 3 | show_error_context = True 4 | show_column_numbers = True 5 | show_error_end = True 6 | 7 | check_untyped_defs = True 8 | 9 | # see https://mypy.readthedocs.io/en/stable/error_code_list2.html 10 | warn_redundant_casts = True 11 | strict_equality = True 12 | warn_unused_ignores = True 13 | enable_error_code = deprecated,redundant-expr,possibly-undefined,truthy-bool,truthy-iterable,ignore-without-code,unused-awaitable 14 | 15 | 16 | # an example of suppressing 17 | # [mypy-my.config.repos.pdfannots.pdfannots] 18 | # ignore_errors = True 19 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | # discover files that don't follow test_ naming. 
Useful to keep tests along with the source code 3 | python_files = *.py 4 | 5 | # this is necessary for --pyargs to discover implicit namespace packages correctly 6 | consider_namespace_packages = true 7 | 8 | # see https://docs.pytest.org/en/stable/reference/reference.html#confval-strict 9 | strict = true 10 | 11 | addopts = 12 | # prevent pytest cache from being created... it craps into project dir and I never use it anyway 13 | -p no:cacheprovider 14 | 15 | # -rap to print tests summary even when they are successful 16 | -rap 17 | --verbose 18 | 19 | # otherwise it won't discover doctests 20 | --doctest-modules 21 | 22 | # show all test durations (unless they are too short) 23 | --durations=0 24 | -------------------------------------------------------------------------------- /src/dron/notify/ntfy_common.py: -------------------------------------------------------------------------------- 1 | """ 2 | uses https://github.com/dschep/ntfy 3 | """ 4 | 5 | import logging 6 | import socket 7 | import subprocess 8 | import sys 9 | from typing import NoReturn 10 | 11 | 12 | # ty doesn't support NoReturn yet, see https://github.com/astral-sh/ty/issues/180 13 | def run_ntfy(*, job: str, backend: str) -> NoReturn: # ty: ignore[invalid-return-type] 14 | # TODO not sure what to do with --stdin arg here? 15 | # could probably use last N lines of log or something 16 | # TODO get last logs here? 17 | title = f'dron[{socket.gethostname()}]: {job} failed' 18 | body = title 19 | try: 20 | subprocess.check_call(['ntfy', '-b', backend, '-t', title, 'send', body]) 21 | except Exception as e: 22 | logging.exception(e) # noqa: LOG015 23 | # TODO fallback on email? 24 | sys.exit(1) 25 | sys.exit(0) 26 | -------------------------------------------------------------------------------- /.ci/run: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eu 3 | 4 | cd "$(dirname "$0")" 5 | cd .. # git root 6 | 7 | if ! 
command -v sudo; then 8 | # CI or Docker sometimes doesn't have it, so useful to have a dummy 9 | function sudo { 10 | "$@" 11 | } 12 | fi 13 | 14 | # --parallel-live to show outputs while it's running 15 | tox_cmd='run-parallel --parallel-live' 16 | if [ -n "${CI-}" ]; then 17 | # install OS specific stuff here 18 | case "$OSTYPE" in 19 | darwin*) 20 | # macos 21 | : 22 | ;; 23 | cygwin* | msys* | win*) 24 | # windows 25 | # ugh. parallel stuff seems super flaky under windows, some random failures, "file used by other process" and crap like that 26 | tox_cmd='run' 27 | ;; 28 | *) 29 | # must be linux? 30 | # necessary for dbus-python 31 | sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_SUSPEND=1 apt-get install --yes libdbus-1-dev libglib2.0-dev 32 | ;; 33 | esac 34 | fi 35 | 36 | # NOTE: expects uv installed 37 | uv tool run --with tox-uv tox $tox_cmd "$@" 38 | -------------------------------------------------------------------------------- /src/dron/notify/telegram.py: -------------------------------------------------------------------------------- 1 | """ 2 | uses telegram-send for Telegram notifications 3 | make sure to run "telegram-send --configure" beforehand! 4 | """ 5 | 6 | import asyncio 7 | import logging 8 | import socket 9 | import sys 10 | 11 | from .common import get_last_systemd_log, get_parser, get_stdin 12 | 13 | 14 | def send(*, message: str) -> None: 15 | import telegram_send # type: ignore[import-untyped] 16 | 17 | asyncio.run(telegram_send.send(messages=[message])) 18 | 19 | 20 | def main() -> None: 21 | p = get_parser() 22 | args = p.parse_args() 23 | 24 | job: str = args.job 25 | stdin: bool = args.stdin 26 | 27 | body = f'dron[{socket.gethostname()}]: {job} failed' 28 | 29 | last_log = get_stdin() if stdin else get_last_systemd_log(job) 30 | body += '\n' + '\n'.join(l.decode('utf8') for l in last_log) 31 | 32 | try: 33 | send(message=body) 34 | except Exception as e: 35 | logging.exception(e) # noqa: LOG015 36 | # TODO fallback on email? 
37 | sys.exit(1) 38 | sys.exit(0) 39 | 40 | 41 | if __name__ == '__main__': 42 | main() 43 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2024 Dima Gerasimov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /src/dron/notify/email.py: -------------------------------------------------------------------------------- 1 | import socket 2 | from collections.abc import Iterator 3 | from subprocess import PIPE, Popen 4 | 5 | from .common import get_last_systemd_log, get_parser, get_stdin 6 | 7 | 8 | def send_payload(payload: Iterator[bytes]) -> None: 9 | with Popen(['sendmail', '-t'], stdin=PIPE) as po: 10 | stdin = po.stdin 11 | assert stdin is not None 12 | for line in payload: 13 | stdin.write(line) 14 | stdin.flush() 15 | rc = po.poll() 16 | assert rc == 0, rc 17 | 18 | 19 | def send_email(*, to: str, job: str, stdin: bool) -> None: 20 | def payload() -> Iterator[bytes]: 21 | hostname = socket.gethostname() 22 | yield f''' 23 | To: {to} 24 | From: dron 25 | Subject: {job} 26 | Content-Transfer-Encoding: 8bit 27 | Content-Type: text/plain; charset=UTF-8 28 | '''.lstrip().encode('utf8') 29 | last_log = get_stdin() if stdin else get_last_systemd_log(job) 30 | yield from last_log 31 | 32 | send_payload(payload()) 33 | 34 | 35 | def main() -> None: 36 | p = get_parser() 37 | p.add_argument('--to', required=True) 38 | args = p.parse_args() 39 | send_email(to=args.to, job=args.job, stdin=args.stdin) 40 | 41 | 42 | if __name__ == '__main__': 43 | main() 44 | -------------------------------------------------------------------------------- /.ci/release: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | ''' 3 | Deploys Python package onto [[https://pypi.org][PyPi]] or [[https://test.pypi.org][test PyPi]]. 
4 | 5 | - running manually 6 | 7 | You'll need =UV_PUBLISH_TOKEN= env variable 8 | 9 | - running on Github Actions 10 | 11 | Instead of env variable, relies on configuring github as Trusted publisher (https://docs.pypi.org/trusted-publishers/) -- both for test and regular pypi 12 | 13 | It's running as =pypi= job in [[file:.github/workflows/main.yml][Github Actions config]]. 14 | Packages are deployed on: 15 | - every master commit, onto test pypi 16 | - every new tag, onto production pypi 17 | ''' 18 | 19 | UV_PUBLISH_TOKEN = 'UV_PUBLISH_TOKEN' 20 | 21 | import argparse 22 | import os 23 | from pathlib import Path 24 | from subprocess import check_call 25 | 26 | is_ci = os.environ.get('CI') is not None 27 | 28 | 29 | def main() -> None: 30 | p = argparse.ArgumentParser() 31 | p.add_argument('--use-test-pypi', action='store_true') 32 | args = p.parse_args() 33 | 34 | publish_url = ['--publish-url', 'https://test.pypi.org/legacy/'] if args.use_test_pypi else [] 35 | 36 | root = Path(__file__).absolute().parent.parent 37 | os.chdir(root) # just in case 38 | 39 | check_call(['uv', 'build', '--clear']) 40 | 41 | if not is_ci: 42 | # CI relies on trusted publishers so doesn't need env variable 43 | assert UV_PUBLISH_TOKEN in os.environ, f'no {UV_PUBLISH_TOKEN} passed' 44 | 45 | check_call(['uv', 'publish', *publish_url]) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /src/dron/notify/common.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import platform 3 | import shlex 4 | import sys 5 | from collections.abc import Iterator 6 | from subprocess import PIPE, STDOUT, Popen, check_output 7 | 8 | IS_SYSTEMD = platform.system() != 'Darwin' # if not systemd it's launchd 9 | 10 | 11 | def get_parser() -> argparse.ArgumentParser: 12 | p = argparse.ArgumentParser() 13 | p.add_argument('--job', required=True) 14 | 
p.add_argument('--stdin', action='store_true') 15 | return p 16 | 17 | 18 | def get_stdin() -> Iterator[bytes]: 19 | yield from sys.stdin.buffer 20 | 21 | 22 | def get_last_systemd_log(job: str) -> Iterator[bytes]: 23 | # output unit status 24 | cmd = ['systemctl', '--user', 'status', '--no-pager', job, '-o', 'cat'] 25 | yield b'$ ' + ' '.join(map(shlex.quote, cmd)).encode('utf8') + b'\n\n' 26 | with Popen(cmd, stdout=PIPE, stderr=STDOUT) as po: 27 | out = po.stdout 28 | assert out is not None 29 | yield from out 30 | rc = po.poll() 31 | assert rc in { 32 | 0, 33 | 3, # 3 means failure due to job exit code 34 | }, rc 35 | 36 | # for logs, we used to use --lines 1000000 in systemctl status 37 | # however, from around 2024 it stated consuming too much time 38 | # (as if it actually retrieved 1000000 lines and only then tooks the ones relevant to the unit??) 39 | 40 | cmd = ['systemctl', '--user', 'show', job, '-p', 'InvocationID', '--value'] 41 | invocation_id = check_output(cmd, text=True) 42 | invocation_id = invocation_id.strip() # for some reason dumps multiple lines? 43 | assert len(invocation_id) > 0 # just in case, todo maybe make defensive? 
44 | 45 | yield b'\n' 46 | cmd = ['journalctl', '--no-pager', f'_SYSTEMD_INVOCATION_ID={invocation_id}'] 47 | yield b'$ ' + ' '.join(map(shlex.quote, cmd)).encode('utf8') + b'\n\n' 48 | with Popen(cmd, stdout=PIPE, stderr=STDOUT) as po: 49 | out = po.stdout 50 | assert out is not None 51 | yield from out 52 | rc = po.poll() 53 | assert rc == 0 54 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | minversion = 4 3 | 4 | # relies on the correct version of Python installed 5 | # (we rely on CI for the test matrix) 6 | envlist = ruff,tests,mypy,ty 7 | 8 | # https://github.com/tox-dev/tox/issues/20#issuecomment-247788333 9 | # hack to prevent .tox from crapping to the project directory 10 | toxworkdir = {env:TOXWORKDIR_BASE:}{toxinidir}/.tox 11 | 12 | [testenv] 13 | # TODO how to get package name from setuptools? 14 | package_name = "dron" 15 | pass_env = 16 | # useful for tests to know they are running under ci 17 | CI 18 | CI_* 19 | # respect user's cache dirs to prevent tox from crapping into project dir 20 | PYTHONPYCACHEPREFIX 21 | MYPY_CACHE_DIR 22 | RUFF_CACHE_DIR 23 | # necessary for systemd calls to work in tests 24 | XDG_RUNTIME_DIR 25 | 26 | set_env = 27 | # do not add current working directory to pythonpath 28 | # generally this is more robust and safer, prevents weird issues later on 29 | PYTHONSAFEPATH=1 30 | 31 | runner = uv-venv-lock-runner 32 | uv_sync_locked = false 33 | 34 | 35 | [testenv:ruff] 36 | skip_install = true 37 | dependency_groups = testing 38 | commands = 39 | {envpython} -m ruff check \ 40 | {posargs} 41 | 42 | 43 | [testenv:tests] 44 | dependency_groups = testing 45 | commands = 46 | # posargs allow test filtering, e.g. tox ... 
-- -k test_name 47 | {envpython} -m pytest \ 48 | --pyargs {[testenv]package_name} \ 49 | {posargs} 50 | 51 | 52 | [testenv:mypy] 53 | dependency_groups = typecheck 54 | commands = 55 | {envpython} -m mypy --no-install-types \ 56 | -p {[testenv]package_name} \ 57 | --txt-report .coverage.mypy \ 58 | --html-report .coverage.mypy \ 59 | # this is for github actions to upload to codecov.io 60 | # sadly xml coverage crashes on windows... so we need to disable it 61 | {env:CI_MYPY_COVERAGE} \ 62 | {posargs} 63 | 64 | 65 | [testenv:ty] 66 | dependency_groups = typecheck 67 | commands = 68 | {envpython} -m ty \ 69 | check \ 70 | {posargs} 71 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # see https://github.com/karlicoss/pymplate for up-to-date reference 2 | [project] 3 | dynamic = ["version"] # version is managed by build backend 4 | name = "dron" 5 | dependencies = [ 6 | "click" , # CLI 7 | "prompt_toolkit", # CLI 8 | "tzlocal" , # for monitor, to determine host timezone 9 | "textual" , # for 'new' monitor 10 | "tabulate" , # for 'old' monitor 11 | "termcolor" , # for 'old' monitor 12 | "mypy" , # for checking units 13 | "loguru" , # nicer logging 14 | "dbus-python; platform_system != 'Darwin'", # dbus interface to systemd 15 | ] 16 | requires-python = ">=3.12" 17 | # FIXME dbus 18 | 19 | ## these need to be set if you're planning to upload to pypi 20 | description = "What if cron and systemd had a baby?" 
21 | license = {file = "LICENSE.txt"} 22 | authors = [ 23 | {name = "Dima Gerasimov (@karlicoss)", email = "karlicoss@gmail.com"}, 24 | ] 25 | maintainers = [ 26 | {name = "Dima Gerasimov (@karlicoss)", email = "karlicoss@gmail.com"}, 27 | ] 28 | [project.urls] 29 | Homepage = "https://github.com/karlicoss/dron" 30 | ## 31 | 32 | 33 | [project.optional-dependencies] 34 | notify-telegram = [ 35 | # version before that had a bug that prevented it from working 36 | # see https://github.com/rahiel/telegram-send/issues/115#issuecomment-1368728425 37 | "telegram-send>=0.37", 38 | ] 39 | [dependency-groups] 40 | testing = [ 41 | "pytest>=9", # need version 9 for proper namespace package support 42 | "ruff", 43 | 44 | "dron[notify-telegram]", 45 | ] 46 | typecheck = [ 47 | { include-group = "testing" }, 48 | "mypy", 49 | "lxml", # for mypy html coverage 50 | "ty>=0.0.1a34", 51 | 52 | "types-tabulate", 53 | ] 54 | 55 | [project.scripts] 56 | dron = "dron.__main__:main" 57 | 58 | 59 | [build-system] 60 | requires = ["hatchling", "hatch-vcs"] 61 | build-backend = "hatchling.build" 62 | 63 | # unfortunately have to duplicate project name here atm, see https://github.com/pypa/hatch/issues/1894 64 | [tool.hatch.build.targets.wheel] 65 | packages = ["src/dron"] 66 | 67 | [tool.hatch.version] 68 | source = "vcs" 69 | 70 | [tool.hatch.version.raw-options] 71 | version_scheme = "python-simplified-semver" 72 | local_scheme = "dirty-tag" 73 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python,emacs 3 | # Edit at https://www.gitignore.io/?templates=python,emacs 4 | 5 | ### Emacs ### 6 | # -*- mode: gitignore; -*- 7 | *~ 8 | \#*\# 9 | /.emacs.desktop 10 | /.emacs.desktop.lock 11 | *.elc 12 | auto-save-list 13 | tramp 14 | .\#* 15 | 16 | # Org-mode 17 | .org-id-locations 18 | *_archive 19 | 20 | # flymake-mode 
21 | *_flymake.* 22 | 23 | # eshell files 24 | /eshell/history 25 | /eshell/lastdir 26 | 27 | # elpa packages 28 | /elpa/ 29 | 30 | # reftex files 31 | *.rel 32 | 33 | # AUCTeX auto folder 34 | /auto/ 35 | 36 | # cask packages 37 | .cask/ 38 | dist/ 39 | 40 | # Flycheck 41 | flycheck_*.el 42 | 43 | # server auth directory 44 | /server/ 45 | 46 | # projectiles files 47 | .projectile 48 | 49 | # directory configuration 50 | .dir-locals.el 51 | 52 | # network security 53 | /network-security.data 54 | 55 | 56 | ### Python ### 57 | # Byte-compiled / optimized / DLL files 58 | __pycache__/ 59 | *.py[cod] 60 | *$py.class 61 | 62 | # C extensions 63 | *.so 64 | 65 | # Distribution / packaging 66 | .Python 67 | build/ 68 | develop-eggs/ 69 | downloads/ 70 | eggs/ 71 | .eggs/ 72 | lib/ 73 | lib64/ 74 | parts/ 75 | sdist/ 76 | var/ 77 | wheels/ 78 | pip-wheel-metadata/ 79 | share/python-wheels/ 80 | *.egg-info/ 81 | .installed.cfg 82 | *.egg 83 | MANIFEST 84 | 85 | # PyInstaller 86 | # Usually these files are written by a python script from a template 87 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 88 | *.manifest 89 | *.spec 90 | 91 | # Installer logs 92 | pip-log.txt 93 | pip-delete-this-directory.txt 94 | 95 | # Unit test / coverage reports 96 | htmlcov/ 97 | .tox/ 98 | .nox/ 99 | .coverage 100 | .coverage.* 101 | .cache 102 | nosetests.xml 103 | coverage.xml 104 | *.cover 105 | .hypothesis/ 106 | .pytest_cache/ 107 | 108 | # Translations 109 | *.mo 110 | *.pot 111 | 112 | # Scrapy stuff: 113 | .scrapy 114 | 115 | # Sphinx documentation 116 | docs/_build/ 117 | 118 | # PyBuilder 119 | target/ 120 | 121 | # pyenv 122 | .python-version 123 | 124 | # pipenv 125 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
126 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 127 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 128 | # install all needed dependencies. 129 | #Pipfile.lock 130 | 131 | # celery beat schedule file 132 | celerybeat-schedule 133 | 134 | # SageMath parsed files 135 | *.sage.py 136 | 137 | # Spyder project settings 138 | .spyderproject 139 | .spyproject 140 | 141 | # Rope project settings 142 | .ropeproject 143 | 144 | # Mr Developer 145 | .mr.developer.cfg 146 | .project 147 | .pydevproject 148 | 149 | # mkdocs documentation 150 | /site 151 | 152 | # mypy 153 | .mypy_cache/ 154 | .dmypy.json 155 | dmypy.json 156 | 157 | # Pyre type checker 158 | .pyre/ 159 | 160 | # End of https://www.gitignore.io/api/python,emacs 161 | -------------------------------------------------------------------------------- /src/dron/api.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import getpass 4 | import inspect 5 | import re 6 | import sys 7 | from collections.abc import Sequence 8 | from dataclasses import dataclass 9 | 10 | from .common import ( 11 | IS_SYSTEMD, 12 | Command, 13 | OnCalendar, 14 | When, 15 | wrap, 16 | ) 17 | 18 | OnFailureAction = str 19 | 20 | UnitName = str 21 | 22 | 23 | @dataclass 24 | class Job: 25 | when: When | None 26 | command: Command 27 | unit_name: UnitName 28 | on_failure: Sequence[OnFailureAction] 29 | kwargs: dict[str, str] 30 | 31 | 32 | # staticmethod isn't callable directly prior to 3.10 33 | def _email(to: str) -> str: 34 | return f'{sys.executable} -m dron.notify.email --job %n --to {to}' 35 | 36 | 37 | class notify: 38 | @staticmethod 39 | def email(to: str) -> str: 40 | return _email(to) 41 | 42 | email_local = _email(to='%u' if IS_SYSTEMD else getpass.getuser()) 43 | 44 | # TODO adapt to macos 45 | desktop_notification = f'{sys.executable} -m 
dron.notify.ntfy_desktop --job %n' 46 | 47 | telegram = f'{sys.executable} -m dron.notify.telegram --job %n' 48 | 49 | 50 | def job( 51 | when: When | None, 52 | command: Command, 53 | *, 54 | unit_name: str | None = None, 55 | on_failure: Sequence[OnFailureAction] = (notify.email_local,), 56 | **kwargs, 57 | ) -> Job: 58 | """ 59 | when: if None, then timer won't be created (still allows running job manually) 60 | unit_name: if None, then will attempt to guess from source code (experimental!) 61 | """ 62 | assert 'extra_email' not in kwargs, unit_name # deprecated 63 | 64 | stacklevel: int = kwargs.pop('stacklevel', 1) 65 | 66 | def guess_name() -> str | Exception: 67 | stack = inspect.stack() 68 | frame = stack[stacklevel + 1] # +1 for guess_name itself 69 | code_context_lines = frame.code_context 70 | # python should alway keep single line for code context? but just in case 71 | if code_context_lines is None or len(code_context_lines) != 1: 72 | return RuntimeError(f"Expected single code context line, got {code_context_lines=}") 73 | [code_context] = code_context_lines 74 | code_context = code_context.strip() 75 | rgx = r'(\w+)\s+=' 76 | m = re.match(rgx, code_context) # find assignment to variable 77 | if m is None: 78 | return RuntimeError(f"Couldn't guess from {code_context=} (regex {rgx=})") 79 | return m.group(1) 80 | 81 | if unit_name is None: 82 | guessed_name = guess_name() 83 | 84 | if isinstance(guessed_name, Exception): 85 | raise RuntimeError(f"{when} {command}: couldn't guess job name: {guessed_name}") 86 | 87 | unit_name = guessed_name 88 | 89 | return Job( 90 | when=when, 91 | command=command, 92 | unit_name=unit_name, 93 | on_failure=on_failure, 94 | kwargs=kwargs, 95 | ) 96 | 97 | 98 | __all__ = ( 99 | 'Command', 100 | 'Job', # todo maybe don't expose it? 
101 | 'OnCalendar', 102 | 'OnFailureAction', 103 | 'When', 104 | 'job', 105 | 'notify', 106 | 'wrap', 107 | ) 108 | -------------------------------------------------------------------------------- /src/dron/launchd_wrapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import os 4 | import shlex 5 | import sys 6 | from collections.abc import Iterator 7 | from pathlib import Path 8 | from subprocess import PIPE, STDOUT, Popen 9 | from typing import NoReturn 10 | 11 | from loguru import logger 12 | 13 | LOG_DIR = Path('~/Library/Logs/dron').expanduser() 14 | 15 | # OSX/launchd is a piece of shit and doesn't seem possible to just set it globally everywhere? 16 | # this works: launchctl setenv PYTHONPYCACHEPREFIX $PYTHONPYCACHEPREFIX 17 | # however unclear how to set it in a way that it's running before all other agents 18 | # allegedly possible to use global LaunchDaemon running as root, but doesn't seem possible to execute launchctl commands as other user from launchd plist?? 19 | PYCACHE_PATH = Path('~/.cache/pycache').expanduser() 20 | 21 | 22 | # ty doesn't support NoReturn yet, see https://github.com/astral-sh/ty/issues/180 23 | def main() -> NoReturn: # ty: ignore[invalid-return-type] 24 | p = argparse.ArgumentParser() 25 | p.add_argument('--notify', action='append') 26 | p.add_argument('--job', required=True) 27 | # hmm, this doesn't work with keyword args?? 28 | # p.add_argument('cmd', nargs=argparse.REMAINDER) 29 | args, rest = p.parse_known_args() 30 | 31 | assert rest[0] == '--', rest 32 | cmd = rest[1:] 33 | 34 | notify_cmds = [] if args.notify is None else args.notify 35 | job = args.job 36 | 37 | LOG_DIR.mkdir(parents=True, exist_ok=True) 38 | log_file = LOG_DIR / f'{job}.log' 39 | 40 | logger.add(log_file, rotation='100 MB') # todo configurable? or rely on osx rotation? 
41 | 42 | env = {**os.environ} 43 | if "PYTHONPYCACHEPREFIX" not in env: 44 | env["PYTHONPYCACHEPREFIX"] = str(PYCACHE_PATH) 45 | 46 | # hmm, a bit crap transforming everything to stdout? but not much we can do? 47 | captured_log = [] 48 | try: 49 | with Popen(cmd, stdout=PIPE, stderr=STDOUT, env=env) as po: 50 | out = po.stdout 51 | assert out is not None 52 | for line in out: 53 | captured_log.append(line) 54 | sys.stdout.buffer.write(line) 55 | rc = po.poll() 56 | 57 | if rc == 0: 58 | # short circuit 59 | sys.exit(0) 60 | except Exception as e: 61 | # Popen istelf still fail due to permission denied or something 62 | logger.exception(e) 63 | captured_log.append(str(e).encode('utf8')) 64 | rc = 123 65 | 66 | def payload() -> Iterator[bytes]: 67 | yield f"exit code: {rc}\n".encode() 68 | yield b'command: \n' 69 | yield (' '.join(map(shlex.quote, cmd)) + '\n').encode('utf8') 70 | yield f'log file: {log_file}\n'.encode() 71 | yield b'\n' 72 | yield b'output (stdout + stderr):\n\n' 73 | # TODO shit -- if multiple notifications, can't use generator for captured_log 74 | # unless we notify simultaneously? 
75 | yield from captured_log 76 | 77 | for line in payload(): 78 | logger.info(line.decode('utf8').rstrip('\n')) # meh 79 | 80 | for notify_cmd in notify_cmds: 81 | logger.info(f'notifying: {notify_cmd}') 82 | try: 83 | with Popen(notify_cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE) as po: 84 | sin = po.stdin 85 | assert sin is not None 86 | for line in payload(): 87 | sin.write(line) 88 | (sout, serr) = po.communicate() 89 | for l in sout.decode('utf8').splitlines(): 90 | logger.debug(l) 91 | for l in serr.decode('utf8').splitlines(): 92 | logger.debug(l) 93 | assert po.poll() == 0, notify_cmd 94 | except Exception as e: 95 | logger.error(f'notificaiton failed: {notify_cmd}') 96 | logger.exception(e) 97 | 98 | sys.exit(rc) 99 | 100 | 101 | if __name__ == '__main__': 102 | main() 103 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | # see https://github.com/karlicoss/pymplate for up-to-date reference 2 | 3 | name: CI 4 | on: 5 | push: 6 | branches: '*' 7 | tags: 'v[0-9]+.*' # only trigger on 'release' tags for PyPi 8 | # Ideally I would put this in the pypi job... but github syntax doesn't allow for regexes there :shrug: 9 | 10 | # Needed to trigger on others' PRs. 11 | # Note that people who fork it need to go to "Actions" tab on their fork and click "I understand my workflows, go ahead and enable them". 12 | pull_request: 13 | 14 | # Needed to trigger workflows manually. 
15 | workflow_dispatch: 16 | inputs: 17 | debug_enabled: 18 | type: boolean 19 | description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' 20 | required: false 21 | default: false 22 | 23 | schedule: 24 | - cron: '31 18 * * 5' # run every Friday 25 | 26 | 27 | jobs: 28 | build: 29 | strategy: 30 | fail-fast: false 31 | matrix: 32 | platform: [ubuntu-latest, macos-latest] 33 | python-version: ['3.12', '3.13', '3.14'] 34 | # vvv just an example of excluding stuff from matrix 35 | # exclude: [{platform: macos-latest, python-version: '3.6'}] 36 | 37 | runs-on: ${{ matrix.platform }} 38 | 39 | # useful for 'optional' pipelines 40 | # continue-on-error: ${{ matrix.platform == 'windows-latest' }} 41 | 42 | steps: 43 | # ugh https://github.com/actions/toolkit/blob/main/docs/commands.md#path-manipulation 44 | - run: echo "$HOME/.local/bin" >> $GITHUB_PATH 45 | 46 | - uses: actions/checkout@v6 47 | with: 48 | submodules: recursive 49 | fetch-depth: 0 # nicer to have all git history when debugging/for tests 50 | 51 | - uses: actions/setup-python@v6 52 | with: 53 | python-version: ${{ matrix.python-version }} 54 | 55 | - uses: astral-sh/setup-uv@v7 56 | with: 57 | enable-cache: false # we don't have lock files, so can't use them as cache key 58 | 59 | - uses: mxschmitt/action-tmate@v3 60 | if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} 61 | 62 | # explicit bash command is necessary for Windows CI runner, otherwise it thinks it's cmd... 
63 | - run: bash .ci/run 64 | env: 65 | # only compute lxml coverage on ubuntu; it crashes on windows 66 | CI_MYPY_COVERAGE: ${{ matrix.platform == 'ubuntu-latest' && '--cobertura-xml-report .coverage.mypy' || '' }} 67 | 68 | - if: matrix.platform == 'ubuntu-latest' # no need to compute coverage for other platforms 69 | uses: codecov/codecov-action@v5 70 | with: 71 | fail_ci_if_error: true # default false 72 | token: ${{ secrets.CODECOV_TOKEN }} 73 | flags: mypy-${{ matrix.python-version }} 74 | files: .coverage.mypy/cobertura.xml 75 | 76 | 77 | pypi: 78 | # Do not run it for PRs/cron schedule etc. 79 | # NOTE: release tags are guarded by on: push: tags on the top. 80 | if: github.event_name == 'push' && (startsWith(github.event.ref, 'refs/tags/') || (github.event.ref == format('refs/heads/{0}', github.event.repository.master_branch))) 81 | # Ugh, I tried using matrix or something to explicitly generate only test pypi or prod pypi pipelines. 82 | # But github actions is so shit, it's impossible to do any logic at all, e.g. doesn't support conditional matrix, if/else statements for variables etc. 
83 | 84 | needs: [build] # add all other jobs here 85 | 86 | runs-on: ubuntu-latest 87 | 88 | permissions: 89 | # necessary for Trusted Publishing 90 | id-token: write 91 | 92 | steps: 93 | # ugh https://github.com/actions/toolkit/blob/main/docs/commands.md#path-manipulation 94 | - run: echo "$HOME/.local/bin" >> $GITHUB_PATH 95 | 96 | - uses: actions/checkout@v6 97 | with: 98 | submodules: recursive 99 | fetch-depth: 0 # pull all commits to correctly infer vcs version 100 | 101 | - uses: actions/setup-python@v6 102 | with: 103 | python-version: '3.12' 104 | 105 | - uses: astral-sh/setup-uv@v7 106 | with: 107 | enable-cache: false # we don't have lock files, so can't use them as cache key 108 | 109 | - name: 'release to test pypi' 110 | # always deploy merged master to test pypi 111 | if: github.event.ref == format('refs/heads/{0}', github.event.repository.master_branch) 112 | run: .ci/release --use-test-pypi 113 | 114 | - name: 'release to prod pypi' 115 | # always deploy tags to release pypi 116 | if: startsWith(github.event.ref, 'refs/tags/') 117 | run: .ci/release 118 | -------------------------------------------------------------------------------- /README.org: -------------------------------------------------------------------------------- 1 | NOTE: readme is a little outdated (although mostly correct). 2 | Best source of truth is =--help=. 3 | 4 | #+begin_src python :results drawer :exports results 5 | import dron; return dron.make_parser().description 6 | #+end_src 7 | 8 | #+RESULTS: 9 | :results: 10 | dron -- simple frontend for Systemd, inspired by cron. 11 | 12 | - *d* stands for 'Systemd' 13 | - *ron* stands for 'cron' 14 | 15 | dron is my attempt to overcome things that make working with Systemd tedious 16 | :end: 17 | 18 | 19 | #+begin_src python :results drawer :exports results 20 | import dron; return dron.make_parser().epilog 21 | #+end_src 22 | 23 | #+RESULTS: 24 | :results: 25 | 26 | * What does it do? 
27 | In short, you type ~dron edit~ and edit your config file, similarly to ~crontab -e~: 28 | 29 | : from dron.api import job 30 | : 31 | : # at the moment you're expected to define jobs() function that yields jobs 32 | : # in the future I might add more mechanisms 33 | : def jobs(): 34 | : # simple job that doesn't do much 35 | : yield job( 36 | : 'daily', 37 | : '/home/user/scripts/run-borg /home/user', 38 | : unit_name='borg-backup-home', 39 | : ) 40 | : 41 | : yield job( 42 | : 'daily', 43 | : 'linkchecker https://beepb00p.xyz', 44 | : unit_name='linkchecker-beepb00p', 45 | : ) 46 | : 47 | : # drontab is simply python code! 48 | : # so if you're annoyed by having to rememver Systemd syntax, you can use a helper function 49 | : def every(*, mins: int) -> str: 50 | : return f'*:0/{mins}' 51 | : 52 | : # make sure my website is alive, it will send local email on failure 53 | : yield job( 54 | : every(mins=10), 55 | : 'ping https://beepb00p.xyz', 56 | : unit_name='ping-beepb00p', 57 | : ) 58 | 59 | 60 | After you save your changes and exit the editor, your drontab is checked for syntax and applied 61 | 62 | - if checks have passed, your jobs are mapped onto Systemd units and started up 63 | - if there are potential errors, you are prompted to fix them before retrying 64 | 65 | * Why? 66 | In short, because I want to benefit from the heavy lifting that Systemd does: timeouts, resource management, restart policies, powerful scheduling specs and logging, 67 | while not having to manually manipulate numerous unit files and restart the daemon all over. 68 | 69 | I elaborate on what led me to implement it and motivation [[https://beepb00p.xyz/scheduler.html#what_do_i_want][here]]. Also: 70 | 71 | - why not just use [[https://beepb00p.xyz/scheduler.html#cron][cron]]? 72 | - why not just use [[https://beepb00p.xyz/scheduler.html#systemd][systemd]]? 73 | 74 | :end: 75 | 76 | 77 | * Setting up 78 | 79 | 1. 
install system dependencies (see =.ci/run= ) -- these are necessary for =dbus-python= library 80 | 2. install dron: =pip3 install --user git+https://github.com/karlicoss/dron= 81 | 3. install =sendmail= from your package manager if you want to recieve job failure emails 82 | 83 | * Using 84 | 85 | #+begin_src python :results value :exports results 86 | import dron; 87 | p = dron.make_parser() 88 | p.prog = '' 89 | p.epilog = '' 90 | return p.format_help() 91 | #+end_src 92 | 93 | #+RESULTS: 94 | #+begin_example 95 | usage: [-h] [--marker MARKER] {monitor,past,edit,apply,lint,uninstall} ... 96 | 97 | dron -- simple frontend for Systemd, inspired by cron. 98 | 99 | - *d* stands for 'Systemd' 100 | - *ron* stands for 'cron' 101 | 102 | dron is my attempt to overcome things that make working with Systemd tedious 103 | 104 | positional arguments: 105 | {monitor,past,edit,apply,lint,uninstall} 106 | monitor Monitor services/timers managed by dron 107 | past List past job runs 108 | edit Edit drontab (like 'crontab -e') 109 | apply Apply drontab (like 'crontab' with no args) 110 | lint Check drontab (no 'crontab' alternative, sadly!) 111 | uninstall Uninstall all managed jobs 112 | 113 | options: 114 | -h, --help show this help message and exit 115 | --marker MARKER Use custom marker instead of default `(MANAGED BY DRON)`. Possibly useful for developing/testing. 116 | #+end_example 117 | 118 | 119 | * Job syntax 120 | 121 | The idea is that it's a simple python DSL that lets you define simple jobs with minimal friction. 122 | 123 | However, if you wish you can pass arbitrary unit properties as keyword arguments as well. 124 | 125 | * Caveats 126 | - older systemd versions would only accept absolute path for =ExecStart=. That should be caught during =dron edit= though 127 | 128 | * Potential improvements 129 | - custom validation; at the moment it runs pylint, mypy and systemd verify 130 | - make it more atomic? 131 | 132 | E.g. 
roll back all the changes until daemon-reload 133 | - more failure report mechanisms? 134 | 135 | Ideally, benefit from [[https://github.com/dschep/ntfy][ntfy]] 136 | 137 | ** TODO add issues with various questions that I had in code? 138 | 139 | -------------------------------------------------------------------------------- /ruff.toml: -------------------------------------------------------------------------------- 1 | line-length = 120 # impacts import sorting 2 | 3 | lint.extend-select = [ 4 | "ALL", 5 | ] 6 | 7 | lint.ignore = [ 8 | "D", # annoying nags about docstrings 9 | "N", # pep naming 10 | "TCH", # type checking rules, mostly just suggests moving imports under TYPE_CHECKING 11 | "S", # bandit (security checks) -- tends to be not very useful, lots of nitpicks 12 | "DTZ", # datetimes checks -- complaining about missing tz and mostly false positives 13 | "FIX", # complains about fixmes/todos -- annoying 14 | "TD", # complains about todo formatting -- too annoying 15 | "ANN", # missing type annotations? seems way to strict though 16 | "EM" , # suggests assigning all exception messages into a variable first... pretty annoying 17 | 18 | ### too opinionated style checks 19 | "E501", # too long lines 20 | "E731", # assigning lambda instead of using def 21 | "E741", # Ambiguous variable name: `l` 22 | "E742", # Ambiguous class name: `O 23 | "E401", # Multiple imports on one line 24 | "F403", # import *` used; unable to detect undefined names 25 | ### 26 | 27 | ### 28 | "E722", # Do not use bare `except` ## Sometimes it's useful for defensive imports and that sort of thing.. 29 | "F811", # Redefinition of unused # this gets in the way of pytest fixtures (e.g. in cachew) 30 | 31 | ## might be nice .. 
but later and I don't wanna make it strict 32 | "E402", # Module level import not at top of file 33 | 34 | ### these are just nitpicky, we usually know better 35 | "PLR0911", # too many return statements 36 | "PLR0912", # too many branches 37 | "PLR0913", # too many function arguments 38 | "PLR0915", # too many statements 39 | "PLR1714", # consider merging multiple comparisons 40 | "PLR2044", # line with empty comment 41 | "PLR5501", # use elif instead of else if 42 | "PLR2004", # magic value in comparison -- super annoying in tests 43 | ### 44 | "PLR0402", # import X.Y as Y -- TODO maybe consider enabling it, but double check 45 | 46 | "B009", # calling gettattr with constant attribute -- this is useful to convince mypy 47 | "B010", # same as above, but setattr 48 | "B017", # pytest.raises(Exception) 49 | "B023", # seems to result in false positives? 50 | 51 | # complains about useless pass, but has sort of a false positive if the function has a docstring? 52 | # this is common for click entrypoints (e.g. in __main__), so disable 53 | "PIE790", 54 | 55 | # a bit too annoying, offers to convert for loops to list comprehension 56 | # , which may heart readability 57 | "PERF401", 58 | 59 | # suggests no using exception in for loops 60 | # we do use this technique a lot, plus in 3.11 happy path exception handling is "zero-cost" 61 | "PERF203", 62 | 63 | "RET504", # unnecessary assignment before returning -- that can be useful for readability 64 | "RET505", # unnecessary else after return -- can hurt readability 65 | 66 | "PLW0603", # global variable update.. 
we usually know why we are doing this 67 | "PLW2901", # for loop variable overwritten, usually this is intentional 68 | 69 | "PT011", # pytest raises is too broad 70 | 71 | "COM812", # trailing comma missing -- mostly just being annoying with long multiline strings 72 | 73 | "TRY003", # suggests defining exception messages in exception class -- kinda annoying 74 | "TRY201", # raise without specifying exception name -- sometimes hurts readability 75 | "TRY400", # a bit dumb, and results in false positives (see https://github.com/astral-sh/ruff/issues/18070) 76 | "TRY401", # redundant exception in logging.exception call? TODO double check, might result in excessive logging 77 | 78 | "TID252", # Prefer absolute imports over relative imports from parent modules 79 | 80 | ## too annoying 81 | "T20", # just complains about prints and pprints (TODO maybe consider later?) 82 | "Q", # flake quotes, too annoying 83 | "C90", # some complexity checking 84 | "G004", # logging statement uses f string 85 | "ERA001", # commented out code 86 | "SLF001", # private member accessed 87 | "BLE001", # do not catch 'blind' Exception 88 | "INP001", # complains about implicit namespace packages 89 | "SIM102", # if statements collapsing, often hurts readability 90 | "SIM103", # multiple conditions collapsing, often hurts readability 91 | "SIM105", # suggests using contextlib.suppress instad of try/except -- this wouldn't be mypy friendly 92 | "SIM108", # suggests using ternary operation instead of if -- hurts readability 93 | "SIM110", # suggests using any(...) 
instead of for look/return -- hurts readability 94 | "SIM117", # suggests using single with statement instead of nested -- doesn't work in tests 95 | "RSE102", # complains about missing parens in exceptions 96 | ## 97 | 98 | "PLC0415", # "imports should be at the top level" -- not realistic 99 | ] 100 | -------------------------------------------------------------------------------- /src/dron/common.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import platform 4 | import shlex 5 | import sys 6 | from collections.abc import Iterable, Sequence 7 | from dataclasses import asdict, dataclass, replace 8 | from datetime import datetime 9 | from pathlib import Path 10 | from typing import Any 11 | 12 | from loguru import logger # noqa: F401 13 | 14 | datetime_aware = datetime 15 | datetime_naive = datetime 16 | 17 | 18 | # TODO can remove this? although might be useful for tests 19 | VERIFY_UNITS: bool = True 20 | # TODO ugh. verify tries using already installed unit files so if they were bad, everything would fail 21 | # I guess could do two stages, i.e. units first, then timers 22 | # dunno, a bit less atomic though... 23 | 24 | 25 | def set_verify_off() -> None: 26 | global VERIFY_UNITS 27 | VERIFY_UNITS = False 28 | 29 | 30 | @dataclass 31 | class MonitorParams: 32 | with_success_rate: bool 33 | with_command: bool 34 | 35 | 36 | Unit = str 37 | Body = str 38 | UnitFile = Path 39 | 40 | 41 | @dataclass 42 | class UnitState: 43 | unit_file: UnitFile 44 | body: Body | None 45 | cmdline: Sequence[str] | None # can be None for timers 46 | 47 | 48 | @dataclass 49 | class SystemdUnitState(UnitState): 50 | dbus_properties: Any # seems like keeping this around massively speeds up dbus access... 51 | 52 | 53 | @dataclass 54 | class LaunchdUnitState(UnitState): 55 | # NOTE: can legit be str (e.g. 
MANAGED_MARKER: str = '(MANAGED BY DRON)'


def is_managed(body: str) -> bool:
    """Return True if *body* (a unit file's text) was generated by dron.

    Detection is a plain substring check for the managed marker, which dron
    embeds into every unit it writes.
    """
    # The legacy marker was switched off because it's unfriendly to launchd.
    # NOTE(review): with an empty legacy marker, `legacy_marker in body` is
    # vacuously True for *every* body, which would make all units look
    # managed -- guard against the empty marker explicitly.
    legacy_marker = ''
    if MANAGED_MARKER in body:
        return True
    return bool(legacy_marker) and legacy_marker in body
def print_monitor(entries: Iterable[MonitorEntry]) -> None:
    """Pretty-print monitor entries as a table on stdout.

    Rows are ordered so that currently running jobs come first, then entries
    with a non-ok status, then the rest. The status column is colourised
    (green/red), and running jobs show a yellow 'running' marker instead of
    their next scheduled run.
    """
    ordered = sorted(
        entries,
        key=lambda entry: (entry.pid is None, entry.status_ok, entry),
    )

    import tabulate
    import termcolor

    tabulate.PRESERVE_WHITESPACE = True

    headers = [
        'UNIT',
        'STATUS',
        'LEFT',
        'NEXT',
        'SCHEDULE',
    ]
    # only show the command column if at least one entry actually has one
    if any(entry.command is not None for entry in ordered):
        headers.append('COMMAND')

    rows = []
    for entry in ordered:
        colour = 'green' if entry.status_ok else 'red'
        entry = replace(entry, status=termcolor.colored(entry.status, colour))
        if entry.pid is not None:
            # job is executing right now -- show that instead of schedule info
            entry = replace(
                entry,
                next=termcolor.colored('running', 'yellow'),
                left='--',
            )
        # rely on dataclass field order; slice off trailing non-display fields
        rows.append(list(asdict(entry).values())[: len(headers)])
    print(tabulate.tabulate(rows, headers=headers))
test_load_jobs_basic(tmp_pythonpath: Path) -> None: 24 | tpath = Path(tmp_pythonpath) / 'test_drontab.py' 25 | tpath.write_text( 26 | ''' 27 | from typing import Iterator 28 | 29 | from dron.api import job, Job 30 | 31 | 32 | def jobs() -> Iterator[Job]: 33 | job3 = job( 34 | '03:10', 35 | ['/path/to/command.py', 'some', 'args', '3'], 36 | unit_name='job3', 37 | ) 38 | job1 = job( 39 | '01:10', 40 | ['/path/to/command.py', 'some', 'args', '1'], 41 | unit_name='job1', 42 | ) 43 | yield job1 44 | yield job( 45 | '02:10', 46 | ['/path/to/command.py', 'some', 'args', '2'], 47 | unit_name='job2', 48 | ) 49 | yield job3 50 | 51 | ''' 52 | ) 53 | loaded = list(load_jobs(tab_module='test_drontab')) 54 | [job1, job2, job3] = loaded 55 | 56 | assert job1.when == '01:10' 57 | assert job1.command == ['/path/to/command.py', 'some', 'args', '1'] 58 | assert job1.unit_name == 'job1' 59 | 60 | assert job2.when == '02:10' 61 | assert job2.command == ['/path/to/command.py', 'some', 'args', '2'] 62 | assert job2.unit_name == 'job2' 63 | 64 | assert job3.when == '03:10' 65 | assert job3.command == ['/path/to/command.py', 'some', 'args', '3'] 66 | assert job3.unit_name == 'job3' 67 | 68 | 69 | def test_load_jobs_dupes(tmp_pythonpath: Path) -> None: 70 | tpath = Path(tmp_pythonpath) / 'test_drontab.py' 71 | tpath.write_text( 72 | ''' 73 | from typing import Iterator 74 | 75 | from dron.api import job, Job 76 | 77 | def jobs() -> Iterator[Job]: 78 | yield job('00:00', 'echo', unit_name='job3') 79 | yield job('00:00', 'echo', unit_name='job1') 80 | # whoops! 
duplicate job name 81 | yield job('00:00', 'echo', unit_name='job3') 82 | ''' 83 | ) 84 | with pytest.raises(AssertionError): 85 | _loaded = list(load_jobs(tab_module='test_drontab')) 86 | 87 | 88 | def test_jobs_auto_naming(tmp_pythonpath: Path) -> None: 89 | tpath = Path(tmp_pythonpath) / 'test_drontab.py' 90 | tpath.write_text( 91 | ''' 92 | from typing import Iterator 93 | 94 | from dron.api import job, Job 95 | 96 | 97 | job2 = job( 98 | '00:02', 99 | 'echo', 100 | ) 101 | 102 | 103 | def job_maker(when) -> Job: 104 | return job(when, 'echo job maker', stacklevel=2) 105 | 106 | 107 | def jobs() -> Iterator[Job]: 108 | job_1 = job('00:01', 109 | 'echo', 110 | ) 111 | yield job2 112 | yield job('00:00', 'echo', unit_name='job_named') 113 | yield job_1 114 | job4 = \ 115 | job('00:04', 'echo') 116 | job5 = job_maker('00:05') 117 | yield job5 118 | yield job4 119 | ''' 120 | ) 121 | loaded = list(load_jobs(tab_module='test_drontab')) 122 | (job2, job_named, job_1, job5, job4) = loaded 123 | assert job_1.unit_name == 'job_1' 124 | assert job_1.when == '00:01' 125 | assert job2.unit_name == 'job2' 126 | assert job2.when == '00:02' 127 | assert job_named.unit_name == 'job_named' 128 | assert job_named.when == '00:00' 129 | assert job4.unit_name == 'job4' 130 | assert job4.when == '00:04' 131 | assert job5.unit_name == 'job5' 132 | assert job5.when == '00:05' 133 | 134 | 135 | def test_do_lint(tmp_pythonpath: Path) -> None: 136 | def OK(body: str) -> None: 137 | tpath = Path(tmp_pythonpath) / 'test_drontab.py' 138 | tpath.write_text(body) 139 | do_lint(tab_module='test_drontab') 140 | 141 | def FAILS(body: str) -> None: 142 | with pytest.raises(Exception): 143 | OK(body) 144 | 145 | FAILS( 146 | body=''' 147 | None.whatever 148 | ''' 149 | ) 150 | 151 | # no jobs 152 | FAILS( 153 | body=''' 154 | ''' 155 | ) 156 | 157 | OK( 158 | body=''' 159 | def jobs(): 160 | yield from [] 161 | ''' 162 | ) 163 | 164 | OK( 165 | body=''' 166 | from dron.api import job 167 | def 
jobs(): 168 | yield job( 169 | 'hourly', 170 | ['/bin/echo', '123'], 171 | unit_name='unit_test', 172 | ) 173 | ''' 174 | ) 175 | 176 | from ..systemd import _is_missing_systemd 177 | 178 | if not _is_missing_systemd(): 179 | from ..cli import _drontab_example 180 | 181 | # this test doesn't work without systemd yet, because launchd adapter doesn't support unquoted commands, at least yet.. 182 | example = _drontab_example() 183 | # ugh. some hackery to make it find the executable.. 184 | echo = " '/bin/echo" 185 | example = ( 186 | example.replace(" 'linkchecker", echo) 187 | .replace(" '/home/user/scripts/run-borg", echo) 188 | .replace(" 'ping", " '/bin/ping") 189 | ) 190 | OK(body=example) 191 | -------------------------------------------------------------------------------- /src/dron/cli.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import socket 4 | import sys 5 | from pprint import pprint 6 | 7 | import click 8 | 9 | from . import common, launchd, systemd 10 | from .api import UnitName 11 | from .common import ( 12 | IS_SYSTEMD, 13 | MonitorParams, 14 | Unit, 15 | escape, 16 | logger, 17 | print_monitor, 18 | set_verify_off, 19 | ) 20 | from .dron import ( 21 | apply, 22 | do_lint, 23 | get_entries_for_monitor, 24 | load_jobs, 25 | manage, 26 | managed_units, 27 | ) 28 | 29 | 30 | # TODO test it on CI? 31 | # TODO explicitly inject it into readme? 
def _drontab_example() -> str:
    """Return the example drontab used in the --help epilog and docs.

    NOTE(review): fixed typo 'rememver' -> 'remember' in the example comment
    (this text is user-facing and also surfaces in the generated README).
    """
    return '''
from dron.api import job

# at the moment you're expected to define jobs() function that yields jobs
# in the future I might add more mechanisms
def jobs():
    # simple job that doesn't do much
    yield job(
        'daily',
        '/home/user/scripts/run-borg /home/user',
        unit_name='borg-backup-home',
    )

    yield job(
        'daily',
        'linkchecker https://beepb00p.xyz',
        unit_name='linkchecker-beepb00p',
    )

    # drontab is simply python code!
    # so if you're annoyed by having to remember Systemd syntax, you can use a helper function
    def every(*, mins: int) -> str:
        return f'*:0/{mins}'

    # make sure my website is alive, it will send local email on failure
    yield job(
        every(mins=10),
        'ping https://beepb00p.xyz',
        unit_name='ping-beepb00p',
    )
'''.lstrip()
85 | 86 | \b 87 | - *d* stands for 'Systemd' 88 | - *ron* stands for 'cron' 89 | 90 | dron is my attempt to overcome things that make working with Systemd tedious 91 | """.strip(), 92 | epilog=_get_epilog(), 93 | ) 94 | @click.option( 95 | '--marker', 96 | required=False, 97 | help=f'Use custom marker instead of default `{common.MANAGED_MARKER}`. Useful for developing/testing.', 98 | ) 99 | def cli(*, marker: str | None) -> None: 100 | if marker is not None: 101 | common.MANAGED_MARKER = marker 102 | 103 | 104 | arg_tab_module = click.option( 105 | '--module', 106 | 'tab_module', 107 | type=str, 108 | default=f'drontab.{socket.gethostname()}', 109 | ) 110 | 111 | 112 | # specify in readme??? 113 | # would be nice to use external checker.. 114 | # https://github.com/systemd/systemd/issues/8072 115 | # https://unix.stackexchange.com/questions/493187/systemd-under-ubuntu-18-04-1-fails-with-failed-to-create-user-slice-serv 116 | def _set_verify_off(ctx, param, value) -> None: # noqa: ARG001 117 | if value is True: 118 | set_verify_off() 119 | 120 | 121 | arg_no_verify = click.option( 122 | '--no-verify', 123 | is_flag=True, 124 | callback=_set_verify_off, 125 | expose_value=False, 126 | help='Skip systemctl verify step', 127 | ) 128 | 129 | 130 | @cli.command('lint') 131 | @arg_tab_module 132 | @arg_no_verify 133 | def cmd_lint(*, tab_module: str) -> None: 134 | # FIXME how to disable verity? 135 | # FIXME lint command isn't very interesting now btw? 
@cli.command('print')
@arg_tab_module
@click.option('--pretty', is_flag=True, help='Pretty print')
@arg_no_verify
def cmd_print(*, tab_module: str, pretty: bool) -> None:
    """Parse and print drontab.

    With --pretty, renders a table of unit name / schedule / command;
    otherwise prints the raw Job objects, one per line.
    """
    jobs = list(load_jobs(tab_module=tab_module))

    if pretty:
        # tabulate is only needed on this path, so import it lazily
        import tabulate

        items = [
            {
                'UNIT': job.unit_name,
                'SCHEDULE': job.when,
                'COMMAND': escape(job.command),  # shell-escaped form of the command
            }
            for job in jobs
        ]
        print(tabulate.tabulate(items, headers="keys"))
    else:
        for j in jobs:
            print(j)
Continue?', default=True, abort=True) 187 | manage([]) 188 | 189 | 190 | @cli.group('job') 191 | def cli_job() -> None: 192 | """Actions on individual jobs""" 193 | pass 194 | 195 | 196 | def _prompt_for_unit() -> UnitName: 197 | from prompt_toolkit import PromptSession 198 | from prompt_toolkit.completion import WordCompleter 199 | 200 | # TODO print options 201 | managed = list(managed_units(with_body=False)) 202 | units = [x.unit_file.stem for x in managed] 203 | 204 | print('Units under dron:', file=sys.stderr) 205 | for u in units: 206 | print(f'- {u}', file=sys.stderr) 207 | 208 | completer = WordCompleter(units, ignore_case=True) 209 | session = PromptSession("Select a unit: ", completer=completer) # type: ignore[var-annotated] 210 | selected = session.prompt() 211 | return selected 212 | 213 | 214 | arg_unit = click.argument('unit', type=Unit, default=_prompt_for_unit) 215 | 216 | 217 | @cli_job.command('past') 218 | @arg_unit 219 | def cmd_past(unit: Unit) -> None: 220 | if IS_SYSTEMD: 221 | # TODO hmm seems like this just exit with 0 if unit diesn't exist 222 | return systemd.cmd_past(unit) 223 | else: 224 | return launchd.cmd_past(unit) 225 | 226 | 227 | @cli_job.command('run') 228 | @arg_unit 229 | @click.option('--exec', 'do_exec', is_flag=True, help='Run directly, not via systemd/launchd') 230 | def cmd_run(*, unit: Unit, do_exec: bool) -> None: 231 | """Run the job right now, ignoring the timer""" 232 | if IS_SYSTEMD: 233 | return systemd.cmd_run(unit=unit, do_exec=do_exec) 234 | else: 235 | return launchd.cmd_run(unit=unit, do_exec=do_exec) 236 | 237 | 238 | @cli.command('monitor') 239 | @click.option('-n', type=float, default=1.0, help='refresh every n seconds') 240 | @click.option('--once', is_flag=True, help='only call once') 241 | @click.option('--rate', is_flag=True, help='Display success rate (unstable and potentially slow)') 242 | @click.option('--command', is_flag=True, help='Display command') 243 | # TODO maybe make command default? 
it's only marginally (~5-10%) slower than without it 244 | def cmd_monitor(*, n: float, once: bool, rate: bool, command: bool) -> None: 245 | """Monitor services/timers managed by dron""" 246 | params = MonitorParams( 247 | with_success_rate=rate, 248 | with_command=command, 249 | ) 250 | 251 | if once: 252 | # old style monitor 253 | # TODO think if it's worth integrating with timers? 254 | managed = list(managed_units(with_body=False)) # body slows down this call quite a bit 255 | if len(managed) == 0: 256 | logger.warning('no managed units!') 257 | 258 | logger.debug('starting monitor...') 259 | 260 | entries = get_entries_for_monitor(managed=managed, params=params) 261 | print_monitor(entries) 262 | else: 263 | from .monitor import MonitorApp 264 | 265 | app = MonitorApp( 266 | monitor_params=params, 267 | refresh_every=n, 268 | show_logger=False, 269 | ) 270 | app.run() 271 | 272 | 273 | def main() -> None: 274 | cli() 275 | -------------------------------------------------------------------------------- /src/dron/monitor.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import re 4 | import subprocess 5 | import traceback 6 | from dataclasses import asdict, fields 7 | from datetime import datetime 8 | from typing import Any, ClassVar, override 9 | 10 | from rich.text import Text 11 | from textual import work 12 | from textual.app import App, ComposeResult 13 | from textual.binding import Binding 14 | from textual.widgets import DataTable, Input, RichLog, Static 15 | from textual.widgets.data_table import RowKey 16 | 17 | from .common import MonitorEntry, MonitorParams 18 | from .dron import get_entries_for_monitor, managed_units 19 | from .notify.common import get_last_systemd_log 20 | from .systemd import _unit_success_rate 21 | 22 | MonitorEntries = dict[RowKey, MonitorEntry] 23 | 24 | 25 | def get_entries(params: MonitorParams, *, mock: bool = False) -> MonitorEntries: 26 | # 
    # mock is useful for testing without overhead etc from systemd
    if mock:
        entries = []
        statuses = ['active', 'inactive', 'failed', 'exit-code: 1', 'exit-code: 127']

        for i in range(200):
            unit_name = f"mock-service-{i:03d}.timer"
            status = statuses[i % len(statuses)]
            status_ok = status in ['active', 'inactive']
            command = f"/usr/bin/mock-command-{i % 10}" if params.with_command else None

            entry = MonitorEntry(
                unit=unit_name,
                status=status,
                left='5min',
                next='2025-12-14 12:00:00',
                schedule='*:0/5',
                command=command,
                pid=None,
                status_ok=status_ok,
            )
            entries.append(entry)
    else:
        managed = list(managed_units(with_body=False))  # body slows down this call quite a bit
        entries = get_entries_for_monitor(managed=managed, params=params)
    return {RowKey(e.unit): e for e in entries}


class Clock(Static):
    """
    Displays current time with millisecond precision. Useful for debugging.
    """

    def update_time(self, dt: datetime) -> None:
        # re-renders the widget with the latest refresh timestamp
        self.update(f"refreshed at: {dt.isoformat()}")


class UnitsTable(DataTable):
    """Table of dron units with vim-style navigation, filtering and incremental updates."""

    BINDINGS: ClassVar = [
        Binding("j"    , "cursor_down"  , "Down"         , show=False),
        Binding("k"    , "cursor_up"    , "Up"           , show=False),
        Binding("h"    , "cursor_left"  , "Left"         , show=False),
        Binding("l"    , "cursor_right" , "Right"        , show=False),
        Binding("g"    , "scroll_top"   , "Top"          , show=False),
        Binding("G"    , "scroll_bottom", "Bottom"       , show=False),
        Binding("^"    , "scroll_home"  , "Start of line", show=False),
        Binding("$"    , "scroll_end"   , "End of line"  , show=False),
        Binding("enter", "select_cursor", "Select"       , show=False),
    ]  # fmt: skip

    def __init__(self, params: MonitorParams) -> None:
        super().__init__(
            cursor_type='row',
            zebra_stripes=True,  # alternating colours
        )
        self.params = params

        # full (unfiltered) set of entries; None until the first refresh completes
        self.entries: MonitorEntries | None = None
        self.filter_query: str = ""  # "" means no filter

        # todo how to check it statically? MonitorEntry.pid isn't giving anything?
        excluded = {
            'pid',
            'status_ok',
        }
        if not self.params.with_command:
            excluded.add('command')
        # hmm a bit nasty that if we name it self.columns we might mess with base class
        # maybe not ideal to use inheritance here..
        self.display_columns = [f.name for f in fields(MonitorEntry) if f.name not in excluded]

    @override
    def on_mount(self) -> None:
        # create one column per displayed MonitorEntry field; key == label
        for col in self.display_columns:
            self.add_column(label=col, key=col)

    def as_row(self, entry: MonitorEntry) -> dict[str, Any]:
        """Convert a MonitorEntry into a column->cell mapping, with status colouring."""
        res = {k: v for k, v in asdict(entry).items() if k in self.display_columns}

        style = 'green' if entry.status_ok else 'red bold'
        res['status'] = Text(res['status'], style=style)

        # meh. workaround for next/left being max datetime
        if res['next'].startswith('9999-'):
            res['left'] = '--'
            res['next'] = Text('never', style='yellow')

        if entry.pid is not None:
            res['left'] = '--'
            res['next'] = Text('running', style='yellow')
        return res

    def set_filter(self, query: str) -> None:
        """Apply a new filter query and redraw the table."""
        self.filter_query = query
        self.update_ui()

    def update_ui(self) -> None:
        """Sync the table rows with self.entries, applying the filter and re-sorting."""
        entries = self.entries
        assert entries is not None
        # if None then only apply filter to existing data
        # TODO crap, but then need to keep previous entries

        if len(self.filter_query) > 0:
            entries = {
                k: v
                for k, v in entries.items()
                # concat all fields into single string for searching, that way can search in command as well
                if re.search(self.filter_query, ' '.join(str(x) for x in asdict(v).values()), re.IGNORECASE)
            }

        current_rows: set[RowKey] = set(self.rows.keys())

        # drop rows that no longer match the filter / disappeared
        to_remove: set[RowKey] = {key for key in current_rows if key not in entries}
        for key in to_remove:
            self.remove_row(row_key=key)

        for key, entry in entries.items():
            new_row = self.as_row(entry)
            if key not in current_rows:
                self.add_row(*new_row.values(), key=key.value)
            else:
                for col, new_value in new_row.items():
                    curr_value = self.get_cell(row_key=key, column_key=col)
                    # hmm seems like DataTable is a bit dumb and even if value is the same, it does costly UI updates...
                    # this is quite noticeable optimization
                    if curr_value != new_value:
                        self.update_cell(row_key=key, column_key=col, value=new_value, update_width=True)

        def sort_key(row: list[str]):
            # kinda annoying to do that because internally DataTable keeps row as dict[ColKey, str]...
            # but before using the key it's converted to a sequence.. ugh
            is_running = 'running' in row[self.get_column_index('next')]
            failed = 'exit-code' in row[self.get_column_index('status')]
            # running units first, then failed ones, then alphabetical
            return (not is_running, not failed, row[self.get_column_index('unit')])

        # TODO hmm kinda annoying, doesn't look like it preserves cursor position
        # if the item pops on top of the list when a service is running?
        # but I guess not a huge deal now
        self.sort(key=sort_key)

    def show_details_in_pager(self, unit_name: RowKey) -> None:
        """Suspend the TUI and pipe unit details + logs into `less`."""
        assert self.entries is not None
        entry = self.entries[unit_name]

        header_lines = [
            f"Unit: {entry.unit}",
        ]
        if entry.command is not None:
            header_lines.append(f"Command: {entry.command}")

        try:
            rate = _unit_success_rate(entry.unit)
        except Exception as e:
            header_lines.append(f"Success rate: failed to get ({e})")
        else:
            header_lines.append(f"Success rate: {rate:.2%}")

        header_lines.append("\nLogs:\n")
        header_bytes = "\n".join(header_lines).encode()

        with self.app.suspend():
            try:
                with subprocess.Popen(['less', '-R'], stdin=subprocess.PIPE) as p:
                    stdin = p.stdin
                    assert stdin is not None
                    stdin.write(header_bytes)
                    try:
                        for chunk in get_last_systemd_log(entry.unit):
                            stdin.write(chunk)
                    except Exception:
                        # exception can happen if unit never ran or who knows why else
                        stdin.write(b"\n\nError fetching logs:\n")
                        stdin.write(traceback.format_exc().encode())
            except BrokenPipeError:
                # User exited pager early
                pass


class SearchInput(Input):
    """Hideable input box used to filter the units table."""

    def __init__(self) -> None:
        super().__init__(placeholder="Filter units...", id="search")

    def on_input_changed(self, event: Input.Changed) -> None:
        # live filtering: every keystroke re-filters the table
        self.app.query_one(UnitsTable).set_filter(event.value)

    def on_input_submitted(self, event: Input.Submitted) -> None:  # noqa: ARG002
        # submitting an empty query dismisses the search box and returns focus to the table
        if not self.value:
            self.display = False
            self.app.query_one(UnitsTable).focus()


class MonitorApp(App):
    """Top-level textual app: clock, search box, units table and a debug log."""

    BINDINGS: ClassVar = [
        Binding("/", "search", "Search"),
        Binding("q", "quit", "Quit"),
        # Disable default ctrl+q, conflicting with OS/terminal bindings
        Binding("ctrl+q", "pass", show=False, priority=True),
    ]

    CSS = """
    Clock {
        dock: top;
        height: 1;
        background: $primary;
        color: $text;
    }
    SearchInput {
        dock: top;
        display: none;
    }
    UnitsTable {
        height: 1fr;
    }
    RichLog {
        height: 5;
        dock: bottom;
        border-top: solid $secondary;
    }
    """

    def __init__(
        self,
        *,
        # annoying to have default args here, but it's convenient for interactive testing with 'textual run ...'
        monitor_params: MonitorParams = MonitorParams(with_success_rate=False, with_command=False),  # noqa: B008
        refresh_every: float = 2.0,
        show_logger: bool = True,
    ) -> None:
        super().__init__()
        self.monitor_params = monitor_params
        self.refresh_every = refresh_every  # seconds between refreshes; <= 0 means refresh continuously
        self.show_logger = show_logger

    @override
    def compose(self) -> ComposeResult:
        yield Clock()
        yield SearchInput()

        yield UnitsTable(params=self.monitor_params)

        # useful for debugging
        yield RichLog()

    @property
    def clock(self) -> Clock:
        return self.query_one(Clock)

    @property
    def search_input(self) -> SearchInput:
        return self.query_one(SearchInput)

    @property
    def units_table(self) -> UnitsTable:
        return self.query_one(UnitsTable)

    @property
    def rich_log(self) -> RichLog:
        return self.query_one(RichLog)

    def on_data_table_row_selected(self, event: DataTable.RowSelected) -> None:
        # Note: This handler needs to be in the App (or a parent widget) because
        # events bubble up from the child (UnitsTable) to the parent.
        # The widget itself does not receive its own bubbled events.
        if isinstance(event.control, UnitsTable):
            event.control.show_details_in_pager(event.row_key)

    # @override  # TODO weird.. type checker complains it's not present in base class?
    def on_mount(self) -> None:
        if not self.show_logger:
            self.rich_log.display = False

        self.units_table.focus()

        self._update_entries()

        # Hmm tried using set_interval..
        # But I think if refresh interval is low enough, it just cancels previous requests
        # , so ends up never rendering anything??
        # self.set_interval(interval=self.refresh_every, callback=self._update_entries)
        # Instead relying on set_timer and tail call in update_entries_ui

    # exclusive cancels previous call if it happens still to run
    @work(exclusive=True, thread=True)
    def _update_entries(self) -> None:
        # NOTE: this only goes into dev console
        # need to run via
        # - TEXTUAL_DEBUG=1 uu run --with=textual-dev textual run --dev ...
        # - also need to run in another tab uu tool run --from textual-dev textual console
        # self.log("UPDATING")

        entries = get_entries(params=self.monitor_params)
        # TODO hmm it likely still spending some time in CPU, so not sure how much thread would help
        self.call_from_thread(self.update_entries_ui, entries)

    def update_entries_ui(self, entries: MonitorEntries) -> None:
        # runs on the UI thread (via call_from_thread); pushes data into widgets and reschedules itself
        updated_at = datetime.now()

        self.rich_log.write(f'{updated_at} UPDATING!')

        self.units_table.entries = entries
        self.units_table.update_ui()

        self.clock.update_time(updated_at)
        self.rich_log.write(f'{updated_at} UPDATED!')

        if self.refresh_every > 0:
            self.set_timer(delay=self.refresh_every, callback=self._update_entries)
        else:
            # update as fast as possible
            self._update_entries()

    def action_search(self) -> None:
        # reveal the (normally hidden) search box and give it focus
        search_input = self.search_input
        search_input.display = True
        search_input.focus()


# -------------------- src/dron/dron.py --------------------
from __future__ import annotations

import importlib.util
import sys
from collections import OrderedDict
from collections.abc import Iterable, Iterator
from concurrent.futures import ProcessPoolExecutor
from difflib import unified_diff
from itertools import tee
from pathlib import Path
from subprocess import check_call
from typing import NamedTuple

import click

from . import launchd, systemd
from .api import Job, UnitName
from .common import (
    ALWAYS,
    IS_SYSTEMD,
    Body,
    State,
    Unit,
    UnitFile,
    UnitState,
    logger,
    unwrap,
)
from .systemd import _systemctl

# todo appdirs?
DRON_DIR = Path('~/.config/dron').expanduser()
DRON_UNITS_DIR = DRON_DIR / 'units'
DRON_UNITS_DIR.mkdir(parents=True, exist_ok=True)


def verify_units(pre_units: list[tuple[UnitName, Body]]) -> None:
    """Syntax-check unit bodies via the active backend; no-op if verification is disabled."""
    # need an inline import here in case we modify this variable from cli/tests
    from .common import VERIFY_UNITS

    if not VERIFY_UNITS:
        return

    if len(pre_units) == 0:
        # otherwise systemd analyser would complain if we pass zero units
        return

    if not IS_SYSTEMD:
        for unit_name, body in pre_units:
            launchd.verify_unit(unit_name=unit_name, body=body)
    else:
        systemd.verify_units(pre_units=pre_units)


def verify_unit(*, unit_name: UnitName, body: Body) -> None:
    """Single-unit convenience wrapper around verify_units."""
    return verify_units([(unit_name, body)])


def write_unit(*, unit: Unit, body: Body, prefix: Path = DRON_UNITS_DIR) -> None:
    """Verify `body` and write it to `prefix/unit`."""
    unit_file = prefix / unit

    logger.info(f'writing unit file: {unit_file}')
    verify_unit(unit_name=unit_file.name, body=body)
    unit_file.write_text(body)


def _daemon_reload() -> None:
    # launchd picks up plists at load time, so only systemd needs an explicit reload
    if IS_SYSTEMD:
        check_call(_systemctl('daemon-reload'))
    else:
        # no-op under launchd
        pass


def managed_units(*, with_body: bool) -> State:
    """Yield the state of all dron-managed units from the active backend."""
    if IS_SYSTEMD:
        yield from systemd.systemd_state(with_body=with_body)
    else:
        yield from launchd.launchd_state(with_body=with_body)


def make_state(jobs: Iterable[Job]) -> State:
    """Render Job specs into verified unit file bodies (the 'desired' state)."""
    pre_units = []
    names: set[Unit] = set()
    for j in jobs:
        uname = j.unit_name

        assert uname not in names, j  # job names must be unique
        names.add(uname)

        if IS_SYSTEMD:
            s = systemd.service(unit_name=uname, command=j.command, on_failure=j.on_failure, **j.kwargs)
            pre_units.append((uname + '.service', s))

            when = j.when
            if when is None:
                # manual job?
                continue
            if when == ALWAYS:
                # always-running service: no timer needed
                continue
            t = systemd.timer(unit_name=uname, when=when)
            pre_units.append((uname + '.timer', t))
        else:
            p = launchd.plist(unit_name=uname, command=j.command, on_failure=j.on_failure, when=j.when)
            pre_units.append((uname + '.plist', p))

    verify_units(pre_units)

    for unit_file, body in pre_units:
        yield UnitState(
            unit_file=DRON_UNITS_DIR / unit_file,
            body=body,
            cmdline=None,  # ugh, a bit crap, but from this code path cmdline doesn't matter
        )


# TODO bleh. too verbose..
class Update(NamedTuple):
    unit_file: UnitFile
    old_body: Body
    new_body: Body

    @property
    def unit(self) -> str:
        return self.unit_file.name


class Delete(NamedTuple):
    unit_file: UnitFile

    @property
    def unit(self) -> str:
        return self.unit_file.name


class Add(NamedTuple):
    unit_file: UnitFile
    body: Body

    @property
    def unit(self) -> str:
        return self.unit_file.name


Action = Update | Delete | Add
Plan = Iterable[Action]

# TODO ugh. not sure how to verify them?


def compute_plan(*, current: State, pending: State) -> Plan:
    """Diff current vs pending unit states into Add/Update/Delete actions."""
    # eh, I feel like i'm reinventing something already existing here...
    currentd = OrderedDict((x.unit_file, unwrap(x.body)) for x in current)
    pendingd = OrderedDict((x.unit_file, unwrap(x.body)) for x in pending)

    units = [c for c in currentd if c not in pendingd] + list(pendingd.keys())
    for u in units:
        in_cur = u in currentd
        in_pen = u in pendingd
        if in_cur:
            if in_pen:
                # TODO not even sure I should emit it if bodies match??
                yield Update(unit_file=u, old_body=currentd[u], new_body=pendingd[u])
            else:
                yield Delete(unit_file=u)
        else:
            if in_pen:
                yield Add(unit_file=u, body=pendingd[u])
            else:
                raise AssertionError("Can't happen")


# TODO it's not apply, more like 'compute' and also plan is more like a diff between states?
def apply_state(pending: State) -> None:
    """Compute the plan against currently managed units and enact it via the backend."""
    # materialize first: `pending` is iterated twice below (pending_units + compute_plan),
    # which would silently yield an empty second pass if a generator was passed in
    pending = list(pending)

    current = list(managed_units(with_body=True))

    pending_units = {s.unit_file.name for s in pending}

    def is_always_running(unit_path: Path) -> bool:
        name = unit_path.stem
        has_timer = f'{name}.timer' in pending_units
        # TODO meh. not ideal
        return not has_timer

    plan = list(compute_plan(current=current, pending=pending))

    deletes: list[Delete] = []
    adds: list[Add] = []
    _updates: list[Update] = []

    for a in plan:
        if isinstance(a, Delete):
            deletes.append(a)
        elif isinstance(a, Add):
            adds.append(a)
        elif isinstance(a, Update):
            _updates.append(a)
        else:
            raise TypeError("Can't happen", a)

    if len(deletes) == len(current) and len(deletes) > 0:
        # safety net against wiping every job (e.g. due to a broken tab file)
        msg = "Trying to delete all managed jobs"
        if click.confirm(f'{msg}. Are you sure?', default=False):
            pass
        else:
            raise RuntimeError(msg)

    Diff = list[str]
    nochange: list[Update] = []
    updates: list[tuple[Update, Diff]] = []

    for u in _updates:
        # NOTE: a stray `unit = a.unit` used to live here -- it read the leftover loop
        # variable `a` from the classification loop above and was never used; removed
        diff: Diff = list(
            unified_diff(
                u.old_body.splitlines(keepends=True),
                u.new_body.splitlines(keepends=True),
            )
        )
        if len(diff) == 0:
            nochange.append(u)
        else:
            updates.append((u, diff))

    # TODO list unit names here?
    logger.info(f'no change: {len(nochange)}')
    logger.info(f'disabling: {len(deletes)}')
    logger.info(f'updating : {len(updates)}')
    logger.info(f'adding : {len(adds)}')

    for a in deletes:
        if IS_SYSTEMD:
            # TODO stop timer first?
            check_call(_systemctl('stop', a.unit))
            check_call(_systemctl('disable', a.unit))
        else:
            launchd.launchctl_unload(unit=Path(a.unit).stem)
    for a in deletes:
        (DRON_UNITS_DIR / a.unit).unlink()

    for u, diff in updates:
        unit = u.unit
        unit_file = u.unit_file
        logger.info(f'updating {unit}')
        for d in diff:
            sys.stderr.write(d)
        write_unit(unit=u.unit, body=u.new_body)
        if IS_SYSTEMD:
            if unit.endswith('.service') and is_always_running(unit_file):
                # persistent unit needs a restart to pick up change
                _daemon_reload()
                check_call(_systemctl('restart', unit))
        else:
            launchd.launchctl_reload(unit=Path(unit).stem, unit_file=unit_file)

        if unit.endswith('.timer'):
            _daemon_reload()
            # NOTE: need to be careful -- seems that job might trigger straightaway if it's on interval schedule
            # so if we change something unrelated (e.g. whitespace), it will start all jobs at the same time??
            check_call(_systemctl('restart', u.unit))

    for a in adds:
        logger.info(f'adding {a.unit_file}')
        # TODO when we add, assert that previous unit wasn't managed? otherwise we overwrite something
        write_unit(unit=a.unit, body=a.body)

    # need to load units before starting the timers..
    _daemon_reload()

    for a in adds:
        unit_file = a.unit_file
        unit = unit_file.name
        logger.info(f'enabling {unit}')
        if unit.endswith('.service'):
            # quiet here because it warns that "The unit files have no installation config"
            # TODO maybe add [Install] section? dunno
            maybe_now = []
            if is_always_running(unit_file):
                maybe_now = ['--now']
            check_call(_systemctl('enable', unit_file, '--quiet', *maybe_now))
        elif unit.endswith('.timer'):
            check_call(_systemctl('enable', unit_file, '--now'))
        elif unit.endswith('.plist'):
            launchd.launchctl_load(unit_file=unit_file)
        else:
            raise AssertionError(a)

    # TODO not sure if this reload is even necessary??
    _daemon_reload()


def manage(state: State) -> None:
    """Apply the desired state to the system."""
    apply_state(pending=state)


Error = str
# TODO perhaps, return Plan or error instead?


# eh, implicit convention that only one state will be emitted. oh well
# FIXME rename from lint? just use compileall or something as a syntax check?
def lint(tab_module: str) -> Iterator[Exception | State]:
    """Load jobs from `tab_module` and render their state; yields exceptions instead of raising."""
    # TODO tbh compileall is pointless
    # - we can't find out source names property without importing
    # - we'll find out about errors during importing anyway

    try:
        jobs = load_jobs(tab_module)
    except Exception as e:
        # TODO could add better logging here? i.e. 'error while loading jobs'
        logger.exception(e)
        yield e
        return

    try:
        state = list(make_state(jobs))
    except Exception as e:
        logger.exception(e)
        yield e
        return

    yield state


def do_lint(tab_module: str) -> State:
    """Like lint(), but raises on any error and returns the single state."""
    eit, vit = tee(lint(tab_module))
    errors = [r for r in eit if isinstance(r, Exception)]
    values = [r for r in vit if not isinstance(r, Exception)]
    assert len(errors) == 0, errors
    [state] = values
    return state


def _import_jobs(tab_module: str) -> list[Job]:
    # runs inside the worker process (see load_jobs)
    module = importlib.import_module(tab_module)
    jobs_gen = getattr(module, 'jobs')  # get dynamically to make type checking happy
    return list(jobs_gen())


def load_jobs(tab_module: str) -> Iterator[Job]:
    """Import the tab module in a subprocess and yield its jobs, checking for duplicates."""
    # actually import in a separate process to avoid mess with polluting sys.modules
    # shouldn't be a problem in most cases, but it was annoying during tests
    with ProcessPoolExecutor(max_workers=1) as pool:
        jobs = pool.submit(_import_jobs, tab_module).result()

    emitted: dict[str, Job] = {}
    for job in jobs:
        assert isinstance(job, Job), job  # just in case for dumb typos
        assert job.unit_name not in emitted, (job, emitted[job.unit_name])
        yield job
        emitted[job.unit_name] = job


def apply(tab_module: str) -> None:
    """End-to-end: load + verify the tab module, then apply the resulting state."""
    # TODO rename do_lint to get_state?
    state = do_lint(tab_module)
    manage(state=state)


# dispatch monitor entry collection to the active backend
get_entries_for_monitor = systemd.get_entries_for_monitor if IS_SYSTEMD else launchd.get_entries_for_monitor


def main() -> None:
    from . import cli

    cli.main()


if __name__ == '__main__':
    main()


# TODO stuff I learnt:
# TODO systemd-analyze --user unit-paths
# TODO blame!
376 | # systemd-analyze verify -- check syntax 377 | 378 | # TODO would be nice to revert... via contextmanager? 379 | # TODO assert that managed by dron 380 | # TODO not sure what rollback should do w.r.t to 381 | # TODO perhaps, only reenable changed ones? ugh. makes it trickier... 382 | 383 | # TODO wonder if I remove timers, do they drop counts? 384 | # TODO FIXME ok, for now, it's fine, but with more sophisticated timers might be a bit annoying 385 | 386 | # TODO use python's literate types? 387 | 388 | 389 | # TODO wow, that's quite annoying. so timer has to be separate file. oh well. 390 | 391 | # TODO tui for confirming changes, show short diff? 392 | 393 | # TODO actually for me, stuff like 'hourly' makes little sense; I usually space out in time.. 394 | 395 | # https://bugs.python.org/issue31528 eh, probably can't use configparser.. plaintext is good enough though. 396 | 397 | 398 | # TODO later, implement logic for cleaning up old jobs 399 | 400 | 401 | # TODO not sure if should do one by one or all at once? 402 | # yeah, makes sense to do all at once... 403 | # TODO warn about dirty state? 404 | 405 | 406 | # TODO test with 'fake' systemd dir? 407 | 408 | # TODO the assumption is that managed jobs are not changed manually, or changed in a way that doesn't break anything 409 | # in general it's impossible to prevent anyway 410 | 411 | # def update_unit(unit_file: Unit, old_body: Body, new_body: Body) -> Action: 412 | # if old_body == new_body: 413 | # pass # TODO no-op? 414 | # else: 415 | # raise RuntimeError(unit_file, old_body, new_body) 416 | # # TODO hmm FIXME!! yield is a nice way to make function lazy?? 417 | 418 | 419 | # TODO that perhaps? 
https://askubuntu.com/a/897317/427470 420 | -------------------------------------------------------------------------------- /src/dron/launchd.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import itertools 4 | import json 5 | import os 6 | import re 7 | import shlex 8 | import sys 9 | import textwrap 10 | from collections.abc import Iterator, Sequence 11 | from datetime import timedelta 12 | from pathlib import Path 13 | from subprocess import PIPE, Popen, check_call, check_output 14 | from tempfile import TemporaryDirectory 15 | from typing import Any 16 | 17 | from .api import ( 18 | OnCalendar, 19 | OnFailureAction, 20 | When, 21 | ) 22 | from .common import ( 23 | ALWAYS, 24 | MANAGED_MARKER, 25 | Command, 26 | LaunchdUnitState, 27 | MonitorEntry, 28 | MonitorParams, 29 | State, 30 | Unit, 31 | UnitFile, 32 | logger, 33 | unwrap, 34 | ) 35 | 36 | # TODO custom launchd domain?? maybe instead could do dron/ or something? 37 | _LAUNCHD_DOMAIN = f'gui/{os.getuid()}' 38 | 39 | 40 | # in principle not necessary... 41 | # but makes it much easier to filter out logs & lobs from launchctl dump 42 | DRON_PREFIX = 'dron.' 
43 | 44 | 45 | def _launchctl(*args: Path | str) -> list[Path | str]: 46 | return ['launchctl', *args] 47 | 48 | 49 | def _launch_agent(path: str) -> Path: 50 | # symlink for autostart 51 | assert path.endswith('.plist'), path # meh 52 | assert not Path(path).is_absolute(), path 53 | 54 | LA = Path('~/Library/LaunchAgents').expanduser() 55 | link = LA / path 56 | return link 57 | 58 | 59 | def fqn(name: Unit) -> str: 60 | return _LAUNCHD_DOMAIN + '/' + DRON_PREFIX + name 61 | 62 | 63 | def launchctl_load(*, unit_file: UnitFile) -> None: 64 | # bootstrap is nicer than load 65 | # load is super defensive, returns code 0 on errors 66 | check_call(_launchctl('bootstrap', _LAUNCHD_DOMAIN, unit_file)) 67 | _launch_agent(unit_file.name).symlink_to(unit_file) 68 | 69 | 70 | def launchctl_unload(*, unit: Unit) -> None: 71 | # bootout is more verbose than unload 72 | # in addition unload is super defensive, returns code 0 on errors 73 | check_call(_launchctl('bootout', fqn(unit))) 74 | _launch_agent(unit + '.plist').unlink() 75 | 76 | 77 | def launchctl_kickstart(*, unit: Unit) -> None: 78 | check_call(_launchctl('kickstart', fqn(unit))) 79 | 80 | 81 | def launchctl_reload(*, unit: Unit, unit_file: UnitFile) -> None: 82 | # don't think there is a better way? 83 | launchctl_unload(unit=unit) 84 | launchctl_load(unit_file=unit_file) 85 | 86 | 87 | def launchd_wrapper(*, job: str, on_failure: list[str]) -> list[str]: 88 | return [ 89 | sys.executable, 90 | '-B', # do not write byte code, otherwise it shits into dron directory if we're using editable install 91 | '-m', 'dron.launchd_wrapper', 92 | *itertools.chain.from_iterable(('--notify', n) for n in on_failure), 93 | '--job', job, 94 | '--', 95 | ] # fmt: skip 96 | 97 | 98 | def remove_launchd_wrapper(cmd: str) -> str: 99 | if ' dron.launchd_wrapper ' not in cmd: 100 | return cmd 101 | # uhh... 
not super reliable, but this is only used for monitor so hopefully fine 102 | [_, cmd] = cmd.split(' -- ', maxsplit=1) 103 | return cmd 104 | 105 | 106 | def plist( 107 | *, 108 | unit_name: str, 109 | command: Command, 110 | on_failure: Sequence[OnFailureAction], 111 | when: When | None = None, 112 | ) -> str: 113 | # TODO hmm, kinda mirrors 'escape' method, not sure 114 | cmd: Sequence[str] 115 | if isinstance(command, (list, tuple)): 116 | cmd = tuple(map(str, command)) 117 | elif isinstance(command, Path): 118 | cmd = [str(command)] 119 | elif isinstance(command, str) and ' ' not in command: 120 | cmd = [command] 121 | else: 122 | # unquoting and splitting is way trickier than quoting and joining... 123 | # not sure how to implement it p 124 | # maybe we just want bash -c in this case, dunno how to implement properly 125 | raise RuntimeError(command) 126 | del command 127 | 128 | mschedule = '' 129 | if when is None: 130 | # support later 131 | raise RuntimeError(unit_name) 132 | 133 | if when == ALWAYS: 134 | mschedule = 'KeepAlive\n' 135 | else: 136 | assert isinstance(when, OnCalendar), when 137 | # https://www.freedesktop.org/software/systemd/man/systemd.time.html# 138 | # fmt: off 139 | seconds = { 140 | 'minutely': 60, 141 | 'hourly' : 60 * 60, 142 | 'daily' : 60 * 60 * 24, 143 | }.get(when) 144 | # fmt: on 145 | if seconds is None: 146 | # ok, try systemd-like spec.. 
147 | # fmt: off 148 | specs = [ 149 | (re.escape('*:0/') + r'(\d+)', 60), 150 | (re.escape('*:*:0/') + r'(\d+)', 1), 151 | ] 152 | # fmt: on 153 | for rgx, mult in specs: 154 | m = re.fullmatch(rgx, when) 155 | if m is not None: 156 | num = m.group(1) 157 | seconds = int(num) * mult 158 | break 159 | if seconds is None: 160 | # try to parse as hh:mm at least 161 | m = re.fullmatch(r'(\d\d):(\d\d)', when) 162 | assert m is not None, when 163 | hh = m.group(1) 164 | mm = m.group(2) 165 | mschedule = '\n'.join( 166 | [ 167 | 'StartCalendarInterval', 168 | '', 169 | 'Hour', 170 | f'{int(hh)}', 171 | 'Minute', 172 | f'{int(mm)}', 173 | '', 174 | ] 175 | ) 176 | else: 177 | mschedule = '\n'.join(('StartInterval', f'{seconds}')) 178 | 179 | assert mschedule != '', unit_name 180 | 181 | # meh.. not sure how to reconcile it better with systemd 182 | on_failure = [x.replace('--job %n', f'--job {unit_name}') + ' --stdin' for x in on_failure] 183 | 184 | # attempt to set argv[0] properly 185 | # hmm I was hoping it would make desktop notifications ('background service added' nicer) 186 | # but even after that it still only shows executable script name. ugh 187 | # program_argv = (unit_name, *cmd[1:]) 188 | program_argv = ( 189 | *launchd_wrapper(job=unit_name, on_failure=on_failure), 190 | *cmd, 191 | ) 192 | del cmd 193 | program_argvs = '\n'.join(f'{c}' for c in program_argv) 194 | 195 | # TODO add log file, although mailer is already capturing stdout 196 | # TODO hmm maybe use the same log file for all dron jobs? would make it easier to rotate? 
def launchd_state(*, with_body: bool) -> Iterator[LaunchdUnitState]:
    """Parse `launchctl dumpstate` output and yield state for dron-managed units.

    with_body: if True, also read each plist file's contents from disk.
    """
    # sadly doesn't look like it has json interface??
    dump = check_output(['launchctl', 'dumpstate']).decode('utf8')

    name: str | None = None  # label of the job group currently being parsed
    extras: dict[str, Any] = {}  # selected 'key = value' props collected for the current group
    arguments: list[str] | None = None  # non-None while inside an 'arguments = {...}' sub-block
    all_props: str | None = None  # raw accumulated text of the current group (for substring checks)
    fields = [
        'path',
        'last exit code',
        'pid',
        'run interval',
    ]
    for line in dump.splitlines():
        if name is None:
            # start of job description group
            name = line.removesuffix(' = {')
            all_props = ''
            continue
        elif line == '}':
            # end of job description group
            path: str | None = extras.get('path')
            if path is not None and 'dron' in path:
                # otherwise likely some sort of system unit
                unit_file = Path(path)
                body = unit_file.read_text() if with_body else None

                # TODO extract 'state'??

                periodic_schedule = extras.get('run interval')
                calendal_schedule = 'com.apple.launchd.calendarinterval' in unwrap(all_props)

                schedule: str | None
                if periodic_schedule is not None:
                    schedule = 'every ' + periodic_schedule
                elif calendal_schedule:
                    # TODO parse properly
                    schedule = 'calendar'
                else:
                    # NOTE: seems like keepalive attribute isn't present in launchd dumpstate output
                    schedule = 'always'

                yield LaunchdUnitState(
                    unit_file=unit_file,
                    body=body,
                    cmdline=tuple(extras['arguments']),
                    # might not be present when we killed process manually?
                    last_exit_code=extras.get('last exit code'),
                    # pid might not be present (presumably when it's not running)
                    pid=extras.get('pid'),
                    schedule=schedule,
                )
            name = None
            all_props = None
            extras = {}
            continue

        all_props = unwrap(all_props) + line + '\n'

        if arguments is not None:
            if line == '\t}':
                # end of the arguments sub-block
                extras['arguments'] = arguments
                arguments = None
            else:
                arg = line.removeprefix('\t\t')
                arguments.append(arg)
        else:
            xx = line.removeprefix('\t')
            for f in fields:
                zz = f'{f} = '
                if xx.startswith(zz):
                    extras[f] = xx.removeprefix(zz)
                    break
            # special handling..
            if xx.startswith('arguments = '):
                arguments = []


def verify_unit(*, unit_name: str, body: str) -> None:
    """Lint a plist body via plutil (writes it to a temp file first)."""
    with TemporaryDirectory() as tdir:
        tfile = Path(tdir) / unit_name
        tfile.write_text(body)
        check_call(
            [
                'plutil',
                '-lint',
                '-s',  # silent on success
                tfile,
            ]
        )


def cmd_past(unit: Unit) -> None:
    """Print past run/exit events for the unit from the macOS unified log."""
    # NOTE: fqn() already prepends DRON_PREFIX ('dron.'); previously this passed
    # 'dron.' + unit, producing a double-prefixed subsystem ('gui/UID/dron.dron.<unit>')
    # which could never match the actual launchd subsystem for the job
    sub = fqn(unit)
    # fmt: off
    cmd = [
        # todo maybe use 'stream'??
        'log', 'show', '--info',
        # '--last', '24h',
        # hmm vvv that doesn't work, if we pass pid, predicate is ignored?
        # '--process', '1',
        # hmm, oddly enough "&&" massively slows the predicate??
        # '--predicate', f'processIdentifier=1 && (subsystem contains "gui/501/dron.{unit}")',
        '--predicate', f'subsystem contains "{sub}"',
        '--style', 'ndjson',
        '--color', 'always',
    ]
    # fmt: on
    with Popen(cmd, stdout=PIPE, encoding='utf8') as p:
        out = p.stdout
        assert out is not None
        for line in out:
            j = json.loads(line)
            if j.get('finished') == 1:
                # last event at the very end
                continue
            subsystem = j['subsystem']
            # sometimes subsystem contains pid at the end, need to chop it off
            # also that's why we can't use "subsystem = " predicate :(
            subsystem = subsystem.split(' ')[0]
            if sub != subsystem:
                continue
            msg = j['eventMessage']

            interesting = re.search(' spawned .* because', msg) or 'exited ' in msg
            if not interesting:
                continue
            ts = j['timestamp']
            print(ts, sub, msg)


def cmd_run(*, unit: Unit, do_exec: bool) -> None:
    """Run a unit: either kickstart it via launchd, or exec its command in-process."""
    if not do_exec:
        launchctl_kickstart(unit=unit)
        return

    states = []
    for s in launchd_state(with_body=False):
        if s.unit_file.stem == unit:
            states.append(s)
    [state] = states  # exactly one matching unit expected
    cmdline = state.cmdline
    assert cmdline is not None, unit

    ## cut off launchd wrapper
    sep_i = cmdline.index('--')
    cmdline = cmdline[sep_i + 1 :]
    ##

    cmds = ' '.join(map(shlex.quote, cmdline))
    logger.info(f'running: {cmds}')
    os.execvp(
        cmdline[0],
        list(cmdline),
    )
managed: 389 | assert isinstance(s, LaunchdUnitState), s 390 | 391 | unit_file = s.unit_file 392 | name = unit_file.name.removesuffix('.plist') 393 | 394 | is_seconds = re.fullmatch(r'every (\d+) seconds', s.schedule or '') 395 | if is_seconds is not None: 396 | delta = timedelta(seconds=int(is_seconds.group(1))) 397 | # meh, but works for now 398 | ss = f'every {delta}' 399 | else: 400 | ss = str(s.schedule) 401 | 402 | schedule = ss 403 | command = None 404 | if params.with_command: 405 | cmdline = s.cmdline 406 | assert cmdline is not None, name # not None for launchd units 407 | command = ' '.join(map(shlex.quote, cmdline)) 408 | command = remove_launchd_wrapper(command) 409 | 410 | status_ok = s.last_exit_code == '0' 411 | status = 'success' if status_ok else f'exitcode {s.last_exit_code}' 412 | 413 | pid = s.pid 414 | 415 | entries.append( 416 | MonitorEntry( 417 | unit=name, 418 | status=status, 419 | left='n/a', 420 | next='n/a', 421 | schedule=schedule, 422 | command=command, 423 | pid=pid, 424 | status_ok=status_ok, 425 | ) 426 | ) 427 | return entries 428 | -------------------------------------------------------------------------------- /src/dron/systemd.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import json 4 | import os 5 | import re 6 | import shlex 7 | import shutil 8 | from collections.abc import Iterator, Sequence 9 | from datetime import UTC, datetime, timedelta 10 | from functools import lru_cache 11 | from itertools import groupby 12 | from pathlib import Path 13 | from subprocess import PIPE, Popen, run 14 | from tempfile import TemporaryDirectory 15 | from typing import Any 16 | from zoneinfo import ZoneInfo 17 | 18 | from .api import ( 19 | OnFailureAction, 20 | When, 21 | ) 22 | from .common import ( 23 | MANAGED_MARKER, 24 | Body, 25 | Command, 26 | MonitorEntry, 27 | MonitorParams, 28 | State, 29 | SystemdUnitState, 30 | TimerSpec, 31 | Unit, 32 | 
def _is_missing_systemd() -> str | None:
    """Return a human-readable reason when systemd isn't usable here, or None when it is."""
    if shutil.which('systemctl') is None:
        return "systemd not available, running under docker or osx"
    return None


def _systemctl(*args: Path | str) -> list[Path | str]:
    """Prefix *args* with the user-session systemctl invocation."""
    return ['systemctl', '--user', *args]


def managed_header() -> str:
    """Header stamped into every unit dron owns, so it can be recognised (and overwritten) later."""
    return f'''
# {MANAGED_MARKER}
# If you do any manual changes, they will be overridden on the next dron run
'''.lstrip()


# TODO how to come up with good implicit job name?
# TODO do we need a special target for dron?
def timer(*, unit_name: str, when: When) -> str:
    """Render the body of a systemd .timer unit for *unit_name*, firing at *when*."""
    spec: TimerSpec = {'OnCalendar': when} if isinstance(when, str) else when
    rendered = '\n'.join(f'{k}={v}' for k, v in spec.items())

    return f'''
{managed_header()}
[Unit]
Description=Timer for {unit_name} {MANAGED_MARKER}

[Timer]
{rendered}

[Install]
WantedBy=timers.target
'''.lstrip()


# TODO add Restart=always and RestartSec?
# TODO allow to pass extra args
def service(
    *,
    unit_name: str,
    command: Command,
    on_failure: Sequence[OnFailureAction],
    **kwargs: str,
) -> str:
    """Render the body of a systemd .service unit.

    Extra kwargs become `key=value` lines; a key like '[Unit]Something' targets that
    section explicitly, otherwise the pair lands in [Service] ('legacy' behaviour).
    """
    # TODO not sure if something else needs to be escaped for ExecStart??
    # todo systemd-escape? but only can be used for names

    # OnFailure is quite annoying since it can't take arguments etc -- much easier to use ExecStopPost
    # (+ can possibly run on success too that way?)
    # https://unix.stackexchange.com/a/441662/180307
    cmd = escape(command)

    sections: dict[str, list[str]] = {
        '[Unit]': [f'Description=Service for {unit_name} {MANAGED_MARKER}'],
        '[Service]': [
            f'ExecStart={cmd}',
            *(
                f"ExecStopPost=/bin/sh -c 'if [ $$EXIT_STATUS != 0 ]; then {action}; fi'"
                for action in on_failure
            ),
        ],
    }

    for key, value in kwargs.items():
        # ideally every key would carry its section name
        m = re.search(r'(\[\w+\])(.*)', key)
        if m is not None:
            section, k = m.group(1), m.group(2)
        else:
            # 'legacy' behaviour: default to [Service]
            section, k = '[Service]', key
        sections.setdefault(section, []).append(f'{k}={value}')

    body = managed_header()
    for section_name, lines in sections.items():
        body += '\n\n' + '\n'.join([section_name, *lines])
    body += '\n'

    return body


def test_managed() -> None:
    skip_if_no_systemd()
    from .dron import verify_unit

    assert is_managed(timer(unit_name='whatever', when='daily'))

    custom = '''
[Service]
ExecStart=/bin/echo 123
'''
    verify_unit(unit_name='other.service', body=custom)  # precondition
    assert not is_managed(custom)
def test_verify_systemd() -> None:
    skip_if_no_systemd()
    from .dron import verify_unit

    def FAILS(body: str) -> None:
        import pytest

        with pytest.raises(Exception):
            verify_unit(unit_name='whatever.service', body=body)

    def OK(body: str) -> None:
        verify_unit(unit_name='ok.service', body=body)

    # minimal valid unit
    OK(
        body='''
[Service]
ExecStart=/bin/echo 123
'''
    )

    from .api import notify

    on_failure = (
        notify.email('test@gmail.com'),
        notify.desktop_notification,
    )
    # unit generated by our own service() helper should verify cleanly
    OK(body=service(unit_name='alala', command='/bin/echo 123', on_failure=on_failure))

    # garbage
    FAILS(body='fewfewf')

    # no execstart
    FAILS(
        body='''
[Service]
StandardOutput=journal
'''
    )

    # bad value for a known key
    FAILS(
        body='''
[Service]
ExecStart=yes
StandardOutput=baaad
'''
    )


def _sd(s: str) -> str:
    """Qualify *s* with the org.freedesktop.systemd1 dbus namespace."""
    return 'org.freedesktop.systemd1' + s


class BusManager:
    """Thin wrapper over the user-session dbus connection to systemd."""

    def __init__(self) -> None:
        # unused-ignore because on macos there is no dbus (but this code is still running mypy on CI)
        from dbus import (  # type: ignore[import-untyped,import-not-found,unused-ignore]
            Interface,
            SessionBus,
        )

        self.Interface = Interface  # meh

        self.bus = SessionBus()  # note: SystemBus is for system-wide services
        systemd_obj = self.bus.get_object(_sd(''), '/org/freedesktop/systemd1')
        self.manager = Interface(systemd_obj, dbus_interface=_sd('.Manager'))

    def properties(self, u: Unit):
        """Return the DBus.Properties interface for the given unit."""
        unit_path = self.manager.GetUnit(u)
        unit_proxy = self.bus.get_object(_sd(''), str(unit_path))
        return self.Interface(unit_proxy, dbus_interface='org.freedesktop.DBus.Properties')

    @staticmethod  # meh
    def prop(obj, schema: str, name: str):
        return obj.Get(_sd(schema), name)

    @classmethod
    def exec_start(cls, props) -> Sequence[str]:
        """Extract the ExecStart argv from a service's dbus properties."""
        dbus_exec_start = cls.prop(props, '.Service', 'ExecStart')
        return [str(x) for x in dbus_exec_start[0][1]]
but whatever ok 276 | props = bus.properties(name) 277 | 278 | # useful for debugging, can also use .Service if it's not a timer 279 | # all_properties = props.GetAll(_sd('.Unit')) 280 | # however GetAll seems slower than gettind individual properties 281 | 282 | # stale = int(bus.prop(props, '.Unit', 'NeedDaemonReload')) == 1 283 | # TODO do we actually need to resolve? 284 | unit_file = Path(str(bus.prop(props, '.Unit', 'FragmentPath'))).resolve() 285 | body = unit_file.read_text() if with_body else None 286 | cmdline: Sequence[str] | None 287 | if '.timer' in name: # meh 288 | cmdline = None 289 | else: 290 | cmdline = BusManager.exec_start(props) 291 | 292 | yield SystemdUnitState(unit_file=unit_file, body=body, cmdline=cmdline, dbus_properties=props) 293 | 294 | 295 | def test_managed_units() -> None: 296 | skip_if_no_systemd() 297 | # TODO wonder if i'd be able to use launchd on ci... 298 | from .cli import cmd_monitor 299 | from .dron import managed_units 300 | 301 | # shouldn't fail at least 302 | list(managed_units(with_body=True)) 303 | 304 | # TODO ugh. doesn't work on circleci, fails with 305 | # dbus.exceptions.DBusException: org.freedesktop.DBus.Error.BadAddress: Address does not contain a colon 306 | # todo maybe don't need it anymore with 20.04 circleci? 307 | if 'CI' not in os.environ: 308 | cmd_monitor.callback(n=1, once=True, command=True, rate=True) # type: ignore[misc] 309 | 310 | 311 | def skip_if_no_systemd() -> None: 312 | import pytest 313 | 314 | reason = _is_missing_systemd() 315 | if reason is not None: 316 | pytest.skip(f'No systemd: {reason}') 317 | 318 | 319 | _UTCMAX = datetime.max.replace(tzinfo=UTC) 320 | 321 | 322 | class MonitorHelper: 323 | def from_usec(self, usec) -> datetime_aware: 324 | u = int(usec) 325 | if u == 2**64 - 1: # apparently systemd uses max uint64 326 | # happens if the job is running ATM? 
327 | return _UTCMAX 328 | else: 329 | return datetime.fromtimestamp(u / 10**6, tz=UTC) 330 | 331 | @property 332 | @lru_cache # noqa: B019 333 | def local_tz(self) -> ZoneInfo: 334 | try: 335 | # it's a required dependency, but still might fail in some weird environments? 336 | # e.g. if zoneinfo information isn't available 337 | from tzlocal import get_localzone 338 | 339 | return get_localzone() 340 | except Exception: 341 | logger.error("Couldn't determine local timezone! Falling back to UTC") 342 | return ZoneInfo('UTC') 343 | 344 | 345 | # TODO maybe format seconds prettier. dunno 346 | def _fmt_delta(d: timedelta) -> str: 347 | # format to reduce constant countdown... 348 | ad = abs(d) 349 | # get rid of microseconds 350 | ad = ad - timedelta(microseconds=ad.microseconds) 351 | 352 | day = timedelta(days=1) 353 | hour = timedelta(hours=1) 354 | minute = timedelta(minutes=1) 355 | gt = False 356 | if ad > day: 357 | full_days = ad // day 358 | hours = (ad % day) // hour 359 | ads = f'{full_days}d {hours}h' 360 | gt = True 361 | elif ad > minute: 362 | full_mins = ad // minute 363 | ad = timedelta(minutes=full_mins) 364 | ads = str(ad) 365 | gt = True 366 | else: 367 | # show exact 368 | ads = str(ad) 369 | if len(ads) == 7: 370 | ads = '0' + ads # meh. fix missing leading zero in hours.. 
def get_entries_for_monitor(managed: State, *, params: MonitorParams) -> list[MonitorEntry]:
    """Build monitor table rows for systemd-managed jobs, pairing each .service with its .timer."""
    mon = MonitorHelper()

    UTCNOW = datetime.now(tz=UTC)

    bus = BusManager()

    entries: list[MonitorEntry] = []

    # sort so that neighbouring unit.service/unit.timer go one after another for grouping
    sort_key = lambda unit: unit.unit_file.name
    # for grouping, group by common stem of timers and services
    stem_name = lambda unit: sort_key(unit).split('.')[0]
    for k, _gr in groupby(sorted(managed, key=sort_key), key=stem_name):
        gr: list[SystemdUnitState] = []
        for x in _gr:
            assert isinstance(x, SystemdUnitState), x  # guaranteed by managed_units function
            gr.append(x)

        # if timer is None, guess that means the job is always running?
        timer: SystemdUnitState | None
        service: SystemdUnitState
        if len(gr) == 2:
            [service, timer] = gr
        else:
            assert len(gr) == 1, gr
            [service] = gr
            timer = None

        service_props = service.dbus_properties

        if timer is not None:
            props = timer.dbus_properties
            # FIXME this might be io bound? maybe make async or use thread pool?
            cal = bus.prop(props, '.Timer', 'TimersCalendar')
            next_ = bus.prop(props, '.Timer', 'NextElapseUSecRealtime')

            # note: there is also bus.prop(props, '.Timer', 'LastTriggerUSec'), but makes more sense to use unit to account for manual runs
            last = bus.prop(service_props, '.Unit', 'ActiveExitTimestamp')

            schedule = cal[0][1]  # TODO is there a more reliable way to retrieve it??
            # todo not sure if last is really that useful..

            last_dt = mon.from_usec(last)
            next_dt = mon.from_usec(next_)
            nexts = next_dt.astimezone(mon.local_tz).replace(tzinfo=None, microsecond=0).isoformat()

            # bugfix: this used to compare against *naive* datetime.max, which an aware
            # datetime never equals -- so the 'running right now' case silently fell
            # through to a bogus multi-millennium delta
            if next_dt == _UTCMAX:
                left_delta = timedelta(0)
            else:
                left_delta = next_dt - UTCNOW
        else:
            left_delta = timedelta(0)  # TODO
            last_dt = UTCNOW
            nexts = 'n/a'
            schedule = 'always'

        left = f'{_fmt_delta(left_delta)!s:<9}'
        if last_dt.timestamp() == 0:
            ago = 'never'  # TODO yellow?
        else:
            passed_delta = UTCNOW - last_dt
            ago = str(_fmt_delta(passed_delta))
        # TODO instead of hacking microsecond, use 'NOW' or something?

        # TODO some summary too? e.g. how often in failed
        if params.with_command:
            exec_start = service.cmdline
            assert exec_start is not None, service  # not None for services
            command = shlex.join(exec_start)
        else:
            command = None
        _pid: int | None = int(bus.prop(service_props, '.Service', 'MainPID'))
        pid = None if _pid == 0 else str(_pid)

        if params.with_success_rate:
            rate = _unit_success_rate(service.unit_file.name)
            rates = f' {rate:.2f}'
        else:
            rates = ''

        service_result = bus.prop(service_props, '.Service', 'Result')
        status_ok = service_result == 'success'
        status = f'{service_result:<9} {ago:<8}{rates}'

        entries.append(
            MonitorEntry(
                unit=k,
                status=status,
                left=left,
                next=nexts,
                schedule=schedule,
                command=command,
                pid=pid,
                status_ok=status_ok,
            )
        )
    return entries


Json = dict[str, Any]
def _unit_success_rate(unit: Unit) -> float:
    """Fraction of recorded runs of *unit* that didn't fail (1.0 when nothing was logged)."""
    started = 0
    failed = 0
    # TODO not sure how much time it takes to query all journals?
    for j in _unit_logs(unit):
        job_type = j.get('JOB_TYPE')
        unit_result = j.get('UNIT_RESULT')
        if job_type is not None:
            assert unit_result is None
            started += 1
        elif unit_result is not None:
            assert job_type is None
            failed += 1
        # TODO eh? sometimes jobs also report Succeeded status
        # e.g. syncthing-paranoid
    if started == 0:
        assert failed == 0, unit
        return 1.0
    return (started - failed) / started


def cmd_past(unit: Unit) -> None:
    """Print timestamped journal messages for *unit*."""
    mon = MonitorHelper()
    for j in _unit_logs(unit):
        ts = mon.from_usec(j['__REALTIME_TIMESTAMP'])
        print(ts.isoformat(), j['MESSAGE'])


def cmd_run(*, unit: Unit, do_exec: bool) -> None:
    """Exec the command behind *unit*'s .service in the current process."""
    assert do_exec  # support without exec later
    # TODO we might have called it before via managed_units.. maybe need to cache
    matching = []
    for s in systemd_state(with_body=False):
        # meh
        if s.unit_file.name.endswith('.timer'):
            continue
        if s.unit_file.stem == unit:
            matching.append(s)
    [state] = matching
    cmdline = state.cmdline
    assert cmdline is not None
    logger.info(f'running: {" ".join(map(shlex.quote, cmdline))}')
    os.execvp(
        cmdline[0],
        list(cmdline),
    )


# used to use this, keeping for now just for the reference
# def old_systemd_emailer() -> None:
#     user = getpass.getuser()
#     X = textwrap.dedent(f'''
#     [Unit]
#     Description=status email for %i to {user}
#
#     [Service]
#     Type=oneshot
#     ExecStart={SYSTEMD_EMAIL} --to {user} --unit %i --journalctl-args "-o cat"
#     # TODO why these were suggested??
#     # User=nobody
#     # Group=systemd-journal
#     ''')
#
#     write_unit(unit=f'status-email@.service', body=X, prefix=SYSTEMD_USER_DIR)
#     # I guess makes sense to reload here; fairly atomic step
#     _daemon_reload()