├── .flake8 ├── .github └── workflows │ └── pythonpackage.yml ├── .gitignore ├── .gitpod.yml ├── LICENSE ├── MANIFEST.in ├── Makefile ├── docs ├── cli.md ├── dashboard1.png ├── dashboard2.png ├── dashboard3.png ├── full-logo.png ├── getting-started.md ├── history1.png ├── index.md └── logo.png ├── jobs ├── badpyjob.py ├── printer.py └── pyjob.py ├── mkdocs.yml ├── readme.md ├── setup.cfg ├── setup.py ├── skedulord ├── __init__.py ├── __main__.py ├── common.py ├── cron.py ├── dashboard.py ├── job.py └── templates │ ├── index.html │ ├── job.html │ ├── layout.html │ └── macros.html └── tests ├── schedule.yml ├── test_cron_parsing.py ├── test_happy_path.py └── test_unhappy_path.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-complexity=10 3 | max-line-length=120 4 | exclude = */__init__.py, */*/__init__.py -------------------------------------------------------------------------------- /.github/workflows/pythonpackage.yml: -------------------------------------------------------------------------------- 1 | name: Unit Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | max-parallel: 4 17 | matrix: 18 | python-version: [3.7, 3.8, 3.9] 19 | os: [ubuntu-latest] 20 | steps: 21 | - uses: actions/checkout@v1 22 | - name: setup ${{ matrix.python-version }} 23 | uses: actions/setup-python@v1 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: install 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install -e ".[dev]" 30 | - name: pytest 31 | run: pytest --disable-warnings 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C 
extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .nox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # IPython 77 | profile_default/ 78 | ipython_config.py 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | .dmypy.json 111 | dmypy.json 112 | 113 | # IDE 114 | .idea 115 | .vscode 116 | 117 | # GATSBY 118 | 119 | skedulord/web/templates/* 120 | skedulord/dashboard/build/* 121 | 122 | package-lock.json 123 | node_modules/* 124 | cypress/videos/* 125 | cypress/screenshots/* 126 | cov_html/* 
-------------------------------------------------------------------------------- /.gitpod.yml: -------------------------------------------------------------------------------- 1 | tasks: 2 | - init: pyenv local 3.7.2 && pip install -e ".[dev]" 3 | command: python setup.py develop -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 vincent d warmerdam 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include readme.md 2 | include LICENSE 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | flake: 2 | flake8 skedulord 3 | flake8 tests 4 | flake8 setup.py 5 | 6 | install: 7 | pip install -e ".[dev]" 8 | 9 | develop: install 10 | python setup.py develop 11 | 12 | test: 13 | pytest tests 14 | 15 | check: flake test clean 16 | 17 | clean: 18 | rm -rf .pytest_cache 19 | rm -rf build 20 | rm -rf dist 21 | rm -rf scikit_lego.egg-info 22 | rm -rf .ipynb_checkpoints 23 | rm -rf notebooks/.ipynb_checkpoints 24 | rm -rf skedulord.egg-info 25 | 26 | reset: 27 | python -m skedulord wipe disk --really --yes 28 | python -m skedulord wipe schedule --really --yes 29 | python -m skedulord run pyjob "python jobs/pyjob.py" --retry 1 --wait 0 30 | python -m skedulord run pyjob "python jobs/pyjob.py" --retry 1 --wait 0 31 | python -m skedulord run badpyjob "python jobs/badpyjob.py" --retry 3 --wait 0 32 | python -m skedulord run another-pyjob "python jobs/pyjob.py" --retry 1 --wait 0 33 | 34 | reset-big: 35 | python -m skedulord wipe disk --really --yes 36 | python -m skedulord wipe schedule --really --yes 37 | python -m skedulord run pyjob "python jobs/pyjob.py" --retry 1 --wait 0 38 | python -m skedulord run pyjob "python jobs/pyjob.py" --retry 1 --wait 0 39 | python -m skedulord run badpyjob "python jobs/badpyjob.py" --retry 3 --wait 0 40 | python -m skedulord run another-pyjob "python jobs/pyjob.py" --retry 1 --wait 0 41 | python -m skedulord run pyjob "python jobs/pyjob.py" --retry 1 --wait 0 42 | python -m skedulord run pyjob "python jobs/pyjob.py" --retry 1 --wait 0 43 | python -m skedulord run badpyjob "python jobs/badpyjob.py" --retry 3 --wait 1 44 | python -m 
skedulord run another-pyjob "python jobs/pyjob.py" --retry 1 --wait 0 45 | 46 | pypi: 47 | rm -rf dist 48 | python setup.py sdist 49 | python setup.py bdist_wheel --universal 50 | twine upload dist/* 51 | -------------------------------------------------------------------------------- /docs/cli.md: -------------------------------------------------------------------------------- 1 | ## `run` 2 | 3 | Run a single command, which is logged by skedulord. 4 | 5 | ```text 6 | Arguments: 7 | NAME The name you want to assign to the run. [required] 8 | COMMAND The command you want to run (in parentheses). [required] 9 | 10 | Options: 11 | --retry INTEGER The number of tries, should a job fail. [default: 2] 12 | --wait INTEGER The number of seconds between tries. [default: 60] 13 | --help Show this message and exit. 14 | ``` 15 | 16 | ## `schedule` 17 | 18 | Set (or reset) cron jobs based on config. 19 | 20 | ```text 21 | Arguments: 22 | CONFIG The config file containing the schedule. [required] 23 | 24 | Options: 25 | --help Show this message and exit. 26 | ``` 27 | 28 | ## `history` 29 | 30 | Shows a table with job status. 31 | 32 | ```text 33 | Options: 34 | --n INTEGER How many rows should the table show. 35 | [default: 10] 36 | 37 | --only-failures / --no-only-failures 38 | Only show failures. [default: False] 39 | --date TEXT Only show specific date. 40 | --name TEXT Only show jobs with specific name. 41 | --help Show this message and exit. 42 | ``` 43 | 44 | ## `build` 45 | 46 | Builds static html files so you may view a dashboard. 47 | 48 | ```text 49 | Options: 50 | --help Show this message and exit. 51 | ``` 52 | 53 | ## `serve` 54 | 55 | Serves the skedulord dashboard. 56 | 57 | ```text 58 | Options: 59 | --build / --no-build Build the site beforehand? [default: True] 60 | --port INTEGER The port number to use. [default: 8000] 61 | --help Show this message and exit. 62 | ``` 63 | ## `wipe` 64 | 65 | Wipe the disk or schedule state. 
66 | 67 | ```text 68 | Arguments: 69 | WHAT What to wipe. Either `disk` or `schedule`. [required] 70 | 71 | Options: 72 | --yes / --no-yes Are you sure? [default: False] 73 | --really / --no-really Really sure? [default: False] 74 | --user TEXT The name of the user. Default: current user. 75 | --help Show this message and exit. 76 | ``` 77 | 78 | ## `version` 79 | 80 | Show the version. 81 | 82 | ```text 83 | Options: 84 | --help Show this message and exit. 85 | ``` 86 | -------------------------------------------------------------------------------- /docs/dashboard1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koaning/skedulord/78dc0e630743a059573c34efdd52104586b2c4de/docs/dashboard1.png -------------------------------------------------------------------------------- /docs/dashboard2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koaning/skedulord/78dc0e630743a059573c34efdd52104586b2c4de/docs/dashboard2.png -------------------------------------------------------------------------------- /docs/dashboard3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koaning/skedulord/78dc0e630743a059573c34efdd52104586b2c4de/docs/dashboard3.png -------------------------------------------------------------------------------- /docs/full-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koaning/skedulord/78dc0e630743a059573c34efdd52104586b2c4de/docs/full-logo.png -------------------------------------------------------------------------------- /docs/getting-started.md: -------------------------------------------------------------------------------- 1 | 2 | Let's say we've got this small python file. 
3 | 4 | ```python 5 | # script.py 6 | for i in range(5): 7 | print(f"i am at iteration {i}") 8 | ``` 9 | 10 | The idea here is that `skedulord` can run this and keep track of logs. 11 | 12 | ```text 13 | python -m skedulord run jobname1 "python script.py" --retry 3 --wait 60 14 | python -m skedulord run jobname2 "python script.py" --retry 3 --wait 60 15 | ``` 16 | 17 | This will run the `"python script.py"` command as if you'd normally run it 18 | from the command line. The `skedulord` parts around it do some extra things though. 19 | 20 | ## Logs 21 | 22 | The main thing `skedulord` does is structure logs from your scripts. The logs 23 | are generated in a structured format so it's easy to find bugs. When we ran 24 | the `skedulord run` command we attached a jobname (aptly named `"jobname"`) which 25 | will also be the name of the folder where logs can be found. 26 | 27 | ```text 28 | > tree ~/.skedulord/ 29 | /Users/vincent/.skedulord/ 30 | ├── heartbeat.jsonl 31 | ├── jobname1 32 | │ ├── 2021-02-14T16:56:34.html 33 | │ └── 2021-02-14T16:56:34.txt 34 | └── jobname2 35 | ├── 2021-02-14T16:56:35.html 36 | └── 2021-02-14T16:56:35.txt 37 | 38 | ``` 39 | 40 | The logs themselves have a timestamp as the filename. There are basic `.txt` logs 41 | but also fancy `.html` logs which render nicely in a dashboard. 42 | 43 | ## Mechanics 44 | 45 | We've designed `skedulord` to also be able to rerun jobs if they fail. Hiccups are 46 | part of real life and sometimes we can make a job succeed by trying again 5 minutes 47 | later. This is why you can specify how many attempts you'd like the job to make by 48 | setting `--retry`. You can also specify the wait time between attempts via `--wait`. 49 | 50 | ## Schedule 51 | 52 | The nice thing about `skedulord` running from the command line is that you can schedule 53 | it via `crontab` too! The downside is that it can get complex. 
You'd need to ensure that 54 | the jobs have access to the correct virtual environments and this can become a drag. 55 | 56 | To help out, `skedulord` can also configure cron for you by configuring a `.yml` file. 57 | 58 | ```yaml 59 | # schedule.yml 60 | user: vincent 61 | schedule: 62 | - name: ping 63 | command: python /home/vincent/path/scripts/ping.py 64 | cron: "*/2 * * * *" 65 | - name: github issues downloader 66 | command: python /full/path/to/cli.py --repo foobar --output /Users/vincent/data 67 | cron: "0 1 * * *" 68 | - name: github actions downloader 69 | command: python /full/path/to/scrape.py --repo foobar --output /Users/vincent/data 70 | cron: "0 1 * * *" 71 | ``` 72 | 73 | > Note that it's important to give full paths to files here. 74 | 75 | Given a schedule like this, skedulord can schedule everything via: 76 | 77 | ```text 78 | python -m skedulord schedule schedule.yml 79 | ``` 80 | 81 | You can confirm yourself that all jobs are now scheduled to run, with skedulord 82 | taking care of all the logging. 83 | 84 | ```text 85 | crontab -e 86 | ``` 87 | 88 | > Note that unless configured otherwise, skedulord will assume the same virtualenv 89 | as the one that ran the `skedulord schedule` command. This is important to be aware 90 | of if your scripts carry dependencies. 91 | 92 | ## Inspection 93 | 94 | Hopefully, your jobs won't fail. But if they do, you'd want to find the logs for the 95 | broken jobs as soon as possible. 96 | 97 | ```python 98 | python -m skedulord history 99 | ``` 100 | 101 |  102 | 103 | This history command has many query parameters that make it easy for you to find the 104 | logs of the jobs that failed. 105 | 106 | ## Dashboard 107 | 108 | If you want, you can even use skedulord to run a small dashboard for you. It's nice and 109 | minimal so as not to distract you. 110 | 111 | ```python 112 | python -m skedulord serve 113 | ``` 114 | 115 | The landing page shows an overview of all jobs. 
116 | 117 |  118 | 119 | You can click on the associated link to find all runs. 120 | 121 |  122 | 123 | From here you can explore the logs. We host both the raw .txt logs 124 | and a "fancy" variant that attempts some syntax highlighting. 125 | 126 |  127 | 128 | If you'd like to play around, we host a small demo of this dashboard [here](https://koaning.github.io/skedulord-demo/). 129 | 130 | ## Shutting Down 131 | 132 | If you're done with the app you can wipe the cronjobs and disk state from the command 133 | line as well. 134 | 135 | ```python 136 | # Wipe all the logs from disk. 137 | python -m skedulord wipe disk --yes --really 138 | # Wipe all the cron entries. 139 | python -m skedulord wipe schedule --yes --really 140 | ``` -------------------------------------------------------------------------------- /docs/history1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koaning/skedulord/78dc0e630743a059573c34efdd52104586b2c4de/docs/history1.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 |  2 | 3 | > Skedulord is a tool that automates scheduling and logging of jobs. It's a 4 | layer on top of cron. It's mainly meant for python users but it can also be 5 | used for other tools launched from the command line. 6 | 7 | If you're new, check out the [getting started guide](https://koaning.github.io/skedulord/getting-started.html). 8 | 9 | ## Commands 10 | 11 | ```text 12 | Usage: __main__.py [OPTIONS] COMMAND [ARGS]... 13 | 14 | SKEDULORD: helps with cronjobs and logs. 15 | 16 | Options: 17 | --help Show this message and exit. 18 | 19 | Commands: 20 | build Builds static html files so you may view a dashboard. 21 | history Shows a table with job status. 22 | run Run a single command, which is logged by skedulord. 23 | schedule Set (or reset) cron jobs based on config. 
24 | serve Opens the dashboard in a browser. 25 | version Show the version. 26 | wipe Wipe the disk or schedule state. 27 | ``` 28 | 29 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koaning/skedulord/78dc0e630743a059573c34efdd52104586b2c4de/docs/logo.png -------------------------------------------------------------------------------- /jobs/badpyjob.py: -------------------------------------------------------------------------------- 1 | def func1(a, b): 2 | return a + b 3 | 4 | 5 | def func2(c): 6 | print(c) 7 | 8 | 9 | if __name__ == "__main__": 10 | print("[INFO] starting!") 11 | func2(func1(1, '1')) 12 | -------------------------------------------------------------------------------- /jobs/printer.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | print(''.join(sys.argv)) 4 | -------------------------------------------------------------------------------- /jobs/pyjob.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | if __name__ == "__main__": 5 | for i in range(7): 6 | time.sleep(0.23) 7 | print({"iteration": i}) 8 | print({c: i for c in "abcd"}) 9 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: skedulord 2 | repo_url: https://github.com/koaning/skedulord 3 | site_url: https://koaning.github.io/skedulord/ 4 | site_description: A nice way to schedule and log. 5 | site_author: Vincent D. Warmerdam 6 | use_directory_urls: false 7 | nav: 8 | - Index: index.md 9 | - Getting Started: getting-started.md 10 | - CLI API: cli.md 11 | plugins: 12 | - search 13 | copyright: Copyright © 2020 Maintained by Vincent. 
14 | theme: 15 | name: material 16 | logo: logo.png 17 | font: 18 | text: Ubuntu 19 | code: Ubuntu Mono 20 | feature: 21 | tabs: true 22 | palette: 23 | primary: white 24 | accent: teal 25 | features: 26 | - navigation.tabs 27 | markdown_extensions: 28 | - admonition 29 | - codehilite 30 | - pymdownx.inlinehilite 31 | - pymdownx.details 32 | - pymdownx.tabbed 33 | - pymdownx.superfences 34 | - pymdownx.highlight: 35 | use_pygments: true 36 | guess_lang: true 37 | - toc: 38 | permalink: true -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 |  2 | 3 | > Skedulord is a tool that automates scheduling and logging of jobs. It's a 4 | layer on top of cron. It's mainly meant for Python users but it can also be 5 | used for other tools launched from the command line. 6 | 7 | If you're new, check out the [getting started guide](https://koaning.github.io/skedulord/getting-started.html). 8 | 9 | ## Installation 10 | 11 | ```python 12 | pip install skedulord 13 | ``` 14 | 15 | If you like to live dangerously, you can also install from GitHub for the latest commit. 16 | 17 | ``` 18 | python -m pip install --upgrade "skedulord @ git+https://github.com/koaning/skedulord.git" 19 | ``` 20 | 21 | ## Usage 22 | 23 | These are all the commands available: 24 | 25 | ``` 26 | > python -m skedulord 27 | Usage: __main__.py [OPTIONS] COMMAND [ARGS]... 28 | 29 | SKEDULORD: helps with cronjobs and logs. 30 | 31 | Options: 32 | --help Show this message and exit. 33 | 34 | Commands: 35 | schedule Set (or reset) cron jobs based on config. 36 | run Run a single command, which is logged by skedulord. 37 | history Shows a table with job status. 38 | summary Shows a summary of all jobs. 39 | build Builds static html files so you may view a dashboard. 40 | serve Opens the dashboard in a browser. 41 | wipe Wipe the disk or schedule state. 42 | version Show the version. 
43 | ``` 44 | 45 | ## Documentation 46 | 47 | The docs are hosted on GitHub Pages and can be found [here](https://koaning.github.io/skedulord/). 48 | 49 | ## Demo 50 | 51 | Let's say we've got this small python file. 52 | 53 | ```python 54 | # script.py 55 | for i in range(5): 56 | print(f"i am at iteration {i}") 57 | ``` 58 | 59 | The idea here is that `skedulord` can run this and keep track of logs. 60 | 61 | ```text 62 | python -m skedulord run jobname1 "python script.py" --retry 3 --wait 60 63 | python -m skedulord run jobname2 "python script.py" --retry 3 --wait 60 64 | ``` 65 | 66 | This will run the `"python script.py"` command as if you'd normally run it 67 | from the command line. The `skedulord` parts around it do some extra things though. 68 | 69 | ## Logs 70 | 71 | The main thing `skedulord` does is structure logs from your scripts. The logs 72 | are generated in a structured format so it's easy to find bugs. When we ran 73 | the `skedulord run` command we attached a jobname (aptly named `"jobname"`) which 74 | will also be the name of the folder where logs can be found. 75 | 76 | ```text 77 | > tree ~/.skedulord/ 78 | /Users/vincent/.skedulord/ 79 | ├── heartbeat.jsonl 80 | ├── jobname1 81 | │ └── 2021-02-14T16:56:34.txt 82 | └── jobname2 83 | └── 2021-02-14T16:56:35.txt 84 | 85 | ``` 86 | 87 | The logs themselves have a timestamp as the filename. 88 | 89 | ## Mechanics 90 | 91 | We've designed `skedulord` to also be able to rerun jobs if they fail. Hiccups are 92 | part of real life and sometimes we can make a job succeed by trying again 5 minutes 93 | later. This is why you can specify how many attempts you'd like the job to make by 94 | setting `--retry`. You can also specify the wait time between attempts via `--wait`. 95 | 96 | ### Schedule 97 | 98 | The nice thing about `skedulord` running from the command line is that you can schedule 99 | it via `crontab` too! The downside is that it can get complex. 
You'd need to ensure that 100 | the jobs have access to the correct virtual environments and this can become a drag. 101 | 102 | To help out, `skedulord` can also configure cron for you by configuring a `.yml` file. 103 | 104 | ```yaml 105 | # schedule.yml 106 | user: vincent 107 | schedule: 108 | - name: ping 109 | command: python /home/vincent/path/scripts/ping.py 110 | cron: "*/2 * * * *" 111 | - name: github issues downloader 112 | command: python /full/path/to/cli.py --repo foobar --output /Users/vincent/data 113 | cron: "0 1 * * *" 114 | - name: github actions downloader 115 | command: python /full/path/to/scrape.py --repo foobar --output /Users/vincent/data 116 | cron: "0 1 * * *" 117 | ``` 118 | 119 | Given a schedule like this, skedulord can schedule everything via: 120 | 121 | ```text 122 | python -m skedulord schedule schedule.yml 123 | ``` 124 | 125 | You can confirm yourself that all jobs are now scheduled to run, with skedulord 126 | taking care of all the logging. 127 | 128 | ```text 129 | crontab -e 130 | ``` 131 | 132 | > Note that when a command starts with `python` skedulord will assume the same virtualenv 133 | as the one that ran the `skedulord schedule` command. You may also pass another Python path 134 | if you prefer to use another virtualenv. 135 | 136 | ### Dashboard 137 | 138 | If you want, you can even use skedulord to run a small dashboard for you to show 139 | all the logs from past jobs. These are all available from the terminal as well, 140 | but it's nice to have an extra interface. 141 | 142 | ```python 143 | python -m skedulord serve 144 | ``` 145 | 146 | The landing page shows an overview of all jobs. 147 | 148 |  149 | 150 | You can click on the associated link to find all runs. 151 | 152 |  153 | 154 | From here you can explore the logs. We host both the raw .txt logs 155 | and a "fancy" variant that attempts some syntax highlighting. 
156 | 157 |  158 | 159 | If you'd like to play around, we host a small demo of this dashboard [here](https://koaning.github.io/skedulord-demo/). 160 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license_files = LICENSE 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | 4 | from skedulord import __version__ 5 | 6 | 7 | def read(fname): 8 | return open(os.path.join(os.path.dirname(__file__), fname)).read() 9 | 10 | 11 | base_packages = ["PyYAML>=5.1.1", "rich>=9.10.0", "clumper>=0.2.8", "typer>=0.3.2", "python-crontab>=2.5.1"] 12 | 13 | dev_packages = ["pytest", "pytest-cov", "mkdocs-material>=6.2.8"] 14 | 15 | 16 | setup( 17 | name="skedulord", 18 | version=__version__, 19 | packages=find_packages(), 20 | long_description=read('readme.md'), 21 | long_description_content_type='text/markdown', 22 | url="https://koaning.github.io/skedulord/", 23 | author='Vincent D. 
Warmerdam', 24 | install_requires=base_packages, 25 | entry_points={ 26 | 'console_scripts': [ 27 | 'skedulord = skedulord.__main__:app' 28 | ], 29 | }, 30 | extras_require={ 31 | "dev": dev_packages 32 | }, 33 | package_data={'skedulord': ['templates/*.html']}, 34 | classifiers=['Programming Language :: Python :: 3', 35 | 'Programming Language :: Python :: 3.7', 36 | 'Programming Language :: Python :: 3.8', 37 | 'License :: OSI Approved :: MIT License'] 38 | ) 39 | -------------------------------------------------------------------------------- /skedulord/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "2.0.0" 2 | __all__ = ["__version__"] 3 | -------------------------------------------------------------------------------- /skedulord/__main__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import subprocess 4 | from pathlib import Path 5 | from typing import Union 6 | 7 | import typer 8 | from rich import print 9 | from rich.table import Table 10 | from clumper import Clumper 11 | 12 | from skedulord import __version__ as lord_version 13 | from skedulord.job import JobRunner 14 | from skedulord.common import SKEDULORD_PATH, heartbeat_path, skedulord_path 15 | from skedulord.cron import Cron, clean_cron, parse_job_from_settings 16 | from skedulord.dashboard import build_site 17 | 18 | app = typer.Typer( 19 | name="SKEDULORD", 20 | add_completion=False, 21 | help="SKEDULORD: helps with cronjobs and logs.", 22 | ) 23 | 24 | 25 | @app.command() 26 | def version(): 27 | """Show the version.""" 28 | print(lord_version) 29 | 30 | 31 | @app.command() 32 | def run( 33 | name: str = typer.Argument(..., help="The name you want to assign to the run."), 34 | command: str = typer.Argument( 35 | None, help="The command you want to run (in parentheses)." 
36 | ), 37 | settings_path: Union[Path, None] = typer.Option(None, help="Schedule config to reference."), 38 | retry: int = typer.Option(2, help="The number of tries, should a job fail."), 39 | wait: int = typer.Option(60, help="The number of seconds between tries."), 40 | ): 41 | """Run a single command, which is logged by skedulord.""" 42 | if settings_path: 43 | settings = Clumper.read_yaml(settings_path).unpack("schedule").keep(lambda d: d['name'] == name).collect() 44 | command = parse_job_from_settings(settings, name) 45 | JobRunner(retry=retry, wait=wait, name=name, cmd=command).run() 46 | 47 | 48 | @app.command() 49 | def schedule( 50 | config: Path = typer.Argument( 51 | ..., help="The config file containing the schedule.", exists=True 52 | ) 53 | ): 54 | """Set (or reset) cron jobs based on config.""" 55 | Cron(config).set_new_cron() 56 | 57 | 58 | @app.command() 59 | def wipe( 60 | what: str = typer.Argument(..., help="What to wipe. Either `disk` or `schedule`."), 61 | yes: bool = typer.Option(False, is_flag=True, prompt=True, help="Are you sure?"), 62 | really: bool = typer.Option(False, is_flag=True, prompt=True, help="Really sure?"), 63 | user: str = typer.Option(None, help="The name of the user. 
Default: curent user."), 64 | ): 65 | """Wipe the disk or schedule state.""" 66 | if yes and really: 67 | if what == "disk": 68 | if Path(SKEDULORD_PATH).exists(): 69 | shutil.rmtree(SKEDULORD_PATH) 70 | print("Disk state has been cleaned.") 71 | if what == "schedule": 72 | if not user: 73 | name = subprocess.run(["whoami"], stdout=subprocess.PIPE) 74 | user = name.stdout.decode("utf8").strip() 75 | clean_cron(user=user) 76 | print("Cron state has been cleaned.") 77 | else: 78 | print("Crisis averted.") 79 | 80 | 81 | @app.command() 82 | def history( 83 | n: int = typer.Option(10, help="How many rows should the table show."), 84 | only_failures: bool = typer.Option(False, is_flag=True, help="Only show failures."), 85 | date: str = typer.Option(None, is_flag=True, help="Only show specific date."), 86 | name: str = typer.Option( 87 | None, is_flag=True, help="Only show jobs with specific name." 88 | ), 89 | ): 90 | """Shows a table with job status.""" 91 | clump = Clumper.read_jsonl(heartbeat_path()).sort( 92 | lambda _: _["start"], reverse=True 93 | ) 94 | if only_failures: 95 | clump = clump.keep(lambda _: _["status"] != "success") 96 | if name: 97 | clump = clump.keep(lambda _: name in _["name"]) 98 | if date: 99 | clump = clump.keep(lambda _: date in _["start"]) 100 | table = Table(title=None) 101 | table.add_column("status") 102 | table.add_column("date") 103 | table.add_column("name") 104 | table.add_column("logfile") 105 | for d in clump.head(n).collect(): 106 | table.add_row( 107 | f"[{'red' if d['status'] == 'fail' else 'green'}]{d['status']}[/]", 108 | d["start"], 109 | d["name"], 110 | d["logpath"], 111 | ) 112 | print(table) 113 | 114 | 115 | @app.command(name="build") 116 | def build(): 117 | """ 118 | Builds static html files so you may view a dashboard after. 
119 | """ 120 | build_site() 121 | 122 | 123 | @app.command(name="serve") 124 | def serve( 125 | build: bool = typer.Option(True, help="Build the site beforehand?"), 126 | port: int = typer.Option(8000, help="The port number to use.") 127 | ): 128 | """ 129 | Serves the skedulord dashboard. 130 | """ 131 | if build: 132 | build_site() 133 | os.system(f"python -m http.server --directory {skedulord_path()} {port}") 134 | 135 | 136 | if __name__ == "__main__": 137 | app() 138 | -------------------------------------------------------------------------------- /skedulord/common.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import pathlib 4 | 5 | SKEDULORD_PATH = os.path.join(os.path.expanduser("~/.skedulord")) 6 | 7 | 8 | def skedulord_path() -> str: 9 | return pathlib.Path(SKEDULORD_PATH) 10 | 11 | 12 | def job_name_path(jobname) -> str: 13 | return skedulord_path() / jobname 14 | 15 | 16 | def heartbeat_path() -> str: 17 | return skedulord_path() / "heartbeat.jsonl" 18 | 19 | 20 | def log_heartbeat(run_id, name, command, tic, toc, status, logpath): 21 | heartbeat = { 22 | "id": run_id, 23 | "name": name, 24 | "command": command, 25 | "start": str(tic)[:19], 26 | "end": str(toc)[:19], 27 | "status": status, 28 | "logpath": logpath, 29 | } 30 | 31 | with open(heartbeat_path(), "a") as f: 32 | f.write(json.dumps(heartbeat) + "\n") 33 | -------------------------------------------------------------------------------- /skedulord/cron.py: -------------------------------------------------------------------------------- 1 | import typer 2 | import subprocess 3 | from clumper import Clumper 4 | from crontab import CronTab 5 | 6 | 7 | def clean_cron(user: str): 8 | """Removes all entries in cron.""" 9 | cron = CronTab(user=user) 10 | cron.remove_all() 11 | cron.write() 12 | 13 | 14 | def parse_job_from_settings(settings: dict, name: str) -> str: 15 | """Parse a job from a settings dictionary. 
""" 16 | if len(settings) == 0: 17 | print(f"The name `{name}` doesn't appear in supplied schedule config.") 18 | raise typer.Exit(code=1) 19 | cmd_settings = settings[0] 20 | arguments = " ".join([f"--{k} {v}" for k, v in cmd_settings.get('arguments', {}).items()]) 21 | 22 | # Ensure we remove the space at the end. 23 | return f"{cmd_settings['command']} {arguments}".rstrip() 24 | 25 | 26 | class Cron: 27 | def __init__(self, settings_path): 28 | self.settings = Clumper.read_yaml(settings_path).unpack("schedule").collect() 29 | 30 | def parse_cmd(self, setting: dict) -> str: 31 | """ 32 | Parse single cron setting into elaborate command for crontab. 33 | """ 34 | # If no venv is given we assume the one you're currently in. 35 | python = "python" 36 | if "venv" not in setting.keys(): 37 | output = subprocess.run(["which", "python"], capture_output=True) 38 | python = output.stdout.decode("ascii").replace("\n", "") 39 | 40 | # Set base values. 41 | retry = setting.get("retry", 2) 42 | wait = setting.get("wait", 60) 43 | 44 | # We only want to replace python if it is at the start. 
45 | cmd = setting["command"] 46 | if cmd.startswith("python"): 47 | cmd = cmd.replace("python", python, 1) 48 | big_cmd = f'{python} -m skedulord run {setting["name"]} "{cmd}" --retry {retry} --wait {wait}' 49 | return big_cmd.rstrip() 50 | 51 | def set_new_cron(self): 52 | cron = CronTab(user=self.settings[0]["user"]) 53 | cron.remove_all() 54 | 55 | for s in self.settings: 56 | s["name"] = s["name"].replace(" ", "-") 57 | cmd = self.parse_cmd(s) 58 | job = cron.new(command=cmd, comment=s["name"]) 59 | job.setall(s["cron"]) 60 | cron.write() 61 | -------------------------------------------------------------------------------- /skedulord/dashboard.py: -------------------------------------------------------------------------------- 1 | import io 2 | import datetime as dt 3 | from pathlib import Path 4 | from clumper import Clumper 5 | from rich.console import Console 6 | from skedulord.common import skedulord_path 7 | from pkg_resources import resource_filename 8 | from jinja2 import Environment, FileSystemLoader, select_autoescape 9 | 10 | 11 | def create_html(logpath): 12 | text = Path(logpath).read_text() 13 | 14 | console = Console(record=True, file=io.StringIO(), log_path=False, log_time=False, width=2000) 15 | for line in text.split("\n"): 16 | console.print(line) 17 | console.save_html(str(logpath).replace(".txt", ".html")) 18 | 19 | 20 | def build_site(): 21 | heartbeats = Clumper.read_jsonl(Path(skedulord_path()) / "heartbeat.jsonl") 22 | clump = (heartbeats 23 | .mutate(jobname=lambda d: d['name'], 24 | details=lambda d: "link") 25 | .group_by("jobname") 26 | .agg(start=("start", "last"), 27 | end=("end", "last"), 28 | status=("status", "last")) 29 | .mutate(start_time = lambda d: dt.datetime.strptime(d['start'], "%Y-%m-%d %H:%M:%S"), 30 | end_time = lambda d: dt.datetime.strptime(d['end'], "%Y-%m-%d %H:%M:%S"), 31 | timediff = lambda d: (d['end_time'] - d['start_time']).seconds) 32 | .sort(lambda d: d['start_time'], reverse=True)) 33 | 34 | env = 
Environment( 35 | loader=FileSystemLoader(resource_filename('skedulord', 'templates')), 36 | autoescape=select_autoescape(['html', 'xml']) 37 | ) 38 | 39 | main_page = env.get_template('index.html').render(jobs=clump.collect()) 40 | Path(Path(skedulord_path()) / "index.html").write_text(main_page) 41 | 42 | for item in clump.collect(): 43 | jobname = item['jobname'] 44 | subset = (heartbeats 45 | .keep(lambda d: d['name'] == jobname) 46 | .mutate(start_time = lambda d: dt.datetime.strptime(d['start'], "%Y-%m-%d %H:%M:%S"), 47 | end_time = lambda d: dt.datetime.strptime(d['end'], "%Y-%m-%d %H:%M:%S"), 48 | timediff = lambda d: (d['end_time'] - d['start_time']).seconds, 49 | txt_path = lambda d: f"{jobname}/{d['start'].replace(' ', 'T')}.txt", 50 | html_path = lambda d: f"{jobname}/{d['start'].replace(' ', 'T')}.html",) 51 | .sort(lambda d: d['start_time'], reverse=True)) 52 | 53 | job_page = env.get_template('job.html').render(jobname=jobname, runs=subset.collect()) 54 | Path(Path(skedulord_path()) / f"{jobname}.html").write_text(job_page) 55 | 56 | for p in Path(skedulord_path()).glob("*/*.txt"): 57 | create_html(p) 58 | -------------------------------------------------------------------------------- /skedulord/job.py: -------------------------------------------------------------------------------- 1 | import io 2 | import json 3 | import time 4 | import uuid 5 | import pathlib 6 | import subprocess 7 | import datetime as dt 8 | from skedulord.common import job_name_path, log_heartbeat 9 | from pathlib import Path 10 | 11 | class JobRunner: 12 | """ 13 | Object in charge of running a job and logging it. 
14 | """ 15 | 16 | def __init__(self, name, cmd, retry=3, wait=60): 17 | self.name = name 18 | self.cmd = cmd 19 | self.retry = retry 20 | self.wait = wait 21 | self.start_time = str(dt.datetime.now())[:19].replace(" ", "T") 22 | self.logpath = Path(job_name_path(name)) / f"{self.start_time}.txt" 23 | pathlib.Path(self.logpath).parent.mkdir(parents=True, exist_ok=True) 24 | pathlib.Path(self.logpath).touch() 25 | self.file = self.logpath.open("a") 26 | 27 | def _attempt_cmd(self, command, name, run_id): 28 | tries = 1 29 | stop = False 30 | while not stop: 31 | info = {"name": name, "command": command, "run_id": run_id, "attempt": tries, "timestamp": str(dt.datetime.now())} 32 | self.file.writelines([json.dumps(info), "\n"]) 33 | output = subprocess.run( 34 | command.split(" "), 35 | cwd=str(pathlib.Path().cwd()), 36 | stdout=subprocess.PIPE, 37 | stderr=subprocess.STDOUT, 38 | encoding="utf-8", 39 | universal_newlines=True, 40 | ) 41 | for line in output.stdout.split("\n"): 42 | self.file.writelines([line, "\n"]) 43 | if output.returncode == 0: 44 | stop = True 45 | else: 46 | tries += 1 47 | if tries > self.retry: 48 | stop = True 49 | else: 50 | time.sleep(self.wait) 51 | return "fail" if tries > self.retry else "success" 52 | 53 | def run(self): 54 | """ 55 | Run and log a command. 
56 | """ 57 | run_id = str(uuid.uuid4())[:8] 58 | start_time = self.start_time 59 | status = self._attempt_cmd(command=self.cmd, name=self.name, run_id=run_id) 60 | endtime = str(dt.datetime.now())[:19] 61 | job_name_path(self.name).mkdir(parents=True, exist_ok=True) 62 | logpath = str(job_name_path(self.name) / f"{start_time}.txt") 63 | log_heartbeat( 64 | run_id=run_id, 65 | name=self.name, 66 | command=self.cmd, 67 | status=status, 68 | tic=start_time.replace("T", " "), 69 | toc=endtime, 70 | logpath=logpath 71 | ) 72 | -------------------------------------------------------------------------------- /skedulord/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | 4 | {% block content %} 5 | 6 | 7 |
You can inspect all the logs from the tables below.
14 |Jobname | 23 |Status | 24 |Time Taken | 25 |Last Run | 26 |Runs | 27 |
---|---|---|---|---|
{{job['jobname']}} | 33 |34 | {% if job['status'] == 'success' %} 35 | 38 | {% endif %} 39 | {% if job['status'] != 'success' %} 40 | 43 | {% endif %} 44 | | {{job['timediff']}}s | 45 |{{job['start']}} | 46 |47 | 48 | 51 | 52 | | 53 |
You can inspect all the logs from the {{jobname}} job below.
14 |Run | 23 |Status | 24 |Datetime | 25 |Time Taken | 26 |Logs | 27 |
---|---|---|---|---|
{{run['id']}} | 33 |34 | {% if run['status'] == 'success' %} 35 | 38 | {% endif %} 39 | {% if run['status'] != 'success' %} 40 | 43 | {% endif %} 44 | | {{run['start']}} | 45 |{{run['timediff']}}s | 46 |47 | txt 48 | html 49 | | 50 |