├── .github └── workflows │ └── codeql-analysis.yml ├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── conf ├── logrotate.d │ └── nohang └── nohang │ ├── nohang-desktop.conf.in │ ├── nohang.conf.in │ └── test.conf ├── deb ├── DEBIAN │ ├── conffiles │ ├── control │ └── postinst └── build.sh ├── docs ├── FAQ.ru.md ├── nohang.manpage.md ├── oom-sort.manpage.md ├── psi-top.manpage.md └── psi2log.manpage.md ├── man ├── nohang.8 ├── oom-sort.1 ├── psi-top.1 └── psi2log.1 ├── openrc ├── nohang-desktop.in └── nohang.in ├── src ├── nohang ├── oom-sort ├── psi-top └── psi2log └── systemd ├── nohang-desktop.service.in └── nohang.service.in /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ master ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ master ] 20 | schedule: 21 | - cron: '19 19 * * 2' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | 28 | strategy: 29 | fail-fast: false 30 | matrix: 31 | language: [ 'python' ] 32 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 33 | # Learn more: 34 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 35 | 36 | steps: 37 | - name: Checkout repository 38 | uses: actions/checkout@v2 39 | 40 | # Initializes the CodeQL tools for scanning. 41 | - name: Initialize CodeQL 42 | uses: github/codeql-action/init@v1 43 | with: 44 | languages: ${{ matrix.language }} 45 | # If you wish to specify custom queries, you can do so here or in a config file. 46 | # By default, queries listed here will override any specified in a config file. 47 | # Prefix the list here with "+" to use these queries and those in the config file. 48 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 49 | 50 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 51 | # If this step fails, then you should remove it and run the build manually (see below) 52 | - name: Autobuild 53 | uses: github/codeql-action/autobuild@v1 54 | 55 | # ℹ️ Command-line programs to run using the OS shell. 
56 | # 📚 https://git.io/JvXDl 57 | 58 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 59 | # and modify them (or add more) to build your code if your project 60 | # uses a compiled language 61 | 62 | #- run: | 63 | # make bootstrap 64 | # make release 65 | 66 | - name: Perform CodeQL Analysis 67 | uses: github/codeql-action/analyze@v1 68 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *.cover 46 | .hypothesis/ 47 | .pytest_cache/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | db.sqlite3 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # Environments 84 | .env 85 | .venv 86 | env/ 87 | venv/ 88 | ENV/ 89 | env.bak/ 90 | venv.bak/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | .spyproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | 105 | # Kate 106 | .kate-swp 107 | 108 | # deb 109 | /deb/package/ 110 | /deb/package.deb 111 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: bionic 2 | 3 | language: python 4 | 5 | sudo: required 6 | 7 | script: 8 | - sudo make install 9 | - sudo systemctl enable --now nohang.service 10 | - sudo systemctl stop nohang.service 11 | - sudo systemctl enable --now nohang-desktop.service 12 | - sudo systemctl stop nohang-desktop.service 13 | 14 | - oom-sort -h 15 | - oom-sort 16 | 17 | - nohang -h 18 | - nohang -v 19 | 20 | - nohang --check --config /usr/local/etc/nohang/nohang.conf 21 | - nohang --check --config /usr/local/etc/nohang/nohang-desktop.conf 22 | - nohang --check --config conf/nohang/test.conf 23 | 24 | - sudo 
nohang --config /usr/local/etc/nohang/nohang.conf --tasks 25 | - sudo nohang --config /usr/local/etc/nohang/nohang-desktop.conf --tasks 26 | 27 | - /bin/sleep 60 & 28 | - sudo bash -c "nohang --monitor --config conf/nohang/test.conf & tail /dev/zero & sleep 30 && pkill python3" 29 | - sudo cat /var/log/nohang/nohang.log 30 | 31 | - sudo make uninstall 32 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | This changelog is outdated. It will be updated later. 4 | 5 | ## [Unreleased] 6 | 7 | - Added new CLI options: 8 | - -v, --version 9 | - -m, --memload 10 | - --monitor 11 | - --tasks 12 | - --check-config 13 | - Possible process crashes are fixed: 14 | - Fixed crash at startup due to `UnicodeDecodeError` on some systems 15 | - Handled `UnicodeDecodeError` if victim name consists of many unicode characters ([rfjakob/earlyoom#110](https://github.com/rfjakob/earlyoom/issues/110)) 16 | - Fixed process crash before performing corrective actions if Python 3.4 or lower are used to interpret nohang 17 | - Improve output: 18 | - Display `oom_score`, `oom_score_adj`, `Ancestry`, `EUID`, `State`, `VmSize`, `RssAnon`, `RssFile`, `RssShmem`, `CGroup_v1`, `CGroup_v2`, `Realpath`, `Cmdline` and `Lifetime` of the victim in corrective action reports 19 | - Added memory report interval 20 | - Added delta memory info (the rate of change of available memory) 21 | - Print statistics on corrective actions after each corrective action 22 | - Added ability to print a process table before each corrective action 23 | - Added the ability to log into a separate file 24 | - Improved GUI warnings: 25 | - Reduced the idle time of the daemon in the process of launching a notification 26 | - All notify-send calls are made using the `nohang_notify_helper` script, in which all timeouts are handled (not anymore: nohang_notify_helper has been removed) 
27 | - Native python implementation of `env` search without running `ps` to notify all users if nohang started with UID=0. 28 | - Improved modifying badness via matching with regular expressions: 29 | - Added the ability to set many different `badness_adj` for processes depending on the matching `Name`, `CGroup_v1`, `CGroup_v2`, `cmdline`, `realpath`, `environ` and `EUID` with the specified regular expressions ([issue #11](https://github.com/hakavlad/nohang/issues/11)) 30 | - Fix: replace `re.fullmatch()` by `re.search()` 31 | - Reduced memory usage: 32 | - Reduced memory usage and startup time (using `sys.argv` instead of `argparse`) 33 | - Reduced memory usage with `mlockall()` using `MCL_ONFAULT` ([rfjakob/earlyoom#112](https://github.com/rfjakob/earlyoom/issues/112)) 34 | - Lock all memory by default using mlockall() 35 | - Added new tools: 36 | - `oom-sort` 37 | - `psi-top` 38 | - `psi2log` 39 | - Improve poll rate algorithm 40 | - Fixed Makefile for installation on CentOS 7 (remove gzip `-k` option). 
41 | - Added `max_post_sigterm_victim_lifetime` option: send SIGKILL to the victim if it doesn't respond to SIGTERM for a certain time 42 | - Added `post_kill_exe` option (the ability to run any command after killing a victim) 43 | - Added `warning_exe` option (the ability to run any command instead of GUI low memory warnings) 44 | - Added `victim_cache_time` option 45 | - Improved victim search algorithm (do it ~30% faster) ([rfjakob/earlyoom#114](https://github.com/rfjakob/earlyoom/issues/114)) 46 | - Improved limiting `oom_score_adj`: now it can work with UID != 0 47 | - Fixed conf parsing: use of `line.partition('=')` instead of `line.split('=')` 48 | - Removed self-defense options from the config, use systemd unit scheduling instead 49 | - Added the ability to send any signal instead of SIGTERM for processes with certain names 50 | - Added support for `PSI` 51 | - Recheck memory levels after finding a victim to prevent killing innocent victims in some cases ([issue #20](https://github.com/hakavlad/nohang/issues/20)) 52 | - Now one corrective action to one victim can be applied only once. 53 | - Ignoring zram by default, checking for this has become optional. 54 | - Improved user input validation 55 | - Improved documentation 56 | - Handle signals (SIGTERM, SIGINT, SIGQUIT, SIGHUP), print total stat by corrective actions at exit. 
57 | 58 | ## [0.1] - 2018-11-23 59 | 60 | [unreleased]: https://github.com/hakavlad/nohang/compare/v0.1...HEAD 61 | [0.1]: https://github.com/hakavlad/nohang/releases/tag/v0.1 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Alexey Avramov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for nohang: installs the daemon, helper tools, configs, man pages
2 | # and service files. Every install/remove path is prefixed with $(DESTDIR).
3 | 
4 | # Installation layout. All of these use ?= and can therefore be overridden
5 | # from the command line or the environment.
6 | DESTDIR ?=
7 | PREFIX ?= /usr/local
8 | SYSCONFDIR ?= /usr/local/etc
9 | SYSTEMDUNITDIR ?= /usr/local/lib/systemd/system
10 | 
11 | BINDIR ?= $(PREFIX)/bin
12 | SBINDIR ?= $(PREFIX)/sbin
13 | DATADIR ?= $(PREFIX)/share
14 | DOCDIR ?= $(DATADIR)/doc/nohang
15 | MANDIR ?= $(DATADIR)/man
16 | 
17 | # Path to pandoc, or empty when it is not found (tested by update-manpages).
18 | PANDOC := $(shell command -v pandoc 2> /dev/null)
19 | 
20 | # None of the targets below produce a file named after themselves.
21 | .PHONY: all update-manpages base units chcon daemon-reload build_deb \
22 | 	reinstall-deb install install-openrc uninstall-base uninstall-units \
23 | 	uninstall-openrc uninstall
24 | 
25 | # Default goal: only prints a usage hint.
26 | all:
27 | 	@ echo "Use: make install, make install-openrc, make uninstall"
28 | 
29 | # Regenerate the man pages in man/ from the Markdown sources in docs/.
30 | # Skipped with a message when pandoc is not available.
31 | update-manpages:
32 | 
33 | ifdef PANDOC
34 | 	pandoc docs/nohang.manpage.md -s -t man > man/nohang.8
35 | 	pandoc docs/oom-sort.manpage.md -s -t man > man/oom-sort.1
36 | 	pandoc docs/psi2log.manpage.md -s -t man > man/psi2log.1
37 | 	pandoc docs/psi-top.manpage.md -s -t man > man/psi-top.1
38 | else
39 | 	@echo "pandoc is not installed, skipping manpages generation"
40 | endif
41 | 
42 | # Install everything that does not depend on the init system: executables,
43 | # rendered configs, version file, logrotate snippet, man pages and docs.
44 | # Temporary files (nohang.conf, nohang-desktop.conf, version, nohang.8) are
45 | # created in the current directory and removed again.
46 | base:
47 | 	install -p -d $(DESTDIR)$(SBINDIR)
48 | 	install -p -m0755 src/nohang $(DESTDIR)$(SBINDIR)/nohang
49 | 
50 | 	install -p -d $(DESTDIR)$(BINDIR)
51 | 	install -p -m0755 src/oom-sort $(DESTDIR)$(BINDIR)/oom-sort
52 | 	install -p -m0755 src/psi-top $(DESTDIR)$(BINDIR)/psi-top
53 | 	install -p -m0755 src/psi2log $(DESTDIR)$(BINDIR)/psi2log
54 | 
55 | 	install -p -d $(DESTDIR)$(SYSCONFDIR)/nohang
56 | 
57 | 	sed "s|:TARGET_DATADIR:|$(DATADIR)|" \
58 | 		conf/nohang/nohang.conf.in > nohang.conf
59 | 
60 | 	sed "s|:TARGET_DATADIR:|$(DATADIR)|" \
61 | 		conf/nohang/nohang-desktop.conf.in > nohang-desktop.conf
62 | 
63 | 	install -p -m0644 nohang.conf $(DESTDIR)$(SYSCONFDIR)/nohang/nohang.conf
64 | 	install -p -m0644 nohang-desktop.conf $(DESTDIR)$(SYSCONFDIR)/nohang/nohang-desktop.conf
65 | 
66 | 	install -p -d $(DESTDIR)$(DATADIR)/nohang
67 | 
68 | 	install -p -m0644 nohang.conf $(DESTDIR)$(DATADIR)/nohang/nohang.conf
69 | 	install -p -m0644 nohang-desktop.conf $(DESTDIR)$(DATADIR)/nohang/nohang-desktop.conf
70 | 
71 | 	-git describe --tags --long --dirty > version
72 | 	install -p -m0644 version $(DESTDIR)$(DATADIR)/nohang/version
73 | 
74 | 	rm -fv nohang.conf
75 | 	rm -fv nohang-desktop.conf
76 | 	rm -fv version
77 | 
78 | 	install -p -d $(DESTDIR)/etc/logrotate.d
79 | 	install -p -m0644 conf/logrotate.d/nohang $(DESTDIR)/etc/logrotate.d/nohang
80 | 
81 | 	install -p -d $(DESTDIR)$(MANDIR)/man1
82 | 	gzip -9cn man/oom-sort.1 > $(DESTDIR)$(MANDIR)/man1/oom-sort.1.gz
83 | 	gzip -9cn man/psi-top.1 > $(DESTDIR)$(MANDIR)/man1/psi-top.1.gz
84 | 	gzip -9cn man/psi2log.1 > $(DESTDIR)$(MANDIR)/man1/psi2log.1.gz
85 | 
86 | 	install -p -d $(DESTDIR)$(MANDIR)/man8
87 | 
88 | 	sed "s|:SYSCONFDIR:|$(SYSCONFDIR)|g; s|:DATADIR:|$(DATADIR)|g" \
89 | 		man/nohang.8 > nohang.8
90 | 
91 | 	gzip -9cn nohang.8 > $(DESTDIR)$(MANDIR)/man8/nohang.8.gz
92 | 	rm -fv nohang.8
93 | 
94 | 	install -p -d $(DESTDIR)$(DOCDIR)
95 | 	install -p -m0644 README.md $(DESTDIR)$(DOCDIR)/README.md
96 | 	install -p -m0644 CHANGELOG.md $(DESTDIR)$(DOCDIR)/CHANGELOG.md
97 | 
98 | # Render the systemd unit templates and install them into SYSTEMDUNITDIR.
99 | units:
100 | 	install -p -d $(DESTDIR)$(SYSTEMDUNITDIR)
101 | 
102 | 	sed "s|:TARGET_SBINDIR:|$(SBINDIR)|; s|:TARGET_SYSCONFDIR:|$(SYSCONFDIR)|" \
103 | 		systemd/nohang.service.in > nohang.service
104 | 
105 | 	sed "s|:TARGET_SBINDIR:|$(SBINDIR)|; s|:TARGET_SYSCONFDIR:|$(SYSCONFDIR)|" \
106 | 		systemd/nohang-desktop.service.in > nohang-desktop.service
107 | 
108 | 	install -p -m0644 nohang.service $(DESTDIR)$(SYSTEMDUNITDIR)/nohang.service
109 | 	install -p -m0644 nohang-desktop.service $(DESTDIR)$(SYSTEMDUNITDIR)/nohang-desktop.service
110 | 
111 | 	rm -fv nohang.service
112 | 	rm -fv nohang-desktop.service
113 | 
114 | # Set the systemd_unit_file_t SELinux type on the installed units.
115 | # "|| :" makes a failing chcon non-fatal.
116 | chcon:
117 | 	chcon -t systemd_unit_file_t $(DESTDIR)$(SYSTEMDUNITDIR)/nohang.service || :
118 | 	chcon -t systemd_unit_file_t $(DESTDIR)$(SYSTEMDUNITDIR)/nohang-desktop.service || :
119 | 
120 | # Ask systemd to re-read unit files; failure is ignored.
121 | daemon-reload:
122 | 	systemctl daemon-reload || :
123 | 
124 | # Files and units only (no chcon, no daemon-reload).
125 | # NOTE(review): presumably invoked by deb/build.sh with DESTDIR set - confirm.
126 | build_deb: base units
127 | 
128 | # Build the deb package and reinstall it with apt.
129 | # Note: each recipe line runs in its own shell, so "set -v" does not affect
130 | # the lines that follow it.
131 | reinstall-deb:
132 | 	set -v
133 | 	deb/build.sh
134 | 	sudo apt install --reinstall ./deb/package.deb
135 | 
136 | # Full installation for systemd-based systems.
137 | install: base units chcon daemon-reload
138 | 	# This is fine.
139 | 
140 | # Installation for OpenRC systems: base files plus rendered init scripts.
141 | install-openrc: base
142 | 	install -p -d $(DESTDIR)$(SYSCONFDIR)/init.d
143 | 	sed "s|:TARGET_SBINDIR:|$(SBINDIR)|; s|:TARGET_SYSCONFDIR:|$(SYSCONFDIR)|" \
144 | 		openrc/nohang.in > openrc/nohang
145 | 	sed "s|:TARGET_SBINDIR:|$(SBINDIR)|; s|:TARGET_SYSCONFDIR:|$(SYSCONFDIR)|" \
146 | 		openrc/nohang-desktop.in > openrc/nohang-desktop
147 | 	install -p -m0775 openrc/nohang $(DESTDIR)$(SYSCONFDIR)/init.d/nohang
148 | 	install -p -m0775 openrc/nohang-desktop $(DESTDIR)$(SYSCONFDIR)/init.d/nohang-desktop
149 | 	rm -fv openrc/nohang
150 | 	rm -fv openrc/nohang-desktop
151 | 
152 | # Remove everything installed by "base", plus the /var/log/nohang directory.
153 | uninstall-base:
154 | 	rm -fv $(DESTDIR)$(SBINDIR)/nohang
155 | 	rm -fv $(DESTDIR)$(BINDIR)/oom-sort
156 | 	rm -fv $(DESTDIR)$(BINDIR)/psi-top
157 | 	rm -fv $(DESTDIR)$(BINDIR)/psi2log
158 | 
159 | 	rm -fv $(DESTDIR)$(MANDIR)/man1/oom-sort.1.gz
160 | 	rm -fv $(DESTDIR)$(MANDIR)/man1/psi-top.1.gz
161 | 	rm -fv $(DESTDIR)$(MANDIR)/man1/psi2log.1.gz
162 | 
163 | 	rm -fv $(DESTDIR)$(MANDIR)/man8/nohang.8.gz
164 | 
165 | # The path must match the one used in "base". A "$$/" typo here would expand
166 | # the unset make variable named "/" and yield a path relative to the CWD.
167 | 	rm -fvr $(DESTDIR)/etc/logrotate.d/nohang
168 | 	rm -fvr $(DESTDIR)$(DOCDIR)/
169 | 	rm -fvr $(DESTDIR)/var/log/nohang/
170 | 	rm -fvr $(DESTDIR)$(DATADIR)/nohang/
171 | 	rm -fvr $(DESTDIR)$(SYSCONFDIR)/nohang/
172 | 
173 | # Stop and disable the systemd units (errors ignored), then delete them.
174 | uninstall-units:
175 | 	systemctl stop nohang.service || :
176 | 	systemctl stop nohang-desktop.service || :
177 | 	systemctl disable nohang.service || :
178 | 	systemctl disable nohang-desktop.service || :
179 | 
180 | 	rm -fv $(DESTDIR)$(SYSTEMDUNITDIR)/nohang.service
181 | 	rm -fv $(DESTDIR)$(SYSTEMDUNITDIR)/nohang-desktop.service
182 | 
183 | # Stop the OpenRC services (errors ignored) and delete their init scripts.
184 | uninstall-openrc:
185 | 	rc-service nohang-desktop stop || :
186 | 	rc-service nohang stop || :
187 | 
188 | 	rm -fv $(DESTDIR)$(SYSCONFDIR)/init.d/nohang
189 | 	rm -fv $(DESTDIR)$(SYSCONFDIR)/init.d/nohang-desktop
190 | 
191 | # Remove files for both init systems.
192 | uninstall: uninstall-base uninstall-units daemon-reload uninstall-openrc
193 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![pic](https://i.imgur.com/scXQ312.png) 2 | 3 | # nohang 4 | 5 | [![Build Status](https://travis-ci.org/hakavlad/nohang.svg?branch=master)](https://travis-ci.org/hakavlad/nohang) 6 | ![CodeQL](https://github.com/hakavlad/nohang/workflows/CodeQL/badge.svg) 7 | [![Packaging status](https://repology.org/badge/tiny-repos/nohang.svg)](https://repology.org/project/nohang/versions) 8 | 9 | `nohang` package provides a highly configurable daemon for Linux which is able to correctly prevent [out of memory](https://en.wikipedia.org/wiki/Out_of_memory) (OOM) and keep system responsiveness in low memory conditions. 10 | 11 | The package also includes additional diagnostic tools (`oom-sort`, `psi2log`, `psi-top`). 12 | 13 | ## What is the problem? 14 | 15 | OOM conditions may cause [freezes](https://en.wikipedia.org/wiki/Hang_(computing)), [livelocks](https://en.wikipedia.org/wiki/Deadlock#Livelock), drop [caches](https://en.wikipedia.org/wiki/Page_cache) and processes to be killed (via sending [SIGKILL](https://en.wikipedia.org/wiki/Signal_(IPC)#SIGKILL)) instead of trying to terminate them correctly (via sending [SIGTERM](https://en.wikipedia.org/wiki/Signal_(IPC)#SIGTERM) or takes other corrective action). Some applications may crash if it's impossible to allocate memory. 16 | 17 | Here are the statements of some users: 18 | 19 | > "How do I prevent Linux from freezing when out of memory? 20 | Today I (accidentally) ran some program on my Linux box that quickly used a lot of memory. My system froze, became unresponsive and thus I was unable to kill the offender. 21 | How can I prevent this in the future? Can't it at least keep a responsive core or something running?" 
22 | 23 | — [serverfault](https://serverfault.com/questions/390623/how-do-i-prevent-linux-from-freezing-when-out-of-memory) 24 | 25 | > "With or without swap it still freezes before the OOM killer gets run automatically. This is really a kernel bug that should be fixed (i.e. run OOM killer earlier, before dropping all disk cache). Unfortunately kernel developers and a lot of other folk fail to see the problem. Common suggestions such as disable/enable swap, buy more RAM, run less processes, set limits etc. do not address the underlying problem that the kernel's low memory handling sucks camel's balls." 26 | 27 | — [serverfault](https://serverfault.com/questions/390623/how-do-i-prevent-linux-from-freezing-when-out-of-memory#comment417508_390625) 28 | 29 | > "The traditional Linux OOM killer works fine in some cases, but in others it kicks in too late, resulting in the system entering a [livelock](https://en.wikipedia.org/wiki/Deadlock#Livelock) for an indeterminate period." 30 | 31 | — [engineering.fb.com](https://engineering.fb.com/production-engineering/oomd/) 32 | 33 | Also look at these discussions: 34 | - Why are low memory conditions handled so badly? [[r/linux](https://www.reddit.com/r/linux/comments/56r4xj/why_are_low_memory_conditions_handled_so_badly/)] 35 | - Memory management "more effective" on Windows than Linux? 
(in preventing total system lockup) [[r/linux](https://www.reddit.com/r/linux/comments/aqd9mh/memory_management_more_effective_on_windows_than/)] 36 | - Let's talk about the elephant in the room - the Linux kernel's inability to gracefully handle low memory pressure [[original LKML post](https://lkml.org/lkml/2019/8/4/15) | [r/linux](https://www.reddit.com/r/linux/comments/cmg48b/lets_talk_about_the_elephant_in_the_room_the/) | [Hacker News](https://news.ycombinator.com/item?id=20620545) | [slashdot](https://linux.slashdot.org/story/19/08/06/1839206/linux-performs-poorly-in-low-ram--memory-pressure-situations-on-the-desktop) | [phoronix](https://www.phoronix.com/forums/forum/phoronix/general-discussion/1118164-yes-linux-does-bad-in-low-ram-memory-pressure-situations-on-the-desktop) | [opennet.ru](https://www.opennet.ru/opennews/art.shtml?num=51231) | [linux.org.ru](https://www.linux.org.ru/forum/talks/15151526)] 37 | 38 | ## Solution 39 | 40 | Use one of the userspace OOM killers: 41 | - [earlyoom](https://github.com/rfjakob/earlyoom): This is a simple, stable and tiny OOM prevention daemon written in C (the best choice for embedded and old servers). It has minimal dependencies and can work with the oldest kernels. It is enabled by default on Fedora 32 Workstation (and F33 KDE). 42 | - [oomd](https://github.com/facebookincubator/oomd): This is a userspace OOM killer for linux systems written in C++ and developed by Facebook. This is the best choice for use in large data centers. It needs Linux 4.20+. 43 | - [systemd-oomd](https://man7.org/linux/man-pages/man8/systemd-oomd.service.8.html): Provided by systemd as `systemd-oomd.service` that uses cgroups-v2 and pressure stall information (PSI) to monitor and take action on processes before an OOM occurs in kernel space. It's used by default on [desktop versions of Fedora 34](https://fedoraproject.org/wiki/Changes/EnableSystemdOomd). 
44 | - [low-memory-monitor](https://gitlab.freedesktop.org/hadess/low-memory-monitor/): There's a [project announcement](http://www.hadess.net/2019/08/low-memory-monitor-new-project.html). 45 | - [psi-monitor](https://github.com/endlessm/eos-boot-helper/tree/master/psi-monitor): It's used by default on [Endless OS](https://endlessos.com/). 46 | - `nohang`: nohang is earlyoom on steroids and has many useful features, see below. Maybe this is a good choice for modern desktops and servers if you need fine-tuning. Previously it was used by default on [Garuda Linux](https://garudalinux.org/). 47 | 48 | Use these tools to improve responsiveness during heavy swapping: 49 | - MGLRU patchset is merged in Linux 6.1. Setting `min_ttl_ms` > 50 can help you. 50 | - [le9-patch](https://github.com/hakavlad/le9-patch): [PATCH] mm: Protect clean file pages under memory pressure to prevent thrashing, avoid high latency and prevent livelock in near-OOM conditions. It's kernel-side solution that can fix the OOM killer behavior. 51 | - [prelockd](https://github.com/hakavlad/prelockd): Lock executables and shared libraries in memory to improve system responsiveness under low-memory conditions. 52 | - [memavaild](https://github.com/hakavlad/memavaild): Keep amount of available memory by evicting memory of selected cgroups into swap space. 53 | - [uresourced](https://gitlab.freedesktop.org/benzea/uresourced): This daemon will give resource allocations to active graphical users. It's [enabled by default](https://fedoraproject.org/wiki/Changes/Reserve_resources_for_active_user_WS) on Fedora 33 Workstation. 
54 | 55 | Of course, you can also [download more RAM](https://downloadmoreram.com/), tune [virtual memory](https://www.kernel.org/doc/Documentation/sysctl/vm.txt), use [zram](https://www.kernel.org/doc/Documentation/blockdev/zram.txt)/[zswap](https://www.kernel.org/doc/Documentation/vm/zswap.txt) and use [limits](https://www.freedesktop.org/software/systemd/man/systemd.resource-control.html) for cgroups. 56 | 57 | ## Features 58 | 59 | - Sending the SIGTERM signal is default corrective action. If the victim does not respond to SIGTERM, with a further drop in the level of memory it gets SIGKILL; 60 | - Customizing victim selection: impact on the badness of processes via matching their names, cgroups, exe realpathes, environs, cmdlines and euids with specified regular expressions; 61 | - Customizing corrective actions: if the name or control group of the victim matches a certain regex pattern, you can run any command instead of sending the SIGTERM signal (the default corrective action) to the victim. For example: 62 | - `systemctl restart foo`; 63 | - `kill -INT $PID` (you can override the signal sent to the victim, $PID will be replaced by the victim's PID). 64 | - GUI notifications: 65 | - Notification of corrective actions taken and displaying the name and PID of the victim; 66 | - Low memory warnings. 67 | - [zram](https://www.kernel.org/doc/Documentation/blockdev/zram.txt) support (`mem_used_total` as a trigger); 68 | - [PSI](https://lwn.net/Articles/759658/) ([pressure stall information](https://facebookmicrosites.github.io/psi/)) support; 69 | - Optional checking kernel messages for OOM events; 70 | - Easy setup with configuration files ([nohang.conf](https://github.com/hakavlad/nohang/blob/master/conf/nohang/nohang.conf.in), [nohang-desktop.conf](https://github.com/hakavlad/nohang/blob/master/conf/nohang/nohang-desktop.conf.in)). 
71 | 72 | ## Demo 73 | 74 | `nohang` prevents Out Of Memory with GUI notifications: 75 | 76 | - [https://youtu.be/ChTNu9m7uMU](https://youtu.be/ChTNu9m7uMU) – just an old demo without swap space. 77 | - [https://youtu.be/UCwZS5uNLu0](https://youtu.be/UCwZS5uNLu0) – running multiple fast memory hogs at the same time without swap space. 78 | - [https://youtu.be/PLVWgNrVNlc](https://youtu.be/PLVWgNrVNlc) – opening multiple chromium tabs with 2.3 GiB memory and 1.8 GiB swap space on zram. 79 | 80 | ## Requirements 81 | 82 | For basic usage: 83 | - `Linux` (>= 3.14, since `MemAvailable` appeared in `/proc/meminfo`) 84 | - `Python` (>= 3.3) 85 | 86 | To respond to `PSI` metrics (optional): 87 | - `Linux` (>= 4.20) with `CONFIG_PSI=y` 88 | 89 | To show GUI notifications (optional): 90 | - [notification server](https://wiki.archlinux.org/index.php/Desktop_notifications#Notification_servers) (most desktop environments use their own implementations) 91 | - `libnotify` (Arch Linux, Fedora, openSUSE) or `libnotify-bin` (Debian GNU/Linux, Ubuntu) 92 | - `sudo` if nohang started with UID=0. 93 | 94 | ## Memory and CPU usage 95 | 96 | - VmRSS is about 10–14 MiB depending on the settings, about 10–11 MiB by default (with Python <= 3.8), about 16–17 MiB with Python 3.9. 97 | - CPU usage depends on the level of available memory and monitoring intensity. 98 | 99 | ## Warnings 100 | 101 | - the daemon runs with super-user privileges and has full access to all private memory of all processes and sensitive user data; 102 | - the daemon does not forbid you to shoot yourself in the foot: with some settings, unwanted killings of processes can occur; 103 | - the daemon is not a panacea: there are no universal settings that reliably protect against all types of threats. 104 | 105 | ## Known problems 106 | 107 | - The documentation is terrible. 108 | - The ZFS ARC cache is memory-reclaimable, like the Linux buffer cache. 
However, in contrast to the buffer cache, it currently does not count to MemAvailable (see [openzfs/zfs#10255](https://github.com/openzfs/zfs/issues/10255)). See also https://github.com/rfjakob/earlyoom/pull/191 and https://github.com/hakavlad/nohang/issues/89. 109 | - Linux kernels without `CONFIG_CGROUP_CPUACCT=y` ([linux-ck](https://wiki.archlinux.org/index.php/Linux-ck), for example) provide incorrect PSI metrics, see [issue](https://github.com/hakavlad/nohang/issues/25#issuecomment-643716504). 110 | 111 | ## nohang vs nohang-desktop 112 | 113 | `nohang` comes with two configs: `nohang.conf` and `nohang-desktop.conf`. `nohang` comes with two systemd service unit files: `nohang.service` and `nohang-desktop.service`. Choose one. 114 | 115 | - `nohang.conf` provides vanilla default settings without PSI checking enabled, without any badness correction and without GUI notifications enabled. 116 | - `nohang-desktop.conf` provides default settings optimized for desktop usage. 117 | 118 | ## How to install 119 | 120 | #### To install on [Fedora](https://src.fedoraproject.org/rpms/nohang/): 121 | 122 | Orphaned for 6+ weeks, not available. 123 | 124 | #### To install on RHEL 7 and RHEL 8: 125 | 126 | nohang is available in [EPEL repos](https://fedoraproject.org/wiki/EPEL). 127 | ```bash 128 | sudo yum install nohang 129 | sudo systemctl enable nohang.service 130 | sudo systemctl start nohang.service 131 | ``` 132 | To enable PSI on RHEL 8 pass `psi=1` to kernel boot cmdline. 133 | 134 | #### For Arch Linux there's an [AUR package](https://aur.archlinux.org/packages/nohang-git/) 135 | 136 | Use your favorite [AUR helper](https://wiki.archlinux.org/index.php/AUR_helpers). 
For example, 137 | ```bash 138 | yay -S nohang-git 139 | sudo systemctl enable --now nohang-desktop.service 140 | ``` 141 | 142 | #### To install on Ubuntu 20.04/20.10 143 | 144 | To install from [PPA](https://launchpad.net/~oibaf/+archive/ubuntu/test/): 145 | ```bash 146 | sudo add-apt-repository ppa:oibaf/test 147 | sudo apt update 148 | sudo apt install nohang 149 | sudo systemctl enable --now nohang-desktop.service 150 | ``` 151 | 152 | #### To install on Debian and Ubuntu-based systems: 153 | 154 | Outdated and buggy nohang v0.1 release was packaged for [Debian 11](https://packages.debian.org/bullseye/source/nohang) and [Ubuntu 20.10](https://packages.ubuntu.com/source/groovy/nohang). 155 | 156 | It's easy to build a deb package with the latest git snapshot. Install build dependencies: 157 | ```bash 158 | sudo apt install make fakeroot 159 | ``` 160 | 161 | Clone the latest git snapshot and run the build script to build the package: 162 | ```bash 163 | git clone https://github.com/hakavlad/nohang.git && cd nohang 164 | deb/build.sh 165 | ``` 166 | 167 | Install the package: 168 | ```bash 169 | sudo apt install --reinstall ./deb/package.deb 170 | ``` 171 | 172 | Start and enable `nohang.service` or `nohang-desktop.service` after installing the package: 173 | ```bash 174 | sudo systemctl enable --now nohang-desktop.service 175 | ``` 176 | 177 | #### To install on Gentoo and derivatives (e.g. Funtoo): 178 | 179 | Add the [eph kit](https://git.sr.ht/~happy_shredder/eph_kit) overlay, for example using layman or as a local repository. 
180 | Then update your repos: 181 | 182 | ```bash 183 | sudo layman -S # if added via layman 184 | sudo emerge --sync # local repo on Gentoo 185 | sudo ego sync # local repo on Funtoo 186 | ``` 187 | 188 | Install: 189 | 190 | ```bash 191 | sudo emerge -a nohang 192 | ``` 193 | 194 | Start the service: 195 | 196 | ```bash 197 | sudo rc-service nohang-desktop start 198 | ``` 199 | 200 | Optionally add to startup: 201 | 202 | ```bash 203 | sudo rc-update add nohang-desktop default 204 | ``` 205 | 206 | #### To install the latest version on any distro: 207 | ```bash 208 | git clone https://github.com/hakavlad/nohang.git && cd nohang 209 | sudo make install 210 | ``` 211 | 212 | Config files will be located in `/usr/local/etc/nohang/`. To enable and start unit without GUI notifications: 213 | ```bash 214 | sudo systemctl enable --now nohang.service 215 | ``` 216 | 217 | To enable and start unit with GUI notifications: 218 | ```bash 219 | sudo systemctl enable --now nohang-desktop.service 220 | ``` 221 | 222 | On systems with OpenRC: 223 | ```bash 224 | sudo make install-openrc 225 | ``` 226 | 227 | To uninstall: 228 | ```bash 229 | sudo make uninstall 230 | ``` 231 | 232 | ## Command line options 233 | 234 | ``` 235 | ./nohang -h 236 | usage: nohang [-h|--help] [-v|--version] [-m|--memload] 237 | [-c|--config CONFIG] [--check] [--monitor] [--tasks] 238 | 239 | optional arguments: 240 | -h, --help show this help message and exit 241 | -v, --version show version of installed package and exit 242 | -m, --memload consume memory until 40 MiB (MemAvailable + SwapFree) 243 | remain free, and terminate the process 244 | -c CONFIG, --config CONFIG 245 | path to the config file. This should only be used 246 | with one of the following options: 247 | --monitor, --tasks, --check 248 | --check check and show the configuration and exit. This should 249 | only be used with -c/--config CONFIG option 250 | --monitor start monitoring. 
This should only be used with 251 | -c/--config CONFIG option 252 | --tasks show tasks state and exit. This should only be used 253 | with -c/--config CONFIG option 254 | ``` 255 | 256 | ## How to configure 257 | 258 | The program can be configured by editing the config file. The configuration includes the following sections: 259 | 260 | 0. Checking kernel messages for OOM events; 261 | 1. Common zram settings; 262 | 2. Common PSI settings; 263 | 3. Poll rate; 264 | 4. Warnings and notifications; 265 | 5. Soft threshold; 266 | 6. Hard threshold; 267 | 7. Customize victim selection; 268 | 8. Customize soft corrective actions; 269 | 9. Misc settings; 270 | 10. Verbosity, debug, logging. 271 | 272 | Just read the description of the parameters and edit the values. Please restart the daemon to apply the changes. 273 | 274 | ## How to test nohang 275 | 276 | - The safest way is to run `nohang --memload`. This causes memory consumption, and the process will exit before OOM occurs. 277 | - Another way is to run `tail /dev/zero`. This causes fast memory consumption and causes OOM at the end. 278 | 279 | If testing occurs while `nohang` is running, these processes should be terminated before OOM occurs. 280 | 281 | ## Tasks state 282 | 283 | Run `sudo nohang -c/--config CONFIG --tasks` to see the table of processes with their badness values, oom_scores, names, UIDs etc. 284 | 285 |
286 | Output example 287 | 288 | ``` 289 | Config: /etc/nohang/nohang.conf 290 | ################################################################################################################### 291 | # PID PPID badness oom_score oom_score_adj eUID S VmSize VmRSS VmSwap Name CGroup 292 | #------- ------- ------- --------- ------------- ---------- - ------ ----- ------ --------------- -------- 293 | # 336 1 1 1 0 0 S 85 25 0 systemd-journal /system.slice/systemd-journald.service 294 | # 383 1 0 0 -1000 0 S 46 5 0 systemd-udevd /system.slice/systemd-udevd.service 295 | # 526 2238 7 7 0 1000 S 840 96 0 kate /user.slice/user-1000.slice/session-7.scope 296 | # 650 1 3 3 0 1000 S 760 50 0 kate /user.slice/user-1000.slice/session-7.scope 297 | # 731 1 0 0 0 100 S 126 4 0 systemd-timesyn /system.slice/systemd-timesyncd.service 298 | # 756 1 0 0 0 105 S 181 3 0 rtkit-daemon /system.slice/rtkit-daemon.service 299 | # 759 1 0 0 0 0 S 277 7 0 accounts-daemon /system.slice/accounts-daemon.service 300 | # 761 1 0 0 0 0 S 244 3 0 rsyslogd /system.slice/rsyslog.service 301 | # 764 1 0 0 -900 108 S 45 5 0 dbus-daemon /system.slice/dbus.service 302 | # 805 1 0 0 0 0 S 46 5 0 systemd-logind /system.slice/systemd-logind.service 303 | # 806 1 0 0 0 0 S 35 3 0 irqbalance /system.slice/irqbalance.service 304 | # 813 1 0 0 0 0 S 29 3 0 cron /system.slice/cron.service 305 | # 814 1 11 11 0 0 S 176 160 0 memlockd /system.slice/memlockd.service 306 | # 815 1 0 0 -10 0 S 32 9 0 python3 /fork.slice/fork-bomb.slice/fork-bomb-killer.slice/fork-bomb-killer.service 307 | # 823 1 0 0 0 0 S 25 4 0 smartd /system.slice/smartd.service 308 | # 826 1 0 0 0 113 S 46 3 0 avahi-daemon /system.slice/avahi-daemon.service 309 | # 850 826 0 0 0 113 S 46 0 0 avahi-daemon /system.slice/avahi-daemon.service 310 | # 868 1 0 0 0 0 S 281 8 0 polkitd /system.slice/polkit.service 311 | # 903 1 1 1 0 0 S 4094 16 0 stunnel4 /system.slice/stunnel4.service 312 | # 940 1 0 0 -600 0 S 39 10 0 python3 
/nohang.slice/nohang.service 313 | # 1014 1 0 0 0 13 S 22 2 0 obfs-local /system.slice/obfs-local.service 314 | # 1015 1 0 0 0 1000 S 36 4 0 ss-local /system.slice/ss-local.service 315 | # 1023 1 0 0 0 116 S 33 2 0 dnscrypt-proxy /system.slice/dnscrypt-proxy.service 316 | # 1029 1 1 1 0 119 S 4236 16 0 privoxy /system.slice/privoxy.service 317 | # 1035 1 0 0 0 0 S 355 6 0 lightdm /system.slice/lightdm.service 318 | # 1066 1 0 0 0 0 S 45 7 0 wpa_supplicant /system.slice/wpa_supplicant.service 319 | # 1178 1 0 0 0 0 S 14 2 0 agetty /system.slice/system-getty.slice/getty@tty1.service 320 | # 1294 1 0 0 -1000 0 S 4 1 0 watchdog /system.slice/watchdog.service 321 | # 1632 1 1 1 0 1000 S 1391 22 0 pulseaudio /user.slice/user-1000.slice/session-2.scope 322 | # 1689 1632 0 0 0 1000 S 125 5 0 gconf-helper /user.slice/user-1000.slice/session-2.scope 323 | # 1711 1 0 0 0 0 S 367 8 0 udisksd /system.slice/udisks2.service 324 | # 1819 1 0 0 0 0 S 304 8 0 upowerd /system.slice/upower.service 325 | # 1879 1 0 0 0 1000 S 64 7 0 systemd /user.slice/user-1000.slice/user@1000.service/init.scope 326 | # 1880 1879 0 0 0 1000 S 229 2 0 (sd-pam) /user.slice/user-1000.slice/user@1000.service/init.scope 327 | # 1888 1 0 0 0 0 S 14 2 0 agetty /system.slice/system-getty.slice/getty@tty2.service 328 | # 1889 1 0 0 0 0 S 14 2 0 agetty /system.slice/system-getty.slice/getty@tty3.service 329 | # 1890 1 0 0 0 0 S 14 2 0 agetty /system.slice/system-getty.slice/getty@tty4.service 330 | # 1891 1 0 0 0 0 S 14 2 0 agetty /system.slice/system-getty.slice/getty@tty5.service 331 | # 1892 1 0 0 0 0 S 14 2 0 agetty /system.slice/system-getty.slice/getty@tty6.service 332 | # 1893 1035 14 14 0 0 R 623 208 0 Xorg /system.slice/lightdm.service 333 | # 1904 1 0 0 0 111 S 64 7 0 systemd /user.slice/user-111.slice/user@111.service/init.scope 334 | # 1905 1904 0 0 0 111 S 229 2 0 (sd-pam) /user.slice/user-111.slice/user@111.service/init.scope 335 | # 1916 1904 0 0 0 111 S 44 3 0 dbus-daemon 
/user.slice/user-111.slice/user@111.service/dbus.service 336 | # 1920 1 0 0 0 111 S 215 5 0 at-spi2-registr /user.slice/user-111.slice/session-c2.scope 337 | # 1922 1904 0 0 0 111 S 278 6 0 gvfsd /user.slice/user-111.slice/user@111.service/gvfs-daemon.service 338 | # 1935 1035 0 0 0 0 S 238 6 0 lightdm /user.slice/user-1000.slice/session-7.scope 339 | # 1942 1 0 0 0 1000 S 210 9 0 gnome-keyring-d /user.slice/user-1000.slice/session-7.scope 340 | # 1944 1935 1 1 0 1000 S 411 21 0 mate-session /user.slice/user-1000.slice/session-7.scope 341 | # 1952 1879 0 0 0 1000 S 45 5 0 dbus-daemon /user.slice/user-1000.slice/user@1000.service/dbus.service 342 | # 1981 1944 0 0 0 1000 S 11 0 0 ssh-agent /user.slice/user-1000.slice/session-7.scope 343 | # 1984 1879 0 0 0 1000 S 278 6 0 gvfsd /user.slice/user-1000.slice/user@1000.service/gvfs-daemon.service 344 | # 1990 1879 0 0 0 1000 S 341 5 0 at-spi-bus-laun /user.slice/user-1000.slice/user@1000.service/at-spi-dbus-bus.service 345 | # 1995 1990 0 0 0 1000 S 44 4 0 dbus-daemon /user.slice/user-1000.slice/user@1000.service/at-spi-dbus-bus.service 346 | # 1997 1879 0 0 0 1000 S 215 5 0 at-spi2-registr /user.slice/user-1000.slice/user@1000.service/at-spi-dbus-bus.service 347 | # 2000 1879 0 0 0 1000 S 184 5 0 dconf-service /user.slice/user-1000.slice/user@1000.service/dbus.service 348 | # 2009 1944 2 2 0 1000 S 1308 35 0 mate-settings-d /user.slice/user-1000.slice/session-7.scope 349 | # 2013 1944 2 2 0 1000 S 436 32 0 marco /user.slice/user-1000.slice/session-7.scope 350 | # 2024 1944 4 4 0 1000 S 1258 55 0 caja /user.slice/user-1000.slice/session-7.scope 351 | # 2032 1 1 1 0 1000 S 333 18 0 msd-locate-poin /user.slice/user-1000.slice/session-7.scope 352 | # 2033 1879 0 0 0 1000 S 348 11 0 gvfs-udisks2-vo /user.slice/user-1000.slice/user@1000.service/gvfs-udisks2-volume-monitor.service 353 | # 2036 1944 1 1 0 1000 S 331 17 0 polkit-mate-aut /user.slice/user-1000.slice/session-7.scope 354 | # 2038 1944 5 5 0 1000 S 682 78 0 
mate-panel /user.slice/user-1000.slice/session-7.scope 355 | # 2041 1944 2 2 0 1000 S 514 31 0 nm-applet /user.slice/user-1000.slice/session-7.scope 356 | # 2046 1944 1 1 0 1000 S 495 25 0 mate-power-mana /user.slice/user-1000.slice/session-7.scope 357 | # 2047 1944 2 2 0 1000 S 692 32 0 mate-volume-con /user.slice/user-1000.slice/session-7.scope 358 | # 2049 1944 3 3 0 1000 S 548 44 0 mate-screensave /user.slice/user-1000.slice/session-7.scope 359 | # 2059 1879 0 0 0 1000 S 263 5 0 gvfs-goa-volume /user.slice/user-1000.slice/user@1000.service/gvfs-goa-volume-monitor.service 360 | # 2076 1879 0 0 0 1000 S 352 7 0 gvfsd-trash /user.slice/user-1000.slice/user@1000.service/gvfs-daemon.service 361 | # 2077 1879 0 0 0 1000 S 362 7 0 gvfs-afc-volume /user.slice/user-1000.slice/user@1000.service/gvfs-afc-volume-monitor.service 362 | # 2087 1879 0 0 0 1000 S 263 5 0 gvfs-mtp-volume /user.slice/user-1000.slice/user@1000.service/gvfs-mtp-volume-monitor.service 363 | # 2093 1879 0 0 0 1000 S 275 6 0 gvfs-gphoto2-vo /user.slice/user-1000.slice/user@1000.service/gvfs-gphoto2-volume-monitor.service 364 | # 2106 1879 3 3 0 1000 S 544 42 0 wnck-applet /user.slice/user-1000.slice/user@1000.service/dbus.service 365 | # 2108 1879 1 1 0 1000 S 396 21 0 notification-ar /user.slice/user-1000.slice/user@1000.service/dbus.service 366 | # 2112 1879 1 1 0 1000 S 499 25 0 mate-sensors-ap /user.slice/user-1000.slice/user@1000.service/dbus.service 367 | # 2113 1879 1 1 0 1000 S 390 21 0 mate-brightness /user.slice/user-1000.slice/user@1000.service/dbus.service 368 | # 2114 1879 1 1 0 1000 S 534 22 0 mate-multiload- /user.slice/user-1000.slice/user@1000.service/dbus.service 369 | # 2118 1879 2 2 0 1000 S 547 29 0 clock-applet /user.slice/user-1000.slice/user@1000.service/dbus.service 370 | # 2152 1879 1 1 0 1000 S 218 22 0 gvfsd-metadata /user.slice/user-1000.slice/user@1000.service/gvfs-metadata.service 371 | # 2206 1 3 3 0 110 S 106 48 0 tor /system.slice/system-tor.slice/tor@default.service 
372 | # 2229 1 3 3 0 1000 S 999 42 0 kactivitymanage /user.slice/user-1000.slice/session-7.scope 373 | # 2238 1 0 0 0 1000 S 150 9 0 kdeinit5 /user.slice/user-1000.slice/session-7.scope 374 | # 2239 2238 3 3 0 1000 S 648 41 0 klauncher /user.slice/user-1000.slice/session-7.scope 375 | # 3959 1 1 1 0 0 S 615 18 0 NetworkManager /system.slice/NetworkManager.service 376 | # 3977 3959 0 0 0 0 S 20 4 0 dhclient /system.slice/NetworkManager.service 377 | # 5626 1879 0 0 0 1000 S 355 7 0 gvfsd-network /user.slice/user-1000.slice/user@1000.service/gvfs-daemon.service 378 | # 5637 1879 1 1 0 1000 S 623 14 0 gvfsd-smb-brows /user.slice/user-1000.slice/user@1000.service/gvfs-daemon.service 379 | # 6296 1879 0 0 0 1000 S 435 7 0 gvfsd-dnssd /user.slice/user-1000.slice/user@1000.service/gvfs-daemon.service 380 | # 11129 1879 3 3 0 1000 S 597 42 0 kded5 /user.slice/user-1000.slice/user@1000.service/dbus.service 381 | # 11136 1879 2 2 0 1000 S 639 39 0 kuiserver5 /user.slice/user-1000.slice/user@1000.service/dbus.service 382 | # 11703 1879 3 3 0 1000 S 500 45 0 mate-system-mon /user.slice/user-1000.slice/user@1000.service/dbus.service 383 | # 16798 1879 0 0 0 1000 S 346 10 0 gvfsd-http /user.slice/user-1000.slice/user@1000.service/gvfs-daemon.service 384 | # 18133 1 3 3 0 1000 S 760 49 0 kate /user.slice/user-1000.slice/session-7.scope 385 | # 18144 2038 1 1 0 1000 S 301 23 0 lxterminal /user.slice/user-1000.slice/session-7.scope 386 | # 18147 18144 0 0 0 1000 S 14 2 0 gnome-pty-helpe /user.slice/user-1000.slice/session-7.scope 387 | # 18148 18144 1 1 0 1000 S 42 26 0 bash /user.slice/user-1000.slice/session-7.scope 388 | # 18242 2238 1 1 0 1000 S 194 14 0 file.so /user.slice/user-1000.slice/session-7.scope 389 | # 18246 18148 0 0 0 0 S 54 4 0 sudo /user.slice/user-1000.slice/session-7.scope 390 | # 19003 1 0 0 0 0 S 310 12 0 packagekitd /system.slice/packagekit.service 391 | # 26993 2038 91 91 0 1000 S 3935 1256 0 firefox-esr /user.slice/user-1000.slice/session-7.scope 392 | # 
27275 26993 121 121 0 1000 S 3957 1684 0 Web Content /user.slice/user-1000.slice/session-7.scope 393 | # 30374 1 1 1 0 1000 S 167 14 0 VBoxXPCOMIPCD /user.slice/user-1000.slice/session-7.scope 394 | # 30380 1 2 2 0 1000 S 958 27 0 VBoxSVC /user.slice/user-1000.slice/session-7.scope 395 | # 30549 30380 86 86 0 1000 S 5332 1192 0 VirtualBox /user.slice/user-1000.slice/session-7.scope 396 | # 30875 1 1 1 0 1000 S 345 26 0 leafpad /user.slice/user-1000.slice/session-7.scope 397 | # 32689 1 7 7 0 1000 S 896 99 0 dolphin /user.slice/user-1000.slice/session-7.scope 398 | ################################################################################################################### 399 | Process with highest badness (found in 55 ms): 400 | PID: 27275, Name: Web Content, badness: 121 401 | ``` 402 |
403 | 404 | 405 | ## Logging 406 | 407 | To view the latest entries in the log (for systemd users): 408 | ```bash 409 | sudo journalctl -eu nohang.service 410 | 411 | #### or 412 | 413 | sudo journalctl -eu nohang-desktop.service 414 | ``` 415 | 416 | You can also enable `separate_log` in the config to log to `/var/log/nohang/nohang.log`. 417 | 418 | ## oom-sort 419 | 420 | `oom-sort` is an additional diagnostic tool that will be installed with the `nohang` package. It sorts the processes in descending order of their `oom_score` and also displays `oom_score_adj`, `Uid`, `Pid`, `Name`, `VmRSS`, `VmSwap` and optionally `cmdline`. Run `oom-sort --help` for more info. Man page: [oom-sort.manpage.md](docs/oom-sort.manpage.md). 421 | 422 | Usage: 423 | 424 | ```bash 425 | oom-sort 426 | ``` 427 | 428 |
429 | Output example 430 | 431 | ``` 432 | oom_score oom_score_adj UID PID Name VmRSS VmSwap cmdline 433 | --------- ------------- ---- ----- --------------- ------- -------- ------- 434 | 23 0 0 964 Xorg 58 M 22 M /usr/libexec/Xorg -background none :0 vt01 -nolisten tcp -novtswitch -auth /var/run/lxdm/lxdm-:0.auth 435 | 13 0 1000 1365 pcmanfm 38 M 10 M pcmanfm --desktop --profile LXDE 436 | 10 0 1000 1408 dnfdragora-upda 9 M 27 M /usr/bin/python3 /bin/dnfdragora-updater 437 | 5 0 0 822 firewalld 0 M 19 M /usr/bin/python3 /usr/sbin/firewalld --nofork --nopid 438 | 5 0 1000 1364 lxpanel 18 M 2 M lxpanel --profile LXDE 439 | 5 0 1000 1685 nm-applet 6 M 12 M nm-applet 440 | 5 0 1000 1862 lxterminal 16 M 2 M lxterminal 441 | 4 0 996 890 polkitd 8 M 6 M /usr/lib/polkit-1/polkitd --no-debug 442 | 4 0 1000 1703 pnmixer 6 M 11 M pnmixer 443 | 3 0 0 649 systemd-journal 10 M 1 M /usr/lib/systemd/systemd-journald 444 | 3 0 1000 1360 openbox 9 M 2 M openbox --config-file /home/user/.config/openbox/lxde-rc.xml 445 | 3 0 1000 1363 notification-da 3 M 10 M /usr/libexec/notification-daemon 446 | 2 0 1000 1744 clipit 5 M 3 M clipit 447 | 2 0 1000 2619 python3 9 M 0 M python3 /bin/oom-sort 448 | 1 0 0 809 rsyslogd 3 M 3 M /usr/sbin/rsyslogd -n 449 | 1 0 0 825 udisksd 2 M 2 M /usr/libexec/udisks2/udisksd 450 | 1 0 0 873 sssd_nss 4 M 1 M /usr/libexec/sssd/sssd_nss --uid 0 --gid 0 --logger=files 451 | 1 0 0 876 systemd-logind 2 M 2 M /usr/lib/systemd/systemd-logind 452 | 1 0 0 907 abrt-dump-journ 2 M 1 M /usr/bin/abrt-dump-journal-oops -fxtD 453 | 1 0 0 920 NetworkManager 3 M 2 M /usr/sbin/NetworkManager --no-daemon 454 | 1 0 1000 1115 systemd 4 M 1 M /usr/lib/systemd/systemd --user 455 | 1 0 1000 1118 (sd-pam) 0 M 5 M (sd-pam) 456 | 1 0 1000 1366 xscreensaver 5 M 0 M xscreensaver -no-splash 457 | 1 0 1000 1851 gvfsd-trash 3 M 1 M /usr/libexec/gvfsd-trash --spawner :1.6 /org/gtk/gvfs/exec_spaw/0 458 | 1 0 1000 1969 gvfsd-metadata 6 M 0 M /usr/libexec/gvfsd-metadata 459 | 1 0 1000 2262 
bash 5 M 0 M bash 460 | 0 -1000 0 675 systemd-udevd 0 M 4 M /usr/lib/systemd/systemd-udevd 461 | 0 -1000 0 787 auditd 0 M 1 M /sbin/auditd 462 | 0 0 0 807 ModemManager 0 M 1 M /usr/sbin/ModemManager 463 | 0 0 0 808 smartd 0 M 1 M /usr/sbin/smartd -n -q never 464 | 0 0 0 810 alsactl 0 M 0 M /usr/sbin/alsactl -s -n 19 -c -E ALSA_CONFIG_PATH=/etc/alsa/alsactl.conf --initfile=/lib/alsa/init/00main rdaemon 465 | 0 0 0 811 mcelog 0 M 0 M /usr/sbin/mcelog --ignorenodev --daemon --foreground 466 | 0 0 172 813 rtkit-daemon 0 M 0 M /usr/libexec/rtkit-daemon 467 | 0 0 0 814 VBoxService 0 M 1 M /usr/sbin/VBoxService -f 468 | 0 0 0 817 rngd 0 M 1 M /sbin/rngd -f 469 | 0 -900 81 818 dbus-daemon 3 M 0 M /usr/bin/dbus-daemon --system --address=systemd: --nofork --nopidfile --systemd-activation --syslog-only 470 | 0 0 0 823 irqbalance 0 M 0 M /usr/sbin/irqbalance --foreground 471 | 0 0 70 824 avahi-daemon 0 M 0 M avahi-daemon: running [linux.local] 472 | 0 0 0 826 sssd 0 M 2 M /usr/sbin/sssd -i --logger=files 473 | 0 0 995 838 chronyd 1 M 0 M /usr/sbin/chronyd 474 | 0 0 0 849 gssproxy 0 M 1 M /usr/sbin/gssproxy -D 475 | 0 0 0 866 abrtd 0 M 2 M /usr/sbin/abrtd -d -s 476 | 0 0 70 870 avahi-daemon 0 M 0 M avahi-daemon: chroot helper 477 | 0 0 0 871 sssd_be 0 M 2 M /usr/libexec/sssd/sssd_be --domain implicit_files --uid 0 --gid 0 --logger=files 478 | 0 0 0 875 accounts-daemon 0 M 1 M /usr/libexec/accounts-daemon 479 | 0 0 0 906 abrt-dump-journ 1 M 2 M /usr/bin/abrt-dump-journal-core -D -T -f -e 480 | 0 0 0 908 abrt-dump-journ 1 M 2 M /usr/bin/abrt-dump-journal-xorg -fxtD 481 | 0 0 0 950 crond 2 M 1 M /usr/sbin/crond -n 482 | 0 0 0 951 atd 0 M 0 M /usr/sbin/atd -f 483 | 0 0 0 953 lxdm-binary 0 M 0 M /usr/sbin/lxdm-binary 484 | 0 0 0 1060 dhclient 0 M 2 M /sbin/dhclient -d -q -sf /usr/libexec/nm-dhcp-helper -pf /var/run/dhclient-enp0s3.pid -lf /var/lib/NetworkManager/dhclient-939eab05-4796-3792-af24-9f76cf53ca7f-enp0s3.lease -cf /var/lib/NetworkManager/dhclient-enp0s3.conf enp0s3 485 | 0 
0 0 1105 lxdm-session 0 M 1 M /usr/libexec/lxdm-session 486 | 0 0 1000 1123 pulseaudio 0 M 3 M /usr/bin/pulseaudio --daemonize=no 487 | 0 0 1000 1124 lxsession 1 M 2 M /usr/bin/lxsession -s LXDE -e LXDE 488 | 0 0 1000 1134 dbus-daemon 2 M 0 M /usr/bin/dbus-daemon --session --address=systemd: --nofork --nopidfile --systemd-activation --syslog-only 489 | 0 0 1000 1215 imsettings-daem 0 M 1 M /usr/libexec/imsettings-daemon 490 | 0 0 1000 1218 gvfsd 3 M 1 M /usr/libexec/gvfsd 491 | 0 0 1000 1223 gvfsd-fuse 0 M 1 M /usr/libexec/gvfsd-fuse /run/user/1000/gvfs -f -o big_writes 492 | 0 0 1000 1309 VBoxClient 0 M 0 M /usr/bin/VBoxClient --display 493 | 0 0 1000 1310 VBoxClient 0 M 0 M /usr/bin/VBoxClient --clipboard 494 | 0 0 1000 1311 VBoxClient 0 M 0 M /usr/bin/VBoxClient --draganddrop 495 | 0 0 1000 1312 VBoxClient 0 M 0 M /usr/bin/VBoxClient --display 496 | 0 0 1000 1313 VBoxClient 1 M 0 M /usr/bin/VBoxClient --clipboard 497 | 0 0 1000 1316 VBoxClient 0 M 0 M /usr/bin/VBoxClient --seamless 498 | 0 0 1000 1318 VBoxClient 0 M 0 M /usr/bin/VBoxClient --seamless 499 | 0 0 1000 1320 VBoxClient 0 M 0 M /usr/bin/VBoxClient --draganddrop 500 | 0 0 1000 1334 ssh-agent 0 M 0 M /usr/bin/ssh-agent /bin/sh -c exec -l bash -c "/usr/bin/startlxde" 501 | 0 0 1000 1362 lxpolkit 0 M 1 M lxpolkit 502 | 0 0 1000 1370 lxclipboard 0 M 1 M lxclipboard 503 | 0 0 1000 1373 ssh-agent 0 M 1 M /usr/bin/ssh-agent -s 504 | 0 0 1000 1485 agent 0 M 1 M /usr/libexec/geoclue-2.0/demos/agent 505 | 0 0 1000 1751 menu-cached 0 M 1 M /usr/libexec/menu-cache/menu-cached /run/user/1000/menu-cached-:0 506 | 0 0 1000 1780 at-spi-bus-laun 0 M 1 M /usr/libexec/at-spi-bus-launcher 507 | 0 0 1000 1786 dbus-daemon 1 M 0 M /usr/bin/dbus-daemon --config-file=/usr/share/defaults/at-spi2/accessibility.conf --nofork --print-address 3 508 | 0 0 1000 1792 at-spi2-registr 1 M 1 M /usr/libexec/at-spi2-registryd --use-gnome-session 509 | 0 0 1000 1840 gvfs-udisks2-vo 0 M 2 M /usr/libexec/gvfs-udisks2-volume-monitor 510 | 0 0 
1000 1863 gnome-pty-helpe 1 M 0 M gnome-pty-helper 511 | 0 0 1000 1864 bash 0 M 1 M bash 512 | 0 0 0 1899 sudo 0 M 1 M sudo -i 513 | 0 0 0 1901 bash 0 M 1 M -bash 514 | 0 0 0 1953 oomd_bin 0 M 0 M oomd_bin -f /sys/fs/cgroup/unified 515 | 0 -600 0 2562 python3 10 M 0 M python3 /usr/sbin/nohang --config /etc/nohang/nohang.conf 516 | ``` 517 |
518 | 519 | Kthreads, zombies and Pid 1 will not be displayed. 520 | 521 | ## psi-top 522 | 523 | psi-top is a script that prints the PSI metric values for every cgroup. It requires `Linux` >= 4.20 with `CONFIG_PSI=y`. Man page: [psi-top.manpage.md](docs/psi-top.manpage.md). 524 | 525 |
526 | Output example 527 | 528 | ``` 529 | $ psi-top 530 | cgroup2 mountpoint: /sys/fs/cgroup 531 | avg10 avg60 avg300 avg10 avg60 avg300 cgroup2 532 | ----- ----- ------ ----- ----- ------ --------- 533 | some 0.00 0.21 1.56 | full 0.00 0.16 1.14 [SYSTEM_WIDE] 534 | some 0.00 0.21 1.56 | full 0.00 0.16 1.14 535 | some 0.00 0.15 1.11 | full 0.00 0.12 0.89 /user.slice 536 | some 45.92 28.77 20.19 | full 45.05 28.17 19.56 /user.slice/user-1000.slice 537 | some 1.44 4.67 9.24 | full 1.44 4.65 9.20 /user.slice/user-1000.slice/user@1000.service 538 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/pulseaudio.service 539 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/gvfs-daemon.service 540 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/dbus.socket 541 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/gvfs-udisks2-volume-monitor.service 542 | some 0.25 1.97 4.05 | full 0.25 1.96 4.03 /user.slice/user-1000.slice/user@1000.service/xfce4-notifyd.service 543 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/init.scope 544 | some 0.00 0.66 1.99 | full 0.00 0.66 1.97 /user.slice/user-1000.slice/user@1000.service/gpg-agent.service 545 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/gvfs-gphoto2-volume-monitor.service 546 | some 0.93 0.75 0.20 | full 0.93 0.75 0.20 /user.slice/user-1000.slice/user@1000.service/at-spi-dbus-bus.service 547 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/gvfs-metadata.service 548 | some 0.00 2.44 6.78 | full 0.00 2.43 6.74 /user.slice/user-1000.slice/user@1000.service/dbus.service 549 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/gvfs-mtp-volume-monitor.service 550 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 
/user.slice/user-1000.slice/user@1000.service/gvfs-afc-volume-monitor.service 551 | some 44.99 28.30 19.41 | full 44.10 27.70 18.79 /user.slice/user-1000.slice/session-2.scope 552 | some 0.00 0.31 0.53 | full 0.00 0.31 0.53 /init.scope 553 | some 7.25 11.40 13.34 | full 7.23 11.32 13.24 /system.slice 554 | some 0.00 0.01 0.02 | full 0.00 0.01 0.02 /system.slice/systemd-udevd.service 555 | some 0.00 0.58 1.55 | full 0.00 0.58 1.55 /system.slice/cronie.service 556 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/sys-kernel-config.mount 557 | some 0.00 0.22 0.35 | full 0.00 0.22 0.35 /system.slice/polkit.service 558 | some 0.00 0.06 0.20 | full 0.00 0.06 0.20 /system.slice/rtkit-daemon.service 559 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/sys-kernel-debug.mount 560 | some 0.00 0.14 0.62 | full 0.00 0.14 0.62 /system.slice/accounts-daemon.service 561 | some 7.86 11.48 12.56 | full 7.84 11.42 12.51 /system.slice/lightdm.service 562 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/ModemManager.service 563 | some 0.00 1.82 5.47 | full 0.00 1.81 5.43 /system.slice/systemd-journald.service 564 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/dev-mqueue.mount 565 | some 0.00 1.64 4.07 | full 0.00 1.64 4.07 /system.slice/NetworkManager.service 566 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/tmp.mount 567 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/lvm2-lvmetad.service 568 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/dev-disk-by\x2duuid-5d7355c0\x2dc131\x2d40c5\x2d8541\x2d1e04ad7c8b8d.swap 569 | some 0.00 0.09 0.11 | full 0.00 0.09 0.11 /system.slice/upower.service 570 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/udisks2.service 571 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/dev-hugepages.mount 572 | some 0.00 0.27 0.49 | full 0.00 0.27 0.48 /system.slice/dbus.service 573 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/system-getty.slice 574 | some 0.00 
0.12 0.20 | full 0.00 0.12 0.20 /system.slice/avahi-daemon.service 575 | some 0.00 0.18 0.30 | full 0.00 0.18 0.30 /system.slice/systemd-logind.service 576 | ``` 577 |
578 | 579 | ## psi2log 580 | 581 | psi2log is a CLI tool that can check and log PSI metrics from a specified target. It requires `Linux` >= 4.20 with `CONFIG_PSI=y`. Man page: [psi2log.manpage.md](docs/psi2log.manpage.md). 582 | 583 |
584 | Output example 585 | 586 | ``` 587 | $ psi2log 588 | Starting psi2log 589 | target: SYSTEM_WIDE 590 | period: 2 591 | ------------------------------------------------------------------------------------------------------------------ 592 | some cpu pressure || some memory pressure | full memory pressure || some io pressure | full io pressure 593 | ---------------------||----------------------|----------------------||----------------------|--------------------- 594 | avg10 avg60 avg300 || avg10 avg60 avg300 | avg10 avg60 avg300 || avg10 avg60 avg300 | avg10 avg60 avg300 595 | ------ ------ ------ || ------ ------ ------ | ------ ------ ------ || ------ ------ ------ | ------ ------ ------ 596 | 0.13 0.26 0.08 || 3.36 10.31 3.47 | 2.68 7.69 2.56 || 20.24 26.90 8.60 | 18.80 23.16 7.33 597 | 0.11 0.25 0.08 || 2.75 9.97 3.45 | 2.20 7.44 2.54 || 18.38 26.34 8.61 | 17.21 22.73 7.35 598 | 0.09 0.25 0.07 || 2.25 9.65 3.43 | 1.80 7.20 2.52 || 15.05 25.48 8.55 | 14.09 21.99 7.30 599 | 0.07 0.24 0.07 || 1.84 9.33 3.40 | 1.47 6.96 2.51 || 13.05 24.78 8.52 | 12.26 21.40 7.28 600 | ^C 601 | Peak values: avg10 avg60 avg300 602 | ----------- ------ ------ ------ 603 | some cpu 0.13 0.26 0.08 604 | ----------- ------ ------ ------ 605 | some memory 3.36 10.31 3.47 606 | full memory 2.68 7.69 2.56 607 | ----------- ------ ------ ------ 608 | some io 20.24 26.90 8.61 609 | full io 18.80 23.16 7.35 610 | $ psi2log -t /user.slice -l pm.log 611 | Starting psi2log 612 | target: /user.slice 613 | period: 2 614 | log file: pm.log 615 | cgroup2 mountpoint: /sys/fs/cgroup 616 | ------------------------------------------------------------------------------------------------------------------ 617 | some cpu pressure || some memory pressure | full memory pressure || some io pressure | full io pressure 618 | ---------------------||----------------------|----------------------||----------------------|--------------------- 619 | avg10 avg60 avg300 || avg10 avg60 avg300 | avg10 avg60 avg300 || 
avg10 avg60 avg300 | avg10 avg60 avg300 620 | ------ ------ ------ || ------ ------ ------ | ------ ------ ------ || ------ ------ ------ | ------ ------ ------ 621 | 28.32 11.97 3.03 || 0.00 1.05 1.65 | 0.00 0.85 1.33 || 0.55 7.79 7.21 | 0.54 7.52 6.80 622 | 29.53 12.72 3.25 || 0.00 1.01 1.64 | 0.00 0.82 1.32 || 0.81 7.60 7.17 | 0.44 7.27 6.76 623 | 29.80 13.32 3.44 || 0.00 0.98 1.63 | 0.00 0.79 1.31 || 0.66 7.35 7.12 | 0.36 7.03 6.71 624 | 29.83 13.86 3.62 || 0.00 0.95 1.62 | 0.00 0.77 1.30 || 0.54 7.11 7.08 | 0.30 6.80 6.66 625 | 29.86 14.39 3.80 || 0.00 0.91 1.60 | 0.00 0.74 1.29 || 0.44 6.88 7.03 | 0.24 6.58 6.62 626 | 30.07 14.94 3.99 || 0.00 0.88 1.59 | 0.00 0.72 1.28 || 0.36 6.65 6.98 | 0.20 6.36 6.57 627 | ^C 628 | Peak values: avg10 avg60 avg300 629 | ----------- ------ ------ ------ 630 | some cpu 30.07 14.94 3.99 631 | ----------- ------ ------ ------ 632 | some memory 0.00 1.05 1.65 633 | full memory 0.00 0.85 1.33 634 | ----------- ------ ------ ------ 635 | some io 0.81 7.79 7.21 636 | full io 0.54 7.52 6.80 637 | ``` 638 |
639 | 640 | ## Contribution 641 | 642 | - Use cases, feature requests and any questions are [welcome](https://github.com/hakavlad/nohang/issues). 643 | - Pull requests to the `dev` branch are welcome. 644 | 645 | ## Documentation 646 | 647 | - [nohang.manpage.md](docs/nohang.manpage.md) 648 | - [oom-sort.manpage.md](docs/oom-sort.manpage.md) 649 | - [psi2log.manpage.md](docs/psi2log.manpage.md) 650 | - [psi-top.manpage.md](docs/psi-top.manpage.md) 651 | - [FAQ.ru.md](docs/FAQ.ru.md) 652 | - [CHANGELOG.md](CHANGELOG.md) 653 | 654 | ## License 655 | 656 | This project is licensed under the terms of the [MIT license](LICENSE). 657 | -------------------------------------------------------------------------------- /conf/logrotate.d/nohang: -------------------------------------------------------------------------------- 1 | /var/log/nohang/*.log { 2 | missingok 3 | copytruncate 4 | notifempty 5 | size 1M 6 | rotate 5 7 | compress 8 | delaycompress 9 | } 10 | -------------------------------------------------------------------------------- /conf/nohang/nohang-desktop.conf.in: -------------------------------------------------------------------------------- 1 | ## This is the configuration file of the nohang daemon. 2 | 3 | ## The configuration includes the following sections: 4 | ## 0. Check kernel messages for OOM events 5 | ## 1. Common zram settings 6 | ## 2. Common PSI settings 7 | ## 3. Poll rate 8 | ## 4. Warnings and notifications 9 | ## 5. Soft (SIGTERM) threshold 10 | ## 6. Hard (SIGKILL) threshold 11 | ## 7. Customize victim selection: adjusting badness of processes 12 | ## 8. Customize soft corrective actions 13 | ## 9. Misc settings 14 | ## 10. Verbosity, debug, logging 15 | 16 | ## WARNING! 17 | ## - Lines starting with #, tabs and whitespace characters are comments. 18 | ## - Lines starting with @ contain optional parameters that may be repeated. 19 | ## - All values are case sensitive. 20 | ## - nohang doesn't forbid you to shoot yourself in the foot. Be careful!
21 | ## - Restart the daemon after editing the file to apply the new settings. 22 | ## - You can find the file with default values here: :TARGET_DATADIR:/nohang/nohang.conf 23 | 24 | ## To find config keys descriptions see man(8) nohang 25 | 26 | ############################################################################### 27 | 28 | ## 0. Check kernel messages for OOM events 29 | 30 | # @check_kmsg 31 | ## Type: boolean 32 | ## Comment/uncomment to disable/enable checking kmsg for OOM events 33 | 34 | # @debug_kmsg 35 | ## Type: boolean 36 | ## Comment/uncomment to disable/enable debug checking kmsg 37 | 38 | ############################################################################### 39 | 40 | 1. Common zram settings 41 | 42 | Key: zram_checking_enabled 43 | Description: 44 | Type: boolean 45 | Valid values: True | False 46 | Default value: False 47 | 48 | zram_checking_enabled = False 49 | 50 | ############################################################################### 51 | 52 | 2. 
Common PSI settings 53 | 54 | Key: psi_checking_enabled 55 | Description: 56 | Type: boolean 57 | Valid values: True | False 58 | Default value: True 59 | 60 | psi_checking_enabled = True 61 | 62 | Key: psi_path 63 | Description: 64 | Type: string 65 | Valid values: any string 66 | Default value: /proc/pressure/memory 67 | 68 | psi_path = /proc/pressure/memory 69 | 70 | Key: psi_metrics 71 | Description: 72 | Type: string 73 | Valid values: some_avg10, some_avg60, some_avg300, 74 | full_avg10, full_avg60, full_avg300 75 | Default value: full_avg10 76 | 77 | psi_metrics = full_avg10 78 | 79 | Key: psi_excess_duration 80 | Description: 81 | Type: float 82 | Valid values: >= 0 83 | Default value: 30 84 | 85 | psi_excess_duration = 30 86 | 87 | Key: psi_post_action_delay 88 | Description: 89 | Type: float 90 | Valid values: >= 10 91 | Default value: 15 92 | 93 | psi_post_action_delay = 15 94 | 95 | ############################################################################### 96 | 97 | 3. Poll rate 98 | 99 | Key: fill_rate_mem 100 | Description: 101 | Type: float 102 | Valid values: >= 100 103 | Default value: 6000 104 | 105 | fill_rate_mem = 6000 106 | 107 | Key: fill_rate_swap 108 | Description: 109 | Type: float 110 | Valid values: >= 100 111 | Default value: 2000 112 | 113 | fill_rate_swap = 2000 114 | 115 | Key: fill_rate_zram 116 | Description: 117 | Type: float 118 | Valid values: >= 100 119 | Default value: 4000 120 | 121 | fill_rate_zram = 4000 122 | 123 | Key: max_sleep 124 | Description: 125 | Type: float 126 | Valid values: >= 0.01 and >= min_sleep 127 | Default value: 3 128 | 129 | max_sleep = 3 130 | 131 | Key: min_sleep 132 | Description: 133 | Type: float 134 | Valid values: >= 0.01 and <= max_sleep 135 | Default value: 0.1 136 | 137 | min_sleep = 0.1 138 | 139 | ############################################################################### 140 | 141 | 4. Warnings and notifications 142 | 143 | 4.1. 
GUI notifications after corrective actions 144 | 145 | Key: post_action_gui_notifications 146 | Description: 147 | Type: boolean 148 | Valid values: True | False 149 | Default value: True 150 | 151 | post_action_gui_notifications = True 152 | 153 | Key: hide_corrective_action_type 154 | Description: 155 | Type: boolean 156 | Valid values: True | False 157 | Default value: False 158 | 159 | hide_corrective_action_type = False 160 | 161 | 4.2. Low memory warnings 162 | 163 | Key: low_memory_warnings_enabled 164 | Description: 165 | Type: boolean 166 | Valid values: True | False 167 | Default value: True 168 | 169 | low_memory_warnings_enabled = True 170 | 171 | Key: warning_exe 172 | Description: 173 | Type: string 174 | Valid values: any string 175 | Default value: (empty string) 176 | 177 | warning_exe = 178 | 179 | Key: warning_threshold_min_mem 180 | Description: 181 | Type: float (with % or M) 182 | Valid values: from the range [0; 100] % 183 | Default value: 20 % 184 | 185 | warning_threshold_min_mem = 20 % 186 | 187 | Key: warning_threshold_min_swap 188 | Description: 189 | Type: float (with % or M) 190 | Valid values: [0; 100] % or >= 0 M 191 | Default value: 20 % 192 | 193 | warning_threshold_min_swap = 25 % 194 | 195 | Key: warning_threshold_max_zram 196 | Description: 197 | Type: float (with % or M) 198 | Valid values: from the range [0; 100] % 199 | Default value: 45 % 200 | 201 | warning_threshold_max_zram = 45 % 202 | 203 | Key: warning_threshold_max_psi 204 | Description: 205 | Type: float 206 | Valid values: from the range [0; 100] 207 | Default value: 10 208 | 209 | warning_threshold_max_psi = 10 210 | 211 | Key: min_post_warning_delay 212 | Description: 213 | Type: float 214 | Valid values: >= 1 215 | Default value: 60 216 | 217 | min_post_warning_delay = 60 218 | 219 | Key: env_cache_time 220 | Description: 221 | Type: float 222 | Valid values: >= 0 223 | Default value: 300 224 | 225 | env_cache_time = 300 226 | 227 | 
############################################################################### 228 | 229 | 5. Soft threshold (thresholds for sending the SIGTERM signal or 230 | implementing other soft corrective action) 231 | 232 | Key: soft_threshold_min_mem 233 | Description: 234 | Type: float (with % or M) 235 | Valid values: from the range [0; 50] % 236 | Default value: 5 % 237 | 238 | soft_threshold_min_mem = 5 % 239 | 240 | Key: soft_threshold_min_swap 241 | Description: 242 | Type: float (with % or M) 243 | Valid values: [0; 100] % or >= 0 M 244 | Default value: 10 % 245 | 246 | soft_threshold_min_swap = 10 % 247 | 248 | Key: soft_threshold_max_zram 249 | Description: 250 | Type: float (with % or M) 251 | Valid values: from the range [10; 90] % 252 | Default value: 55 % 253 | 254 | soft_threshold_max_zram = 55 % 255 | 256 | Key: soft_threshold_max_psi 257 | Description: 258 | Type: float 259 | Valid values: from the range [5; 100] 260 | Default value: 40 261 | 262 | soft_threshold_max_psi = 40 263 | 264 | ############################################################################### 265 | 266 | 6. 
Hard threshold (thresholds for sending the SIGKILL signal) 267 | 268 | Key: hard_threshold_min_mem 269 | Description: 270 | Type: float (with % or M) 271 | Valid values: from the range [0; 50] % 272 | Default value: 2 % 273 | 274 | hard_threshold_min_mem = 2 % 275 | 276 | Key: hard_threshold_min_swap 277 | Description: 278 | Type: float (with % or M) 279 | Valid values: [0; 100] % or >= 0 M 280 | Default value: 4 % 281 | 282 | hard_threshold_min_swap = 4 % 283 | 284 | Key: hard_threshold_max_zram 285 | Description: 286 | Type: float (with % or M) 287 | Valid values: from the range [10; 90] % 288 | Default value: 60 % 289 | 290 | hard_threshold_max_zram = 60 % 291 | 292 | Key: hard_threshold_max_psi 293 | Description: 294 | Type: float 295 | Valid values: from the range [5; 100] 296 | Default value: 90 297 | 298 | hard_threshold_max_psi = 90 299 | 300 | ############################################################################### 301 | 302 | 7. Customize victim selection: adjusting badness of processes 303 | 304 | 7.1. Ignore positive oom_score_adj 305 | 306 | Key: ignore_positive_oom_score_adj 307 | Description: 308 | Type: boolean 309 | Valid values: True | False 310 | Default value: False 311 | 312 | ignore_positive_oom_score_adj = False 313 | 314 | 7.2.1. Matching process names with RE patterns change their badness 315 | 316 | Syntax: 317 | 318 | @BADNESS_ADJ_RE_NAME badness_adj /// RE_pattern 319 | 320 | New badness value will be += badness_adj 321 | 322 | It is possible to compare multiple patterns 323 | with different badness_adj values. 324 | 325 | Example: 326 | @BADNESS_ADJ_RE_NAME -500 /// ^sshd$ 327 | 328 | Prefer terminating Firefox tabs instead of terminating the entire browser. 329 | (In Chromium and Electron-based apps child processes get oom_score_adj=300 330 | by default.) 331 | @BADNESS_ADJ_RE_NAME 200 /// ^(Web Content|Privileged Cont|file:// Content)$ 332 | 333 | @BADNESS_ADJ_RE_NAME -200 /// ^(dnf|yum|packagekitd)$ 334 | 335 | 336 | 7.2.2. 
Matching CGroup_v1-line with RE patterns 337 | 338 | @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/ 339 | 340 | @BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$ 341 | 342 | @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/ 343 | 344 | 7.2.3. Matching CGroup_v2-line with RE patterns 345 | 346 | @BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload 347 | 348 | 7.2.4. Matching eUIDs with RE patterns 349 | 350 | @BADNESS_ADJ_RE_UID -100 /// ^0$ 351 | 352 | 7.2.5. Matching /proc/[pid]/exe realpath with RE patterns 353 | 354 | Example: 355 | @BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo$ 356 | 357 | Protect X. 358 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/libexec/Xorg|/usr/lib/xorg/Xorg|/usr/lib/Xorg|/usr/bin/X|/usr/bin/Xorg|/usr/bin/Xwayland|/usr/bin/weston|/usr/bin/sway)$ 359 | 360 | Protect GNOME. 361 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/gnome-shell|/usr/bin/metacity|/usr/bin/mutter|/usr/lib/gnome-session/gnome-session-binary|/usr/libexec/gnome-session-binary|/usr/libexec/gnome-session-ctl)$ 362 | 363 | Protect KDE Plasma. 364 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/plasma-desktop|/usr/bin/plasmashell|/usr/bin/plasma_session|/usr/bin/kwin|/usr/bin/kwin_x11|/usr/bin/kwin_wayland)$ 365 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/startplasma-wayland|/usr/lib/x86_64-linux-gnu/libexec/startplasma-waylandsession|/usr/bin/ksmserver)$ 366 | 367 | Protect Cinnamon. 368 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/cinnamon|/usr/bin/muffin|/usr/bin/cinnamon-session|/usr/bin/cinnamon-launcher)$ 369 | 370 | Protect Xfce. 371 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/xfwm4|/usr/bin/xfce4-session|/usr/bin/xfce4-panel|/usr/bin/xfdesktop)$ 372 | 373 | Protect Mate. 374 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/marco|/usr/bin/mate-session|/usr/bin/caja|/usr/bin/mate-panel)$ 375 | 376 | Protect LXQt. 377 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/lxqt-panel|/usr/bin/pcmanfm-qt|/usr/bin/lxqt-session)$ 378 | 379 | Protect Budgie Desktop. 
380 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/budgie-wm|/usr/bin/budgie-panel)$ 381 | 382 | Protect other. 383 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/compiz|/usr/bin/openbox|/usr/bin/fluxbox|/usr/bin/awesome|/usr/bin/icewm|/usr/bin/enlightenment|/usr/bin/gala|/usr/bin/wingpanel|/usr/bin/i3)$ 384 | 385 | Protect display managers. 386 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/sbin/gdm|/usr/sbin/gdm3|/usr/sbin/sddm|/usr/bin/sddm|/usr/lib/x86_64-linux-gnu/sddm/sddm-helper|/usr/bin/slim|/usr/sbin/lightdm|/usr/libexec/gdm-session-worker|/usr/libexec/gdm-wayland-session|/usr/lib/gdm3/gdm-wayland-session|/usr/lib/gdm3/gdm-session-worker)$ 387 | @BADNESS_ADJ_RE_REALPATH -200 /// ^/usr/lib/gdm3/ 388 | 389 | Protect systemd-logind. 390 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/lib/systemd/systemd-logind|/usr/lib/systemd/systemd-logind)$ 391 | 392 | Protect `systemd --user`. 393 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/lib/systemd/systemd|/usr/lib/systemd/systemd)$ 394 | 395 | Protect dbus. 396 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/dbus-daemon|/usr/bin/dbus-run-session|/usr/bin/dbus-broker-launcher|/usr/bin/dbus-broker)$ 397 | 398 | Protect package managers and distro installers. 399 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/calamares|/usr/bin/dpkg|/usr/bin/pacman|/usr/bin/yay|/usr/bin/pamac|/usr/bin/pamac-daemon|/usr/bin/pamac-manager)$ 400 | 401 | Prefer stress. 402 | @BADNESS_ADJ_RE_REALPATH 900 /// ^(/usr/bin/stress|/usr/bin/stress-ng)$ 403 | 404 | 405 | 7.2.6. Matching /proc/[pid]/cwd realpath with RE patterns 406 | 407 | @BADNESS_ADJ_RE_CWD 200 /// ^/home/ 408 | 409 | 7.2.7. Matching cmdlines with RE patterns 410 | WARNING: using this option can greatly slow down the search for a victim 411 | in conditions of heavily swapping. 
412 | 413 | Prefer Chromium tabs and Electron-based apps 414 | @BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer 415 | 416 | Prefer Firefox tabs (Web Content and WebExtensions) 417 | @BADNESS_ADJ_RE_CMDLINE 100 /// -appomni 418 | 419 | @BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox 420 | 421 | 7.2.8. Matching environ with RE patterns 422 | WARNING: using this option can greatly slow down the search for a victim 423 | in conditions of heavy swapping. 424 | 425 | @BADNESS_ADJ_RE_ENVIRON 100 /// USER=user 426 | 427 | 428 | Note that you can control badness also via systemd units via 429 | OOMScoreAdjust, see 430 | www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust= 431 | 432 | ############################################################################### 433 | 434 | 8. Customize soft corrective actions 435 | 436 | Run the command instead of sending a signal at soft corrective action 437 | if the victim's name or cgroup matches the regular expression. 438 | 439 | Syntax: 440 | KEY REGEXP SEPARATOR COMMAND 441 | 442 | @SOFT_ACTION_RE_NAME ^foo$ /// kill -USR1 $PID 443 | @SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE 444 | @SOFT_ACTION_RE_CGROUP_V2 /foo\.service$ /// systemctl restart $SERVICE 445 | 446 | $PID will be replaced by process PID. 447 | $NAME will be replaced by process name. 448 | $SERVICE will be replaced by .service if it exists (otherwise it will be 449 | replaced by empty line) 450 | 451 | ############################################################################### 452 | 453 | 9. 
Misc settings 454 | 455 | Key: max_soft_exit_time 456 | Description: 457 | Type: float 458 | Valid values: >= 0.1 459 | Default value: 10 460 | 461 | max_soft_exit_time = 10 462 | 463 | Key: post_kill_exe 464 | Description: 465 | Type: string 466 | Valid values: any string 467 | Default value: (empty string) 468 | 469 | post_kill_exe = 470 | 471 | Key: min_badness 472 | Description: 473 | Type: integer 474 | Valid values: >= 1 475 | Default value: 1 476 | 477 | min_badness = 1 478 | 479 | Key: post_soft_action_delay 480 | Description: 481 | Type: float 482 | Valid values: >= 0.1 483 | Default value: 3 484 | 485 | post_soft_action_delay = 3 486 | 487 | Key: post_zombie_delay 488 | Description: 489 | Type: float 490 | Valid values: >= 0 491 | Default value: 0.1 492 | 493 | post_zombie_delay = 0.1 494 | 495 | Key: victim_cache_time 496 | Description: 497 | Type: float 498 | Valid values: >= 0 499 | Default value: 10 500 | 501 | victim_cache_time = 10 502 | 503 | Key: exe_timeout 504 | Description: 505 | Type: float 506 | Valid values: >= 0.1 507 | Default value: 20 508 | 509 | exe_timeout = 20 510 | 511 | ############################################################################### 512 | 513 | 10. 
Verbosity, debug, logging 514 | 515 | Key: print_config_at_startup 516 | Description: 517 | Type: boolean 518 | Valid values: True | False 519 | Default value: False 520 | 521 | print_config_at_startup = False 522 | 523 | Key: print_mem_check_results 524 | Description: 525 | Type: boolean 526 | Valid values: True | False 527 | Default value: False 528 | 529 | print_mem_check_results = False 530 | 531 | Key: min_mem_report_interval 532 | Description: 533 | Type: float 534 | Valid values: >= 0 535 | Default value: 60 536 | 537 | min_mem_report_interval = 60 538 | 539 | Key: print_proc_table 540 | Description: 541 | Type: boolean 542 | Valid values: True | False 543 | Default value: False 544 | 545 | print_proc_table = False 546 | 547 | Key: extra_table_info 548 | Description: 549 | WARNING: using "cmdline" or "environ" keys can greatly slow down 550 | the search for a victim in conditions of heavy swapping. 551 | Type: string 552 | Valid values: None, cgroup_v1, cgroup_v2, realpath, 553 | cwd, cmdline, environ 554 | Default value: None 555 | 556 | extra_table_info = None 557 | 558 | Key: print_victim_status 559 | Description: 560 | Type: boolean 561 | Valid values: True | False 562 | Default value: True 563 | 564 | print_victim_status = True 565 | 566 | Key: print_victim_cmdline 567 | Description: 568 | Type: boolean 569 | Valid values: True | False 570 | Default value: False 571 | 572 | print_victim_cmdline = False 573 | 574 | Key: max_victim_ancestry_depth 575 | Description: 576 | Type: integer 577 | Valid values: >= 1 578 | Default value: 3 579 | 580 | max_victim_ancestry_depth = 3 581 | 582 | Key: print_statistics 583 | Description: 584 | Type: boolean 585 | Valid values: True | False 586 | Default value: True 587 | 588 | print_statistics = True 589 | 590 | Key: debug_psi 591 | Description: 592 | Type: boolean 593 | Valid values: True | False 594 | Default value: False 595 | 596 | debug_psi = False 597 | 598 | Key: debug_gui_notifications 599 | Description: 600 | 
Type: boolean 601 | Valid values: True | False 602 | Default value: False 603 | 604 | debug_gui_notifications = False 605 | 606 | Key: debug_sleep 607 | Description: 608 | Type: boolean 609 | Valid values: True | False 610 | Default value: False 611 | 612 | debug_sleep = False 613 | 614 | Key: debug_threading 615 | Description: 616 | Type: boolean 617 | Valid values: True | False 618 | Default value: False 619 | 620 | debug_threading = False 621 | 622 | Key: separate_log 623 | Description: 624 | Type: boolean 625 | Valid values: True | False 626 | Default value: False 627 | 628 | separate_log = False 629 | 630 | ############################################################################### 631 | 632 | Use cases, feature requests and any questions are welcome: 633 | https://github.com/hakavlad/nohang/issues 634 | -------------------------------------------------------------------------------- /conf/nohang/nohang.conf.in: -------------------------------------------------------------------------------- 1 | ## This is the configuration file of the nohang daemon. 2 | 3 | ## The configuration includes the following sections: 4 | ## 0. Check kernel messages for OOM events 5 | ## 1. Common zram settings 6 | ## 2. Common PSI settings 7 | ## 3. Poll rate 8 | ## 4. Warnings and notifications 9 | ## 5. Soft (SIGTERM) threshold 10 | ## 6. Hard (SIGKILL) threshold 11 | ## 7. Customize victim selection: adjusting badness of processes 12 | ## 8. Customize soft corrective actions 13 | ## 9. Misc settings 14 | ## 10. Verbosity, debug, logging 15 | 16 | ## WARNING! 17 | ## - Lines starting with #, tabs and whitespace characters are comments. 18 | ## - Lines starting with @ contain optional parameters that may be repeated. 19 | ## - All values are case sensitive. 20 | ## - nohang doesn't forbid you to shoot yourself in the foot. Be careful! 21 | ## - Restart the daemon after editing the file to apply the new settings. 
22 | ## - You can find the file with default values here: :TARGET_DATADIR:/nohang/nohang.conf 23 | 24 | ## To find config keys descriptions see man(8) nohang 25 | 26 | ############################################################################### 27 | 28 | ## 0. Check kernel messages for OOM events 29 | 30 | # @check_kmsg 31 | ## Type: boolean 32 | ## Comment/uncomment to disable/enable checking kmsg for OOM events 33 | 34 | # @debug_kmsg 35 | ## Type: boolean 36 | ## Comment/uncomment to disable/enable debug checking kmsg 37 | 38 | ############################################################################### 39 | 40 | ## 1. Common zram settings 41 | 42 | zram_checking_enabled = False 43 | ## Type: boolean, valid values: True | False 44 | ## Default value: False 45 | 46 | ############################################################################### 47 | 48 | ## 2. Common PSI settings 49 | 50 | psi_checking_enabled = False 51 | ## Type: boolean, valid values: True | False 52 | ## Default value: False 53 | 54 | psi_path = /proc/pressure/memory 55 | ## Type: string; valid values: any string 56 | ## Default value: /proc/pressure/memory 57 | 58 | psi_metrics = full_avg10 59 | ## Type: string; valid values: some_avg10, some_avg60, some_avg300, 60 | ## full_avg10, full_avg60, full_avg300 61 | ## Default value: full_avg10 62 | 63 | psi_excess_duration = 30 64 | ## Type: float; valid values: >= 0 65 | ## Default value: 30 66 | 67 | psi_post_action_delay = 15 68 | ## Type: float; valid values: >= 10 69 | ## Default value: 15 70 | 71 | ############################################################################### 72 | 73 | ## 3. 
Poll rate 74 | 75 | fill_rate_mem = 6000 76 | ## Type: float; valid values: >= 100 77 | ## Default value: 6000 78 | 79 | fill_rate_swap = 2000 80 | ## Type: float; valid values: >= 100 81 | ## Default value: 2000 82 | 83 | fill_rate_zram = 4000 84 | ## Type: float; valid values: >= 100 85 | ## Default value: 4000 86 | 87 | max_sleep = 3 88 | ## Type: float; valid values: >= 0.01 and >= min_sleep 89 | ## Default value: 3 90 | 91 | min_sleep = 0.1 92 | ## Type: float; valid values: >= 0.01 and <= max_sleep 93 | ## Default value: 0.1 94 | 95 | ############################################################################### 96 | 97 | ## 4. Warnings and notifications 98 | 99 | ## 4.1. GUI notifications after corrective actions 100 | 101 | post_action_gui_notifications = False 102 | ## Type: boolean; valid values: True | False 103 | ## Default value: False 104 | 105 | hide_corrective_action_type = False 106 | ## Type: boolean; valid values: True | False 107 | ## Default value: False 108 | 109 | 110 | ## 4.2. 
Low memory warnings 111 | 112 | low_memory_warnings_enabled = False 113 | ## Type: boolean; valid values: True | False 114 | ## Default value: False 115 | 116 | warning_exe = 117 | ## Type: string; valid values: any string 118 | ## Default value: (empty string) 119 | 120 | 121 | warning_threshold_min_mem = 20 % 122 | ## Type: float (with % or M); valid values: from the range [0; 100] % 123 | ## Default value: 20 % 124 | 125 | warning_threshold_min_swap = 25 % 126 | ## Type: float (with % or M); valid values: [0; 100] % or >= 0 M 127 | ## Default value: 20 % 128 | 129 | warning_threshold_max_zram = 45 % 130 | ## Type: float (with % or M); valid values: from the range [0; 100] % 131 | ## Default value: 45 % 132 | 133 | warning_threshold_max_psi = 10 134 | ## Type: float; valid values: from the range [0; 100] 135 | ## Default value: 10 136 | 137 | 138 | min_post_warning_delay = 60 139 | ## Type: float; valid values: >= 1 140 | ## Default value: 60 141 | 142 | env_cache_time = 300 143 | ## Type: float; valid values: >= 0 144 | ## Default value: 300 145 | 146 | ############################################################################### 147 | 148 | ## 5. Soft threshold (thresholds for sending the SIGTERM signal or 149 | ## implementing other soft corrective action) 150 | 151 | soft_threshold_min_mem = 5 % 152 | ## Type: float (with % or M); valid values: from the range [0; 50] % 153 | ## Default value: 5 % 154 | 155 | soft_threshold_min_swap = 10 % 156 | ## Type: float (with % or M); valid values: [0; 100] % or >= 0 M 157 | ## Default value: 10 % 158 | 159 | soft_threshold_max_zram = 55 % 160 | ## Type: float (with % or M); valid values: from the range [10; 90] % 161 | ## Default value: 55 % 162 | 163 | soft_threshold_max_psi = 40 164 | ## Type: float; valid values: from the range [5; 100] 165 | ## Default value: 40 166 | 167 | ############################################################################### 168 | 169 | ## 6. 
Hard threshold (thresholds for sending the SIGKILL signal) 170 | 171 | hard_threshold_min_mem = 2 % 172 | ## Type: float (with % or M); valid values: from the range [0; 50] % 173 | ## Default value: 2 % 174 | 175 | hard_threshold_min_swap = 4 % 176 | ## Type: float (with % or M); valid values: [0; 100] % or >= 0 M 177 | ## Default value: 4 % 178 | 179 | hard_threshold_max_zram = 60 % 180 | ## Type: float (with % or M); valid values: from the range [10; 90] % 181 | ## Default value: 60 % 182 | 183 | hard_threshold_max_psi = 90 184 | ## Type: float; valid values: from the range [5; 100] 185 | ## Default value: 90 186 | 187 | ############################################################################### 188 | 189 | ## 7. Customize victim selection: adjusting badness of processes 190 | 191 | ## 7.1. Ignore positive oom_score_adj 192 | 193 | ignore_positive_oom_score_adj = False 194 | ## Type: boolean; valid values: True | False 195 | ## Default value: False 196 | 197 | 198 | ## 7.2. Matching process properties with regular expressions to change their 199 | ## badness. 200 | 201 | ## Syntax: 202 | ## @BADNESS_ADJ_RE_PROPERTY badness_adj /// RE_pattern 203 | ## New badness value will be added to process's badness_adj 204 | 205 | ## It is possible to compare multiple patterns 206 | ## with different badness_adj values. 207 | 208 | 209 | ## 7.2.1. Matching process names with RE patterns to change their badness 210 | ## Example: 211 | # @BADNESS_ADJ_RE_NAME 200 /// ^Web Content$ 212 | 213 | ## 7.2.2. Matching CGroup_v1-line with RE patterns 214 | # @BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$ 215 | # @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/ 216 | 217 | ## 7.2.3. Matching CGroup_v2-line with RE patterns 218 | # @BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload 219 | 220 | ## 7.2.4. Matching eUIDs with RE patterns 221 | # @BADNESS_ADJ_RE_UID -100 /// ^0$ 222 | 223 | ## 7.2.5. 
Matching /proc/[pid]/exe realpath with RE patterns 224 | ## Example: 225 | # @BADNESS_ADJ_RE_REALPATH 900 /// ^(/usr/bin/stress|/usr/bin/stress-ng)$ 226 | 227 | ## 7.2.6. Matching /proc/[pid]/cwd realpath with RE patterns 228 | # @BADNESS_ADJ_RE_CWD 200 /// ^/home/ 229 | 230 | ## 7.2.7. Matching cmdlines with RE patterns 231 | ## WARNING: using this option can greatly slow down the search for a victim 232 | ## in conditions of intense swapping. 233 | 234 | ## Prefer Chromium tabs and Electron-based apps 235 | # @BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer 236 | 237 | ## Prefer Firefox tabs (Web Content and WebExtensions) 238 | # @BADNESS_ADJ_RE_CMDLINE 100 /// -appomni 239 | 240 | ## Avoid Virtualbox processes 241 | # @BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox 242 | 243 | ## 7.2.8. Matching environ with RE patterns 244 | ## WARNING: using this option can greatly slow down the search for a victim 245 | ## in conditions of heavy swapping. 246 | # @BADNESS_ADJ_RE_ENVIRON 100 /// USER=user 247 | 248 | 249 | # Note that you can control badness also via systemd units via 250 | # OOMScoreAdjust, see 251 | # www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust= 252 | 253 | ############################################################################### 254 | 255 | ## 8. Customize soft corrective actions 256 | 257 | ## Run the command instead of sending a signal at soft corrective action 258 | ## if the victim's name or cgroup matches the regular expression. 259 | 260 | ## Syntax: 261 | ## KEY REGEXP SEPARATOR COMMAND 262 | 263 | # @SOFT_ACTION_RE_NAME ^foo$ /// kill -USR1 $PID 264 | # @SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE 265 | # @SOFT_ACTION_RE_CGROUP_V2 /foo\.service$ /// systemctl restart $SERVICE 266 | 267 | ## $PID will be replaced by process PID. 268 | ## $NAME will be replaced by process name. 
269 | ## $SERVICE will be replaced by .service if it exists (otherwise it will be 270 | ## replaced by empty line) 271 | 272 | ############################################################################### 273 | 274 | ## 9. Misc settings 275 | 276 | max_soft_exit_time = 10 277 | ## Type: float; valid values: >= 0.1 278 | ## Default value: 10 279 | 280 | post_kill_exe = 281 | ## Type: string; valid values: any string 282 | ## Default value: (empty string) 283 | 284 | min_badness = 1 285 | ## Type: integer; valid values: >= 1 286 | ## Default value: 1 287 | ## nohang will do nothing if the badness of all processes is below min_badness 288 | ## (actually it will spam to stdout/log) 289 | 290 | post_soft_action_delay = 3 291 | ## Type: float; valid values: >= 0.1 292 | ## Default value: 3 293 | 294 | post_zombie_delay = 0.1 295 | ## Type: float; valid values: >= 0 296 | ## Default value: 0.1 297 | 298 | victim_cache_time = 10 299 | ## Type: float; valid values: >= 0 300 | ## Default value: 10 301 | 302 | exe_timeout = 20 303 | ## Type: float; valid values: >= 0.1 304 | ## Default value: 20 305 | 306 | ############################################################################### 307 | 308 | ## 10. 
Verbosity, debug, logging 309 | 310 | print_config_at_startup = False 311 | ## Type: boolean; valid values: True | False 312 | ## Default value: False 313 | 314 | print_mem_check_results = False 315 | ## Type: boolean; valid values: True | False 316 | ## Default value: False 317 | 318 | min_mem_report_interval = 60 319 | ## Type: float; valid values: >= 0 320 | ## Default value: 60 321 | 322 | print_proc_table = False 323 | ## Type: boolean; valid values: True | False 324 | ## Default value: False 325 | 326 | extra_table_info = None 327 | ## Type: string; valid values: None, cgroup_v1, cgroup_v2, realpath, cwd, 328 | ## cmdline, environ 329 | ## Default value: None 330 | ## WARNING: using "cmdline" or "environ" keys can greatly slow down the search 331 | ## for a victim in conditions of heavy swapping. 332 | 333 | print_victim_status = True 334 | ## Type: boolean; valid values: True | False 335 | ## Default value: True 336 | 337 | print_victim_cmdline = False 338 | ## Type: boolean; valid values: True | False 339 | ## Default value: False 340 | 341 | max_victim_ancestry_depth = 3 342 | ## Type: integer; valid values: >= 1 343 | ## Default value: 3 344 | 345 | print_statistics = True 346 | 347 | ## Type: boolean; valid values: True | False 348 | ## Default value: True 349 | 350 | debug_psi = False 351 | ## Type: boolean; valid values: True | False 352 | ## Default value: False 353 | 354 | debug_gui_notifications = False 355 | ## Type: boolean; valid values: True | False 356 | ## Default value: False 357 | 358 | debug_sleep = False 359 | ## Type: boolean; valid values: True | False 360 | ## Default value: False 361 | 362 | debug_threading = False 363 | ## Type: boolean; valid values: True | False 364 | ## Default value: False 365 | 366 | separate_log = False 367 | ## Type: boolean; valid values: True | False 368 | ## Default value: False 369 | 370 | ############################################################################### 371 | 372 | ## Use cases, feature 
requests and any questions are welcome: 373 | ## https://github.com/hakavlad/nohang/issues 374 | ## 375 | -------------------------------------------------------------------------------- /conf/nohang/test.conf: -------------------------------------------------------------------------------- 1 | ## This is the configuration file of the nohang daemon. 2 | 3 | ## The configuration includes the following sections: 4 | ## 0. Check kernel messages for OOM events 5 | ## 1. Common zram settings 6 | ## 2. Common PSI settings 7 | ## 3. Poll rate 8 | ## 4. Warnings and notifications 9 | ## 5. Soft (SIGTERM) threshold 10 | ## 6. Hard (SIGKILL) threshold 11 | ## 7. Customize victim selection: adjusting badness of processes 12 | ## 8. Customize soft corrective actions 13 | ## 9. Misc settings 14 | ## 10. Verbosity, debug, logging 15 | 16 | ## WARNING! 17 | ## - Lines starting with #, tabs and whitespace characters are comments. 18 | ## - Lines starting with @ contain optional parameters that may be repeated. 19 | ## - All values are case sensitive. 20 | ## - nohang doesn't forbid you to shoot yourself in the foot. Be careful! 21 | ## - Restart the daemon after editing the file to apply the new settings. 22 | ## - You can find the file with default values here: :TARGET_DATADIR:/nohang/nohang.conf 23 | 24 | ## To find config keys descriptions see man(8) nohang 25 | 26 | ############################################################################### 27 | 28 | ## 0. Check kernel messages for OOM events 29 | 30 | # @check_kmsg 31 | ## Type: boolean 32 | ## Comment/uncomment to disable/enable checking kmsg for OOM events 33 | 34 | # @debug_kmsg 35 | ## Type: boolean 36 | ## Comment/uncomment to disable/enable debug checking kmsg 37 | 38 | ############################################################################### 39 | 40 | 1. 
Common zram settings 41 | 42 | Key: zram_checking_enabled 43 | Description: 44 | Type: boolean 45 | Valid values: True and False 46 | Default value: False 47 | 48 | zram_checking_enabled = True 49 | 50 | ############################################################################### 51 | 52 | 2. Common PSI settings 53 | 54 | Description: 55 | Type: boolean 56 | Valid values: True and False 57 | 58 | psi_checking_enabled = True 59 | 60 | Description: 61 | Type: string 62 | Valid values: 63 | 64 | psi_path = /proc/pressure/memory 65 | 66 | Description: 67 | Type: string 68 | Valid values: 69 | 70 | psi_metrics = full_avg10 71 | 72 | Description: 73 | Type: float 74 | Valid values: 75 | 76 | psi_excess_duration = 60 77 | 78 | Description: 79 | Type: float 80 | Valid values: 81 | 82 | psi_post_action_delay = 60 83 | 84 | ############################################################################### 85 | 86 | 3. Poll rate 87 | 88 | Description: 89 | Type: float 90 | Valid values: 91 | 92 | fill_rate_mem = 4000 93 | 94 | Description: 95 | Type: float 96 | Valid values: 97 | 98 | fill_rate_swap = 1500 99 | 100 | Description: 101 | Type: float 102 | Valid values: 103 | 104 | fill_rate_zram = 6000 105 | 106 | Description: 107 | Type: float 108 | Valid values: 109 | 110 | max_sleep = 3 111 | 112 | Description: 113 | Type: float 114 | Valid values: 115 | 116 | min_sleep = 0.1 117 | 118 | ############################################################################### 119 | 120 | 4. Warnings and notifications 121 | 122 | 4.1. GUI notifications after corrective actions 123 | 124 | Description: 125 | Type: boolean 126 | Valid values: True and False 127 | 128 | post_action_gui_notifications = True 129 | 130 | Description: 131 | Type: boolean 132 | Valid values: True and False 133 | 134 | hide_corrective_action_type = False 135 | 136 | 4.2. 
Low memory warnings 137 | 138 | Description: 139 | Type: boolean 140 | Valid values: True and False 141 | 142 | low_memory_warnings_enabled = True 143 | 144 | Description: 145 | Type: string 146 | Valid values: 147 | 148 | warning_exe = 149 | 150 | Description: 151 | Type: float (+ % or M) 152 | Valid values: 153 | 154 | warning_threshold_min_mem = 20 % 155 | 156 | Description: 157 | Type: float (+ % or M) 158 | Valid values: 159 | 160 | warning_threshold_min_swap = 20 % 161 | 162 | Description: 163 | Type: float (+ % or M) 164 | Valid values: 165 | 166 | warning_threshold_max_zram = 50 % 167 | 168 | Description: 169 | Type: float 170 | Valid values: 171 | 172 | warning_threshold_max_psi = 100 173 | 174 | Description: 175 | Type: float 176 | Valid values: 177 | 178 | min_post_warning_delay = 30 179 | 180 | Description: 181 | Type: float 182 | Valid values: 183 | 184 | env_cache_time = 300 185 | 186 | ############################################################################### 187 | 188 | 5. Soft threshold 189 | 190 | Description: 191 | Type: float (+ % or M) 192 | Valid values: 193 | 194 | soft_threshold_min_mem = 20 % 195 | 196 | Description: 197 | Type: float (+ % or M) 198 | Valid values: 199 | 200 | soft_threshold_min_swap = 20 % 201 | 202 | Description: 203 | Type: float (+ % or M) 204 | Valid values: 205 | 206 | soft_threshold_max_zram = 60 % 207 | 208 | Description: 209 | Type: float 210 | Valid values: 211 | 212 | soft_threshold_max_psi = 60 213 | 214 | ############################################################################### 215 | 216 | 6. 
Hard threshold 217 | 218 | hard_threshold_min_mem = 2 % 219 | 220 | Description: 221 | Type: float (+ % or M) 222 | Valid values: 223 | 224 | hard_threshold_min_swap = 2 % 225 | 226 | Description: 227 | Type: float (+ % or M) 228 | Valid values: 229 | 230 | hard_threshold_max_zram = 65 % 231 | 232 | Description: 233 | Type: float 234 | Valid values: 235 | 236 | hard_threshold_max_psi = 90 237 | 238 | ############################################################################### 239 | 240 | 7. Customize victim selection: adjusting badness of processes 241 | 242 | 7.1. Ignore positive oom_score_adj 243 | 244 | Description: 245 | Type: boolean 246 | Valid values: True and False 247 | 248 | ignore_positive_oom_score_adj = True 249 | 250 | 7.3.1. Matching process names with RE patterns change their badness 251 | 252 | Syntax: 253 | 254 | @BADNESS_ADJ_RE_NAME badness_adj /// RE_pattern 255 | 256 | New badness value will be += badness_adj 257 | 258 | It is possible to compare multiple patterns 259 | with different badness_adj values. 260 | 261 | Example: 262 | @BADNESS_ADJ_RE_NAME -500 /// ^sshd$ 263 | 264 | 7.3.2. Matching CGroup_v1-line with RE patterns 265 | 266 | @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/ 267 | 268 | @BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$ 269 | 270 | @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/ 271 | 272 | 7.3.3. Matching CGroup_v2-line with RE patterns 273 | 274 | @BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload 275 | 276 | 7.3.4. Matching eUIDs with RE patterns 277 | 278 | @BADNESS_ADJ_RE_UID -100 /// ^0$ 279 | 280 | 7.3.5. Matching realpath with RE patterns 281 | 282 | @BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo 283 | 284 | 7.3.5.1. Matching cwd with RE patterns 285 | 286 | @BADNESS_ADJ_RE_CWD 20 /// ^/home/ 287 | 288 | 7.3.6. 
Matching cmdlines with RE patterns 289 | 290 | @BADNESS_ADJ_RE_CMDLINE 2000 /// ^/bin/sleep 291 | 292 | 293 | Prefer chromium tabs and electron-based apps 294 | @BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer 295 | 296 | Prefer firefox tabs (Web Content and WebExtensions) 297 | @BADNESS_ADJ_RE_CMDLINE 100 /// -appomni 298 | 299 | @BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox 300 | 301 | 7.3.7. Matching environ with RE patterns 302 | 303 | @BADNESS_ADJ_RE_ENVIRON 100 /// USER=user 304 | 305 | 306 | Note that you can control badness also via systemd units via 307 | OOMScoreAdjust, see 308 | www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust= 309 | 310 | ############################################################################### 311 | 312 | 8. Customize soft corrective actions 313 | 314 | TODO: docs 315 | 316 | Syntax: 317 | KEY REGEXP SEPARATOR COMMAND 318 | 319 | 320 | @SOFT_ACTION_RE_NAME ^tail$ /// kill -SEGV $PID 321 | 322 | 323 | @SOFT_ACTION_RE_NAME ^foo$ /// kill -SEGV $PID 324 | @SOFT_ACTION_RE_NAME ^bash$ /// kill -9 $PID 325 | 326 | @SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE 327 | @SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE 328 | 329 | @SOFT_ACTION_RE_NAME ^tail$ /// kill -TERM $PID 330 | 331 | $PID will be replaced by process PID. 332 | $NAME will be replaced by process name. 333 | $SERVICE will be replaced by .service if it exists (otherwise it will be 334 | replaced by an empty string) 335 | 336 | ############################################################################### 337 | 338 | 9. 
Misc settings 339 | 340 | Description: 341 | Type: float 342 | Valid values: 343 | 344 | max_soft_exit_time = 10 345 | 346 | Description: 347 | Type: string 348 | Valid values: 349 | 350 | post_kill_exe = 351 | 352 | Description: 353 | Type: integer 354 | Valid values: 355 | 356 | min_badness = 10 357 | 358 | Description: 359 | Type: float 360 | Valid values: 361 | 362 | post_soft_action_delay = 3 363 | 364 | Description: 365 | Type: float 366 | Valid values: 367 | 368 | post_zombie_delay = 0.1 369 | 370 | Description: 371 | Type: float 372 | Valid values: 373 | 374 | victim_cache_time = 10 375 | 376 | Description: 377 | Type: float 378 | Valid values: 379 | 380 | exe_timeout = 20 381 | 382 | ############################################################################### 383 | 384 | 10. Verbosity, debug, logging 385 | 386 | Description: 387 | Type: boolean 388 | Valid values: True and False 389 | 390 | print_config_at_startup = True 391 | 392 | Description: 393 | Type: boolean 394 | Valid values: True and False 395 | 396 | print_mem_check_results = True 397 | 398 | Description: 399 | Type: float 400 | Valid values: 401 | 402 | min_mem_report_interval = 0 403 | 404 | Description: 405 | Type: boolean 406 | Valid values: True and False 407 | 408 | print_proc_table = True 409 | 410 | Description: 411 | Type: string 412 | Valid values: 413 | None 414 | cgroup_v1 415 | cgroup_v2 416 | realpath 417 | cwd 418 | cmdline 419 | environ 420 | 421 | extra_table_info = None 422 | 423 | Description: 424 | Type: boolean 425 | Valid values: True and False 426 | 427 | print_victim_status = True 428 | 429 | Description: 430 | Type: boolean 431 | Valid values: True and False 432 | 433 | print_victim_cmdline = True 434 | 435 | Description: 436 | Type: integer 437 | Valid values: 438 | 439 | max_victim_ancestry_depth = 99 440 | 441 | Description: 442 | Type: boolean 443 | Valid values: True and False 444 | 445 | print_statistics = True 446 | 447 | Description: 448 | Type: boolean 449 | 
Valid values: True and False 450 | 451 | debug_psi = True 452 | 453 | Description: 454 | Type: boolean 455 | Valid values: True and False 456 | 457 | debug_gui_notifications = True 458 | 459 | Description: 460 | Type: boolean 461 | Valid values: True and False 462 | 463 | debug_sleep = True 464 | 465 | Description: 466 | Type: boolean 467 | Valid values: True and False 468 | 469 | debug_threading = True 470 | 471 | Description: 472 | Type: boolean 473 | Valid values: True and False 474 | 475 | separate_log = True 476 | 477 | ############################################################################### 478 | 479 | Use cases, feature requests and any questions are welcome: 480 | https://github.com/hakavlad/nohang/issues 481 | -------------------------------------------------------------------------------- /deb/DEBIAN/conffiles: -------------------------------------------------------------------------------- 1 | /etc/nohang/nohang.conf 2 | /etc/nohang/nohang-desktop.conf 3 | /etc/logrotate.d/nohang 4 | -------------------------------------------------------------------------------- /deb/DEBIAN/control: -------------------------------------------------------------------------------- 1 | Package: nohang 2 | Version: 0.2.0 3 | Section: admin 4 | Architecture: all 5 | Depends: python3 6 | Suggests: libnotify-bin, sudo, logrotate 7 | Maintainer: Alexey Avramov 8 | Priority: optional 9 | Homepage: https://github.com/hakavlad/nohang 10 | Description: Sophisticated low memory handler 11 | nohang is a highly configurable daemon for Linux which is able to correctly 12 | prevent out of memory (OOM) and keep system responsiveness in low memory 13 | conditions. The package also includes additional diagnostic tools: oom-sort, 14 | psi2log, psi-top. 
15 | -------------------------------------------------------------------------------- /deb/DEBIAN/postinst: -------------------------------------------------------------------------------- 1 | systemctl daemon-reload 2 | -------------------------------------------------------------------------------- /deb/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -v 2 | make \ 3 | DESTDIR=deb/package \ 4 | PREFIX=/usr \ 5 | SYSCONFDIR=/etc \ 6 | SYSTEMDUNITDIR=/lib/systemd/system \ 7 | build_deb 8 | cd deb 9 | cp -r DEBIAN package/ 10 | fakeroot dpkg-deb --build package 11 | -------------------------------------------------------------------------------- /docs/FAQ.ru.md: -------------------------------------------------------------------------------- 1 | 2 | # FAQ для русскоязычных 3 | 4 | ### Каковы основные особенности демона? 5 | 6 | - Явная и гибкая конфигурация через конфигурационный файл. Все, что может быть настраиваемо, по возможности вынесено в конфиг. Таким образом, запуск демона без конфига невозможен. Также пользователь может видеть все значения ключей конфига. Минимум скрытых параметров. 7 | - Возможность поэтапного реагирования на нехватку памяти. Можно настроить три порога реакции: 8 | 1. Для отправки GUI уведомлений о нехватке памяти (либо выполнение произвольной команды, например отправки e-mail) 9 | 2. Порог отправки сигнала SIGTERM (в большинстве случаев коррекция происходит здесь). Это главное корректирующее действие, после которого большинство процессов завершаются, по возможности корректно. 10 | 3. Если жертва не реагирует на SIGTERM, то получит сигнал SIGKILL при дальнейшем уменьшении объема доступной памяти, или по прошествии определенного времени (ключ конфига max_soft_exit_time). 11 | - Возможность реагирования на разные виды раздражителей: 12 | 1. 
При наличии пространства подкачки демон реагирует на объем доступного пространства подкачки (SwapFree) при условии, что порог доступной памяти также ниже заданного уровня. При отсутствии пространства подкачки демон реагирует на объем доступной памяти (MemAvailable). 13 | 2. При наличии пространства подкачки демон может реагировать на превышение метрик PSI, если это задано в конфиге. Корректирующее действие происходит, если в течение заданного времени (psi_excess_duration) порог доступной памяти и порог заданной метрики PSI превышены одновременно, но не ранее чем через psi_post_action_delay после предыдущего корректирующего действия. 14 | 3. Возможность реакции на размер mem_used_total, если смонтированы устройства zram. 15 | - Возможность влияния на выбор жертвы при корректирующем действии путем сопоставления различных характеристик процесса (name, exe realpath, euid, cgroup etc) с заданными регулярными выражениями. Это похоже на механизм, используемый в ядре, однако вместо oom_score_adj для отдельных PID можно задать badness_adj для всех процессов, подходящих под определенные критерии. 16 | - Возможность GUI уведомлений о совершенных корректирующих действиях. 17 | - Возможность кастомизации корректирующего действия. Эта возможность еще сырая. Включает в себя: 18 | 1. На мягком (SIGTERM) пороге для процессов с заданными свойствами, если они становятся жертвами, выполнять заданную команду. 19 | 2. На жестком (SIGKILL) пороге можно с помощью ключа post_kill_exe задать произвольную команду, которая будет выполняться после любого жесткого корректирующего действия. 20 | 21 | ### Почему не триггерим ядерный OOM killer? 22 | 23 | ### Что такое PSI и как это помогает в обработке нехватки памяти? 24 | 25 | ### Как проверить поддержку PSI ядром? 26 | 27 | ### Зачем нужен ключ zram_checking_enabled? 28 | 29 | ### Как демон предотвращает убийство невиновных? 30 | 31 | ### Не показываются GUI уведомления. В чем возможная причина? 
32 | 33 | ### В KDE Plasma история GUI уведомлений не сохраняется. Как исправить? 34 | 35 | ### Как пользоваться oom-sort? 36 | 37 | ### Как пользоваться psi-top? 38 | 39 | ### Как пользоваться psi2log? 40 | 41 | ### nohang vs earlyoom 42 | 43 | ### nohang vs oomd 44 | 45 | ### Как запустить и протестировать nohang без установки? 46 | 47 | ### Что не так с ZFS? 48 | 49 | ### В каких ситуациях демон не поможет? 50 | 51 | ### Почему в настройках реакции на метрики PSI по умолчанию предлагается реагирование на some avg10, а не full avg10? 52 | 53 | ### Система зависает, демон не помогает. В чем проблема и что делать? 54 | 55 | ### Как протестировать работу демона? Как создать нагрузку на память? 56 | 57 | ### В каких случаях лучше не включать проверку PSI? 58 | 59 | ### nohang vs nohang-desktop: в чем разница? 60 | 61 | ### Как это вообще работает? 62 | 63 | ### Как получить список доступных для мониторинга файлов PSI? 64 | 65 | ### Поддерживается ли убийство групп процессов? 66 | 67 | Нет, но поддержка этого может быть добавлена в будущих релизах. 68 | 69 | ### Как смотреть логи? 70 | 71 | 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /docs/nohang.manpage.md: -------------------------------------------------------------------------------- 1 | % nohang(8) | Linux System Administrator's Manual 2 | 3 | # NAME 4 | nohang - A sophisticated low memory handler 5 | 6 | # SYNOPSIS 7 | **nohang** [**OPTION**]... 8 | 9 | # DESCRIPTION 10 | nohang is a highly configurable daemon for Linux which is able to correctly prevent out of memory (OOM) and keep system responsiveness in low memory conditions. 
11 | 12 | # REQUIREMENTS 13 | 14 | #### For basic usage: 15 | - Linux (>= 3.14, since MemAvailable appeared in /proc/meminfo) 16 | - Python (>= 3.3) 17 | 18 | #### To respond to PSI metrics (optional): 19 | - Linux (>= 4.20) with CONFIG_PSI=y 20 | 21 | #### To show GUI notifications (optional): 22 | - notification server (most of desktop environments use their own implementations) 23 | - libnotify (Arch Linux, Fedora, openSUSE) or libnotify-bin (Debian GNU/Linux, Ubuntu) 24 | - sudo if nohang started with UID=0. 25 | 26 | # COMMAND-LINE OPTIONS 27 | 28 | #### -h, --help 29 | show this help message and exit 30 | 31 | #### -v, --version 32 | show version of installed package and exit 33 | 34 | #### -m, --memload 35 | consume memory until 40 MiB (MemAvailable + SwapFree) remain free, and terminate the process 36 | 37 | #### -c CONFIG, --config CONFIG 38 | path to the config file. This should only be used with one of the following options: 39 | --monitor, --tasks, --check 40 | 41 | #### --check 42 | check and show the configuration and exit. This should only be used with -c/--config CONFIG option 43 | 44 | #### --monitor 45 | start monitoring. This should only be used with -c/--config CONFIG option 46 | 47 | #### --tasks 48 | show tasks state and exit. 
This should only be used with -c/--config CONFIG option 49 | 50 | # FILES 51 | 52 | #### :SYSCONFDIR:/nohang/nohang.conf 53 | path to vanilla nohang configuration file 54 | 55 | #### :SYSCONFDIR:/nohang/nohang-desktop.conf 56 | path to configuration file with settings optimized for desktop usage 57 | 58 | #### :DATADIR:/nohang/nohang.conf 59 | path to file with *default* nohang.conf values 60 | 61 | #### :DATADIR:/nohang/nohang-desktop.conf 62 | path to file with *default* nohang-desktop.conf values 63 | 64 | #### /var/log/nohang/nohang.log 65 | optional log file that stores entries if separate_log=True in the config 66 | 67 | #### /etc/logrotate.d/nohang 68 | logrotate config file that controls rotation in /var/log/nohang/ 69 | 70 | # nohang.conf vs nohang-desktop.conf 71 | - nohang.conf provides vanilla default settings without PSI checking enabled, without any badness correction and without GUI notifications enabled. 72 | - nohang-desktop.conf provides default settings optimized for desktop usage. 73 | 74 | # PROBLEMS 75 | The next problems can occur with out-of-tree kernels and modules: 76 | 77 | - The ZFS ARC cache is memory-reclaimable, like the Linux buffer cache. However, in contrast to the buffer cache, it currently does not count to MemAvailable [1]. See also [2] and [3]. 78 | - Linux kernels without CONFIG_CGROUP_CPUACCT=y (linux-ck, for example) provide incorrect PSI metrics, see this thread [4]. 79 | 80 | # HOW TO CONFIGURE 81 | The program can be configured by editing the config file. 
The configuration includes the following sections: 82 | 83 | - Memory levels to respond to as an OOM threat 84 | - Response on PSI memory metrics 85 | - The frequency of checking the level of available memory (and CPU usage) 86 | - The prevention of killing innocent victims 87 | - Impact on the badness of processes via matching their names, cmdlines and UIDs with regular expressions 88 | - The execution of a specific command or sending any signal instead of sending the SIGTERM signal 89 | - GUI notifications: 90 | - notifications of corrective actions taken 91 | - low memory warnings 92 | - Verbosity 93 | - Misc 94 | 95 | Just read the description of the parameters and edit the values. Restart the daemon to apply the changes. 96 | 97 | # CHECK CONFIG 98 | Check the config for errors: 99 | 100 | $ nohang --check --config /path/to/config 101 | 102 | # HOW TO TEST 103 | The safest way is to run **nohang --memload**. This causes memory consumption, and the process will exit before OOM occurs. Another way is to run **tail /dev/zero**. This causes fast memory consumption and causes OOM at the end. If testing occurs while nohang is running, these processes should be terminated before OOM occurs. 104 | 105 | # LOGGING 106 | To view the latest entries in the log (for systemd users): 107 | 108 | $ **sudo journalctl -eu nohang.service** 109 | 110 | or 111 | 112 | $ **sudo journalctl -eu nohang-desktop.service** 113 | 114 | You can also enable **separate_log** in the config to log to **/var/log/nohang/nohang.log**. 115 | 116 | # SIGNALS 117 | Sending SIGTERM, SIGINT, SIGQUIT or SIGHUP signals to the nohang process causes it to display corrective action stats and exit. 118 | 119 | # REPORTING BUGS 120 | Please ask any questions and report bugs at <https://github.com/hakavlad/nohang/issues>. 121 | 122 | # AUTHOR 123 | Written by Alexey Avramov . 124 | 125 | # HOMEPAGE 126 | Homepage is <https://github.com/hakavlad/nohang>. 127 | 128 | # SEE ALSO 129 | oom-sort(1), psi-top(1), psi2log(1) 130 | 131 | # NOTES 132 | 133 | 1. 
https://github.com/openzfs/zfs/issues/10255 134 | 2. https://github.com/rfjakob/earlyoom/pull/191#issuecomment-622314296 135 | 3. https://github.com/hakavlad/nohang/issues/89 136 | 4. https://github.com/hakavlad/nohang/issues/25#issuecomment-521390412 137 | -------------------------------------------------------------------------------- /docs/oom-sort.manpage.md: -------------------------------------------------------------------------------- 1 | % oom-sort(1) | General Commands Manual 2 | 3 | # NAME 4 | oom-sort - sort processes by oom_score 5 | 6 | # SYNOPSIS 7 | **oom-sort** [**OPTION**]... 8 | 9 | # DESCRIPTION 10 | oom-sort is a script that sorts tasks by oom_score by default. oom-sort is part of the nohang package. 11 | 12 | # OPTIONS 13 | 14 | #### -h, --help 15 | show this help message and exit 16 | 17 | #### --num NUM, -n NUM 18 | max number of lines; default: 99999 19 | 20 | #### --len LEN, -l LEN 21 | max cmdline length; default: 99999 22 | 23 | #### --sort SORT, -s SORT 24 | sort by unit; available units: oom_score, oom_score_adj, UID, PID, Name, VmRSS, VmSwap, cmdline (optional); default unit: oom_score 25 | 26 | # REPORTING BUGS 27 | Please ask any questions and report bugs at <https://github.com/hakavlad/nohang/issues>. 28 | 29 | # AUTHOR 30 | Written by Alexey Avramov . 31 | 32 | # HOMEPAGE 33 | Homepage is <https://github.com/hakavlad/nohang>. 34 | 35 | # SEE ALSO 36 | psi-top(1), psi2log(1), nohang(8) 37 | -------------------------------------------------------------------------------- /docs/psi-top.manpage.md: -------------------------------------------------------------------------------- 1 | % psi-top(1) | General Commands Manual 2 | 3 | # NAME 4 | psi-top - print the PSI metrics values for every cgroup. 5 | 6 | # SYNOPSIS 7 | **psi-top** [**OPTION**]... 8 | 9 | # DESCRIPTION 10 | psi-top is a script that prints the PSI metrics values for every cgroup. psi-top is part of the nohang package. 
11 | 12 | # OPTIONS 13 | 14 | #### -h, --help 15 | show this help message and exit 16 | 17 | #### -m METRICS, --metrics METRICS 18 | metrics (memory, io or cpu) 19 | 20 | # EXAMPLES 21 | $ psi-top 22 | 23 | $ psi-top --metrics io 24 | 25 | $ psi-top -m cpu 26 | 27 | # REPORTING BUGS 28 | Please ask any questions and report bugs at <https://github.com/hakavlad/nohang/issues>. 29 | 30 | # AUTHOR 31 | Written by Alexey Avramov . 32 | 33 | # HOMEPAGE 34 | Homepage is <https://github.com/hakavlad/nohang>. 35 | 36 | # SEE ALSO 37 | oom-sort(1), psi2log(1), nohang(8) 38 | -------------------------------------------------------------------------------- /docs/psi2log.manpage.md: -------------------------------------------------------------------------------- 1 | % psi2log(1) | General Commands Manual 2 | 3 | # NAME 4 | psi2log \- PSI metrics monitor and logger 5 | 6 | # SYNOPSIS 7 | **psi2log** [**OPTION**]... 8 | 9 | # DESCRIPTION 10 | psi2log is a CLI tool that can check and log PSI metrics from a specified target. psi2log is part of the nohang package. 11 | 12 | # OPTIONS 13 | 14 | #### -h, --help 15 | show this help message and exit 16 | 17 | #### -t TARGET, --target TARGET 18 | target (cgroup_v2 or SYSTEM_WIDE) 19 | 20 | #### -i INTERVAL, --interval INTERVAL 21 | interval in sec 22 | 23 | #### -l LOG, --log LOG 24 | path to log file 25 | 26 | #### -m MODE, --mode MODE 27 | mode (0, 1 or 2) 28 | 29 | #### -s SUPPRESS_OUTPUT, --suppress-output SUPPRESS_OUTPUT 30 | suppress output 31 | 32 | # EXAMPLES 33 | $ psi2log 34 | 35 | $ psi2log --mode 2 36 | 37 | $ psi2log --target /user.slice --interval 1.5 --log psi.log 38 | 39 | # SIGNALS 40 | Sending SIGTERM, SIGINT, SIGQUIT or SIGHUP signals to the psi2log process causes it to display peak values and exit. 41 | 42 | # REPORTING BUGS 43 | Please ask any questions and report bugs at <https://github.com/hakavlad/nohang/issues>. 44 | 45 | # AUTHOR 46 | Written by Alexey Avramov . 47 | 48 | # HOMEPAGE 49 | Homepage is <https://github.com/hakavlad/nohang>. 
50 | 51 | # SEE ALSO 52 | oom-sort(1), psi-top(1), nohang(8) 53 | -------------------------------------------------------------------------------- /man/nohang.8: -------------------------------------------------------------------------------- 1 | .\" Automatically generated by Pandoc 1.17.2 2 | .\" 3 | .TH "nohang" "8" "" "" "Linux System Administrator\[aq]s Manual" 4 | .hy 5 | .SH NAME 6 | .PP 7 | nohang \- A sophisticated low memory handler 8 | .SH SYNOPSIS 9 | .PP 10 | \f[B]nohang\f[] [\f[B]OPTION\f[]]... 11 | .SH DESCRIPTION 12 | .PP 13 | nohang is a highly configurable daemon for Linux which is able to 14 | correctly prevent out of memory (OOM) and keep system responsiveness in 15 | low memory conditions. 16 | .SH REQUIREMENTS 17 | .SS For basic usage: 18 | .IP \[bu] 2 19 | Linux (>= 3.14, since MemAvailable appeared in /proc/meminfo) 20 | .IP \[bu] 2 21 | Python (>= 3.3) 22 | .SS To respond to PSI metrics (optional): 23 | .IP \[bu] 2 24 | Linux (>= 4.20) with CONFIG_PSI=y 25 | .SS To show GUI notifications (optional): 26 | .IP \[bu] 2 27 | notification server (most of desktop environments use their own 28 | implementations) 29 | .IP \[bu] 2 30 | libnotify (Arch Linux, Fedora, openSUSE) or libnotify\-bin (Debian 31 | GNU/Linux, Ubuntu) 32 | .IP \[bu] 2 33 | sudo if nohang started with UID=0. 34 | .SH COMMAND\-LINE OPTIONS 35 | .SS \-h, \-\-help 36 | .PP 37 | show this help message and exit 38 | .SS \-v, \-\-version 39 | .PP 40 | show version of installed package and exit 41 | .SS \-m, \-\-memload 42 | .PP 43 | consume memory until 40 MiB (MemAvailable + SwapFree) remain free, and 44 | terminate the process 45 | .SS \-c CONFIG, \-\-config CONFIG 46 | .PP 47 | path to the config file. 48 | This should only be used with one of the following options: \-\-monitor, 49 | \-\-tasks, \-\-check 50 | .SS \-\-check 51 | .PP 52 | check and show the configuration and exit. 
53 | This should only be used with \-c/\-\-config CONFIG option 54 | .SS \-\-monitor 55 | .PP 56 | start monitoring. 57 | This should only be used with \-c/\-\-config CONFIG option 58 | .SS \-\-tasks 59 | .PP 60 | show tasks state and exit. 61 | This should only be used with \-c/\-\-config CONFIG option 62 | .SH FILES 63 | .SS :SYSCONFDIR:/nohang/nohang.conf 64 | .PP 65 | path to vanilla nohang configuration file 66 | .SS :SYSCONFDIR:/nohang/nohang\-desktop.conf 67 | .PP 68 | path to configuration file with settings optimized for desktop usage 69 | .SS :DATADIR:/nohang/nohang.conf 70 | .PP 71 | path to file with \f[I]default\f[] nohang.conf values 72 | .SS :DATADIR:/nohang/nohang\-desktop.conf 73 | .PP 74 | path to file with \f[I]default\f[] nohang\-desktop.conf values 75 | .SS /var/log/nohang/nohang.log 76 | .PP 77 | optional log file that stores entries if separate_log=True in the config 78 | .SS /etc/logrotate.d/nohang 79 | .PP 80 | logrotate config file that controls rotation in /var/log/nohang/ 81 | .SH nohang.conf vs nohang\-desktop.conf 82 | .IP \[bu] 2 83 | nohang.conf provides vanilla default settings without PSI checking 84 | enabled, without any badness correction and without GUI notifications 85 | enabled. 86 | .IP \[bu] 2 87 | nohang\-desktop.conf provides default settings optimized for desktop 88 | usage. 89 | .SH PROBLEMS 90 | .PP 91 | The next problems can occur with out\-of\-tree kernels and modules: 92 | .IP \[bu] 2 93 | The ZFS ARC cache is memory\-reclaimable, like the Linux buffer cache. 94 | However, in contrast to the buffer cache, it currently does not count to 95 | MemAvailable [1]. 96 | See also [2] and [3]. 97 | .IP \[bu] 2 98 | Linux kernels without CONFIG_CGROUP_CPUACCT=y (linux\-ck, for example) 99 | provide incorrect PSI metrics, see this thread [4]. 100 | .SH HOW TO CONFIGURE 101 | .PP 102 | The program can be configured by editing the config file. 
103 | The configuration includes the following sections: 104 | .IP \[bu] 2 105 | Memory levels to respond to as an OOM threat 106 | .IP \[bu] 2 107 | Response on PSI memory metrics 108 | .IP \[bu] 2 109 | The frequency of checking the level of available memory (and CPU usage) 110 | .IP \[bu] 2 111 | The prevention of killing innocent victims 112 | .IP \[bu] 2 113 | Impact on the badness of processes via matching their names, cmdlines 114 | and UIDs with regular expressions 115 | .IP \[bu] 2 116 | The execution of a specific command or sending any signal instead of 117 | sending the SIGTERM signal 118 | .IP \[bu] 2 119 | GUI notifications: 120 | .RS 2 121 | .IP \[bu] 2 122 | notifications of corrective actions taken 123 | .IP \[bu] 2 124 | low memory warnings 125 | .RE 126 | .IP \[bu] 2 127 | Verbosity 128 | .IP \[bu] 2 129 | Misc 130 | .PP 131 | Just read the description of the parameters and edit the values. 132 | Restart the daemon to apply the changes. 133 | .SH CHECK CONFIG 134 | .PP 135 | Check the config for errors: 136 | .PP 137 | $ nohang \-\-check \-\-config /path/to/config 138 | .SH HOW TO TEST 139 | .PP 140 | The safest way is to run \f[B]nohang \-\-memload\f[]. 141 | This causes memory consumption, and the process will exits before OOM 142 | occurs. 143 | Another way is to run \f[B]tail /dev/zero\f[]. 144 | This causes fast memory comsumption and causes OOM at the end. 145 | If testing occurs while nohang is running, these processes should be 146 | terminated before OOM occurs. 147 | .SH LOGGING 148 | .PP 149 | To view the latest entries in the log (for systemd users): 150 | .PP 151 | $ \f[B]sudo journalctl \-eu nohang.service\f[] 152 | .PP 153 | or 154 | .PP 155 | $ \f[B]sudo journalctl \-eu nohang\-desktop.service\f[] 156 | .PP 157 | You can also enable \f[B]separate_log\f[] in the config to logging in 158 | \f[B]/var/log/nohang/nohang.log\f[]. 
159 | .SH SIGNALS 160 | .PP 161 | Sending SIGTERM, SIGINT, SIGQUIT or SIGHUP signals to the nohang process 162 | causes it displays corrective action stats and exits. 163 | .SH REPORTING BUGS 164 | .PP 165 | Please ask any questions and report bugs at 166 | . 167 | .SH AUTHOR 168 | .PP 169 | Written by Alexey Avramov . 170 | .SH HOMEPAGE 171 | .PP 172 | Homepage is . 173 | .SH SEE ALSO 174 | .PP 175 | oom\-sort(1), psi\-top(1), psi2log(1) 176 | .SH NOTES 177 | .IP "1." 3 178 | https://github.com/openzfs/zfs/issues/10255 179 | .IP "2." 3 180 | https://github.com/rfjakob/earlyoom/pull/191#issuecomment\-622314296 181 | .IP "3." 3 182 | https://github.com/hakavlad/nohang/issues/89 183 | .IP "4." 3 184 | https://github.com/hakavlad/nohang/issues/25#issuecomment\-521390412 185 | -------------------------------------------------------------------------------- /man/oom-sort.1: -------------------------------------------------------------------------------- 1 | .\" Automatically generated by Pandoc 1.17.2 2 | .\" 3 | .TH "oom\-sort" "1" "" "" "General Commands Manual" 4 | .hy 5 | .SH NAME 6 | .PP 7 | oom\-sort \- sort processes by oom_score 8 | .SH SYNOPSIS 9 | .PP 10 | \f[B]oom\-sort\f[] [\f[B]OPTION\f[]]... 11 | .SH DESCRIPTION 12 | .PP 13 | oom\-sort is script that sorts tasks by oom_score by default. 14 | oom\-sort is part of nohang package. 15 | .SH OPTIONS 16 | .SS \-h, \-\-help 17 | .PP 18 | show this help message and exit 19 | .SS \-\-num NUM, \-n NUM 20 | .PP 21 | max number of lines; default: 99999 22 | .SS \-\-len LEN, \-l LEN 23 | .PP 24 | max cmdline length; default: 99999 25 | .SS \-\-sort SORT, \-s SORT 26 | .PP 27 | sort by unit; available units: oom_score, oom_score_adj, UID, PID, Name, 28 | VmRSS, VmSwap, cmdline (optional); default unit: oom_score 29 | .SH REPORTING BUGS 30 | .PP 31 | Please ask any questions and report bugs at 32 | . 33 | .SH AUTHOR 34 | .PP 35 | Written by Alexey Avramov . 36 | .SH HOMEPAGE 37 | .PP 38 | Homepage is . 
39 | .SH SEE ALSO 40 | .PP 41 | psi\-top(1), psi2log(1), nohang(8) 42 | -------------------------------------------------------------------------------- /man/psi-top.1: -------------------------------------------------------------------------------- 1 | .\" Automatically generated by Pandoc 1.17.2 2 | .\" 3 | .TH "psi\-top" "1" "" "" "General Commands Manual" 4 | .hy 5 | .SH NAME 6 | .PP 7 | psi\-top \- print the PSI metrics values for every cgroup. 8 | .SH SYNOPSIS 9 | .PP 10 | \f[B]psi\-top\f[] [\f[B]OPTION\f[]]... 11 | .SH DESCRIPTION 12 | .PP 13 | psi\-top is script that prints the PSI metrics values for every cgroup. 14 | psi\-top is part of nohang package. 15 | .SH OPTIONS 16 | .SS \-h, \-\-help 17 | .PP 18 | show this help message and exit 19 | .SS \-m METRICS, \-\-metrics METRICS 20 | .PP 21 | metrics (memory, io or cpu) 22 | .SH EXAMPLES 23 | .PP 24 | $ psi\-top 25 | .PP 26 | $ psi\-top \-\-metrics io 27 | .PP 28 | $ psi\-top \-m cpu 29 | .SH REPORTING BUGS 30 | .PP 31 | Please ask any questions and report bugs at 32 | . 33 | .SH AUTHOR 34 | .PP 35 | Written by Alexey Avramov . 36 | .SH HOMEPAGE 37 | .PP 38 | Homepage is . 39 | .SH SEE ALSO 40 | .PP 41 | oom\-sort(1), psi2log(1), nohang(8) 42 | -------------------------------------------------------------------------------- /man/psi2log.1: -------------------------------------------------------------------------------- 1 | .\" Automatically generated by Pandoc 1.17.2 2 | .\" 3 | .TH "psi2log" "1" "" "" "General Commands Manual" 4 | .hy 5 | .SH NAME 6 | .PP 7 | psi2log \- PSI metrics monitor and logger 8 | .SH SYNOPSIS 9 | .PP 10 | \f[B]psi2log\f[] [\f[B]OPTION\f[]]... 11 | .SH DESCRIPTION 12 | .PP 13 | psi2log is a CLI tool that can check and log PSI metrics from specified 14 | target. 15 | psi2log is part of nohang package. 
16 | .SH OPTIONS 17 | .SS \-h, \-\-help 18 | .PP 19 | show this help message and exit 20 | .SS \-t TARGET, \-\-target TARGET 21 | .PP 22 | target (cgroup_v2 or SYTSTEM_WIDE) 23 | .SS \-i INTERVAL, \-\-interval INTERVAL 24 | .PP 25 | interval in sec 26 | .SS \-l LOG, \-\-log LOG 27 | .PP 28 | path to log file 29 | .SS \-m MODE, \-\-mode MODE 30 | .PP 31 | mode (0, 1 or 2) 32 | .SS \-s SUPPRESS_OUTPUT, \-\-suppress\-output SUPPRESS_OUTPUT 33 | .PP 34 | suppress output 35 | .SH EXAMPLES 36 | .PP 37 | $ psi2log 38 | .PP 39 | $ psi2log \-\-mode 2 40 | .PP 41 | $ psi2log \-\-target /user.slice \-\-interval 1.5 \-\-log psi.log 42 | .SH SIGNALS 43 | .PP 44 | Sending SIGTERM, SIGINT, SIGQUIT or SIGHUP signals to the psi2log 45 | process causes it displays peak values and exits.. 46 | .SH REPORTING BUGS 47 | .PP 48 | Please ask any questions and report bugs at 49 | . 50 | .SH AUTHOR 51 | .PP 52 | Written by Alexey Avramov . 53 | .SH HOMEPAGE 54 | .PP 55 | Homepage is . 56 | .SH SEE ALSO 57 | .PP 58 | oom\-sort(1), psi\-top(1), nohang(8) 59 | -------------------------------------------------------------------------------- /openrc/nohang-desktop.in: -------------------------------------------------------------------------------- 1 | #!/sbin/openrc-run 2 | 3 | name="nohang-desktop daemon" 4 | description="Sophisticated low memory handler" 5 | command=:TARGET_SBINDIR:/nohang 6 | command_args="--monitor --config :TARGET_SYSCONFDIR:/nohang/nohang-desktop.conf" 7 | pidfile="/var/run/nohang-desktop" 8 | start_stop_daemon_args="--background --make-pidfile" 9 | -------------------------------------------------------------------------------- /openrc/nohang.in: -------------------------------------------------------------------------------- 1 | #!/sbin/openrc-run 2 | 3 | name="nohang daemon" 4 | description="Sophisticated low memory handler" 5 | command=:TARGET_SBINDIR:/nohang 6 | command_args="--monitor --config :TARGET_SYSCONFDIR:/nohang/nohang.conf" 7 | pidfile="/var/run/nohang" 8 | 
start_stop_daemon_args="--background --make-pidfile" 9 | -------------------------------------------------------------------------------- /src/oom-sort: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | sort processes by oom_score 4 | """ 5 | 6 | 7 | from argparse import ArgumentParser 8 | from operator import itemgetter 9 | from os import listdir 10 | 11 | 12 | def pid_to_oom_score(pid): 13 | with open('/proc/{}/oom_score'.format(pid), 'rb', buffering=0) as f: 14 | return int(f.read()) 15 | 16 | 17 | def pid_to_oom_score_adj(pid): 18 | with open('/proc/{}/oom_score_adj'.format(pid), 'rb', buffering=0) as f: 19 | return int(f.read()) 20 | 21 | 22 | def pid_to_cmdline(pid): 23 | with open('/proc/{}/cmdline'.format(pid), 'rb', buffering=0) as f: 24 | return f.read().decode('utf-8', 'ignore').replace( 25 | '\x00', ' ').rstrip() 26 | 27 | 28 | def pid_to_status_units(pid): 29 | with open('/proc/{}/status'.format(pid), 'rb', buffering=0) as f: 30 | f_list = f.read().decode('utf-8', 'ignore').split('\n') 31 | for i in range(len(f_list)): 32 | if i == 1: 33 | name = f_list[0].split('\t')[1] 34 | if i == uid_index: 35 | uid = f_list[i].split('\t')[2] 36 | if i == vm_rss_index: 37 | vm_rss = f_list[i].split('\t')[1][:-3] 38 | if i == vm_swap_index: 39 | vm_swap = f_list[i].split('\t')[1][:-3] 40 | return name, uid, vm_rss, vm_swap 41 | 42 | 43 | def get_max_pid_len(): 44 | with open('/proc/sys/kernel/pid_max') as f: 45 | for line in f: 46 | return len(line.strip()) 47 | 48 | 49 | sort_dict = { 50 | 'PID': 0, 51 | 'oom_score': 1, 52 | 'oom_score_adj': 2, 53 | 'cmdline': 3, 54 | 'Name': 4, 55 | 'UID': 5, 56 | 'VmRSS': 6, 57 | 'VmSwap': 7 58 | } 59 | 60 | 61 | parser = ArgumentParser() 62 | 63 | parser.add_argument( 64 | '--num', 65 | '-n', 66 | help="""max number of lines; default: 99999""", 67 | default=99999, 68 | type=str 69 | ) 70 | 71 | parser.add_argument( 72 | '--len', 73 | '-l', 74 | help="""max 
cmdline length; default: 99999""", 75 | default=99999, 76 | type=int 77 | ) 78 | 79 | 80 | parser.add_argument( 81 | '--sort', 82 | '-s', 83 | help="""sort by unit; default: oom_score""", 84 | default='oom_score', 85 | type=str 86 | ) 87 | 88 | 89 | args = parser.parse_args() 90 | 91 | display_cmdline = args.len 92 | 93 | num_lines = args.num 94 | 95 | sort_by = args.sort 96 | 97 | 98 | if sort_by not in sort_dict: 99 | print('Invalid -s/--sort value. Valid values are:\nPID\noom_score [defau' 100 | 'lt value]\noom_score_adj\nUID\nName\ncmdline\nVmRSS\nVmSwap') 101 | exit() 102 | 103 | 104 | # find VmRSS, VmSwap and UID positions in /proc/*/status for further 105 | # searching positions of UID, VmRSS and VmSwap in each process 106 | 107 | with open('/proc/self/status') as file: 108 | status_list = file.readlines() 109 | 110 | status_names = [] 111 | for s in status_list: 112 | status_names.append(s.split(':')[0]) 113 | 114 | uid_index = status_names.index('Uid') 115 | vm_rss_index = status_names.index('VmRSS') 116 | vm_swap_index = status_names.index('VmSwap') 117 | 118 | 119 | # get sorted list with pid, oom_score, oom_score_adj, cmdline 120 | # get status units: name, uid, rss, swap 121 | 122 | 123 | oom_list = [] 124 | 125 | for pid in listdir('/proc'): 126 | 127 | # skip non-numeric entries and PID 1 128 | if pid.isdigit() is False or pid == '1': 129 | continue 130 | 131 | try: 132 | 133 | oom_score = pid_to_oom_score(pid) 134 | 135 | oom_score_adj = pid_to_oom_score_adj(pid) 136 | 137 | cmdline = pid_to_cmdline(pid) 138 | if cmdline == '': 139 | continue 140 | 141 | name, uid, vm_rss, vm_swap = pid_to_status_units(pid) 142 | 143 | except FileNotFoundError: 144 | continue 145 | 146 | except ProcessLookupError: 147 | continue 148 | 149 | except Exception as e: 150 | print(e) 151 | exit(1) 152 | 153 | oom_list.append(( 154 | int(pid), int(oom_score), int(oom_score_adj), cmdline, 155 | name, int(uid), int(vm_rss), int(vm_swap))) 156 | 157 | # list sorted by 
oom_score 158 | oom_list_sorted = sorted( 159 | oom_list, key=itemgetter(int(sort_dict[sort_by])), reverse=True) 160 | 161 | 162 | # find width of columns 163 | 164 | 165 | max_pid_len = get_max_pid_len() 166 | 167 | 168 | max_uid_len = len(str(sorted( 169 | oom_list, key=itemgetter(5), reverse=True)[0][5])) 170 | 171 | 172 | max_vm_rss_len = len(str(round( 173 | sorted(oom_list, key=itemgetter(6), reverse=True)[0][6] / 1024))) 174 | 175 | if max_vm_rss_len < 5: 176 | max_vm_rss_len = 5 177 | 178 | 179 | # print output 180 | 181 | if display_cmdline == 0: 182 | 183 | print( 184 | 'oom_score oom_score_adj{}UID{}PID Name {}VmRSS VmSwap'.format( 185 | ' ' * (max_uid_len - 2), 186 | ' ' * (max_pid_len - 2), 187 | ' ' * max_vm_rss_len 188 | ) 189 | ) 190 | 191 | print( 192 | '--------- ------------- {} {} --------------- {}-- --------'.format( 193 | '-' * max_uid_len, 194 | '-' * max_pid_len, 195 | '-' * max_vm_rss_len 196 | ) 197 | ) 198 | 199 | else: 200 | 201 | print( 202 | 'oom_score oom_score_adj{}UID{}PID Name {}VmRSS VmSwa' 203 | 'p cmdline'.format( 204 | ' ' * (max_uid_len - 2), 205 | ' ' * (max_pid_len - 2), 206 | ' ' * max_vm_rss_len 207 | ) 208 | ) 209 | 210 | print( 211 | '--------- ------------- {} {} --------------- {}-- ------' 212 | '-- -------'.format( 213 | '-' * max_uid_len, 214 | '-' * max_pid_len, 215 | '-' * max_vm_rss_len 216 | ) 217 | ) 218 | 219 | # print processes stats sorted by sort_dict[sort_by] 220 | 221 | for i in oom_list_sorted[:int(num_lines)]: 222 | 223 | pid = i[0] 224 | oom_score = i[1] 225 | oom_score_adj = i[2] 226 | cmdline = i[3] 227 | name = i[4] 228 | uid = i[5] 229 | vm_rss = i[6] 230 | vm_swap = i[7] 231 | 232 | print( 233 | '{} {} {} {} {} {} M {} M {}'.format( 234 | str(oom_score).rjust(9), 235 | str(oom_score_adj).rjust(13), 236 | str(uid).rjust(max_uid_len), 237 | str(pid).rjust(max_pid_len), 238 | name.ljust(15), 239 | str(round(vm_rss / 1024.0)).rjust(max_vm_rss_len, ' '), 240 | str(round(vm_swap / 1024.0)).rjust(6, ' 
def _psi_line_to_avgs(line):
    """Return the (avg10, avg60, avg300) value strings of one PSI line.

    A PSI line has the form
    ``some avg10=0.00 avg60=0.00 avg300=0.00 total=0``; fields 1-3 carry the
    averages as ``key=value`` pairs. Raises IndexError when the line has
    fewer fields than that.
    """
    fields = line.rstrip().split(' ')
    return (fields[1].split('=')[1],
            fields[2].split('=')[1],
            fields[3].split('=')[1])


def psi_path_to_metrics(psi_path):
    """Return the averages of the first two lines of a PSI file.

    The result is a 6-tuple of strings:
    ``(some_avg10, some_avg60, some_avg300,
    full_avg10, full_avg60, full_avg300)``, taken from the first ("some")
    and second ("full") line of ``psi_path``.

    Raises the OSError from ``open`` when the file cannot be read and
    IndexError when it has fewer than two well-formed lines.
    """
    with open(psi_path) as f:
        psi_list = f.readlines()

    return _psi_line_to_avgs(psi_list[0]) + _psi_line_to_avgs(psi_list[1])


def psi_path_to_metrics_cpu(psi_path):
    """Return ``(some_avg10, some_avg60, some_avg300)`` from a PSI file.

    Only the first line of ``psi_path`` is used. Raises the OSError from
    ``open`` when the file cannot be read and IndexError when the first line
    is missing or malformed.
    """
    with open(psi_path) as f:
        psi_list = f.readlines()

    return _psi_line_to_avgs(psi_list[0])


def cgroup2_root():
    """Return the mountpoint of the cgroup2 hierarchy, or None.

    Scans the mounts table named by the module-level ``mounts`` path. Each
    entry is ``device mountpoint fstype options ...``; the first entry whose
    filesystem type is ``cgroup2`` wins, whatever its mount options are
    (so read-only mounts are found as well). Returns None when no such entry
    exists.
    """
    with open(mounts) as f:
        for line in f:
            fields = line.split()
            if len(fields) >= 3 and fields[2] == 'cgroup2':
                return fields[1]
    return None


def get_psi_mem_files(cgroup2_path, met):
    """Return the paths of all ``<met>.pressure`` files below a directory.

    ``cgroup2_path`` is walked recursively with ``os.walk``; every directory
    that contains a file named exactly ``<met>.pressure`` contributes one
    path, in walk order.
    """
    wanted = '{}.pressure'.format(met)

    return [os.path.join(root, wanted)
            for root, _dirs, files in os.walk(cgroup2_path)
            if wanted in files]
2].partition('/cpu.pressure')[0] 72 | 73 | if path.endswith('/io.pressure'): 74 | return path.partition(cgroup2_mountpoint)[ 75 | 2].partition('/io.pressure')[0] 76 | 77 | if path.endswith('/memory.pressure'): 78 | return path.partition(cgroup2_mountpoint)[ 79 | 2].partition('/memory.pressure')[0] 80 | 81 | 82 | parser = ArgumentParser() 83 | 84 | parser.add_argument( 85 | '-m', 86 | '--metrics', 87 | help="""metrics (memory, io or cpu)""", 88 | default='memory', 89 | type=str 90 | ) 91 | 92 | 93 | args = parser.parse_args() 94 | 95 | 96 | met = args.metrics 97 | 98 | 99 | if not (met == 'memory' or met == 'io' or met == 'cpu'): 100 | print('ERROR: invalid metrics:', met) 101 | exit(1) 102 | 103 | 104 | psi_path = '/proc/pressure/{}'.format(met) 105 | mounts = '/proc/mounts' 106 | cgroup2_separator = ' cgroup2 rw,' 107 | 108 | cgroup2_mountpoint = cgroup2_root() 109 | 110 | 111 | if cgroup2_mountpoint is None: 112 | print('ERROR: cgroup_v2 hierarchy is not mounted') 113 | exit(1) 114 | 115 | 116 | try: 117 | psi_path_to_metrics('/proc/pressure/memory') 118 | except Exception as e: 119 | print('ERROR: {}'.format(e)) 120 | print('PSI metrics are not provided by the kernel. 
Exit.') 121 | exit(1) 122 | 123 | 124 | if cgroup2_mountpoint is not None: 125 | y = get_psi_mem_files(cgroup2_mountpoint, met) 126 | 127 | 128 | path_list = get_psi_mem_files(cgroup2_mountpoint, met) 129 | 130 | 131 | head_mem_io = '''PSI metrics: {} 132 | cgroup_v2 mountpoint: {} 133 | =====================|======================| 134 | some | full | 135 | -------------------- | -------------------- | 136 | avg10 avg60 avg300 | avg10 avg60 avg300 | cgroup_v2 137 | ------ ------ ------ | ------ ------ ------ | -----------'''.format( 138 | met, cgroup2_mountpoint) 139 | 140 | 141 | head_cpu = '''PSI metrics: {} 142 | cgroup_v2 mountpoint: {} 143 | =====================| 144 | some | 145 | -------------------- | 146 | avg10 avg60 avg300 | cgroup_v2 147 | ------ ------ ------ | -----------'''.format( 148 | met, cgroup2_mountpoint) 149 | 150 | 151 | if met == 'cpu': 152 | print(head_cpu) 153 | else: 154 | print(head_mem_io) 155 | 156 | 157 | if met == 'cpu': 158 | 159 | some_avg10, some_avg60, some_avg300 = psi_path_to_metrics_cpu(psi_path) 160 | 161 | print('{} {} {} | {}'.format( 162 | some_avg10.rjust(6), 163 | some_avg60.rjust(6), 164 | some_avg300.rjust(6), 165 | 'SYSTEM_WIDE')) 166 | 167 | else: 168 | 169 | (some_avg10, some_avg60, some_avg300, full_avg10, full_avg60, full_avg300 170 | ) = psi_path_to_metrics(psi_path) 171 | 172 | print('{} {} {} | {} {} {} | {}'.format( 173 | some_avg10.rjust(6), 174 | some_avg60.rjust(6), 175 | some_avg300.rjust(6), 176 | full_avg10.rjust(6), 177 | full_avg60.rjust(6), 178 | full_avg300.rjust(6), 'SYSTEM_WIDE')) 179 | 180 | 181 | for psi_path in path_list: 182 | 183 | if met == 'cpu': 184 | some_avg10, some_avg60, some_avg300 = psi_path_to_metrics_cpu(psi_path) 185 | 186 | print('{} {} {} | {}'.format( 187 | some_avg10.rjust(6), 188 | some_avg60.rjust(6), 189 | some_avg300.rjust(6), 190 | psi_path_to_cgroup2(psi_path))) 191 | 192 | else: 193 | (some_avg10, some_avg60, some_avg300, 194 | full_avg10, full_avg60, full_avg300) = 
def read_path(path):
    """Return the decoded content of ``path``, or None on failure.

    Open file objects are cached in the module-level ``fd`` dict so that a
    file is normally opened once and re-read with ``seek(0)``. When the
    cached object is missing, closed or can no longer be rewound, the file is
    opened again. Any OSError while opening or reading is passed to ``log``
    and reported to the caller as None; a handle whose read failed is closed
    so that the next call reopens it.

    At most 99999 bytes are read per call.
    """
    handle = fd.get(path)

    if handle is not None:
        try:
            handle.seek(0)
        except (ValueError, OSError):
            # ValueError: the handle was closed after an earlier read error.
            # OSError: the handle cannot be rewound any more; drop it.
            handle.close()
            handle = None

    if handle is None:
        try:
            handle = fd[path] = open(path, 'rb', buffering=0)
        except OSError as e:
            log(e)
            return None

    try:
        return handle.read(99999).decode()
    except OSError as e:
        log(e)
        handle.close()
        return None


def form1(num):
    """Format ``num`` with exactly two digits after the decimal point."""
    return '{:.2f}'.format(num)


def form2(num):
    """Format ``num`` rounded to exactly one digit after the decimal point."""
    return '{:.1f}'.format(num)
form1(peaks_dict['c_some_avg10']), 70 | form1(peaks_dict['c_some_avg60']), 71 | form1(peaks_dict['c_some_avg300']), 72 | )) 73 | log('----------- ------ ------ ------') 74 | log('some io {:>6} {:>6} {:>6}'.format( 75 | form1(peaks_dict['i_some_avg10']), 76 | form1(peaks_dict['i_some_avg60']), 77 | form1(peaks_dict['i_some_avg300']), 78 | )) 79 | 80 | log('full io {:>6} {:>6} {:>6}'.format( 81 | form1(peaks_dict['i_full_avg10']), 82 | form1(peaks_dict['i_full_avg60']), 83 | form1(peaks_dict['i_full_avg300']), 84 | )) 85 | 86 | log('----------- ------ ------ ------') 87 | 88 | log('some memory {:>6} {:>6} {:>6}'.format( 89 | form1(peaks_dict['m_some_avg10']), 90 | form1(peaks_dict['m_some_avg60']), 91 | form1(peaks_dict['m_some_avg300']), 92 | )) 93 | 94 | log('full memory {:>6} {:>6} {:>6}'.format( 95 | form1(peaks_dict['m_full_avg10']), 96 | form1(peaks_dict['m_full_avg60']), 97 | form1(peaks_dict['m_full_avg300']), 98 | )) 99 | 100 | if lpd == 5: 101 | log('----- | ----- ----- | ----- ----- | --------') 102 | log('{:>5} | {:>5} {:>5} | {:>5} {:>5} | peaks'.format( 103 | form2(peaks_dict['avg_cs']), 104 | form2(peaks_dict['avg_is']), 105 | form2(peaks_dict['avg_if']), 106 | form2(peaks_dict['avg_ms']), 107 | form2(peaks_dict['avg_mf']) 108 | )) 109 | 110 | if target == 'SYSTEM_WIDE': 111 | log_stall_times() 112 | 113 | if separate_log: 114 | logging.info('') 115 | 116 | exit() 117 | 118 | 119 | def log_stall_times(): 120 | """ 121 | """ 122 | total_cs_1 = psi_file_cpu_to_total(cpu_file) 123 | total_is_1, total_if_1 = psi_file_mem_to_total(io_file) 124 | total_ms_1, total_mf_1 = psi_file_mem_to_total(memory_file) 125 | t = monotonic() - t_0 126 | 127 | M = 1000000 128 | 129 | dcs = (total_cs_1 - total_cs_0) / M 130 | dis = (total_is_1 - total_is_0) / M 131 | dif = (total_if_1 - total_if_0) / M 132 | dms = (total_ms_1 - total_ms_0) / M 133 | dmf = (total_mf_1 - total_mf_0) / M 134 | 135 | if mode == '0' or mode == '1': 136 | log('=================================') 
def cgroup2_root():
    """Return the mountpoint of the cgroup2 hierarchy, or None.

    Scans the mounts table named by the module-level ``mounts`` path. Each
    entry is ``device mountpoint fstype options ...``; the first entry whose
    filesystem type is ``cgroup2`` wins, whatever its mount options are
    (so read-only mounts are found as well). Returns None when no such entry
    exists.
    """
    with open(mounts) as f:
        for line in f:
            fields = line.split()
            if len(fields) >= 3 and fields[2] == 'cgroup2':
                return fields[1]
    return None


def mlockall():
    """Lock the process memory with mlockall(2) and log the outcome.

    ``MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT`` is tried first; when that call
    fails, ``MCL_CURRENT | MCL_FUTURE`` is tried. If both fail, a warning
    with the errno of the last attempt is logged and the program carries on
    without locked memory.
    """
    # Local imports: only this function needs them.
    from ctypes import get_errno
    from os import strerror

    MCL_CURRENT = 1
    MCL_FUTURE = 2
    MCL_ONFAULT = 4

    # use_errno=True makes the C errno of each call available via get_errno()
    libc = CDLL(None, use_errno=True)

    if libc.mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) == 0:
        log('Process memory locked with '
            'MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT')
        return

    if libc.mlockall(MCL_CURRENT | MCL_FUTURE) == 0:
        log('Process memory locked with MCL_CURRENT | MCL_FUTURE')
        return

    # mlockall() itself only returns -1 on failure; the reason is in errno.
    err = get_errno()
    log('WARNING: cannot lock process memory: [Errno {}] {}'.format(
        err, strerror(err)))
def _psi_parse(psi_path, rows, columns):
    """Return selected ``key=value`` values from the top of a PSI file.

    The file is read through ``read_path``. For each of the first ``rows``
    lines, the space-separated fields at the indexes in ``columns`` are
    taken and the text after '=' is collected. The result is one flat tuple
    of strings, ordered line by line.

    Returns None when the file could not be read or does not have the
    expected layout; a layout problem is passed to ``log``.
    """
    text = read_path(psi_path)

    if text is None:
        return None

    try:
        lines = text.split('\n')
        return tuple(
            lines[row].split(' ')[col].split('=')[1]
            for row in range(rows)
            for col in columns)

    except Exception as e:
        log('{}'.format(e))
        return None


def psi_file_mem_to_metrics(psi_path):
    """Return the averages of the first two lines of a PSI file, or None.

    On success the result is a 6-tuple of strings:
    ``(some_avg10, some_avg60, some_avg300,
    full_avg10, full_avg60, full_avg300)``.
    """
    return _psi_parse(psi_path, 2, (1, 2, 3))


def psi_file_cpu_to_metrics(psi_path):
    """Return ``(some_avg10, some_avg60, some_avg300)`` as strings, or None.

    Only the first line of the PSI file is used.
    """
    return _psi_parse(psi_path, 1, (1, 2, 3))


def psi_file_mem_to_total(psi_path):
    """Return ``(some_total, full_total)`` as ints, or None.

    The totals are the fifth field (``total=...``) of the first two lines of
    the PSI file. A value that is not an integer is passed to ``log`` and
    reported as None.
    """
    totals = _psi_parse(psi_path, 2, (4,))

    if totals is None:
        return None

    try:
        return int(totals[0]), int(totals[1])

    except Exception as e:
        log('{}'.format(e))
        return None
| 290 | if foo is None: 291 | return None 292 | 293 | try: 294 | psi_list = foo.split('\n') 295 | 296 | some_list = psi_list[0].split(' ') 297 | some_total = some_list[4].split('=')[1] 298 | 299 | return int(some_total) 300 | 301 | except Exception as e: 302 | log('{}'.format(e)) 303 | return None 304 | 305 | 306 | def print_head_0(): 307 | """ 308 | """ 309 | log('===================================================================' 310 | '============') 311 | log(' cpu || io || memory') 312 | log('============= || ============================= || =================' 313 | '============') 314 | log(' some || some | full || some | ' 315 | ' full') 316 | log('------------- || ------------- | ------------- || ------------- | -' 317 | '------------') 318 | log(' avg10 avg60 || avg10 avg60 | avg10 avg60 || avg10 avg60 | ' 319 | 'avg10 avg60') 320 | log('------ ------ || ------ ------ | ------ ------ || ------ ------ | -' 321 | '----- ------') 322 | 323 | 324 | def print_head_1(): 325 | """ 326 | """ 327 | log('====================================================================' 328 | '==============================================') 329 | log(' cpu || io ' 330 | '|| memory') 331 | log('==================== || =========================================== ' 332 | '|| ===========================================') 333 | log(' some || some | full ' 334 | '|| some | full') 335 | log('-------------------- || -------------------- | -------------------- ' 336 | '|| -------------------- | --------------------') 337 | log(' avg10 avg60 avg300 || avg10 avg60 avg300 | avg10 avg60 avg300 ' 338 | '|| avg10 avg60 avg300 | avg10 avg60 avg300') 339 | log('------ ------ ------ || ------ ------ ------ | ------ ------ ------ ' 340 | '|| ------ ------ ------ | ------ ------ ------') 341 | 342 | 343 | def print_head_2(): 344 | """ 345 | """ 346 | log('----- - ----------- - ----------- -') 347 | log(' cpu | io | memory |') 348 | log('----- | ----------- | ----------- |') 349 | log(' some | some 
full | some full | interval') 350 | log('----- | ----- ----- | ----- ----- | --------') 351 | 352 | 353 | def log(*msg): 354 | """ 355 | """ 356 | if not SUPPRESS_OUTPUT: 357 | print(*msg) 358 | if separate_log: 359 | logging.info(*msg) 360 | 361 | 362 | def log_head(*msg): 363 | """ 364 | """ 365 | print(*msg) 366 | if separate_log: 367 | logging.info(*msg) 368 | 369 | 370 | parser = ArgumentParser() 371 | 372 | parser.add_argument( 373 | '-t', 374 | '--target', 375 | help="""target (cgroup_v2 or SYSTEM_WIDE)""", 376 | default='SYSTEM_WIDE', 377 | type=str 378 | ) 379 | 380 | 381 | parser.add_argument( 382 | '-i', 383 | '--interval', 384 | help="""interval in sec""", 385 | default=2, 386 | type=float 387 | ) 388 | 389 | 390 | parser.add_argument( 391 | '-l', 392 | '--log', 393 | help="""path to log file""", 394 | default=None, 395 | type=str 396 | ) 397 | 398 | 399 | parser.add_argument( 400 | '-m', 401 | '--mode', 402 | help="""mode (0, 1 or 2)""", 403 | default='0', 404 | type=str 405 | ) 406 | 407 | 408 | parser.add_argument( 409 | '-s', 410 | '--suppress-output', 411 | help="""suppress output""", 412 | default='False', 413 | type=str 414 | ) 415 | 416 | 417 | args = parser.parse_args() 418 | target = args.target 419 | mode = args.mode 420 | interval = args.interval 421 | log_file = args.log 422 | suppress_output = args.suppress_output 423 | 424 | if target != 'SYSTEM_WIDE': 425 | target = '/' + target.strip('/') 426 | 427 | 428 | if log_file is None: 429 | separate_log = False 430 | else: 431 | separate_log = True 432 | import logging 433 | 434 | if separate_log: 435 | try: 436 | logging.basicConfig( 437 | filename=log_file, 438 | level=logging.INFO, 439 | format="%(asctime)s: %(message)s") 440 | except Exception as e: 441 | print(e) 442 | exit(1) 443 | 444 | 445 | if suppress_output == 'False': 446 | SUPPRESS_OUTPUT = False 447 | elif suppress_output == 'True': 448 | SUPPRESS_OUTPUT = True 449 | else: 450 | log_head('error: argument -s/--suppress-output: 
valid values are ' 451 | 'False and True') 452 | exit(1) 453 | 454 | 455 | if log_file is not None: 456 | logstring = 'log file: {}, '.format(log_file) 457 | else: 458 | logstring = 'log file is not set, ' 459 | 460 | 461 | if interval < 1: 462 | log_head('error: argument -i/--interval: the value must be greater than or' 463 | ' equal to 1') 464 | exit(1) 465 | 466 | 467 | if not (mode == '0' or mode == '1' or mode == '2'): 468 | log_head('ERROR: invalid mode. Valid values are 0, 1 and 2. Exit.') 469 | exit(1) 470 | 471 | 472 | try: 473 | psi_file_mem_to_metrics0('/proc/pressure/memory') 474 | except Exception as e: 475 | log_head('ERROR: {}'.format(e)) 476 | log_head('PSI metrics are not provided by the kernel. Exit.') 477 | exit(1) 478 | 479 | 480 | log_head('Starting psi2log, target: {}, mode: {}, interval: {} sec, {}suppress' 481 | ' output: {}'.format( 482 | target, mode, round(interval, 3), logstring, suppress_output)) 483 | 484 | 485 | fd = dict() 486 | 487 | 488 | if target == 'SYSTEM_WIDE': 489 | system_wide = True 490 | source_dir = '/proc/pressure' 491 | cpu_file = '/proc/pressure/cpu' 492 | io_file = '/proc/pressure/io' 493 | memory_file = '/proc/pressure/memory' 494 | log_head('PSI source dir: /proc/pressure/, source files: cpu, io, memory') 495 | else: 496 | system_wide = False 497 | mounts = '/proc/mounts' 498 | cgroup2_separator = ' cgroup2 rw,' 499 | cgroup2_mountpoint = cgroup2_root() 500 | 501 | if cgroup2_mountpoint is None: 502 | log('ERROR: unified cgroup hierarchy is not mounted, exit') 503 | exit(1) 504 | 505 | source_dir = cgroup2_mountpoint + target 506 | cpu_file = source_dir + '/cpu.pressure' 507 | io_file = source_dir + '/io.pressure' 508 | memory_file = source_dir + '/memory.pressure' 509 | log_head('PSI source dir: {}{}/, source files: cpu.pressure, io.pressure,' 510 | ' memory.pressure'.format(cgroup2_mountpoint, target)) 511 | 512 | 513 | abnormal_interval = 1.01 * interval 514 | abnormal_inaccuracy = 0.05 515 | 516 | 517 | if 
target == 'SYSTEM_WIDE': 518 | total_cs_0 = psi_file_cpu_to_total(cpu_file) 519 | total_is_0, total_if_0 = psi_file_mem_to_total(io_file) 520 | total_ms_0, total_mf_0 = psi_file_mem_to_total(memory_file) 521 | t_0 = monotonic() 522 | 523 | 524 | peaks_dict = dict() 525 | 526 | 527 | sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP] 528 | 529 | for i in sig_list: 530 | signal(i, signal_handler) 531 | 532 | 533 | mlockall() 534 | 535 | 536 | if mode == '0': 537 | 538 | print_head_0() 539 | 540 | while True: 541 | 542 | try: 543 | 544 | (c_some_avg10, c_some_avg60, c_some_avg300 545 | ) = psi_file_cpu_to_metrics(cpu_file) 546 | 547 | (i_some_avg10, i_some_avg60, i_some_avg300, 548 | i_full_avg10, i_full_avg60, i_full_avg300 549 | ) = psi_file_mem_to_metrics(io_file) 550 | 551 | (m_some_avg10, m_some_avg60, m_some_avg300, 552 | m_full_avg10, m_full_avg60, m_full_avg300 553 | ) = psi_file_mem_to_metrics(memory_file) 554 | 555 | except TypeError: 556 | stdout.flush() 557 | sleep(interval) 558 | continue 559 | 560 | log('{:>6} {:>6} || {:>6} {:>6} | {:>6} {:>6} || {:>6} {:>6} | {:>6} ' 561 | '{:>6}'.format( 562 | 563 | c_some_avg10, c_some_avg60, 564 | 565 | i_some_avg10, i_some_avg60, 566 | i_full_avg10, i_full_avg60, 567 | 568 | m_some_avg10, m_some_avg60, 569 | m_full_avg10, m_full_avg60 570 | 571 | )) 572 | 573 | c_some_avg10 = float(c_some_avg10) 574 | if ('c_some_avg10' not in peaks_dict or 575 | peaks_dict['c_some_avg10'] < c_some_avg10): 576 | peaks_dict['c_some_avg10'] = c_some_avg10 577 | 578 | c_some_avg60 = float(c_some_avg60) 579 | if ('c_some_avg60' not in peaks_dict or 580 | peaks_dict['c_some_avg60'] < c_some_avg60): 581 | peaks_dict['c_some_avg60'] = c_some_avg60 582 | 583 | c_some_avg300 = float(c_some_avg300) 584 | if ('c_some_avg300' not in peaks_dict or 585 | peaks_dict['c_some_avg300'] < c_some_avg300): 586 | peaks_dict['c_some_avg300'] = c_some_avg300 587 | 588 | ####################################################################### 589 | 590 | 
i_some_avg10 = float(i_some_avg10) 591 | if ('i_some_avg10' not in peaks_dict or 592 | peaks_dict['i_some_avg10'] < i_some_avg10): 593 | peaks_dict['i_some_avg10'] = i_some_avg10 594 | 595 | i_some_avg60 = float(i_some_avg60) 596 | if ('i_some_avg60' not in peaks_dict or 597 | peaks_dict['i_some_avg60'] < i_some_avg60): 598 | peaks_dict['i_some_avg60'] = i_some_avg60 599 | 600 | i_some_avg300 = float(i_some_avg300) 601 | if ('i_some_avg300' not in peaks_dict or 602 | peaks_dict['i_some_avg300'] < i_some_avg300): 603 | peaks_dict['i_some_avg300'] = i_some_avg300 604 | 605 | i_full_avg10 = float(i_full_avg10) 606 | if ('i_full_avg10' not in peaks_dict or 607 | peaks_dict['i_full_avg10'] < i_full_avg10): 608 | peaks_dict['i_full_avg10'] = i_full_avg10 609 | 610 | i_full_avg60 = float(i_full_avg60) 611 | if ('i_full_avg60' not in peaks_dict or 612 | peaks_dict['i_full_avg60'] < i_full_avg60): 613 | peaks_dict['i_full_avg60'] = i_full_avg60 614 | 615 | i_full_avg300 = float(i_full_avg300) 616 | if ('i_full_avg300' not in peaks_dict or 617 | peaks_dict['i_full_avg300'] < i_full_avg300): 618 | peaks_dict['i_full_avg300'] = i_full_avg300 619 | 620 | ####################################################################### 621 | 622 | m_some_avg10 = float(m_some_avg10) 623 | if ('m_some_avg10' not in peaks_dict or 624 | peaks_dict['m_some_avg10'] < m_some_avg10): 625 | peaks_dict['m_some_avg10'] = m_some_avg10 626 | 627 | m_some_avg60 = float(m_some_avg60) 628 | if ('m_some_avg60' not in peaks_dict or 629 | peaks_dict['m_some_avg60'] < m_some_avg60): 630 | peaks_dict['m_some_avg60'] = m_some_avg60 631 | 632 | m_some_avg300 = float(m_some_avg300) 633 | if ('m_some_avg300' not in peaks_dict or 634 | peaks_dict['m_some_avg300'] < m_some_avg300): 635 | peaks_dict['m_some_avg300'] = m_some_avg300 636 | 637 | m_full_avg10 = float(m_full_avg10) 638 | if ('m_full_avg10' not in peaks_dict or 639 | peaks_dict['m_full_avg10'] < m_full_avg10): 640 | peaks_dict['m_full_avg10'] = 
m_full_avg10 641 | 642 | m_full_avg60 = float(m_full_avg60) 643 | if ('m_full_avg60' not in peaks_dict or 644 | peaks_dict['m_full_avg60'] < m_full_avg60): 645 | peaks_dict['m_full_avg60'] = m_full_avg60 646 | 647 | m_full_avg300 = float(m_full_avg300) 648 | if ('m_full_avg300' not in peaks_dict or 649 | peaks_dict['m_full_avg300'] < m_full_avg300): 650 | peaks_dict['m_full_avg300'] = m_full_avg300 651 | 652 | stdout.flush() 653 | sleep(interval) 654 | 655 | 656 | if mode == '1': 657 | 658 | print_head_1() 659 | 660 | while True: 661 | 662 | try: 663 | 664 | (c_some_avg10, c_some_avg60, c_some_avg300 665 | ) = psi_file_cpu_to_metrics(cpu_file) 666 | 667 | (i_some_avg10, i_some_avg60, i_some_avg300, 668 | i_full_avg10, i_full_avg60, i_full_avg300 669 | ) = psi_file_mem_to_metrics(io_file) 670 | 671 | (m_some_avg10, m_some_avg60, m_some_avg300, 672 | m_full_avg10, m_full_avg60, m_full_avg300 673 | ) = psi_file_mem_to_metrics(memory_file) 674 | 675 | except TypeError: 676 | stdout.flush() 677 | sleep(interval) 678 | continue 679 | 680 | log('{:>6} {:>6} {:>6} || {:>6} {:>6} {:>6} | {:>6} {:>6} {:>6} || ' 681 | '{:>6} {:>6} {:>6} | {:>6} {:>6} {:>6}'.format( 682 | 683 | c_some_avg10, c_some_avg60, c_some_avg300, 684 | 685 | i_some_avg10, i_some_avg60, i_some_avg300, 686 | i_full_avg10, i_full_avg60, i_full_avg300, 687 | 688 | m_some_avg10, m_some_avg60, m_some_avg300, 689 | m_full_avg10, m_full_avg60, m_full_avg300 690 | 691 | )) 692 | 693 | c_some_avg10 = float(c_some_avg10) 694 | if ('c_some_avg10' not in peaks_dict or 695 | peaks_dict['c_some_avg10'] < c_some_avg10): 696 | peaks_dict['c_some_avg10'] = c_some_avg10 697 | 698 | c_some_avg60 = float(c_some_avg60) 699 | if ('c_some_avg60' not in peaks_dict or 700 | peaks_dict['c_some_avg60'] < c_some_avg60): 701 | peaks_dict['c_some_avg60'] = c_some_avg60 702 | 703 | c_some_avg300 = float(c_some_avg300) 704 | if ('c_some_avg300' not in peaks_dict or 705 | peaks_dict['c_some_avg300'] < c_some_avg300): 706 | 
peaks_dict['c_some_avg300'] = c_some_avg300 707 | 708 | ####################################################################### 709 | 710 | i_some_avg10 = float(i_some_avg10) 711 | if ('i_some_avg10' not in peaks_dict or 712 | peaks_dict['i_some_avg10'] < i_some_avg10): 713 | peaks_dict['i_some_avg10'] = i_some_avg10 714 | 715 | i_some_avg60 = float(i_some_avg60) 716 | if ('i_some_avg60' not in peaks_dict or 717 | peaks_dict['i_some_avg60'] < i_some_avg60): 718 | peaks_dict['i_some_avg60'] = i_some_avg60 719 | 720 | i_some_avg300 = float(i_some_avg300) 721 | if ('i_some_avg300' not in peaks_dict or 722 | peaks_dict['i_some_avg300'] < i_some_avg300): 723 | peaks_dict['i_some_avg300'] = i_some_avg300 724 | 725 | i_full_avg10 = float(i_full_avg10) 726 | if ('i_full_avg10' not in peaks_dict or 727 | peaks_dict['i_full_avg10'] < i_full_avg10): 728 | peaks_dict['i_full_avg10'] = i_full_avg10 729 | 730 | i_full_avg60 = float(i_full_avg60) 731 | if ('i_full_avg60' not in peaks_dict or 732 | peaks_dict['i_full_avg60'] < i_full_avg60): 733 | peaks_dict['i_full_avg60'] = i_full_avg60 734 | 735 | i_full_avg300 = float(i_full_avg300) 736 | if ('i_full_avg300' not in peaks_dict or 737 | peaks_dict['i_full_avg300'] < i_full_avg300): 738 | peaks_dict['i_full_avg300'] = i_full_avg300 739 | 740 | ####################################################################### 741 | 742 | m_some_avg10 = float(m_some_avg10) 743 | if ('m_some_avg10' not in peaks_dict or 744 | peaks_dict['m_some_avg10'] < m_some_avg10): 745 | peaks_dict['m_some_avg10'] = m_some_avg10 746 | 747 | m_some_avg60 = float(m_some_avg60) 748 | if ('m_some_avg60' not in peaks_dict or 749 | peaks_dict['m_some_avg60'] < m_some_avg60): 750 | peaks_dict['m_some_avg60'] = m_some_avg60 751 | 752 | m_some_avg300 = float(m_some_avg300) 753 | if ('m_some_avg300' not in peaks_dict or 754 | peaks_dict['m_some_avg300'] < m_some_avg300): 755 | peaks_dict['m_some_avg300'] = m_some_avg300 756 | 757 | m_full_avg10 = float(m_full_avg10) 
758 | if ('m_full_avg10' not in peaks_dict or 759 | peaks_dict['m_full_avg10'] < m_full_avg10): 760 | peaks_dict['m_full_avg10'] = m_full_avg10 761 | 762 | m_full_avg60 = float(m_full_avg60) 763 | if ('m_full_avg60' not in peaks_dict or 764 | peaks_dict['m_full_avg60'] < m_full_avg60): 765 | peaks_dict['m_full_avg60'] = m_full_avg60 766 | 767 | m_full_avg300 = float(m_full_avg300) 768 | if ('m_full_avg300' not in peaks_dict or 769 | peaks_dict['m_full_avg300'] < m_full_avg300): 770 | peaks_dict['m_full_avg300'] = m_full_avg300 771 | 772 | stdout.flush() 773 | sleep(interval) 774 | 775 | 776 | print_head_2() 777 | 778 | try: 779 | 780 | total_cs0 = psi_file_cpu_to_total(cpu_file) 781 | total_is0, total_if0 = psi_file_mem_to_total(io_file) 782 | total_ms0, total_mf0 = psi_file_mem_to_total(memory_file) 783 | monotonic0 = monotonic() 784 | stdout.flush() 785 | sleep(interval) 786 | 787 | except TypeError: 788 | stdout.flush() 789 | sleep(interval) 790 | 791 | TT = 10000 792 | 793 | while True: 794 | 795 | try: 796 | 797 | total_cs1 = psi_file_cpu_to_total(cpu_file) 798 | total_is1, total_if1 = psi_file_mem_to_total(io_file) 799 | total_ms1, total_mf1 = psi_file_mem_to_total(memory_file) 800 | monotonic1 = monotonic() 801 | dm = monotonic1 - monotonic0 802 | 803 | if dm > abnormal_interval and dm - interval > abnormal_inaccuracy: 804 | log('WARNING: abnormal interval ({} sec), metrics may be prov' 805 | 'ided incorrect'.format(round(dm, 3))) 806 | 807 | monotonic0 = monotonic1 808 | 809 | except TypeError: 810 | stdout.flush() 811 | sleep(interval) 812 | continue 813 | 814 | dtotal_cs = total_cs1 - total_cs0 815 | avg_cs = dtotal_cs / dm / TT 816 | if 'avg_cs' not in peaks_dict or peaks_dict['avg_cs'] < avg_cs: 817 | peaks_dict['avg_cs'] = avg_cs 818 | total_cs0 = total_cs1 819 | 820 | dtotal_is = total_is1 - total_is0 821 | avg_is = dtotal_is / dm / TT 822 | if 'avg_is' not in peaks_dict or peaks_dict['avg_is'] < avg_is: 823 | peaks_dict['avg_is'] = avg_is 824 | 
total_is0 = total_is1 825 | 826 | dtotal_if = total_if1 - total_if0 827 | avg_if = dtotal_if / dm / TT 828 | if 'avg_if' not in peaks_dict or peaks_dict['avg_if'] < avg_if: 829 | peaks_dict['avg_if'] = avg_if 830 | total_if0 = total_if1 831 | 832 | dtotal_ms = total_ms1 - total_ms0 833 | avg_ms = dtotal_ms / dm / TT 834 | if 'avg_ms' not in peaks_dict or peaks_dict['avg_ms'] < avg_ms: 835 | peaks_dict['avg_ms'] = avg_ms 836 | total_ms0 = total_ms1 837 | 838 | dtotal_mf = total_mf1 - total_mf0 839 | avg_mf = dtotal_mf / dm / TT 840 | if 'avg_mf' not in peaks_dict or peaks_dict['avg_mf'] < avg_mf: 841 | peaks_dict['avg_mf'] = avg_mf 842 | total_mf0 = total_mf1 843 | 844 | log('{:>5} | {:>5} {:>5} | {:>5} {:>5} | {}'.format( 845 | 846 | round(avg_cs, 1), 847 | 848 | round(avg_is, 1), 849 | round(avg_if, 1), 850 | 851 | round(avg_ms, 1), 852 | round(avg_mf, 1), 853 | 854 | round(dm, 2) 855 | )) 856 | 857 | stdout.flush() 858 | sleep(interval) 859 | -------------------------------------------------------------------------------- /systemd/nohang-desktop.service.in: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Sophisticated low memory handler 3 | Documentation=man:nohang(8) https://github.com/hakavlad/nohang 4 | Conflicts=nohang.service 5 | After=sysinit.target 6 | 7 | [Service] 8 | ExecStart=:TARGET_SBINDIR:/nohang --monitor --config :TARGET_SYSCONFDIR:/nohang/nohang-desktop.conf 9 | Slice=hostcritical.slice 10 | SyslogIdentifier=nohang-desktop 11 | KillMode=mixed 12 | Restart=always 13 | RestartSec=0 14 | 15 | CPUSchedulingResetOnFork=true 16 | RestrictRealtime=yes 17 | 18 | TasksMax=25 19 | MemoryMax=100M 20 | MemorySwapMax=100M 21 | 22 | UMask=0027 23 | ProtectSystem=strict 24 | ReadWritePaths=/var/log 25 | InaccessiblePaths=/home /root 26 | ProtectKernelTunables=true 27 | ProtectKernelModules=true 28 | ProtectControlGroups=true 29 | ProtectHostname=true 30 | MemoryDenyWriteExecute=yes 31 | 
RestrictNamespaces=yes 32 | LockPersonality=yes 33 | PrivateTmp=true 34 | DeviceAllow=/dev/kmsg rw 35 | DevicePolicy=closed 36 | 37 | # Capabilities whitelist: 38 | # CAP_KILL is required to send signals 39 | # CAP_IPC_LOCK is required to call mlockall() 40 | # CAP_SYS_PTRACE is required to check /proc/[pid]/exe realpaths 41 | # CAP_DAC_READ_SEARCH is required to read /proc/[pid]/environ files 42 | # CAP_DAC_OVERRIDE fixes #94 43 | # CAP_DAC_READ_SEARCH CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE 44 | # are required to send GUI notifications 45 | # CAP_SYSLOG is required to check /dev/kmsg for OOM events 46 | 47 | CapabilityBoundingSet=CAP_KILL CAP_IPC_LOCK CAP_SYS_PTRACE \ 48 | CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID \ 49 | CAP_SYS_RESOURCE CAP_SYSLOG 50 | 51 | # `PrivateNetwork=true` breaks GUI notifications on oldstable distros 52 | # (Debian 8, CentOS 7, Linux Mint 18). On modern distros you can set 53 | # PrivateNetwork=true for security reasons. 
54 | #PrivateNetwork=true 55 | 56 | # Set realtime CPU scheduling policy if you want 57 | #CPUSchedulingPolicy=rr 58 | #CPUSchedulingPriority=1 59 | 60 | [Install] 61 | WantedBy=multi-user.target 62 | -------------------------------------------------------------------------------- /systemd/nohang.service.in: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Sophisticated low memory handler 3 | Documentation=man:nohang(8) https://github.com/hakavlad/nohang 4 | Conflicts=nohang-desktop.service 5 | After=sysinit.target 6 | 7 | [Service] 8 | ExecStart=:TARGET_SBINDIR:/nohang --monitor --config :TARGET_SYSCONFDIR:/nohang/nohang.conf 9 | Slice=hostcritical.slice 10 | SyslogIdentifier=nohang 11 | KillMode=mixed 12 | Restart=always 13 | RestartSec=0 14 | 15 | CPUSchedulingResetOnFork=true 16 | RestrictRealtime=yes 17 | 18 | TasksMax=25 19 | MemoryMax=100M 20 | MemorySwapMax=100M 21 | 22 | UMask=0027 23 | ProtectSystem=strict 24 | ReadWritePaths=/var/log 25 | InaccessiblePaths=/home /root 26 | ProtectKernelTunables=true 27 | ProtectKernelModules=true 28 | ProtectControlGroups=true 29 | ProtectHostname=true 30 | MemoryDenyWriteExecute=yes 31 | RestrictNamespaces=yes 32 | LockPersonality=yes 33 | PrivateTmp=true 34 | DeviceAllow=/dev/kmsg rw 35 | DevicePolicy=closed 36 | 37 | # Capabilities whitelist: 38 | # CAP_KILL is required to send signals 39 | # CAP_IPC_LOCK is required to call mlockall() 40 | # CAP_SYS_PTRACE is required to check /proc/[pid]/exe realpaths 41 | # CAP_DAC_READ_SEARCH is required to read /proc/[pid]/environ files 42 | # CAP_DAC_OVERRIDE fixes #94 43 | # CAP_DAC_READ_SEARCH CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE 44 | # are required to send GUI notifications 45 | # CAP_SYSLOG is required to check /dev/kmsg for OOM events 46 | 47 | CapabilityBoundingSet=CAP_KILL CAP_IPC_LOCK CAP_SYS_PTRACE \ 48 | CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID \ 49 | 
CAP_SYS_RESOURCE CAP_SYSLOG 50 | 51 | # `PrivateNetwork=true` breaks GUI notifications on oldstable distros 52 | # (Debian 8, CentOS 7, Linux Mint 18). On modern distros you can set 53 | # PrivateNetwork=true for security reasons. 54 | #PrivateNetwork=true 55 | 56 | # Set realtime CPU scheduling policy if you want 57 | #CPUSchedulingPolicy=rr 58 | #CPUSchedulingPriority=1 59 | 60 | [Install] 61 | WantedBy=multi-user.target 62 | --------------------------------------------------------------------------------