├── .github
└── workflows
│ └── codeql-analysis.yml
├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── LICENSE
├── Makefile
├── README.md
├── conf
├── logrotate.d
│ └── nohang
└── nohang
│ ├── nohang-desktop.conf.in
│ ├── nohang.conf.in
│ └── test.conf
├── deb
├── DEBIAN
│ ├── conffiles
│ ├── control
│ └── postinst
└── build.sh
├── docs
├── FAQ.ru.md
├── nohang.manpage.md
├── oom-sort.manpage.md
├── psi-top.manpage.md
└── psi2log.manpage.md
├── man
├── nohang.8
├── oom-sort.1
├── psi-top.1
└── psi2log.1
├── openrc
├── nohang-desktop.in
└── nohang.in
├── src
├── nohang
├── oom-sort
├── psi-top
└── psi2log
└── systemd
├── nohang-desktop.service.in
└── nohang.service.in
/.github/workflows/codeql-analysis.yml:
--------------------------------------------------------------------------------
1 | # For most projects, this workflow file will not need changing; you simply need
2 | # to commit it to your repository.
3 | #
4 | # You may wish to alter this file to override the set of languages analyzed,
5 | # or to provide custom queries or build logic.
6 | #
7 | # ******** NOTE ********
8 | # We have attempted to detect the languages in your repository. Please check
9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL"
13 | # Runs CodeQL analysis on pushes and pull requests to master, and weekly via cron (19:19 UTC every Tuesday).
14 | on:
15 |   push:
16 |     branches: [ master ]
17 |   pull_request:
18 |     # The branches below must be a subset of the branches above
19 |     branches: [ master ]
20 |   schedule:
21 |     - cron: '19 19 * * 2'
22 |
23 | jobs:
24 |   analyze:
25 |     name: Analyze
26 |     runs-on: ubuntu-latest
27 |
28 |     strategy:
29 |       fail-fast: false
30 |       matrix:
31 |         language: [ 'python' ]
32 |         # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ]
33 |         # Learn more:
34 |         # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed
35 |
36 |     steps:
37 |     - name: Checkout repository
38 |       uses: actions/checkout@v4
39 |
40 |     # Initializes the CodeQL tools for scanning.
41 |     - name: Initialize CodeQL
42 |       uses: github/codeql-action/init@v3
43 |       with:
44 |         languages: ${{ matrix.language }}
45 |         # If you wish to specify custom queries, you can do so here or in a config file.
46 |         # By default, queries listed here will override any specified in a config file.
47 |         # Prefix the list here with "+" to use these queries and those in the config file.
48 |         # queries: ./path/to/local/query, your-org/your-repo/queries@main
49 |
50 |     # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
51 |     # If this step fails, then you should remove it and run the build manually (see below)
52 |     - name: Autobuild
53 |       uses: github/codeql-action/autobuild@v3
54 |
55 |     # ℹ️ Command-line programs to run using the OS shell.
56 |     # 📚 https://git.io/JvXDl
57 |
58 |     # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
59 |     # and modify them (or add more) to build your code if your project
60 |     # uses a compiled language
61 |
62 |     #- run: |
63 |     #   make bootstrap
64 |     #   make release
65 |
66 |     - name: Perform CodeQL Analysis
67 |       uses: github/codeql-action/analyze@v3
68 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *.cover
46 | .hypothesis/
47 | .pytest_cache/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 | db.sqlite3
57 |
58 | # Flask stuff:
59 | instance/
60 | .webassets-cache
61 |
62 | # Scrapy stuff:
63 | .scrapy
64 |
65 | # Sphinx documentation
66 | docs/_build/
67 |
68 | # PyBuilder
69 | target/
70 |
71 | # Jupyter Notebook
72 | .ipynb_checkpoints
73 |
74 | # pyenv
75 | .python-version
76 |
77 | # celery beat schedule file
78 | celerybeat-schedule
79 |
80 | # SageMath parsed files
81 | *.sage.py
82 |
83 | # Environments
84 | .env
85 | .venv
86 | env/
87 | venv/
88 | ENV/
89 | env.bak/
90 | venv.bak/
91 |
92 | # Spyder project settings
93 | .spyderproject
94 | .spyproject
95 |
96 | # Rope project settings
97 | .ropeproject
98 |
99 | # mkdocs documentation
100 | /site
101 |
102 | # mypy
103 | .mypy_cache/
104 |
105 | # Kate
106 | .kate-swp
107 |
108 | # deb
109 | /deb/package/
110 | /deb/package.deb
111 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | dist: bionic
2 |
3 | language: python
4 |
5 | sudo: required
6 | # Smoke test: install, then enable/start and stop each systemd unit in turn.
7 | script:
8 | - sudo make install
9 | - sudo systemctl enable --now nohang.service
10 | - sudo systemctl stop nohang.service
11 | - sudo systemctl enable --now nohang-desktop.service
12 | - sudo systemctl stop nohang-desktop.service
13 | # Run oom-sort with its help flag and with no arguments.
14 | - oom-sort -h
15 | - oom-sort
16 | # Print nohang help and version.
17 | - nohang -h
18 | - nohang -v
19 | # Check both installed configs and the in-repo test config.
20 | - nohang --check --config /usr/local/etc/nohang/nohang.conf
21 | - nohang --check --config /usr/local/etc/nohang/nohang-desktop.conf
22 | - nohang --check --config conf/nohang/test.conf
23 | # Run --tasks as root against each installed config.
24 | - sudo nohang --config /usr/local/etc/nohang/nohang.conf --tasks
25 | - sudo nohang --config /usr/local/etc/nohang/nohang-desktop.conf --tasks
26 | # Monitor with the test config while `tail /dev/zero` runs; after 30 s `pkill python3` (presumably to stop nohang), then print the log.
27 | - /bin/sleep 60 &
28 | - sudo bash -c "nohang --monitor --config conf/nohang/test.conf & tail /dev/zero & sleep 30 && pkill python3"
29 | - sudo cat /var/log/nohang/nohang.log
30 | # Remove the installation.
31 | - sudo make uninstall
32 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | This changelog is outdated. It will be updated later.
4 |
5 | ## [Unreleased]
6 |
7 | - Added new CLI options:
8 | - -v, --version
9 | - -m, --memload
10 | - --monitor
11 | - --tasks
12 | - --check-config
13 | - Possible process crashes are fixed:
14 | - Fixed crash at startup due to `UnicodeDecodeError` on some systems
15 | - Handled `UnicodeDecodeError` if victim name consists of many unicode characters ([rfjakob/earlyoom#110](https://github.com/rfjakob/earlyoom/issues/110))
16 | - Fixed process crash before performing corrective actions if Python 3.4 or lower are used to interpret nohang
17 | - Improve output:
18 | - Display `oom_score`, `oom_score_adj`, `Ancestry`, `EUID`, `State`, `VmSize`, `RssAnon`, `RssFile`, `RssShmem`, `CGroup_v1`, `CGroup_v2`, `Realpath`, `Cmdline` and `Lifetime` of the victim in corrective action reports
19 | - Added memory report interval
20 | - Added delta memory info (the rate of change of available memory)
21 | - Print statistics on corrective actions after each corrective action
22 | - Added ability to print a process table before each corrective action
23 | - Added the ability to log into a separate file
24 | - Improved GUI warnings:
25 | - Reduced the idle time of the daemon in the process of launching a notification
26 | - All notify-send calls are made using the `nohang_notify_helper` script, in which all timeouts are handled (not anymore: nohang_notify_helper has been removed)
27 | - Native python implementation of `env` search without running `ps` to notify all users if nohang started with UID=0.
28 | - Improved modifying badness via matching with regular expressions:
29 | - Added the ability to set many different `badness_adj` for processes depending on the matching `Name`, `CGroup_v1`, `CGroup_v2`, `cmdline`, `realpath`, `environ` and `EUID` with the specified regular expressions ([issue #11](https://github.com/hakavlad/nohang/issues/11))
30 | - Fix: replace `re.fullmatch()` by `re.search()`
31 | - Reduced memory usage:
32 | - Reduced memory usage and startup time (using `sys.argv` instead of `argparse`)
33 | - Reduced memory usage with `mlockall()` using `MCL_ONFAULT` ([rfjakob/earlyoom#112](https://github.com/rfjakob/earlyoom/issues/112))
34 | - Lock all memory by default using mlockall()
35 | - Added new tools:
36 | - `oom-sort`
37 | - `psi-top`
38 | - `psi2log`
39 | - Improve poll rate algorithm
40 | - Fixed Makefile for installation on CentOS 7 (remove gzip `-k` option).
41 | - Added `max_post_sigterm_victim_lifetime` option: send SIGKILL to the victim if it doesn't respond to SIGTERM for a certain time
42 | - Added `post_kill_exe` option (the ability to run any command after killing a victim)
43 | - Added `warning_exe` option (the ability to run any command instead of GUI low memory warnings)
44 | - Added `victim_cache_time` option
45 | - Improved victim search algorithm (do it ~30% faster) ([rfjakob/earlyoom#114](https://github.com/rfjakob/earlyoom/issues/114))
46 | - Improved limiting `oom_score_adj`: now it can work with UID != 0
47 | - Fixed conf parsing: use of `line.partition('=')` instead of `line.split('=')`
48 | - Removed self-defense options from the config, use systemd unit scheduling instead
49 | - Added the ability to send any signal instead of SIGTERM for processes with certain names
50 | - Added support for `PSI`
51 | - Recheck memory levels after finding a victim to prevent killing innocent victims in some cases ([issue #20](https://github.com/hakavlad/nohang/issues/20))
52 | - Now one corrective action to one victim can be applied only once.
53 | - Ignoring zram by default, checking for this has become optional.
54 | - Improved user input validation
55 | - Improved documentation
56 | - Handle signals (SIGTERM, SIGINT, SIGQUIT, SIGHUP), print total stat by corrective actions at exit.
57 |
58 | ## [0.1] - 2018-11-23
59 |
60 | [unreleased]: https://github.com/hakavlad/nohang/compare/v0.1...HEAD
61 | [0.1]: https://github.com/hakavlad/nohang/releases/tag/v0.1
62 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Alexey Avramov
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | DESTDIR ?=
2 | PREFIX ?= /usr/local
3 | SYSCONFDIR ?= /usr/local/etc
4 | SYSTEMDUNITDIR ?= /usr/local/lib/systemd/system
5 |
6 | BINDIR ?= $(PREFIX)/bin
7 | SBINDIR ?= $(PREFIX)/sbin
8 | DATADIR ?= $(PREFIX)/share
9 | DOCDIR ?= $(DATADIR)/doc/nohang
10 | MANDIR ?= $(DATADIR)/man
11 |
12 | PANDOC := $(shell command -v pandoc 2> /dev/null)
13 |
14 | all:
15 | @ echo "Use: make install, make install-openrc, make uninstall"
16 |
17 | update-manpages:
18 |
19 | ifdef PANDOC
20 | pandoc docs/nohang.manpage.md -s -t man > man/nohang.8
21 | pandoc docs/oom-sort.manpage.md -s -t man > man/oom-sort.1
22 | pandoc docs/psi2log.manpage.md -s -t man > man/psi2log.1
23 | pandoc docs/psi-top.manpage.md -s -t man > man/psi-top.1
24 | else
25 | @echo "pandoc is not installed, skipping manpages generation"
26 | endif
27 |
28 | base:
29 | install -p -d $(DESTDIR)$(SBINDIR)
30 | install -p -m0755 src/nohang $(DESTDIR)$(SBINDIR)/nohang
31 |
32 | install -p -d $(DESTDIR)$(BINDIR)
33 | install -p -m0755 src/oom-sort $(DESTDIR)$(BINDIR)/oom-sort
34 | install -p -m0755 src/psi-top $(DESTDIR)$(BINDIR)/psi-top
35 | install -p -m0755 src/psi2log $(DESTDIR)$(BINDIR)/psi2log
36 |
37 | install -p -d $(DESTDIR)$(SYSCONFDIR)/nohang
38 |
39 | sed "s|:TARGET_DATADIR:|$(DATADIR)|" \
40 | conf/nohang/nohang.conf.in > nohang.conf
41 |
42 | sed "s|:TARGET_DATADIR:|$(DATADIR)|" \
43 | conf/nohang/nohang-desktop.conf.in > nohang-desktop.conf
44 |
45 | install -p -m0644 nohang.conf $(DESTDIR)$(SYSCONFDIR)/nohang/nohang.conf
46 | install -p -m0644 nohang-desktop.conf $(DESTDIR)$(SYSCONFDIR)/nohang/nohang-desktop.conf
47 |
48 | install -p -d $(DESTDIR)$(DATADIR)/nohang
49 |
50 | install -p -m0644 nohang.conf $(DESTDIR)$(DATADIR)/nohang/nohang.conf
51 | install -p -m0644 nohang-desktop.conf $(DESTDIR)$(DATADIR)/nohang/nohang-desktop.conf
52 |
53 | -git describe --tags --long --dirty > version
54 | install -p -m0644 version $(DESTDIR)$(DATADIR)/nohang/version
55 |
56 | rm -fv nohang.conf
57 | rm -fv nohang-desktop.conf
58 | rm -fv version
59 |
60 | install -p -d $(DESTDIR)/etc/logrotate.d
61 | install -p -m0644 conf/logrotate.d/nohang $(DESTDIR)/etc/logrotate.d/nohang
62 |
63 | install -p -d $(DESTDIR)$(MANDIR)/man1
64 | gzip -9cn man/oom-sort.1 > $(DESTDIR)$(MANDIR)/man1/oom-sort.1.gz
65 | gzip -9cn man/psi-top.1 > $(DESTDIR)$(MANDIR)/man1/psi-top.1.gz
66 | gzip -9cn man/psi2log.1 > $(DESTDIR)$(MANDIR)/man1/psi2log.1.gz
67 |
68 | install -p -d $(DESTDIR)$(MANDIR)/man8
69 |
70 | sed "s|:SYSCONFDIR:|$(SYSCONFDIR)|g; s|:DATADIR:|$(DATADIR)|g" \
71 | man/nohang.8 > nohang.8
72 |
73 | gzip -9cn nohang.8 > $(DESTDIR)$(MANDIR)/man8/nohang.8.gz
74 | rm -fv nohang.8
75 |
76 | install -p -d $(DESTDIR)$(DOCDIR)
77 | install -p -m0644 README.md $(DESTDIR)$(DOCDIR)/README.md
78 | install -p -m0644 CHANGELOG.md $(DESTDIR)$(DOCDIR)/CHANGELOG.md
79 |
80 | units:
81 | install -p -d $(DESTDIR)$(SYSTEMDUNITDIR)
82 |
83 | sed "s|:TARGET_SBINDIR:|$(SBINDIR)|; s|:TARGET_SYSCONFDIR:|$(SYSCONFDIR)|" \
84 | systemd/nohang.service.in > nohang.service
85 |
86 | sed "s|:TARGET_SBINDIR:|$(SBINDIR)|; s|:TARGET_SYSCONFDIR:|$(SYSCONFDIR)|" \
87 | systemd/nohang-desktop.service.in > nohang-desktop.service
88 |
89 | install -p -m0644 nohang.service $(DESTDIR)$(SYSTEMDUNITDIR)/nohang.service
90 | install -p -m0644 nohang-desktop.service $(DESTDIR)$(SYSTEMDUNITDIR)/nohang-desktop.service
91 |
92 | rm -fv nohang.service
93 | rm -fv nohang-desktop.service
94 |
95 | chcon:
96 | chcon -t systemd_unit_file_t $(DESTDIR)$(SYSTEMDUNITDIR)/nohang.service || :
97 | chcon -t systemd_unit_file_t $(DESTDIR)$(SYSTEMDUNITDIR)/nohang-desktop.service || :
98 |
99 | daemon-reload:
100 | systemctl daemon-reload || :
101 |
102 | build_deb: base units
103 |
104 | reinstall-deb:
105 | set -v
106 | deb/build.sh
107 | sudo apt install --reinstall ./deb/package.deb
108 |
109 | install: base units chcon daemon-reload
110 | # This is fine.
111 | # install-openrc: install the base files plus the OpenRC init scripts generated from openrc/*.in. Scripts are installed 0755 (not group-writable), matching the other executables.
112 | install-openrc: base
113 | 	install -p -d $(DESTDIR)$(SYSCONFDIR)/init.d
114 | 	sed "s|:TARGET_SBINDIR:|$(SBINDIR)|; s|:TARGET_SYSCONFDIR:|$(SYSCONFDIR)|" \
115 | 		openrc/nohang.in > openrc/nohang
116 | 	sed "s|:TARGET_SBINDIR:|$(SBINDIR)|; s|:TARGET_SYSCONFDIR:|$(SYSCONFDIR)|" \
117 | 		openrc/nohang-desktop.in > openrc/nohang-desktop
118 | 	install -p -m0755 openrc/nohang $(DESTDIR)$(SYSCONFDIR)/init.d/nohang
119 | 	install -p -m0755 openrc/nohang-desktop $(DESTDIR)$(SYSCONFDIR)/init.d/nohang-desktop
120 | 	rm -fv openrc/nohang
121 | 	rm -fv openrc/nohang-desktop
122 | # uninstall-base: remove the files and directories installed by the `base` target, plus /var/log/nohang.
123 | uninstall-base:
124 | 	rm -fv $(DESTDIR)$(SBINDIR)/nohang
125 | 	rm -fv $(DESTDIR)$(BINDIR)/oom-sort
126 | 	rm -fv $(DESTDIR)$(BINDIR)/psi-top
127 | 	rm -fv $(DESTDIR)$(BINDIR)/psi2log
128 |
129 | 	rm -fv $(DESTDIR)$(MANDIR)/man1/oom-sort.1.gz
130 | 	rm -fv $(DESTDIR)$(MANDIR)/man1/psi-top.1.gz
131 | 	rm -fv $(DESTDIR)$(MANDIR)/man1/psi2log.1.gz
132 |
133 | 	rm -fv $(DESTDIR)$(MANDIR)/man8/nohang.8.gz
134 | # The logrotate path must match the one used in `base`: $(DESTDIR)/etc/logrotate.d/nohang.
135 | 	rm -fvr $(DESTDIR)/etc/logrotate.d/nohang
136 | 	rm -fvr $(DESTDIR)$(DOCDIR)/
137 | 	rm -fvr $(DESTDIR)/var/log/nohang/
138 | 	rm -fvr $(DESTDIR)$(DATADIR)/nohang/
139 | 	rm -fvr $(DESTDIR)$(SYSCONFDIR)/nohang/
140 |
141 | uninstall-units:
142 | systemctl stop nohang.service || :
143 | systemctl stop nohang-desktop.service || :
144 | systemctl disable nohang.service || :
145 | systemctl disable nohang-desktop.service || :
146 |
147 | rm -fv $(DESTDIR)$(SYSTEMDUNITDIR)/nohang.service
148 | rm -fv $(DESTDIR)$(SYSTEMDUNITDIR)/nohang-desktop.service
149 |
150 | uninstall-openrc:
151 | rc-service nohang-desktop stop || :
152 | rc-service nohang stop || :
153 |
154 | rm -fv $(DESTDIR)$(SYSCONFDIR)/init.d/nohang
155 | rm -fv $(DESTDIR)$(SYSCONFDIR)/init.d/nohang-desktop
156 |
157 | uninstall: uninstall-base uninstall-units daemon-reload uninstall-openrc
158 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 |
3 | # nohang
4 |
5 | [](https://travis-ci.org/hakavlad/nohang)
6 | 
7 | [](https://repology.org/project/nohang/versions)
8 |
9 | `nohang` package provides a highly configurable daemon for Linux which is able to correctly prevent [out of memory](https://en.wikipedia.org/wiki/Out_of_memory) (OOM) and keep system responsiveness in low memory conditions.
10 |
11 | The package also includes additional diagnostic tools (`oom-sort`, `psi2log`, `psi-top`).
12 |
13 | ## What is the problem?
14 |
15 | OOM conditions may cause [freezes](https://en.wikipedia.org/wiki/Hang_(computing)), [livelocks](https://en.wikipedia.org/wiki/Deadlock#Livelock), drop [caches](https://en.wikipedia.org/wiki/Page_cache) and processes to be killed (via sending [SIGKILL](https://en.wikipedia.org/wiki/Signal_(IPC)#SIGKILL)) instead of trying to terminate them correctly (via sending [SIGTERM](https://en.wikipedia.org/wiki/Signal_(IPC)#SIGTERM) or taking other corrective action). Some applications may crash if it's impossible to allocate memory.
16 |
17 | Here are the statements of some users:
18 |
19 | > "How do I prevent Linux from freezing when out of memory?
20 | Today I (accidentally) ran some program on my Linux box that quickly used a lot of memory. My system froze, became unresponsive and thus I was unable to kill the offender.
21 | How can I prevent this in the future? Can't it at least keep a responsive core or something running?"
22 |
23 | — [serverfault](https://serverfault.com/questions/390623/how-do-i-prevent-linux-from-freezing-when-out-of-memory)
24 |
25 | > "With or without swap it still freezes before the OOM killer gets run automatically. This is really a kernel bug that should be fixed (i.e. run OOM killer earlier, before dropping all disk cache). Unfortunately kernel developers and a lot of other folk fail to see the problem. Common suggestions such as disable/enable swap, buy more RAM, run less processes, set limits etc. do not address the underlying problem that the kernel's low memory handling sucks camel's balls."
26 |
27 | — [serverfault](https://serverfault.com/questions/390623/how-do-i-prevent-linux-from-freezing-when-out-of-memory#comment417508_390625)
28 |
29 | > "The traditional Linux OOM killer works fine in some cases, but in others it kicks in too late, resulting in the system entering a [livelock](https://en.wikipedia.org/wiki/Deadlock#Livelock) for an indeterminate period."
30 |
31 | — [engineering.fb.com](https://engineering.fb.com/production-engineering/oomd/)
32 |
33 | Also look at these discussions:
34 | - Why are low memory conditions handled so badly? [[r/linux](https://www.reddit.com/r/linux/comments/56r4xj/why_are_low_memory_conditions_handled_so_badly/)]
35 | - Memory management "more effective" on Windows than Linux? (in preventing total system lockup) [[r/linux](https://www.reddit.com/r/linux/comments/aqd9mh/memory_management_more_effective_on_windows_than/)]
36 | - Let's talk about the elephant in the room - the Linux kernel's inability to gracefully handle low memory pressure [[original LKML post](https://lkml.org/lkml/2019/8/4/15) | [r/linux](https://www.reddit.com/r/linux/comments/cmg48b/lets_talk_about_the_elephant_in_the_room_the/) | [Hacker News](https://news.ycombinator.com/item?id=20620545) | [slashdot](https://linux.slashdot.org/story/19/08/06/1839206/linux-performs-poorly-in-low-ram--memory-pressure-situations-on-the-desktop) | [phoronix](https://www.phoronix.com/forums/forum/phoronix/general-discussion/1118164-yes-linux-does-bad-in-low-ram-memory-pressure-situations-on-the-desktop) | [opennet.ru](https://www.opennet.ru/opennews/art.shtml?num=51231) | [linux.org.ru](https://www.linux.org.ru/forum/talks/15151526)]
37 |
38 | ## Solution
39 |
40 | Use one of the userspace OOM killers:
41 | - [earlyoom](https://github.com/rfjakob/earlyoom): This is a simple, stable and tiny OOM prevention daemon written in C (the best choice for embedded and old servers). It has minimal dependencies and can work with the oldest kernels. It is enabled by default on Fedora 32 Workstation (and F33 KDE).
42 | - [oomd](https://github.com/facebookincubator/oomd): This is a userspace OOM killer for linux systems written in C++ and developed by Facebook. This is the best choice for use in large data centers. It needs Linux 4.20+.
43 | - [systemd-oomd](https://man7.org/linux/man-pages/man8/systemd-oomd.service.8.html): Provided by systemd as `systemd-oomd.service` that uses cgroups-v2 and pressure stall information (PSI) to monitor and take action on processes before an OOM occurs in kernel space. It's used by default on [desktop versions of Fedora 34](https://fedoraproject.org/wiki/Changes/EnableSystemdOomd).
44 | - [low-memory-monitor](https://gitlab.freedesktop.org/hadess/low-memory-monitor/): There's a [project announcement](http://www.hadess.net/2019/08/low-memory-monitor-new-project.html).
45 | - [psi-monitor](https://github.com/endlessm/eos-boot-helper/tree/master/psi-monitor): It's used by default on [Endless OS](https://endlessos.com/).
46 | - `nohang`: nohang is earlyoom on steroids and has many useful features, see below. Maybe this is a good choice for modern desktops and servers if you need fine-tuning. Previously it was used by default on [Garuda Linux](https://garudalinux.org/).
47 |
48 | Use these tools to improve responsiveness during heavy swapping:
49 | - MGLRU patchset is merged in Linux 6.1. Setting `min_ttl_ms` > 50 can help you.
50 | - [le9-patch](https://github.com/hakavlad/le9-patch): [PATCH] mm: Protect clean file pages under memory pressure to prevent thrashing, avoid high latency and prevent livelock in near-OOM conditions. It's a kernel-side solution that can fix the OOM killer behavior.
51 | - [prelockd](https://github.com/hakavlad/prelockd): Lock executables and shared libraries in memory to improve system responsiveness under low-memory conditions.
52 | - [memavaild](https://github.com/hakavlad/memavaild): Keep amount of available memory by evicting memory of selected cgroups into swap space.
53 | - [uresourced](https://gitlab.freedesktop.org/benzea/uresourced): This daemon will give resource allocations to active graphical users. It's [enabled by default](https://fedoraproject.org/wiki/Changes/Reserve_resources_for_active_user_WS) on Fedora 33 Workstation.
54 |
55 | Of course, you can also [download more RAM](https://downloadmoreram.com/), tune [virtual memory](https://www.kernel.org/doc/Documentation/sysctl/vm.txt), use [zram](https://www.kernel.org/doc/Documentation/blockdev/zram.txt)/[zswap](https://www.kernel.org/doc/Documentation/vm/zswap.txt) and use [limits](https://www.freedesktop.org/software/systemd/man/systemd.resource-control.html) for cgroups.
56 |
57 | ## Features
58 |
59 | - Sending the SIGTERM signal is the default corrective action. If the victim does not respond to SIGTERM, with a further drop in the level of memory it gets SIGKILL;
60 | - Customizing victim selection: impact on the badness of processes via matching their names, cgroups, exe realpaths, environs, cmdlines and euids with specified regular expressions;
61 | - Customizing corrective actions: if the name or control group of the victim matches a certain regex pattern, you can run any command instead of sending the SIGTERM signal (the default corrective action) to the victim. For example:
62 | - `systemctl restart foo`;
63 | - `kill -INT $PID` (you can override the signal sent to the victim, $PID will be replaced by the victim's PID).
64 | - GUI notifications:
65 | - Notification of corrective actions taken and displaying the name and PID of the victim;
66 | - Low memory warnings.
67 | - [zram](https://www.kernel.org/doc/Documentation/blockdev/zram.txt) support (`mem_used_total` as a trigger);
68 | - [PSI](https://lwn.net/Articles/759658/) ([pressure stall information](https://facebookmicrosites.github.io/psi/)) support;
69 | - Optional checking kernel messages for OOM events;
70 | - Easy setup with configuration files ([nohang.conf](https://github.com/hakavlad/nohang/blob/master/conf/nohang/nohang.conf.in), [nohang-desktop.conf](https://github.com/hakavlad/nohang/blob/master/conf/nohang/nohang-desktop.conf.in)).
71 |
72 | ## Demo
73 |
74 | `nohang` prevents Out Of Memory with GUI notifications:
75 |
76 | - [https://youtu.be/ChTNu9m7uMU](https://youtu.be/ChTNu9m7uMU) – just old demo without swap space.
77 | - [https://youtu.be/UCwZS5uNLu0](https://youtu.be/UCwZS5uNLu0) – running multiple fast memory hogs at the same time without swap space.
78 | - [https://youtu.be/PLVWgNrVNlc](https://youtu.be/PLVWgNrVNlc) – opening multiple chromium tabs with 2.3 GiB memory and 1.8 GiB swap space on zram.
79 |
80 | ## Requirements
81 |
82 | For basic usage:
83 | - `Linux` (>= 3.14, since `MemAvailable` appeared in `/proc/meminfo`)
84 | - `Python` (>= 3.3)
85 |
86 | To respond to `PSI` metrics (optional):
87 | - `Linux` (>= 4.20) with `CONFIG_PSI=y`
88 |
89 | To show GUI notifications (optional):
90 | - [notification server](https://wiki.archlinux.org/index.php/Desktop_notifications#Notification_servers) (most of desktop environments use their own implementations)
91 | - `libnotify` (Arch Linux, Fedora, openSUSE) or `libnotify-bin` (Debian GNU/Linux, Ubuntu)
92 | - `sudo` if nohang started with UID=0.
93 |
94 | ## Memory and CPU usage
95 |
96 | - VmRSS is about 10–14 MiB depending on the settings, about 10–11 MiB by default (with Python <= 3.8), about 16–17 MiB with Python 3.9.
97 | - CPU usage depends on the level of available memory and monitoring intensity.
98 |
99 | ## Warnings
100 |
101 | - the daemon runs with super-user privileges and has full access to all private memory of all processes and sensitive user data;
102 | - the daemon does not forbid you to shoot yourself in the foot: with some settings, unwanted killings of processes can occur;
103 | - the daemon is not a panacea: there are no universal settings that reliably protect against all types of threats.
104 |
105 | ## Known problems
106 |
107 | - The documentation is terrible.
108 | - The ZFS ARC cache is memory-reclaimable, like the Linux buffer cache. However, in contrast to the buffer cache, it currently does not count to MemAvailable (see [openzfs/zfs#10255](https://github.com/openzfs/zfs/issues/10255)). See also https://github.com/rfjakob/earlyoom/pull/191 and https://github.com/hakavlad/nohang/issues/89.
109 | - Linux kernels without `CONFIG_CGROUP_CPUACCT=y` ([linux-ck](https://wiki.archlinux.org/index.php/Linux-ck), for example) provide incorrect PSI metrics, see [issue](https://github.com/hakavlad/nohang/issues/25#issuecomment-643716504).
110 |
111 | ## nohang vs nohang-desktop
112 |
113 | `nohang` comes with two configs: `nohang.conf` and `nohang-desktop.conf`. `nohang` comes with two systemd service unit files: `nohang.service` and `nohang-desktop.service`. Choose one.
114 |
115 | - `nohang.conf` provides vanilla default settings without PSI checking enabled, without any badness correction and without GUI notifications enabled.
116 | - `nohang-desktop.conf` provides default settings optimized for desktop usage.
117 |
118 | ## How to install
119 |
120 | #### To install on [Fedora](https://src.fedoraproject.org/rpms/nohang/):
121 |
122 | Orphaned for 6+ weeks, not available.
123 |
124 | #### To install on RHEL 7 and RHEL 8:
125 |
126 | nohang is available in [EPEL repos](https://fedoraproject.org/wiki/EPEL).
127 | ```bash
128 | sudo yum install nohang
129 | sudo systemctl enable nohang.service
130 | sudo systemctl start nohang.service
131 | ```
132 | To enable PSI on RHEL 8 pass `psi=1` to kernel boot cmdline.
133 |
134 | #### For Arch Linux there's an [AUR package](https://aur.archlinux.org/packages/nohang-git/)
135 |
136 | Use your favorite [AUR helper](https://wiki.archlinux.org/index.php/AUR_helpers). For example,
137 | ```bash
138 | yay -S nohang-git
139 | sudo systemctl enable --now nohang-desktop.service
140 | ```
141 |
142 | #### To install on Ubuntu 20.04/20.10
143 |
144 | To install from [PPA](https://launchpad.net/~oibaf/+archive/ubuntu/test/):
145 | ```bash
146 | sudo add-apt-repository ppa:oibaf/test
147 | sudo apt update
148 | sudo apt install nohang
149 | sudo systemctl enable --now nohang-desktop.service
150 | ```
151 |
152 | #### To install on Debian and Ubuntu-based systems:
153 |
154 | Outdated and buggy nohang v0.1 release was packaged for [Debian 11](https://packages.debian.org/bullseye/source/nohang) and [Ubuntu 20.10](https://packages.ubuntu.com/source/groovy/nohang).
155 |
156 | It's easy to build a deb package with the latest git snapshot. Install build dependencies:
157 | ```bash
158 | sudo apt install make fakeroot
159 | ```
160 |
161 | Clone the latest git snapshot and run the build script to build the package:
162 | ```bash
163 | git clone https://github.com/hakavlad/nohang.git && cd nohang
164 | deb/build.sh
165 | ```
166 |
167 | Install the package:
168 | ```bash
169 | sudo apt install --reinstall ./deb/package.deb
170 | ```
171 |
172 | Start and enable `nohang.service` or `nohang-desktop.service` after installing the package:
173 | ```bash
174 | sudo systemctl enable --now nohang-desktop.service
175 | ```
176 |
177 | #### To install on Gentoo and derivatives (e.g. Funtoo):
178 |
179 | Add the [eph kit](https://git.sr.ht/~happy_shredder/eph_kit) overlay, for example using layman or as a local repository.
180 | Then update your repos:
181 |
182 | ```bash
183 | sudo layman -S # if added via layman
184 | sudo emerge --sync # local repo on Gentoo
185 | sudo ego sync # local repo on Funtoo
186 | ```
187 |
188 | Install:
189 |
190 | ```bash
191 | sudo emerge -a nohang
192 | ```
193 |
194 | Start the service:
195 |
196 | ```bash
197 | sudo rc-service nohang-desktop start
198 | ```
199 |
200 | Optionally add to startup:
201 |
202 | ```bash
203 | sudo rc-update add nohang-desktop default
204 | ```
205 |
206 | #### To install the latest version on any distro:
207 | ```bash
208 | git clone https://github.com/hakavlad/nohang.git && cd nohang
209 | sudo make install
210 | ```
211 |
212 | Config files will be located in `/usr/local/etc/nohang/`. To enable and start the unit without GUI notifications:
213 | ```bash
214 | sudo systemctl enable --now nohang.service
215 | ```
216 |
217 | To enable and start the unit with GUI notifications:
218 | ```bash
219 | sudo systemctl enable --now nohang-desktop.service
220 | ```
221 |
222 | On systems with OpenRC:
223 | ```bash
224 | sudo make install-openrc
225 | ```
226 |
227 | To uninstall:
228 | ```bash
229 | sudo make uninstall
230 | ```
231 |
232 | ## Command line options
233 |
234 | ```
235 | ./nohang -h
236 | usage: nohang [-h|--help] [-v|--version] [-m|--memload]
237 | [-c|--config CONFIG] [--check] [--monitor] [--tasks]
238 |
239 | optional arguments:
240 | -h, --help show this help message and exit
241 | -v, --version show version of installed package and exit
242 | -m, --memload consume memory until 40 MiB (MemAvailable + SwapFree)
243 | remain free, and terminate the process
244 | -c CONFIG, --config CONFIG
245 | path to the config file. This should only be used
246 | with one of the following options:
247 | --monitor, --tasks, --check
248 | --check check and show the configuration and exit. This should
249 | only be used with -c/--config CONFIG option
250 | --monitor start monitoring. This should only be used with
251 | -c/--config CONFIG option
252 | --tasks show tasks state and exit. This should only be used
253 | with -c/--config CONFIG option
254 | ```
255 |
256 | ## How to configure
257 |
258 | The program can be configured by editing the config file. The configuration includes the following sections:
259 |
260 | 0. Checking kernel messages for OOM events;
261 | 1. Common zram settings;
262 | 2. Common PSI settings;
263 | 3. Poll rate;
264 | 4. Warnings and notifications;
265 | 5. Soft threshold;
266 | 6. Hard threshold;
267 | 7. Customize victim selection;
268 | 8. Customize soft corrective actions;
269 | 9. Misc settings;
270 | 10. Verbosity, debug, logging.
271 |
272 | Just read the description of the parameters and edit the values. Please restart the daemon to apply the changes.
273 |
274 | ## How to test nohang
275 |
276 | - The safest way is to run `nohang --memload`. This causes memory consumption, and the process will exit before OOM occurs.
277 | - Another way is to run `tail /dev/zero`. This causes fast memory consumption and ends in OOM.
278 |
279 | If testing occurs while `nohang` is running, these processes should be terminated before OOM occurs.
280 |
281 | ## Tasks state
282 |
283 | Run `sudo nohang -c/--config CONFIG --tasks` to see the table of processes with their badness values, oom_scores, names, UIDs etc.
284 |
285 |
286 | Output example
287 |
288 | ```
289 | Config: /etc/nohang/nohang.conf
290 | ###################################################################################################################
291 | # PID PPID badness oom_score oom_score_adj eUID S VmSize VmRSS VmSwap Name CGroup
292 | #------- ------- ------- --------- ------------- ---------- - ------ ----- ------ --------------- --------
293 | # 336 1 1 1 0 0 S 85 25 0 systemd-journal /system.slice/systemd-journald.service
294 | # 383 1 0 0 -1000 0 S 46 5 0 systemd-udevd /system.slice/systemd-udevd.service
295 | # 526 2238 7 7 0 1000 S 840 96 0 kate /user.slice/user-1000.slice/session-7.scope
296 | # 650 1 3 3 0 1000 S 760 50 0 kate /user.slice/user-1000.slice/session-7.scope
297 | # 731 1 0 0 0 100 S 126 4 0 systemd-timesyn /system.slice/systemd-timesyncd.service
298 | # 756 1 0 0 0 105 S 181 3 0 rtkit-daemon /system.slice/rtkit-daemon.service
299 | # 759 1 0 0 0 0 S 277 7 0 accounts-daemon /system.slice/accounts-daemon.service
300 | # 761 1 0 0 0 0 S 244 3 0 rsyslogd /system.slice/rsyslog.service
301 | # 764 1 0 0 -900 108 S 45 5 0 dbus-daemon /system.slice/dbus.service
302 | # 805 1 0 0 0 0 S 46 5 0 systemd-logind /system.slice/systemd-logind.service
303 | # 806 1 0 0 0 0 S 35 3 0 irqbalance /system.slice/irqbalance.service
304 | # 813 1 0 0 0 0 S 29 3 0 cron /system.slice/cron.service
305 | # 814 1 11 11 0 0 S 176 160 0 memlockd /system.slice/memlockd.service
306 | # 815 1 0 0 -10 0 S 32 9 0 python3 /fork.slice/fork-bomb.slice/fork-bomb-killer.slice/fork-bomb-killer.service
307 | # 823 1 0 0 0 0 S 25 4 0 smartd /system.slice/smartd.service
308 | # 826 1 0 0 0 113 S 46 3 0 avahi-daemon /system.slice/avahi-daemon.service
309 | # 850 826 0 0 0 113 S 46 0 0 avahi-daemon /system.slice/avahi-daemon.service
310 | # 868 1 0 0 0 0 S 281 8 0 polkitd /system.slice/polkit.service
311 | # 903 1 1 1 0 0 S 4094 16 0 stunnel4 /system.slice/stunnel4.service
312 | # 940 1 0 0 -600 0 S 39 10 0 python3 /nohang.slice/nohang.service
313 | # 1014 1 0 0 0 13 S 22 2 0 obfs-local /system.slice/obfs-local.service
314 | # 1015 1 0 0 0 1000 S 36 4 0 ss-local /system.slice/ss-local.service
315 | # 1023 1 0 0 0 116 S 33 2 0 dnscrypt-proxy /system.slice/dnscrypt-proxy.service
316 | # 1029 1 1 1 0 119 S 4236 16 0 privoxy /system.slice/privoxy.service
317 | # 1035 1 0 0 0 0 S 355 6 0 lightdm /system.slice/lightdm.service
318 | # 1066 1 0 0 0 0 S 45 7 0 wpa_supplicant /system.slice/wpa_supplicant.service
319 | # 1178 1 0 0 0 0 S 14 2 0 agetty /system.slice/system-getty.slice/getty@tty1.service
320 | # 1294 1 0 0 -1000 0 S 4 1 0 watchdog /system.slice/watchdog.service
321 | # 1632 1 1 1 0 1000 S 1391 22 0 pulseaudio /user.slice/user-1000.slice/session-2.scope
322 | # 1689 1632 0 0 0 1000 S 125 5 0 gconf-helper /user.slice/user-1000.slice/session-2.scope
323 | # 1711 1 0 0 0 0 S 367 8 0 udisksd /system.slice/udisks2.service
324 | # 1819 1 0 0 0 0 S 304 8 0 upowerd /system.slice/upower.service
325 | # 1879 1 0 0 0 1000 S 64 7 0 systemd /user.slice/user-1000.slice/user@1000.service/init.scope
326 | # 1880 1879 0 0 0 1000 S 229 2 0 (sd-pam) /user.slice/user-1000.slice/user@1000.service/init.scope
327 | # 1888 1 0 0 0 0 S 14 2 0 agetty /system.slice/system-getty.slice/getty@tty2.service
328 | # 1889 1 0 0 0 0 S 14 2 0 agetty /system.slice/system-getty.slice/getty@tty3.service
329 | # 1890 1 0 0 0 0 S 14 2 0 agetty /system.slice/system-getty.slice/getty@tty4.service
330 | # 1891 1 0 0 0 0 S 14 2 0 agetty /system.slice/system-getty.slice/getty@tty5.service
331 | # 1892 1 0 0 0 0 S 14 2 0 agetty /system.slice/system-getty.slice/getty@tty6.service
332 | # 1893 1035 14 14 0 0 R 623 208 0 Xorg /system.slice/lightdm.service
333 | # 1904 1 0 0 0 111 S 64 7 0 systemd /user.slice/user-111.slice/user@111.service/init.scope
334 | # 1905 1904 0 0 0 111 S 229 2 0 (sd-pam) /user.slice/user-111.slice/user@111.service/init.scope
335 | # 1916 1904 0 0 0 111 S 44 3 0 dbus-daemon /user.slice/user-111.slice/user@111.service/dbus.service
336 | # 1920 1 0 0 0 111 S 215 5 0 at-spi2-registr /user.slice/user-111.slice/session-c2.scope
337 | # 1922 1904 0 0 0 111 S 278 6 0 gvfsd /user.slice/user-111.slice/user@111.service/gvfs-daemon.service
338 | # 1935 1035 0 0 0 0 S 238 6 0 lightdm /user.slice/user-1000.slice/session-7.scope
339 | # 1942 1 0 0 0 1000 S 210 9 0 gnome-keyring-d /user.slice/user-1000.slice/session-7.scope
340 | # 1944 1935 1 1 0 1000 S 411 21 0 mate-session /user.slice/user-1000.slice/session-7.scope
341 | # 1952 1879 0 0 0 1000 S 45 5 0 dbus-daemon /user.slice/user-1000.slice/user@1000.service/dbus.service
342 | # 1981 1944 0 0 0 1000 S 11 0 0 ssh-agent /user.slice/user-1000.slice/session-7.scope
343 | # 1984 1879 0 0 0 1000 S 278 6 0 gvfsd /user.slice/user-1000.slice/user@1000.service/gvfs-daemon.service
344 | # 1990 1879 0 0 0 1000 S 341 5 0 at-spi-bus-laun /user.slice/user-1000.slice/user@1000.service/at-spi-dbus-bus.service
345 | # 1995 1990 0 0 0 1000 S 44 4 0 dbus-daemon /user.slice/user-1000.slice/user@1000.service/at-spi-dbus-bus.service
346 | # 1997 1879 0 0 0 1000 S 215 5 0 at-spi2-registr /user.slice/user-1000.slice/user@1000.service/at-spi-dbus-bus.service
347 | # 2000 1879 0 0 0 1000 S 184 5 0 dconf-service /user.slice/user-1000.slice/user@1000.service/dbus.service
348 | # 2009 1944 2 2 0 1000 S 1308 35 0 mate-settings-d /user.slice/user-1000.slice/session-7.scope
349 | # 2013 1944 2 2 0 1000 S 436 32 0 marco /user.slice/user-1000.slice/session-7.scope
350 | # 2024 1944 4 4 0 1000 S 1258 55 0 caja /user.slice/user-1000.slice/session-7.scope
351 | # 2032 1 1 1 0 1000 S 333 18 0 msd-locate-poin /user.slice/user-1000.slice/session-7.scope
352 | # 2033 1879 0 0 0 1000 S 348 11 0 gvfs-udisks2-vo /user.slice/user-1000.slice/user@1000.service/gvfs-udisks2-volume-monitor.service
353 | # 2036 1944 1 1 0 1000 S 331 17 0 polkit-mate-aut /user.slice/user-1000.slice/session-7.scope
354 | # 2038 1944 5 5 0 1000 S 682 78 0 mate-panel /user.slice/user-1000.slice/session-7.scope
355 | # 2041 1944 2 2 0 1000 S 514 31 0 nm-applet /user.slice/user-1000.slice/session-7.scope
356 | # 2046 1944 1 1 0 1000 S 495 25 0 mate-power-mana /user.slice/user-1000.slice/session-7.scope
357 | # 2047 1944 2 2 0 1000 S 692 32 0 mate-volume-con /user.slice/user-1000.slice/session-7.scope
358 | # 2049 1944 3 3 0 1000 S 548 44 0 mate-screensave /user.slice/user-1000.slice/session-7.scope
359 | # 2059 1879 0 0 0 1000 S 263 5 0 gvfs-goa-volume /user.slice/user-1000.slice/user@1000.service/gvfs-goa-volume-monitor.service
360 | # 2076 1879 0 0 0 1000 S 352 7 0 gvfsd-trash /user.slice/user-1000.slice/user@1000.service/gvfs-daemon.service
361 | # 2077 1879 0 0 0 1000 S 362 7 0 gvfs-afc-volume /user.slice/user-1000.slice/user@1000.service/gvfs-afc-volume-monitor.service
362 | # 2087 1879 0 0 0 1000 S 263 5 0 gvfs-mtp-volume /user.slice/user-1000.slice/user@1000.service/gvfs-mtp-volume-monitor.service
363 | # 2093 1879 0 0 0 1000 S 275 6 0 gvfs-gphoto2-vo /user.slice/user-1000.slice/user@1000.service/gvfs-gphoto2-volume-monitor.service
364 | # 2106 1879 3 3 0 1000 S 544 42 0 wnck-applet /user.slice/user-1000.slice/user@1000.service/dbus.service
365 | # 2108 1879 1 1 0 1000 S 396 21 0 notification-ar /user.slice/user-1000.slice/user@1000.service/dbus.service
366 | # 2112 1879 1 1 0 1000 S 499 25 0 mate-sensors-ap /user.slice/user-1000.slice/user@1000.service/dbus.service
367 | # 2113 1879 1 1 0 1000 S 390 21 0 mate-brightness /user.slice/user-1000.slice/user@1000.service/dbus.service
368 | # 2114 1879 1 1 0 1000 S 534 22 0 mate-multiload- /user.slice/user-1000.slice/user@1000.service/dbus.service
369 | # 2118 1879 2 2 0 1000 S 547 29 0 clock-applet /user.slice/user-1000.slice/user@1000.service/dbus.service
370 | # 2152 1879 1 1 0 1000 S 218 22 0 gvfsd-metadata /user.slice/user-1000.slice/user@1000.service/gvfs-metadata.service
371 | # 2206 1 3 3 0 110 S 106 48 0 tor /system.slice/system-tor.slice/tor@default.service
372 | # 2229 1 3 3 0 1000 S 999 42 0 kactivitymanage /user.slice/user-1000.slice/session-7.scope
373 | # 2238 1 0 0 0 1000 S 150 9 0 kdeinit5 /user.slice/user-1000.slice/session-7.scope
374 | # 2239 2238 3 3 0 1000 S 648 41 0 klauncher /user.slice/user-1000.slice/session-7.scope
375 | # 3959 1 1 1 0 0 S 615 18 0 NetworkManager /system.slice/NetworkManager.service
376 | # 3977 3959 0 0 0 0 S 20 4 0 dhclient /system.slice/NetworkManager.service
377 | # 5626 1879 0 0 0 1000 S 355 7 0 gvfsd-network /user.slice/user-1000.slice/user@1000.service/gvfs-daemon.service
378 | # 5637 1879 1 1 0 1000 S 623 14 0 gvfsd-smb-brows /user.slice/user-1000.slice/user@1000.service/gvfs-daemon.service
379 | # 6296 1879 0 0 0 1000 S 435 7 0 gvfsd-dnssd /user.slice/user-1000.slice/user@1000.service/gvfs-daemon.service
380 | # 11129 1879 3 3 0 1000 S 597 42 0 kded5 /user.slice/user-1000.slice/user@1000.service/dbus.service
381 | # 11136 1879 2 2 0 1000 S 639 39 0 kuiserver5 /user.slice/user-1000.slice/user@1000.service/dbus.service
382 | # 11703 1879 3 3 0 1000 S 500 45 0 mate-system-mon /user.slice/user-1000.slice/user@1000.service/dbus.service
383 | # 16798 1879 0 0 0 1000 S 346 10 0 gvfsd-http /user.slice/user-1000.slice/user@1000.service/gvfs-daemon.service
384 | # 18133 1 3 3 0 1000 S 760 49 0 kate /user.slice/user-1000.slice/session-7.scope
385 | # 18144 2038 1 1 0 1000 S 301 23 0 lxterminal /user.slice/user-1000.slice/session-7.scope
386 | # 18147 18144 0 0 0 1000 S 14 2 0 gnome-pty-helpe /user.slice/user-1000.slice/session-7.scope
387 | # 18148 18144 1 1 0 1000 S 42 26 0 bash /user.slice/user-1000.slice/session-7.scope
388 | # 18242 2238 1 1 0 1000 S 194 14 0 file.so /user.slice/user-1000.slice/session-7.scope
389 | # 18246 18148 0 0 0 0 S 54 4 0 sudo /user.slice/user-1000.slice/session-7.scope
390 | # 19003 1 0 0 0 0 S 310 12 0 packagekitd /system.slice/packagekit.service
391 | # 26993 2038 91 91 0 1000 S 3935 1256 0 firefox-esr /user.slice/user-1000.slice/session-7.scope
392 | # 27275 26993 121 121 0 1000 S 3957 1684 0 Web Content /user.slice/user-1000.slice/session-7.scope
393 | # 30374 1 1 1 0 1000 S 167 14 0 VBoxXPCOMIPCD /user.slice/user-1000.slice/session-7.scope
394 | # 30380 1 2 2 0 1000 S 958 27 0 VBoxSVC /user.slice/user-1000.slice/session-7.scope
395 | # 30549 30380 86 86 0 1000 S 5332 1192 0 VirtualBox /user.slice/user-1000.slice/session-7.scope
396 | # 30875 1 1 1 0 1000 S 345 26 0 leafpad /user.slice/user-1000.slice/session-7.scope
397 | # 32689 1 7 7 0 1000 S 896 99 0 dolphin /user.slice/user-1000.slice/session-7.scope
398 | ###################################################################################################################
399 | Process with highest badness (found in 55 ms):
400 | PID: 27275, Name: Web Content, badness: 121
401 | ```
402 |
403 |
404 |
405 | ## Logging
406 |
407 | To view the latest entries in the log (for systemd users):
408 | ```bash
409 | sudo journalctl -eu nohang.service
410 |
411 | #### or
412 |
413 | sudo journalctl -eu nohang-desktop.service
414 | ```
415 |
416 | You can also enable `separate_log` in the config to log to `/var/log/nohang/nohang.log`.
417 |
418 | ## oom-sort
419 |
420 | `oom-sort` is an additional diagnostic tool that will be installed with the `nohang` package. It sorts the processes in descending order of their `oom_score` and also displays `oom_score_adj`, `Uid`, `Pid`, `Name`, `VmRSS`, `VmSwap` and optionally `cmdline`. Run `oom-sort --help` for more info. Man page: [oom-sort.manpage.md](docs/oom-sort.manpage.md).
421 |
422 | Usage:
423 |
424 | ```bash
425 | oom-sort
426 | ```
427 |
428 |
429 | Output example
430 |
431 | ```
432 | oom_score oom_score_adj UID PID Name VmRSS VmSwap cmdline
433 | --------- ------------- ---- ----- --------------- ------- -------- -------
434 | 23 0 0 964 Xorg 58 M 22 M /usr/libexec/Xorg -background none :0 vt01 -nolisten tcp -novtswitch -auth /var/run/lxdm/lxdm-:0.auth
435 | 13 0 1000 1365 pcmanfm 38 M 10 M pcmanfm --desktop --profile LXDE
436 | 10 0 1000 1408 dnfdragora-upda 9 M 27 M /usr/bin/python3 /bin/dnfdragora-updater
437 | 5 0 0 822 firewalld 0 M 19 M /usr/bin/python3 /usr/sbin/firewalld --nofork --nopid
438 | 5 0 1000 1364 lxpanel 18 M 2 M lxpanel --profile LXDE
439 | 5 0 1000 1685 nm-applet 6 M 12 M nm-applet
440 | 5 0 1000 1862 lxterminal 16 M 2 M lxterminal
441 | 4 0 996 890 polkitd 8 M 6 M /usr/lib/polkit-1/polkitd --no-debug
442 | 4 0 1000 1703 pnmixer 6 M 11 M pnmixer
443 | 3 0 0 649 systemd-journal 10 M 1 M /usr/lib/systemd/systemd-journald
444 | 3 0 1000 1360 openbox 9 M 2 M openbox --config-file /home/user/.config/openbox/lxde-rc.xml
445 | 3 0 1000 1363 notification-da 3 M 10 M /usr/libexec/notification-daemon
446 | 2 0 1000 1744 clipit 5 M 3 M clipit
447 | 2 0 1000 2619 python3 9 M 0 M python3 /bin/oom-sort
448 | 1 0 0 809 rsyslogd 3 M 3 M /usr/sbin/rsyslogd -n
449 | 1 0 0 825 udisksd 2 M 2 M /usr/libexec/udisks2/udisksd
450 | 1 0 0 873 sssd_nss 4 M 1 M /usr/libexec/sssd/sssd_nss --uid 0 --gid 0 --logger=files
451 | 1 0 0 876 systemd-logind 2 M 2 M /usr/lib/systemd/systemd-logind
452 | 1 0 0 907 abrt-dump-journ 2 M 1 M /usr/bin/abrt-dump-journal-oops -fxtD
453 | 1 0 0 920 NetworkManager 3 M 2 M /usr/sbin/NetworkManager --no-daemon
454 | 1 0 1000 1115 systemd 4 M 1 M /usr/lib/systemd/systemd --user
455 | 1 0 1000 1118 (sd-pam) 0 M 5 M (sd-pam)
456 | 1 0 1000 1366 xscreensaver 5 M 0 M xscreensaver -no-splash
457 | 1 0 1000 1851 gvfsd-trash 3 M 1 M /usr/libexec/gvfsd-trash --spawner :1.6 /org/gtk/gvfs/exec_spaw/0
458 | 1 0 1000 1969 gvfsd-metadata 6 M 0 M /usr/libexec/gvfsd-metadata
459 | 1 0 1000 2262 bash 5 M 0 M bash
460 | 0 -1000 0 675 systemd-udevd 0 M 4 M /usr/lib/systemd/systemd-udevd
461 | 0 -1000 0 787 auditd 0 M 1 M /sbin/auditd
462 | 0 0 0 807 ModemManager 0 M 1 M /usr/sbin/ModemManager
463 | 0 0 0 808 smartd 0 M 1 M /usr/sbin/smartd -n -q never
464 | 0 0 0 810 alsactl 0 M 0 M /usr/sbin/alsactl -s -n 19 -c -E ALSA_CONFIG_PATH=/etc/alsa/alsactl.conf --initfile=/lib/alsa/init/00main rdaemon
465 | 0 0 0 811 mcelog 0 M 0 M /usr/sbin/mcelog --ignorenodev --daemon --foreground
466 | 0 0 172 813 rtkit-daemon 0 M 0 M /usr/libexec/rtkit-daemon
467 | 0 0 0 814 VBoxService 0 M 1 M /usr/sbin/VBoxService -f
468 | 0 0 0 817 rngd 0 M 1 M /sbin/rngd -f
469 | 0 -900 81 818 dbus-daemon 3 M 0 M /usr/bin/dbus-daemon --system --address=systemd: --nofork --nopidfile --systemd-activation --syslog-only
470 | 0 0 0 823 irqbalance 0 M 0 M /usr/sbin/irqbalance --foreground
471 | 0 0 70 824 avahi-daemon 0 M 0 M avahi-daemon: running [linux.local]
472 | 0 0 0 826 sssd 0 M 2 M /usr/sbin/sssd -i --logger=files
473 | 0 0 995 838 chronyd 1 M 0 M /usr/sbin/chronyd
474 | 0 0 0 849 gssproxy 0 M 1 M /usr/sbin/gssproxy -D
475 | 0 0 0 866 abrtd 0 M 2 M /usr/sbin/abrtd -d -s
476 | 0 0 70 870 avahi-daemon 0 M 0 M avahi-daemon: chroot helper
477 | 0 0 0 871 sssd_be 0 M 2 M /usr/libexec/sssd/sssd_be --domain implicit_files --uid 0 --gid 0 --logger=files
478 | 0 0 0 875 accounts-daemon 0 M 1 M /usr/libexec/accounts-daemon
479 | 0 0 0 906 abrt-dump-journ 1 M 2 M /usr/bin/abrt-dump-journal-core -D -T -f -e
480 | 0 0 0 908 abrt-dump-journ 1 M 2 M /usr/bin/abrt-dump-journal-xorg -fxtD
481 | 0 0 0 950 crond 2 M 1 M /usr/sbin/crond -n
482 | 0 0 0 951 atd 0 M 0 M /usr/sbin/atd -f
483 | 0 0 0 953 lxdm-binary 0 M 0 M /usr/sbin/lxdm-binary
484 | 0 0 0 1060 dhclient 0 M 2 M /sbin/dhclient -d -q -sf /usr/libexec/nm-dhcp-helper -pf /var/run/dhclient-enp0s3.pid -lf /var/lib/NetworkManager/dhclient-939eab05-4796-3792-af24-9f76cf53ca7f-enp0s3.lease -cf /var/lib/NetworkManager/dhclient-enp0s3.conf enp0s3
485 | 0 0 0 1105 lxdm-session 0 M 1 M /usr/libexec/lxdm-session
486 | 0 0 1000 1123 pulseaudio 0 M 3 M /usr/bin/pulseaudio --daemonize=no
487 | 0 0 1000 1124 lxsession 1 M 2 M /usr/bin/lxsession -s LXDE -e LXDE
488 | 0 0 1000 1134 dbus-daemon 2 M 0 M /usr/bin/dbus-daemon --session --address=systemd: --nofork --nopidfile --systemd-activation --syslog-only
489 | 0 0 1000 1215 imsettings-daem 0 M 1 M /usr/libexec/imsettings-daemon
490 | 0 0 1000 1218 gvfsd 3 M 1 M /usr/libexec/gvfsd
491 | 0 0 1000 1223 gvfsd-fuse 0 M 1 M /usr/libexec/gvfsd-fuse /run/user/1000/gvfs -f -o big_writes
492 | 0 0 1000 1309 VBoxClient 0 M 0 M /usr/bin/VBoxClient --display
493 | 0 0 1000 1310 VBoxClient 0 M 0 M /usr/bin/VBoxClient --clipboard
494 | 0 0 1000 1311 VBoxClient 0 M 0 M /usr/bin/VBoxClient --draganddrop
495 | 0 0 1000 1312 VBoxClient 0 M 0 M /usr/bin/VBoxClient --display
496 | 0 0 1000 1313 VBoxClient 1 M 0 M /usr/bin/VBoxClient --clipboard
497 | 0 0 1000 1316 VBoxClient 0 M 0 M /usr/bin/VBoxClient --seamless
498 | 0 0 1000 1318 VBoxClient 0 M 0 M /usr/bin/VBoxClient --seamless
499 | 0 0 1000 1320 VBoxClient 0 M 0 M /usr/bin/VBoxClient --draganddrop
500 | 0 0 1000 1334 ssh-agent 0 M 0 M /usr/bin/ssh-agent /bin/sh -c exec -l bash -c "/usr/bin/startlxde"
501 | 0 0 1000 1362 lxpolkit 0 M 1 M lxpolkit
502 | 0 0 1000 1370 lxclipboard 0 M 1 M lxclipboard
503 | 0 0 1000 1373 ssh-agent 0 M 1 M /usr/bin/ssh-agent -s
504 | 0 0 1000 1485 agent 0 M 1 M /usr/libexec/geoclue-2.0/demos/agent
505 | 0 0 1000 1751 menu-cached 0 M 1 M /usr/libexec/menu-cache/menu-cached /run/user/1000/menu-cached-:0
506 | 0 0 1000 1780 at-spi-bus-laun 0 M 1 M /usr/libexec/at-spi-bus-launcher
507 | 0 0 1000 1786 dbus-daemon 1 M 0 M /usr/bin/dbus-daemon --config-file=/usr/share/defaults/at-spi2/accessibility.conf --nofork --print-address 3
508 | 0 0 1000 1792 at-spi2-registr 1 M 1 M /usr/libexec/at-spi2-registryd --use-gnome-session
509 | 0 0 1000 1840 gvfs-udisks2-vo 0 M 2 M /usr/libexec/gvfs-udisks2-volume-monitor
510 | 0 0 1000 1863 gnome-pty-helpe 1 M 0 M gnome-pty-helper
511 | 0 0 1000 1864 bash 0 M 1 M bash
512 | 0 0 0 1899 sudo 0 M 1 M sudo -i
513 | 0 0 0 1901 bash 0 M 1 M -bash
514 | 0 0 0 1953 oomd_bin 0 M 0 M oomd_bin -f /sys/fs/cgroup/unified
515 | 0 -600 0 2562 python3 10 M 0 M python3 /usr/sbin/nohang --config /etc/nohang/nohang.conf
516 | ```
517 |
518 |
519 | Kthreads, zombies and Pid 1 will not be displayed.
520 |
521 | ## psi-top
522 |
523 | psi-top is a script that prints the PSI metric values for every cgroup. It requires `Linux` >= 4.20 with `CONFIG_PSI=y`. Man page: [psi-top.manpage.md](docs/psi-top.manpage.md).
524 |
525 |
526 | Output example
527 |
528 | ```
529 | $ psi-top
530 | cgroup2 mountpoint: /sys/fs/cgroup
531 | avg10 avg60 avg300 avg10 avg60 avg300 cgroup2
532 | ----- ----- ------ ----- ----- ------ ---------
533 | some 0.00 0.21 1.56 | full 0.00 0.16 1.14 [SYSTEM_WIDE]
534 | some 0.00 0.21 1.56 | full 0.00 0.16 1.14
535 | some 0.00 0.15 1.11 | full 0.00 0.12 0.89 /user.slice
536 | some 45.92 28.77 20.19 | full 45.05 28.17 19.56 /user.slice/user-1000.slice
537 | some 1.44 4.67 9.24 | full 1.44 4.65 9.20 /user.slice/user-1000.slice/user@1000.service
538 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/pulseaudio.service
539 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/gvfs-daemon.service
540 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/dbus.socket
541 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/gvfs-udisks2-volume-monitor.service
542 | some 0.25 1.97 4.05 | full 0.25 1.96 4.03 /user.slice/user-1000.slice/user@1000.service/xfce4-notifyd.service
543 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/init.scope
544 | some 0.00 0.66 1.99 | full 0.00 0.66 1.97 /user.slice/user-1000.slice/user@1000.service/gpg-agent.service
545 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/gvfs-gphoto2-volume-monitor.service
546 | some 0.93 0.75 0.20 | full 0.93 0.75 0.20 /user.slice/user-1000.slice/user@1000.service/at-spi-dbus-bus.service
547 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/gvfs-metadata.service
548 | some 0.00 2.44 6.78 | full 0.00 2.43 6.74 /user.slice/user-1000.slice/user@1000.service/dbus.service
549 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/gvfs-mtp-volume-monitor.service
550 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /user.slice/user-1000.slice/user@1000.service/gvfs-afc-volume-monitor.service
551 | some 44.99 28.30 19.41 | full 44.10 27.70 18.79 /user.slice/user-1000.slice/session-2.scope
552 | some 0.00 0.31 0.53 | full 0.00 0.31 0.53 /init.scope
553 | some 7.25 11.40 13.34 | full 7.23 11.32 13.24 /system.slice
554 | some 0.00 0.01 0.02 | full 0.00 0.01 0.02 /system.slice/systemd-udevd.service
555 | some 0.00 0.58 1.55 | full 0.00 0.58 1.55 /system.slice/cronie.service
556 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/sys-kernel-config.mount
557 | some 0.00 0.22 0.35 | full 0.00 0.22 0.35 /system.slice/polkit.service
558 | some 0.00 0.06 0.20 | full 0.00 0.06 0.20 /system.slice/rtkit-daemon.service
559 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/sys-kernel-debug.mount
560 | some 0.00 0.14 0.62 | full 0.00 0.14 0.62 /system.slice/accounts-daemon.service
561 | some 7.86 11.48 12.56 | full 7.84 11.42 12.51 /system.slice/lightdm.service
562 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/ModemManager.service
563 | some 0.00 1.82 5.47 | full 0.00 1.81 5.43 /system.slice/systemd-journald.service
564 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/dev-mqueue.mount
565 | some 0.00 1.64 4.07 | full 0.00 1.64 4.07 /system.slice/NetworkManager.service
566 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/tmp.mount
567 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/lvm2-lvmetad.service
568 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/dev-disk-by\x2duuid-5d7355c0\x2dc131\x2d40c5\x2d8541\x2d1e04ad7c8b8d.swap
569 | some 0.00 0.09 0.11 | full 0.00 0.09 0.11 /system.slice/upower.service
570 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/udisks2.service
571 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/dev-hugepages.mount
572 | some 0.00 0.27 0.49 | full 0.00 0.27 0.48 /system.slice/dbus.service
573 | some 0.00 0.00 0.00 | full 0.00 0.00 0.00 /system.slice/system-getty.slice
574 | some 0.00 0.12 0.20 | full 0.00 0.12 0.20 /system.slice/avahi-daemon.service
575 | some 0.00 0.18 0.30 | full 0.00 0.18 0.30 /system.slice/systemd-logind.service
576 | ```
577 |
578 |
579 | ## psi2log
580 |
581 | psi2log is a CLI tool that can check and log PSI metrics from a specified target. It requires `Linux` >= 4.20 with `CONFIG_PSI=y`. Man page: [psi2log.manpage.md](docs/psi2log.manpage.md).
582 |
583 |
584 | Output example
585 |
586 | ```
587 | $ psi2log
588 | Starting psi2log
589 | target: SYSTEM_WIDE
590 | period: 2
591 | ------------------------------------------------------------------------------------------------------------------
592 | some cpu pressure || some memory pressure | full memory pressure || some io pressure | full io pressure
593 | ---------------------||----------------------|----------------------||----------------------|---------------------
594 | avg10 avg60 avg300 || avg10 avg60 avg300 | avg10 avg60 avg300 || avg10 avg60 avg300 | avg10 avg60 avg300
595 | ------ ------ ------ || ------ ------ ------ | ------ ------ ------ || ------ ------ ------ | ------ ------ ------
596 | 0.13 0.26 0.08 || 3.36 10.31 3.47 | 2.68 7.69 2.56 || 20.24 26.90 8.60 | 18.80 23.16 7.33
597 | 0.11 0.25 0.08 || 2.75 9.97 3.45 | 2.20 7.44 2.54 || 18.38 26.34 8.61 | 17.21 22.73 7.35
598 | 0.09 0.25 0.07 || 2.25 9.65 3.43 | 1.80 7.20 2.52 || 15.05 25.48 8.55 | 14.09 21.99 7.30
599 | 0.07 0.24 0.07 || 1.84 9.33 3.40 | 1.47 6.96 2.51 || 13.05 24.78 8.52 | 12.26 21.40 7.28
600 | ^C
601 | Peak values: avg10 avg60 avg300
602 | ----------- ------ ------ ------
603 | some cpu 0.13 0.26 0.08
604 | ----------- ------ ------ ------
605 | some memory 3.36 10.31 3.47
606 | full memory 2.68 7.69 2.56
607 | ----------- ------ ------ ------
608 | some io 20.24 26.90 8.61
609 | full io 18.80 23.16 7.35
610 | $ psi2log -t /user.slice -l pm.log
611 | Starting psi2log
612 | target: /user.slice
613 | period: 2
614 | log file: pm.log
615 | cgroup2 mountpoint: /sys/fs/cgroup
616 | ------------------------------------------------------------------------------------------------------------------
617 | some cpu pressure || some memory pressure | full memory pressure || some io pressure | full io pressure
618 | ---------------------||----------------------|----------------------||----------------------|---------------------
619 | avg10 avg60 avg300 || avg10 avg60 avg300 | avg10 avg60 avg300 || avg10 avg60 avg300 | avg10 avg60 avg300
620 | ------ ------ ------ || ------ ------ ------ | ------ ------ ------ || ------ ------ ------ | ------ ------ ------
621 | 28.32 11.97 3.03 || 0.00 1.05 1.65 | 0.00 0.85 1.33 || 0.55 7.79 7.21 | 0.54 7.52 6.80
622 | 29.53 12.72 3.25 || 0.00 1.01 1.64 | 0.00 0.82 1.32 || 0.81 7.60 7.17 | 0.44 7.27 6.76
623 | 29.80 13.32 3.44 || 0.00 0.98 1.63 | 0.00 0.79 1.31 || 0.66 7.35 7.12 | 0.36 7.03 6.71
624 | 29.83 13.86 3.62 || 0.00 0.95 1.62 | 0.00 0.77 1.30 || 0.54 7.11 7.08 | 0.30 6.80 6.66
625 | 29.86 14.39 3.80 || 0.00 0.91 1.60 | 0.00 0.74 1.29 || 0.44 6.88 7.03 | 0.24 6.58 6.62
626 | 30.07 14.94 3.99 || 0.00 0.88 1.59 | 0.00 0.72 1.28 || 0.36 6.65 6.98 | 0.20 6.36 6.57
627 | ^C
628 | Peak values: avg10 avg60 avg300
629 | ----------- ------ ------ ------
630 | some cpu 30.07 14.94 3.99
631 | ----------- ------ ------ ------
632 | some memory 0.00 1.05 1.65
633 | full memory 0.00 0.85 1.33
634 | ----------- ------ ------ ------
635 | some io 0.81 7.79 7.21
636 | full io 0.54 7.52 6.80
637 | ```
638 |
639 |
640 | ## Contribution
641 |
642 | - Use cases, feature requests and any questions are [welcome](https://github.com/hakavlad/nohang/issues).
643 | - Pull requests to the `dev` branch are welcome.
644 |
645 | ## Documentation
646 |
647 | - [nohang.manpage.md](docs/nohang.manpage.md)
648 | - [oom-sort.manpage.md](docs/oom-sort.manpage.md)
649 | - [psi2log.manpage.md](docs/psi2log.manpage.md)
650 | - [psi-top.manpage.md](docs/psi-top.manpage.md)
651 | - [FAQ.ru.md](docs/FAQ.ru.md)
652 | - [CHANGELOG.md](CHANGELOG.md)
653 |
654 | ## License
655 |
656 | This project is licensed under the terms of the [MIT license](LICENSE).
657 |
--------------------------------------------------------------------------------
/conf/logrotate.d/nohang:
--------------------------------------------------------------------------------
1 | /var/log/nohang/*.log {
2 | missingok
3 | copytruncate
4 | notifempty
5 | size 1M
6 | rotate 5
7 | compress
8 | delaycompress
9 | }
10 |
--------------------------------------------------------------------------------
/conf/nohang/nohang-desktop.conf.in:
--------------------------------------------------------------------------------
1 | ## This is the configuration file of the nohang daemon.
2 |
3 | ## The configuration includes the following sections:
4 | ## 0. Check kernel messages for OOM events
5 | ## 1. Common zram settings
6 | ## 2. Common PSI settings
7 | ## 3. Poll rate
8 | ## 4. Warnings and notifications
9 | ## 5. Soft (SIGTERM) threshold
10 | ## 6. Hard (SIGKILL) threshold
11 | ## 7. Customize victim selection: adjusting badness of processes
12 | ## 8. Customize soft corrective actions
13 | ## 9. Misc settings
14 | ## 10. Verbosity, debug, logging
15 |
16 | ## WARNING!
17 | ## - Lines starting with #, a tab, or any other whitespace character are comments.
18 | ## - Lines starting with @ contain optional parameters that may be repeated.
19 | ## - All values are case sensitive.
20 | ## - nohang doesn't forbid you to shoot yourself in the foot. Be careful!
21 | ## - Restart the daemon after editing the file to apply the new settings.
22 | ## - You can find the file with default values here: :TARGET_DATADIR:/nohang/nohang.conf
23 |
24 | ## For descriptions of the config keys, see the nohang(8) man page.
25 |
26 | ###############################################################################
27 |
28 | ## 0. Check kernel messages for OOM events
29 |
30 | # @check_kmsg
31 | ## Type: boolean
32 | ## Comment/uncomment to disable/enable checking kmsg for OOM events
33 |
34 | # @debug_kmsg
35 | ## Type: boolean
36 | ## Comment/uncomment to disable/enable debug checking kmsg
37 |
38 | ###############################################################################
39 |
40 | 1. Common zram settings
41 |
42 | Key: zram_checking_enabled
43 | Description:
44 | Type: boolean
45 | Valid values: True | False
46 | Default value: False
47 |
48 | zram_checking_enabled = False
49 |
50 | ###############################################################################
51 |
52 | 2. Common PSI settings
53 |
54 | Key: psi_checking_enabled
55 | Description:
56 | Type: boolean
57 | Valid values: True | False
58 | Default value: True
59 |
60 | psi_checking_enabled = True
61 |
62 | Key: psi_path
63 | Description:
64 | Type: string
65 | Valid values: any string
66 | Default value: /proc/pressure/memory
67 |
68 | psi_path = /proc/pressure/memory
69 |
70 | Key: psi_metrics
71 | Description:
72 | Type: string
73 | Valid values: some_avg10, some_avg60, some_avg300,
74 | full_avg10, full_avg60, full_avg300
75 | Default value: full_avg10
76 |
77 | psi_metrics = full_avg10
78 |
79 | Key: psi_excess_duration
80 | Description:
81 | Type: float
82 | Valid values: >= 0
83 | Default value: 30
84 |
85 | psi_excess_duration = 30
86 |
87 | Key: psi_post_action_delay
88 | Description:
89 | Type: float
90 | Valid values: >= 10
91 | Default value: 15
92 |
93 | psi_post_action_delay = 15
94 |
95 | ###############################################################################
96 |
97 | 3. Poll rate
98 |
99 | Key: fill_rate_mem
100 | Description:
101 | Type: float
102 | Valid values: >= 100
103 | Default value: 6000
104 |
105 | fill_rate_mem = 6000
106 |
107 | Key: fill_rate_swap
108 | Description:
109 | Type: float
110 | Valid values: >= 100
111 | Default value: 2000
112 |
113 | fill_rate_swap = 2000
114 |
115 | Key: fill_rate_zram
116 | Description:
117 | Type: float
118 | Valid values: >= 100
119 | Default value: 4000
120 |
121 | fill_rate_zram = 4000
122 |
123 | Key: max_sleep
124 | Description:
125 | Type: float
126 | Valid values: >= 0.01 and >= min_sleep
127 | Default value: 3
128 |
129 | max_sleep = 3
130 |
131 | Key: min_sleep
132 | Description:
133 | Type: float
134 | Valid values: >= 0.01 and <= max_sleep
135 | Default value: 0.1
136 |
137 | min_sleep = 0.1
138 |
139 | ###############################################################################
140 |
141 | 4. Warnings and notifications
142 |
143 | 4.1. GUI notifications after corrective actions
144 |
145 | Key: post_action_gui_notifications
146 | Description:
147 | Type: boolean
148 | Valid values: True | False
149 | Default value: True
150 |
151 | post_action_gui_notifications = True
152 |
153 | Key: hide_corrective_action_type
154 | Description:
155 | Type: boolean
156 | Valid values: True | False
157 | Default value: False
158 |
159 | hide_corrective_action_type = False
160 |
161 | 4.2. Low memory warnings
162 |
163 | Key: low_memory_warnings_enabled
164 | Description:
165 | Type: boolean
166 | Valid values: True | False
167 | Default value: True
168 |
169 | low_memory_warnings_enabled = True
170 |
171 | Key: warning_exe
172 | Description:
173 | Type: string
174 | Valid values: any string
175 | Default value: (empty string)
176 |
177 | warning_exe =
178 |
179 | Key: warning_threshold_min_mem
180 | Description:
181 | Type: float (with % or M)
182 | Valid values: from the range [0; 100] %
183 | Default value: 20 %
184 |
185 | warning_threshold_min_mem = 20 %
186 |
187 | Key: warning_threshold_min_swap
188 | Description:
189 | Type: float (with % or M)
190 | Valid values: [0; 100] % or >= 0 M
191 | Default value: 25 %
192 |
193 | warning_threshold_min_swap = 25 %
194 |
195 | Key: warning_threshold_max_zram
196 | Description:
197 | Type: float (with % or M)
198 | Valid values: from the range [0; 100] %
199 | Default value: 45 %
200 |
201 | warning_threshold_max_zram = 45 %
202 |
203 | Key: warning_threshold_max_psi
204 | Description:
205 | Type: float
206 | Valid values: from the range [0; 100]
207 | Default value: 10
208 |
209 | warning_threshold_max_psi = 10
210 |
211 | Key: min_post_warning_delay
212 | Description:
213 | Type: float
214 | Valid values: >= 1
215 | Default value: 60
216 |
217 | min_post_warning_delay = 60
218 |
219 | Key: env_cache_time
220 | Description:
221 | Type: float
222 | Valid values: >= 0
223 | Default value: 300
224 |
225 | env_cache_time = 300
226 |
227 | ###############################################################################
228 |
229 | 5. Soft threshold (thresholds for sending the SIGTERM signal or
230 | implementing other soft corrective action)
231 |
232 | Key: soft_threshold_min_mem
233 | Description:
234 | Type: float (with % or M)
235 | Valid values: from the range [0; 50] %
236 | Default value: 5 %
237 |
238 | soft_threshold_min_mem = 5 %
239 |
240 | Key: soft_threshold_min_swap
241 | Description:
242 | Type: float (with % or M)
243 | Valid values: [0; 100] % or >= 0 M
244 | Default value: 10 %
245 |
246 | soft_threshold_min_swap = 10 %
247 |
248 | Key: soft_threshold_max_zram
249 | Description:
250 | Type: float (with % or M)
251 | Valid values: from the range [10; 90] %
252 | Default value: 55 %
253 |
254 | soft_threshold_max_zram = 55 %
255 |
256 | Key: soft_threshold_max_psi
257 | Description:
258 | Type: float
259 | Valid values: from the range [5; 100]
260 | Default value: 40
261 |
262 | soft_threshold_max_psi = 40
263 |
264 | ###############################################################################
265 |
266 | 6. Hard threshold (thresholds for sending the SIGKILL signal)
267 |
268 | Key: hard_threshold_min_mem
269 | Description:
270 | Type: float (with % or M)
271 | Valid values: from the range [0; 50] %
272 | Default value: 2 %
273 |
274 | hard_threshold_min_mem = 2 %
275 |
276 | Key: hard_threshold_min_swap
277 | Description:
278 | Type: float (with % or M)
279 | Valid values: [0; 100] % or >= 0 M
280 | Default value: 4 %
281 |
282 | hard_threshold_min_swap = 4 %
283 |
284 | Key: hard_threshold_max_zram
285 | Description:
286 | Type: float (with % or M)
287 | Valid values: from the range [10; 90] %
288 | Default value: 60 %
289 |
290 | hard_threshold_max_zram = 60 %
291 |
292 | Key: hard_threshold_max_psi
293 | Description:
294 | Type: float
295 | Valid values: from the range [5; 100]
296 | Default value: 90
297 |
298 | hard_threshold_max_psi = 90
299 |
300 | ###############################################################################
301 |
302 | 7. Customize victim selection: adjusting badness of processes
303 |
304 | 7.1. Ignore positive oom_score_adj
305 |
306 | Key: ignore_positive_oom_score_adj
307 | Description:
308 | Type: boolean
309 | Valid values: True | False
310 | Default value: False
311 |
312 | ignore_positive_oom_score_adj = False
313 |
314 | 7.2.1. Matching process names with RE patterns to change their badness
315 |
316 | Syntax:
317 |
318 | @BADNESS_ADJ_RE_NAME badness_adj /// RE_pattern
319 |
320 | New badness value will be += badness_adj
321 |
322 | It is possible to compare multiple patterns
323 | with different badness_adj values.
324 |
325 | Example:
326 | @BADNESS_ADJ_RE_NAME -500 /// ^sshd$
327 |
328 | Prefer terminating Firefox tabs instead of terminating the entire browser.
329 | (In Chromium and Electron-based apps child processes get oom_score_adj=300
330 | by default.)
331 | @BADNESS_ADJ_RE_NAME 200 /// ^(Web Content|Privileged Cont|file:// Content)$
332 |
333 | @BADNESS_ADJ_RE_NAME -200 /// ^(dnf|yum|packagekitd)$
334 |
335 |
336 | 7.2.2. Matching CGroup_v1-line with RE patterns
337 |
338 | @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/
339 |
340 | @BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$
341 |
342 | @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/
343 |
344 | 7.2.3. Matching CGroup_v2-line with RE patterns
345 |
346 | @BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload
347 |
348 | 7.2.4. Matching eUIDs with RE patterns
349 |
350 | @BADNESS_ADJ_RE_UID -100 /// ^0$
351 |
352 | 7.2.5. Matching /proc/[pid]/exe realpath with RE patterns
353 |
354 | Example:
355 | @BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo$
356 |
357 | Protect X.
358 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/libexec/Xorg|/usr/lib/xorg/Xorg|/usr/lib/Xorg|/usr/bin/X|/usr/bin/Xorg|/usr/bin/Xwayland|/usr/bin/weston|/usr/bin/sway)$
359 |
360 | Protect GNOME.
361 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/gnome-shell|/usr/bin/metacity|/usr/bin/mutter|/usr/lib/gnome-session/gnome-session-binary|/usr/libexec/gnome-session-binary|/usr/libexec/gnome-session-ctl)$
362 |
363 | Protect KDE Plasma.
364 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/plasma-desktop|/usr/bin/plasmashell|/usr/bin/plasma_session|/usr/bin/kwin|/usr/bin/kwin_x11|/usr/bin/kwin_wayland)$
365 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/startplasma-wayland|/usr/lib/x86_64-linux-gnu/libexec/startplasma-waylandsession|/usr/bin/ksmserver)$
366 |
367 | Protect Cinnamon.
368 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/cinnamon|/usr/bin/muffin|/usr/bin/cinnamon-session|/usr/bin/cinnamon-launcher)$
369 |
370 | Protect Xfce.
371 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/xfwm4|/usr/bin/xfce4-session|/usr/bin/xfce4-panel|/usr/bin/xfdesktop)$
372 |
373 | Protect Mate.
374 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/marco|/usr/bin/mate-session|/usr/bin/caja|/usr/bin/mate-panel)$
375 |
376 | Protect LXQt.
377 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/lxqt-panel|/usr/bin/pcmanfm-qt|/usr/bin/lxqt-session)$
378 |
379 | Protect Budgie Desktop.
380 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/budgie-wm|/usr/bin/budgie-panel)$
381 |
382 | Protect other.
383 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/compiz|/usr/bin/openbox|/usr/bin/fluxbox|/usr/bin/awesome|/usr/bin/icewm|/usr/bin/enlightenment|/usr/bin/gala|/usr/bin/wingpanel|/usr/bin/i3)$
384 |
385 | Protect display managers.
386 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/sbin/gdm|/usr/sbin/gdm3|/usr/sbin/sddm|/usr/bin/sddm|/usr/lib/x86_64-linux-gnu/sddm/sddm-helper|/usr/bin/slim|/usr/sbin/lightdm|/usr/libexec/gdm-session-worker|/usr/libexec/gdm-wayland-session|/usr/lib/gdm3/gdm-wayland-session|/usr/lib/gdm3/gdm-session-worker)$
387 | @BADNESS_ADJ_RE_REALPATH -200 /// ^/usr/lib/gdm3/
388 |
389 | Protect systemd-logind.
390 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/lib/systemd/systemd-logind|/usr/lib/systemd/systemd-logind)$
391 |
392 | Protect `systemd --user`.
393 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/lib/systemd/systemd|/usr/lib/systemd/systemd)$
394 |
395 | Protect dbus.
396 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/dbus-daemon|/usr/bin/dbus-run-session|/usr/bin/dbus-broker-launcher|/usr/bin/dbus-broker)$
397 |
398 | Protect package managers and distro installers.
399 | @BADNESS_ADJ_RE_REALPATH -200 /// ^(/usr/bin/calamares|/usr/bin/dpkg|/usr/bin/pacman|/usr/bin/yay|/usr/bin/pamac|/usr/bin/pamac-daemon|/usr/bin/pamac-manager)$
400 |
401 | Prefer stress.
402 | @BADNESS_ADJ_RE_REALPATH 900 /// ^(/usr/bin/stress|/usr/bin/stress-ng)$
403 |
404 |
405 | 7.2.6. Matching /proc/[pid]/cwd realpath with RE patterns
406 |
407 | @BADNESS_ADJ_RE_CWD 200 /// ^/home/
408 |
409 | 7.2.7. Matching cmdlines with RE patterns
410 | WARNING: using this option can greatly slow down the search for a victim
411 | in conditions of heavy swapping.
412 |
413 | Prefer Chromium tabs and Electron-based apps
414 | @BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer
415 |
416 | Prefer Firefox tabs (Web Content and WebExtensions)
417 | @BADNESS_ADJ_RE_CMDLINE 100 /// -appomni
418 |
419 | @BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
420 |
421 | 7.2.8. Matching environ with RE patterns
422 | WARNING: using this option can greatly slow down the search for a victim
423 | in conditions of heavy swapping.
424 |
425 | @BADNESS_ADJ_RE_ENVIRON 100 /// USER=user
426 |
427 |
428 | Note that you can also control badness in systemd units with
429 | OOMScoreAdjust, see
430 | www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
431 |
432 | ###############################################################################
433 |
434 | 8. Customize soft corrective actions
435 |
436 | Run the command instead of sending a signal as the soft corrective action
437 | if the victim's name or cgroup matches the regular expression.
438 |
439 | Syntax:
440 | KEY REGEXP SEPARATOR COMMAND
441 |
442 | @SOFT_ACTION_RE_NAME ^foo$ /// kill -USR1 $PID
443 | @SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE
444 | @SOFT_ACTION_RE_CGROUP_V2 /foo\.service$ /// systemctl restart $SERVICE
445 |
446 | $PID will be replaced by process PID.
447 | $NAME will be replaced by process name.
448 | $SERVICE will be replaced by .service if it exists (otherwise it will be
449 | replaced by an empty line)
450 |
451 | ###############################################################################
452 |
453 | 9. Misc settings
454 |
455 | Key: max_soft_exit_time
456 | Description:
457 | Type: float
458 | Valid values: >= 0.1
459 | Default value: 10
460 |
461 | max_soft_exit_time = 10
462 |
463 | Key: post_kill_exe
464 | Description:
465 | Type: string
466 | Valid values: any string
467 | Default value: (empty string)
468 |
469 | post_kill_exe =
470 |
471 | Key: min_badness
472 | Description:
473 | Type: integer
474 | Valid values: >= 1
475 | Default value: 1
476 |
477 | min_badness = 1
478 |
479 | Key: post_soft_action_delay
480 | Description:
481 | Type: float
482 | Valid values: >= 0.1
483 | Default value: 3
484 |
485 | post_soft_action_delay = 3
486 |
487 | Key: post_zombie_delay
488 | Description:
489 | Type: float
490 | Valid values: >= 0
491 | Default value: 0.1
492 |
493 | post_zombie_delay = 0.1
494 |
495 | Key: victim_cache_time
496 | Description:
497 | Type: float
498 | Valid values: >= 0
499 | Default value: 10
500 |
501 | victim_cache_time = 10
502 |
503 | Key: exe_timeout
504 | Description:
505 | Type: float
506 | Valid values: >= 0.1
507 | Default value: 20
508 |
509 | exe_timeout = 20
510 |
511 | ###############################################################################
512 |
513 | 10. Verbosity, debug, logging
514 |
515 | Key: print_config_at_startup
516 | Description:
517 | Type: boolean
518 | Valid values: True | False
519 | Default value: False
520 |
521 | print_config_at_startup = False
522 |
523 | Key: print_mem_check_results
524 | Description:
525 | Type: boolean
526 | Valid values: True | False
527 | Default value: False
528 |
529 | print_mem_check_results = False
530 |
531 | Key: min_mem_report_interval
532 | Description:
533 | Type: float
534 | Valid values: >= 0
535 | Default value: 60
536 |
537 | min_mem_report_interval = 60
538 |
539 | Key: print_proc_table
540 | Description:
541 | Type: boolean
542 | Valid values: True | False
543 | Default value: False
544 |
545 | print_proc_table = False
546 |
547 | Key: extra_table_info
548 | Description:
549 | WARNING: using "cmdline" or "environ" keys can greatly slow down
550 | the search for a victim in conditions of heavy swapping.
551 | Type: string
552 | Valid values: None, cgroup_v1, cgroup_v2, realpath,
553 | cwd, cmdline, environ
554 | Default value: None
555 |
556 | extra_table_info = None
557 |
558 | Key: print_victim_status
559 | Description:
560 | Type: boolean
561 | Valid values: True | False
562 | Default value: True
563 |
564 | print_victim_status = True
565 |
566 | Key: print_victim_cmdline
567 | Description:
568 | Type: boolean
569 | Valid values: True | False
570 | Default value: False
571 |
572 | print_victim_cmdline = False
573 |
574 | Key: max_victim_ancestry_depth
575 | Description:
576 | Type: integer
577 | Valid values: >= 1
578 | Default value: 3
579 |
580 | max_victim_ancestry_depth = 3
581 |
582 | Key: print_statistics
583 | Description:
584 | Type: boolean
585 | Valid values: True | False
586 | Default value: True
587 |
588 | print_statistics = True
589 |
590 | Key: debug_psi
591 | Description:
592 | Type: boolean
593 | Valid values: True | False
594 | Default value: False
595 |
596 | debug_psi = False
597 |
598 | Key: debug_gui_notifications
599 | Description:
600 | Type: boolean
601 | Valid values: True | False
602 | Default value: False
603 |
604 | debug_gui_notifications = False
605 |
606 | Key: debug_sleep
607 | Description:
608 | Type: boolean
609 | Valid values: True | False
610 | Default value: False
611 |
612 | debug_sleep = False
613 |
614 | Key: debug_threading
615 | Description:
616 | Type: boolean
617 | Valid values: True | False
618 | Default value: False
619 |
620 | debug_threading = False
621 |
622 | Key: separate_log
623 | Description:
624 | Type: boolean
625 | Valid values: True | False
626 | Default value: False
627 |
628 | separate_log = False
629 |
630 | ###############################################################################
631 |
632 | Use cases, feature requests and any questions are welcome:
633 | https://github.com/hakavlad/nohang/issues
634 |
--------------------------------------------------------------------------------
/conf/nohang/nohang.conf.in:
--------------------------------------------------------------------------------
1 | ## This is the configuration file of the nohang daemon.
2 |
3 | ## The configuration includes the following sections:
4 | ## 0. Check kernel messages for OOM events
5 | ## 1. Common zram settings
6 | ## 2. Common PSI settings
7 | ## 3. Poll rate
8 | ## 4. Warnings and notifications
9 | ## 5. Soft (SIGTERM) threshold
10 | ## 6. Hard (SIGKILL) threshold
11 | ## 7. Customize victim selection: adjusting badness of processes
12 | ## 8. Customize soft corrective actions
13 | ## 9. Misc settings
14 | ## 10. Verbosity, debug, logging
15 |
16 | ## WARNING!
17 | ## - Lines starting with #, tabs and whitespace characters are comments.
18 | ## - Lines starting with @ contain optional parameters that may be repeated.
19 | ## - All values are case sensitive.
20 | ## - nohang doesn't forbid you to shoot yourself in the foot. Be careful!
21 | ## - Restart the daemon after editing the file to apply the new settings.
22 | ## - You can find the file with default values here: :TARGET_DATADIR:/nohang/nohang.conf
23 |
24 | ## For descriptions of the config keys, see the nohang(8) man page.
25 |
26 | ###############################################################################
27 |
28 | ## 0. Check kernel messages for OOM events
29 |
30 | # @check_kmsg
31 | ## Type: boolean
32 | ## Comment/uncomment to disable/enable checking kmsg for OOM events
33 |
34 | # @debug_kmsg
35 | ## Type: boolean
36 | ## Comment/uncomment to disable/enable debug checking kmsg
37 |
38 | ###############################################################################
39 |
40 | ## 1. Common zram settings
41 |
42 | zram_checking_enabled = False
43 | ## Type: boolean; valid values: True | False
44 | ## Default value: False
45 |
46 | ###############################################################################
47 |
48 | ## 2. Common PSI settings
49 |
50 | psi_checking_enabled = False
51 | ## Type: boolean; valid values: True | False
52 | ## Default value: False
53 |
54 | psi_path = /proc/pressure/memory
55 | ## Type: string; valid values: any string
56 | ## Default value: /proc/pressure/memory
57 |
58 | psi_metrics = full_avg10
59 | ## Type: string; valid values: some_avg10, some_avg60, some_avg300,
60 | ## full_avg10, full_avg60, full_avg300
61 | ## Default value: full_avg10
62 |
63 | psi_excess_duration = 30
64 | ## Type: float; valid values: >= 0
65 | ## Default value: 30
66 |
67 | psi_post_action_delay = 15
68 | ## Type: float; valid values: >= 10
69 | ## Default value: 15
70 |
71 | ###############################################################################
72 |
73 | ## 3. Poll rate
74 |
75 | fill_rate_mem = 6000
76 | ## Type: float; valid values: >= 100
77 | ## Default value: 6000
78 |
79 | fill_rate_swap = 2000
80 | ## Type: float; valid values: >= 100
81 | ## Default value: 2000
82 |
83 | fill_rate_zram = 4000
84 | ## Type: float; valid values: >= 100
85 | ## Default value: 4000
86 |
87 | max_sleep = 3
88 | ## Type: float; valid values: >= 0.01 and >= min_sleep
89 | ## Default value: 3
90 |
91 | min_sleep = 0.1
92 | ## Type: float; valid values: >= 0.01 and <= max_sleep
93 | ## Default value: 0.1
94 |
95 | ###############################################################################
96 |
97 | ## 4. Warnings and notifications
98 |
99 | ## 4.1. GUI notifications after corrective actions
100 |
101 | post_action_gui_notifications = False
102 | ## Type: boolean; valid values: True | False
103 | ## Default value: False
104 |
105 | hide_corrective_action_type = False
106 | ## Type: boolean; valid values: True | False
107 | ## Default value: False
108 |
109 |
110 | ## 4.2. Low memory warnings
111 |
112 | low_memory_warnings_enabled = False
113 | ## Type: boolean; valid values: True | False
114 | ## Default value: False
115 |
116 | warning_exe =
117 | ## Type: string; valid values: any string
118 | ## Default value: (empty string)
119 |
120 |
121 | warning_threshold_min_mem = 20 %
122 | ## Type: float (with % or M); valid values: from the range [0; 100] %
123 | ## Default value: 20 %
124 |
125 | warning_threshold_min_swap = 25 %
126 | ## Type: float (with % or M); valid values: [0; 100] % or >= 0 M
127 | ## Default value: 25 %
128 |
129 | warning_threshold_max_zram = 45 %
130 | ## Type: float (with % or M); valid values: from the range [0; 100] %
131 | ## Default value: 45 %
132 |
133 | warning_threshold_max_psi = 10
134 | ## Type: float; valid values: from the range [0; 100]
135 | ## Default value: 10
136 |
137 |
138 | min_post_warning_delay = 60
139 | ## Type: float; valid values: >= 1
140 | ## Default value: 60
141 |
142 | env_cache_time = 300
143 | ## Type: float; valid values: >= 0
144 | ## Default value: 300
145 |
146 | ###############################################################################
147 |
148 | ## 5. Soft threshold (thresholds for sending the SIGTERM signal or
149 | ## implementing other soft corrective action)
150 |
151 | soft_threshold_min_mem = 5 %
152 | ## Type: float (with % or M); valid values: from the range [0; 50] %
153 | ## Default value: 5 %
154 |
155 | soft_threshold_min_swap = 10 %
156 | ## Type: float (with % or M); valid values: [0; 100] % or >= 0 M
157 | ## Default value: 10 %
158 |
159 | soft_threshold_max_zram = 55 %
160 | ## Type: float (with % or M); valid values: from the range [10; 90] %
161 | ## Default value: 55 %
162 |
163 | soft_threshold_max_psi = 40
164 | ## Type: float; valid values: from the range [5; 100]
165 | ## Default value: 40
166 |
167 | ###############################################################################
168 |
169 | ## 6. Hard threshold (thresholds for sending the SIGKILL signal)
170 |
171 | hard_threshold_min_mem = 2 %
172 | ## Type: float (with % or M); valid values: from the range [0; 50] %
173 | ## Default value: 2 %
174 |
175 | hard_threshold_min_swap = 4 %
176 | ## Type: float (with % or M); valid values: [0; 100] % or >= 0 M
177 | ## Default value: 4 %
178 |
179 | hard_threshold_max_zram = 60 %
180 | ## Type: float (with % or M); valid values: from the range [10; 90] %
181 | ## Default value: 60 %
182 |
183 | hard_threshold_max_psi = 90
184 | ## Type: float; valid values: from the range [5; 100]
185 | ## Default value: 90
186 |
187 | ###############################################################################
188 |
189 | ## 7. Customize victim selection: adjusting badness of processes
190 |
191 | ## 7.1. Ignore positive oom_score_adj
192 |
193 | ignore_positive_oom_score_adj = False
194 | ## Type: boolean; valid values: True | False
195 | ## Default value: False
196 |
197 |
198 | ## 7.2. Matching process properties with regular expressions to change their
199 | ## badness.
200 |
201 | ## Syntax:
202 | ## @BADNESS_ADJ_RE_PROPERTY badness_adj /// RE_pattern
203 | ## badness_adj will be added to the process's badness value.
204 |
205 | ## It is possible to compare multiple patterns
206 | ## with different badness_adj values.
207 |
208 |
209 | ## 7.2.1. Matching process names with RE patterns to change their badness
210 | ## Example:
211 | # @BADNESS_ADJ_RE_NAME 200 /// ^Web Content$
212 |
213 | ## 7.2.2. Matching CGroup_v1-line with RE patterns
214 | # @BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$
215 | # @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/
216 |
217 | ## 7.2.3. Matching CGroup_v2-line with RE patterns
218 | # @BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload
219 |
220 | ## 7.2.4. Matching eUIDs with RE patterns
221 | # @BADNESS_ADJ_RE_UID -100 /// ^0$
222 |
223 | ## 7.2.5. Matching /proc/[pid]/exe realpath with RE patterns
224 | ## Example:
225 | # @BADNESS_ADJ_RE_REALPATH 900 /// ^(/usr/bin/stress|/usr/bin/stress-ng)$
226 |
227 | ## 7.2.6. Matching /proc/[pid]/cwd realpath with RE patterns
228 | # @BADNESS_ADJ_RE_CWD 200 /// ^/home/
229 |
230 | ## 7.2.7. Matching cmdlines with RE patterns
231 | ## WARNING: using this option can greatly slow down the search for a victim
232 | ## in conditions of intense swapping.
233 |
234 | ## Prefer Chromium tabs and Electron-based apps
235 | # @BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer
236 |
237 | ## Prefer Firefox tabs (Web Content and WebExtensions)
238 | # @BADNESS_ADJ_RE_CMDLINE 100 /// -appomni
239 |
240 | ## Avoid Virtualbox processes
241 | # @BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
242 |
243 | ## 7.2.8. Matching environ with RE patterns
244 | ## WARNING: using this option can greatly slow down the search for a victim
245 | ## in conditions of heavy swapping.
246 | # @BADNESS_ADJ_RE_ENVIRON 100 /// USER=user
247 |
248 |
249 | # Note that you can also control badness in systemd units with
250 | # OOMScoreAdjust, see
251 | # www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
252 |
253 | ###############################################################################
254 |
255 | ## 8. Customize soft corrective actions
256 |
257 | ## Run the command instead of sending a signal as the soft corrective action
258 | ## if the victim's name or cgroup matches the regular expression.
259 |
260 | ## Syntax:
261 | ## KEY REGEXP SEPARATOR COMMAND
262 |
263 | # @SOFT_ACTION_RE_NAME ^foo$ /// kill -USR1 $PID
264 | # @SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE
265 | # @SOFT_ACTION_RE_CGROUP_V2 /foo\.service$ /// systemctl restart $SERVICE
266 |
267 | ## $PID will be replaced by process PID.
268 | ## $NAME will be replaced by process name.
269 | ## $SERVICE will be replaced by .service if it exists (otherwise it will be
270 | ## replaced by an empty line)
271 |
272 | ###############################################################################
273 |
274 | ## 9. Misc settings
275 |
276 | max_soft_exit_time = 10
277 | ## Type: float; valid values: >= 0.1
278 | ## Default value: 10
279 |
280 | post_kill_exe =
281 | ## Type: string; valid values: any string
282 | ## Default value: (empty string)
283 |
284 | min_badness = 1
285 | ## Type: integer; valid values: >= 1
286 | ## Default value: 1
287 | ## nohang will do nothing if the badness of all processes is below min_badness
288 | ## (actually it will spam to stdout/log)
289 |
290 | post_soft_action_delay = 3
291 | ## Type: float; valid values: >= 0.1
292 | ## Default value: 3
293 |
294 | post_zombie_delay = 0.1
295 | ## Type: float; valid values: >= 0
296 | ## Default value: 0.1
297 |
298 | victim_cache_time = 10
299 | ## Type: float; valid values: >= 0
300 | ## Default value: 10
301 |
302 | exe_timeout = 20
303 | ## Type: float; valid values: >= 0.1
304 | ## Default value: 20
305 |
306 | ###############################################################################
307 |
308 | ## 10. Verbosity, debug, logging
309 |
310 | print_config_at_startup = False
311 | ## Type: boolean; valid values: True | False
312 | ## Default value: False
313 |
314 | print_mem_check_results = False
315 | ## Type: boolean; valid values: True | False
316 | ## Default value: False
317 |
318 | min_mem_report_interval = 60
319 | ## Type: float; valid values: >= 0
320 | ## Default value: 60
321 |
322 | print_proc_table = False
323 | ## Type: boolean; valid values: True | False
324 | ## Default value: False
325 |
326 | extra_table_info = None
327 | ## Type: string; valid values: None, cgroup_v1, cgroup_v2, realpath, cwd,
328 | ## cmdline, environ
329 | ## Default value: None
330 | ## WARNING: using "cmdline" or "environ" keys can greatly slow down the search
331 | ## for a victim in conditions of heavy swapping.
332 |
333 | print_victim_status = True
334 | ## Type: boolean; valid values: True | False
335 | ## Default value: True
336 |
337 | print_victim_cmdline = False
338 | ## Type: boolean; valid values: True | False
339 | ## Default value: False
340 |
341 | max_victim_ancestry_depth = 3
342 | ## Type: integer; valid values: >= 1
343 | ## Default value: 3
344 |
345 | print_statistics = True
346 |
347 | ## Type: boolean; valid values: True | False
348 | ## Default value: True
349 |
350 | debug_psi = False
351 | ## Type: boolean; valid values: True | False
352 | ## Default value: False
353 |
354 | debug_gui_notifications = False
355 | ## Type: boolean; valid values: True | False
356 | ## Default value: False
357 |
358 | debug_sleep = False
359 | ## Type: boolean; valid values: True | False
360 | ## Default value: False
361 |
362 | debug_threading = False
363 | ## Type: boolean; valid values: True | False
364 | ## Default value: False
365 |
366 | separate_log = False
367 | ## Type: boolean; valid values: True | False
368 | ## Default value: False
369 |
370 | ###############################################################################
371 |
372 | ## Use cases, feature requests and any questions are welcome:
373 | ## https://github.com/hakavlad/nohang/issues
374 | ##
375 |
--------------------------------------------------------------------------------
/conf/nohang/test.conf:
--------------------------------------------------------------------------------
1 | ## This is the configuration file of the nohang daemon.
2 |
3 | ## The configuration includes the following sections:
4 | ## 0. Check kernel messages for OOM events
5 | ## 1. Common zram settings
6 | ## 2. Common PSI settings
7 | ## 3. Poll rate
8 | ## 4. Warnings and notifications
9 | ## 5. Soft (SIGTERM) threshold
10 | ## 6. Hard (SIGKILL) threshold
11 | ## 7. Customize victim selection: adjusting badness of processes
12 | ## 8. Customize soft corrective actions
13 | ## 9. Misc settings
14 | ## 10. Verbosity, debug, logging
15 |
16 | ## WARNING!
17 | ## - Lines starting with #, tabs and whitespace characters are comments.
18 | ## - Lines starting with @ contain optional parameters that may be repeated.
19 | ## - All values are case sensitive.
20 | ## - nohang doesn't forbid you to shoot yourself in the foot. Be careful!
21 | ## - Restart the daemon after editing the file to apply the new settings.
22 | ## - You can find the file with default values here: :TARGET_DATADIR:/nohang/nohang.conf
23 |
24 | ## For descriptions of the config keys, see the nohang(8) man page.
25 |
26 | ###############################################################################
27 |
28 | ## 0. Check kernel messages for OOM events
29 |
30 | # @check_kmsg
31 | ## Type: boolean
32 | ## Comment/uncomment to disable/enable checking kmsg for OOM events
33 |
34 | # @debug_kmsg
35 | ## Type: boolean
36 | ## Comment/uncomment to disable/enable debug checking kmsg
37 |
38 | ###############################################################################
39 |
40 | 1. Common zram settings
41 |
42 | Key: zram_checking_enabled
43 | Description:
44 | Type: boolean
45 | Valid values: True and False
46 | Default value: False
47 |
48 | zram_checking_enabled = True
49 |
50 | ###############################################################################
51 |
52 | 2. Common PSI settings
53 |
54 | Description:
55 | Type: boolean
56 | Valid values: True and False
57 |
58 | psi_checking_enabled = True
59 |
60 | Description:
61 | Type: string
62 | Valid values:
63 |
64 | psi_path = /proc/pressure/memory
65 |
66 | Description:
67 | Type: string
68 | Valid values:
69 |
70 | psi_metrics = full_avg10
71 |
72 | Description:
73 | Type: float
74 | Valid values:
75 |
76 | psi_excess_duration = 60
77 |
78 | Description:
79 | Type: float
80 | Valid values:
81 |
82 | psi_post_action_delay = 60
83 |
84 | ###############################################################################
85 |
86 | 3. Poll rate
87 |
88 | Description:
89 | Type: float
90 | Valid values:
91 |
92 | fill_rate_mem = 4000
93 |
94 | Description:
95 | Type: float
96 | Valid values:
97 |
98 | fill_rate_swap = 1500
99 |
100 | Description:
101 | Type: float
102 | Valid values:
103 |
104 | fill_rate_zram = 6000
105 |
106 | Description:
107 | Type: float
108 | Valid values:
109 |
110 | max_sleep = 3
111 |
112 | Description:
113 | Type: float
114 | Valid values:
115 |
116 | min_sleep = 0.1
117 |
118 | ###############################################################################
119 |
120 | 4. Warnings and notifications
121 |
122 | 4.1. GUI notifications after corrective actions
123 |
124 | Description:
125 | Type: boolean
126 | Valid values: True and False
127 |
128 | post_action_gui_notifications = True
129 |
130 | Description:
131 | Type: boolean
132 | Valid values: True and False
133 |
134 | hide_corrective_action_type = False
135 |
136 | 4.2. Low memory warnings
137 |
138 | Description:
139 | Type: boolean
140 | Valid values: True and False
141 |
142 | low_memory_warnings_enabled = True
143 |
144 | Description:
145 | Type: string
146 | Valid values:
147 |
148 | warning_exe =
149 |
150 | Description:
151 | Type: float (+ % or M)
152 | Valid values:
153 |
154 | warning_threshold_min_mem = 20 %
155 |
156 | Description:
157 | Type: float (+ % or M)
158 | Valid values:
159 |
160 | warning_threshold_min_swap = 20 %
161 |
162 | Description:
163 | Type: float (+ % or M)
164 | Valid values:
165 |
166 | warning_threshold_max_zram = 50 %
167 |
168 | Description:
169 | Type: float
170 | Valid values:
171 |
172 | warning_threshold_max_psi = 100
173 |
174 | Description:
175 | Type: float
176 | Valid values:
177 |
178 | min_post_warning_delay = 30
179 |
180 | Description:
181 | Type: float
182 | Valid values:
183 |
184 | env_cache_time = 300
185 |
186 | ###############################################################################
187 |
188 | 5. Soft threshold
189 |
190 | Description:
191 | Type: float (+ % or M)
192 | Valid values:
193 |
194 | soft_threshold_min_mem = 20 %
195 |
196 | Description:
197 | Type: float (+ % or M)
198 | Valid values:
199 |
200 | soft_threshold_min_swap = 20 %
201 |
202 | Description:
203 | Type: float (+ % or M)
204 | Valid values:
205 |
206 | soft_threshold_max_zram = 60 %
207 |
208 | Description:
209 | Type: float
210 | Valid values:
211 |
212 | soft_threshold_max_psi = 60
213 |
214 | ###############################################################################
215 |
216 | 6. Hard threshold
217 |
218 | hard_threshold_min_mem = 2 %
219 |
220 | Description:
221 | Type: float (+ % or M)
222 | Valid values:
223 |
224 | hard_threshold_min_swap = 2 %
225 |
226 | Description:
227 | Type: float (+ % or M)
228 | Valid values:
229 |
230 | hard_threshold_max_zram = 65 %
231 |
232 | Description:
233 | Type: float
234 | Valid values:
235 |
236 | hard_threshold_max_psi = 90
237 |
238 | ###############################################################################
239 |
240 | 7. Customize victim selection: adjusting badness of processes
241 |
242 | 7.1. Ignore positive oom_score_adj
243 |
244 | Description:
245 | Type: boolean
246 | Valid values: True and False
247 |
248 | ignore_positive_oom_score_adj = True
249 |
250 | 7.3.1. Matching process names with RE patterns change their badness
251 |
252 | Syntax:
253 |
254 | @BADNESS_ADJ_RE_NAME badness_adj /// RE_pattern
255 |
256 | New badness value will be += badness_adj
257 |
258 | It is possible to compare multiple patterns
259 | with different badness_adj values.
260 |
261 | Example:
262 | @BADNESS_ADJ_RE_NAME -500 /// ^sshd$
263 |
264 | 7.3.2. Matching CGroup_v1-line with RE patterns
265 |
266 | @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/system\.slice/
267 |
268 | @BADNESS_ADJ_RE_CGROUP_V1 50 /// /foo\.service$
269 |
270 | @BADNESS_ADJ_RE_CGROUP_V1 -50 /// ^/user\.slice/
271 |
272 | 7.3.3. Matching CGroup_v2-line with RE patterns
273 |
274 | @BADNESS_ADJ_RE_CGROUP_V2 100 /// ^/workload
275 |
276 | 7.3.4. Matching eUIDs with RE patterns
277 |
278 | @BADNESS_ADJ_RE_UID -100 /// ^0$
279 |
280 | 7.3.5. Matching realpath with RE patterns
281 |
282 | @BADNESS_ADJ_RE_REALPATH 20 /// ^/usr/bin/foo
283 |
284 | 7.3.5.1. Matching cwd with RE patterns
285 |
286 | @BADNESS_ADJ_RE_CWD 20 /// ^/home/
287 |
288 | 7.3.6. Matching cmdlines with RE patterns
289 |
290 | @BADNESS_ADJ_RE_CMDLINE 2000 /// ^/bin/sleep
291 |
292 |
293 | Prefer chromium tabs and electron-based apps
294 | @BADNESS_ADJ_RE_CMDLINE 200 /// --type=renderer
295 |
296 | Prefer firefox tabs (Web Content and WebExtensions)
297 | @BADNESS_ADJ_RE_CMDLINE 100 /// -appomni
298 |
299 | @BADNESS_ADJ_RE_CMDLINE -200 /// ^/usr/lib/virtualbox
300 |
301 | 7.3.7. Matching environ with RE patterns
302 |
303 | @BADNESS_ADJ_RE_ENVIRON 100 /// USER=user
304 |
305 |
306 | Note that you can control badness also via systemd units via
307 | OOMScoreAdjust, see
308 | www.freedesktop.org/software/systemd/man/systemd.exec.html#OOMScoreAdjust=
309 |
310 | ###############################################################################
311 |
312 | 8. Customize soft corrective actions
313 |
314 | TODO: docs
315 |
316 | Syntax:
317 | KEY REGEXP SEPARATOR COMMAND
318 |
319 |
320 | @SOFT_ACTION_RE_NAME ^tail$ /// kill -SEGV $PID
321 |
322 |
323 | @SOFT_ACTION_RE_NAME ^foo$ /// kill -SEGV $PID
324 | @SOFT_ACTION_RE_NAME ^bash$ /// kill -9 $PID
325 |
326 | @SOFT_ACTION_RE_CGROUP_V1 ^/system\.slice/ /// systemctl restart $SERVICE
327 | @SOFT_ACTION_RE_CGROUP_V1 /foo\.service$ /// systemctl restart $SERVICE
328 |
329 | @SOFT_ACTION_RE_NAME ^tail$ /// kill -TERM $PID
330 |
331 | $PID will be replaced by process PID.
332 | $NAME will be replaced by process name.
333 | $SERVICE will be replaced by .service if it exists (otherwise it will be
334 | replaced by an empty line)
335 |
336 | ###############################################################################
337 |
338 | 9. Misc settings
339 |
340 | Description:
341 | Type: float
342 | Valid values:
343 |
344 | max_soft_exit_time = 10
345 |
346 | Description:
347 | Type: string
348 | Valid values:
349 |
350 | post_kill_exe =
351 |
352 | Description:
353 | Type: integer
354 | Valid values:
355 |
356 | min_badness = 10
357 |
358 | Description:
359 | Type: float
360 | Valid values:
361 |
362 | post_soft_action_delay = 3
363 |
364 | Description:
365 | Type: float
366 | Valid values:
367 |
368 | post_zombie_delay = 0.1
369 |
370 | Description:
371 | Type: float
372 | Valid values:
373 |
374 | victim_cache_time = 10
375 |
376 | Description:
377 | Type: float
378 | Valid values:
379 |
380 | exe_timeout = 20
381 |
382 | ###############################################################################
383 |
384 | 10. Verbosity, debug, logging
385 |
386 | Description:
387 | Type: boolean
388 | Valid values: True and False
389 |
390 | print_config_at_startup = True
391 |
392 | Description:
393 | Type: boolean
394 | Valid values: True and False
395 |
396 | print_mem_check_results = True
397 |
398 | Description:
399 | Type: float
400 | Valid values:
401 |
402 | min_mem_report_interval = 0
403 |
404 | Description:
405 | Type: boolean
406 | Valid values: True and False
407 |
408 | print_proc_table = True
409 |
410 | Description:
411 | Type: string
412 | Valid values:
413 | None
414 | cgroup_v1
415 | cgroup_v2
416 | realpath
417 | cwd
418 | cmdline
419 | environ
420 |
421 | extra_table_info = None
422 |
423 | Description:
424 | Type: boolean
425 | Valid values: True and False
426 |
427 | print_victim_status = True
428 |
429 | Description:
430 | Type: boolean
431 | Valid values: True and False
432 |
433 | print_victim_cmdline = True
434 |
435 | Description:
436 | Type: integer
437 | Valid values:
438 |
439 | max_victim_ancestry_depth = 99
440 |
441 | Description:
442 | Type: boolean
443 | Valid values: True and False
444 |
445 | print_statistics = True
446 |
447 | Description:
448 | Type: boolean
449 | Valid values: True and False
450 |
451 | debug_psi = True
452 |
453 | Description:
454 | Type: boolean
455 | Valid values: True and False
456 |
457 | debug_gui_notifications = True
458 |
459 | Description:
460 | Type: boolean
461 | Valid values: True and False
462 |
463 | debug_sleep = True
464 |
465 | Description:
466 | Type: boolean
467 | Valid values: True and False
468 |
469 | debug_threading = True
470 |
471 | Description:
472 | Type: boolean
473 | Valid values: True and False
474 |
475 | separate_log = True
476 |
477 | ###############################################################################
478 |
479 | Use cases, feature requests and any questions are welcome:
480 | https://github.com/hakavlad/nohang/issues
481 |
--------------------------------------------------------------------------------
/deb/DEBIAN/conffiles:
--------------------------------------------------------------------------------
1 | /etc/nohang/nohang.conf
2 | /etc/nohang/nohang-desktop.conf
3 | /etc/logrotate.d/nohang
4 |
--------------------------------------------------------------------------------
/deb/DEBIAN/control:
--------------------------------------------------------------------------------
1 | Package: nohang
2 | Version: 0.2.0
3 | Section: admin
4 | Architecture: all
5 | Depends: python3
6 | Suggests: libnotify-bin, sudo, logrotate
7 | Maintainer: Alexey Avramov
8 | Priority: optional
9 | Homepage: https://github.com/hakavlad/nohang
10 | Description: Sophisticated low memory handler
11 | nohang is a highly configurable daemon for Linux which is able to correctly
12 | prevent out of memory (OOM) and keep system responsiveness in low memory
13 | conditions. The package also includes additional diagnostic tools: oom-sort,
14 | psi2log, psi-top.
15 |
--------------------------------------------------------------------------------
/deb/DEBIAN/postinst:
--------------------------------------------------------------------------------
1 | systemctl daemon-reload
2 |
--------------------------------------------------------------------------------
/deb/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh -v
2 | make \
3 | DESTDIR=deb/package \
4 | PREFIX=/usr \
5 | SYSCONFDIR=/etc \
6 | SYSTEMDUNITDIR=/lib/systemd/system \
7 | build_deb
8 | cd deb
9 | cp -r DEBIAN package/
10 | fakeroot dpkg-deb --build package
11 |
--------------------------------------------------------------------------------
/docs/FAQ.ru.md:
--------------------------------------------------------------------------------
1 |
2 | # FAQ для русскоязычных
3 |
4 | ### Каковы основные особенности демона?
5 |
6 | - Явная и гибкая конфигурация через конфигурационный файл. Все, что может быть настраиваемо, по возможности вынесено в конфиг. Таким образом, запуск демона без конфига невозможен. Также пользователь может видеть все значения ключей конфига. Минимум скрытых параметров.
7 | - Возможность поэтапного реагирования на нехватку памяти. Можно настроить три порога реакции:
8 | 1. Для отправки GUI уведомлений о нехватке памяти (либо выполнения произвольной команды, например отправки e-mail)
9 | 2. Порог отправки сигнала SIGTERM (в большинстве случаев коррекция происходит здесь). Это главное корректирующее действие, после которого большинство процессов завершаются, по возможности корректно.
10 | 3. Если жертва не реагирует на SIGTERM, то получит сигнал SIGKILL при дальнейшем уменьшении объема доступной памяти, или по прошествии определенного времени (ключ конфига max_soft_exit_time).
11 | - Возможность реагирования на разные виды раздражителей:
12 | 1. При наличии пространства подкачки демон реагирует на объем доступного пространства подкачки (SwapFree) при условии, что порог доступной памяти также ниже заданного уровня. При отсутствии пространства подкачки демон реагирует на объем доступной памяти (MemAvailable).
13 | 2. При наличии пространства подкачки демон может реагировать на превышение метрик PSI, если это задано в конфиге. Корректирующее действие происходит если в течение заданного времени (psi_excess_duration) порог доступной памяти и порог заданной метрики PSI превышены одновременно, но не ранее чем через psi_post_action_delay после предыдущего корректирующего действия.
14 | 3. Возможность реакции на размер mem_used_total, если смонтированы устройства zram.
15 | - Возможность влияния на выбор жертвы при корректирующем действии путем сопоставления различных характеристик процесса (name, exe realpath, euid, cgroup etc) с заданными регулярными выражениями. Это похоже на механизм, используемый в ядре, однако вместо oom_score_adj для отдельных PID можно задать badness_adj для всех процессов, подходящих под определенные критерии.
16 | - Возможность GUI уведомлений о совершенных корректирующих действиях.
17 | - Возможность кастомизации корректирующего действия. Эта возможность еще сырая. Включает в себя:
18 | 1. На мягком (SIGTERM) пороге для процессов с заданными свойствами, если они становятся жертвами, выполнять заданную команду.
19 | 2. На жестком (SIGKILL) пороге можно с помощью ключа post_kill_exe задать произвольную команду, которая будет выполняться после любого жесткого корректирующего действия.
20 |
21 | ### Почему не триггерим ядерный OOM killer?
22 |
23 | ### Что такое PSI и как это помогает в обработке нехватки памяти?
24 |
25 | ### Как проверить поддержку PSI ядром?
26 |
27 | ### Зачем нужен ключ zram_checking_enabled?
28 |
29 | ### Как демон предотвращает убийство невиновных?
30 |
31 | ### Не показываются GUI уведомления. В чем возможная причина?
32 |
33 | ### В KDE Plasma история GUI уведомлений не сохраняется. Как исправить?
34 |
35 | ### Как пользоваться oom-sort?
36 |
37 | ### Как пользоваться psi-top?
38 |
39 | ### Как пользоваться psi2log?
40 |
41 | ### nohang vs earlyoom
42 |
43 | ### nohang vs oomd
44 |
45 | ### Как запустить и протестировать nohang без установки?
46 |
47 | ### Что не так с ZFS?
48 |
49 | ### В каких ситуациях демон не поможет?
50 |
51 | ### Почему в настройках реакции на метрики PSI по умолчанию предлагается реагирование на some avg10, а не full avg10?
52 |
53 | ### Система зависает, демон не помогает. В чем проблема и что делать?
54 |
55 | ### Как протестировать работу демона? Как создать нагрузку на память?
56 |
57 | ### В каких случаях лучше не включать проверку PSI?
58 |
59 | ### nohang vs nohang-desktop: в чем разница?
60 |
61 | ### Как это вообще работает?
62 |
63 | ### Как получить список доступных для мониторинга файлов PSI?
64 |
65 | ### Поддерживается ли убийство групп процессов?
66 |
67 | Нет, но поддержка этого может быть добавлена в будущих релизах.
68 |
69 | ### Как смотреть логи?
70 |
71 |
72 |
73 |
74 |
75 |
76 |
--------------------------------------------------------------------------------
/docs/nohang.manpage.md:
--------------------------------------------------------------------------------
1 | % nohang(8) | Linux System Administrator's Manual
2 |
3 | # NAME
4 | nohang - A sophisticated low memory handler
5 |
6 | # SYNOPSIS
7 | **nohang** [**OPTION**]...
8 |
9 | # DESCRIPTION
10 | nohang is a highly configurable daemon for Linux which is able to correctly prevent out of memory (OOM) and keep system responsiveness in low memory conditions.
11 |
12 | # REQUIREMENTS
13 |
14 | #### For basic usage:
15 | - Linux (>= 3.14, since MemAvailable appeared in /proc/meminfo)
16 | - Python (>= 3.3)
17 |
18 | #### To respond to PSI metrics (optional):
19 | - Linux (>= 4.20) with CONFIG_PSI=y
20 |
21 | #### To show GUI notifications (optional):
22 | - notification server (most of desktop environments use their own implementations)
23 | - libnotify (Arch Linux, Fedora, openSUSE) or libnotify-bin (Debian GNU/Linux, Ubuntu)
24 | - sudo if nohang started with UID=0.
25 |
26 | # COMMAND-LINE OPTIONS
27 |
28 | #### -h, --help
29 | show this help message and exit
30 |
31 | #### -v, --version
32 | show version of installed package and exit
33 |
34 | #### -m, --memload
35 | consume memory until 40 MiB (MemAvailable + SwapFree) remain free, and terminate the process
36 |
37 | #### -c CONFIG, --config CONFIG
38 | path to the config file. This should only be used with one of the following options:
39 | --monitor, --tasks, --check
40 |
41 | #### --check
42 | check and show the configuration and exit. This should only be used with -c/--config CONFIG option
43 |
44 | #### --monitor
45 | start monitoring. This should only be used with -c/--config CONFIG option
46 |
47 | #### --tasks
48 | show tasks state and exit. This should only be used with -c/--config CONFIG option
49 |
50 | # FILES
51 |
52 | #### :SYSCONFDIR:/nohang/nohang.conf
53 | path to vanilla nohang configuration file
54 |
55 | #### :SYSCONFDIR:/nohang/nohang-desktop.conf
56 | path to configuration file with settings optimized for desktop usage
57 |
58 | #### :DATADIR:/nohang/nohang.conf
59 | path to file with *default* nohang.conf values
60 |
61 | #### :DATADIR:/nohang/nohang-desktop.conf
62 | path to file with *default* nohang-desktop.conf values
63 |
64 | #### /var/log/nohang/nohang.log
65 | optional log file that stores entries if separate_log=True in the config
66 |
67 | #### /etc/logrotate.d/nohang
68 | logrotate config file that controls rotation in /var/log/nohang/
69 |
70 | # nohang.conf vs nohang-desktop.conf
71 | - nohang.conf provides vanilla default settings without PSI checking enabled, without any badness correction and without GUI notifications enabled.
72 | - nohang-desktop.conf provides default settings optimized for desktop usage.
73 |
74 | # PROBLEMS
75 | The next problems can occur with out-of-tree kernels and modules:
76 |
77 | - The ZFS ARC cache is memory-reclaimable, like the Linux buffer cache. However, in contrast to the buffer cache, it currently does not count to MemAvailable [1]. See also [2] and [3].
78 | - Linux kernels without CONFIG_CGROUP_CPUACCT=y (linux-ck, for example) provide incorrect PSI metrics, see this thread [4].
79 |
80 | # HOW TO CONFIGURE
81 | The program can be configured by editing the config file. The configuration includes the following sections:
82 |
83 | - Memory levels to respond to as an OOM threat
84 | - Response on PSI memory metrics
85 | - The frequency of checking the level of available memory (and CPU usage)
86 | - The prevention of killing innocent victims
87 | - Impact on the badness of processes via matching their names, cmdlines and UIDs with regular expressions
88 | - The execution of a specific command or sending any signal instead of sending the SIGTERM signal
89 | - GUI notifications:
90 | - notifications of corrective actions taken
91 | - low memory warnings
92 | - Verbosity
93 | - Misc
94 |
95 | Just read the description of the parameters and edit the values. Restart the daemon to apply the changes.
96 |
97 | # CHECK CONFIG
98 | Check the config for errors:
99 |
100 | $ nohang --check --config /path/to/config
101 |
102 | # HOW TO TEST
103 | The safest way is to run **nohang --memload**. This causes memory consumption, and the process will exit before OOM occurs. Another way is to run **tail /dev/zero**. This causes fast memory consumption and causes OOM at the end. If testing occurs while nohang is running, these processes should be terminated before OOM occurs.
104 |
105 | # LOGGING
106 | To view the latest entries in the log (for systemd users):
107 |
108 | $ **sudo journalctl -eu nohang.service**
109 |
110 | or
111 |
112 | $ **sudo journalctl -eu nohang-desktop.service**
113 |
114 | You can also enable **separate_log** in the config to log to **/var/log/nohang/nohang.log**.
115 |
116 | # SIGNALS
117 | Sending SIGTERM, SIGINT, SIGQUIT or SIGHUP signals to the nohang process causes it to display corrective action stats and exit.
118 |
119 | # REPORTING BUGS
120 | Please ask any questions and report bugs at <https://github.com/hakavlad/nohang/issues>.
121 |
122 | # AUTHOR
123 | Written by Alexey Avramov .
124 |
125 | # HOMEPAGE
126 | Homepage is <https://github.com/hakavlad/nohang>.
127 |
128 | # SEE ALSO
129 | oom-sort(1), psi-top(1), psi2log(1)
130 |
131 | # NOTES
132 |
133 | 1. https://github.com/openzfs/zfs/issues/10255
134 | 2. https://github.com/rfjakob/earlyoom/pull/191#issuecomment-622314296
135 | 3. https://github.com/hakavlad/nohang/issues/89
136 | 4. https://github.com/hakavlad/nohang/issues/25#issuecomment-521390412
137 |
--------------------------------------------------------------------------------
/docs/oom-sort.manpage.md:
--------------------------------------------------------------------------------
1 | % oom-sort(1) | General Commands Manual
2 |
3 | # NAME
4 | oom-sort - sort processes by oom_score
5 |
6 | # SYNOPSIS
7 | **oom-sort** [**OPTION**]...
8 |
9 | # DESCRIPTION
10 | oom-sort is a script that sorts tasks by oom_score by default. oom-sort is part of the nohang package.
11 |
12 | # OPTIONS
13 |
14 | #### -h, --help
15 | show this help message and exit
16 |
17 | #### --num NUM, -n NUM
18 | max number of lines; default: 99999
19 |
20 | #### --len LEN, -l LEN
21 | max cmdline length; default: 99999
22 |
23 | #### --sort SORT, -s SORT
24 | sort by unit; available units: oom_score, oom_score_adj, UID, PID, Name, VmRSS, VmSwap, cmdline (optional); default unit: oom_score
25 |
26 | # REPORTING BUGS
27 | Please ask any questions and report bugs at <https://github.com/hakavlad/nohang/issues>.
28 |
29 | # AUTHOR
30 | Written by Alexey Avramov .
31 |
32 | # HOMEPAGE
33 | Homepage is <https://github.com/hakavlad/nohang>.
34 |
35 | # SEE ALSO
36 | psi-top(1), psi2log(1), nohang(8)
37 |
--------------------------------------------------------------------------------
/docs/psi-top.manpage.md:
--------------------------------------------------------------------------------
1 | % psi-top(1) | General Commands Manual
2 |
3 | # NAME
4 | psi-top - print the PSI metrics values for every cgroup.
5 |
6 | # SYNOPSIS
7 | **psi-top** [**OPTION**]...
8 |
9 | # DESCRIPTION
10 | psi-top is a script that prints the PSI metrics values for every cgroup. psi-top is part of the nohang package.
11 |
12 | # OPTIONS
13 |
14 | #### -h, --help
15 | show this help message and exit
16 |
17 | #### -m METRICS, --metrics METRICS
18 | metrics (memory, io or cpu)
19 |
20 | # EXAMPLES
21 | $ psi-top
22 |
23 | $ psi-top --metrics io
24 |
25 | $ psi-top -m cpu
26 |
27 | # REPORTING BUGS
28 | Please ask any questions and report bugs at <https://github.com/hakavlad/nohang/issues>.
29 |
30 | # AUTHOR
31 | Written by Alexey Avramov .
32 |
33 | # HOMEPAGE
34 | Homepage is <https://github.com/hakavlad/nohang>.
35 |
36 | # SEE ALSO
37 | oom-sort(1), psi2log(1), nohang(8)
38 |
--------------------------------------------------------------------------------
/docs/psi2log.manpage.md:
--------------------------------------------------------------------------------
1 | % psi2log(1) | General Commands Manual
2 |
3 | # NAME
4 | psi2log \- PSI metrics monitor and logger
5 |
6 | # SYNOPSIS
7 | **psi2log** [**OPTION**]...
8 |
9 | # DESCRIPTION
10 | psi2log is a CLI tool that can check and log PSI metrics from a specified target. psi2log is part of the nohang package.
11 |
12 | # OPTIONS
13 |
14 | #### -h, --help
15 | show this help message and exit
16 |
17 | #### -t TARGET, --target TARGET
18 | target (cgroup_v2 or SYSTEM_WIDE)
19 |
20 | #### -i INTERVAL, --interval INTERVAL
21 | interval in sec
22 |
23 | #### -l LOG, --log LOG
24 | path to log file
25 |
26 | #### -m MODE, --mode MODE
27 | mode (0, 1 or 2)
28 |
29 | #### -s SUPPRESS_OUTPUT, --suppress-output SUPPRESS_OUTPUT
30 | suppress output
31 |
32 | # EXAMPLES
33 | $ psi2log
34 |
35 | $ psi2log --mode 2
36 |
37 | $ psi2log --target /user.slice --interval 1.5 --log psi.log
38 |
39 | # SIGNALS
40 | Sending SIGTERM, SIGINT, SIGQUIT or SIGHUP signals to the psi2log process causes it to display peak values and exit.
41 |
42 | # REPORTING BUGS
43 | Please ask any questions and report bugs at <https://github.com/hakavlad/nohang/issues>.
44 |
45 | # AUTHOR
46 | Written by Alexey Avramov .
47 |
48 | # HOMEPAGE
49 | Homepage is <https://github.com/hakavlad/nohang>.
50 |
51 | # SEE ALSO
52 | oom-sort(1), psi-top(1), nohang(8)
53 |
--------------------------------------------------------------------------------
/man/nohang.8:
--------------------------------------------------------------------------------
1 | .\" Automatically generated by Pandoc 1.17.2
2 | .\"
3 | .TH "nohang" "8" "" "" "Linux System Administrator\[aq]s Manual"
4 | .hy
5 | .SH NAME
6 | .PP
7 | nohang \- A sophisticated low memory handler
8 | .SH SYNOPSIS
9 | .PP
10 | \f[B]nohang\f[] [\f[B]OPTION\f[]]...
11 | .SH DESCRIPTION
12 | .PP
13 | nohang is a highly configurable daemon for Linux which is able to
14 | correctly prevent out of memory (OOM) and keep system responsiveness in
15 | low memory conditions.
16 | .SH REQUIREMENTS
17 | .SS For basic usage:
18 | .IP \[bu] 2
19 | Linux (>= 3.14, since MemAvailable appeared in /proc/meminfo)
20 | .IP \[bu] 2
21 | Python (>= 3.3)
22 | .SS To respond to PSI metrics (optional):
23 | .IP \[bu] 2
24 | Linux (>= 4.20) with CONFIG_PSI=y
25 | .SS To show GUI notifications (optional):
26 | .IP \[bu] 2
27 | notification server (most of desktop environments use their own
28 | implementations)
29 | .IP \[bu] 2
30 | libnotify (Arch Linux, Fedora, openSUSE) or libnotify\-bin (Debian
31 | GNU/Linux, Ubuntu)
32 | .IP \[bu] 2
33 | sudo if nohang started with UID=0.
34 | .SH COMMAND\-LINE OPTIONS
35 | .SS \-h, \-\-help
36 | .PP
37 | show this help message and exit
38 | .SS \-v, \-\-version
39 | .PP
40 | show version of installed package and exit
41 | .SS \-m, \-\-memload
42 | .PP
43 | consume memory until 40 MiB (MemAvailable + SwapFree) remain free, and
44 | terminate the process
45 | .SS \-c CONFIG, \-\-config CONFIG
46 | .PP
47 | path to the config file.
48 | This should only be used with one of the following options: \-\-monitor,
49 | \-\-tasks, \-\-check
50 | .SS \-\-check
51 | .PP
52 | check and show the configuration and exit.
53 | This should only be used with \-c/\-\-config CONFIG option
54 | .SS \-\-monitor
55 | .PP
56 | start monitoring.
57 | This should only be used with \-c/\-\-config CONFIG option
58 | .SS \-\-tasks
59 | .PP
60 | show tasks state and exit.
61 | This should only be used with \-c/\-\-config CONFIG option
62 | .SH FILES
63 | .SS :SYSCONFDIR:/nohang/nohang.conf
64 | .PP
65 | path to vanilla nohang configuration file
66 | .SS :SYSCONFDIR:/nohang/nohang\-desktop.conf
67 | .PP
68 | path to configuration file with settings optimized for desktop usage
69 | .SS :DATADIR:/nohang/nohang.conf
70 | .PP
71 | path to file with \f[I]default\f[] nohang.conf values
72 | .SS :DATADIR:/nohang/nohang\-desktop.conf
73 | .PP
74 | path to file with \f[I]default\f[] nohang\-desktop.conf values
75 | .SS /var/log/nohang/nohang.log
76 | .PP
77 | optional log file that stores entries if separate_log=True in the config
78 | .SS /etc/logrotate.d/nohang
79 | .PP
80 | logrotate config file that controls rotation in /var/log/nohang/
81 | .SH nohang.conf vs nohang\-desktop.conf
82 | .IP \[bu] 2
83 | nohang.conf provides vanilla default settings without PSI checking
84 | enabled, without any badness correction and without GUI notifications
85 | enabled.
86 | .IP \[bu] 2
87 | nohang\-desktop.conf provides default settings optimized for desktop
88 | usage.
89 | .SH PROBLEMS
90 | .PP
91 | The next problems can occur with out\-of\-tree kernels and modules:
92 | .IP \[bu] 2
93 | The ZFS ARC cache is memory\-reclaimable, like the Linux buffer cache.
94 | However, in contrast to the buffer cache, it currently does not count to
95 | MemAvailable [1].
96 | See also [2] and [3].
97 | .IP \[bu] 2
98 | Linux kernels without CONFIG_CGROUP_CPUACCT=y (linux\-ck, for example)
99 | provide incorrect PSI metrics, see this thread [4].
100 | .SH HOW TO CONFIGURE
101 | .PP
102 | The program can be configured by editing the config file.
103 | The configuration includes the following sections:
104 | .IP \[bu] 2
105 | Memory levels to respond to as an OOM threat
106 | .IP \[bu] 2
107 | Response on PSI memory metrics
108 | .IP \[bu] 2
109 | The frequency of checking the level of available memory (and CPU usage)
110 | .IP \[bu] 2
111 | The prevention of killing innocent victims
112 | .IP \[bu] 2
113 | Impact on the badness of processes via matching their names, cmdlines
114 | and UIDs with regular expressions
115 | .IP \[bu] 2
116 | The execution of a specific command or sending any signal instead of
117 | sending the SIGTERM signal
118 | .IP \[bu] 2
119 | GUI notifications:
120 | .RS 2
121 | .IP \[bu] 2
122 | notifications of corrective actions taken
123 | .IP \[bu] 2
124 | low memory warnings
125 | .RE
126 | .IP \[bu] 2
127 | Verbosity
128 | .IP \[bu] 2
129 | Misc
130 | .PP
131 | Just read the description of the parameters and edit the values.
132 | Restart the daemon to apply the changes.
133 | .SH CHECK CONFIG
134 | .PP
135 | Check the config for errors:
136 | .PP
137 | $ nohang \-\-check \-\-config /path/to/config
138 | .SH HOW TO TEST
139 | .PP
140 | The safest way is to run \f[B]nohang \-\-memload\f[].
141 | This causes memory consumption, and the process will exit before OOM
142 | occurs.
143 | Another way is to run \f[B]tail /dev/zero\f[].
144 | This causes fast memory consumption and causes OOM at the end.
145 | If testing occurs while nohang is running, these processes should be
146 | terminated before OOM occurs.
147 | .SH LOGGING
148 | .PP
149 | To view the latest entries in the log (for systemd users):
150 | .PP
151 | $ \f[B]sudo journalctl \-eu nohang.service\f[]
152 | .PP
153 | or
154 | .PP
155 | $ \f[B]sudo journalctl \-eu nohang\-desktop.service\f[]
156 | .PP
157 | You can also enable \f[B]separate_log\f[] in the config to log to
158 | \f[B]/var/log/nohang/nohang.log\f[].
159 | .SH SIGNALS
160 | .PP
161 | Sending SIGTERM, SIGINT, SIGQUIT or SIGHUP signals to the nohang process
162 | causes it to display corrective action stats and exit.
163 | .SH REPORTING BUGS
164 | .PP
165 | Please ask any questions and report bugs at
166 | <https://github.com/hakavlad/nohang/issues>.
167 | .SH AUTHOR
168 | .PP
169 | Written by Alexey Avramov .
170 | .SH HOMEPAGE
171 | .PP
172 | Homepage is <https://github.com/hakavlad/nohang>.
173 | .SH SEE ALSO
174 | .PP
175 | oom\-sort(1), psi\-top(1), psi2log(1)
176 | .SH NOTES
177 | .IP "1." 3
178 | https://github.com/openzfs/zfs/issues/10255
179 | .IP "2." 3
180 | https://github.com/rfjakob/earlyoom/pull/191#issuecomment\-622314296
181 | .IP "3." 3
182 | https://github.com/hakavlad/nohang/issues/89
183 | .IP "4." 3
184 | https://github.com/hakavlad/nohang/issues/25#issuecomment\-521390412
185 |
--------------------------------------------------------------------------------
/man/oom-sort.1:
--------------------------------------------------------------------------------
1 | .\" Automatically generated by Pandoc 1.17.2
2 | .\"
3 | .TH "oom\-sort" "1" "" "" "General Commands Manual"
4 | .hy
5 | .SH NAME
6 | .PP
7 | oom\-sort \- sort processes by oom_score
8 | .SH SYNOPSIS
9 | .PP
10 | \f[B]oom\-sort\f[] [\f[B]OPTION\f[]]...
11 | .SH DESCRIPTION
12 | .PP
13 | oom\-sort is a script that sorts tasks by oom_score by default.
14 | oom\-sort is part of the nohang package.
15 | .SH OPTIONS
16 | .SS \-h, \-\-help
17 | .PP
18 | show this help message and exit
19 | .SS \-\-num NUM, \-n NUM
20 | .PP
21 | max number of lines; default: 99999
22 | .SS \-\-len LEN, \-l LEN
23 | .PP
24 | max cmdline length; default: 99999
25 | .SS \-\-sort SORT, \-s SORT
26 | .PP
27 | sort by unit; available units: oom_score, oom_score_adj, UID, PID, Name,
28 | VmRSS, VmSwap, cmdline (optional); default unit: oom_score
29 | .SH REPORTING BUGS
30 | .PP
31 | Please ask any questions and report bugs at
32 | <https://github.com/hakavlad/nohang/issues>.
33 | .SH AUTHOR
34 | .PP
35 | Written by Alexey Avramov .
36 | .SH HOMEPAGE
37 | .PP
38 | Homepage is <https://github.com/hakavlad/nohang>.
39 | .SH SEE ALSO
40 | .PP
41 | psi\-top(1), psi2log(1), nohang(8)
42 |
--------------------------------------------------------------------------------
/man/psi-top.1:
--------------------------------------------------------------------------------
1 | .\" Automatically generated by Pandoc 1.17.2
2 | .\"
3 | .TH "psi\-top" "1" "" "" "General Commands Manual"
4 | .hy
5 | .SH NAME
6 | .PP
7 | psi\-top \- print the PSI metrics values for every cgroup.
8 | .SH SYNOPSIS
9 | .PP
10 | \f[B]psi\-top\f[] [\f[B]OPTION\f[]]...
11 | .SH DESCRIPTION
12 | .PP
13 | psi\-top is a script that prints the PSI metrics values for every cgroup.
14 | psi\-top is part of the nohang package.
15 | .SH OPTIONS
16 | .SS \-h, \-\-help
17 | .PP
18 | show this help message and exit
19 | .SS \-m METRICS, \-\-metrics METRICS
20 | .PP
21 | metrics (memory, io or cpu)
22 | .SH EXAMPLES
23 | .PP
24 | $ psi\-top
25 | .PP
26 | $ psi\-top \-\-metrics io
27 | .PP
28 | $ psi\-top \-m cpu
29 | .SH REPORTING BUGS
30 | .PP
31 | Please ask any questions and report bugs at
32 | <https://github.com/hakavlad/nohang/issues>.
33 | .SH AUTHOR
34 | .PP
35 | Written by Alexey Avramov .
36 | .SH HOMEPAGE
37 | .PP
38 | Homepage is <https://github.com/hakavlad/nohang>.
39 | .SH SEE ALSO
40 | .PP
41 | oom\-sort(1), psi2log(1), nohang(8)
42 |
--------------------------------------------------------------------------------
/man/psi2log.1:
--------------------------------------------------------------------------------
1 | .\" Automatically generated by Pandoc 1.17.2
2 | .\"
3 | .TH "psi2log" "1" "" "" "General Commands Manual"
4 | .hy
5 | .SH NAME
6 | .PP
7 | psi2log \- PSI metrics monitor and logger
8 | .SH SYNOPSIS
9 | .PP
10 | \f[B]psi2log\f[] [\f[B]OPTION\f[]]...
11 | .SH DESCRIPTION
12 | .PP
13 | psi2log is a CLI tool that can check and log PSI metrics from specified
14 | target.
15 | psi2log is part of nohang package.
16 | .SH OPTIONS
17 | .SS \-h, \-\-help
18 | .PP
19 | show this help message and exit
20 | .SS \-t TARGET, \-\-target TARGET
21 | .PP
22 | target (cgroup_v2 or SYSTEM_WIDE)
23 | .SS \-i INTERVAL, \-\-interval INTERVAL
24 | .PP
25 | interval in sec
26 | .SS \-l LOG, \-\-log LOG
27 | .PP
28 | path to log file
29 | .SS \-m MODE, \-\-mode MODE
30 | .PP
31 | mode (0, 1 or 2)
32 | .SS \-s SUPPRESS_OUTPUT, \-\-suppress\-output SUPPRESS_OUTPUT
33 | .PP
34 | suppress output
35 | .SH EXAMPLES
36 | .PP
37 | $ psi2log
38 | .PP
39 | $ psi2log \-\-mode 2
40 | .PP
41 | $ psi2log \-\-target /user.slice \-\-interval 1.5 \-\-log psi.log
42 | .SH SIGNALS
43 | .PP
44 | Sending SIGTERM, SIGINT, SIGQUIT or SIGHUP signals to the psi2log
45 | process causes it to display peak values and exit.
46 | .SH REPORTING BUGS
47 | .PP
48 | Please ask any questions and report bugs at
49 | .
50 | .SH AUTHOR
51 | .PP
52 | Written by Alexey Avramov .
53 | .SH HOMEPAGE
54 | .PP
55 | Homepage is .
56 | .SH SEE ALSO
57 | .PP
58 | oom\-sort(1), psi\-top(1), nohang(8)
59 |
--------------------------------------------------------------------------------
/openrc/nohang-desktop.in:
--------------------------------------------------------------------------------
1 | #!/sbin/openrc-run
2 |
3 | name="nohang-desktop daemon"
4 | description="Sophisticated low memory handler"
5 | command=:TARGET_SBINDIR:/nohang
6 | command_args="--monitor --config :TARGET_SYSCONFDIR:/nohang/nohang-desktop.conf"
7 | pidfile="/var/run/nohang-desktop"
8 | start_stop_daemon_args="--background --make-pidfile"
9 |
--------------------------------------------------------------------------------
/openrc/nohang.in:
--------------------------------------------------------------------------------
1 | #!/sbin/openrc-run
2 |
3 | name="nohang daemon"
4 | description="Sophisticated low memory handler"
5 | command=:TARGET_SBINDIR:/nohang
6 | command_args="--monitor --config :TARGET_SYSCONFDIR:/nohang/nohang.conf"
7 | pidfile="/var/run/nohang"
8 | start_stop_daemon_args="--background --make-pidfile"
9 |
--------------------------------------------------------------------------------
/src/oom-sort:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | sort processes by oom_score
4 | """
5 |
6 |
7 | from argparse import ArgumentParser
8 | from operator import itemgetter
9 | from os import listdir
10 |
11 |
def pid_to_oom_score(pid):
    """Return the oom_score of process `pid` as an int.

    The value is read from /proc/<pid>/oom_score (binary, unbuffered).
    Errors raised by open() or int() propagate to the caller.
    """
    score_path = '/proc/{}/oom_score'.format(pid)
    with open(score_path, 'rb', buffering=0) as score_file:
        raw_score = score_file.read()
    return int(raw_score)
15 |
16 |
def pid_to_oom_score_adj(pid):
    """Return the oom_score_adj of process `pid` as an int.

    The value is read from /proc/<pid>/oom_score_adj (binary, unbuffered).
    Errors raised by open() or int() propagate to the caller.
    """
    adj_path = '/proc/{}/oom_score_adj'.format(pid)
    with open(adj_path, 'rb', buffering=0) as adj_file:
        raw_adj = adj_file.read()
    return int(raw_adj)
20 |
21 |
def pid_to_cmdline(pid):
    """Return the command line of process `pid` as one string.

    /proc/<pid>/cmdline is decoded as UTF-8 (undecodable bytes are dropped),
    NUL bytes are replaced by spaces and trailing whitespace is removed.
    """
    with open('/proc/{}/cmdline'.format(pid), 'rb', buffering=0) as cmd_file:
        raw_cmdline = cmd_file.read()
    decoded = raw_cmdline.decode('utf-8', 'ignore')
    spaced = decoded.replace('\x00', ' ')
    return spaced.rstrip()
26 |
27 |
def pid_to_status_units(pid):
    """Return (name, uid, vm_rss, vm_swap) strings from /proc/<pid>/status.

    Line positions come from the module-level uid_index, vm_rss_index and
    vm_swap_index. The name is the second tab-separated field of the first
    line, the uid is the third field of the Uid line. The Vm* values have
    their last three characters removed (presumably the ' kB' suffix --
    confirm against the kernel's status format).
    """
    status_path = '/proc/{}/status'.format(pid)
    with open(status_path, 'rb', buffering=0) as status_file:
        lines = status_file.read().decode('utf-8', 'ignore').split('\n')

    for position, line in enumerate(lines):
        if position == 1:
            # The name sits on the first line; it is picked up only once a
            # second line is known to exist.
            name = lines[0].split('\t')[1]
        if position == uid_index:
            uid = line.split('\t')[2]
        if position == vm_rss_index:
            vm_rss = line.split('\t')[1][:-3]
        if position == vm_swap_index:
            vm_swap = line.split('\t')[1][:-3]

    return name, uid, vm_rss, vm_swap
41 |
42 |
def get_max_pid_len():
    """Return the character length of the first line of
    /proc/sys/kernel/pid_max (whitespace stripped), or None when the file
    yields no lines.
    """
    with open('/proc/sys/kernel/pid_max') as pid_max_file:
        first_line = next(iter(pid_max_file), None)
    if first_line is None:
        return None
    return len(first_line.strip())
47 |
48 |
# Maps every accepted -s/--sort value to the position of that field in the
# per-process tuples built later in this script:
# (pid, oom_score, oom_score_adj, cmdline, name, uid, vm_rss, vm_swap).
sort_dict = {
    'PID': 0,
    'oom_score': 1,
    'oom_score_adj': 2,
    'cmdline': 3,
    'Name': 4,
    'UID': 5,
    'VmRSS': 6,
    'VmSwap': 7
}


parser = ArgumentParser()

# type=int lets argparse reject a non-numeric value up front instead of the
# script failing later with a traceback when the value is converted.
parser.add_argument(
    '--num',
    '-n',
    help="""max number of lines; default: 99999""",
    default=99999,
    type=int
)

parser.add_argument(
    '--len',
    '-l',
    help="""max cmdline length; default: 99999""",
    default=99999,
    type=int
)


parser.add_argument(
    '--sort',
    '-s',
    help="""sort by unit; default: oom_score""",
    default='oom_score',
    type=str
)


args = parser.parse_args()

display_cmdline = args.len

num_lines = args.num

sort_by = args.sort


if sort_by not in sort_dict:
    print('Invalid -s/--sort value. Valid values are:\nPID\noom_score [defau'
          'lt value]\noom_score_adj\nUID\nName\ncmdline\nVmRSS\nVmSwap')
    # An invalid option is an error, so report failure to the caller.
    exit(1)
102 |
103 |
# Learn on which lines of /proc/self/status the Uid, VmRSS and VmSwap
# entries sit; the same line positions are used afterwards when reading the
# status file of every other process.

with open('/proc/self/status') as file:
    status_list = file.readlines()

# Field name of each status line (the text before the first colon).
status_names = [status_line.split(':')[0] for status_line in status_list]

uid_index, vm_rss_index, vm_swap_index = (
    status_names.index('Uid'),
    status_names.index('VmRSS'),
    status_names.index('VmSwap'),
)
117 |
118 |
# Collect one tuple per process:
# (pid, oom_score, oom_score_adj, cmdline, name, uid, vm_rss, vm_swap),
# then order the tuples by the field selected with -s/--sort.


oom_list = []

for pid in listdir('/proc'):

    # only numeric /proc entries are considered, and PID 1 is left out
    if not pid.isdigit() or pid == '1':
        continue

    try:
        oom_score = pid_to_oom_score(pid)
        oom_score_adj = pid_to_oom_score_adj(pid)

        cmdline = pid_to_cmdline(pid)
        if cmdline == '':
            # processes with an empty command line are not listed
            continue

        name, uid, vm_rss, vm_swap = pid_to_status_units(pid)

    except (FileNotFoundError, ProcessLookupError):
        # the process went away while it was being inspected
        continue

    except Exception as e:
        print(e)
        exit(1)

    oom_list.append((
        int(pid), int(oom_score), int(oom_score_adj), cmdline,
        name, int(uid), int(vm_rss), int(vm_swap)))

# list sorted by the selected field, largest value first
oom_list_sorted = list(oom_list)
oom_list_sorted.sort(key=itemgetter(sort_dict[sort_by]), reverse=True)
160 |
161 |
# find width of columns


max_pid_len = get_max_pid_len()


# Width of the widest UID. max() avoids sorting the whole list just to read
# its first element, and default=0 keeps an empty process list from crashing.
max_uid_len = len(str(max((entry[5] for entry in oom_list), default=0)))


# Width of the largest VmRSS after dividing by 1024 and rounding, with a
# minimum column width of 5.
max_vm_rss_len = max(5, len(str(round(
    max((entry[6] for entry in oom_list), default=0) / 1024))))
177 |
178 |
# print output

# Padding and rule fragments shared by both header variants.
uid_gap = ' ' * (max_uid_len - 2)
pid_gap = ' ' * (max_pid_len - 2)
rss_gap = ' ' * max_vm_rss_len
uid_rule = '-' * max_uid_len
pid_rule = '-' * max_pid_len
rss_rule = '-' * max_vm_rss_len

if display_cmdline != 0:

    # header with the trailing cmdline column
    print(
        'oom_score oom_score_adj{}UID{}PID Name {}VmRSS VmSwa'
        'p cmdline'.format(uid_gap, pid_gap, rss_gap)
    )

    print(
        '--------- ------------- {} {} --------------- {}-- ------'
        '-- -------'.format(uid_rule, pid_rule, rss_rule)
    )

else:

    # --len 0: the cmdline column is left out of the header
    print(
        'oom_score oom_score_adj{}UID{}PID Name {}VmRSS VmSwap'.format(
            uid_gap, pid_gap, rss_gap)
    )

    print(
        '--------- ------------- {} {} --------------- {}-- --------'.format(
            uid_rule, pid_rule, rss_rule)
    )

# print processes stats sorted by sort_dict[sort_by]

for (pid, oom_score, oom_score_adj, cmdline,
     name, uid, vm_rss, vm_swap) in oom_list_sorted[:int(num_lines)]:

    # VmRSS and VmSwap are divided by 1024 and rounded before printing.
    rss_text = str(round(vm_rss / 1024.0)).rjust(max_vm_rss_len, ' ')
    swap_text = str(round(vm_swap / 1024.0)).rjust(6, ' ')

    print(
        '{} {} {} {} {} {} M {} M {}'.format(
            str(oom_score).rjust(9),
            str(oom_score_adj).rjust(13),
            str(uid).rjust(max_uid_len),
            str(pid).rjust(max_pid_len),
            name.ljust(15),
            rss_text,
            swap_text,
            cmdline[:display_cmdline]
        )
    )
244 |
--------------------------------------------------------------------------------
/src/psi-top:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import os
4 | from argparse import ArgumentParser
5 |
6 |
def psi_path_to_metrics(psi_path):
    """Return the avg10/avg60/avg300 strings of the first two lines of a
    PSI file.

    The result is a 6-tuple: the three values of the first ("some") line
    followed by the three values of the second ("full") line. Each value is
    the text after '=' in space-separated fields 1 to 3 of the line.
    """
    with open(psi_path) as pressure_file:
        lines = pressure_file.readlines()

    some_fields = lines[0].split(' ')
    full_fields = lines[1].split(' ')

    return tuple(
        fields[position].split('=')[1]
        for fields in (some_fields, full_fields)
        for position in (1, 2, 3)
    )
25 |
26 |
def psi_path_to_metrics_cpu(psi_path):
    """Return (avg10, avg60, avg300) strings from the first line of a PSI
    file; only that first ("some") line is read.
    """
    with open(psi_path) as pressure_file:
        lines = pressure_file.readlines()

    some_fields = lines[0].rstrip().split(' ')

    return tuple(
        some_fields[position].split('=')[1] for position in (1, 2, 3))
40 |
41 |
def cgroup2_root():
    """Return the mountpoint field of the first line of the `mounts` file
    that contains `cgroup2_separator`, or None when no line matches.

    The mountpoint is the text between the first space of the line and the
    separator. Both `mounts` and `cgroup2_separator` are module globals.
    """
    with open(mounts) as mounts_file:
        matching = (entry for entry in mounts_file
                    if cgroup2_separator in entry)
        first_match = next(matching, None)

    if first_match is None:
        return None

    before_separator = first_match.partition(cgroup2_separator)[0]
    return before_separator.partition(' ')[2]
49 |
50 |
def get_psi_mem_files(cgroup2_path, met):
    """Return the paths of all files below `cgroup2_path` whose path ends
    with '/<met>.pressure', in os.walk() order.
    """
    wanted_suffix = '/{}.pressure'.format(met)
    return [
        candidate
        for root, _dirs, files in os.walk(cgroup2_path)
        for candidate in (os.path.join(root, entry) for entry in files)
        if candidate.endswith(wanted_suffix)
    ]
63 |
64 |
def psi_path_to_cgroup2(path):
    """Return the part of `path` between the global `cgroup2_mountpoint`
    and the trailing '/cpu.pressure', '/io.pressure' or '/memory.pressure'.

    Returns None when `path` ends with none of these suffixes.
    """
    for suffix in ('/cpu.pressure', '/io.pressure', '/memory.pressure'):
        if path.endswith(suffix):
            after_mountpoint = path.partition(cgroup2_mountpoint)[2]
            return after_mountpoint.partition(suffix)[0]
    return None
80 |
81 |
# Command line handling and start-up checks: pick the metrics, locate the
# cgroup_v2 mountpoint, verify that PSI is available and collect the
# per-cgroup pressure files to display.

parser = ArgumentParser()

parser.add_argument(
    '-m',
    '--metrics',
    help="""metrics (memory, io or cpu)""",
    default='memory',
    type=str
)


args = parser.parse_args()


met = args.metrics


if met not in ('memory', 'io', 'cpu'):
    print('ERROR: invalid metrics:', met)
    exit(1)


# system-wide PSI file for the selected metrics
psi_path = '/proc/pressure/{}'.format(met)
mounts = '/proc/mounts'
cgroup2_separator = ' cgroup2 rw,'

cgroup2_mountpoint = cgroup2_root()


if cgroup2_mountpoint is None:
    print('ERROR: cgroup_v2 hierarchy is not mounted')
    exit(1)


# Probe the system-wide memory PSI file; any failure to read or parse it is
# treated as "PSI is not available".
try:
    psi_path_to_metrics('/proc/pressure/memory')
except Exception as e:
    print('ERROR: {}'.format(e))
    print('PSI metrics are not provided by the kernel. Exit.')
    exit(1)


# The cgroup tree is walked exactly once; the mountpoint is known to be set
# here because the None case has already exited above.
path_list = get_psi_mem_files(cgroup2_mountpoint, met)
129 |
130 |
# Table header for the memory and io metrics: "some" and "full" column
# groups followed by the cgroup column. The two placeholders receive the
# selected metrics name and the cgroup_v2 mountpoint.
head_mem_io = '''PSI metrics: {}
cgroup_v2 mountpoint: {}
=====================|======================|
some | full |
-------------------- | -------------------- |
avg10 avg60 avg300 | avg10 avg60 avg300 | cgroup_v2
------ ------ ------ | ------ ------ ------ | -----------'''.format(
    met, cgroup2_mountpoint)


# Table header for the cpu metrics: only a "some" column group is shown.
head_cpu = '''PSI metrics: {}
cgroup_v2 mountpoint: {}
=====================|
some |
-------------------- |
avg10 avg60 avg300 | cgroup_v2
------ ------ ------ | -----------'''.format(
    met, cgroup2_mountpoint)


# Print the header matching the selected metrics.
if met == 'cpu':
    print(head_cpu)
else:
    print(head_mem_io)
155 |
156 |
# Pick the reader and row layout once: cpu rows carry only the three "some"
# values, memory/io rows carry "some" and "full" values.
if met == 'cpu':
    read_row = psi_path_to_metrics_cpu
    row_format = '{} {} {} | {}'
else:
    read_row = psi_path_to_metrics
    row_format = '{} {} {} | {} {} {} | {}'


# First row: system-wide values from /proc/pressure/<metrics>.
row_cells = [value.rjust(6) for value in read_row(psi_path)]
print(row_format.format(*(row_cells + ['SYSTEM_WIDE'])))


# One row per cgroup pressure file found under the cgroup_v2 mountpoint.
for psi_path in path_list:
    row_cells = [value.rjust(6) for value in read_row(psi_path)]
    print(row_format.format(*(row_cells + [psi_path_to_cgroup2(psi_path)])))
203 |
--------------------------------------------------------------------------------
/src/psi2log:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """psi2log - PSI metrics monitor and logger"""
3 |
4 | from argparse import ArgumentParser
5 | from ctypes import CDLL
6 | from signal import SIGHUP, SIGINT, SIGQUIT, SIGTERM, signal
7 | from sys import exit, stdout
8 | from time import monotonic, sleep
9 |
10 |
def read_path(path):
    """Return the decoded content of `path`, or None on failure.

    File objects are cached in the global `fd` dict and rewound on every
    call. The file is (re)opened when it is not cached yet (KeyError) or
    when seek() raises ValueError, e.g. because the cached object was closed
    after an earlier failed read. A missing file or an OSError while reading
    is logged and yields None.
    """
    try:
        fd[path].seek(0)
    except (ValueError, KeyError):
        try:
            fd[path] = open(path, 'rb', buffering=0)
        except FileNotFoundError as open_error:
            log(open_error)
            return None

    try:
        return fd[path].read(99999).decode()
    except OSError as read_error:
        log(read_error)
        # Close the handle so that the next call reopens the file.
        fd[path].close()
        return None
34 |
35 |
def form1(num):
    """Format `num` with its fractional part right-padded with zeros to at
    least two digits, e.g. 0.5 -> '0.50'.

    Works on str(num), which must contain a '.'.
    """
    parts = str(num).split('.')
    return parts[0] + '.' + parts[1].ljust(2, '0')
41 |
42 |
def form2(num):
    """Round `num` to one decimal place and return it as 'I.F' text.

    Works on str(round(num, 1)), which must contain a '.'.
    """
    parts = str(round(num, 1)).split('.')
    return parts[0] + '.' + parts[1].ljust(1, '0')
48 |
49 |
def signal_handler(signum, frame):
    """Log the recorded peak values (and stall times for the system-wide
    target), then exit.

    Which summary is written depends on how many entries `peaks_dict` holds:
    15 entries produce the avg10/avg60/avg300 table, 5 entries produce the
    single "peaks" row. Any other count prints no peak summary.
    """
    def ignore_signal(signum, frame):
        pass

    # From now on the handled signals do nothing, so the summary below is
    # not restarted by a second signal.
    for handled_signal in sig_list:
        signal(handled_signal, ignore_signal)

    if signum == SIGINT:
        print('')

    peak_count = len(peaks_dict)

    if peak_count == 15:

        def peak_triple(prefix):
            # avg10, avg60 and avg300 peaks of one metric, formatted
            return tuple(
                form1(peaks_dict[prefix + window])
                for window in ('_avg10', '_avg60', '_avg300'))

        log('=================================')
        log('Peak values: avg10 avg60 avg300')
        log('----------- ------ ------ ------')
        log('some cpu {:>6} {:>6} {:>6}'.format(*peak_triple('c_some')))
        log('----------- ------ ------ ------')
        log('some io {:>6} {:>6} {:>6}'.format(*peak_triple('i_some')))

        log('full io {:>6} {:>6} {:>6}'.format(*peak_triple('i_full')))

        log('----------- ------ ------ ------')

        log('some memory {:>6} {:>6} {:>6}'.format(*peak_triple('m_some')))

        log('full memory {:>6} {:>6} {:>6}'.format(*peak_triple('m_full')))

    if peak_count == 5:
        log('----- | ----- ----- | ----- ----- | --------')
        log('{:>5} | {:>5} {:>5} | {:>5} {:>5} | peaks'.format(*[
            form2(peaks_dict[key])
            for key in ('avg_cs', 'avg_is', 'avg_if', 'avg_ms', 'avg_mf')
        ]))

    if target == 'SYSTEM_WIDE':
        log_stall_times()

    if separate_log:
        logging.info('')

    exit()
117 |
118 |
def log_stall_times():
    """Log how much the PSI `total` counters grew since start-up.

    The current totals are compared with the module-level total_*_0
    baselines and divided by 1,000,000 (the totals are presumably in
    microseconds -- confirm against the kernel PSI documentation). Each
    line also shows the growth as a percentage of the time elapsed since
    `t_0`.
    """
    cpu_some_now = psi_file_cpu_to_total(cpu_file)
    io_some_now, io_full_now = psi_file_mem_to_total(io_file)
    mem_some_now, mem_full_now = psi_file_mem_to_total(memory_file)
    elapsed = monotonic() - t_0

    scale = 1000000

    # All deltas are computed before anything is logged.
    cpu_some, io_some, io_full, mem_some, mem_full = [
        (now - start) / scale
        for now, start in (
            (cpu_some_now, total_cs_0),
            (io_some_now, total_is_0),
            (io_full_now, total_if_0),
            (mem_some_now, total_ms_0),
            (mem_full_now, total_mf_0),
        )
    ]

    def stall_line(template, stalled):
        log(template.format(
            round(stalled, 1),
            round(stalled / elapsed * 100, 1)
        ))

    if mode in ('0', '1'):
        log('=================================')
    else:
        log('--')

    log('Stall times for the last {}s:'.format(round(elapsed, 1)))
    log('-----------')
    stall_line('some cpu {}s, avg {}%', cpu_some)
    log('-----------')
    stall_line('some io {}s, avg {}%', io_some)
    stall_line('full io {}s, avg {}%', io_full)
    log('-----------')
    stall_line('some memory {}s, avg {}%', mem_some)
    stall_line('full memory {}s, avg {}%', mem_full)
166 |
167 |
def cgroup2_root():
    """Return the mountpoint field of the first line of the `mounts` file
    that contains `cgroup2_separator`, or None when no line matches.

    The mountpoint is the text between the first space of the line and the
    separator. Both `mounts` and `cgroup2_separator` are module globals.
    """
    with open(mounts) as mounts_file:
        matching = (entry for entry in mounts_file
                    if cgroup2_separator in entry)
        first_match = next(matching, None)

    if first_match is None:
        return None

    before_separator = first_match.partition(cgroup2_separator)[0]
    return before_separator.partition(' ')[2]
175 |
176 |
def mlockall():
    """Lock the memory of this process with libc mlockall() and log the
    outcome.

    The first attempt uses MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT. If that
    call fails, it is retried without MCL_ONFAULT. If the retry fails too,
    a warning with the errno reported by libc is logged and the process
    continues unlocked.
    """
    # Local import: the file-level ctypes import only brings in CDLL.
    from ctypes import get_errno

    MCL_CURRENT = 1
    MCL_FUTURE = 2
    MCL_ONFAULT = 4

    # use_errno=True makes the errno set by libc available via get_errno()
    libc = CDLL(None, use_errno=True)
    result = libc.mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)

    if result != 0:
        result = libc.mlockall(MCL_CURRENT | MCL_FUTURE)
        if result != 0:
            # Report the real errno; the return value is only a failure
            # flag and says nothing about the cause.
            log('WARNING: cannot lock process memory: [Errno {}]'.format(
                get_errno()))
        else:
            log('Process memory locked with MCL_CURRENT | MCL_FUTURE')
    else:
        log('Process memory locked with '
            'MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT')
197 |
198 |
def psi_file_mem_to_metrics0(psi_path):
    """Return the avg10/avg60/avg300 strings of the first two lines of a
    PSI file, opening the file directly (no handle caching, no logging).

    The result is a 6-tuple: the three values of the first ("some") line
    followed by the three values of the second ("full") line.
    """
    with open(psi_path) as pressure_file:
        lines = pressure_file.readlines()

    some_fields = lines[0].split(' ')
    full_fields = lines[1].split(' ')

    return tuple(
        fields[position].split('=')[1]
        for fields in (some_fields, full_fields)
        for position in (1, 2, 3)
    )
213 |
214 |
def psi_file_mem_to_metrics(psi_path):
    """Return the six avg10/avg60/avg300 strings ("some" line first, then
    "full" line) of a PSI file read through read_path().

    Returns None when the file cannot be read or parsed; parse errors are
    logged.
    """
    content = read_path(psi_path)

    if content is None:
        return None

    try:
        lines = content.split('\n')
        some_fields, full_fields = lines[0].split(' '), lines[1].split(' ')
        return tuple(
            fields[position].split('=')[1]
            for fields in (some_fields, full_fields)
            for position in (1, 2, 3)
        )

    except Exception as parse_error:
        log('{}'.format(parse_error))
        return None
239 |
240 |
def psi_file_cpu_to_metrics(psi_path):
    """Return (avg10, avg60, avg300) strings from the first line of a PSI
    file read through read_path().

    Returns None when the file cannot be read or parsed; parse errors are
    logged.
    """
    content = read_path(psi_path)

    if content is None:
        return None

    try:
        some_fields = content.split('\n')[0].split(' ')
        return tuple(
            some_fields[position].split('=')[1] for position in (1, 2, 3))

    except Exception as parse_error:
        log('{}'.format(parse_error))
        return None
261 |
262 |
def psi_file_mem_to_total(psi_path):
    """Return (some_total, full_total) as ints: the value after '=' in
    space-separated field 4 of the first and second line of a PSI file read
    through read_path().

    Returns None when the file cannot be read or parsed; parse errors are
    logged.
    """
    content = read_path(psi_path)

    if content is None:
        return None

    try:
        lines = content.split('\n')
        some_fields, full_fields = lines[0].split(' '), lines[1].split(' ')

        # Both raw values are extracted before either one is converted.
        raw_totals = [
            fields[4].split('=')[1] for fields in (some_fields, full_fields)]

        return int(raw_totals[0]), int(raw_totals[1])

    except Exception as parse_error:
        log('{}'.format(parse_error))
        return None
283 |
284 |
def psi_file_cpu_to_total(psi_path):
    """Return the first line's total as an int: the value after '=' in
    space-separated field 4 of a PSI file read through read_path().

    Returns None when the file cannot be read or parsed; parse errors are
    logged.
    """
    content = read_path(psi_path)

    if content is None:
        return None

    try:
        some_fields = content.split('\n')[0].split(' ')
        raw_total = some_fields[4].split('=')[1]
        return int(raw_total)

    except Exception as parse_error:
        log('{}'.format(parse_error))
        return None
304 |
305 |
def print_head_0():
    """Log the table header used in mode 0 (avg10 and avg60 columns for
    cpu, io and memory).
    """
    header_lines = (
        '==================================================================='
        '============',
        ' cpu || io || memory',
        '============= || ============================= || ================='
        '============',
        ' some || some | full || some | '
        ' full',
        '------------- || ------------- | ------------- || ------------- | -'
        '------------',
        ' avg10 avg60 || avg10 avg60 | avg10 avg60 || avg10 avg60 | '
        'avg10 avg60',
        '------ ------ || ------ ------ | ------ ------ || ------ ------ | -'
        '----- ------',
    )
    for header_line in header_lines:
        log(header_line)
322 |
323 |
def print_head_1():
    """Log the table header used in mode 1 (avg10, avg60 and avg300 columns
    for cpu, io and memory).
    """
    header_lines = (
        '===================================================================='
        '==============================================',
        ' cpu || io '
        '|| memory',
        '==================== || =========================================== '
        '|| ===========================================',
        ' some || some | full '
        '|| some | full',
        '-------------------- || -------------------- | -------------------- '
        '|| -------------------- | --------------------',
        ' avg10 avg60 avg300 || avg10 avg60 avg300 | avg10 avg60 avg300 '
        '|| avg10 avg60 avg300 | avg10 avg60 avg300',
        '------ ------ ------ || ------ ------ ------ | ------ ------ ------ '
        '|| ------ ------ ------ | ------ ------ ------',
    )
    for header_line in header_lines:
        log(header_line)
341 |
342 |
def print_head_2():
    """Log the compact table header with one "some"/"full" column set per
    resource and an interval column.
    """
    header_lines = (
        '----- - ----------- - ----------- -',
        ' cpu | io | memory |',
        '----- | ----------- | ----------- |',
        ' some | some full | some full | interval',
        '----- | ----- ----- | ----- ----- | --------',
    )
    for header_line in header_lines:
        log(header_line)
351 |
352 |
def log(*msg):
    """Print `msg` unless output is suppressed, and also pass it to
    logging.info() when a separate log file is in use.
    """
    show_on_stdout = not SUPPRESS_OUTPUT
    if show_on_stdout:
        print(*msg)

    if not separate_log:
        return
    logging.info(*msg)
360 |
361 |
def log_head(*msg):
    """Always print `msg` (the suppress-output setting is not consulted),
    and also pass it to logging.info() when a separate log file is in use.
    """
    print(*msg)

    if not separate_log:
        return
    logging.info(*msg)
368 |
369 |
# Command line interface. Each entry is (short flag, long flag, help text,
# default value, value type); the options are registered in this order.
parser = ArgumentParser()

for short_flag, long_flag, help_text, default_value, value_type in (
        ('-t', '--target', """target (cgroup_v2 or SYSTEM_WIDE)""",
         'SYSTEM_WIDE', str),
        ('-i', '--interval', """interval in sec""", 2, float),
        ('-l', '--log', """path to log file""", None, str),
        ('-m', '--mode', """mode (0, 1 or 2)""", '0', str),
        ('-s', '--suppress-output', """suppress output""", 'False', str),
):
    parser.add_argument(
        short_flag,
        long_flag,
        help=help_text,
        default=default_value,
        type=value_type
    )


args = parser.parse_args()

target, mode, interval = args.target, args.mode, args.interval
log_file, suppress_output = args.log, args.suppress_output
423 |
# A cgroup target is normalized to a single leading slash and no trailing
# slash.
if target != 'SYSTEM_WIDE':
    target = '/' + target.strip('/')


# A separate log file is used only when -l/--log was given; logging is
# imported and configured just for that case.
separate_log = log_file is not None

if separate_log:
    import logging

    try:
        logging.basicConfig(
            filename=log_file,
            level=logging.INFO,
            format="%(asctime)s: %(message)s")
    except Exception as e:
        print(e)
        exit(1)


# -s/--suppress-output accepts exactly the strings 'False' and 'True'.
suppress_choices = {'False': False, 'True': True}

if suppress_output in suppress_choices:
    SUPPRESS_OUTPUT = suppress_choices[suppress_output]
else:
    log_head('error: argument -s/--suppress-output: valid values are '
             'False and True')
    exit(1)


if log_file is None:
    logstring = 'log file is not set, '
else:
    logstring = 'log file: {}, '.format(log_file)


if interval < 1:
    log_head('error: argument -i/--interval: the value must be greater than or'
             ' equal to 1')
    exit(1)


if mode not in ('0', '1', '2'):
    log_head('ERROR: invalid mode. Valid values are 0, 1 and 2. Exit.')
    exit(1)


# Probe the system-wide memory PSI file; any failure to read or parse it is
# treated as "PSI is not available".
try:
    psi_file_mem_to_metrics0('/proc/pressure/memory')
except Exception as e:
    log_head('ERROR: {}'.format(e))
    log_head('PSI metrics are not provided by the kernel. Exit.')
    exit(1)


log_head('Starting psi2log, target: {}, mode: {}, interval: {} sec, {}suppress'
         ' output: {}'.format(
             target, mode, round(interval, 3), logstring, suppress_output))
483 |
484 |
# Cache of open PSI file objects, keyed by path (filled by read_path()).
fd = dict()


# Choose the three PSI source files: the system-wide files under
# /proc/pressure, or the *.pressure files of the requested cgroup below the
# cgroup_v2 mountpoint.
if target == 'SYSTEM_WIDE':
    system_wide = True
    source_dir = '/proc/pressure'
    cpu_file = '/proc/pressure/cpu'
    io_file = '/proc/pressure/io'
    memory_file = '/proc/pressure/memory'
    log_head('PSI source dir: /proc/pressure/, source files: cpu, io, memory')
else:
    system_wide = False
    mounts = '/proc/mounts'
    cgroup2_separator = ' cgroup2 rw,'
    cgroup2_mountpoint = cgroup2_root()

    if cgroup2_mountpoint is None:
        # Fatal errors go through log_head() so that they are shown even
        # when output is suppressed, like every other fatal error here.
        log_head('ERROR: unified cgroup hierarchy is not mounted, exit')
        exit(1)

    source_dir = cgroup2_mountpoint + target
    cpu_file = source_dir + '/cpu.pressure'
    io_file = source_dir + '/io.pressure'
    memory_file = source_dir + '/memory.pressure'
    log_head('PSI source dir: {}{}/, source files: cpu.pressure, io.pressure,'
             ' memory.pressure'.format(cgroup2_mountpoint, target))


abnormal_interval = 1.01 * interval
abnormal_inaccuracy = 0.05


# Baseline totals and start time for the stall-time summary that is logged
# on exit for the system-wide target.
if target == 'SYSTEM_WIDE':
    total_cs_0 = psi_file_cpu_to_total(cpu_file)
    total_is_0, total_if_0 = psi_file_mem_to_total(io_file)
    total_ms_0, total_mf_0 = psi_file_mem_to_total(memory_file)
    t_0 = monotonic()


# Highest value seen so far for every tracked metric.
peaks_dict = dict()


# Signals that make the process log its summary and exit.
sig_list = [SIGTERM, SIGINT, SIGQUIT, SIGHUP]

for i in sig_list:
    signal(i, signal_handler)


mlockall()
534 |
535 |
if mode == '0':

    print_head_0()

    # peaks_dict keys, in the same order as the values in `readings`:
    # cpu some, io some, io full, memory some, memory full, each with
    # avg10, avg60 and avg300.
    peak_keys = tuple(
        '{}_avg{}'.format(group, window)
        for group in ('c_some', 'i_some', 'i_full', 'm_some', 'm_full')
        for window in (10, 60, 300)
    )

    while True:

        try:
            # A reader returns None on failure; tuple(None) raises
            # TypeError, which skips this round before later files are read.
            readings = (
                tuple(psi_file_cpu_to_metrics(cpu_file)) +
                tuple(psi_file_mem_to_metrics(io_file)) +
                tuple(psi_file_mem_to_metrics(memory_file))
            )

        except TypeError:
            stdout.flush()
            sleep(interval)
            continue

        # Only the avg10 and avg60 values of each group are shown in mode 0.
        log('{:>6} {:>6} || {:>6} {:>6} | {:>6} {:>6} || {:>6} {:>6} | {:>6} '
            '{:>6}'.format(
                readings[0], readings[1],
                readings[3], readings[4],
                readings[6], readings[7],
                readings[9], readings[10],
                readings[12], readings[13]
            ))

        # Track the highest value seen for all 15 metrics, avg300 included.
        for peak_key, raw_value in zip(peak_keys, readings):
            current = float(raw_value)
            if peak_key not in peaks_dict or peaks_dict[peak_key] < current:
                peaks_dict[peak_key] = current

        stdout.flush()
        sleep(interval)
654 |
655 |
656 | if mode == '1':
657 | # Mode '1': print the header once, then poll forever; each pass logs one row and updates peaks_dict.
658 | print_head_1()
659 |
660 | while True:
661 | # Read the cpu, io and memory files; io_file is parsed by the same helper as memory_file.
662 | try:
663 |
664 | (c_some_avg10, c_some_avg60, c_some_avg300
665 | ) = psi_file_cpu_to_metrics(cpu_file)
666 |
667 | (i_some_avg10, i_some_avg60, i_some_avg300,
668 | i_full_avg10, i_full_avg60, i_full_avg300
669 | ) = psi_file_mem_to_metrics(io_file)
670 |
671 | (m_some_avg10, m_some_avg60, m_some_avg300,
672 | m_full_avg10, m_full_avg60, m_full_avg300
673 | ) = psi_file_mem_to_metrics(memory_file)
674 | # On TypeError skip this sample: flush stdout, wait `interval`, then retry.
675 | except TypeError:
676 | stdout.flush()
677 | sleep(interval)
678 | continue
679 | # One row: cpu some || io some | io full || memory some | memory full, logged as returned by the helpers.
680 | log('{:>6} {:>6} {:>6} || {:>6} {:>6} {:>6} | {:>6} {:>6} {:>6} || '
681 | '{:>6} {:>6} {:>6} | {:>6} {:>6} {:>6}'.format(
682 |
683 | c_some_avg10, c_some_avg60, c_some_avg300,
684 |
685 | i_some_avg10, i_some_avg60, i_some_avg300,
686 | i_full_avg10, i_full_avg60, i_full_avg300,
687 |
688 | m_some_avg10, m_some_avg60, m_some_avg300,
689 | m_full_avg10, m_full_avg60, m_full_avg300
690 |
691 | ))
692 | # Peak tracking: convert each value to float and store it in peaks_dict under the metric's name when the key is absent or the stored value is smaller.
693 | c_some_avg10 = float(c_some_avg10)
694 | if ('c_some_avg10' not in peaks_dict or
695 | peaks_dict['c_some_avg10'] < c_some_avg10):
696 | peaks_dict['c_some_avg10'] = c_some_avg10
697 |
698 | c_some_avg60 = float(c_some_avg60)
699 | if ('c_some_avg60' not in peaks_dict or
700 | peaks_dict['c_some_avg60'] < c_some_avg60):
701 | peaks_dict['c_some_avg60'] = c_some_avg60
702 |
703 | c_some_avg300 = float(c_some_avg300)
704 | if ('c_some_avg300' not in peaks_dict or
705 | peaks_dict['c_some_avg300'] < c_some_avg300):
706 | peaks_dict['c_some_avg300'] = c_some_avg300
707 |
708 | #######################################################################
709 | # Peaks of the io metrics ("some" first, then "full").
710 | i_some_avg10 = float(i_some_avg10)
711 | if ('i_some_avg10' not in peaks_dict or
712 | peaks_dict['i_some_avg10'] < i_some_avg10):
713 | peaks_dict['i_some_avg10'] = i_some_avg10
714 |
715 | i_some_avg60 = float(i_some_avg60)
716 | if ('i_some_avg60' not in peaks_dict or
717 | peaks_dict['i_some_avg60'] < i_some_avg60):
718 | peaks_dict['i_some_avg60'] = i_some_avg60
719 |
720 | i_some_avg300 = float(i_some_avg300)
721 | if ('i_some_avg300' not in peaks_dict or
722 | peaks_dict['i_some_avg300'] < i_some_avg300):
723 | peaks_dict['i_some_avg300'] = i_some_avg300
724 |
725 | i_full_avg10 = float(i_full_avg10)
726 | if ('i_full_avg10' not in peaks_dict or
727 | peaks_dict['i_full_avg10'] < i_full_avg10):
728 | peaks_dict['i_full_avg10'] = i_full_avg10
729 |
730 | i_full_avg60 = float(i_full_avg60)
731 | if ('i_full_avg60' not in peaks_dict or
732 | peaks_dict['i_full_avg60'] < i_full_avg60):
733 | peaks_dict['i_full_avg60'] = i_full_avg60
734 |
735 | i_full_avg300 = float(i_full_avg300)
736 | if ('i_full_avg300' not in peaks_dict or
737 | peaks_dict['i_full_avg300'] < i_full_avg300):
738 | peaks_dict['i_full_avg300'] = i_full_avg300
739 |
740 | #######################################################################
741 | # Peaks of the memory metrics ("some" first, then "full").
742 | m_some_avg10 = float(m_some_avg10)
743 | if ('m_some_avg10' not in peaks_dict or
744 | peaks_dict['m_some_avg10'] < m_some_avg10):
745 | peaks_dict['m_some_avg10'] = m_some_avg10
746 |
747 | m_some_avg60 = float(m_some_avg60)
748 | if ('m_some_avg60' not in peaks_dict or
749 | peaks_dict['m_some_avg60'] < m_some_avg60):
750 | peaks_dict['m_some_avg60'] = m_some_avg60
751 |
752 | m_some_avg300 = float(m_some_avg300)
753 | if ('m_some_avg300' not in peaks_dict or
754 | peaks_dict['m_some_avg300'] < m_some_avg300):
755 | peaks_dict['m_some_avg300'] = m_some_avg300
756 |
757 | m_full_avg10 = float(m_full_avg10)
758 | if ('m_full_avg10' not in peaks_dict or
759 | peaks_dict['m_full_avg10'] < m_full_avg10):
760 | peaks_dict['m_full_avg10'] = m_full_avg10
761 |
762 | m_full_avg60 = float(m_full_avg60)
763 | if ('m_full_avg60' not in peaks_dict or
764 | peaks_dict['m_full_avg60'] < m_full_avg60):
765 | peaks_dict['m_full_avg60'] = m_full_avg60
766 |
767 | m_full_avg300 = float(m_full_avg300)
768 | if ('m_full_avg300' not in peaks_dict or
769 | peaks_dict['m_full_avg300'] < m_full_avg300):
770 | peaks_dict['m_full_avg300'] = m_full_avg300
771 | # Flush stdout, then wait `interval` before taking the next sample.
772 | stdout.flush()
773 | sleep(interval)
774 |
775 |
776 | print_head_2()
777 |
778 | try:
779 |
780 | total_cs0 = psi_file_cpu_to_total(cpu_file)
781 | total_is0, total_if0 = psi_file_mem_to_total(io_file)
782 | total_ms0, total_mf0 = psi_file_mem_to_total(memory_file)
783 | monotonic0 = monotonic()
784 | stdout.flush()
785 | sleep(interval)
786 |
787 | except TypeError:
788 | stdout.flush()
789 | sleep(interval)
790 |
791 | TT = 10000
792 |
793 | while True:
794 |
795 | try:
796 |
797 | total_cs1 = psi_file_cpu_to_total(cpu_file)
798 | total_is1, total_if1 = psi_file_mem_to_total(io_file)
799 | total_ms1, total_mf1 = psi_file_mem_to_total(memory_file)
800 | monotonic1 = monotonic()
801 | dm = monotonic1 - monotonic0
802 |
803 | if dm > abnormal_interval and dm - interval > abnormal_inaccuracy:
804 | log('WARNING: abnormal interval ({} sec), metrics may be prov'
805 | 'ided incorrect'.format(round(dm, 3)))
806 |
807 | monotonic0 = monotonic1
808 |
809 | except TypeError:
810 | stdout.flush()
811 | sleep(interval)
812 | continue
813 |
814 | dtotal_cs = total_cs1 - total_cs0
815 | avg_cs = dtotal_cs / dm / TT
816 | if 'avg_cs' not in peaks_dict or peaks_dict['avg_cs'] < avg_cs:
817 | peaks_dict['avg_cs'] = avg_cs
818 | total_cs0 = total_cs1
819 |
820 | dtotal_is = total_is1 - total_is0
821 | avg_is = dtotal_is / dm / TT
822 | if 'avg_is' not in peaks_dict or peaks_dict['avg_is'] < avg_is:
823 | peaks_dict['avg_is'] = avg_is
824 | total_is0 = total_is1
825 |
826 | dtotal_if = total_if1 - total_if0
827 | avg_if = dtotal_if / dm / TT
828 | if 'avg_if' not in peaks_dict or peaks_dict['avg_if'] < avg_if:
829 | peaks_dict['avg_if'] = avg_if
830 | total_if0 = total_if1
831 |
832 | dtotal_ms = total_ms1 - total_ms0
833 | avg_ms = dtotal_ms / dm / TT
834 | if 'avg_ms' not in peaks_dict or peaks_dict['avg_ms'] < avg_ms:
835 | peaks_dict['avg_ms'] = avg_ms
836 | total_ms0 = total_ms1
837 |
838 | dtotal_mf = total_mf1 - total_mf0
839 | avg_mf = dtotal_mf / dm / TT
840 | if 'avg_mf' not in peaks_dict or peaks_dict['avg_mf'] < avg_mf:
841 | peaks_dict['avg_mf'] = avg_mf
842 | total_mf0 = total_mf1
843 |
844 | log('{:>5} | {:>5} {:>5} | {:>5} {:>5} | {}'.format(
845 |
846 | round(avg_cs, 1),
847 |
848 | round(avg_is, 1),
849 | round(avg_if, 1),
850 |
851 | round(avg_ms, 1),
852 | round(avg_mf, 1),
853 |
854 | round(dm, 2)
855 | ))
856 |
857 | stdout.flush()
858 | sleep(interval)
859 |
--------------------------------------------------------------------------------
/systemd/nohang-desktop.service.in:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Sophisticated low memory handler
3 | Documentation=man:nohang(8) https://github.com/hakavlad/nohang
4 | Conflicts=nohang.service
5 | After=sysinit.target
6 |
7 | [Service]
8 | ExecStart=:TARGET_SBINDIR:/nohang --monitor --config :TARGET_SYSCONFDIR:/nohang/nohang-desktop.conf
9 | Slice=hostcritical.slice
10 | SyslogIdentifier=nohang-desktop
11 | KillMode=mixed
12 | Restart=always
13 | RestartSec=0
14 |
15 | CPUSchedulingResetOnFork=true
16 | RestrictRealtime=yes
17 |
18 | TasksMax=25
19 | MemoryMax=100M
20 | MemorySwapMax=100M
21 |
22 | UMask=0027
23 | ProtectSystem=strict
24 | ReadWritePaths=/var/log
25 | InaccessiblePaths=/home /root
26 | ProtectKernelTunables=true
27 | ProtectKernelModules=true
28 | ProtectControlGroups=true
29 | ProtectHostname=true
30 | MemoryDenyWriteExecute=yes
31 | RestrictNamespaces=yes
32 | LockPersonality=yes
33 | PrivateTmp=true
34 | DeviceAllow=/dev/kmsg rw
35 | DevicePolicy=closed
36 |
37 | # Capabilities whitelist:
38 | # CAP_KILL is required to send signals
39 | # CAP_IPC_LOCK is required to call mlockall()
40 | # CAP_SYS_PTRACE is required to check /proc/[pid]/exe realpaths
41 | # CAP_DAC_READ_SEARCH is required to read /proc/[pid]/environ files
42 | # CAP_DAC_OVERRIDE fixes #94
43 | # CAP_DAC_READ_SEARCH CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE
44 | # are required to send GUI notifications
45 | # CAP_SYSLOG is required to check /dev/kmsg for OOM events
46 |
47 | CapabilityBoundingSet=CAP_KILL CAP_IPC_LOCK CAP_SYS_PTRACE \
48 | CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID \
49 | CAP_SYS_RESOURCE CAP_SYSLOG
50 |
51 | # `PrivateNetwork=true` breaks GUI notifications on oldstable distros
52 | # (Debian 8, CentOS 7, Linux Mint 18). On modern distros you can set
53 | # PrivateNetwork=true for security reasons.
54 | #PrivateNetwork=true
55 |
56 | # Uncomment the lines below to run with the realtime round-robin (rr) CPU scheduling policy
57 | #CPUSchedulingPolicy=rr
58 | #CPUSchedulingPriority=1
59 |
60 | [Install]
61 | WantedBy=multi-user.target
62 |
--------------------------------------------------------------------------------
/systemd/nohang.service.in:
--------------------------------------------------------------------------------
1 | [Unit]
2 | Description=Sophisticated low memory handler
3 | Documentation=man:nohang(8) https://github.com/hakavlad/nohang
4 | Conflicts=nohang-desktop.service
5 | After=sysinit.target
6 |
7 | [Service]
8 | ExecStart=:TARGET_SBINDIR:/nohang --monitor --config :TARGET_SYSCONFDIR:/nohang/nohang.conf
9 | Slice=hostcritical.slice
10 | SyslogIdentifier=nohang
11 | KillMode=mixed
12 | Restart=always
13 | RestartSec=0
14 |
15 | CPUSchedulingResetOnFork=true
16 | RestrictRealtime=yes
17 |
18 | TasksMax=25
19 | MemoryMax=100M
20 | MemorySwapMax=100M
21 |
22 | UMask=0027
23 | ProtectSystem=strict
24 | ReadWritePaths=/var/log
25 | InaccessiblePaths=/home /root
26 | ProtectKernelTunables=true
27 | ProtectKernelModules=true
28 | ProtectControlGroups=true
29 | ProtectHostname=true
30 | MemoryDenyWriteExecute=yes
31 | RestrictNamespaces=yes
32 | LockPersonality=yes
33 | PrivateTmp=true
34 | DeviceAllow=/dev/kmsg rw
35 | DevicePolicy=closed
36 |
37 | # Capabilities whitelist:
38 | # CAP_KILL is required to send signals
39 | # CAP_IPC_LOCK is required to call mlockall()
40 | # CAP_SYS_PTRACE is required to check /proc/[pid]/exe realpaths
41 | # CAP_DAC_READ_SEARCH is required to read /proc/[pid]/environ files
42 | # CAP_DAC_OVERRIDE fixes #94
43 | # CAP_DAC_READ_SEARCH CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID CAP_SYS_RESOURCE
44 | # are required to send GUI notifications
45 | # CAP_SYSLOG is required to check /dev/kmsg for OOM events
46 |
47 | CapabilityBoundingSet=CAP_KILL CAP_IPC_LOCK CAP_SYS_PTRACE \
48 | CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_AUDIT_WRITE CAP_SETUID CAP_SETGID \
49 | CAP_SYS_RESOURCE CAP_SYSLOG
50 |
51 | # `PrivateNetwork=true` breaks GUI notifications on oldstable distros
52 | # (Debian 8, CentOS 7, Linux Mint 18). On modern distros you can set
53 | # PrivateNetwork=true for security reasons.
54 | #PrivateNetwork=true
55 |
56 | # Uncomment the lines below to run with the realtime round-robin (rr) CPU scheduling policy
57 | #CPUSchedulingPolicy=rr
58 | #CPUSchedulingPriority=1
59 |
60 | [Install]
61 | WantedBy=multi-user.target
62 |
--------------------------------------------------------------------------------