├── resmon ├── __init__.py └── resmon.py ├── requirements.txt ├── LICENSE ├── .gitignore ├── setup.py └── README.md /resmon/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools 2 | pip 3 | psutil 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Xiangyu Bu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/linux,python 3 | 4 | ### Linux ### 5 | *~ 6 | 7 | # temporary files which can be created if a process still has a handle open of a deleted file 8 | .fuse_hidden* 9 | 10 | # KDE directory preferences 11 | .directory 12 | 13 | # Linux trash folder which might appear on any partition or disk 14 | .Trash-* 15 | 16 | # .nfs files are created when an open file is removed but is still being accessed 17 | .nfs* 18 | 19 | 20 | ### Python ### 21 | # Byte-compiled / optimized / DLL files 22 | __pycache__/ 23 | *.py[cod] 24 | *$py.class 25 | 26 | # C extensions 27 | *.so 28 | 29 | # Distribution / packaging 30 | .Python 31 | env/ 32 | build/ 33 | develop-eggs/ 34 | dist/ 35 | downloads/ 36 | eggs/ 37 | .eggs/ 38 | lib/ 39 | lib64/ 40 | parts/ 41 | sdist/ 42 | var/ 43 | *.egg-info/ 44 | .installed.cfg 45 | *.egg 46 | 47 | # PyInstaller 48 | # Usually these files are written by a python script from a template 49 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 50 | *.manifest 51 | *.spec 52 | 53 | # Installer logs 54 | pip-log.txt 55 | pip-delete-this-directory.txt 56 | 57 | # Unit test / coverage reports 58 | htmlcov/ 59 | .tox/ 60 | .coverage 61 | .coverage.* 62 | .cache 63 | nosetests.xml 64 | coverage.xml 65 | *,cover 66 | .hypothesis/ 67 | 68 | # Translations 69 | *.mo 70 | *.pot 71 | 72 | # Django stuff: 73 | *.log 74 | local_settings.py 75 | 76 | # Flask stuff: 77 | instance/ 78 | .webassets-cache 79 | 80 | # Scrapy stuff: 81 | .scrapy 82 | 83 | # Sphinx documentation 84 | docs/_build/ 85 | 86 | # PyBuilder 87 | target/ 88 | 89 | # IPython Notebook 90 | .ipynb_checkpoints 91 | 92 | # pyenv 93 | .python-version 94 | 95 | # celery beat schedule file 96 | celerybeat-schedule 97 | 98 | # dotenv 99 | .env 100 | 101 | # virtualenv 102 | .venv/ 103 | venv/ 104 | ENV/ 105 | 106 | # Spyder project settings 107 | .spyderproject 108 | 109 | # Rope project settings 110 | .ropeproject 111 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import os 4 | import platform 5 | 6 | try: 7 | from setuptools import setup, find_packages 8 | except ImportError: 9 | from ez_setup import use_setuptools 10 | use_setuptools() 11 | from setuptools import setup, find_packages 12 | 13 | try: 14 | with open(os.path.join(os.path.dirname(__file__), '..', 'README.md')) as f: 15 | readme = f.read() 16 | except IOError: 17 | readme = 'Please read README.md for more details.' 18 | 19 | 20 | description = """ 21 | A resource monitor that records resource usage (e.g., CPU usage, RAM usage and free, 22 | disk I/O count, NIC speed, etc.) and outputs the data in CSV format that is easy to 23 | post-process. 24 | """ 25 | 26 | with open('requirements.txt', 'r') as f: 27 | install_requires = f.readlines() 28 | 29 | setup( 30 | name='python-resmon', 31 | version='1.0.2', 32 | author='Xiangyu Bu', 33 | author_email='xybu92@live.com', 34 | license='MIT License', 35 | keywords=['resource', 'monitor', 'csv', 'process', 'usage'], 36 | url='https://github.com/xybu/python-resmon', 37 | description=description, 38 | long_description=readme, 39 | classifiers=[ 40 | 'Development Status :: 4 - Beta', 41 | 'Environment :: Console', 42 | 'Intended Audience :: Developers', 43 | 'Intended Audience :: System Administrators', 44 | 'Intended Audience :: Information Technology', 45 | 'License :: OSI Approved :: MIT License', 46 | 'Operating System :: POSIX :: Linux', 47 | 'Programming Language :: Python', 48 | 'Programming Language :: Python :: 3', 49 | 'Topic :: System', 50 | 'Topic :: System :: Monitoring', 51 | 'Topic :: System :: Networking :: Monitoring', 52 | 'Topic :: Utilities'], 53 | install_requires=install_requires, 54 | packages=find_packages(), 55 | include_package_data=True, 56 | exclude_package_data={'': ['README.*']}, 57 | entry_points={ 58 | 'console_scripts': [ 59 | 'resmon = resmon.resmon:main', 60 | ] 61 | } 62 | ) 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | python-resmon 2 | ============= 3 | 4 | # Intro 5 | 6 | A resource monitor that records resource usage (e.g., CPU usage, RAM usage and free, disk I/O 7 | count, NIC speed, etc.) and outputs the data in CSV format that is easy to post-process. 8 | 9 | Compared to collecting and parsing the output of `top` and `atop`, this script incurs less overhead and the 10 | CSV output is much easier to parse, making it handy for experiments that need resource monitoring 11 | and processing of the generated data sets. 12 | 13 | The script is written in Python 3 syntax and depends on [psutil](https://github.com/giampaolo/psutil). 14 | 15 | # Installation 16 | 17 | First make sure Python 3 interpreter and `pip` are installed. 18 | 19 | ```bash 20 | # Install python3-dev on ubuntu because a dependency requires Python.h. 21 | $ sudo apt install python3-dev 22 | 23 | # Install pip. 24 | $ wget -O- https://bootstrap.pypa.io/get-pip.py | sudo python3 25 | ``` 26 | 27 | Use `pip` to pull the repository and install: 28 | 29 | ```bash 30 | # Install to user space. Remove "--user" to install system-wide. 31 | $ pip install --user git+ssh://git@github.com/xybu/python-resmon.git 32 | ``` 33 | 34 | Or install from repository manually, 35 | 36 | ```bash 37 | # Grab source code. 38 | $ git clone git@github.com:xybu/python-resmon.git 39 | $ cd python-resmon 40 | 41 | # Install dependencies. 42 | $ pip install -r requirements.txt # might need sudo 43 | 44 | # Install python-resmon. 45 | $ ./setup.py install --user 46 | ``` 47 | 48 | # Uninstallation 49 | 50 | To uninstall the package, 51 | 52 | ```bash 53 | $ pip uninstall python-resmon 54 | ``` 55 | 56 | # Usage 57 | 58 | After installing, try with command `resmon --help` for more info: 59 | 60 | ```bash 61 | xb@precision:[~/projects/python-resmon]: resmon --help 62 | usage: resmon [-h] [--delay DELAY] [--flush] [--outfile [OUTFILE]] 63 | [--nic [NIC]] [--nic-outfile [NIC_OUTFILE]] [--enable-ps] 64 | [--ps-keywords [PS_KEYWORDS [PS_KEYWORDS ...]]] 65 | [--ps-pids [PS_PIDS [PS_PIDS ...]]] [--ps-outfile [PS_OUTFILE]] 66 | 67 | Monitor system-wide resource availability. Optionally monitor processes that 68 | match the specified criteria and their children. 69 | 70 | optional arguments: 71 | -h, --help show this help message and exit 72 | --delay DELAY, -d DELAY 73 | Interval, in sec, to poll information. 74 | --flush, -f If present, flush the output files after each line is 75 | written. 76 | --outfile [OUTFILE], -o [OUTFILE] 77 | Name of system monitor output file. If unset, print to 78 | stdout. 79 | --nic [NIC], -n [NIC] 80 | Specify particular NICs, separated by a comma, to 81 | monitor. Default is none. 82 | --nic-outfile [NIC_OUTFILE] 83 | Name of the NIC monitor output file. Use "{nic}" as 84 | placeholder for NIC name. Default: 85 | "netstat.{nic}.csv". 86 | --enable-ps, -p Enable process-keyword monitor. 87 | --ps-keywords [PS_KEYWORDS [PS_KEYWORDS ...]] 88 | Include processes whose name contains the keyword and 89 | their children. 90 | --ps-pids [PS_PIDS [PS_PIDS ...]] 91 | Include the specified PIDs and their children. 92 | --ps-outfile [PS_OUTFILE] 93 | Name of the process monitor output file. Default: 94 | "psstat.csv". 95 | ``` 96 | 97 | To terminate the monitor, send `SIGINT` or `SIGTERM` signal to the process. 98 | 99 | # Example 100 | 101 | Sample output of resource monitor (note that the operation of obtaining statistics is not atomic -- there is slight 102 | time difference between obtaining sets of metrics; therefore a sum value (e.g., `%CPU` below) may not equal the sum 103 | of individual values (e.g., `%CPUx`)): 104 | 105 | ``` 106 | Timestamp, Uptime, NCPU, %CPU, %CPU0, %CPU1, %CPU2, %CPU3, %MEM, mem.total.KB, mem.used.KB, mem.avail.KB, mem.free.KB, %SWAP, swap.total.KB, swap.used.KB, swap.free.KB, io.read, io.write, io.read.KB, io.write.KB, io.read.ms, io.write.ms 107 | 1475022339, 0, 4, 400.0, 100.0, 0.0, 100.0, 0.0, 15.3, 4103824, 995080, 3475692, 3108744, 0.0, 4183036, 0, 4183036, 0, 0, 0, 0, 0, 0 108 | 1475022341, 2, 4, 158.0, 61.7, 41.2, 26.5, 28.2, 15.7, 4103824, 1019080, 3459788, 3084744, 0.0, 4183036, 0, 4183036, 532, 2, 8264, 12, 3432, 28 109 | 1475022343, 4, 4, 185.2, 36.3, 49.3, 29.9, 65.3, 17.1, 4103824, 1085820, 3402592, 3018004, 0.0, 4183036, 0, 4183036, 152, 31, 9492, 256, 816, 1376 110 | 1475022345, 6, 4, 222.8, 49.6, 36.3, 74.4, 58.2, 17.8, 4103824, 1120424, 3371292, 2983400, 0.0, 4183036, 0, 4183036, 46, 2, 3168, 24, 92, 24 111 | 1475022347, 8, 4, 226.4, 30.0, 35.3, 94.6, 36.6, 19.1, 4103824, 1174332, 3319400, 2929492, 0.0, 4183036, 0, 4183036, 30, 2, 1916, 12, 128, 24 112 | 1475022349, 10, 4, 173.6, 35.5, 27.5, 73.8, 22.9, 21.9, 4103824, 1287532, 3206692, 2816292, 0.0, 4183036, 0, 4183036, 4, 30, 304, 244, 12, 220 113 | 1475022351, 12, 4, 163.6, 36.0, 47.4, 41.2, 38.3, 21.9, 4103824, 1287932, 3206372, 2815892, 0.0, 4183036, 0, 4183036, 0, 3, 0, 16, 0, 52 114 | 1475022353, 14, 4, 218.8, 60.5, 55.2, 57.6, 45.1, 21.9, 4103824, 1290036, 3204276, 2813788, 0.0, 4183036, 0, 4183036, 0, 17, 0, 136, 0, 88 115 | 1475022355, 16, 4, 196.0, 55.2, 44.9, 43.5, 52.0, 21.9, 4103824, 1290416, 3203964, 2813408, 0.0, 4183036, 0, 4183036, 0, 1, 0, 4, 0, 0 116 | ... 117 | ``` 118 | 119 | Sample output of NIC monitor (the example was pasted from an output that monitored a NIC named `enp34s0`): 120 | 121 | ``` 122 | Timestamp, Uptime, NIC, sent.B, recv.B, sent.pkts, recv.pkts, err.in, err.out, drop.in, drop.out 123 | 1475022339, 0, enp34s0, 0, 0, 0, 0, 0, 0, 0, 0 124 | 1475022341, 2, enp34s0, 0, 4272386, 0, 13394, 0, 0, 0, 0 125 | 1475022343, 4, enp34s0, 0, 7097273, 0, 20839, 0, 0, 0, 0 126 | 1475022345, 6, enp34s0, 0, 8107324, 0, 21161, 0, 0, 0, 0 127 | 1475022347, 8, enp34s0, 0, 12865759, 0, 24632, 0, 0, 0, 0 128 | 1475022349, 10, enp34s0, 0, 3188676, 0, 13986, 0, 0, 0, 0 129 | 1475022351, 12, enp34s0, 0, 3315832, 0, 14091, 0, 0, 0, 0 130 | 1475022353, 14, enp34s0, 0, 12910705, 0, 24074, 0, 0, 0, 0 131 | 1475022355, 16, enp34s0, 0, 6147833, 0, 17204, 0, 0, 0, 0 132 | 1475022357, 18, enp34s0, 0, 14606762, 0, 28263, 0, 0, 0, 0 133 | 1475022359, 20, enp34s0, 0, 5930482, 0, 19017, 0, 0, 0, 0 134 | ... 135 | ``` 136 | 137 | Sample output of process keyword-based monitor (in the example run, the keyword is set to `qemu`, meaning that the 138 | resource usage printed is the sum of resource used by all processes containing keyword `qemu` and all their (direct 139 | and indirect) child processes): 140 | 141 | ``` 142 | Timestamp, Uptime, %CPU, %MEM, io.read, io.read.KB, io.write, io.write.KB, mem.rss.KB, nctxsw, nthreads 143 | 1475022339, 0, 0.0, 11.789, 19269, 168236, 11619, 592, 483808, 11414, 23 144 | 1475022341, 2, 61.9, 11.789, 19326, 168960, 11638, 592, 483808, 11452, 23 145 | 1475022343, 4, 118.0, 13.139, 19821, 178452, 11846, 868, 539184, 11778, 23 146 | 1475022345, 6, 169.6, 13.937, 19998, 181620, 11905, 868, 571952, 11896, 23 147 | 1475022347, 8, 174.0, 15.185, 20090, 183536, 11942, 912, 623152, 11957, 23 148 | 1475022349, 10, 131.4, 17.934, 20119, 183840, 11958, 1000, 735992, 11982, 23 149 | 1475022351, 12, 130.0, 17.934, 20119, 183840, 11958, 1000, 735992, 11985, 23 150 | 1475022353, 14, 168.5, 17.984, 20124, 183840, 11975, 1124, 738040, 11992, 23 151 | 1475022355, 16, 153.9, 17.984, 20134, 183840, 11984, 1128, 738040, 12004, 23 152 | 1475022357, 18, 191.5, 17.94, 20134, 183840, 11984, 1128, 736232, 12007, 21 153 | 1475022359, 20, 172.6, 17.943, 20149, 183840, 12005, 1300, 736356, 12022, 16 154 | ... 155 | ``` 156 | 157 | # Support 158 | 159 | Contribute to the code base or report bugs by committing to the repository (https://github.com/xybu/python-resmon) or creating issues. 160 | -------------------------------------------------------------------------------- /resmon/resmon.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | """ 4 | resmon.py 5 | 6 | Resource monitor monitors system wide resource usage and availability. This script assumes that the number 7 | of CPU cores does not change throughout the course. 8 | 9 | NIC monitor component monitors the speed, in terms of Bps and Pkts/sec, and error and drop counts, of the specified NICs. 10 | 11 | Process monitor component monitors resource usage of of a subset of living processes. 12 | For example, if keyword "docker" is given, then it reports, every T seconds, the sum of resource 13 | (CPU, RSS, IO, CtxSw, NThreads) usage of all processes whose name contains "docker" and their child processes. 14 | 15 | @author Xiangyu Bu 16 | """ 17 | 18 | import argparse 19 | import os 20 | import sched 21 | import signal 22 | import sys 23 | import time 24 | import psutil 25 | 26 | 27 | class SystemMonitor: 28 | 29 | def __init__(self, outfile_name=None, flush=False): 30 | print('System monitor started.', file=sys.stderr) 31 | ncores = self.ncores = psutil.cpu_count() 32 | if outfile_name is None: 33 | self.outfile = sys.stdout 34 | else: 35 | self.outfile = open(outfile_name, 'w') 36 | self.flush = flush 37 | self.outfile.write( 38 | 'Timestamp, Uptime, NCPU, %CPU, ' + ', '.join(['%CPU' + str(i) for i in range(ncores)]) + 39 | ', %MEM, mem.total.KB, mem.used.KB, mem.avail.KB, mem.free.KB' + 40 | ', %SWAP, swap.total.KB, swap.used.KB, swap.free.KB' + 41 | ', io.read, io.write, io.read.KB, io.write.KB, io.read.ms, io.write.ms\n') 42 | self.prev_disk_stat = psutil.disk_io_counters() 43 | self.starttime = int(time.time()) 44 | self.poll_stat() 45 | 46 | def __enter__(self): 47 | return self 48 | 49 | def __exit__(self, exc_type, exc_value, traceback): 50 | if not hasattr(self, 'closed'): 51 | self.close() 52 | 53 | def close(self): 54 | if self.outfile is not sys.stdout: 55 | self.outfile.close() 56 | self.closed = True 57 | print('System monitor closed.', file=sys.stderr) 58 | 59 | def poll_stat(self): 60 | timestamp = int(time.time()) 61 | uptime = timestamp - self.starttime 62 | total_cpu_percent = psutil.cpu_percent(percpu=False) 63 | percpu_percent = psutil.cpu_percent(percpu=True) 64 | mem_stat = psutil.virtual_memory() 65 | swap_stat = psutil.swap_memory() 66 | disk_stat = psutil.disk_io_counters() 67 | 68 | line = str(timestamp) + ', ' + str(uptime) + ', ' + \ 69 | str(self.ncores) + ', ' + str(total_cpu_percent*self.ncores) + ', ' 70 | line += ', '.join([str(i) for i in percpu_percent]) 71 | line += ', ' + str(mem_stat.percent) + ', ' + str(mem_stat.total >> 10) + ', ' + str( 72 | mem_stat.used >> 10) + ', ' + str(mem_stat.available >> 10) + ', ' + str(mem_stat.free >> 10) 73 | line += ', ' + str(swap_stat.percent) + ', ' + str(swap_stat.total >> 10) + \ 74 | ', ' + str(swap_stat.used >> 10) + ', ' + str(swap_stat.free >> 10) 75 | line += ', ' + str(disk_stat.read_count - self.prev_disk_stat.read_count) + ', ' + str(disk_stat.write_count - self.prev_disk_stat.write_count) + \ 76 | ', ' + str((disk_stat.read_bytes - self.prev_disk_stat.read_bytes) >> 10) + ', ' + str((disk_stat.write_bytes - self.prev_disk_stat.write_bytes) >> 10) + \ 77 | ', ' + str(disk_stat.read_time - self.prev_disk_stat.read_time) + \ 78 | ', ' + str(disk_stat.write_time - self.prev_disk_stat.write_time) 79 | 80 | self.outfile.write(line + '\n') 81 | if self.flush: 82 | self.outfile.flush() 83 | self.prev_disk_stat = disk_stat 84 | 85 | 86 | class NetworkInterfaceMonitor: 87 | 88 | def __init__(self, outfile_pattern='netstat.{nic}.csv', nics=[], flush=False): 89 | print('NIC monitor started.', file=sys.stderr) 90 | all_nics = psutil.net_if_stats() 91 | self.nic_files = dict() 92 | self.flush = flush 93 | for nic_name in nics: 94 | nic_name = nic_name.strip() 95 | if nic_name not in all_nics: 96 | print('Error: NIC "%s" does not exist. Skip.' % 97 | nic_name, file=sys.stderr) 98 | else: 99 | self.nic_files[nic_name] = self.create_new_logfile( 100 | outfile_pattern, nic_name) 101 | if len(self.nic_files) == 0: 102 | raise ValueError('No NIC to monitor.') 103 | self.prev_stat = dict() 104 | for nic, stat in psutil.net_io_counters(pernic=True).items(): 105 | if nic in self.nic_files: 106 | self.prev_stat[nic] = stat 107 | self.starttime = int(time.time()) 108 | self.poll_stat() 109 | 110 | def __enter__(self): 111 | return self 112 | 113 | def __exit__(self, exc_type, exc_value, traceback): 114 | if not hasattr(self, 'closed'): 115 | self.close() 116 | 117 | def close(self): 118 | for f in self.nic_files.values(): 119 | f.close() 120 | self.closed = True 121 | print('NIC monitor closed.', file=sys.stderr) 122 | 123 | def create_new_logfile(self, pattern, nic_name): 124 | f = open(pattern.format(nic=nic_name), 'w') 125 | f.write( 126 | 'Timestamp, Uptime, NIC, sent.B, recv.B, sent.pkts, recv.pkts, err.in, err.out, drop.in, drop.out\n') 127 | return f 128 | 129 | def poll_stat(self): 130 | timestamp = int(time.time()) 131 | uptime = timestamp - self.starttime 132 | net_stat = psutil.net_io_counters(pernic=True) 133 | for nic, f in self.nic_files.items(): 134 | stat = net_stat[nic] 135 | prevstat = self.prev_stat[nic] 136 | f.write(str(timestamp) + ', ' + str(uptime) + ', ' + nic + ', ' + 137 | str(stat.bytes_sent-prevstat.bytes_sent) + ', ' + str(stat.bytes_recv-prevstat.bytes_recv) + ', ' + 138 | str(stat.packets_sent-prevstat.packets_sent) + ', ' + str(stat.packets_recv-prevstat.packets_recv) + ', ' + 139 | str(stat.errin-prevstat.errin) + ', ' + str(stat.errout-prevstat.errout) + ', ' + str(stat.dropin-prevstat.dropin) + ', ' + str(stat.dropout-prevstat.dropout) + '\n') 140 | if self.flush: 141 | f.flush() 142 | self.prev_stat = net_stat 143 | 144 | 145 | class ProcessSetMonitor: 146 | 147 | BASE_STAT = { 148 | 'io.read': 0, 149 | 'io.write': 0, 150 | 'io.read.KB': 0, 151 | 'io.write.KB': 0, 152 | 'mem.rss.KB': 0, 153 | '%MEM': 0, 154 | '%CPU': 0, 155 | 'nctxsw': 0, 156 | 'nthreads': 0 157 | } 158 | 159 | KEYS = sorted(BASE_STAT.keys()) 160 | 161 | def __init__(self, keywords, pids, outfile_name, flush=False): 162 | print('ProcessSet monitor started.', file=sys.stderr) 163 | if outfile_name is None: 164 | self.outfile = sys.stdout 165 | else: 166 | self.outfile = open(outfile_name, 'w') 167 | self.pids = pids 168 | self.keywords = keywords 169 | self.flush = flush 170 | self.outfile.write('Timestamp, Uptime, ' + ', '.join(self.KEYS) + '\n') 171 | self.starttime = int(time.time()) 172 | self.poll_stat() 173 | 174 | def __enter__(self): 175 | return self 176 | 177 | def __exit__(self, exc_type, exc_value, traceback): 178 | if not hasattr(self, 'closed'): 179 | self.close() 180 | 181 | def close(self): 182 | if self.outfile is not sys.stdout: 183 | self.outfile.close() 184 | self.closed = True 185 | print('ProcessSet monitor closed.', file=sys.stderr) 186 | 187 | def _stat_proc(self, proc, stat, visited): 188 | """ Recursively stat a process and its child processes. """ 189 | if proc.pid in visited: 190 | return 191 | visited.add(proc.pid) 192 | io = proc.io_counters() 193 | mem_rss = proc.memory_info().rss 194 | mem_percent = proc.memory_percent('rss') 195 | nctxsw = proc.num_ctx_switches() 196 | nctxsw = nctxsw.voluntary + nctxsw.involuntary 197 | nthreads = proc.num_threads() 198 | cpu_percent = proc.cpu_percent() 199 | stat['io.read'] += io.read_count 200 | stat['io.write'] += io.write_count 201 | stat['io.read.KB'] += io.read_bytes 202 | stat['io.write.KB'] += io.write_bytes 203 | stat['mem.rss.KB'] += mem_rss 204 | stat['%MEM'] += mem_percent 205 | stat['nctxsw'] += nctxsw 206 | stat['nthreads'] += nthreads 207 | stat['%CPU'] += cpu_percent 208 | for c in proc.children(): 209 | self._stat_proc(c, stat, visited) 210 | 211 | def poll_stat(self): 212 | visited = set() 213 | curr_stat = dict(self.BASE_STAT) 214 | timestamp = int(time.time()) 215 | uptime = timestamp - self.starttime 216 | for proc in psutil.process_iter(): 217 | try: 218 | pinfo = proc.as_dict(attrs=['pid', 'name']) 219 | except psutil.NoSuchProcess: 220 | pass 221 | else: 222 | if pinfo['pid'] not in visited: 223 | if pinfo['pid'] in self.pids: 224 | self._stat_proc(proc, curr_stat, visited) 225 | else: 226 | for k in self.keywords: 227 | if k in pinfo['name'].lower(): 228 | self._stat_proc(proc, curr_stat, visited) 229 | break # for keyword 230 | curr_stat['%CPU'] = round(curr_stat['%CPU'], 3) 231 | curr_stat['%MEM'] = round(curr_stat['%MEM'], 3) 232 | curr_stat['io.read.KB'] >>= 10 233 | curr_stat['io.write.KB'] >>= 10 234 | curr_stat['mem.rss.KB'] >>= 10 235 | line = str(timestamp) + ', ' + str(uptime) + ', ' + \ 236 | ', '.join([str(curr_stat[k]) for k in self.KEYS]) + '\n' 237 | self.outfile.write(line) 238 | if self.flush: 239 | self.outfile.flush() 240 | 241 | 242 | def chprio(prio): 243 | try: 244 | psutil.Process(os.getpid()).nice(prio) 245 | except: 246 | print('Warning: failed to elevate priority!', file=sys.stderr) 247 | 248 | 249 | def sigterm(signum, frame): 250 | raise KeyboardInterrupt() 251 | 252 | 253 | def main(): 254 | parser = argparse.ArgumentParser( 255 | description='Monitor system-wide resource availability. Optionally monitor processes that match the specified criteria and their children.') 256 | parser.add_argument('--delay', '-d', type=int, default=1, help='Interval, in sec, to poll information.') 257 | parser.add_argument('--flush', '-f', default=False, action='store_true', 258 | help='If present, flush the output files after each line is written.') 259 | parser.add_argument('--outfile', '-o', type=str, nargs='?', default=None, 260 | required=False, help='Name of system monitor output file. If unset, print to stdout.') 261 | parser.add_argument('--nic', '-n', type=str, nargs='?', default=None, required=False, 262 | help='Specify particular NICs, separated by a comma, to monitor. Default is none.') 263 | parser.add_argument('--nic-outfile', type=str, nargs='?', 264 | default='netstat.{nic}.csv', help='Name of the NIC monitor output file. Use "{nic}" as placeholder for NIC name. Default: "netstat.{nic}.csv".') 265 | parser.add_argument('--enable-ps', '-p', default=False, 266 | action='store_true', help='Enable process-keyword monitor.') 267 | parser.add_argument('--ps-keywords', type=str, nargs='*', 268 | help='Include processes whose name contains the keyword and their children.') 269 | parser.add_argument('--ps-pids', type=int, nargs='*', 270 | help='Include the specified PIDs and their children.') 271 | parser.add_argument('--ps-outfile', type=str, nargs='?', default='psstat.csv', 272 | help='Name of the process monitor output file. Default: "psstat.csv".') 273 | args = parser.parse_args() 274 | if args.enable_ps and ((not args.ps_keywords or len(args.ps_keywords) == 0) and (not args.ps_pids or len(args.ps_pids) == 0)): 275 | parser.error('--enable-ps requires --ps-keywords or --ps-pids.') 276 | 277 | if args.ps_pids is None: 278 | args.ps_pids = set() 279 | else: 280 | args.ps_pids = set(args.ps_pids) 281 | 282 | if args.ps_keywords is None: 283 | args.ps_keywords = [] 284 | else: 285 | # Convert to lowercase to achieve case IN-sensitiveness. 286 | args.ps_keywords = [k.lower() for k in args.ps_keywords] 287 | 288 | signal.signal(signal.SIGTERM, sigterm) 289 | 290 | try: 291 | chprio(-20) 292 | scheduler = sched.scheduler(time.time, time.sleep) 293 | sm = SystemMonitor(args.outfile, args.flush) 294 | 295 | enable_nic_mon = args.nic is not None 296 | if enable_nic_mon: 297 | try: 298 | nm = NetworkInterfaceMonitor( 299 | args.nic_outfile, args.nic.split(','), args.flush) 300 | except ValueError as e: 301 | print('Error: ' + str(e), file=sys.stderr) 302 | enable_nic_mon = False 303 | 304 | if args.enable_ps: 305 | pm = ProcessSetMonitor( 306 | args.ps_keywords, args.ps_pids, args.ps_outfile, args.flush) 307 | 308 | i = 1 309 | starttime = time.time() 310 | while True: 311 | scheduler.enterabs( 312 | time=starttime + i*args.delay, priority=2, action=SystemMonitor.poll_stat, argument=(sm, )) 313 | if enable_nic_mon: 314 | scheduler.enterabs(time=starttime + i*args.delay, priority=1, 315 | action=NetworkInterfaceMonitor.poll_stat, argument=(nm, )) 316 | if args.enable_ps: 317 | scheduler.enterabs( 318 | time=starttime + i*args.delay, priority=0, action=ProcessSetMonitor.poll_stat, argument=(pm, )) 319 | scheduler.run() 320 | i += 1 321 | 322 | except KeyboardInterrupt: 323 | sm.close() 324 | if enable_nic_mon: 325 | nm.close() 326 | if args.enable_ps: 327 | pm.close() 328 | sys.exit(0) 329 | 330 | 331 | if __name__ == '__main__': 332 | main() 333 | --------------------------------------------------------------------------------