├── .ci.yml ├── .github └── workflows │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── common.py ├── graph.png ├── graph.py ├── requirements.txt ├── sargraph.py ├── scripts └── test.sh └── watch.py /.ci.yml: -------------------------------------------------------------------------------- 1 | image: debian:bullseye 2 | 3 | before_script: 4 | - apt-get update 5 | - apt-get install -qqy --no-install-recommends python3 python3-pip git colorized-logs 6 | - pip3 install git+https://github.com/antmicro/tuttest.git 7 | 8 | simple_test: 9 | script: 10 | - ./scripts/test.sh 11 | - cat plot.ascii | ansi2txt 12 | artifacts: 13 | when: always 14 | paths: 15 | - "plot.svg" 16 | - "plot.png" 17 | - "plot.ascii" 18 | - "plot.html" 19 | - "example.log" 20 | - "example.txt" 21 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Sargraph tests 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | readme-tests: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout sources 14 | uses: actions/checkout@v3 15 | - name: Install dependencies 16 | run: | 17 | sudo apt-get update 18 | sudo apt-get install -qqy --no-install-recommends python3 python3-pip git colorized-logs 19 | sudo pip3 install git+https://github.com/antmicro/tuttest.git 20 | - name: Run README.md snippets 21 | run: | 22 | sudo ./scripts/test.sh 23 | cat plot.ascii | ansi2txt 24 | echo -en '## Sargraph summary\n\n' >> $GITHUB_STEP_SUMMARY 25 | echo -en '```\n' >> $GITHUB_STEP_SUMMARY 26 | cat plot.ascii | ansi2txt >> $GITHUB_STEP_SUMMARY 27 | echo -en '\n```\n' >> $GITHUB_STEP_SUMMARY 28 | 29 | - name: Upload artifacts 30 | if: always() 31 | uses: actions/upload-artifact@v3 32 | with: 33 | name: sargraph-sample-graphs 34 | path: | 35 | plot.svg 36 | plot.png 37 | plot.ascii 38 | plot.html 39 | example.txt 40 | 
example.log 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sargraph 2 | 3 | Copyright (c) 2019-2023 [Antmicro](https://www.antmicro.com) 4 | 5 | This is a simple python tool that uses "sysstat" ("sar") to save information on CPU, RAM and disk usage. 6 | The process runs in background and can be controlled with a set of sargraph sub-commands. 7 | 8 | The tool can use "gnuplot" to optionally plot the data it collected. 9 | Supported plot formats are PNG, SVG and ASCII, they are determined by filename extensions. 10 | 11 | # Install requirements 12 | 13 | The sargraph requires `gnuplot`, `sysstat` (`sar`), `python3`, `coreutils` and `screen` to operate. 
14 | In Debian you can install them with: 15 | 16 | ``` 17 | # install system dependencies 18 | apt-get update 19 | apt-get install -qqy --no-install-recommends \ 20 | coreutils \ 21 | git \ 22 | gnuplot-nox \ 23 | python3 \ 24 | python3-pip \ 25 | screen \ 26 | sysstat 27 | 28 | # install Python dependencies 29 | pip3 install -r requirements.txt 30 | ``` 31 | 32 | For rendering HTML plots, you additionally have to install: 33 | 34 | ``` 35 | pip3 install git+https://github.com/antmicro/servis#egg=servis[bokeh] 36 | ``` 37 | 38 | # Example graph 39 | 40 | ![graph](graph.png) 41 | 42 | 43 | # Usage 44 | All sargraph commands use the following pattern: 45 | 46 | ``` 47 | $ ./sargraph.py [session_name] [command] [args...] 48 | ``` 49 | 50 | Let's create a sample disk on which we will run stress tests: 51 | 52 | ``` 53 | dd if=/dev/zero of=sample_disk.ext4 bs=1M count=130 54 | mkfs.ext4 sample_disk.ext4 55 | mkdir -p ./mountpoint && mount sample_disk.ext4 ./mountpoint 56 | ``` 57 | 58 | For the purpose of stress tests let's install `stress`: 59 | 60 | ``` 61 | apt-get install stress 62 | ``` 63 | 64 | ## Starting a session 65 | 66 | Start a background session and name it `example`: 67 | ``` 68 | ./sargraph.py example start -m ./mountpoint 69 | ``` 70 | 71 | The data will be saved in `example.txt`. 72 | Logs from screen will be written to `example.log`. 73 | 74 | `-m` flag allows to specify a chosen filesystem/mountpoint. 75 | 76 | ## Adding a label 77 | 78 | Add labels that will be placed as comments in the collected dataset. 
79 | They will be also visible on the plots: 80 | ``` 81 | ./sargraph.py example label "Compilation start" 82 | ``` 83 | 84 | After this, let's simulate some processing: 85 | 86 | ``` 87 | pushd ./mountpoint 88 | stress -c 16 -i 1 -m 1 --vm-bytes 512M -d 1 --hdd-bytes 70M -t 160s 89 | popd 90 | ``` 91 | 92 | ## Plotting a running session 93 | 94 | It is possible to plot data collected so far in a still running session: 95 | ``` 96 | # Save plot to SVG 97 | ./sargraph.py example save plot.svg 98 | # Save plot to PNG 99 | ./sargraph.py example save plot.png 100 | # Save plot to ASCII that can be printed in terminal 101 | ./sargraph.py example save plot.ascii 102 | # Save plot to interactive HTML plot 103 | ./sargraph.py example save plot.html 104 | ``` 105 | 106 | The supported formats are: 107 | 108 | * `png` format 109 | * `svg` format 110 | * `html` format 111 | * `ascii` format - plot is rendered to text file that can be displayed in terminal 112 | 113 | ## Stopping a session 114 | 115 | Stop a session and create a final `plot.png` plot file if no other plot was created so far: 116 | ``` 117 | ./sargraph.py example stop 118 | ``` 119 | 120 | The filename of the final plot can be changed if its placed after the `stop` command. 121 | If the name is `none` then no plot will be created. 122 | 123 | ## Plotting a closed session 124 | 125 | Plot data collected in a session that is not running anymore. 126 | ``` 127 | ./sargraph.py example plot plot.ascii 128 | ``` 129 | The command requires the `example.txt` log file to be present in the working directory. 
130 | -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # 4 | # (c) 2019-2023 Antmicro 5 | # License: Apache-2.0 6 | # 7 | 8 | 9 | import os 10 | import subprocess 11 | import sys 12 | import re 13 | 14 | 15 | # Increase major number for general changes, middle number for smaller changes 16 | # that can cause incompatibilities and minor number for regular fixes 17 | SARGRAPH_VERSION = "2.5.0" 18 | 19 | # Define units for use with unit_str 20 | TIME_UNITS = ['seconds', 'minutes', 'hours'] 21 | DATA_UNITS = ['B', 'kB', 'MB', 'GB', 'TB', 'PB'] 22 | SPEED_UNITS = ['Mb/s', 'Gb/s', 'Tb/s', 'Pb/s'] 23 | 24 | # Print an error message and exit with non-zero status 25 | def fail(msg): 26 | print(f"Error: {msg}", file=sys.stderr) 27 | sys.exit(1) 28 | 29 | 30 | # Run process, return subprocess object on success, exit script on failure 31 | def run_or_fail(*argv, **kwargs): 32 | try: 33 | p = subprocess.Popen(argv, **kwargs) 34 | except: 35 | fail(f"'{argv[0]}' tool not found") 36 | return p 37 | 38 | 39 | # Check if a process is running 40 | def pid_running(pid): 41 | return os.path.exists(f"/proc/{pid}") 42 | 43 | 44 | # Convert a string to float, also when the separator is a comma 45 | def stof(s): 46 | return float(s.replace(',', '.')) 47 | 48 | 49 | # Return a string without given suffix or unchange if it doesn't have it 50 | def cut_suffix(s, sfx): 51 | if s.endswith(sfx): 52 | s = s[:-len(sfx)] 53 | return s 54 | 55 | 56 | # Scale a value until it has a convenient size and unit, round the value 57 | # and return a string representation with the new value and its unit 58 | def unit_str(value, units, step=1024): 59 | value = float(value) 60 | biggest = len(units) - 1 61 | unit = 0 62 | 63 | while value >= step and unit < biggest: 64 | value /= step 65 | unit += 1 66 | return f"{round(value, 2)} {units[unit]}" 67 | 
68 | 69 | # Get the first group from a given match and convert to required type 70 | def scan(regex, conv, string): 71 | match = re.search(regex, string) 72 | if not match: 73 | return None 74 | try: 75 | value = conv(match.group(1)) 76 | except ValueError: 77 | return None 78 | return value 79 | 80 | 81 | # Return True iff version string `a` is greater than or equal to `b` 82 | def is_version_ge(a, b): 83 | a = [int(n) for n in a.split('.')] 84 | b = [int(n) for n in b.split('.')] 85 | 86 | if len(a) != len(b): 87 | return len(a) > len(b) 88 | for i, _ in enumerate(a): 89 | if a[i] != b[i]: 90 | break 91 | return a[i] >= b[i] 92 | -------------------------------------------------------------------------------- /graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antmicro/sargraph/a204242eb3805c2d3fcb2dff25837ca158a3c4fa/graph.png -------------------------------------------------------------------------------- /graph.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # 4 | # (c) 2019-2023 Antmicro 5 | # License: Apache-2.0 6 | # 7 | 8 | 9 | import datetime 10 | import os 11 | import socket 12 | import subprocess 13 | import time 14 | from common import * 15 | from pathlib import Path 16 | import tempfile 17 | 18 | global gnuplot 19 | 20 | GNUPLOT_VERSION_EXPECTED = "5.0" 21 | 22 | # Every summary variable requires a default value in case it missed in a session log 23 | START_DATE = "" 24 | END_DATE = "" 25 | AVERAGE_LOAD = 0.0 26 | MAX_USED_RAM = 0 27 | MAX_USED_FS = 0 28 | MAX_TX = 0 29 | MAX_RX = 0 30 | TOTAL_TX = 0 31 | TOTAL_RX = 0 32 | TOTAL_RAM = 0 33 | TOTAL_FS = 0 34 | NAME_FS = "unknown" 35 | NAME_IFACE = "unknown" 36 | 37 | UNAME = "unknown" 38 | CPUS = 0 39 | CPU_NAME = "unknown" 40 | DURATION = 0.0 41 | 42 | GPU_NAME = None 43 | GPU_DRIVER = None 44 | AVERAGE_GPU_LOAD = 0 45 | TOTAL_GPU_RAM = 0 46 | 
MAX_USED_GPU_RAM = 0

HOST = socket.gethostname()

# The number of plots on the graph
NUMBER_OF_PLOTS = 5
RAM_DATA_POSITION = 1

# The default format
OUTPUT_TYPE = "pngcairo"
OUTPUT_EXT = "png"

labels = []


# Check that the available gnuplot has the required minimum version
p = run_or_fail("gnuplot", "--version", stdout=subprocess.PIPE)
version = scan(r"gnuplot (\S+)", str, p.stdout.readline().decode())
if not is_version_ge(version, GNUPLOT_VERSION_EXPECTED):
    fail(
        f"gnuplot version too low. Need at least {GNUPLOT_VERSION_EXPECTED} found {version}")


def split_data_file(session):
    """Split `<session>.txt` into separate sar and psu data files.

    Comment lines (starting with '#') and lines tagged 'sar' go into the
    sar file; lines tagged 'psu' go into the psu file (tags stripped).
    Returns [sar_path, psu_path] inside a fresh temporary directory.
    """
    sar_data = []
    psu_data = []

    # Read the input file and dispatch each line by its tag
    with open(f"{session}.txt", 'r') as file:
        for line in file:
            if line.startswith('#'):
                sar_data.append(line.strip())
            elif line.startswith('sar'):
                sar_data.append(line.split(' ', 1)[1].strip())
            elif line.startswith('psu'):
                psu_data.append(line.split(' ', 1)[1].strip())

    temp_dir = tempfile.mkdtemp()
    sar_path = os.path.join(temp_dir, 'sar_data.txt')
    psu_path = os.path.join(temp_dir, 'psu_data.txt')

    with open(sar_path, 'w') as sar_data_file:
        sar_data_file.write("\n".join(sar_data))
        print(file=sar_data_file)

    with open(psu_path, 'w') as psu_data_file:
        psu_data_file.write("\n".join(psu_data))
        print(file=psu_data_file)

    # in order: sar file, mem file
    return [sar_path, psu_path]


def g(command):
    """Send one command line to the running gnuplot process.

    Accepts str or bytes.  Exactly one trailing newline is appended
    (the previous code appended a newline and then formatted the
    command through "%s\\n" again, writing it twice); `command` keeps
    its trailing newline so the `quit` detection below still matches.
    """
    global gnuplot

    if gnuplot.poll() is not None:
        print("Error: gnuplot not running!")
        return
    # Normalize to bytes explicitly instead of the previous
    # try/bare-except encode dance.
    if isinstance(command, str):
        command = command.encode()
    command = b"%s\n" % command
    gnuplot.stdin.write(command)
gnuplot.stdin.flush() 114 | 115 | if command == b"quit\n": 116 | while gnuplot.poll() is None: 117 | time.sleep(0.25) 118 | 119 | 120 | # Get gnuplot font size with respect to differences betwen SVG and PNG terminals 121 | def fix_size(size): 122 | if OUTPUT_TYPE == "svg": 123 | size = int(size*1.25) 124 | return size 125 | 126 | 127 | # Plot a single column of values from data.txt 128 | def plot(ylabel, title, sar_file, column, space=3, autoscale=None): 129 | if autoscale is None: 130 | g("set yrange [0:100]") 131 | g("set cbrange [0:100]") 132 | else: 133 | g("unset xdata") 134 | g("set yrange [0:*]") 135 | g(f"stats '{sar_file}' using {column}") 136 | g(f"set yrange [0:STATS_max*{autoscale}]") 137 | g(f"set cbrange [0:STATS_max*{autoscale}]") 138 | g("set xdata time") 139 | g(f"set ylabel '{ylabel}'") 140 | g(f"set title \"{{/:Bold {title}}}" + ("\\n" * space) + "\"") 141 | g(f"plot '{sar_file}' using 1:{column}:{column} title 'cpu' with boxes palette") 142 | 143 | def plot_stacked(ylabel, title, ram_file, column, tmpfs_color, other_cache_color, space=3, autoscale=None): 144 | if autoscale is None: 145 | g("set yrange [0:100]") 146 | g("set cbrange [0:100]") 147 | else: 148 | g("unset xdata") 149 | g("set yrange [0:*]") 150 | g(f"stats '{ram_data}' using {column}") 151 | g(f"set yrange [0:STATS_max*{autoscale}]") 152 | g(f"set cbrange [0:STATS_max*{autoscale}]") 153 | g("set xdata time") 154 | g(f"set ylabel '{ylabel}'") 155 | g(f"set title \"{{/:Bold {title}}}" + ("\\n" * space) + "\"") 156 | g('set style data histograms') 157 | g('set style histogram rowstacked') 158 | g('set key reverse below Left width -25') 159 | g(f"plot '{ram_file}' using 1:($3 + ${column}):{column} title 'RAM' with boxes palette, \ 160 | '' using 1:5 with boxes title 'Shared mem' lc rgb '{tmpfs_color}', \ 161 | '' using 1:($3 - $5) with boxes title 'Other cache (freed automatically)' lc rgb '{other_cache_color}'") 162 | g('unset key') 163 | # Read additional information from 'data.txt' 
comments 164 | def read_comments(sar_file): 165 | global START_DATE 166 | global END_DATE 167 | global AVERAGE_LOAD 168 | global MAX_USED_RAM 169 | global MAX_USED_FS 170 | global TOTAL_RAM 171 | global TOTAL_FS 172 | global NAME_FS 173 | global UNAME 174 | global CPUS 175 | global CPU_NAME 176 | global DURATION 177 | global MAX_RX 178 | global MAX_TX 179 | global TOTAL_RX 180 | global TOTAL_TX 181 | global NAME_IFACE 182 | global GPU_NAME 183 | global GPU_DRIVER 184 | global AVERAGE_GPU_LOAD 185 | global TOTAL_GPU_RAM 186 | global MAX_USED_GPU_RAM 187 | global NUMBER_OF_PLOTS 188 | 189 | data_version = None 190 | 191 | with open(sar_file, "r") as f: 192 | for line in f: 193 | value = None 194 | 195 | if len(line) <= 0: 196 | continue 197 | 198 | if line[0] != '#': 199 | if not START_DATE: 200 | START_DATE = scan("^(\S+)", str, line) 201 | END_DATE = scan("^(\S+)", str, line) 202 | 203 | value = scan("label: (.+)", str, line) 204 | if value is not None: 205 | key = scan("(\S+) label:", str, line) 206 | labels.append([key, value]) 207 | 208 | # Comments are not mixed with anything else, so skip 209 | continue 210 | 211 | # Override summary variables. 
If they're missing, their default values are kept 212 | value = scan("sargraph version: (\d+\.\d+)", str, line) 213 | if value is not None: 214 | data_version = value 215 | 216 | value = scan("machine: ([^,]+)", str, line) 217 | if value is not None: 218 | UNAME = value 219 | 220 | value = scan("cpu count: ([^,]+)", int, line) 221 | if value is not None: 222 | CPUS = value 223 | 224 | value = scan("cpu: ([^,\n]+)", str, line) 225 | if value is not None: 226 | CPU_NAME = value 227 | 228 | value = scan("observed disk: ([^,]+)", str, line) 229 | if value is not None: 230 | NAME_FS = value 231 | 232 | value = scan("observed network: ([^,]+)", str, line) 233 | if value is not None: 234 | NAME_IFACE = value 235 | 236 | value = scan("total ram: (\S+)", stof, line) 237 | if value is not None: 238 | TOTAL_RAM = value 239 | 240 | value = scan("max ram used: (\S+)", stof, line) 241 | if value is not None: 242 | MAX_USED_RAM = value 243 | 244 | value = scan("total disk space: (\S+)", stof, line) 245 | if value is not None: 246 | TOTAL_FS = value 247 | 248 | value = scan("max received: (\S+)", stof, line) 249 | if value is not None: 250 | MAX_RX = value 251 | 252 | value = scan("max sent: (\S+)", stof, line) 253 | if value is not None: 254 | MAX_TX = value 255 | 256 | value = scan("total received: (\S+)", stof, line) 257 | if value is not None: 258 | TOTAL_RX = value 259 | 260 | value = scan("total sent: (\S+)", stof, line) 261 | if value is not None: 262 | TOTAL_TX = value 263 | 264 | value = scan("duration: (\S+)", stof, line) 265 | if value is not None: 266 | DURATION = value 267 | 268 | value = scan("max disk used: (\S+)", stof, line) 269 | if value is not None: 270 | MAX_USED_FS = value 271 | 272 | value = scan("average load: (\S+)", stof, line) 273 | if value is not None: 274 | AVERAGE_LOAD = value 275 | 276 | value = scan("total gpu ram: (\S+)", stof, line) 277 | if value is not None: 278 | TOTAL_GPU_RAM = value 279 | 280 | value = scan("max gpu ram used: (\S+)", stof, 
line)
    if value is not None:
        MAX_USED_GPU_RAM = value

    value = scan("gpu: ([^,\n]+)", str, line)
    if value is not None:
        GPU_NAME = value

    value = scan("gpu driver: ([^,\n]+)", str, line)
    if value is not None:
        GPU_DRIVER = value

    value = scan("average gpu load: (\S+)", stof, line)
    if value is not None:
        AVERAGE_GPU_LOAD = value

    # Only the major.minor prefix of the running version is compared against
    # the version recorded in the data file
    if data_version != scan("^(\d+\.\d+)", str, SARGRAPH_VERSION):
        print("Warning: the data comes from an incompatible version of sargraph")

    # Translate the values to their value-unit representations
    TOTAL_RAM = unit_str(TOTAL_RAM, DATA_UNITS)
    MAX_USED_RAM = unit_str(MAX_USED_RAM, DATA_UNITS)

    TOTAL_FS = unit_str(TOTAL_FS, DATA_UNITS)
    MAX_USED_FS = unit_str(MAX_USED_FS, DATA_UNITS)

    MAX_RX = unit_str(MAX_RX, SPEED_UNITS)
    MAX_TX = unit_str(MAX_TX, SPEED_UNITS)

    TOTAL_RX = unit_str(TOTAL_RX, DATA_UNITS)
    TOTAL_TX = unit_str(TOTAL_TX, DATA_UNITS)

    # GPU fields stay 0 when the data file carries no GPU information;
    # a truthy (non-zero) value means a GPU was observed
    if TOTAL_GPU_RAM:
        TOTAL_GPU_RAM = unit_str(TOTAL_GPU_RAM, DATA_UNITS)
        # Add GPU RAM utilization and GPU utilization plots
        NUMBER_OF_PLOTS += 2
        if MAX_USED_GPU_RAM:
            MAX_USED_GPU_RAM = unit_str(MAX_USED_GPU_RAM, DATA_UNITS)

    DURATION = unit_str(DURATION, TIME_UNITS, 60)


def graph(session, tmpfs_color, other_cache_color, fname='plot'):
    """Render the session's collected data to a plot file.

    The output format is taken from the SARGRAPH_OUTPUT_TYPE environment
    variable when set, otherwise from the extension of `fname`
    (png/svg/ascii/html, defaulting to png). ascii and html rendering is
    delegated to servis_graph(); everything else is drawn by piping
    commands into a gnuplot subprocess via g().

    NOTE(review): this function mutates module globals (OUTPUT_TYPE,
    OUTPUT_EXT, labels, gnuplot) and relies on read_comments() having
    populated the summary globals — the gnuplot command order below is
    significant, so the code is left untouched.
    """
    global OUTPUT_TYPE
    global OUTPUT_EXT

    global labels

    global gnuplot

    labels = []

    # The default format
    OUTPUT_TYPE = "pngcairo"
    OUTPUT_EXT = "png"
    if "SARGRAPH_OUTPUT_TYPE" in os.environ:
        otype = os.environ["SARGRAPH_OUTPUT_TYPE"].lower()

        # png is the default, so don't change anything
        if otype != "png":
            OUTPUT_TYPE = otype
            OUTPUT_EXT = otype
    elif fname.lower().endswith('.png'):
        # png is the default, so don't change anything
        pass
    elif fname.lower().endswith('.svg'):
        OUTPUT_TYPE = "svg"
        OUTPUT_EXT = "svg"
    elif fname.lower().endswith('.ascii'):
        OUTPUT_TYPE = "ascii"
        OUTPUT_EXT = "ascii"
    elif fname.lower().endswith('.html'):
        OUTPUT_TYPE = "html"
        OUTPUT_EXT = "html"
    else:
        # Unknown extensions silently fall back to png
        pass
        # fail("unknown graph extension")

    # Leave just the base name
    fname = cut_suffix(fname, f".{OUTPUT_EXT}")
    sar_file, ram_file = split_data_file(session)

    # ASCII plots have their own routine
    if OUTPUT_TYPE == "ascii":
        return servis_graph(sar_file, ram_file, fname)

    # HTML plots have their own routine
    if OUTPUT_TYPE == "html":
        return servis_graph(sar_file, ram_file, fname, "html")

    # Parse the '#' summary/label comments so the globals used below are set
    read_comments(sar_file)

    gnuplot = run_or_fail("gnuplot", stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE)

    sdt = datetime.datetime.strptime(START_DATE, '%Y-%m-%d-%H:%M:%S')
    edt = datetime.datetime.strptime(END_DATE, '%Y-%m-%d-%H:%M:%S')

    # Pad the x range by 1% on each side; enforce a floor so very short
    # sessions still get a visible margin
    seconds_between = (edt - sdt).total_seconds()
    if seconds_between < 100:
        seconds_between = 100

    nsdt = sdt - datetime.timedelta(seconds=(seconds_between * 0.01))
    nedt = edt + datetime.timedelta(seconds=(seconds_between * 0.01))

    g(f"set terminal {OUTPUT_TYPE} size 1200,1600 background '#332d37' font 'monospace,{fix_size(8)}'")

    g(f"set ylabel tc rgb 'white' font 'monospace,{fix_size(8)}'")

    g("set datafile commentschars '#'")

    g("set timefmt '%s'")
    g("set xdata time")
    g("set border lc rgb 'white'")
    g("set key tc rgb 'white'")
    # Overrides the '%s' timefmt above with the format the data actually uses
    g("set timefmt '%Y-%m-%d-%H:%M:%S'")
    g("set xtics format '%H:%M:%S'")
    g(f"set xtics font 'monospace,{fix_size(8)}' tc rgb 'white'")
    g(f"set ytics font 'monospace,{fix_size(8)}' tc rgb 'white'")
    g("set grid xtics ytics ls 12 lc rgb '#c4c2c5'")
    g("set style fill solid")
    # Green-to-red palette used for the load coloring
    g("set palette defined ( 0.0 '#00af91', 0.25 '#00af91', 0.75 '#d83829', 1.0 '#d83829' )")
    g("unset colorbox")
    g("unset key")
    g("set rmargin 6")

    g(f"set output '{fname}.{OUTPUT_EXT}'")

    # {/:Bold ...} is gnuplot enhanced-text markup; '\@' presumably keeps a
    # literal '@' in enhanced mode — TODO confirm
    title_machine = f"Running on {{/:Bold {HOST}}} \@ {{/:Bold {UNAME}}}, {{/:Bold {CPUS}}} threads x {{/:Bold {CPU_NAME}}}"
    title_specs = f"Total ram: {{/:Bold {TOTAL_RAM}}}, Total disk space: {{/:Bold {TOTAL_FS}}}"
    if TOTAL_GPU_RAM != 0:
        title_gpu = f"\\nGPU: {{/:Bold {GPU_NAME}}} (driver {{/:Bold {GPU_DRIVER}}}, total ram: {{/:Bold {TOTAL_GPU_RAM}}})"
    else:
        title_gpu = ""
    title_times = f"Duration: {{/:Bold {START_DATE}}} .. {{/:Bold {END_DATE}}} ({DURATION})"

    g(f"set multiplot layout {NUMBER_OF_PLOTS},1 title \"\\n{title_machine}\\n{title_specs}{title_gpu}\\n{title_times}\" offset screen -0.475, 0 left tc rgb 'white'")

    g(f"set title tc rgb 'white' font 'monospace,{fix_size(11)}'")

    g(f"set xrange ['{nsdt.strftime('%Y-%m-%d-%H:%M:%S')}':'{nedt.strftime('%Y-%m-%d-%H:%M:%S')}']")

    # Draw a marker line plus a numbered label box for every recorded label;
    # alternate the box height so adjacent labels don't overlap
    i = 0
    for label in labels:
        if i % 2 == 0:
            offset = 1.08
        else:
            offset = 1.20

        i = i + 1

        # Label text is truncated to 30 characters
        content = f"{{[{i}] {label[1][0:30]}"
        length = len(label[1][0:30]) + len(str(i)) + 5
        if OUTPUT_EXT == "svg":
            # svg text metrics differ; shrink the box — empirical factor
            length *= 0.75

        # Draw the dotted line
        g(f"set arrow nohead from '{label[0]}', graph 0.01 to '{label[0]}', graph {offset-0.04} front lc rgb '#e74a3c' dt 2")

        # Draw the small rectangle at its bottom
        g(f"set object rect at '{label[0]}', graph 0.0 size char 0.5, char 0.5 front lc rgb '#d83829' fc rgb '#f15f32'")

        # Draw the label rectangle
        g(f"set object rect at '{label[0]}', graph {offset} size char {length}, char 1.3 fs border lc rgb '#d83829' fc rgb '#f15f32'")

        # Add text to the label
        g(f"set label at '{label[0]}', graph {offset} '{content}' center tc rgb 'white' font 'monospace,{fix_size(7)}'")

    # Reserve vertical space above each plot depending on how many label
    # rows were drawn (0 labels -> 1, 1 label -> 2, more -> 3)
    if i <= 0:
        space = 1
    elif i <= 1:
        space = 2
    else:
        space = 3

    # Black backdrop behind all plots
    g("set object rectangle from graph 0, graph 0 to graph 2, graph 2 behind fillcolor rgb '#000000' fillstyle solid noborder")

    # Set scale for plots displayed in relative units (%)
    plot("CPU load (%)",
         f"CPU load (average = {AVERAGE_LOAD:.2f} %)", sar_file, 2, space=space)
    plot_stacked(f"RAM usage (100% = {TOTAL_RAM})",
                 f"RAM usage (max = {MAX_USED_RAM})", ram_file, 4, tmpfs_color, other_cache_color, space=space)
    plot(f"FS usage (100% = {TOTAL_FS})", f"{NAME_FS} usage (max = {MAX_USED_FS})",
         sar_file, 3, space=space)

    plot(f"{NAME_IFACE} received (Mb/s)",
         f"{NAME_IFACE} data received (max = {MAX_RX}, total = {TOTAL_RX})",
         sar_file, 4, space=space, autoscale=1.2)
    plot(f"{NAME_IFACE} sent (Mb/s)",
         f"{NAME_IFACE} data sent (max = {MAX_TX}, total = {TOTAL_TX})",
         sar_file, 5, space=space, autoscale=1.2)

    # GPU params
    if TOTAL_GPU_RAM != 0:
        plot("GPU load (%)",
             f"GPU load (average = {AVERAGE_GPU_LOAD} %)", sar_file, 6, space=space)
        plot(f"GPU RAM usage (100% = {TOTAL_GPU_RAM})",
             f"GPU RAM usage (max = {MAX_USED_GPU_RAM})", sar_file, 7, space=space)

    g("unset multiplot")
    g("unset output")
    g("quit")


def read_data(sar_file, ram_file):
    """Read the sar and RAM data files back into plottable series.

    Returns (xdata, xdata_ram, ydata): timestamps of the sar samples,
    timestamps of the RAM samples, and one y-series list per plot.
    Lines starting with '#' are summary/label comments and are skipped.
    """
    xdata = list()
    xdata_ram = list()
    ydata = [[] for _ in range(NUMBER_OF_PLOTS)]
    with open(sar_file, "r") as f:
        for line in f:
            if(line[0] != '#'):
                line = line.split(" ")
                date = datetime.datetime.strptime(line[0], '%Y-%m-%d-%H:%M:%S')
                xdata.append(date)
                # The RAM series lives in the separate ram_file, so sar
                # columns shift by one past RAM_DATA_POSITION
                for i in range(NUMBER_OF_PLOTS):
                    if i != RAM_DATA_POSITION:
                        ydata[i].append(stof(line[i+1 - int(i > RAM_DATA_POSITION)]))
    with open(ram_file, 'r') as f:
        for line in f:
            if(line[0] != '#'):
                line = line.split(" ")
                # RAM samples carry sub-second resolution
                date = datetime.datetime.strptime(line[0], '%Y-%m-%d-%H:%M:%S.%f')
                xdata_ram.append(date)
                # Stored value is free%; plot used% instead
                ydata[RAM_DATA_POSITION].append(100-stof(line[1]))

    return (xdata, xdata_ram, ydata)
def convert_labels_to_tags(labels):
    """Convert [date-string, name] label pairs into servis tag dicts.

    Dates use the '%Y-%m-%d-%H:%M:%S' format and are interpreted as UTC.
    Timestamps are emitted as epoch seconds truncated to millisecond
    resolution, matching the x-axis values produced by servis_graph().
    """
    tags = []
    for label_date, label_name in labels:
        parsed = datetime.datetime.strptime(
            label_date, '%Y-%m-%d-%H:%M:%S')
        label_ts = int(parsed.replace(
            tzinfo=datetime.timezone.utc).timestamp()*1000)/1000
        tags.append({'name': label_name,
                     'timestamp': label_ts})
    return tags


def servis_graph(sar_file, ram_file, fname='plot', output_ext='ascii'):
    """Render the collected data with servis ('ascii' terminal or 'html').

    Reads the summary comments and data series from sar_file/ram_file and
    hands them to servis.render_multiple_time_series_plot. The RAM plot
    (index 1) uses its own, finer-grained timestamp series.
    """
    read_comments(sar_file)
    xdata, xdata_ram, ydata = read_data(sar_file, ram_file)
    titles = [f"CPU load (average = {AVERAGE_LOAD} %)",
              f"RAM usage (max = {MAX_USED_RAM})",
              f"{NAME_FS} usage (max = {MAX_USED_FS})",
              f"{NAME_IFACE} data received (max = {MAX_RX})",
              f"{NAME_IFACE} data sent (max = {MAX_TX})"]

    if TOTAL_GPU_RAM != 0:
        titles.extend([
            f"GPU load (average = {AVERAGE_GPU_LOAD} %)",
            f"GPU RAM usage (max = {MAX_USED_GPU_RAM})"
        ])

    y_titles = ["CPU load (%)",
                f"RAM usage (100% = {TOTAL_RAM})",
                f"FS usage (100% = {TOTAL_FS})",
                f"{NAME_IFACE} received",
                f"{NAME_IFACE} sent"]

    if TOTAL_GPU_RAM != 0:
        y_titles.extend([
            "GPU load (%)",
            f"GPU RAM usage (100% = {TOTAL_GPU_RAM})"
        ])

    # Epoch seconds with millisecond resolution, for both sampling clocks
    xdata_to_int = [int(timestamp.replace(
        tzinfo=datetime.timezone.utc).timestamp()*1000)/1000
        for timestamp in xdata]
    xdata_ram_to_int = [int(timestamp.replace(
        tzinfo=datetime.timezone.utc).timestamp()*1000)/1000
        for timestamp in xdata_ram]

    summary = f"Running on {UNAME}, {CPUS} threads x {CPU_NAME}\n"
    summary += f"Total ram: {TOTAL_RAM}, Total disk space: {TOTAL_FS}\n"
    if TOTAL_GPU_RAM != 0:
        # Fix: terminate the GPU line so 'Duration:' starts on its own line
        summary += f"GPU: {GPU_NAME} (driver {GPU_DRIVER}), total ram: {TOTAL_GPU_RAM}\n"
    summary += f"Duration: {START_DATE} .. {END_DATE} ({DURATION})"

    # CPU/RAM/FS (and GPU) plots are percentages; network plots autoscale
    y_ranges = [
        (0, 100),
        (0, 100),
        (0, 100),
        None,
        None,
    ]

    if TOTAL_GPU_RAM != 0:
        y_ranges.extend([
            (0, 100),
            (0, 100)
        ])

    from servis import render_multiple_time_series_plot
    if output_ext == 'ascii':
        xdatas = [[xdata_to_int]] * (NUMBER_OF_PLOTS - 1)
        xdatas.insert(1, [xdata_ram_to_int])

        render_multiple_time_series_plot(
            ydatas=[[yd] for yd in ydata],
            xdatas=xdatas,
            title=summary,
            subtitles=titles,
            xtitles=['time'] * NUMBER_OF_PLOTS,
            xunits=[None] * NUMBER_OF_PLOTS,
            ytitles=y_titles,
            yunits=[None] * NUMBER_OF_PLOTS,
            y_ranges=y_ranges,
            outpath=Path(fname),
            trimxvalues=False,
            bins=0,
            figsize=(900, 700)
        )
    elif output_ext == 'html':
        converted_labels = convert_labels_to_tags(labels)
        # Fix: replicate the timestamp list once per remaining plot.
        # The original '[xdata_to_int * (NUMBER_OF_PLOTS - 2)]' concatenated
        # the list N-2 times into a single element, so the x-series count
        # didn't match the y-series count.
        xdatas = [xdata_to_int] + [xdata_ram_to_int] \
            + [xdata_to_int] * (NUMBER_OF_PLOTS - 2)
        render_multiple_time_series_plot(
            ydatas=ydata,
            xdatas=xdatas,
            title=summary,
            subtitles=titles,
            xtitles=['time'] * NUMBER_OF_PLOTS,
            xunits=[None] * NUMBER_OF_PLOTS,
            ytitles=y_titles,
            yunits=[None] * NUMBER_OF_PLOTS,
            y_ranges=y_ranges,
            outpath=Path(fname),
            outputext=['html'],
            trimxvalues=False,
            figsize=(1200, 1600),
            tags=[converted_labels] * NUMBER_OF_PLOTS,
            setgradientcolors=True
        )
#!/usr/bin/env python3

#
# (c) 2019-2023 Antmicro
# License: Apache-2.0
#

import argparse
import sys
import time

import graph
import watch

from common import *

# Declare and parse command line flags
parser = argparse.ArgumentParser()
parser.add_argument('session', metavar='SESSION-NAME', type=str, nargs='?', default=None, help='sargraph session name')
parser.add_argument('command', metavar='COMMAND', type=str, nargs='*', help='send command')
parser.add_argument('-f', metavar='DEVICE-NAME', type=str, nargs='?', default=None, dest='fsdev', help='observe a chosen filesystem')
parser.add_argument('-m', metavar='MOUNT-DIR', type=str, nargs='?', default=None, dest='fspath', help='observe a chosen filesystem')
parser.add_argument('-n', metavar='IFACE-NAME', type=str, nargs='?', default=None, dest='iface', help='observe chosen network iface')
parser.add_argument('-o', metavar='OUTPUT-NAME', type=str, nargs='?', default='data', dest='name', help='set output base names')
parser.add_argument('-t', metavar='TMPFS-COLOR', type=str, nargs='?', default='#f2c71b', dest='tmpfs', help='set tmpfs plot color' )
parser.add_argument('-c', metavar='CACHE-COLOR', type=str, nargs='?', default='#ee7af0', dest='cache', help='set cache plot color' )
args = parser.parse_args()


def send(sid, msg):
    """Deliver one command line to the session's screen daemon."""
    p = subprocess.Popen(["screen", "-S", sid, "-X", "stuff", f"{msg}\n"])
    # Block until screen has delivered the command (replaces the former
    # poll()+sleep busy loop)
    p.wait()


# Check if sar is available
p = run_or_fail("sar", "-V", stdout=subprocess.PIPE)

# Check if screen is available
p = run_or_fail("screen", "-v", stdout=subprocess.PIPE)
version = scan("Screen version (\d+)", int, p.stdout.readline().decode())
if version is None:
    fail("'screen' tool returned unknown output")

# If the script was run with no parameters, run in background and gather data
if args.session is None:
    # Find the device mounted at the requested path
    if args.fspath:
        args.fspath = os.path.realpath(args.fspath)
        with open("/proc/self/mounts", "r") as f:
            # Fix: iterate over the file instead of 'while fsdev is None:
            # readline()', which spun forever at EOF when no entry matched
            for mount_line in f:
                args.fsdev = scan(f"^(/dev/\S+)\s+{re.escape(args.fspath)}\s+", str, mount_line)
                if args.fsdev is not None:
                    break
        if not args.fsdev:
            fail(f"no device is mounted on {args.fspath}")

    watch.watch(args.name, args.fsdev, args.iface, args.tmpfs, args.cache)
    sys.exit(0)

# Now handle the commands

# Check if a command was provided
if len(args.command) <= 0:
    fail("command not provided")

# Get session name and command name
sid = args.session
cmd = args.command

if cmd[0] == "start":
    print(f"Starting sargraph session '{sid}'")

    # Spawn watcher process, *sys.argv[3:] is all arguments after
    # 'chart start' + '-o [log name]' if not given
    if "-o" not in sys.argv:
        sys.argv += ["-o", sid]
    p = subprocess.Popen(["screen", "-Logfile", f"{sid}.log", "-dmSL", sid, os.path.realpath(__file__), *sys.argv[3:]])

    p.wait()
    # Give the daemonized watcher a moment to come up
    time.sleep(1)
    print(f"Session '{sid}' started")
elif cmd[0] == "stop":
    print(f"Terminating sargraph session '{sid}'")

    # Look up the screen pid; int('') raises ValueError when no session
    # matches (narrowed from a bare 'except:')
    try:
        gpid = int(os.popen(f"screen -ls | grep '.{sid}' | tr -d ' \t' | cut -f 1 -d '.'").read())
    except ValueError:
        print("Warning: cannot find pid.")
        gpid = -1
    # 'command:q:<name>' asks the watcher to summarize, plot and exit
    if len(cmd) < 2:
        send(sid, "command:q:")
    else:
        send(sid, f"command:q:{cmd[1]}")
    if gpid == -1:
        print("Waiting 3 seconds.")
        time.sleep(3)
    else:
        while pid_running(gpid):
            time.sleep(0.25)
elif cmd[0] == "label":
    # Check if the label name was provided
    if len(cmd) < 2:
        fail("label command requires an additional parameter")
    print(f"Adding label '{cmd[1]}' to sargraph session '{sid}'.")
    send(sid, f"label:{cmd[1]}")
elif cmd[0] == 'save':
    print(f"Saving graph from session '{sid}'.")
    if len(cmd) < 2:
        send(sid, "command:s:")
    else:
        send(sid, f"command:s:{cmd[1]}")
elif cmd[0] == 'plot':
    if len(cmd) < 2:
        graph.graph(sid, args.tmpfs, args.cache)
    else:
        graph.graph(sid, args.tmpfs, args.cache, cmd[1])
else:
    fail(f"unknown command '{cmd[0]}'")
the label name was provided 101 | if len(cmd) < 2: 102 | fail("label command requires an additional parameter") 103 | print(f"Adding label '{cmd[1]}' to sargraph session '{sid}'.") 104 | send(sid, f"label:{cmd[1]}") 105 | elif cmd[0] == 'save': 106 | print(f"Saving graph from session '{sid}'.") 107 | if len(cmd) < 2: 108 | send(sid, "command:s:") 109 | else: 110 | send(sid, f"command:s:{cmd[1]}") 111 | elif cmd[0] == 'plot': 112 | if len(cmd) < 2: 113 | graph.graph(sid, args.tmpfs, args.cache) 114 | else: 115 | graph.graph(sid, args.tmpfs, args.cache, cmd[1]) 116 | else: 117 | fail(f"unknown command '{cmd[0]}'") 118 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | tuttest README.md | grep -v '^\$' | bash - 6 | -------------------------------------------------------------------------------- /watch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # 4 | # (c) 2019-2023 Antmicro 5 | # License: Apache-2.0 6 | # 7 | 8 | 9 | import datetime 10 | import fcntl 11 | import os 12 | import re 13 | import select 14 | import signal 15 | import subprocess 16 | import sys 17 | import time 18 | import psutil 19 | import sched 20 | from threading import Thread, Lock 21 | 22 | import graph 23 | 24 | from common import * 25 | 26 | die = 0 27 | 28 | 29 | # Initialize summary variables 30 | SAMPLE_NUMBER = 0 31 | TOTAL_RAM = 0 32 | START_DATE = "" 33 | END_DATE = "" 34 | TOTAL_LOAD = 0.0 35 | MAX_USED_RAM = 0 36 | MAX_USED_FS = 0 37 | TOTAL_FS = 0 38 | MAX_TX = 0 39 | MAX_RX = 0 40 | START_TX = 0 41 | START_RX = 0 42 | END_TX = 0 43 | END_RX = 0 44 | 45 | TOTAL_GPU_LOAD = 0.0 46 | TOTAL_GPU_RAM = 0 47 | MAX_USED_GPU_RAM = 0 48 | DATA_FILE_HANDLE = None 49 | 50 | FS_NAME = None 51 | FS_SAR_INDEX = None 52 | 53 | IFACE_NAME = None 54 | 
IFACE_SAR_INDEX = None

# Handle SIGTERM
def kill_handler(signum, frame):
    """Flag the watch loop to shut down cleanly."""
    global die
    die = 1


class ThreadSafeFileWriter:
    """Appends whole lines to a file, serialized across threads."""

    def __init__(self, filename):
        self.filename = filename
        self.lock = Lock()

    def write(self, data):
        # Open per write so the file is always flushed; the lock keeps
        # concurrent writers (sar loop vs. meminfo sampler) from interleaving
        with self.lock:
            with open(self.filename, 'a') as handle:
                handle.write(data + '\n')


# Read a single table from sar output
def read_table(psar):
    """Parse one whitespace-separated table from sar's stdout.

    Returns a dict mapping column title to the list of its values; the
    first column is always relabeled 'time'. Raises ValueError when the
    sar subprocess has exited.
    """
    stream = psar.stdout

    # Skip blank lines until the header row appears
    header = []
    while not header:
        header = stream.readline().decode().split()
        if not header and psar.poll() is not None:
            raise ValueError("The subprocess has exited")

    # The first columns is always just time
    header[0] = 'time'

    table = {title: [] for title in header}

    # Rows continue until a blank line terminates the table
    while True:
        row = stream.readline().decode().split()
        if not row:
            break
        if psar.poll() is not None:
            raise ValueError("The subprocess has exited")

        for column, value in enumerate(row):
            table[header[column]].append(value)

    return table


# Read received/sent bytes from a given interface's sys stats
def read_iface_stats(iface):
    """Return the (rx_bytes, tx_bytes) kernel counters for `iface`."""
    stats = f"/sys/class/net/{iface}/statistics"
    with open(f"{stats}/rx_bytes") as f:
        rx = scan(r"(\d+)", int, f.readline())
    with open(f"{stats}/tx_bytes") as f:
        tx = scan(r"(\d+)", int, f.readline())
    return rx, tx


# Initialize 'data.txt' where the data is dumped
def initialize(session, machine):
    """Write the '<session>.txt' header and detect machine parameters.

    `machine` is sar's banner line; sets TOTAL_RAM (KiB) and, when an
    NVIDIA GPU responds to nvidia-smi, TOTAL_GPU_RAM (MiB).
    """
    global TOTAL_RAM
    global TOTAL_GPU_RAM

    with open("/proc/meminfo") as f:
        TOTAL_RAM = int(scan("MemTotal:\s+(\d+)", float, f.read()))

    # First two banner tokens, e.g. kernel name and release
    tokens = machine.split(" ")[0:2]
    uname = f"{tokens[0]} {tokens[1]}"

    # CPU count is embedded in the banner as "(N CPU)"
    cpus = int(machine.split(" CPU)")[0].split("(")[-1])

    cpu_name = "unknown"
    with open("/proc/cpuinfo") as f:
        for line in f:
            if "model name" in line:
                cpu_name = line.replace("\n", "").split(": ")[1]
                break

    header = [
        f"# sargraph version: {SARGRAPH_VERSION}",
        f"pid: {os.getpid()}",
        f"machine: {uname}",
        f"cpu count: {cpus}",
        f"cpu: {cpu_name}"
    ]

    # Probe for an NVIDIA GPU; a missing nvidia-smi just leaves the header
    # without GPU fields
    try:
        pgpu = subprocess.run(
            'nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader,nounits'.split(' '),
            capture_output=True
        )
        if pgpu.returncode == 0:
            gpuname, gpudriver, memory_total = pgpu.stdout.decode('utf-8').rsplit(', ', 2)
            header.extend([
                f"gpu: {gpuname}",
                f"gpu driver: {gpudriver}"
            ])
            TOTAL_GPU_RAM = int(memory_total)
    except Exception as e:
        print(e)

    with open(f"{session}.txt", "w") as f:
        print(*header, sep=", ", file=f)


# Add a summary comment to 'data.txt'
def summarize(session):
    """Append a '#'-prefixed summary comment line to '<session>.txt'."""
    # Is there anything to be summarized?
    if SAMPLE_NUMBER == 0:
        return

    average_load = TOTAL_LOAD / float(SAMPLE_NUMBER)
    # sar/psutil report KiB and MiB; normalize everything to bytes
    max_used_ram = MAX_USED_RAM * 1024.0
    total_ram = TOTAL_RAM * 1024.0
    max_used_fs = MAX_USED_FS * 1024.0 * 1024.0
    total_fs = TOTAL_FS * 1024 * 1024
    max_tx = MAX_TX / 128  # kB/s to Mb/s
    max_rx = MAX_RX / 128  # kB/s to Mb/s
    total_tx = END_TX-START_TX
    total_rx = END_RX-START_RX

    time_format = '%Y-%m-%d %H:%M:%S'
    started = datetime.datetime.strptime(START_DATE, time_format)
    ended = datetime.datetime.strptime(END_DATE, time_format)
    delta_t = (ended - started).total_seconds()

    # NOTE(review): total_rx/total_tx come from /sys rx_bytes/tx_bytes
    # counters (bytes) but are labeled 'b' here — confirm intended unit
    summary = [
        f"# total ram: {total_ram:.2f} B",
        f"total disk space: {total_fs:.2f} B",
        f"max ram used: {max_used_ram:.2f} B",
        f"max disk used: {max_used_fs:.2f} B",
        f"average load: {average_load:.2f} %",
        f"observed disk: {FS_NAME}",
        f"max received: {max_rx:.2f} Mb/s",
        f"max sent: {max_tx:.2f} Mb/s",
        f"observed network: {IFACE_NAME}",
        f"duration: {delta_t} seconds",
        f"total received: {total_rx} b",
        f"total sent: {total_tx} b"
    ]

    if TOTAL_GPU_RAM != 0:
        summary.extend([
            f"total gpu ram: {TOTAL_GPU_RAM * 1024 * 1024:.2f} B",  # default units are MiB
            f"max gpu ram used: {MAX_USED_GPU_RAM * 1024 * 1024:.2f} B",  # default units are MiB
            f"average gpu load: {TOTAL_GPU_LOAD / SAMPLE_NUMBER:.2f} %"
        ])

    DATA_FILE_HANDLE.write(", ".join([str(entry) for entry in summary]))


def get_meminfo(scheduler):
    """Sample RAM usage via psutil, log a 'psu' record and re-arm itself."""
    global MAX_USED_RAM
    global DATA_FILE_HANDLE

    # Re-schedule first so sampling continues at a steady 0.1 s cadence
    scheduler.enter(0.1, 1, get_meminfo, (scheduler,))

    now = datetime.datetime.now()
    stamp = now.strftime("%Y-%m-%d") + "-" + now.strftime("%H:%M:%S.%f")

    ram = psutil.virtual_memory()
    used_kib = (ram.total - ram.free) // 1024
    if used_kib > MAX_USED_RAM:
        MAX_USED_RAM = used_kib

    # Percentages of total RAM: free, page cache, used, shared (tmpfs)
    fields = [
        stamp,
        100 * ram.free / ram.total,
        100 * ram.cached / ram.total,
        100 * ram.used / ram.total,
        100 * ram.shared / ram.total,
    ]
    DATA_FILE_HANDLE.write(" ".join(["psu"] + [str(value) for value in fields]))

# Run sar and gather data from it
def watch(session, fsdev, iface, tmpfs_color, other_cache_color):
    global SAMPLE_NUMBER
    global START_DATE
    global END_DATE
    global TOTAL_LOAD
    global MAX_USED_RAM
    global MAX_USED_FS
    global MAX_RX
    global MAX_TX
    global TOTAL_FS
    global START_RX
    global START_TX
    global END_RX
    global END_TX
    global TOTAL_RAM
    global FS_SAR_INDEX
    global FS_NAME
    global IFACE_NAME
    global IFACE_SAR_INDEX
    global TOTAL_GPU_LOAD
    global TOTAL_GPU_RAM
    global MAX_USED_GPU_RAM
    global DATA_FILE_HANDLE

    if DATA_FILE_HANDLE == None:
        DATA_FILE_HANDLE = ThreadSafeFileWriter(f"{session}.txt");

    global die

    # Was a graph already produced by save command from sargraph?
    dont_plot = False

    # ISO time format makes sar's first column parseable below
    my_env = os.environ
    my_env["S_TIME_FORMAT"] = "ISO"

    # sar emits one CPU (-u), network (-n DEV) and filesystem (-F) table
    # per second; read_table() consumes them in that order each iteration
    psar = run_or_fail("sar", "-F", "-u", "-n", "DEV", "1", stdout=subprocess.PIPE, env=my_env)

    # Run the 0.1 s RAM sampler (get_meminfo) on its own thread
    s = sched.scheduler(time.time, time.sleep)
    mem_ev = s.enter(0, 1, get_meminfo, (s,))
    thread = Thread(target = s.run)
    thread.start()

    # subprocess for GPU data fetching in the background
    try:
        pgpu = subprocess.Popen(
            'nvidia-smi --query-gpu=utilization.gpu,memory.used --format=csv,noheader,nounits -l 1'.split(' '),
            stdout=subprocess.PIPE,
            env=my_env
        )
    except:
        # No nvidia-smi available - GPU statistics are simply skipped
        pgpu = None

    # First sar line is the machine banner; the second is discarded
    machine = psar.stdout.readline().decode()
    initialize(session, machine)
    psar.stdout.readline()

    signal.signal(signal.SIGTERM, kill_handler)

    # Make stdin nonblocking to continue working when no command is sent
    flags = fcntl.fcntl(sys.stdin, fcntl.F_GETFL)
    fcntl.fcntl(sys.stdin, fcntl.F_SETFL, flags | os.O_NONBLOCK)

    # Gather data from sar output
    curr_gpu_util = 0
    curr_gpu_mem = 0

    while 1:
        # Await sar output or a command sent from command handler in sargraph.py
        readlist = [psar.stdout, sys.stdin]
        if pgpu:
            readlist.append(pgpu.stdout)
        rlist, _, _ = select.select(readlist, [], [], 0.25)
        now = datetime.datetime.now()
        if sys.stdin in rlist:
            # Command protocol (sent via 'screen -X stuff'):
            #   command:q:[name]  - summarize, plot (unless 'none') and quit
            #   command:s:[name]  - summarize and save a plot now
            #   label:<text>      - record a label comment in the data file
            label_line = sys.stdin.readline().replace("\n", "")
            if label_line.startswith("command:"):
                label_line = label_line[len("command:"):]
                if label_line.startswith("q:"):
                    label_line = label_line[len("q:"):]

                    # Stop the RAM sampler before writing the summary
                    list(map(s.cancel, s.queue))
                    summarize(session)
                    if label_line == "none":
                        pass
                    elif label_line:
                        graph.graph(session, tmpfs_color, other_cache_color, label_line)
                    elif not dont_plot:
                        graph.graph(session, tmpfs_color, other_cache_color)
                    dont_plot = True
                    die = 1
                    break
                elif label_line.startswith("s:"):
                    label_line = label_line[len("s:"):]

                    dont_plot = True

                    if label_line != "none":
                        list(map(s.cancel, s.queue))
                        summarize(session)
                        if not label_line:
                            graph.graph(session, tmpfs_color, other_cache_color)
                        else:
                            graph.graph(session, tmpfs_color, other_cache_color, label_line)
            elif label_line.startswith('label:'):
                label_line = label_line[len('label:'):]
                # Labels are stored as '#' comments next to the samples
                with open(f"{session}.txt", "a") as f:
                    timestamp = now.strftime("%Y-%m-%d-%H:%M:%S")
                    print(f"# {timestamp} label: {label_line}", file=f)
        if psar.stdout not in rlist:
            continue

        date = now.strftime("%Y-%m-%d")
        daytime = now.strftime("%H:%M:%S")

        # Read and process CPU data
        try:
            cpu_data = read_table(psar)
            if START_DATE == "":
                START_DATE = date + " " + daytime
            TOTAL_LOAD += stof(cpu_data["%user"][0])
            SAMPLE_NUMBER += 1

            # Fallback in case initialize() could not read /proc/meminfo
            if TOTAL_RAM == 0:
                TOTAL_RAM = psutil.virtual_memory().total // 1024

            # Read and process network data
            net_data = read_table(psar)
            if IFACE_SAR_INDEX is None:
                if iface:
                    IFACE_SAR_INDEX = net_data['IFACE'].index(iface)
                else:
                    # No interface requested: pick the one with the highest
                    # receive rate in the first sample
                    maxj, maxv = 0, 0
                    for j, used in enumerate(net_data['IFACE']):
                        v = stof(net_data['rxkB/s'][j])
                        if maxv < v:
                            maxj, maxv = j, v
                    IFACE_SAR_INDEX = maxj
            if IFACE_NAME is None:
                IFACE_NAME = net_data['IFACE'][IFACE_SAR_INDEX]
            # Byte counters are cumulative, so totals are end minus start
            if START_RX <= 0 or START_TX <= 0:
                START_RX, START_TX = read_iface_stats(IFACE_NAME)
            END_RX, END_TX = read_iface_stats(IFACE_NAME)
            if MAX_RX < stof(net_data['rxkB/s'][IFACE_SAR_INDEX]):
                MAX_RX = stof(net_data['rxkB/s'][IFACE_SAR_INDEX])
            if MAX_TX < stof(net_data['txkB/s'][IFACE_SAR_INDEX]):
                MAX_TX = stof(net_data['txkB/s'][IFACE_SAR_INDEX])

            # Read and process FS data
            fs_data = read_table(psar)
            if FS_SAR_INDEX is None:
                if fsdev:
                    FS_SAR_INDEX = fs_data['FILESYSTEM'].index(fsdev)
                else:
                    # No device requested: pick the largest filesystem
                    maxj, maxv = 0, 0
                    for j, free in enumerate(fs_data['MBfsfree']):
                        v = stof(fs_data['MBfsfree'][j]) + stof(fs_data['MBfsused'][j])
                        if maxv < v:
                            maxj, maxv = j, v
                    FS_SAR_INDEX = maxj
            if FS_NAME is None:
                FS_NAME = fs_data["FILESYSTEM"][FS_SAR_INDEX]
            if TOTAL_FS == 0:
                TOTAL_FS = (stof(fs_data['MBfsused'][FS_SAR_INDEX]) + stof(fs_data['MBfsfree'][FS_SAR_INDEX]))
            if MAX_USED_FS < int(fs_data['MBfsused'][FS_SAR_INDEX]):
                MAX_USED_FS = int(fs_data['MBfsused'][FS_SAR_INDEX])

            END_DATE = date + " " + daytime
            timestamp = date + "-" + daytime
        except ValueError as e:
            # read_table() raises ValueError when sar terminates
            print("Sar process has exited - quitting sargraph")
            break

        if pgpu and pgpu.stdout in rlist:
            line = pgpu.stdout.readline().decode('utf-8')
            if pgpu.poll() is not None:
                print("nvidia-smi stopped working, reason:")
                print(line)
                print(f"Error code: {pgpu.returncode}")
                print("Closing the GPU statistics collection")
                pgpu = None
            else:
                try:
                    curr_gpu_util, curr_gpu_mem = [
                        int(val.strip()) for val in line.split(', ')
                    ]
                    if MAX_USED_GPU_RAM < curr_gpu_mem:
                        MAX_USED_GPU_RAM = curr_gpu_mem
                    TOTAL_GPU_LOAD += curr_gpu_util
                except ValueError:
                    print(f"nvidia-smi error readout: {line}")
                    if "Unknown Error" in line:
                        # No valid readouts from now on, let's terminate current nvidia-smi session
                        pgpu.terminate()
                        pgpu = None

        # One 'sar' record per second: time, cpu%, fs%, rx, tx [, gpu%, gpuram%]
        # NOTE(review): if nvidia-smi dies mid-session the GPU columns stop
        # being appended, so later rows have fewer fields — confirm readers
        # tolerate that
        line = [
            timestamp,
            cpu_data['%user'][0],
            fs_data['%fsused'][FS_SAR_INDEX],
            stof(net_data['rxkB/s'][IFACE_SAR_INDEX])/128,  # kB/s to Mb/s
            stof(net_data['txkB/s'][IFACE_SAR_INDEX])/128,  # kB/s to Mb/s
        ]
        if pgpu and TOTAL_GPU_RAM != 0:
            line.extend([
                f'{curr_gpu_util:.2f}',
                f'{curr_gpu_mem / TOTAL_GPU_RAM * 100.0:.2f}'
            ])
        DATA_FILE_HANDLE.write(" ".join(["sar"]+[str(i) for i in line]))

        if die:
            break

    # Stop the RAM sampler thread before exiting
    list(map(s.cancel, s.queue))
    thread.join()

    # This runs if we were stopped by SIGTERM and no plot was made so far
    if not dont_plot:
        summarize(session)
        graph.graph(session, tmpfs_color, other_cache_color)