├── .github ├── dependabot.yml └── workflows │ ├── release.yml │ └── unit_tests.yml ├── .gitignore ├── LICENSE ├── README.md ├── device_smi ├── __init__.py ├── amd.py ├── apple.py ├── base.py ├── cpu.py ├── device.py ├── intel.py ├── nvidia.py └── os.py ├── format ├── format.sh └── ruff.toml ├── requirements.txt ├── setup.py └── tests ├── cpu.py ├── gpu.py └── os.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: daily 7 | groups: 8 | github-actions: 9 | patterns: 10 | - "*" 11 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | release: 5 | types: [ published ] 6 | repository_dispatch: 7 | workflow_dispatch: 8 | inputs: 9 | upload_pypi: 10 | description: 'upload to PyPI' 11 | type: boolean 12 | required: false 13 | default: false 14 | 15 | jobs: 16 | build: 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - uses: actions/setup-python@v5 23 | with: 24 | python-version: 3.11 25 | cache: 'pip' 26 | 27 | - name: Install requirements 28 | run: pip install build setuptools twine -U 29 | 30 | - name: Build package 31 | run: | 32 | python -m build --sdist 33 | twine check dist/* 34 | 35 | - name: Upload sdist to pypi 36 | if: (github.event_name == 'release' || github.event.inputs.upload_pypi == 'true') && !cancelled() 37 | env: 38 | TWINE_USERNAME: "__token__" 39 | TWINE_PASSWORD: ${{ secrets.PYPI_KEY }} 40 | run: | 41 | python -m twine upload dist/*gz 42 | -------------------------------------------------------------------------------- /.github/workflows/unit_tests.yml: -------------------------------------------------------------------------------- 1 | name: Unit tests 2 | 3 | defaults: 4 | run: 5 | shell: bash -le {0} 6 | 7 | on: 8 | push: 9 | repository_dispatch: 10 | workflow_dispatch: 11 | 12 | env: 13 | CUDA_DEVICE_ORDER: PCI_BUS_ID 14 | 15 | concurrency: 16 | group: ${{ github.ref }}-workflow-unit-tests 17 | cancel-in-progress: true 18 | 19 | jobs: 20 | test: 21 | runs-on: ${{ matrix.os }} 22 | 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | os: [ ubuntu-latest, macos-latest, macos-13, windows-latest ] # macos-latest arm64, macos-13 intel 27 | steps: 28 | - uses: actions/checkout@v4 29 | 30 | - uses: actions/setup-python@v5 31 | with: 32 | python-version: 3.11 33 | cache: 'pip' 34 | 35 | - name: install 36 | run: pip install . 37 | 38 | - name: test os 39 | run: python tests/os.py 40 | 41 | - name: test cpu 42 | if: always() 43 | run: python tests/cpu.py 44 | 45 | freebsd: 46 | runs-on: ubuntu-latest 47 | steps: 48 | - uses: actions/checkout@v4 49 | 50 | - name: clean dir 51 | run: rm -rf .git 52 | 53 | - name: Test in FreeBSD 54 | uses: vmactions/freebsd-vm@v1 55 | with: 56 | copyback: false 57 | prepare: | 58 | env ASSUME_ALWAYS_YES=yes pkg install -y python py311-pip 59 | run: | 60 | python -V 61 | python -m venv venv 62 | . venv/bin/activate 63 | pip install . 
64 | python tests/os.py 65 | 66 | solaris: 67 | runs-on: ubuntu-latest 68 | steps: 69 | - uses: actions/checkout@v4 70 | 71 | - name: clean dir 72 | run: rm -rf .git 73 | 74 | - name: Test in Solaris 75 | uses: vmactions/solaris-vm@v1 76 | with: 77 | copyback: false 78 | run: | 79 | python -V 80 | python -m venv venv 81 | source venv/bin/activate 82 | pip install . 83 | python tests/os.py 84 | 85 | wsl: 86 | runs-on: windows-latest 87 | steps: 88 | - uses: actions/checkout@v4 89 | 90 | - uses: Vampire/setup-wsl@v6 91 | with: 92 | distribution: Ubuntu-24.04 93 | additional-packages: 94 | python3-pip 95 | python3-venv 96 | 97 | - name: test os 98 | shell: wsl-bash -u root {0} 99 | run: | 100 | python3 -m venv venv 101 | source venv/bin/activate 102 | pip install . 103 | python tests/os.py 104 | 105 | gpu: 106 | runs-on: [self-hosted, Linux] 107 | container: 108 | image: 10.0.14.248:5000/modelcloud/gptqmodel:compiler_cuda126-torch2.6.0-python313 109 | steps: 110 | - uses: actions/checkout@v4 111 | 112 | - name: install pciutils 113 | run: apt update && apt install -y pciutils 114 | 115 | - name: install 116 | run: pip install . 117 | 118 | - name: test os 119 | run: python tests/os.py 120 | 121 | - name: Find suitable GPU 122 | run: | 123 | gpu_id=$(curl -s "http://10.0.14.248/gpu/get?id=${{ github.run_id }}×tamp=$(date +%s%3N)&runner=${RUNNER_NAME}&force=1") 124 | echo "CUDA_VISIBLE_DEVICES=$gpu_id" >> $GITHUB_ENV 125 | echo "CUDA_VISIBLE_DEVICES=$gpu_id" 126 | 127 | - name: test cpu 128 | run: python tests/cpu.py 129 | 130 | - name: test gpu 131 | if: always() && !cancelled() 132 | run: python tests/gpu.py 133 | 134 | rocm: 135 | runs-on: [self-hosted, rocm] 136 | container: 137 | image: 10.0.14.248:5000/modelcloud/gptqmodel:github-ci-v6-rocm 138 | options: --device /dev/dri --device /dev/kfd --ipc=host 139 | steps: 140 | - uses: actions/checkout@v4 141 | 142 | - name: install 143 | run: pip install . 144 | 145 | - name: test os 146 | run: python tests/os.py 147 | 148 | - name: test cpu 149 | run: python tests/cpu.py 150 | 151 | - name: test gpu 152 | if: always() && !cancelled() 153 | run: python tests/gpu.py 154 | 155 | m4: 156 | runs-on: [self-hosted, m4] 157 | steps: 158 | - uses: actions/checkout@v4 159 | 160 | - name: Run test 161 | run: | 162 | export PATH="/opt/homebrew/bin:$PATH" && eval "$(pyenv init -)" 163 | pyenv global 3.11.11 && python -m venv venv 164 | source venv/bin/activate 165 | 166 | pip install wheel 167 | 168 | echo "=== installing device-smi" 169 | pip install . --no-build-isolation 170 | 171 | echo "=== running cpu test" 172 | python tests/cpu.py 173 | 174 | echo "=== running gpu test" 175 | python tests/gpu.py 176 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .cadence/ 3 | build/ 4 | dist/ 5 | device_smi.egg-info/ 6 | venv/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | # Device-SMI 3 |
4 | Self-contained Python library with zero dependencies that gives you unified `device` properties for `gpu`, `cpu`, and `npu` devices. No more calling separate tools such as `nvidia-smi` or reading `/proc/cpuinfo` and parsing the output yourself. 5 |
14 | 15 | ## News 16 | * 03/01/2025 [0.4.1](https://github.com/ModelCloud/Device-SMI/releases/tag/v0.4.1) Fix compat with AMD `ROCm` 6.3.2 and `MI300X`. 17 | * 02/26/2025 [0.4.0](https://github.com/ModelCloud/Device-SMI/releases/tag/v0.4.0) Added AMD GPU support. Validated with `amd-smi` on `7900XTX`. Use `PowerShell` for auto-device selection on the `Windows` platform. 18 | * 12/20/2024 [0.3.3](https://github.com/ModelCloud/Device-SMI/releases/tag/v0.3.3) Patch fix for Windows install compatibility. 19 | * 12/05/2024 [0.3.2](https://github.com/ModelCloud/Device-SMI/releases/tag/v0.3.2) Added Windows `WSL` support. 20 | * 12/03/2024 [0.3.1](https://github.com/ModelCloud/Device-SMI/releases/tag/v0.3.1) Added `CPUDevice` compat for Windows. 21 | * 12/02/2024 [0.3.0](https://github.com/ModelCloud/Device-SMI/releases/tag/v0.3.0) Added `OSDevice`.[`name`, `version`, `kernel`, `arch`] for Linux/MacOS/Windows/FreeBSD/Solaris. Added `cpu.`[`count`, `cores`, `threads`] properties. Bug fix for gpu device index. 22 | * 11/29/2024 [0.2.1](https://github.com/ModelCloud/Device-SMI/releases/tag/v0.2.1) Added `pcie.`[`gen`, `speed`, `id`] + [`firmware`, `driver`] properties to `GPU` device. 23 | 24 | ## Features 25 | 26 | - Retrieve information for both CPU and GPU devices. 27 | - Includes details about memory usage, utilization, driver, and PCIe info when applicable, plus other device specifications. 28 | - Zero PyPI dependencies. 29 | - Linux/MacOS/Windows/FreeBSD/Solaris support 30 | 31 | Supported Devices: 32 | 33 | - **OS**: Linux, MacOS, Windows, FreeBSD, Solaris 34 | - **CPU**: [Intel/AMD/Apple] Linux/MacOS/Windows system interfaces 35 | - **NVIDIA GPU**: NVIDIA System Management Interface `nvidia-smi` 36 | - **Intel XPU**: Intel/XPU System Management Interface `xpu-smi` 37 | - **AMD ROCm/GPU**: AMD System Management Interface `amd-smi` 38 | - **Apple GPU**: MacOS interfaces 39 | 40 | ## Usage 41 | 42 | For OS, use `os` to init a new Device object. 43 | 44 | ```py 45 | from device_smi import Device 46 | 47 | dev = Device("os") 48 | print(dev) 49 | ``` 50 | 51 | Output: (Ubuntu 22.04) 52 | 53 | > {'type': 'os', 'name': 'ubuntu', 'version': '22.04', 'kernel': '6.12.1-x64v3-xanmod2', 'arch': 'x86_64'} 54 | 55 | For GPU/XPU, use [`gpu`, `cuda`] for Nvidia and `xpu` for Intel XPU. For multiple GPUs, select by index: `cuda:0`. 56 | 57 | ```py 58 | from device_smi import Device 59 | 60 | dev = Device("cuda:0") 61 | print(dev) 62 | ``` 63 | 64 | Output: (Nvidia 4090) 65 | 66 | > {'pcie': {'gen': 2, 'speed': 1, 'id': '00000000:16:00.0'}, 'gpu': {'driver': '565.57.01', 'firmware': '95.02.3C.40.E7'}, 'type': 'gpu', 'model': 'geforce rtx 4090', 'memory_total': 25757220864, 'vendor': 'nvidia', 'features': ['8.9']} 67 |
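Once constructed, every device also exposes live metrics through `memory_used()` and `utilization()` (both defined on `Device` in `device_smi/device.py`). A minimal sketch, assuming a plain `cpu` device; the same calls work for `gpu`/`cuda`/`xpu` devices:

```py
from device_smi import Device

dev = Device("cpu")

# metrics are sampled fresh on every call
print(f"mem used={dev.memory_used() / 1024 / 1024 / 1024:.2f} GB")
print(f"utilization={dev.utilization()}%")
```

`memory_used()` returns bytes and `utilization()` returns a percentage, mirroring the checks in `tests/cpu.py`. When `torch` is installed, a `torch.device` instance can also be passed directly to `Device(...)`.

For CPU, use `cpu` to init a new Device object.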
69 | 70 | ```py 71 | from device_smi import Device 72 | 73 | dev = Device("cpu") 74 | print(dev) 75 | ``` 76 | 77 | Output: (AMD EPYC 7443) 78 | 79 | > {'type': 'cpu', 'model': 'epyc 7443', 'vendor': 'amd', 'memory_total': 1000000000000, 'count': 2, 'cores': 48, 'threads': 96, 'features': ['3dnowprefetch', 'abm', 'adx', 'aes', 'amd_ppin', 'aperfmperf', 'apic', 'arat', 'avx', 'avx2', 'bmi1', 'bmi2', 'bpext', 'brs', 'cat_l3', 'cdp_l3', 'clflush', 'clflushopt', 'clwb', 'clzero', 'cmov', 'cmp_legacy', 'constant_tsc', 'cpb', 'cpuid', 'cqm', 'cqm_llc', 'cqm_mbm_local', 'cqm_mbm_total', 'cqm_occup_llc', 'cr8_legacy', 'cx16', 'cx8', 'de', 'debug_swap', 'decodeassists', 'erms', 'extapic', 'extd_apicid', 'f16c', 'flushbyasid', 'fma', 'fpu', 'fsgsbase', 'fsrm', 'fxsr', 'fxsr_opt', 'ht', 'hw_pstate', 'ibpb', 'ibrs', 'ibs', 'invpcid', 'irperf', 'lahf_lm', 'lbrv', 'lm', 'mba', 'mca', 'mce', 'misalignsse', 'mmx', 'mmxext', 'monitor', 'movbe', 'msr', 'mtrr', 'mwaitx', 'nonstop_tsc', 'nopl', 'npt', 'nrip_save', 'nx', 'ospke', 'osvw', 'overflow_recov', 'pae', 'pat', 'pausefilter', 'pcid', 'pclmulqdq', 'pdpe1gb', 'perfctr_core', 'perfctr_llc', 'perfctr_nb', 'pfthreshold', 'pge', 'pku', 'pni', 'popcnt', 'pse', 'pse36', 'rapl', 'rdpid', 'rdpru', 'rdrand', 'rdseed', 'rdt_a', 'rdtscp', 'rep_good', 'sep', 'sha_ni', 'skinit', 'smap', 'smca', 'smep', 'ssbd', 'sse', 'sse2', 'sse4_1', 'sse4_2', 'sse4a', 'ssse3', 'stibp', 'succor', 'svm', 'svm_lock', 'syscall', 'tce', 'topoext', 'tsc', 'tsc_scale', 'umip', 'user_shstk', 'v_spec_ctrl', 'v_vmsave_vmload', 'vaes', 'vgif', 'vmcb_clean', 'vme', 'vmmcall', 'vpclmulqdq', 'wbnoinvd', 'wdt', 'xgetbv1', 'xsave', 'xsavec', 'xsaveerptr', 'xsaveopt', 'xsaves', 'xtopology']} 80 | 81 | 82 | ## Roadmap 83 | 84 | - Support Intel/Gaudi 85 | - Support Google/TPU 86 | - Add NPU support (ARM/Intel/AMD) 87 | -------------------------------------------------------------------------------- /device_smi/__init__.py: -------------------------------------------------------------------------------- 1 | from .device import Device as Device 2 | -------------------------------------------------------------------------------- /device_smi/amd.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | 4 | from .base import GPU, BaseMetrics, GPUDevice, Pcie, _run 5 | 6 | 7 | class AMDGPUMetrics(BaseMetrics): 8 | pass 9 | 10 | class AMDDevice(GPUDevice): 11 | def __init__(self, cls, index): 12 | super().__init__(cls, index) 13 | self.gpu_id = self._get_gpu_id() 14 | 15 | try: 16 | args = ["amd-smi", "static", "--gpu", f"{self.gpu_id}"] 17 | 18 | result = self.to_dict(_run(args=args).lower()) 19 | market_name = re.findall(r'\[(.*?)]', result["market_name"]) 20 | if market_name: 21 | market_name = market_name[0] 22 | else: 23 | market_name = result["market_name"] 24 | model = market_name.split("/")[0].strip() 25 | total_memory= result["size"].removesuffix("mb").strip() 26 | pci_bus_id = result["bdf"] 27 | pcie_gen = result['pcie_interface_version'].removeprefix("gen").strip() 28 | pcie_width = result['max_pcie_width'] 29 | driver = result["driver"] 30 | features = [result['target_graphics_version']] 31 | 32 | args = ["amd-smi", "firmware", "--gpu", f"{self.gpu_id}"] 33 | lines = _run(args=args).lower().splitlines() 34 | 35 | result = {} 36 | current_id = None 37 | for line in lines: 38 | line = line.strip() 39 | if line.startswith("fw_id:"): 40 | current_id = line.split(":")[1].strip() 41 | elif line.startswith("fw_version:") and current_id: 42 | 
result[current_id] = line.split(":")[1].strip() 43 | firmware = f"{result}" 44 | 45 | if model.lower().startswith("amd"): 46 | model = model[len("amd"):] 47 | 48 | cls.model = model.strip().lower() 49 | cls.memory_total = int(total_memory) * 1024 * 1024 # bytes 50 | cls.vendor = "amd" 51 | cls.features = features 52 | cls.pcie = Pcie(gen=int(pcie_gen), speed=int(pcie_width), id=pci_bus_id) 53 | cls.gpu = GPU(driver=driver, firmware=firmware) 54 | except FileNotFoundError: 55 | raise FileNotFoundError() 56 | except Exception as e: 57 | raise e 58 | 59 | def _get_gpu_id(self): 60 | hips = os.environ.get("HIP_VISIBLE_DEVICES", "") 61 | hip_list = hips.split(",") if hips else [] 62 | if hip_list and len(hip_list) > self.index: 63 | return hip_list[self.index] 64 | else: 65 | return str(self.index) 66 | 67 | def metrics(self): 68 | try: 69 | args = ["amd-smi", f"--id={self.gpu_id}", "--query-gpu=memory.used,utilization.gpu", 70 | "--format=csv,noheader,nounits"] 71 | used_memory, utilization = _run(args=args, seperator="\n")[0].split(", ") 72 | 73 | return AMDGPUMetrics( 74 | memory_used=int(used_memory) * 1024 * 1024, # bytes 75 | memory_process=0, # Bytes, TODO, get this 76 | utilization=float(utilization), 77 | ) 78 | 79 | except FileNotFoundError: 80 | raise FileNotFoundError( 81 | "The 'amd-smi' command was not found. Please ensure that the 'amd-utils' package is installed." 82 | ) 83 | except Exception as e: 84 | raise e 85 | -------------------------------------------------------------------------------- /device_smi/apple.py: -------------------------------------------------------------------------------- 1 | from .base import BaseMetrics, GPUDevice, _run 2 | 3 | 4 | class AppleGPUMetrics(BaseMetrics): 5 | pass 6 | 7 | 8 | class AppleDevice(GPUDevice): 9 | def __init__(self, cls, index): 10 | super().__init__(cls, index) 11 | self.gpu_id = 0 12 | 13 | args = ["system_profiler", "SPDisplaysDataType"] 14 | 15 | result = _run(args=args, seperator="\n") 16 | 17 | model = "" 18 | vendor = "" 19 | for o in result: 20 | if "Chipset Model" in o: 21 | model = o.split(":")[1].replace("Apple", "").strip() 22 | if "Vendor" in o: 23 | vendor = o.split(":")[1].strip().split(" ")[0].strip() 24 | 25 | memory_total = int(_run(["sysctl", "-n", "hw.memsize"])) 26 | 27 | cls.model = model.lower() 28 | cls.memory_total = memory_total # bytes 29 | cls.vendor = vendor.lower() 30 | 31 | def metrics(self): 32 | output = _run(["top", "-l", "1", "-stats", "cpu"]) 33 | 34 | utilization = "0.0" 35 | for line in output.splitlines(): 36 | if line.startswith("CPU usage"): 37 | parts = line.split() 38 | user_time = float(parts[2].strip("%")) 39 | sys_time = float(parts[4].strip("%")) 40 | utilization = user_time + sys_time 41 | 42 | total_memory = int(_run(['sysctl', 'hw.memsize']).split(':')[1].strip()) 43 | free_memory = int(_run(['sysctl', 'vm.page_free_count']).split(':')[1].strip()) 44 | page_size = int(_run(['sysctl', 'hw.pagesize']).split(':')[1].strip()) 45 | 46 | used_memory = total_memory - (free_memory * page_size) 47 | 48 | return AppleGPUMetrics( 49 | memory_used=int(used_memory), # bytes 50 | memory_process=0, # Bytes, TODO, get this 51 | utilization=float(utilization), 52 | ) 53 | -------------------------------------------------------------------------------- /device_smi/base.py: -------------------------------------------------------------------------------- 1 | import re 2 | import subprocess 3 | from abc import abstractmethod 4 | from typing import Optional 5 | 6 | INSTALLATION_HINTS = { 7 | "lspci": 
"`lspci` is not installed, you can install it via `sudo apt install pciutils`.", 8 | "nvidia-smi": "`nvidia-smi` is not installed. You need to install NVIDIA driver support binaries by `sudo apt install nvidia-utils-`", 9 | "powershell": "`PowerShell` is not installed. Please follow the instructions at `https://learn.microsoft.com/en-us/powershell/scripting/install/installing-powershell`", 10 | "xpu-smi": "`xpu-smi` is not installed. Please follow the instructions at https://github.com/intel/xpumanager/blob/master/doc/smi_install_guide.md`", 11 | "amd-smi": "`amd-smi` is not installed. Please follow the instructions at `https://rocm.docs.amd.com/projects/amdsmi/en/latest/install/install.html`", 12 | } 13 | 14 | class BaseDevice: 15 | def __init__(self, cls, type: str): 16 | cls.type = type 17 | 18 | @abstractmethod 19 | def metrics(self): 20 | pass 21 | 22 | def __str__(self): 23 | return str(self.__dict__) 24 | 25 | def to_dict(self, text, split: str = ":"): 26 | return {k.strip(): v.strip() for k, v in (line.split(split, 1) for line in text.splitlines() if split in line)} 27 | 28 | 29 | class GPUDevice(BaseDevice): 30 | def __init__(self, cls, index): 31 | super().__init__(cls, "gpu") 32 | self.index = index 33 | 34 | @abstractmethod 35 | def metrics(self): 36 | pass 37 | 38 | 39 | class BaseMetrics: 40 | def __init__( 41 | self, 42 | memory_used: int = 0, 43 | memory_process: int = 0, 44 | utilization: float = 0.0, 45 | ): 46 | self.memory_used = memory_used 47 | self.memory_process = memory_process 48 | self.utilization = max(0.0, utilization) 49 | 50 | def __str__(self): 51 | return str(self.__dict__) 52 | 53 | 54 | class Pcie: 55 | def __init__(self, gen: int, speed: int, id: str): 56 | self.gen = gen 57 | self.speed = speed 58 | self.id = id 59 | 60 | def __str__(self): 61 | return str(self.__dict__) 62 | 63 | def __repr__(self): 64 | return self.__str__() 65 | 66 | 67 | class GPU: 68 | def __init__(self, driver: str, firmware: str): 69 | self.driver = driver 70 | self.firmware = firmware 71 | 72 | def __str__(self): 73 | return str(self.__dict__) 74 | 75 | def __repr__(self): 76 | return self.__str__() 77 | 78 | 79 | def _run(args, line_start: Optional[str] = None, seperator: str=None): # -> str | list[str] disable type hint, because solaris test is using python 3.7 which doesn't support | usage 80 | try: 81 | result = subprocess.run( 82 | args, 83 | stdout=subprocess.PIPE, 84 | stderr=subprocess.PIPE, 85 | text=True, 86 | ) 87 | except FileNotFoundError: 88 | install_hint = INSTALLATION_HINTS.get(args[0], f"Command not found: `{args[0]}`, please check if it was installed.") 89 | raise RuntimeError(install_hint) 90 | 91 | if result.returncode != 0 or result.stderr.strip() != "": 92 | raise RuntimeError(result.stderr) 93 | 94 | result = result.stdout.strip() 95 | result = re.sub(r'\n+', '\n', result) # remove consecutive \n 96 | if line_start: 97 | return " ".join([line for line in result.splitlines() if line.strip().startswith(line_start)]) 98 | if seperator: 99 | return [l.strip() for l in result.split(seperator)] 100 | return result 101 | -------------------------------------------------------------------------------- /device_smi/cpu.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import re 4 | 5 | from .base import BaseDevice, BaseMetrics, _run 6 | 7 | 8 | class CPUMetrics(BaseMetrics): 9 | pass 10 | 11 | 12 | class CPUDevice(BaseDevice): 13 | def __init__(self, cls): 14 | super().__init__(cls, "cpu") 15 
| 16 | model = "Unknown Model" 17 | vendor = "Unknown vendor" 18 | flags = set() 19 | 20 | if platform.system().lower() == "windows": 21 | command_result = _run(["wmic", "cpu", "get", "manufacturer,name,numberofcores,numberoflogicalprocessors", "/format:csv"]).strip() 22 | result = command_result.split("\n")[1].split(",") 23 | 24 | cpu_count = command_result.count('\n') 25 | model = result[2].strip() 26 | cpu_cores = int(result[3]) 27 | cpu_threads = int(result[4]) 28 | vendor = result[1].strip() 29 | 30 | command_result = _run(["wmic", "os", "get", "TotalVisibleMemorySize", "/Value", "/format:csv"]).strip() 31 | result = command_result.split("\n")[1].split(",") 32 | 33 | mem_total = int(result[1]) * 1024 # TotalVisibleMemorySize is reported in KB 34 | elif platform.system().lower() == 'darwin': 35 | model = (_run(["sysctl", "-n", "machdep.cpu.brand_string"]).replace("Apple", "").strip()) 36 | try: 37 | vendor = (_run(["sysctl", "-n", "machdep.cpu.vendor"])) 38 | except BaseException: 39 | vendor = "apple" 40 | 41 | sysctl_info = self.to_dict(_run(["sysctl", "-a"])) 42 | cpu_count = 1 43 | cpu_cores = int(sysctl_info["hw.physicalcpu"]) 44 | cpu_threads = int(sysctl_info["hw.logicalcpu"]) 45 | 46 | mem_total = int(_run(["sysctl", "-n", "hw.memsize"])) 47 | 48 | try: 49 | features = sysctl_info["machdep.cpu.features"].split() # space-separated flags on a single line 50 | except Exception: 51 | features = [] 52 | 53 | flags = set(features) 54 | elif os.name == 'posix': 55 | try: 56 | with open("/proc/cpuinfo", "r") as f: 57 | lines = f.readlines() 58 | for line in lines: 59 | if line.startswith("flags"): 60 | flags.update(line.strip().split(":")[1].split()) 61 | if line.startswith("model name"): 62 | model = line.split(":")[1].strip() 63 | elif line.startswith("vendor_id"): 64 | vendor = line.split(":")[1].strip() 65 | except FileNotFoundError: 66 | model = platform.processor() 67 | vendor = platform.uname().system 68 | 69 | cpu_info = self.to_dict(_run(['lscpu'])) 70 | 71 | cpu_count = int(cpu_info["Socket(s)"]) 72 | cpu_cores_per_socket = int(cpu_info["Core(s) per socket"]) 73 | cpu_cores = cpu_count * cpu_cores_per_socket 74 | cpu_threads = int(cpu_info["CPU(s)"]) 75 | 76 | with open("/proc/meminfo", "r") as f: 77 | lines = f.readlines() 78 | mem_total = 0 79 | for line in lines: 80 | if line.startswith("MemTotal:"): 81 | mem_total = int(line.split()[1]) * 1024 82 | break 83 | else: 84 | print("not supported") 85 | 86 | model = " ".join(i for i in model.lower().split() if not any(x in i for x in ["ghz", "cpu", "(r)", "(tm)", "intel", "amd", "core", "processor", "@"])) 87 | cls.model = model 88 | 89 | if "intel" in vendor.lower(): 90 | vendor = "intel" 91 | elif "amd" in vendor.lower(): 92 | vendor = "amd" 93 | cls.vendor = vendor.lower().replace("authentic", "") 94 | cls.memory_total = mem_total # Bytes 95 | self.memory_total = mem_total # Bytes 96 | cls.count = cpu_count 97 | cls.cores = cpu_cores 98 | cls.threads = cpu_threads 99 | cls.features = sorted({f.lower() for f in flags}) 100 | 101 | def _utilization(self): 102 | # check if is macOS 103 | if platform.system().lower() == "darwin": 104 | output = _run(["top", "-l", "1", "-stats", "cpu"]) 105 | 106 | # CPU usage: 7.61% user, 15.23% sys, 77.15% idle 107 | for line in output.splitlines(): 108 | if line.startswith("CPU usage"): 109 | parts = line.split() 110 | user_time = float(parts[2].strip("%")) 111 | sys_time = float(parts[4].strip("%")) 112 | idle_time = float(parts[6].strip("%")) 113 | total_time = user_time + sys_time + idle_time 114 | return total_time, idle_time 115 | else: 116 | with open("/proc/stat", "r") as
f: 117 | lines = f.readlines() 118 | for line in lines: 119 | if line.startswith("cpu "): 120 | parts = line.split() 121 | total_time = sum(int(part) for part in parts[1:]) 122 | idle_time = int(parts[4]) 123 | return total_time, idle_time 124 | 125 | def metrics(self): 126 | if platform.system().lower() == "windows": 127 | # wmic reports an instantaneous load percentage 128 | command_result = _run(["wmic", "cpu", "get", "loadpercentage"]).strip() 129 | try: 130 | result = command_result.split("\n")[1].split(",") 131 | utilization = int(result[0]) 132 | except BaseException as e: 133 | print("error occurred, command_result: ") 134 | print(f"{command_result}") 135 | print("------------") 136 | raise e 137 | 138 | try: 139 | command_result = _run(["wmic", "os", "get", "FreePhysicalMemory"]).strip() 140 | result = command_result.split("\n")[1].split(",") 141 | memory_used = self.memory_total - int(result[0]) * 1024 # FreePhysicalMemory is reported in KB 142 | except BaseException as e: 143 | print("error occurred, command_result: ") 144 | print(f"{command_result}") 145 | print("------------") 146 | raise e 147 | return CPUMetrics( 148 | memory_used=memory_used, # bytes 149 | memory_process=0, # bytes 150 | utilization=utilization, 151 | ) 152 | 153 | total_time_1, idle_time_1 = self._utilization() 154 | # read CPU status a second time; sampling too quickly gives inaccurate results 155 | total_time_2, idle_time_2 = self._utilization() 156 | 157 | total_diff = total_time_2 - total_time_1 158 | idle_diff = idle_time_2 - idle_time_1 159 | 160 | if platform.system().lower() == "darwin": 161 | # top already reports percentages: utilization = user + sys of the latest sample 162 | utilization = total_time_2 - idle_time_2 163 | elif total_diff <= 0: 164 | # total_diff might be 0 when the two reads are too close together 165 | utilization = 0 166 | else: 167 | utilization = (1 - (idle_diff / total_diff)) * 100 168 | 169 | if platform.system().lower() == "darwin": 170 | available_mem = _run(["vm_stat"]).replace(".", "").lower() 171 | 172 | result = self.to_dict(available_mem) 173 | 174 | available_mem = available_mem.splitlines() 175 | page_size = int(re.findall(r'\d+', available_mem[0])[0]) 176 | 177 | free_pages = int(result["pages free"]) 178 | 179 | mem_free = free_pages * page_size 180 | else: 181 | with open("/proc/meminfo", "r") as f: 182 | lines = f.readlines() 183 | mem_free = 0 184 | for line in lines: 185 | if line.startswith("MemAvailable:"): 186 | mem_free = int(line.split()[1]) * 1024 187 | break 188 | 189 | memory_used = self.memory_total - mem_free 190 | 191 | process_id = os.getpid() 192 | if platform.system().lower() == "darwin": 193 | result = _run(["ps", "-p", str(process_id), "-o", "rss="]) 194 | memory_current_process = int(result) * 1024 195 | else: 196 | with open(f"/proc/{process_id}/status", "r") as f: 197 | lines = f.readlines() 198 | memory_current_process = 0 199 | for line in lines: 200 | if line.startswith("VmRSS:"): 201 | memory_current_process = int(line.split()[1]) * 1024 202 | break 203 | 204 | return CPUMetrics( 205 | memory_used=memory_used, # bytes 206 | memory_process=memory_current_process, # bytes 207 | utilization=utilization, 208 | ) 209 | -------------------------------------------------------------------------------- /device_smi/device.py: -------------------------------------------------------------------------------- 1 | import platform 2 | import warnings 3 | 4 | from .amd import AMDDevice 5 | from .apple import AppleDevice 6 | from .base import _run 7 | from .cpu import CPUDevice 8 | from .intel import IntelDevice 9 | from .nvidia import NvidiaDevice 10 | from .os import OSDevice 11 | 12 | IS_ROCM = False 13 | try: 14 | import torch 15 | 16
| HAS_TORCH = True 17 | if torch.version.hip is not None: 18 | IS_ROCM = True 19 | except BaseException: 20 | HAS_TORCH = False 21 | 22 | 23 | class Device: 24 | def __init__(self, device): 25 | # init attribute first to avoid IDE not attr warning 26 | # CPU/GPU Device 27 | self.memory_total = None 28 | self.type = None 29 | self.features = [] 30 | self.vendor = None 31 | self.model = None 32 | self.device = None 33 | # OS Device 34 | self.arch = None 35 | self.version = None 36 | self.name = None 37 | if HAS_TORCH and isinstance(device, torch.device): 38 | device_type = device.type.lower() 39 | device_index = device.index 40 | elif f"{device}".lower() == "os": 41 | self.device = OSDevice(self) 42 | assert self.arch 43 | assert self.version 44 | assert self.name 45 | return 46 | else: 47 | d = f"{device}".lower() 48 | if ":" in d: 49 | type, index = d.split(":") 50 | device_type = type 51 | device_index = (int(index)) 52 | else: 53 | device_type = d 54 | device_index = 0 55 | 56 | self.pcie = None 57 | self.gpu = None 58 | 59 | if device_type == "cpu": 60 | self.device = CPUDevice(self) 61 | elif device_type == "xpu": 62 | self.device = IntelDevice(self, device_index) 63 | elif device_type == "rocm" or IS_ROCM: 64 | self.device = AMDDevice(self, device_index) 65 | elif device_type == "cuda" and not IS_ROCM: 66 | self.device = NvidiaDevice(self, device_index) 67 | elif device_type == "gpu": 68 | if platform.system().lower() == "darwin": 69 | if platform.machine() == 'x86_64': 70 | raise Exception("Not supported for macOS on Intel chips.") 71 | 72 | self.device = AppleDevice(self, device_index) 73 | else: 74 | if platform.system().lower() == "windows": 75 | for d in ["NVIDIA", "AMD", "INTEL"]: 76 | result = _run(["powershell", "-Command", "Get-CimInstance", "Win32_VideoController", "-Filter", f"\"Name like '%{d}%'\""]).lower().splitlines() 77 | if result: 78 | if d == "INTEL": 79 | self.device = IntelDevice(self, device_index) 80 | elif d == "AMD": 81 | self.device = AMDDevice(self, device_index) 82 | else: 83 | self.device = NvidiaDevice(self, device_index) 84 | break 85 | else: 86 | result = _run(["lspci"]).lower().splitlines() 87 | result = "\n".join([ 88 | line for line in result 89 | if any(keyword.lower() in line.lower() for keyword in ['vga', '3d', 'display']) 90 | ]).lower() 91 | if "nvidia" in result: 92 | self.device = NvidiaDevice(self, device_index) 93 | elif "amd" in result: 94 | self.device = AMDDevice(self, device_index) 95 | elif "intel" in result: 96 | self.device = IntelDevice(self, device_index) 97 | if not self.device: 98 | raise ValueError(f"Unable to find requested device: {device}") 99 | else: 100 | raise Exception(f"The device {device_type} is not supported") 101 | 102 | assert self.memory_total 103 | assert self.type 104 | assert self.features is not None 105 | assert self.vendor 106 | assert self.model 107 | 108 | def info(self): 109 | warnings.warn( 110 | "info() method is deprecated and will be removed in next release.", 111 | DeprecationWarning, 112 | stacklevel=2 113 | ) 114 | return self 115 | 116 | def memory_used(self) -> int: 117 | return self.device.metrics().memory_used 118 | 119 | def utilization(self) -> float: 120 | return self.device.metrics().utilization 121 | 122 | def __str__(self): 123 | return str({k: v for k, v in self.__dict__.items() if k != 'device' and v is not None}) 124 | -------------------------------------------------------------------------------- /device_smi/intel.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | 4 | from .base import GPU, BaseMetrics, GPUDevice, Pcie, _run 5 | 6 | 7 | class IntelGPUMetrics(BaseMetrics): 8 | pass 9 | 10 | 11 | class IntelDevice(GPUDevice): 12 | def __init__(self, cls, index: int = 0): 13 | super().__init__(cls, index) 14 | self.gpu_id = index 15 | 16 | try: 17 | args = ["xpu-smi", "discovery", "-d", f"{self.gpu_id}", "-j"] 18 | 19 | result = _run(args=args) 20 | 21 | data = json.loads(result) 22 | 23 | model = data["device_name"] 24 | 25 | if model: 26 | model = model.lower().replace("intel(r)", "").replace("core(tm)", "").replace("cpu @", "") 27 | model = re.sub(r"\s?\d+(\.\d+)?ghz", "", model).strip() 28 | vendor = data["vendor_name"] 29 | if vendor and vendor.lower().startswith("intel"): 30 | vendor = "Intel" 31 | total_memory = data["max_mem_alloc_size_byte"] 32 | 33 | pcie_gen = int(data["pcie_generation"]) 34 | pcie_speed = int(data["pcie_max_link_width"]) 35 | pcie_id = data["pci_device_id"] 36 | driver = data["driver_version"] 37 | firmware = data["gfx_firmware_version"] 38 | 39 | cls.model = model.lower() 40 | cls.memory_total = int(total_memory) # bytes 41 | cls.vendor = vendor.lower() 42 | cls.pcie = Pcie(gen=pcie_gen, speed=pcie_speed, id=pcie_id) 43 | cls.gpu = GPU(driver=driver, firmware=firmware) 44 | 45 | except FileNotFoundError: 46 | raise FileNotFoundError("'xpu-smi' command not found. Please ensure it is installed") 47 | except Exception as e: 48 | raise e 49 | 50 | def metrics(self): 51 | try: 52 | args = [ 53 | "xpu-smi", "dump", 54 | "-d", f"{self.gpu_id}", 55 | "-m", "0,18", 56 | "-n", "1" 57 | ] 58 | output = _run(args=args, seperator="\n")[-1] 59 | 60 | # xpu-smi dump -d 0 -m 0,1,2 -i 1 -n 5 61 | # Timestamp, DeviceId, GPU Utilization (%), GPU Power (W), GPU Frequency (MHz) 62 | # 06:14:46.000, 0, 0.00, 14.61, 0 63 | 64 | memory_used = output.split(",")[-1].strip() 65 | utilization = output.split(",")[-2].strip() 66 | if utilization.lower() == "n/a": 67 | utilization = "0.0" 68 | 69 | return IntelGPUMetrics( 70 | memory_used=int(float(memory_used) * 1024 * 1024), # bytes 71 | memory_process=0, 72 | utilization=float(utilization), 73 | ) 74 | except FileNotFoundError: 75 | raise FileNotFoundError("'xpu-smi' command not found. 
Please ensure it is installed") 76 | except Exception as e: 77 | raise e 78 | -------------------------------------------------------------------------------- /device_smi/nvidia.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | 4 | from .base import GPU, BaseMetrics, GPUDevice, Pcie, _run 5 | 6 | 7 | class NvidiaGPUMetrics(BaseMetrics): 8 | pass 9 | 10 | 11 | class NvidiaDevice(GPUDevice): 12 | def __init__(self, cls, index): 13 | super().__init__(cls, index) 14 | self.gpu_id = self._get_gpu_id() 15 | 16 | try: 17 | args = [ 18 | "nvidia-smi", 19 | f"--id={self.gpu_id}", 20 | "--query-gpu=" 21 | "name," 22 | "memory.total," 23 | "pci.bus_id," 24 | "pcie.link.gen.max," 25 | "pcie.link.gen.current," 26 | "driver_version", 27 | "--format=csv,noheader,nounits", 28 | ] 29 | 30 | result = _run(args=args, seperator="\n") 31 | 32 | model, total_memory, pci_bus_id, pcie_gen, pcie_width, driver = (result[0].split(", ")) 33 | 34 | result = _run(args=["nvidia-smi", "-q", "-i", f"{self.gpu_id}"], seperator="\n") 35 | firmware = " ".join([line.split(":", 1)[1].strip() for line in result if "VBIOS" in line]) 36 | 37 | if model.lower().startswith("nvidia"): 38 | model = model[len("nvidia"):] 39 | 40 | compute_cap = ( 41 | _run(["nvidia-smi", "--format=csv", "--query-gpu=compute_cap", "-i", f"{self.gpu_id}"]) 42 | .removeprefix("compute_cap\n") 43 | ) 44 | 45 | cls.model = model.strip().lower() 46 | cls.memory_total = int(total_memory) * 1024 * 1024 # bytes 47 | cls.vendor = "nvidia" 48 | cls.features = [compute_cap] 49 | cls.pcie = Pcie(gen=int(pcie_gen), speed=int(pcie_width), id=pci_bus_id) 50 | cls.gpu = GPU(driver=driver, firmware=firmware) 51 | except FileNotFoundError: 52 | raise FileNotFoundError() 53 | except Exception as e: 54 | raise e 55 | 56 | def _get_gpu_id(self): 57 | gpu_count = len(_run(["nvidia-smi", "--list-gpus"]).splitlines()) 58 | cudas = os.environ.get("CUDA_VISIBLE_DEVICES", "") 59 | cuda_list = cudas.split(",") if cudas else [] 60 | if gpu_count > 0 and os.environ.get("CUDA_DEVICE_ORDER", "") != "PCI_BUS_ID": 61 | warnings.warn("Detected different devices in the system. Please make sure to set `CUDA_DEVICE_ORDER=PCI_BUS_ID` to avoid unexpected behavior.", RuntimeWarning, 2) 62 | if cuda_list and len(cuda_list) > self.index: 63 | return cuda_list[self.index] 64 | else: 65 | return str(self.index) 66 | 67 | def metrics(self): 68 | try: 69 | args = ["nvidia-smi", f"--id={self.gpu_id}", "--query-gpu=memory.used,utilization.gpu", "--format=csv,noheader,nounits"] 70 | used_memory, utilization = _run(args=args, seperator="\n")[0].split(", ") 71 | 72 | return NvidiaGPUMetrics( 73 | memory_used=int(used_memory) * 1024 * 1024, # bytes 74 | memory_process=0, # Bytes, TODO, get this 75 | utilization=float(utilization), 76 | ) 77 | 78 | except FileNotFoundError: 79 | raise FileNotFoundError( 80 | "The 'nvidia-smi' command was not found. Please ensure that the 'nvidia-utils' package is installed." 
81 | ) 82 | except Exception as e: 83 | raise e 84 | -------------------------------------------------------------------------------- /device_smi/os.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import re 4 | 5 | from .base import BaseDevice, BaseMetrics, _run 6 | 7 | 8 | class OSMetrics(BaseMetrics): 9 | pass 10 | 11 | 12 | class OSDevice(BaseDevice): 13 | def __init__(self, cls): 14 | super().__init__(cls, "os") 15 | 16 | if platform.system().lower() == "linux" or platform.system().lower() == "freebsd" or platform.system().lower() == "solaris" or platform.system().lower() == "sunos": 17 | release_info = self.to_dict(_run(["cat", "/etc/os-release"]).replace("\"", "").lower(), "=") 18 | cls.name = release_info["name"].replace("oracle", "").replace("gnu/linux", "").strip() 19 | 20 | cls.version = release_info["version_id"] 21 | match = re.match(r"(\d+\.\d+)", cls.version) 22 | if match: 23 | cls.version = match.group(1) 24 | 25 | cls.kernel, cls.arch = _run(["uname", "-mr"]).lower().split() 26 | elif platform.system().lower() == "darwin": 27 | release_info = self.to_dict(_run(["sw_vers"]).lower()) 28 | cls.name = release_info["productname"] 29 | cls.version = release_info["productversion"] 30 | cls.kernel, cls.arch = _run(["uname", "-mr"]).lower().split() 31 | elif platform.system().lower() == "windows": 32 | cls.version = _run(["wmic", "os", "get", "caption", "/format:csv"], seperator="\n")[1].split(",")[1].lower().removeprefix("microsoft windows").strip() 33 | cls.name = "windows" 34 | cls.arch = os.environ.get("PROCESSOR_ARCHITECTURE").lower() 35 | 36 | cls.kernel = _run(["cmd", "/c", "ver"]) 37 | match = re.search(r'(\d+\.\d+\.\d+\.\d+)', cls.kernel) 38 | if match: 39 | cls.kernel = match.group(1) 40 | else: 41 | cls.name = platform.system().lower() 42 | cls.version = platform.version().lower() 43 | cls.arch = platform.architecture()[0].lower().strip() 44 | 45 | if cls.arch in ["amd64", "x64"]: 46 | cls.arch = "x86_64" 47 | if cls.arch in ["i386", "i86pc"]: 48 | cls.arch = "x86" 49 | if cls.arch in ["arm64"]: 50 | cls.arch = "aarch64" 51 | 52 | def metrics(self): 53 | pass 54 | -------------------------------------------------------------------------------- /format/format.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd "$(dirname "$0")" || exit 4 | 5 | # force ruff/isort to be same version as setup.py 6 | pip install -U ruff==0.9.6 isort==6.0.0 7 | 8 | ruff check ../device_smi ../tests ../setup.py --fix --unsafe-fixes 9 | ruff_status=$? 10 | 11 | isort -l 119 -e ../ 12 | 13 | # Exit with the status code of ruff check 14 | exit $ruff_status -------------------------------------------------------------------------------- /format/ruff.toml: -------------------------------------------------------------------------------- 1 | # Never enforce `E501` (line length violations). 2 | lint.ignore = ["C901", "E501", "E741", "W605", "E402"] 3 | lint.select = ["C", "E", "F", "I", "W"] 4 | line-length = 119 5 | 6 | # Ignore import violations in all `__init__.py` files. 7 | [lint.per-file-ignores] 8 | "__init__.py" = ["E402", "F401", "F403", "F811"] 9 | 10 | [lint.isort] 11 | lines-after-imports = 2 12 | known-first-party = ["device_smi"] 13 | 14 | [format] 15 | # Like Black, use double quotes for strings. 16 | quote-style = "double" 17 | 18 | # Like Black, indent with spaces, rather than tabs. 
19 | indent-style = "space" 20 | 21 | # Like Black, respect magic trailing commas. 22 | skip-magic-trailing-comma = false 23 | 24 | # Like Black, automatically detect the appropriate line ending. 25 | line-ending = "auto" -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ModelCloud/Device-SMI/a0c7c0c54ebc7606957ccc22e504313d7cf85664/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from setuptools import find_packages, setup 4 | 5 | __version__ = "0.4.1" 6 | 7 | setup( 8 | name="device-smi", 9 | version=__version__, 10 | author="ModelCloud", 11 | author_email="qubitium@modelcloud.ai", 12 | description="Retrieve gpu, cpu, and npu device info and properties from Linux/MacOS with zero package dependencies.", 13 | long_description=(Path(__file__).parent / "README.md").read_text(encoding="UTF-8"), 14 | long_description_content_type="text/markdown", 15 | url="https://github.com/ModelCloud/Device-SMI/", 16 | packages=find_packages(), 17 | install_requires=[], 18 | platforms=["linux", "windows", "darwin", "solaris", "freebsd"], 19 | classifiers=[ 20 | "Programming Language :: Python :: 3", 21 | "License :: OSI Approved :: Apache Software License", 22 | "Operating System :: OS Independent", 23 | ], 24 | python_requires=">=3", 25 | ) 26 | -------------------------------------------------------------------------------- /tests/cpu.py: -------------------------------------------------------------------------------- 1 | from device_smi import Device 2 | 3 | dev = Device("cpu") 4 | print(dev) 5 | 6 | assert dev.type == "cpu", f"type is wrong, expected: `cpu`, actual: `{dev.type}`" 7 | assert dev.model 8 | 9 | for i in ["ghz", "cpu", "(r)", "(tm)", "intel", "amd", "core", "processor", "@"]: 10 | assert i not in dev.model, f"{i} should be removed in model" 11 | 12 | assert dev.vendor in ["amd", "intel", "apple"], f"check vendor: {dev.vendor}" 13 | assert dev.memory_total > 10, f"wrong memory size: {dev.memory_total}" 14 | assert dev.features is not None 15 | 16 | memory_used = dev.memory_used() 17 | assert memory_used > 0, f"dev.memory_used()={memory_used}" 18 | 19 | utilization = dev.utilization() 20 | assert utilization >= 0.0, f"dev.utilization()={utilization}" 21 | 22 | print(f"mem used={dev.memory_used() / 1024 / 1024 / 1024:.2f} GB | utilization={dev.utilization()}%") 23 | 24 | if __name__ == '__main__': 25 | print() 26 | -------------------------------------------------------------------------------- /tests/gpu.py: -------------------------------------------------------------------------------- 1 | from device_smi import Device 2 | 3 | dev = Device("gpu") 4 | print(dev) 5 | 6 | assert dev.type == "gpu", f"type is wrong, expected: `gpu`, actual: `{dev.type}`" 7 | if dev.pcie: 8 | assert dev.pcie.gen is not None 9 | assert dev.pcie.speed is not None 10 | assert dev.pcie.id is not None 11 | if dev.gpu: 12 | assert dev.gpu.driver is not None 13 | assert dev.gpu.firmware is not None 14 | assert dev.model 15 | assert dev.memory_total > 10, f"wrong memory size: {dev.memory_total}" 16 | -------------------------------------------------------------------------------- /tests/os.py: -------------------------------------------------------------------------------- 1 | from device_smi
import Device 2 | 3 | dev = Device("os") 4 | print(dev) 5 | 6 | assert dev.type == "os", f"type is wrong, expected: `os`, actual: `{dev.type}`" 7 | assert dev.name 8 | assert dev.version 9 | assert dev.arch in ["x86", "x86_64", "aarch64"] 10 | --------------------------------------------------------------------------------