├── LICENSE ├── README.md ├── blackbox_exporter └── README.md ├── flask_exporter.py ├── kafka ├── 1.png ├── 2.png ├── KAFKA Dashboard CN 20240516 StarsL.cn-1715795355801.json └── readme.md ├── kubernetes ├── K8S Dashboard CN 20240513 StarsL.cn-1715536276053.json ├── README.md ├── kube-state-metrics_v1.9.8 │ ├── cluster-role-binding.yaml │ ├── cluster-role.yaml │ ├── deployment.yaml │ ├── service-account.yaml │ └── service.yaml ├── kube-state-metrics_v2.11.0 │ ├── cluster-role-binding.yaml │ ├── cluster-role.yaml │ ├── deployment.yaml │ ├── service-account.yaml │ └── service.yaml ├── kube-state-metrics_v2.12.0 │ ├── cluster-role-binding.yaml │ ├── cluster-role.yaml │ ├── deployment.yaml │ ├── service-account.yaml │ └── service.yaml ├── kube-state-metrics_v2.2.1 │ ├── cluster-role-binding.yaml │ ├── cluster-role.yaml │ ├── deployment.yaml │ ├── service-account.yaml │ └── service.yaml ├── kube-state-metrics_v2.3.0 │ ├── cluster-role-binding.yaml │ ├── cluster-role.yaml │ ├── deployment.yaml │ ├── service-account.yaml │ └── service.yaml └── screenshot │ ├── 1.png │ ├── 2.png │ ├── 3.png │ └── 4.png ├── linux_proc_monit ├── README.md ├── node-exporter │ ├── linux_proc.py │ ├── node-exporter_install.sh │ ├── prom_rule.yaml │ └── 京东云-进程监控-1727601235648.json └── pushgateway │ ├── linux_proc.py │ └── 进程监控-1655180951178.json ├── node_exporter ├── Node Exporter Dashboard 20240520 TenSunS自动同步版-1716239453095.json ├── Node Exporter Dashboard 20240520 通用JOB分组版-1716347618609.json ├── README.md ├── gf1.png ├── gf2.png ├── gf3.png ├── tss1.png ├── tss2.png ├── tss3.png └── tss4.png ├── qr.jpg ├── screenshot └── windows_exporter ├── README.md ├── Windows_Dashboard_20230720.json ├── win_alert_rules.yml └── windows_exporter.png /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. 
Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### 推荐使用【TenSunS】来管理主机/MySQL/Redis与站点监控,自动同步云资源到Prometheus,更多惊喜! 
2 | ### [【TenSunS介绍】](https://github.com/starsliao/TenSunS) 3 | - #### [应用场景1:如何优雅的基于Consul自动同步ECS主机监控](https://github.com/starsliao/TenSunS/blob/main/docs/ECS%E4%B8%BB%E6%9C%BA%E7%9B%91%E6%8E%A7.md) 4 | - #### [应用场景2:如何优雅的使用Consul管理Blackbox站点监控](https://github.com/starsliao/TenSunS/blob/main/docs/blackbox%E7%AB%99%E7%82%B9%E7%9B%91%E6%8E%A7.md) 5 | - #### [应用场景3:如何把云主机自动同步到JumpServer](https://github.com/starsliao/TenSunS/blob/main/docs/%E5%A6%82%E4%BD%95%E6%8A%8A%E4%B8%BB%E6%9C%BA%E8%87%AA%E5%8A%A8%E5%90%8C%E6%AD%A5%E5%88%B0JumpServer.md) 6 | - #### [应用场景4:使用1个mysqld_exporter监控所有的MySQL实例](https://github.com/starsliao/TenSunS/blob/main/docs/%E5%A6%82%E4%BD%95%E4%BC%98%E9%9B%85%E7%9A%84%E4%BD%BF%E7%94%A8%E4%B8%80%E4%B8%AAmysqld_exporter%E7%9B%91%E6%8E%A7%E6%89%80%E6%9C%89%E7%9A%84MySQL%E5%AE%9E%E4%BE%8B.md) 7 | - #### [应用场景5:使用1个redis_exporter监控所有的Redis实例](https://github.com/starsliao/TenSunS/blob/main/docs/%E4%BD%BF%E7%94%A8%E4%B8%80%E4%B8%AAredis_exporter%E7%9B%91%E6%8E%A7%E6%89%80%E6%9C%89%E7%9A%84Redis%E5%AE%9E%E4%BE%8B.md) 8 | 9 | --- 10 | ### 💖我的Grafana看板汇总[【点击进入Grafana官网查看我的全部看板】](https://grafana.com/orgs/starsliao/dashboards) 11 | ##### [1 Kubernetes for Prometheus Dashboard](https://github.com/starsliao/Prometheus/tree/master/kubernetes) 12 | ##### [2 Node Exporter for Prometheus Dashboard](https://github.com/starsliao/Prometheus/tree/master/node_exporter) 13 | ##### [3 Blackbox Manager & Blackbox Exporter Dashboard](https://github.com/starsliao/Prometheus/tree/master/blackbox_exporter) 14 | ##### [4 windows_exporter for Prometheus Dashboard](https://github.com/starsliao/Prometheus/tree/master/windows_exporter) 15 | ##### [5 基于推送方式的进程监控](https://github.com/starsliao/Prometheus/tree/master/linux_proc_monit) 16 | ##### [6 Mysqld Exporter 
Dashboard](https://github.com/starsliao/TenSunS/blob/main/docs/%E5%A6%82%E4%BD%95%E4%BC%98%E9%9B%85%E7%9A%84%E4%BD%BF%E7%94%A8%E4%B8%80%E4%B8%AAmysqld_exporter%E7%9B%91%E6%8E%A7%E6%89%80%E6%9C%89%E7%9A%84MySQL%E5%AE%9E%E4%BE%8B.md) 17 | ##### [7 Redis Exporter Dashboard](https://github.com/starsliao/TenSunS/blob/main/docs/%E4%BD%BF%E7%94%A8%E4%B8%80%E4%B8%AAredis_exporter%E7%9B%91%E6%8E%A7%E6%89%80%E6%9C%89%E7%9A%84Redis%E5%AE%9E%E4%BE%8B.md) 18 | 19 | #### 请进入相应目录查看说明与截图 20 | # 特别鸣谢 21 | ## 赞赏与关注公众号【**云原生DevOps**】加入运维群交流,获取更多... 22 | ![](https://starsl.cn/static/img/thanks.png) 23 | -------------------------------------------------------------------------------- /blackbox_exporter/README.md: -------------------------------------------------------------------------------- 1 | 2 | # 推荐使用【ConsulManager】来管理主机监控与站点监控 3 | ## [【ConsulManager介绍】](https://github.com/starsliao/ConsulManager) 4 | - ### [应用场景1:如何优雅的基于Consul自动同步ECS主机监控](https://github.com/starsliao/ConsulManager/blob/main/docs/ECS%E4%B8%BB%E6%9C%BA%E7%9B%91%E6%8E%A7.md) 5 | - ### [应用场景2:如何优雅的使用Consul管理Blackbox站点监控](https://github.com/starsliao/ConsulManager/blob/main/docs/blackbox%E7%AB%99%E7%82%B9%E7%9B%91%E6%8E%A7.md) 6 | 7 | --- 8 | 9 | ### Blackbox Exporter Dashboard 10 | - 支持Grafana 8,基于blackbox_exporter 0.19.0设计 11 | - 采用图表+曲线图方式展示TCP,ICMP,HTTPS的服务状态,各阶段请求延时,HTTPS证书信息等 12 | - 优化展示效果,支持监控目标的分组、分类级联展示,多服务同时对比展示。 13 | 14 | 导入ID:9965 15 | 详细URL:https://grafana.com/grafana/dashboards/9965 16 | 17 | ![](https://raw.githubusercontent.com/starsliao/ConsulManager/main/screenshot/blackbox1.PNG) 18 | ![](https://raw.githubusercontent.com/starsliao/ConsulManager/main/screenshot/blackbox2.PNG) 19 | ![](https://raw.githubusercontent.com/starsliao/ConsulManager/main/screenshot/blackbox3.PNG) 20 | ![](https://raw.githubusercontent.com/starsliao/ConsulManager/main/screenshot/blackbox4.PNG) 21 | ![](https://raw.githubusercontent.com/starsliao/ConsulManager/main/vue-consul/public/blackbox.png) 22 | 
-------------------------------------------------------------------------------- /flask_exporter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3.6 2 | # -*- coding:utf-8 -*- 3 | import prometheus_client 4 | from prometheus_client import Gauge,Info 5 | from prometheus_client.core import CollectorRegistry 6 | from flask import Response, Flask 7 | import psutil 8 | app = Flask(__name__) 9 | REGISTRY = CollectorRegistry(auto_describe=False) 10 | mem = Gauge('memory', 'memory info',['memtype'],registry=REGISTRY) 11 | i = Info('my_build_version', 'Description of info',registry=REGISTRY) 12 | 13 | @app.route("/metrics") 14 | def hahah(): 15 | mem.labels(memtype='total').set(psutil.virtual_memory().total) 16 | mem.labels(memtype='available').set(psutil.virtual_memory().available) 17 | mem.labels(memtype='used').set(psutil.virtual_memory().used) 18 | i.info({'version': '1.2.3', 'buildhost': 'foo@bar'}) 19 | return Response(prometheus_client.generate_latest(REGISTRY),mimetype="text/plain") 20 | 21 | if __name__ == "__main__": 22 | app.run(host="0.0.0.0",port=5000) 23 | -------------------------------------------------------------------------------- /kafka/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starsliao/Prometheus/1936cda22403a97af95048435591df84c97c7f08/kafka/1.png -------------------------------------------------------------------------------- /kafka/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starsliao/Prometheus/1936cda22403a97af95048435591df84c97c7f08/kafka/2.png -------------------------------------------------------------------------------- /kafka/readme.md: -------------------------------------------------------------------------------- 1 | ### KAFKA监控一条龙:史上最强Kafka看板+监控配置与告警规则 2 | ### 
[https://mp.weixin.qq.com/s/ZG3DjRoJ3IDeln-_KbvTDQ](https://mp.weixin.qq.com/s/ZG3DjRoJ3IDeln-_KbvTDQ) 3 | 4 | --- 5 | 6 | ### KAFKA Grafana Dashboard 7 | ##### 【中文版本】2024.05.20更新,基于Prometheus的kafka_exporter,KAFKA资源展示、问题排查、快速积压分析! 8 | - 看板的所有Panel支持最新样式,优化展示性能,已兼容Grafana10.X版本. 9 | - 包括KAFKA整体的资源状态, 10 | - 生产者与消费者关系 11 | - 消息积压的明细信息 12 | - 生产与消费的速率 13 | - 异常的消费与Topic展示 14 | - 分区级别的积压与消费明细 15 | 16 | ### 赞赏与关注公众号【**云原生DevOps**】加入交流群(请备注:K8S),获取更多... 17 | 18 | **如果看不到图片请点击该链接:[https://starsl.cn/static/img/thanks.png](https://starsl.cn/static/img/thanks.png)** 19 | ![](https://starsl.cn/static/img/thanks.png) 20 | 21 | ### 截图 22 | - 全局信息、消费者与Topic、异常与积压分析 23 | ![](https://grafana.com/api/dashboards/21078/images/16236/image) 24 | - 分区维度明细 25 | ![](https://grafana.com/api/dashboards/21078/images/16237/image) 26 | 27 | ### 看板下载 28 | - Grafana看板ID:21078 29 | - Grafana看板地址:[https://grafana.com/grafana/dashboards/21078](https://grafana.com/grafana/dashboards/21078) 30 | - 项目仓库:[https://github.com/starsliao/Prometheus/tree/master/kafka](https://github.com/starsliao/Prometheus/tree/master/kafka) 31 | -------------------------------------------------------------------------------- /kubernetes/README.md: -------------------------------------------------------------------------------- 1 | ### Kubernetes Grafana看板更新啦!kube-state-metrics部署与JOB配置说明 2 | ### [https://mp.weixin.qq.com/s/R88DraaaS3bpm3PurzpP9g](https://mp.weixin.qq.com/s/R88DraaaS3bpm3PurzpP9g) 3 | 4 | --- 5 | 6 | #### kubernetes资源全面展示!包含K8S整体资源总览、微服务资源明细、Pod资源明细及K8S网络带宽,优化重要指标展示。 7 | ### 更新说明 8 | ##### v20240513 9 | 1. 更新了看板的所有Panel支持最新样式,优化展示性能,已兼容Grafana10.X版本. 10 | 2. 增加了K8S总体的状态条展示节点与微服务资源的统计. 11 | 3. 增加了PVC的使用情况,各命名空间的CPU,内存使用曲线图. 12 | 4. 优化了Pod与微服务资源明细表格的展示字段与视觉效果. 13 | 5. 优化了Pod与微服务CPU与内存使用量的曲线图中可以直接展示出该资源的Limit值红线. 14 | 6. 修复了Pod重启导致短时间内展示的Pod资源数据不准确的BUG. 15 | 7. 调整了多个图表,曲线图的展示效果与描述,优化部分指标数据更加精准。 16 | 8. 
增加了各个版本的kube-state-metrics国内镜像.参考[【这里】](https://github.com/starsliao/Prometheus/tree/master/kubernetes) 17 | 9. 增加了Prometheus on K8S的JOB配置说明. 18 | 19 | --- 20 | 21 | ### 截图 22 | #### 整体资源总览 23 | ![](https://grafana.com/api/dashboards/13105/images/16207/image) 24 | ![](https://grafana.com/api/dashboards/13105/images/16208/image) 25 | #### Pod资源明细 26 | ![](https://grafana.com/api/dashboards/13105/images/16209/image) 27 | #### 微服务资源明细 28 | ![](https://grafana.com/api/dashboards/13105/images/16210/image) 29 | 30 | ### 赞赏与关注公众号【**云原生DevOps**】加入交流群(请备注:K8S),获取更多... 31 | 32 | **如果看不到图片请点击该链接:[https://starsl.cn/static/img/thanks.png](https://starsl.cn/static/img/thanks.png)** 33 | ![](https://starsl.cn/static/img/thanks.png) 34 | 35 | --- 36 | 37 | ### 看板下载 38 | - Grafana看板ID:13105 39 | - Grafana看板地址:[https://grafana.com/grafana/dashboards/13105](https://grafana.com/grafana/dashboards/13105) 40 | - 项目仓库:[https://github.com/starsliao/Prometheus/tree/master/kubernetes](https://github.com/starsliao/Prometheus/tree/master/kubernetes) 41 | -------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v1.9.8/cluster-role-binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: kube-state-metrics 6 | app.kubernetes.io/version: v1.9.8 7 | name: kube-state-metrics 8 | roleRef: 9 | apiGroup: rbac.authorization.k8s.io 10 | kind: ClusterRole 11 | name: kube-state-metrics 12 | subjects: 13 | - kind: ServiceAccount 14 | name: kube-state-metrics 15 | namespace: ops-monit 16 | -------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v1.9.8/cluster-role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | 
labels: 5 | app.kubernetes.io/name: kube-state-metrics 6 | app.kubernetes.io/version: v1.9.8 7 | name: kube-state-metrics 8 | rules: 9 | - apiGroups: 10 | - "" 11 | resources: 12 | - configmaps 13 | - secrets 14 | - nodes 15 | - pods 16 | - services 17 | - resourcequotas 18 | - replicationcontrollers 19 | - limitranges 20 | - persistentvolumeclaims 21 | - persistentvolumes 22 | - namespaces 23 | - endpoints 24 | verbs: 25 | - list 26 | - watch 27 | - apiGroups: 28 | - extensions 29 | resources: 30 | - daemonsets 31 | - deployments 32 | - replicasets 33 | - ingresses 34 | verbs: 35 | - list 36 | - watch 37 | - apiGroups: 38 | - apps 39 | resources: 40 | - statefulsets 41 | - daemonsets 42 | - deployments 43 | - replicasets 44 | verbs: 45 | - list 46 | - watch 47 | - apiGroups: 48 | - batch 49 | resources: 50 | - cronjobs 51 | - jobs 52 | verbs: 53 | - list 54 | - watch 55 | - apiGroups: 56 | - autoscaling 57 | resources: 58 | - horizontalpodautoscalers 59 | verbs: 60 | - list 61 | - watch 62 | - apiGroups: 63 | - authentication.k8s.io 64 | resources: 65 | - tokenreviews 66 | verbs: 67 | - create 68 | - apiGroups: 69 | - authorization.k8s.io 70 | resources: 71 | - subjectaccessreviews 72 | verbs: 73 | - create 74 | - apiGroups: 75 | - policy 76 | resources: 77 | - poddisruptionbudgets 78 | verbs: 79 | - list 80 | - watch 81 | - apiGroups: 82 | - certificates.k8s.io 83 | resources: 84 | - certificatesigningrequests 85 | verbs: 86 | - list 87 | - watch 88 | - apiGroups: 89 | - storage.k8s.io 90 | resources: 91 | - storageclasses 92 | - volumeattachments 93 | verbs: 94 | - list 95 | - watch 96 | - apiGroups: 97 | - admissionregistration.k8s.io 98 | resources: 99 | - mutatingwebhookconfigurations 100 | - validatingwebhookconfigurations 101 | verbs: 102 | - list 103 | - watch 104 | - apiGroups: 105 | - networking.k8s.io 106 | resources: 107 | - networkpolicies 108 | verbs: 109 | - list 110 | - watch 111 | 
-------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v1.9.8/deployment.yaml: --------------------------------------------------------------------------------
# Deployment running a single kube-state-metrics v1.9.8 replica in the
# ops-monit namespace. Port 8080 (http-metrics) backs the liveness probe and
# port 8081 (telemetry) backs the readiness probe.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: v1.9.8
  name: kube-state-metrics
  namespace: ops-monit
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: kube-state-metrics
  template:
    metadata:
      labels:
        app.kubernetes.io/name: kube-state-metrics
        app.kubernetes.io/version: v1.9.8
    spec:
      containers:
      - image: registry.cn-shenzhen.aliyuncs.com/starsl/kube-state-metrics:v1.9.8
        livenessProbe:
          httpGet:
            path: /healthz
            port: 8080
          initialDelaySeconds: 5
          timeoutSeconds: 5
        name: kube-state-metrics
        ports:
        - containerPort: 8080
          name: http-metrics
        - containerPort: 8081
          name: telemetry
        readinessProbe:
          httpGet:
            path: /
            port: 8081
          initialDelaySeconds: 5
          timeoutSeconds: 5
      nodeSelector:
        # Stable OS label; the beta.kubernetes.io/os form is deprecated.
        # Matches the selector used by the v2.x manifests in this repository.
        kubernetes.io/os: linux
      serviceAccountName: kube-state-metrics
-------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v1.9.8/service-account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: kube-state-metrics 6 | app.kubernetes.io/version: v1.9.8 7 | name: kube-state-metrics 8 | namespace: ops-monit 9 | -------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v1.9.8/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | # annotations: 5 | # prometheus.io/scrape: 
'true' 6 | labels: 7 | app.kubernetes.io/name: kube-state-metrics 8 | app.kubernetes.io/version: v1.9.8 9 | name: kube-state-metrics 10 | namespace: ops-monit 11 | spec: 12 | clusterIP: None 13 | ports: 14 | - name: http-metrics 15 | port: 8080 16 | targetPort: http-metrics 17 | - name: telemetry 18 | port: 8081 19 | targetPort: telemetry 20 | selector: 21 | app.kubernetes.io/name: kube-state-metrics 22 | -------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v2.11.0/cluster-role-binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: kube-state-metrics 6 | app.kubernetes.io/version: v2.11.0 7 | name: kube-state-metrics 8 | roleRef: 9 | apiGroup: rbac.authorization.k8s.io 10 | kind: ClusterRole 11 | name: kube-state-metrics 12 | subjects: 13 | - kind: ServiceAccount 14 | name: kube-state-metrics 15 | namespace: ops-monit 16 | -------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v2.11.0/cluster-role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: kube-state-metrics 6 | app.kubernetes.io/version: v2.11.0 7 | name: kube-state-metrics 8 | rules: 9 | - apiGroups: 10 | - "" 11 | resources: 12 | - configmaps 13 | - secrets 14 | - nodes 15 | - pods 16 | - services 17 | - resourcequotas 18 | - replicationcontrollers 19 | - limitranges 20 | - persistentvolumeclaims 21 | - persistentvolumes 22 | - namespaces 23 | - endpoints 24 | verbs: 25 | - list 26 | - watch 27 | - apiGroups: 28 | - extensions 29 | resources: 30 | - daemonsets 31 | - deployments 32 | - replicasets 33 | - ingresses 34 | verbs: 35 | - list 36 | - watch 37 | - apiGroups: 38 
| - apps 39 | resources: 40 | - statefulsets 41 | - daemonsets 42 | - deployments 43 | - replicasets 44 | verbs: 45 | - list 46 | - watch 47 | - apiGroups: 48 | - batch 49 | resources: 50 | - cronjobs 51 | - jobs 52 | verbs: 53 | - list 54 | - watch 55 | - apiGroups: 56 | - autoscaling 57 | resources: 58 | - horizontalpodautoscalers 59 | verbs: 60 | - list 61 | - watch 62 | - apiGroups: 63 | - authentication.k8s.io 64 | resources: 65 | - tokenreviews 66 | verbs: 67 | - create 68 | - apiGroups: 69 | - authorization.k8s.io 70 | resources: 71 | - subjectaccessreviews 72 | verbs: 73 | - create 74 | - apiGroups: 75 | - policy 76 | resources: 77 | - poddisruptionbudgets 78 | verbs: 79 | - list 80 | - watch 81 | - apiGroups: 82 | - certificates.k8s.io 83 | resources: 84 | - certificatesigningrequests 85 | verbs: 86 | - list 87 | - watch 88 | - apiGroups: 89 | - storage.k8s.io 90 | resources: 91 | - storageclasses 92 | - volumeattachments 93 | verbs: 94 | - list 95 | - watch 96 | - apiGroups: 97 | - admissionregistration.k8s.io 98 | resources: 99 | - mutatingwebhookconfigurations 100 | - validatingwebhookconfigurations 101 | verbs: 102 | - list 103 | - watch 104 | - apiGroups: 105 | - networking.k8s.io 106 | resources: 107 | - networkpolicies 108 | - ingresses 109 | verbs: 110 | - list 111 | - watch 112 | - apiGroups: 113 | - coordination.k8s.io 114 | resources: 115 | - leases 116 | verbs: 117 | - list 118 | - watch 119 | -------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v2.11.0/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: kube-state-metrics 6 | app.kubernetes.io/version: v2.11.0 7 | name: kube-state-metrics 8 | namespace: ops-monit 9 | spec: 10 | replicas: 1 11 | selector: 12 | matchLabels: 13 | app.kubernetes.io/name: kube-state-metrics 14 | template: 15 | 
metadata: 16 | labels: 17 | app.kubernetes.io/name: kube-state-metrics 18 | app.kubernetes.io/version: v2.11.0 19 | spec: 20 | containers: 21 | - image: registry.cn-shenzhen.aliyuncs.com/starsl/kube-state-metrics:v2.11.0 22 | livenessProbe: 23 | httpGet: 24 | path: /healthz 25 | port: 8080 26 | initialDelaySeconds: 5 27 | timeoutSeconds: 5 28 | name: kube-state-metrics 29 | ports: 30 | - containerPort: 8080 31 | name: http-metrics 32 | - containerPort: 8081 33 | name: telemetry 34 | readinessProbe: 35 | httpGet: 36 | path: / 37 | port: 8081 38 | initialDelaySeconds: 5 39 | timeoutSeconds: 5 40 | nodeSelector: 41 | kubernetes.io/os: linux 42 | serviceAccountName: kube-state-metrics 43 | -------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v2.11.0/service-account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: kube-state-metrics 6 | app.kubernetes.io/version: v2.11.0 7 | name: kube-state-metrics 8 | namespace: ops-monit 9 | -------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v2.11.0/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | # annotations: 5 | # prometheus.io/scrape: 'true' 6 | labels: 7 | app.kubernetes.io/name: kube-state-metrics 8 | app.kubernetes.io/version: v2.11.0 9 | name: kube-state-metrics 10 | namespace: ops-monit 11 | spec: 12 | clusterIP: None 13 | ports: 14 | - name: http-metrics 15 | port: 8080 16 | targetPort: http-metrics 17 | - name: telemetry 18 | port: 8081 19 | targetPort: telemetry 20 | selector: 21 | app.kubernetes.io/name: kube-state-metrics 22 | -------------------------------------------------------------------------------- 
# /kubernetes/kube-state-metrics_v2.12.0/cluster-role-binding.yaml
# Binds the kube-state-metrics ClusterRole to the kube-state-metrics
# ServiceAccount in the ops-monit namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: v2.12.0
  name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
subjects:
- kind: ServiceAccount
  name: kube-state-metrics
  namespace: ops-monit
---
# /kubernetes/kube-state-metrics_v2.12.0/cluster-role.yaml
# Cluster-wide permissions for kube-state-metrics: list/watch on the listed
# resources, plus create on tokenreviews and subjectaccessreviews.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: v2.12.0
  name: kube-state-metrics
rules:
- apiGroups:
  - ""
  resources:
  - configmaps
  - secrets
  - nodes
  - pods
  - services
  - resourcequotas
  - replicationcontrollers
  - limitranges
  - persistentvolumeclaims
  - persistentvolumes
  - namespaces
  - endpoints
  verbs:
  - list
  - watch
- apiGroups:
  - extensions
  resources:
  - daemonsets
  - deployments
  - replicasets
  - ingresses
  verbs:
  - list
  - watch
- apiGroups:
  - apps
  resources:
  - statefulsets
  - daemonsets
  - deployments
  - replicasets
  verbs:
  - list
  - watch
- apiGroups:
  - batch
  resources:
  - cronjobs
  - jobs
  verbs:
  - list
  - watch
- apiGroups:
  - autoscaling
  resources:
  - horizontalpodautoscalers
  verbs:
  - list
  - watch
- apiGroups:
  - authentication.k8s.io
  resources:
  - tokenreviews
  verbs:
  - create
- apiGroups:
  - authorization.k8s.io
  resources:
  - subjectaccessreviews
  verbs:
  - create
- apiGroups:
  - policy
  resources:
  - poddisruptionbudgets
  verbs:
  - list
  - watch
- apiGroups:
  - certificates.k8s.io
  resources:
  - certificatesigningrequests
  verbs:
  - list
  - watch
- apiGroups:
  - storage.k8s.io
  resources:
  - storageclasses
  - volumeattachments
  verbs:
  - list
  - watch
- apiGroups:
  - admissionregistration.k8s.io
  resources:
  - mutatingwebhookconfigurations
  - validatingwebhookconfigurations
  verbs:
  - list
  - watch
- apiGroups:
  - networking.k8s.io
  resources:
  - networkpolicies
  - ingresses
  verbs:
  - list
  - watch
- apiGroups:
  - coordination.k8s.io
  resources:
  - leases
  verbs:
  - list
  - watch
---
# /kubernetes/kube-state-metrics_v2.12.0/deployment.yaml
# Single-replica Deployment of kube-state-metrics v2.12.0 in ops-monit.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: v2.12.0
  name: kube-state-metrics
  namespace: ops-monit
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: kube-state-metrics
  template:
    metadata:
      labels:
        app.kubernetes.io/name: kube-state-metrics
        app.kubernetes.io/version: v2.12.0
    spec:
      containers:
      - image: registry.cn-shenzhen.aliyuncs.com/starsl/kube-state-metrics:v2.12.0
        # Liveness is probed on the http-metrics port (8080).
        livenessProbe:
          httpGet:
            path: /healthz
            port: 8080
          initialDelaySeconds: 5
          timeoutSeconds: 5
        name: kube-state-metrics
        ports:
        - containerPort: 8080
          name: http-metrics
        - containerPort: 8081
          name: telemetry
        # Readiness is probed on the telemetry port (8081).
        readinessProbe:
          httpGet:
            path: /
            port: 8081
          initialDelaySeconds: 5
          timeoutSeconds: 5
      nodeSelector:
        kubernetes.io/os: linux
      serviceAccountName: kube-state-metrics
---
# /kubernetes/kube-state-metrics_v2.12.0/service-account.yaml
# Identity the Deployment runs as; referenced by the ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: v2.12.0
  name: kube-state-metrics
  namespace: ops-monit
---
# /kubernetes/kube-state-metrics_v2.12.0/service.yaml
# Headless Service (clusterIP: None) exposing the two named container ports.
apiVersion: v1
kind: Service
metadata:
  # annotations:
  #   prometheus.io/scrape: 'true'
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: v2.12.0
  name: kube-state-metrics
  namespace: ops-monit
spec:
  clusterIP: None
  ports:
  - name: http-metrics
    port: 8080
    targetPort: http-metrics
  - name: telemetry
    port: 8081
    targetPort: telemetry
  selector:
    app.kubernetes.io/name: kube-state-metrics
---
# /kubernetes/kube-state-metrics_v2.2.1/cluster-role-binding.yaml
# Binds the kube-state-metrics ClusterRole to the kube-state-metrics
# ServiceAccount in the ops-monit namespace.
# NOTE: the v2.2.1 objects below use the same names as the v2.12.0 objects
# above; the two directories are alternative versions of the same install.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: v2.2.1
  name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
subjects:
- kind: ServiceAccount
  name: kube-state-metrics
  namespace: ops-monit
---
# /kubernetes/kube-state-metrics_v2.2.1/cluster-role.yaml
# Cluster-wide permissions for kube-state-metrics: list/watch on the listed
# resources, plus create on tokenreviews and subjectaccessreviews.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: v2.2.1
  name: kube-state-metrics
rules:
- apiGroups:
  - ""
  resources:
  - configmaps
  - secrets
  - nodes
  - pods
  - services
  - resourcequotas
  - replicationcontrollers
  - limitranges
  - persistentvolumeclaims
  - persistentvolumes
  - namespaces
  - endpoints
  verbs:
  - list
  - watch
- apiGroups:
  - extensions
  resources:
  - daemonsets
  - deployments
  - replicasets
  - ingresses
  verbs:
  - list
  - watch
- apiGroups:
  - apps
  resources:
  - statefulsets
  - daemonsets
  - deployments
  - replicasets
  verbs:
  - list
  - watch
- apiGroups:
  - batch
  resources:
  - cronjobs
  - jobs
  verbs:
  - list
  - watch
- apiGroups:
  - autoscaling
  resources:
  - horizontalpodautoscalers
  verbs:
  - list
  - watch
- apiGroups:
  - authentication.k8s.io
  resources:
  - tokenreviews
  verbs:
  - create
- apiGroups:
  - authorization.k8s.io
  resources:
  - subjectaccessreviews
  verbs:
  - create
- apiGroups:
  - policy
  resources:
  - poddisruptionbudgets
  verbs:
  - list
  - watch
- apiGroups:
  - certificates.k8s.io
  resources:
  - certificatesigningrequests
  verbs:
  - list
  - watch
- apiGroups:
  - storage.k8s.io
  resources:
  - storageclasses
  - volumeattachments
  verbs:
  - list
  - watch
- apiGroups:
  - admissionregistration.k8s.io
  resources:
  - mutatingwebhookconfigurations
  - validatingwebhookconfigurations
  verbs:
  - list
  - watch
- apiGroups:
  - networking.k8s.io
  resources:
  - networkpolicies
  # Added: ingresses were previously granted only under the legacy
  # "extensions" group. The v2.12.0 role in this repository grants them under
  # networking.k8s.io, which is where current clusters serve Ingress.
  # NOTE(review): kube-state-metrics 2.x is expected to list Ingress via
  # networking.k8s.io/v1 - confirm against the v2.2.1 release manifests.
  - ingresses
  verbs:
  - list
  - watch
# Added for parity with the v2.12.0 role in this repository, which grants
# list/watch on leases.
- apiGroups:
  - coordination.k8s.io
  resources:
  - leases
  verbs:
  - list
  - watch
---
# /kubernetes/kube-state-metrics_v2.2.1/deployment.yaml
# Single-replica Deployment of kube-state-metrics v2.2.1 in ops-monit.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: v2.2.1
  name: kube-state-metrics
  namespace: ops-monit
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: kube-state-metrics
  template:
    metadata:
      labels:
        app.kubernetes.io/name: kube-state-metrics
        app.kubernetes.io/version: v2.2.1
    spec:
      containers:
      - image: registry.cn-shenzhen.aliyuncs.com/starsl/kube-state-metrics:v2.2.1
        # Liveness is probed on the http-metrics port (8080).
        livenessProbe:
          httpGet:
            path: /healthz
            port: 8080
          initialDelaySeconds: 5
          timeoutSeconds: 5
        name: kube-state-metrics
        ports:
        - containerPort: 8080
          name: http-metrics
        - containerPort: 8081
          name: telemetry
        # Readiness is probed on the telemetry port (8081).
        readinessProbe:
          httpGet:
            path: /
            port: 8081
          initialDelaySeconds: 5
          timeoutSeconds: 5
      nodeSelector:
        # Was beta.kubernetes.io/os, a deprecated node label; the stable
        # kubernetes.io/os label is what the v2.12.0 deployment uses.
        kubernetes.io/os: linux
      serviceAccountName: kube-state-metrics
---
# /kubernetes/kube-state-metrics_v2.2.1/service-account.yaml
# Identity the Deployment runs as; referenced by the ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/version: v2.2.1
  name: kube-state-metrics
  namespace: ops-monit
-------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v2.2.1/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | # annotations: 5 | # prometheus.io/scrape: 'true' 6 | labels: 7 | app.kubernetes.io/name: kube-state-metrics 8 | app.kubernetes.io/version: v2.2.1 9 | name: kube-state-metrics 10 | namespace: ops-monit 11 | spec: 12 | clusterIP: None 13 | ports: 14 | - name: http-metrics 15 | port: 8080 16 | targetPort: http-metrics 17 | - name: telemetry 18 | port: 8081 19 | targetPort: telemetry 20 | selector: 21 | app.kubernetes.io/name: kube-state-metrics 22 | -------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v2.3.0/cluster-role-binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: kube-state-metrics 6 | app.kubernetes.io/version: v2.3.0 7 | name: kube-state-metrics 8 | roleRef: 9 | apiGroup: rbac.authorization.k8s.io 10 | kind: ClusterRole 11 | name: kube-state-metrics 12 | subjects: 13 | - kind: ServiceAccount 14 | name: kube-state-metrics 15 | namespace: ops-monit 16 | -------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v2.3.0/cluster-role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: kube-state-metrics 6 | app.kubernetes.io/version: v2.3.0 7 | name: kube-state-metrics 8 | rules: 9 | - apiGroups: 10 | - "" 11 | resources: 12 | - configmaps 13 | - secrets 14 | - nodes 15 | - pods 16 | - services 17 | - resourcequotas 18 | - replicationcontrollers 19 | - limitranges 20 
| - persistentvolumeclaims 21 | - persistentvolumes 22 | - namespaces 23 | - endpoints 24 | verbs: 25 | - list 26 | - watch 27 | - apiGroups: 28 | - extensions 29 | resources: 30 | - daemonsets 31 | - deployments 32 | - replicasets 33 | - ingresses 34 | verbs: 35 | - list 36 | - watch 37 | - apiGroups: 38 | - apps 39 | resources: 40 | - statefulsets 41 | - daemonsets 42 | - deployments 43 | - replicasets 44 | verbs: 45 | - list 46 | - watch 47 | - apiGroups: 48 | - batch 49 | resources: 50 | - cronjobs 51 | - jobs 52 | verbs: 53 | - list 54 | - watch 55 | - apiGroups: 56 | - autoscaling 57 | resources: 58 | - horizontalpodautoscalers 59 | verbs: 60 | - list 61 | - watch 62 | - apiGroups: 63 | - authentication.k8s.io 64 | resources: 65 | - tokenreviews 66 | verbs: 67 | - create 68 | - apiGroups: 69 | - authorization.k8s.io 70 | resources: 71 | - subjectaccessreviews 72 | verbs: 73 | - create 74 | - apiGroups: 75 | - policy 76 | resources: 77 | - poddisruptionbudgets 78 | verbs: 79 | - list 80 | - watch 81 | - apiGroups: 82 | - certificates.k8s.io 83 | resources: 84 | - certificatesigningrequests 85 | verbs: 86 | - list 87 | - watch 88 | - apiGroups: 89 | - storage.k8s.io 90 | resources: 91 | - storageclasses 92 | - volumeattachments 93 | verbs: 94 | - list 95 | - watch 96 | - apiGroups: 97 | - admissionregistration.k8s.io 98 | resources: 99 | - mutatingwebhookconfigurations 100 | - validatingwebhookconfigurations 101 | verbs: 102 | - list 103 | - watch 104 | - apiGroups: 105 | - networking.k8s.io 106 | resources: 107 | - networkpolicies 108 | - ingresses 109 | verbs: 110 | - list 111 | - watch 112 | - apiGroups: 113 | - coordination.k8s.io 114 | resources: 115 | - leases 116 | verbs: 117 | - list 118 | - watch 119 | -------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v2.3.0/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: 
Deployment 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: kube-state-metrics 6 | app.kubernetes.io/version: v2.3.0 7 | name: kube-state-metrics 8 | namespace: ops-monit 9 | spec: 10 | replicas: 1 11 | selector: 12 | matchLabels: 13 | app.kubernetes.io/name: kube-state-metrics 14 | template: 15 | metadata: 16 | labels: 17 | app.kubernetes.io/name: kube-state-metrics 18 | app.kubernetes.io/version: v2.3.0 19 | spec: 20 | containers: 21 | - image: registry.cn-shenzhen.aliyuncs.com/starsl/kube-state-metrics:v2.3.0 22 | livenessProbe: 23 | httpGet: 24 | path: /healthz 25 | port: 8080 26 | initialDelaySeconds: 5 27 | timeoutSeconds: 5 28 | name: kube-state-metrics 29 | ports: 30 | - containerPort: 8080 31 | name: http-metrics 32 | - containerPort: 8081 33 | name: telemetry 34 | readinessProbe: 35 | httpGet: 36 | path: / 37 | port: 8081 38 | initialDelaySeconds: 5 39 | timeoutSeconds: 5 40 | nodeSelector: 41 | beta.kubernetes.io/os: linux 42 | serviceAccountName: kube-state-metrics 43 | -------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v2.3.0/service-account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: kube-state-metrics 6 | app.kubernetes.io/version: v2.3.0 7 | name: kube-state-metrics 8 | namespace: ops-monit 9 | -------------------------------------------------------------------------------- /kubernetes/kube-state-metrics_v2.3.0/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | # annotations: 5 | # prometheus.io/scrape: 'true' 6 | labels: 7 | app.kubernetes.io/name: kube-state-metrics 8 | app.kubernetes.io/version: v2.3.0 9 | name: kube-state-metrics 10 | namespace: ops-monit 11 | spec: 12 | clusterIP: None 13 | ports: 14 | - name: http-metrics 15 | port: 8080 16 | 
targetPort: http-metrics 17 | - name: telemetry 18 | port: 8081 19 | targetPort: telemetry 20 | selector: 21 | app.kubernetes.io/name: kube-state-metrics 22 | -------------------------------------------------------------------------------- /kubernetes/screenshot/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starsliao/Prometheus/1936cda22403a97af95048435591df84c97c7f08/kubernetes/screenshot/1.png -------------------------------------------------------------------------------- /kubernetes/screenshot/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starsliao/Prometheus/1936cda22403a97af95048435591df84c97c7f08/kubernetes/screenshot/2.png -------------------------------------------------------------------------------- /kubernetes/screenshot/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starsliao/Prometheus/1936cda22403a97af95048435591df84c97c7f08/kubernetes/screenshot/3.png -------------------------------------------------------------------------------- /kubernetes/screenshot/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starsliao/Prometheus/1936cda22403a97af95048435591df84c97c7f08/kubernetes/screenshot/4.png -------------------------------------------------------------------------------- /linux_proc_monit/README.md: -------------------------------------------------------------------------------- 1 | ### 基于推送方式的进程监控脚本 2 | 说明:监控进程所在主机无需启动后台服务方式监控,而是使用定时任务来推送监控指标到pushgateway。 3 | ##### 【进程监控-1655180951178.json】 为grafana看板,导入即可。 4 | ### grafana 看板 5 | ![图片](https://user-images.githubusercontent.com/3349611/224199577-fd131800-c998-434d-8fbe-3ece8c24e5c0.png) 6 | 7 | 1. 
部署Pushgateway并增加到Prometheus 8 | ``` 9 | mkdir /opt/pushgateway/ 10 | wget https://github.com/prometheus/pushgateway/releases/download/v1.4.3/pushgateway-1.4.3.linux-amd64.tar.gz -O /opt/pushgateway/pushgateway-1.4.3.linux-amd64.tar.gz 11 | cd /opt/pushgateway/ 12 | tar -zxvf pushgateway-1.4.3.linux-amd64.tar.gz 13 | 14 | cat > /etc/systemd/system/pushgateway.service < 86400: 44 | # try: 45 | # res = urllib.request.urlopen('http://100.100.100.200/latest/meta-data/instance-id',timeout=1) 46 | # iid = res.read().decode('utf-8') 47 | # except: 48 | # iid = f"{socket.gethostname()}_{[(s.connect(('114.114.114.114', 53)), s.getsockname()[0], s.close()) for s in [socket.socket(socket.AF_INET, socket.SOCK_DGRAM)]][0][1]}" 49 | # cfg['instance'] = iid 50 | # with open(yaml_path, 'w') as fw: 51 | # yaml.dump(cfg, fw) 52 | # print('update:' + yaml_path) 53 | #print(cfg) 54 | 55 | REGISTRY = CollectorRegistry(auto_describe=False) 56 | linux_proc_error = Gauge(f'linux_proc_error', f"LINUX_进程异常指标", ["instance", "A00_iid", "iexe", "iparam", "icwd"],registry=REGISTRY) 57 | linux_proc_info_list = ["instance", "A00_iid", "iexe", "iparam", "icwd", "pid", "name", "status", "is_running", "exe", "cmdline", "parent", "username", "port"] 58 | linux_proc_info = Gauge("linux_proc_info", "LINUX_进程信息指标", linux_proc_info_list,registry=REGISTRY) 59 | metric_list = ["io_read_count","io_write_count","io_read_bytes","io_write_bytes","cpu_user","cpu_system","cpu_children_user","cpu_children_system","cpu_iowait","memory_rss","memory_vms","memory_shared","memory_swap","memory_text","memory_data","num_open_files","num_fds_limit","num_fds","cpu_num","num_threads","num_children","cpu_percent","memory_percent","durn"] 60 | 61 | metric_dict = {} 62 | for li in metric_list: 63 | metric_dict[li] = {} 64 | 65 | instance = cfg['instance'] 66 | A00_iid = cfg['instance'] 67 | inum = 0 68 | cpu_count = psutil.cpu_count() 69 | for app in cfg['apps']: 70 | iexe = app['iexe'] 71 | iparam = app['iparam'] 72 | icwd = 
app['icwd'] 73 | proc_app = [i for i in psutil.process_iter() if icwd == i.cwd() and iparam in i.cmdline() and iexe in i.cmdline()] 74 | if len(proc_app) >= 1: 75 | inum = inum + 1 76 | if len(proc_app) > 1: 77 | pids = [i for i in proc_app if i.ppid() == 1] 78 | if len(pids) >= 1: 79 | appinfo = pids[0] 80 | else: 81 | app_pid = Counter([i.ppid() for i in proc_app]).most_common(1)[0][0] 82 | appinfo = psutil.Process(app_pid) 83 | print(iexe,iparam,'ppid:',app_pid) 84 | if appinfo in proc_app: 85 | pass 86 | else: 87 | # 进程有多个,父进程不在列表中,取列表中的第一个监控 88 | appinfo = proc_app[0] 89 | #linux_proc_error.labels(instance, A00_iid, iexe, iparam, icwd).set(len(proc_app)) 90 | #continue 91 | else: 92 | appinfo = proc_app[0] 93 | pid = appinfo.pid 94 | name = appinfo.name() 95 | status = appinfo.status() 96 | is_running = appinfo.is_running() 97 | exe = appinfo.exe() 98 | cmdline = ' '.join(appinfo.cmdline()) 99 | parent = f'{appinfo.parent().pid}/{appinfo.parent().name()}' 100 | durn = datetime.datetime.now().timestamp() - appinfo.create_time() 101 | username = appinfo.username() 102 | connections = appinfo.connections('all') 103 | port = '/'.join(sorted([f'{x.laddr.port}' for x in connections if x.status == 'LISTEN'],key=int)) 104 | linux_proc_info.labels(instance, A00_iid, iexe, iparam, icwd, pid, name, status, is_running, exe, cmdline, parent, username, port).set(1) 105 | 106 | io_counters = appinfo.io_counters() 107 | metric_dict["io_read_count"][pid] = io_counters.read_count 108 | metric_dict["io_write_count"][pid] = io_counters.write_count 109 | metric_dict["io_read_bytes"][pid] = io_counters.read_bytes 110 | metric_dict["io_write_bytes"][pid] = io_counters.write_bytes 111 | 112 | cpu_times = appinfo.cpu_times() 113 | metric_dict["cpu_user"][pid] = cpu_times.user 114 | metric_dict["cpu_system"][pid] = cpu_times.system 115 | metric_dict["cpu_children_user"][pid] = cpu_times.children_user 116 | metric_dict["cpu_children_system"][pid] = cpu_times.children_system 117 | 
metric_dict["cpu_iowait"][pid] = cpu_times.iowait 118 | 119 | memory_info = appinfo.memory_full_info() 120 | metric_dict["memory_rss"][pid] = memory_info.rss 121 | metric_dict["memory_vms"][pid] = memory_info.vms 122 | metric_dict["memory_shared"][pid] = memory_info.shared 123 | metric_dict["memory_swap"][pid] = memory_info.swap 124 | metric_dict["memory_text"][pid] = memory_info.text 125 | metric_dict["memory_data"][pid] = memory_info.data 126 | 127 | metric_dict["num_open_files"][pid] = len(appinfo.open_files()) 128 | metric_dict["num_fds_limit"][pid] = appinfo.rlimit(psutil.RLIMIT_NOFILE)[0] 129 | metric_dict["num_fds"][pid] = appinfo.num_fds() 130 | metric_dict["cpu_num"][pid] = appinfo.cpu_num() 131 | metric_dict["num_threads"][pid] = appinfo.num_threads() 132 | metric_dict["num_children"][pid] = len(appinfo.children()) 133 | metric_dict["cpu_percent"][pid] = appinfo.cpu_percent(interval=1) 134 | #metric_dict["cpu_total_percent"][pid] = round(metric_dict["cpu_percent"][pid] / (cpu_count * 100),2) * 100 135 | metric_dict["memory_percent"][pid] = appinfo.memory_percent() 136 | metric_dict["durn"][pid] = datetime.datetime.now().timestamp() - appinfo.create_time() 137 | 138 | connections_sum = Counter([con.status for con in connections]) 139 | for k,v in connections_sum.items(): 140 | if f'conn_{k.lower()}' not in metric_dict: 141 | metric_dict[f'conn_{k.lower()}'] = {pid:v} 142 | else: 143 | metric_dict[f'conn_{k.lower()}'][pid] = v 144 | else: 145 | linux_proc_error.labels(instance, A00_iid, iexe, iparam, icwd).set(len(proc_app)) 146 | # print(inum, metric_dict) 147 | if inum != 0: 148 | for mk,mv in metric_dict.items(): 149 | linux_proc_metric = Gauge(f'linux_proc_{mk}', f"LINUX_进程指标:{mk}", ["instance", "A00_iid", "pid"],registry=REGISTRY) 150 | for ik,iv in mv.items(): 151 | linux_proc_metric.labels(instance, A00_iid, ik).set(iv) 152 | 153 | result = generate_latest(REGISTRY).decode() 154 | print(result) 155 | 
-------------------------------------------------------------------------------- /linux_proc_monit/node-exporter/node-exporter_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $(id -u) != "0" ]; then 4 | echo "请使用root用户执行。" 5 | exit 1 6 | fi 7 | ##部署node-exporter 8 | 9 | if ss -nlptu|grep ":9100" 10 | then 11 | echo -e "\n\n9100端口存在,请检查是否已经部署node-exporter!\n\n" 12 | exit 1 13 | else 14 | Install_PATH=/opt/monit 15 | # Download_PATH='https://dingdingpushpic.oss-cn-shenzhen.aliyuncs.com/cassops/install/node_exporter-1.7.0.linux-amd64.tar.gz' 16 | Download_PATH='https://github.com/prometheus/node_exporter/releases/download/v1.8.2/node_exporter-1.8.2.linux-amd64.tar.gz' 17 | Filename=`echo ${Download_PATH}|awk -F / '{print $NF}'|sed 's/.tar.gz//g'` 18 | rm -rf ${Install_PATH}/node_exporter/ 19 | mkdir -p ${Install_PATH}/textfile 20 | cd ${Install_PATH} 21 | wget ${Download_PATH} 22 | tar -zxvf ${Filename}.tar.gz 23 | mv ${Filename} node_exporter 24 | chmod +x ${Install_PATH}/node_exporter/node_exporter 25 | rm -rvf ${Filename}.tar.gz 26 | cat >/etc/systemd/system/node_exporter.service <<-EOF 27 | [Unit] 28 | Description=Prometheus Node Exporter 29 | After=network.target 30 | [Service] 31 | Type=simple 32 | ExecStart=${Install_PATH}/node_exporter/node_exporter \\ 33 | --no-collector.arp \\ 34 | --no-collector.nfs \\ 35 | --no-collector.wifi \\ 36 | --no-collector.ipvs \\ 37 | --no-collector.mdadm \\ 38 | --no-collector.zfs \\ 39 | --no-collector.infiniband \\ 40 | --log.level=error \\ 41 | --web.listen-address=0.0.0.0:9100 \\ 42 | --collector.textfile.directory=${Install_PATH}/textfile 43 | SyslogIdentifier=node_exporter 44 | Restart=always 45 | [Install] 46 | WantedBy=multi-user.target 47 | EOF 48 | systemctl daemon-reload 49 | systemctl restart node_exporter 50 | systemctl enable node_exporter 51 | systemctl status node_exporter 52 | echo -e "\n\n" 53 | ps -ef|grep node_exporter|grep -v grep 54 | 
echo -e "\n" 55 | netstat -naptu|grep ":9100" 56 | fi 57 | -------------------------------------------------------------------------------- /linux_proc_monit/node-exporter/prom_rule.yaml: -------------------------------------------------------------------------------- 1 | - name: Linux-Proc 2 | rules: 3 | - alert: 业务进程CPU过高 4 | expr: (linux_proc_cpu_percent > 180) * on (A00_iid,pid) group_left(A01_iaccount,A02_iname,A03_igroup,A04_izone,A05_ienv,ifile,icwd) total:linux_proc_info 5 | for: 2m 6 | labels: 7 | alertype: proc 8 | severity: critical 9 | annotations: 10 | description: "{{ $labels.A02_iname }}({{ $labels.A03_igroup }}-{{ $labels.A05_ienv }})-{{ $labels.pid }}进程{{ $labels.ifile }}({{ $labels.icwd }}):CPU使用率为{{ $value | humanize }}\n> {{ $labels.A01_iaccount}}-{{ $labels.A04_izone }}_{{ $labels.A00_iid }} [【详细】](http://prometheus.casstime.com/d/yoPfIkqWk/a-li-yun-jin-cheng-jian-kong?orgId=1&var-A01_iaccount=All&var-A05_ienv=All&var-A03_igroup=All&var-A02_iname=All&var-A00_iid={{ $labels.A00_iid }}&var-pid={{ $labels.pid }})" 11 | 12 | - alert: 业务进程内存过高 13 | expr: (linux_proc_memory_percent > 85) * on (A00_iid,pid) group_left(A01_iaccount,A02_iname,A03_igroup,A04_izone,A05_ienv,ifile,icwd) total:linux_proc_info 14 | for: 2m 15 | labels: 16 | alertype: proc 17 | severity: critical 18 | annotations: 19 | description: "{{ $labels.A02_iname }}({{ $labels.A03_igroup }}-{{ $labels.A05_ienv }})-{{ $labels.pid }}进程{{ $labels.ifile }}({{ $labels.icwd }}):内存使用率为{{ $value | humanize }}%\n> {{ $labels.A01_iaccount}}-{{ $labels.A04_izone }}_{{ $labels.A00_iid }} [【详细】](http://prometheus.casstime.com/d/yoPfIkqWk/a-li-yun-jin-cheng-jian-kong?orgId=1&var-A01_iaccount=All&var-A05_ienv=All&var-A03_igroup=All&var-A02_iname=All&var-A00_iid={{ $labels.A00_iid }}&var-pid={{ $labels.pid }})" 20 | 21 | - alert: 业务进程IOPS过高 22 | expr: (irate(linux_proc_io_read_count[5m]) > 8000)* on (A00_iid,pid) group_left(A01_iaccount,A02_iname,A03_igroup,A04_izone,A05_ienv,ifile,icwd) 
total:linux_proc_info or (irate(linux_proc_io_write_count[5m]) > 8000)* on (A00_iid,pid) group_left(A01_iaccount,A02_iname,A03_igroup,A04_izone,A05_ienv,ifile,icwd) total:linux_proc_info 23 | for: 5m 24 | labels: 25 | alertype: proc 26 | severity: critical 27 | annotations: 28 | description: "{{ $labels.A02_iname }}({{ $labels.A03_igroup }}-{{ $labels.A05_ienv }})-{{ $labels.pid }}进程{{ $labels.ifile }}({{ $labels.icwd }}):IOPS为{{ $value | humanize }}\n> {{ $labels.A01_iaccount}}-{{ $labels.A04_izone }}_{{ $labels.A00_iid }} [【详细】](http://prometheus.casstime.com/d/yoPfIkqWk/a-li-yun-jin-cheng-jian-kong?orgId=1&var-A01_iaccount=All&var-A05_ienv=All&var-A03_igroup=All&var-A02_iname=All&var-A00_iid={{ $labels.A00_iid }}&var-pid={{ $labels.pid }})" 29 | 30 | - alert: 业务进程打开文件描述符过高 31 | expr: (linux_proc_num_fds/ linux_proc_num_fds_limit >0.7) * on (A00_iid,pid) group_left(A01_iaccount,A02_iname,A03_igroup,A04_izone,A05_ienv,ifile,icwd) total:linux_proc_info 32 | for: 2m 33 | labels: 34 | alertype: proc 35 | severity: critical 36 | annotations: 37 | description: "{{ $labels.A02_iname }}({{ $labels.A03_igroup }}-{{ $labels.A05_ienv }})-{{ $labels.pid }}进程{{ $labels.ifile }}({{ $labels.icwd }}):打开文件描述符超过70%\n> {{ $labels.A01_iaccount}}-{{ $labels.A04_izone }}_{{ $labels.A00_iid }} [【详细】](http://prometheus.casstime.com/d/yoPfIkqWk/a-li-yun-jin-cheng-jian-kong?orgId=1&var-A01_iaccount=All&var-A05_ienv=All&var-A03_igroup=All&var-A02_iname=All&var-A00_iid={{ $labels.A00_iid }}&var-pid={{ $labels.pid }})" 38 | 39 | - alert: 业务进程停止 40 | expr: linux_proc_error * on (A00_iid) group_left(A01_iaccount,A02_iname,A03_igroup,A04_izone,A05_ienv) ali_ecs_info 41 | for: 2m 42 | labels: 43 | alertype: proc 44 | severity: critical 45 | annotations: 46 | description: "{{ $labels.A02_iname }}({{ $labels.A03_igroup }}-{{ $labels.A05_ienv }})异常进程{{ $labels.ifile }}({{ $labels.icwd }}):停止\n> {{ $labels.A01_iaccount}}-{{ $labels.A04_izone }}_{{ $labels.A00_iid }} 
[【详细】](http://prometheus.casstime.com/d/yoPfIkqWk/a-li-yun-jin-cheng-jian-kong?orgId=1&var-A01_iaccount=All&var-A05_ienv=All&var-A03_igroup=All&var-A02_iname=All&var-A00_iid={{ $labels.A00_iid }}&var-pid=All)" 47 | 48 | - alert: 业务进程重启 49 | expr: (linux_proc_durn < 3600) * on (A00_iid,pid) group_left(A01_iaccount,A02_iname,A03_igroup,A04_izone,A05_ienv,ifile,icwd) total:linux_proc_info 50 | for: 2m 51 | labels: 52 | alertype: proc 53 | severity: critical 54 | annotations: 55 | description: "{{ $labels.A02_iname }}({{ $labels.A03_igroup }}-{{ $labels.A05_ienv }})-{{ $labels.pid }}进程{{ $labels.ifile }}({{ $labels.icwd }}):启动不足1小时\n> {{ $labels.A01_iaccount}}-{{ $labels.A04_izone }}_{{ $labels.A00_iid }} [【详细】](http://prometheus.casstime.com/d/yoPfIkqWk/a-li-yun-jin-cheng-jian-kong?orgId=1&var-A01_iaccount=All&var-A05_ienv=All&var-A03_igroup=All&var-A02_iname=All&var-A00_iid={{ $labels.A00_iid }}&var-pid={{ $labels.pid }})" 56 | 57 | -------------------------------------------------------------------------------- /linux_proc_monit/node-exporter/京东云-进程监控-1727601235648.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ 3 | { 4 | "name": "DS__VICTORIAMETRICS-PROD-ALL", 5 | "label": " VictoriaMetrics-prod-all", 6 | "description": "", 7 | "type": "datasource", 8 | "pluginId": "prometheus", 9 | "pluginName": "Prometheus" 10 | } 11 | ], 12 | "__elements": {}, 13 | "__requires": [ 14 | { 15 | "type": "grafana", 16 | "id": "grafana", 17 | "name": "Grafana", 18 | "version": "11.0.0" 19 | }, 20 | { 21 | "type": "panel", 22 | "id": "graph", 23 | "name": "Graph (old)", 24 | "version": "" 25 | }, 26 | { 27 | "type": "datasource", 28 | "id": "prometheus", 29 | "name": "Prometheus", 30 | "version": "1.0.0" 31 | }, 32 | { 33 | "type": "panel", 34 | "id": "table-old", 35 | "name": "Table (old)", 36 | "version": "" 37 | } 38 | ], 39 | "annotations": { 40 | "list": [ 41 | { 42 | "$$hashKey": "object:15", 43 | "builtIn": 1, 44 | 
"datasource": { 45 | "type": "datasource", 46 | "uid": "grafana" 47 | }, 48 | "enable": true, 49 | "hide": true, 50 | "iconColor": "#e0752d", 51 | "limit": 100, 52 | "name": "Annotations & Alerts", 53 | "showIn": 0, 54 | "tags": [], 55 | "type": "dashboard" 56 | } 57 | ] 58 | }, 59 | "description": "", 60 | "editable": true, 61 | "fiscalYearStartMonth": 0, 62 | "gnetId": 7362, 63 | "graphTooltip": 0, 64 | "id": null, 65 | "links": [ 66 | { 67 | "$$hashKey": "object:1359", 68 | "asDropdown": true, 69 | "icon": "external link", 70 | "tags": [ 71 | "阿里云" 72 | ], 73 | "targetBlank": true, 74 | "type": "dashboards" 75 | } 76 | ], 77 | "panels": [ 78 | { 79 | "collapsed": false, 80 | "datasource": { 81 | "type": "prometheus", 82 | "uid": "WAYOn0FGz" 83 | }, 84 | "gridPos": { 85 | "h": 1, 86 | "w": 24, 87 | "x": 0, 88 | "y": 0 89 | }, 90 | "id": 382, 91 | "panels": [], 92 | "targets": [ 93 | { 94 | "datasource": { 95 | "type": "prometheus", 96 | "uid": "WAYOn0FGz" 97 | }, 98 | "refId": "A" 99 | } 100 | ], 101 | "title": "总览", 102 | "type": "row" 103 | }, 104 | { 105 | "columns": [], 106 | "datasource": { 107 | "type": "prometheus", 108 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 109 | }, 110 | "fontSize": "80%", 111 | "gridPos": { 112 | "h": 6, 113 | "w": 24, 114 | "x": 0, 115 | "y": 1 116 | }, 117 | "id": 408, 118 | "pageSize": 20, 119 | "showHeader": true, 120 | "sort": { 121 | "col": 9, 122 | "desc": true 123 | }, 124 | "styles": [ 125 | { 126 | "$$hashKey": "object:3621", 127 | "alias": "主机(连接到明细)", 128 | "align": "auto", 129 | "colors": [ 130 | "rgba(245, 54, 54, 0.9)", 131 | "rgba(237, 129, 40, 0.89)", 132 | "rgba(50, 172, 45, 0.97)" 133 | ], 134 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 135 | "decimals": 2, 136 | "link": true, 137 | "linkTargetBlank": true, 138 | "linkTooltip": "${__cell_2},实例:${__cell_1}", 139 | "linkUrl": "/d/1w7YjB_Zz/a-li-yun-ecsxi-tong-zi-yuan-jian-kong-ming-xi?orgId=1&var-A01_iaccount=All&var-A02_iname=All&var-A00_iid=${__cell_1}", 140 | 
"mappingType": 1, 141 | "pattern": "A02_iname", 142 | "thresholds": [], 143 | "type": "number", 144 | "unit": "short" 145 | }, 146 | { 147 | "$$hashKey": "object:3632", 148 | "alias": "所属组", 149 | "align": "auto", 150 | "colors": [ 151 | "rgba(245, 54, 54, 0.9)", 152 | "rgba(237, 129, 40, 0.89)", 153 | "rgba(50, 172, 45, 0.97)" 154 | ], 155 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 156 | "decimals": 2, 157 | "mappingType": 1, 158 | "pattern": "A03_igroup", 159 | "thresholds": [], 160 | "type": "number", 161 | "unit": "short" 162 | }, 163 | { 164 | "$$hashKey": "object:3643", 165 | "alias": "环境", 166 | "align": "auto", 167 | "colors": [ 168 | "rgba(245, 54, 54, 0.9)", 169 | "rgba(237, 129, 40, 0.89)", 170 | "rgba(50, 172, 45, 0.97)" 171 | ], 172 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 173 | "decimals": 2, 174 | "mappingType": 1, 175 | "pattern": "A05_ienv", 176 | "thresholds": [], 177 | "type": "number", 178 | "unit": "short" 179 | }, 180 | { 181 | "$$hashKey": "object:3665", 182 | "alias": "i路径", 183 | "align": "auto", 184 | "colors": [ 185 | "rgba(245, 54, 54, 0.9)", 186 | "rgba(237, 129, 40, 0.89)", 187 | "rgba(50, 172, 45, 0.97)" 188 | ], 189 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 190 | "decimals": 2, 191 | "mappingType": 1, 192 | "pattern": "icwd", 193 | "preserveFormat": false, 194 | "sanitize": false, 195 | "thresholds": [], 196 | "type": "string", 197 | "unit": "dateTimeFromNow" 198 | }, 199 | { 200 | "$$hashKey": "object:3676", 201 | "alias": "端口", 202 | "align": "auto", 203 | "colors": [ 204 | "rgba(245, 54, 54, 0.9)", 205 | "rgba(237, 129, 40, 0.89)", 206 | "rgba(50, 172, 45, 0.97)" 207 | ], 208 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 209 | "decimals": 2, 210 | "mappingType": 1, 211 | "pattern": "port", 212 | "thresholds": [], 213 | "type": "string", 214 | "unit": "short" 215 | }, 216 | { 217 | "$$hashKey": "object:3687", 218 | "alias": "主机", 219 | "align": "auto", 220 | "colors": [ 221 | "rgba(245, 54, 54, 0.9)", 222 | "rgba(237, 129, 40, 0.89)", 223 | 
"rgba(50, 172, 45, 0.97)" 224 | ], 225 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 226 | "decimals": 2, 227 | "mappingType": 1, 228 | "pattern": "name", 229 | "thresholds": [], 230 | "type": "number", 231 | "unit": "short" 232 | }, 233 | { 234 | "$$hashKey": "object:3698", 235 | "alias": "i程序", 236 | "align": "auto", 237 | "colors": [ 238 | "rgba(245, 54, 54, 0.9)", 239 | "rgba(237, 129, 40, 0.89)", 240 | "rgba(50, 172, 45, 0.97)" 241 | ], 242 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 243 | "decimals": 2, 244 | "mappingType": 1, 245 | "pattern": "ifile", 246 | "thresholds": [], 247 | "type": "number", 248 | "unit": "short" 249 | }, 250 | { 251 | "$$hashKey": "object:3976", 252 | "alias": "PID/PPID", 253 | "align": "auto", 254 | "colors": [ 255 | "rgba(245, 54, 54, 0.9)", 256 | "rgba(237, 129, 40, 0.89)", 257 | "rgba(50, 172, 45, 0.97)" 258 | ], 259 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 260 | "decimals": 2, 261 | "link": true, 262 | "linkTooltip": "${__cell_17}", 263 | "mappingType": 1, 264 | "pattern": "pid", 265 | "thresholds": [], 266 | "type": "string", 267 | "unit": "none" 268 | }, 269 | { 270 | "$$hashKey": "object:1065", 271 | "alias": "持续时间", 272 | "align": "auto", 273 | "colorMode": "cell", 274 | "colors": [ 275 | "rgba(245, 54, 54, 0.9)", 276 | "rgba(237, 129, 40, 0.89)", 277 | "rgba(50, 172, 45, 0.97)" 278 | ], 279 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 280 | "decimals": 1, 281 | "mappingType": 1, 282 | "pattern": "Value", 283 | "thresholds": [ 284 | "3600", 285 | "36000" 286 | ], 287 | "type": "number", 288 | "unit": "s" 289 | }, 290 | { 291 | "$$hashKey": "object:1224", 292 | "alias": "IP", 293 | "align": "auto", 294 | "colors": [ 295 | "rgba(245, 54, 54, 0.9)", 296 | "rgba(237, 129, 40, 0.89)", 297 | "rgba(50, 172, 45, 0.97)" 298 | ], 299 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 300 | "decimals": 2, 301 | "mappingType": 1, 302 | "pattern": "iintip", 303 | "thresholds": [], 304 | "type": "string", 305 | "unit": "short" 306 | }, 307 | { 308 | "$$hashKey": 
"object:327", 309 | "alias": "进程名", 310 | "align": "auto", 311 | "colors": [ 312 | "rgba(245, 54, 54, 0.9)", 313 | "rgba(237, 129, 40, 0.89)", 314 | "rgba(50, 172, 45, 0.97)" 315 | ], 316 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 317 | "decimals": 2, 318 | "mappingType": 1, 319 | "pattern": "exported_name", 320 | "thresholds": [], 321 | "type": "number", 322 | "unit": "short" 323 | }, 324 | { 325 | "$$hashKey": "object:3185", 326 | "alias": "", 327 | "align": "right", 328 | "colors": [ 329 | "rgba(245, 54, 54, 0.9)", 330 | "rgba(237, 129, 40, 0.89)", 331 | "rgba(50, 172, 45, 0.97)" 332 | ], 333 | "decimals": 2, 334 | "pattern": "/.*/", 335 | "thresholds": [], 336 | "type": "hidden", 337 | "unit": "short" 338 | } 339 | ], 340 | "targets": [ 341 | { 342 | "datasource": { 343 | "type": "prometheus", 344 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 345 | }, 346 | "editorMode": "code", 347 | "expr": "linux_proc_info{A00_iid=~\"$A00_iid\",pid=~\"$pid\"} * on (A00_iid,pid) group_left linux_proc_durn", 348 | "format": "table", 349 | "hide": false, 350 | "instant": true, 351 | "interval": "", 352 | "legendFormat": "", 353 | "refId": "A" 354 | } 355 | ], 356 | "title": "进程监控基础信息", 357 | "transform": "table", 358 | "type": "table-old" 359 | }, 360 | { 361 | "collapsed": false, 362 | "datasource": { 363 | "type": "prometheus", 364 | "uid": "WAYOn0FGz" 365 | }, 366 | "gridPos": { 367 | "h": 1, 368 | "w": 24, 369 | "x": 0, 370 | "y": 7 371 | }, 372 | "id": 410, 373 | "panels": [], 374 | "targets": [ 375 | { 376 | "datasource": { 377 | "type": "prometheus", 378 | "uid": "WAYOn0FGz" 379 | }, 380 | "refId": "A" 381 | } 382 | ], 383 | "title": "进程信息明细", 384 | "type": "row" 385 | }, 386 | { 387 | "aliasColors": {}, 388 | "bars": false, 389 | "dashLength": 10, 390 | "dashes": false, 391 | "datasource": { 392 | "type": "prometheus", 393 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 394 | }, 395 | "description": "", 396 | "editable": true, 397 | "error": false, 398 | "fieldConfig": { 399 | 
"defaults": { 400 | "links": [] 401 | }, 402 | "overrides": [] 403 | }, 404 | "fill": 0, 405 | "fillGradient": 0, 406 | "grid": {}, 407 | "gridPos": { 408 | "h": 8, 409 | "w": 15, 410 | "x": 0, 411 | "y": 8 412 | }, 413 | "hiddenSeries": false, 414 | "id": 406, 415 | "legend": { 416 | "alignAsTable": true, 417 | "avg": true, 418 | "current": true, 419 | "hideEmpty": true, 420 | "hideZero": false, 421 | "max": true, 422 | "min": false, 423 | "rightSide": true, 424 | "show": true, 425 | "sort": "current", 426 | "sortDesc": true, 427 | "total": false, 428 | "values": true 429 | }, 430 | "lines": true, 431 | "linewidth": 2, 432 | "nullPointMode": "null", 433 | "options": { 434 | "alertThreshold": true 435 | }, 436 | "percentage": false, 437 | "pluginVersion": "11.0.0", 438 | "pointradius": 5, 439 | "points": false, 440 | "renderer": "flot", 441 | "seriesOverrides": [], 442 | "spaceLength": 10, 443 | "stack": false, 444 | "steppedLine": false, 445 | "targets": [ 446 | { 447 | "datasource": { 448 | "type": "prometheus", 449 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 450 | }, 451 | "expr": "linux_proc_memory_rss{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 452 | "format": "time_series", 453 | "interval": "", 454 | "intervalFactor": 1, 455 | "legendFormat": "{{pid}}:常驻内存", 456 | "refId": "A", 457 | "step": 4 458 | }, 459 | { 460 | "datasource": { 461 | "type": "prometheus", 462 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 463 | }, 464 | "expr": "linux_proc_memory_vms{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 465 | "format": "time_series", 466 | "interval": "", 467 | "intervalFactor": 1, 468 | "legendFormat": "{{pid}}:虚拟内存", 469 | "refId": "B", 470 | "step": 4 471 | }, 472 | { 473 | "datasource": { 474 | "type": "prometheus", 475 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 476 | }, 477 | "expr": "linux_proc_memory_shared{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 478 | "format": "time_series", 479 | "interval": "", 480 | "intervalFactor": 1, 481 | "legendFormat": "{{pid}}:共享内存", 482 | 
"refId": "C", 483 | "step": 4 484 | }, 485 | { 486 | "datasource": { 487 | "type": "prometheus", 488 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 489 | }, 490 | "expr": "linux_proc_memory_swap{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 491 | "format": "time_series", 492 | "interval": "", 493 | "intervalFactor": 1, 494 | "legendFormat": "{{pid}}:交换内存", 495 | "refId": "D", 496 | "step": 4 497 | }, 498 | { 499 | "datasource": { 500 | "type": "prometheus", 501 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 502 | }, 503 | "expr": "linux_proc_memory_data{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 504 | "format": "time_series", 505 | "interval": "", 506 | "intervalFactor": 1, 507 | "legendFormat": "{{pid}}:除代码内存", 508 | "refId": "E", 509 | "step": 4 510 | }, 511 | { 512 | "datasource": { 513 | "type": "prometheus", 514 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 515 | }, 516 | "expr": "linux_proc_memory_text{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 517 | "format": "time_series", 518 | "interval": "", 519 | "intervalFactor": 1, 520 | "legendFormat": "{{pid}}:代码内存", 521 | "refId": "F", 522 | "step": 4 523 | }, 524 | { 525 | "datasource": { 526 | "type": "prometheus", 527 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 528 | }, 529 | "expr": "linux_proc_memory_percent{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 530 | "format": "time_series", 531 | "hide": true, 532 | "interval": "", 533 | "intervalFactor": 1, 534 | "legendFormat": "{{pid}}:内存占用比例", 535 | "refId": "G", 536 | "step": 4 537 | } 538 | ], 539 | "thresholds": [], 540 | "timeRegions": [], 541 | "title": "进程分类内存占用量", 542 | "tooltip": { 543 | "msResolution": false, 544 | "shared": true, 545 | "sort": 2, 546 | "value_type": "individual" 547 | }, 548 | "type": "graph", 549 | "xaxis": { 550 | "mode": "time", 551 | "show": true, 552 | "values": [] 553 | }, 554 | "yaxes": [ 555 | { 556 | "$$hashKey": "object:542", 557 | "format": "bytes", 558 | "label": "", 559 | "logBase": 1, 560 | "show": true 561 | }, 562 | { 563 | "$$hashKey": "object:543", 564 
| "format": "percent", 565 | "label": "内存占用比例", 566 | "logBase": 1, 567 | "max": "100", 568 | "min": "0", 569 | "show": false 570 | } 571 | ], 572 | "yaxis": { 573 | "align": false 574 | } 575 | }, 576 | { 577 | "aliasColors": {}, 578 | "bars": false, 579 | "dashLength": 10, 580 | "dashes": false, 581 | "datasource": { 582 | "type": "prometheus", 583 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 584 | }, 585 | "description": "", 586 | "editable": true, 587 | "error": false, 588 | "fieldConfig": { 589 | "defaults": { 590 | "links": [] 591 | }, 592 | "overrides": [] 593 | }, 594 | "fill": 0, 595 | "fillGradient": 0, 596 | "grid": {}, 597 | "gridPos": { 598 | "h": 8, 599 | "w": 9, 600 | "x": 15, 601 | "y": 8 602 | }, 603 | "hiddenSeries": false, 604 | "id": 414, 605 | "legend": { 606 | "alignAsTable": true, 607 | "avg": true, 608 | "current": true, 609 | "hideEmpty": true, 610 | "hideZero": false, 611 | "max": true, 612 | "min": false, 613 | "rightSide": false, 614 | "show": true, 615 | "sort": "current", 616 | "sortDesc": true, 617 | "total": false, 618 | "values": true 619 | }, 620 | "lines": true, 621 | "linewidth": 2, 622 | "nullPointMode": "null", 623 | "options": { 624 | "alertThreshold": true 625 | }, 626 | "percentage": false, 627 | "pluginVersion": "11.0.0", 628 | "pointradius": 5, 629 | "points": false, 630 | "renderer": "flot", 631 | "seriesOverrides": [], 632 | "spaceLength": 10, 633 | "stack": false, 634 | "steppedLine": false, 635 | "targets": [ 636 | { 637 | "datasource": { 638 | "type": "prometheus", 639 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 640 | }, 641 | "expr": "linux_proc_memory_percent{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 642 | "format": "time_series", 643 | "interval": "", 644 | "intervalFactor": 1, 645 | "legendFormat": "{{pid}}:内存占比", 646 | "refId": "G", 647 | "step": 4 648 | } 649 | ], 650 | "thresholds": [], 651 | "timeRegions": [], 652 | "title": "进程实际内存占用比例", 653 | "tooltip": { 654 | "msResolution": false, 655 | "shared": true, 656 | 
"sort": 2, 657 | "value_type": "individual" 658 | }, 659 | "type": "graph", 660 | "xaxis": { 661 | "mode": "time", 662 | "show": true, 663 | "values": [] 664 | }, 665 | "yaxes": [ 666 | { 667 | "$$hashKey": "object:542", 668 | "format": "percent", 669 | "label": "", 670 | "logBase": 1, 671 | "show": true 672 | }, 673 | { 674 | "$$hashKey": "object:543", 675 | "format": "percent", 676 | "label": "内存占用比例", 677 | "logBase": 1, 678 | "max": "100", 679 | "min": "0", 680 | "show": false 681 | } 682 | ], 683 | "yaxis": { 684 | "align": false 685 | } 686 | }, 687 | { 688 | "aliasColors": {}, 689 | "bars": false, 690 | "dashLength": 10, 691 | "dashes": false, 692 | "datasource": { 693 | "type": "prometheus", 694 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 695 | }, 696 | "description": "", 697 | "editable": true, 698 | "error": false, 699 | "fieldConfig": { 700 | "defaults": { 701 | "links": [] 702 | }, 703 | "overrides": [] 704 | }, 705 | "fill": 0, 706 | "fillGradient": 0, 707 | "grid": {}, 708 | "gridPos": { 709 | "h": 8, 710 | "w": 15, 711 | "x": 0, 712 | "y": 16 713 | }, 714 | "hiddenSeries": false, 715 | "id": 411, 716 | "legend": { 717 | "alignAsTable": true, 718 | "avg": true, 719 | "current": true, 720 | "hideEmpty": true, 721 | "hideZero": true, 722 | "max": true, 723 | "min": false, 724 | "rightSide": true, 725 | "show": true, 726 | "sort": "current", 727 | "sortDesc": true, 728 | "total": false, 729 | "values": true 730 | }, 731 | "lines": true, 732 | "linewidth": 1, 733 | "nullPointMode": "null", 734 | "options": { 735 | "alertThreshold": true 736 | }, 737 | "percentage": false, 738 | "pluginVersion": "11.0.0", 739 | "pointradius": 5, 740 | "points": false, 741 | "renderer": "flot", 742 | "seriesOverrides": [], 743 | "spaceLength": 10, 744 | "stack": false, 745 | "steppedLine": false, 746 | "targets": [ 747 | { 748 | "datasource": { 749 | "type": "prometheus", 750 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 751 | }, 752 | "expr": 
"irate(linux_proc_cpu_user{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 753 | "format": "time_series", 754 | "interval": "", 755 | "intervalFactor": 1, 756 | "legendFormat": "{{pid}}:用户cpu占比", 757 | "refId": "A", 758 | "step": 4 759 | }, 760 | { 761 | "datasource": { 762 | "type": "prometheus", 763 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 764 | }, 765 | "expr": "irate(linux_proc_cpu_system{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 766 | "format": "time_series", 767 | "interval": "", 768 | "intervalFactor": 1, 769 | "legendFormat": "{{pid}}:系统cpu占比", 770 | "refId": "B", 771 | "step": 4 772 | }, 773 | { 774 | "datasource": { 775 | "type": "prometheus", 776 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 777 | }, 778 | "expr": "irate(linux_proc_cpu_iowait{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 779 | "format": "time_series", 780 | "hide": false, 781 | "interval": "", 782 | "intervalFactor": 1, 783 | "legendFormat": "{{pid}}:iowait_cpu占比", 784 | "refId": "C", 785 | "step": 4 786 | }, 787 | { 788 | "datasource": { 789 | "type": "prometheus", 790 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 791 | }, 792 | "expr": "irate(linux_proc_cpu_children_user{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 793 | "format": "time_series", 794 | "hide": false, 795 | "interval": "", 796 | "intervalFactor": 1, 797 | "legendFormat": "{{pid}}:子进程用户_cpu占比", 798 | "refId": "D", 799 | "step": 4 800 | }, 801 | { 802 | "datasource": { 803 | "type": "prometheus", 804 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 805 | }, 806 | "expr": "irate(linux_proc_cpu_children_system{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 807 | "format": "time_series", 808 | "hide": false, 809 | "interval": "", 810 | "intervalFactor": 1, 811 | "legendFormat": "{{pid}}:子进程系统_cpu占比", 812 | "refId": "E", 813 | "step": 4 814 | }, 815 | { 816 | "datasource": { 817 | "type": "prometheus", 818 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 819 | }, 820 | "expr": "linux_proc_cpu_percent{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}/100", 821 | 
"format": "time_series", 822 | "hide": false, 823 | "interval": "", 824 | "intervalFactor": 1, 825 | "legendFormat": "{{pid}}:总使用CPU占比", 826 | "refId": "F", 827 | "step": 4 828 | } 829 | ], 830 | "thresholds": [], 831 | "timeRegions": [], 832 | "title": "进程CPU占用 (单核为100%计算,超过100%为使用多核)", 833 | "tooltip": { 834 | "msResolution": false, 835 | "shared": true, 836 | "sort": 2, 837 | "value_type": "individual" 838 | }, 839 | "type": "graph", 840 | "xaxis": { 841 | "mode": "time", 842 | "show": true, 843 | "values": [] 844 | }, 845 | "yaxes": [ 846 | { 847 | "$$hashKey": "object:542", 848 | "format": "percentunit", 849 | "label": "", 850 | "logBase": 1, 851 | "show": true 852 | }, 853 | { 854 | "$$hashKey": "object:543", 855 | "format": "percent", 856 | "label": "总使用CPU占比", 857 | "logBase": 1, 858 | "show": false 859 | } 860 | ], 861 | "yaxis": { 862 | "align": false 863 | } 864 | }, 865 | { 866 | "aliasColors": {}, 867 | "bars": false, 868 | "dashLength": 10, 869 | "dashes": false, 870 | "datasource": { 871 | "type": "prometheus", 872 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 873 | }, 874 | "fieldConfig": { 875 | "defaults": { 876 | "links": [] 877 | }, 878 | "overrides": [] 879 | }, 880 | "fill": 1, 881 | "fillGradient": 0, 882 | "gridPos": { 883 | "h": 8, 884 | "w": 9, 885 | "x": 15, 886 | "y": 16 887 | }, 888 | "hiddenSeries": false, 889 | "id": 417, 890 | "legend": { 891 | "avg": false, 892 | "current": false, 893 | "max": false, 894 | "min": false, 895 | "show": false, 896 | "total": false, 897 | "values": false 898 | }, 899 | "lines": true, 900 | "linewidth": 1, 901 | "nullPointMode": "connected", 902 | "options": { 903 | "alertThreshold": true 904 | }, 905 | "percentage": false, 906 | "pluginVersion": "11.0.0", 907 | "pointradius": 5, 908 | "points": false, 909 | "renderer": "flot", 910 | "seriesOverrides": [], 911 | "spaceLength": 10, 912 | "stack": false, 913 | "steppedLine": false, 914 | "targets": [ 915 | { 916 | "datasource": { 917 | "type": "prometheus", 
918 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 919 | }, 920 | "editorMode": "code", 921 | "expr": "(1 - avg(irate(node_cpu_seconds_total{instance=~\"$instance\",mode=\"idle\"}[3m])) by (instance))*100", 922 | "format": "time_series", 923 | "interval": "", 924 | "intervalFactor": 1, 925 | "legendFormat": "cpu_total", 926 | "range": true, 927 | "refId": "A" 928 | } 929 | ], 930 | "thresholds": [], 931 | "timeRegions": [], 932 | "title": "ECS CPU总使用率(所有核平均)", 933 | "tooltip": { 934 | "shared": true, 935 | "sort": 0, 936 | "value_type": "individual" 937 | }, 938 | "type": "graph", 939 | "xaxis": { 940 | "mode": "time", 941 | "show": true, 942 | "values": [] 943 | }, 944 | "yaxes": [ 945 | { 946 | "$$hashKey": "object:1462", 947 | "format": "percent", 948 | "logBase": 1, 949 | "show": true 950 | }, 951 | { 952 | "$$hashKey": "object:1463", 953 | "format": "short", 954 | "logBase": 1, 955 | "show": false 956 | } 957 | ], 958 | "yaxis": { 959 | "align": false 960 | } 961 | }, 962 | { 963 | "aliasColors": {}, 964 | "bars": false, 965 | "dashLength": 10, 966 | "dashes": false, 967 | "datasource": { 968 | "type": "prometheus", 969 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 970 | }, 971 | "description": "", 972 | "editable": true, 973 | "error": false, 974 | "fieldConfig": { 975 | "defaults": { 976 | "links": [] 977 | }, 978 | "overrides": [] 979 | }, 980 | "fill": 0, 981 | "fillGradient": 1, 982 | "grid": {}, 983 | "gridPos": { 984 | "h": 9, 985 | "w": 12, 986 | "x": 0, 987 | "y": 24 988 | }, 989 | "hiddenSeries": false, 990 | "id": 412, 991 | "legend": { 992 | "alignAsTable": true, 993 | "avg": true, 994 | "current": true, 995 | "hideEmpty": true, 996 | "hideZero": false, 997 | "max": true, 998 | "min": false, 999 | "rightSide": true, 1000 | "show": true, 1001 | "sort": "current", 1002 | "sortDesc": true, 1003 | "total": false, 1004 | "values": true 1005 | }, 1006 | "lines": true, 1007 | "linewidth": 2, 1008 | "nullPointMode": "null", 1009 | "options": { 1010 | 
"alertThreshold": true 1011 | }, 1012 | "percentage": false, 1013 | "pluginVersion": "11.0.0", 1014 | "pointradius": 5, 1015 | "points": false, 1016 | "renderer": "flot", 1017 | "seriesOverrides": [], 1018 | "spaceLength": 10, 1019 | "stack": false, 1020 | "steppedLine": false, 1021 | "targets": [ 1022 | { 1023 | "datasource": { 1024 | "type": "prometheus", 1025 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 1026 | }, 1027 | "expr": "irate(linux_proc_io_read_count{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 1028 | "format": "time_series", 1029 | "instant": false, 1030 | "interval": "", 1031 | "intervalFactor": 1, 1032 | "legendFormat": "{{pid}}:读IO", 1033 | "refId": "A", 1034 | "step": 4 1035 | }, 1036 | { 1037 | "datasource": { 1038 | "type": "prometheus", 1039 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 1040 | }, 1041 | "expr": "irate(linux_proc_io_write_count{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 1042 | "format": "time_series", 1043 | "interval": "", 1044 | "intervalFactor": 1, 1045 | "legendFormat": "{{pid}}:写IO", 1046 | "refId": "B", 1047 | "step": 4 1048 | } 1049 | ], 1050 | "thresholds": [], 1051 | "timeRegions": [], 1052 | "title": "进程IOPS", 1053 | "tooltip": { 1054 | "msResolution": false, 1055 | "shared": true, 1056 | "sort": 2, 1057 | "value_type": "individual" 1058 | }, 1059 | "type": "graph", 1060 | "xaxis": { 1061 | "mode": "time", 1062 | "show": true, 1063 | "values": [] 1064 | }, 1065 | "yaxes": [ 1066 | { 1067 | "$$hashKey": "object:542", 1068 | "format": "iops", 1069 | "label": "", 1070 | "logBase": 1, 1071 | "show": true 1072 | }, 1073 | { 1074 | "$$hashKey": "object:543", 1075 | "format": "Bps", 1076 | "label": "读写字节", 1077 | "logBase": 1, 1078 | "show": false 1079 | } 1080 | ], 1081 | "yaxis": { 1082 | "align": false 1083 | } 1084 | }, 1085 | { 1086 | "aliasColors": {}, 1087 | "bars": false, 1088 | "dashLength": 10, 1089 | "dashes": false, 1090 | "datasource": { 1091 | "type": "prometheus", 1092 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 1093 
| }, 1094 | "description": "", 1095 | "editable": true, 1096 | "error": false, 1097 | "fieldConfig": { 1098 | "defaults": { 1099 | "links": [] 1100 | }, 1101 | "overrides": [] 1102 | }, 1103 | "fill": 0, 1104 | "fillGradient": 1, 1105 | "grid": {}, 1106 | "gridPos": { 1107 | "h": 9, 1108 | "w": 12, 1109 | "x": 12, 1110 | "y": 24 1111 | }, 1112 | "hiddenSeries": false, 1113 | "id": 415, 1114 | "legend": { 1115 | "alignAsTable": true, 1116 | "avg": true, 1117 | "current": true, 1118 | "hideEmpty": true, 1119 | "hideZero": false, 1120 | "max": true, 1121 | "min": false, 1122 | "rightSide": true, 1123 | "show": true, 1124 | "sort": "current", 1125 | "sortDesc": true, 1126 | "total": false, 1127 | "values": true 1128 | }, 1129 | "lines": true, 1130 | "linewidth": 1, 1131 | "nullPointMode": "null", 1132 | "options": { 1133 | "alertThreshold": true 1134 | }, 1135 | "percentage": false, 1136 | "pluginVersion": "11.0.0", 1137 | "pointradius": 5, 1138 | "points": false, 1139 | "renderer": "flot", 1140 | "seriesOverrides": [], 1141 | "spaceLength": 10, 1142 | "stack": false, 1143 | "steppedLine": false, 1144 | "targets": [ 1145 | { 1146 | "datasource": { 1147 | "type": "prometheus", 1148 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 1149 | }, 1150 | "expr": "irate(linux_proc_io_read_bytes{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 1151 | "format": "time_series", 1152 | "hide": false, 1153 | "interval": "", 1154 | "intervalFactor": 1, 1155 | "legendFormat": "{{pid}}:读字节", 1156 | "refId": "C", 1157 | "step": 4 1158 | }, 1159 | { 1160 | "datasource": { 1161 | "type": "prometheus", 1162 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 1163 | }, 1164 | "expr": "irate(linux_proc_io_write_bytes{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 1165 | "format": "time_series", 1166 | "hide": false, 1167 | "interval": "", 1168 | "intervalFactor": 1, 1169 | "legendFormat": "{{pid}}:写字节", 1170 | "refId": "D", 1171 | "step": 4 1172 | } 1173 | ], 1174 | "thresholds": [], 1175 | "timeRegions": [], 1176 | 
"title": "进程IO读写", 1177 | "tooltip": { 1178 | "msResolution": false, 1179 | "shared": true, 1180 | "sort": 2, 1181 | "value_type": "individual" 1182 | }, 1183 | "type": "graph", 1184 | "xaxis": { 1185 | "mode": "time", 1186 | "show": true, 1187 | "values": [] 1188 | }, 1189 | "yaxes": [ 1190 | { 1191 | "$$hashKey": "object:542", 1192 | "format": "Bps", 1193 | "label": "", 1194 | "logBase": 1, 1195 | "show": true 1196 | }, 1197 | { 1198 | "$$hashKey": "object:543", 1199 | "format": "Bps", 1200 | "label": "读写字节", 1201 | "logBase": 1, 1202 | "show": false 1203 | } 1204 | ], 1205 | "yaxis": { 1206 | "align": false 1207 | } 1208 | }, 1209 | { 1210 | "aliasColors": { 1211 | "2060:文件描述符上限": "dark-red" 1212 | }, 1213 | "bars": false, 1214 | "dashLength": 10, 1215 | "dashes": false, 1216 | "datasource": { 1217 | "type": "prometheus", 1218 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 1219 | }, 1220 | "description": "", 1221 | "editable": true, 1222 | "error": false, 1223 | "fieldConfig": { 1224 | "defaults": { 1225 | "links": [] 1226 | }, 1227 | "overrides": [] 1228 | }, 1229 | "fill": 0, 1230 | "fillGradient": 0, 1231 | "grid": {}, 1232 | "gridPos": { 1233 | "h": 9, 1234 | "w": 24, 1235 | "x": 0, 1236 | "y": 33 1237 | }, 1238 | "hiddenSeries": false, 1239 | "id": 413, 1240 | "legend": { 1241 | "alignAsTable": true, 1242 | "avg": true, 1243 | "current": true, 1244 | "hideEmpty": true, 1245 | "hideZero": true, 1246 | "max": true, 1247 | "min": false, 1248 | "rightSide": true, 1249 | "show": true, 1250 | "sort": "current", 1251 | "sortDesc": true, 1252 | "total": false, 1253 | "values": true 1254 | }, 1255 | "lines": true, 1256 | "linewidth": 2, 1257 | "nullPointMode": "null", 1258 | "options": { 1259 | "alertThreshold": true 1260 | }, 1261 | "percentage": false, 1262 | "pluginVersion": "11.0.0", 1263 | "pointradius": 5, 1264 | "points": false, 1265 | "renderer": "flot", 1266 | "seriesOverrides": [ 1267 | { 1268 | "$$hashKey": "object:1269", 1269 | "alias": "/.*文件描述符上限占比/", 1270 
| "color": "#FADE2A", 1271 | "lines": false, 1272 | "points": true, 1273 | "yaxis": 2 1274 | } 1275 | ], 1276 | "spaceLength": 10, 1277 | "stack": false, 1278 | "steppedLine": false, 1279 | "targets": [ 1280 | { 1281 | "datasource": { 1282 | "type": "prometheus", 1283 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 1284 | }, 1285 | "expr": "linux_proc_num_open_files{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 1286 | "format": "time_series", 1287 | "instant": false, 1288 | "interval": "", 1289 | "intervalFactor": 1, 1290 | "legendFormat": "{{pid}}:打开文件数", 1291 | "refId": "A", 1292 | "step": 4 1293 | }, 1294 | { 1295 | "datasource": { 1296 | "type": "prometheus", 1297 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 1298 | }, 1299 | "expr": "linux_proc_num_fds{A00_iid=~\"$A00_iid\",pid=~\"$pid\"} / linux_proc_num_fds_limit{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 1300 | "format": "time_series", 1301 | "interval": "30m", 1302 | "intervalFactor": 1, 1303 | "legendFormat": "{{pid}}:文件描述符上限占比", 1304 | "refId": "B", 1305 | "step": 4 1306 | }, 1307 | { 1308 | "datasource": { 1309 | "type": "prometheus", 1310 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 1311 | }, 1312 | "expr": "linux_proc_num_fds{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 1313 | "format": "time_series", 1314 | "hide": false, 1315 | "interval": "", 1316 | "intervalFactor": 1, 1317 | "legendFormat": "{{pid}}:打开文件描述符", 1318 | "refId": "C", 1319 | "step": 4 1320 | }, 1321 | { 1322 | "datasource": { 1323 | "type": "prometheus", 1324 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 1325 | }, 1326 | "expr": "linux_proc_num_threads{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 1327 | "format": "time_series", 1328 | "hide": false, 1329 | "interval": "", 1330 | "intervalFactor": 1, 1331 | "legendFormat": "{{pid}}:线程数", 1332 | "refId": "D", 1333 | "step": 4 1334 | }, 1335 | { 1336 | "datasource": { 1337 | "type": "prometheus", 1338 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 1339 | }, 1340 | "expr": 
"linux_proc_num_children{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 1341 | "format": "time_series", 1342 | "hide": false, 1343 | "interval": "", 1344 | "intervalFactor": 1, 1345 | "legendFormat": "{{pid}}:子进程", 1346 | "refId": "E", 1347 | "step": 4 1348 | }, 1349 | { 1350 | "datasource": { 1351 | "type": "prometheus", 1352 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 1353 | }, 1354 | "expr": "linux_proc_num_fds_limit{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 1355 | "format": "time_series", 1356 | "interval": "", 1357 | "intervalFactor": 1, 1358 | "legendFormat": "{{pid}}:文件描述符上限", 1359 | "refId": "F", 1360 | "step": 4 1361 | } 1362 | ], 1363 | "thresholds": [], 1364 | "timeRegions": [], 1365 | "title": "文件描述符与线程", 1366 | "tooltip": { 1367 | "msResolution": false, 1368 | "shared": true, 1369 | "sort": 2, 1370 | "value_type": "individual" 1371 | }, 1372 | "type": "graph", 1373 | "xaxis": { 1374 | "mode": "time", 1375 | "show": true, 1376 | "values": [] 1377 | }, 1378 | "yaxes": [ 1379 | { 1380 | "$$hashKey": "object:542", 1381 | "format": "short", 1382 | "label": "", 1383 | "logBase": 1, 1384 | "show": true 1385 | }, 1386 | { 1387 | "$$hashKey": "object:543", 1388 | "format": "percentunit", 1389 | "label": "【黄点】文件描述符上限占比", 1390 | "logBase": 1, 1391 | "max": "1", 1392 | "min": "0", 1393 | "show": true 1394 | } 1395 | ], 1396 | "yaxis": { 1397 | "align": false 1398 | } 1399 | } 1400 | ], 1401 | "refresh": false, 1402 | "schemaVersion": 39, 1403 | "tags": [ 1404 | "阿里云", 1405 | "Non-ali-API" 1406 | ], 1407 | "templating": { 1408 | "list": [ 1409 | { 1410 | "current": {}, 1411 | "datasource": { 1412 | "type": "prometheus", 1413 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 1414 | }, 1415 | "definition": "label_values(linux_proc_info,A00_iid)", 1416 | "hide": 0, 1417 | "includeAll": true, 1418 | "label": "实例ID", 1419 | "multi": false, 1420 | "name": "A00_iid", 1421 | "options": [], 1422 | "query": { 1423 | "qryType": 1, 1424 | "query": "label_values(linux_proc_info,A00_iid)", 1425 | 
"refId": "PrometheusVariableQueryEditor-VariableQuery" 1426 | }, 1427 | "refresh": 1, 1428 | "regex": "", 1429 | "skipUrlSync": false, 1430 | "sort": 5, 1431 | "tagValuesQuery": "", 1432 | "tagsQuery": "", 1433 | "type": "query", 1434 | "useTags": false 1435 | }, 1436 | { 1437 | "current": {}, 1438 | "datasource": { 1439 | "type": "prometheus", 1440 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 1441 | }, 1442 | "definition": "label_values(linux_proc_info{A00_iid=~\"$A00_iid\"},pid)", 1443 | "hide": 0, 1444 | "includeAll": true, 1445 | "label": "PID", 1446 | "multi": false, 1447 | "name": "pid", 1448 | "options": [], 1449 | "query": { 1450 | "qryType": 1, 1451 | "query": "label_values(linux_proc_info{A00_iid=~\"$A00_iid\"},pid)", 1452 | "refId": "PrometheusVariableQueryEditor-VariableQuery" 1453 | }, 1454 | "refresh": 1, 1455 | "regex": "", 1456 | "skipUrlSync": false, 1457 | "sort": 3, 1458 | "tagValuesQuery": "", 1459 | "tagsQuery": "", 1460 | "type": "query", 1461 | "useTags": false 1462 | }, 1463 | { 1464 | "current": {}, 1465 | "datasource": { 1466 | "type": "prometheus", 1467 | "uid": "${DS__VICTORIAMETRICS-PROD-ALL}" 1468 | }, 1469 | "definition": "label_values(linux_proc_info{A00_iid=~\"$A00_iid\"},instance)", 1470 | "hide": 0, 1471 | "includeAll": true, 1472 | "label": "实例IP", 1473 | "multi": false, 1474 | "name": "instance", 1475 | "options": [], 1476 | "query": { 1477 | "qryType": 1, 1478 | "query": "label_values(linux_proc_info{A00_iid=~\"$A00_iid\"},instance)", 1479 | "refId": "PrometheusVariableQueryEditor-VariableQuery" 1480 | }, 1481 | "refresh": 1, 1482 | "regex": "", 1483 | "skipUrlSync": false, 1484 | "sort": 0, 1485 | "type": "query" 1486 | } 1487 | ] 1488 | }, 1489 | "time": { 1490 | "from": "now-1h", 1491 | "to": "now" 1492 | }, 1493 | "timeRangeUpdatedDuringEditOrView": false, 1494 | "timepicker": { 1495 | "collapse": false, 1496 | "enable": true, 1497 | "hidden": false, 1498 | "notice": false, 1499 | "now": true, 1500 | "refresh_intervals": [ 
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
"""
Collect per-process metrics with psutil and push them to a Prometheus Pushgateway.

Setup:
    yum install python3-devel
    pip3 install psutil prometheus_client pyyaml
    */1 * * * * /usr/bin/python3 /opt/monit/linux_proc.py

Usage:
    linux_proc.py <pid>   register the running process <pid> in linux_proc.yaml
    linux_proc.py         collect metrics for every registered app and push them

The Pushgateway address defaults to 172.23.0.83:9091 and can be overridden
with the PUSHGATEWAY_ADDR environment variable.
"""
import datetime
import os
import socket
import sys
import urllib.request  # "import urllib" alone does not load the request submodule
from collections import Counter

import psutil
import yaml
from prometheus_client import CollectorRegistry, Gauge, push_to_gateway

cur_path = os.path.dirname(os.path.realpath(__file__))
yaml_path = os.path.join(cur_path, "linux_proc.yaml")

# Instance metadata endpoint queried for the instance id.
# NOTE(review): looks like the Alibaba Cloud ECS metadata address - confirm.
METADATA_URL = 'http://100.100.100.200/latest/meta-data/instance-id'
# Public address used only to learn which local IP the default route uses.
ROUTE_PROBE_ADDR = ('114.114.114.114', 53)
# The cached instance id is re-resolved when the YAML file is older than this.
INSTANCE_REFRESH_SECONDS = 86400
PUSHGATEWAY_ADDR = os.environ.get('PUSHGATEWAY_ADDR', '172.23.0.83:9091')

LINUX_PROC_INFO_LABELS = ["instance", "A00_iid", "iexe", "iparam", "icwd", "pid", "name", "status",
                          "is_running", "exe", "cmdline", "parent", "username", "port"]
# Metrics that always get a gauge; conn_<status> metrics are added dynamically.
METRIC_LIST = ["io_read_count", "io_write_count", "io_read_bytes", "io_write_bytes",
               "cpu_user", "cpu_system", "cpu_children_user", "cpu_children_system", "cpu_iowait",
               "memory_rss", "memory_vms", "memory_shared", "memory_swap", "memory_text", "memory_data",
               "num_open_files", "num_fds_limit", "num_fds", "cpu_num", "num_threads", "num_children",
               "cpu_percent", "memory_percent", "durn"]


def get_instance_id():
    """Return the identifier stored as ``instance`` in the config.

    The metadata endpoint is tried first (1 second timeout). If it is
    unreachable or returns an empty body, the id falls back to
    ``"<hostname>_<local ip>"``.

    Raises:
        OSError: if the fallback cannot determine a local address
            (for example, no route to the probe address).
    """
    try:
        with urllib.request.urlopen(METADATA_URL, timeout=1) as res:
            iid = res.read().decode('utf-8').strip()
        if iid:
            return iid
    except Exception as exc:  # best effort: any failure means "not on that cloud"
        print(f'metadata lookup failed ({exc}); falling back to hostname_ip')
    # connect() on a UDP socket sends no packet; it only makes the kernel pick
    # the source address, which getsockname() then reports.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        s.connect(ROUTE_PROBE_ADDR)
        local_ip = s.getsockname()[0]
    return f"{socket.gethostname()}_{local_ip}"


def load_config():
    """Load linux_proc.yaml (keys used by this script: ``instance`` and ``apps``)."""
    with open(yaml_path, 'r') as fy:
        # The file only holds plain dicts/lists/strings, so the safe loader is enough.
        return yaml.safe_load(fy)


def save_config(cfg):
    """Write the config back to linux_proc.yaml."""
    with open(yaml_path, 'w') as fw:
        yaml.dump(cfg, fw)


def register_process(pid_arg):
    """Add the process with pid ``pid_arg`` to the config, creating the file if needed.

    The app is identified by the first and last command-line elements and by
    its working directory. An app that is already registered is not added twice.
    """
    print(f'pid:{pid_arg}')
    try:
        pid = int(pid_arg)
    except ValueError:
        sys.exit(f'usage: {sys.argv[0]} [pid]  (got non-numeric pid {pid_arg!r})')
    ps = psutil.Process(pid)
    cmdline = ps.cmdline()
    if not cmdline:
        # An empty command line cannot be matched later.
        sys.exit(f'pid {pid} has an empty command line and cannot be registered')
    psdict = {'iexe': cmdline[0], 'iparam': cmdline[-1], 'icwd': ps.cwd()}
    if not os.path.exists(yaml_path):
        cfg = {'instance': get_instance_id(), 'apps': [psdict]}
    else:
        cfg = load_config()
        if psdict not in cfg['apps']:
            cfg['apps'].append(psdict)
    save_config(cfg)


def find_app_processes(iexe, iparam, icwd):
    """Return every running process whose cmdline contains ``iexe`` and ``iparam``
    and whose working directory equals ``icwd``.

    Processes that vanish during the scan or cannot be inspected are skipped,
    so one inaccessible process does not abort the whole run.
    """
    matches = []
    for proc in psutil.process_iter():
        try:
            cmdline = proc.cmdline()
            if iparam in cmdline and iexe in cmdline and icwd == proc.cwd():
                matches.append(proc)
        except psutil.Error:
            continue
    return matches


def pick_main_process(proc_app, iexe, iparam):
    """Choose the one process to monitor among several matches.

    Preference order: a match whose parent is pid 1; otherwise the match that
    is the most common parent of the other matches; otherwise the first match.
    """
    if len(proc_app) == 1:
        return proc_app[0]
    pids = [p for p in proc_app if p.ppid() == 1]
    if pids:
        return pids[0]
    app_pid = Counter(p.ppid() for p in proc_app).most_common(1)[0][0]
    print(iexe, iparam, 'ppid:', app_pid)
    # Several processes match but their common parent is not one of them:
    # monitor the first process in the list.
    return next((p for p in proc_app if p.pid == app_pid), proc_app[0])


def collect_process(appinfo):
    """Gather label values and metric values for one process.

    Returns:
        (info_values, metrics): ``info_values`` are the linux_proc_info label
        values from ``pid`` through ``port``, in label order; ``metrics`` maps
        metric name to value and includes one ``conn_<status>`` entry per
        connection status seen.

    Raises:
        psutil.Error: if the process exits or cannot be inspected.
    """
    pid = appinfo.pid
    name = appinfo.name()
    status = appinfo.status()
    is_running = appinfo.is_running()
    exe = appinfo.exe()
    cmdline = ' '.join(appinfo.cmdline())
    parent_proc = appinfo.parent()  # None when the process has no parent
    parent = f'{parent_proc.pid}/{parent_proc.name()}' if parent_proc is not None else ''
    username = appinfo.username()
    # psutil 6 renamed Process.connections() to net_connections(); support both.
    list_connections = getattr(appinfo, 'net_connections', None) or appinfo.connections
    connections = list_connections('all')
    # A set avoids repeating a port that is bound on both IPv4 and IPv6.
    listen_ports = sorted({x.laddr.port for x in connections if x.status == 'LISTEN'})
    port = '/'.join(str(p) for p in listen_ports)
    info_values = (pid, name, status, is_running, exe, cmdline, parent, username, port)

    io_counters = appinfo.io_counters()
    cpu_times = appinfo.cpu_times()
    memory_info = appinfo.memory_full_info()
    metrics = {
        "io_read_count": io_counters.read_count,
        "io_write_count": io_counters.write_count,
        "io_read_bytes": io_counters.read_bytes,
        "io_write_bytes": io_counters.write_bytes,
        "cpu_user": cpu_times.user,
        "cpu_system": cpu_times.system,
        "cpu_children_user": cpu_times.children_user,
        "cpu_children_system": cpu_times.children_system,
        "cpu_iowait": cpu_times.iowait,
        "memory_rss": memory_info.rss,
        "memory_vms": memory_info.vms,
        "memory_shared": memory_info.shared,
        "memory_swap": memory_info.swap,
        "memory_text": memory_info.text,
        "memory_data": memory_info.data,
        "num_open_files": len(appinfo.open_files()),
        "num_fds_limit": appinfo.rlimit(psutil.RLIMIT_NOFILE)[0],  # soft limit
        "num_fds": appinfo.num_fds(),
        "cpu_num": appinfo.cpu_num(),
        "num_threads": appinfo.num_threads(),
        "num_children": len(appinfo.children()),
        "cpu_percent": appinfo.cpu_percent(interval=1),  # blocks for one second
        "memory_percent": appinfo.memory_percent(),
        "durn": datetime.datetime.now().timestamp() - appinfo.create_time(),
    }
    for conn_status, count in Counter(con.status for con in connections).items():
        metrics[f'conn_{conn_status.lower()}'] = count
    return info_values, metrics


def main():
    """Register a pid (one argument) or collect and push metrics (no argument)."""
    if len(sys.argv) == 2:
        register_process(sys.argv[1])
        sys.exit()

    if not os.path.exists(yaml_path):
        sys.exit(f'{yaml_path} not found; register a process first: {sys.argv[0]} <pid>')
    cfg = load_config()

    if datetime.datetime.now().timestamp() - os.path.getmtime(yaml_path) > INSTANCE_REFRESH_SECONDS:
        try:
            cfg['instance'] = get_instance_id()
        except OSError as exc:
            # Keep the cached id; the refresh is retried on the next run.
            print(f'instance id refresh failed: {exc}')
        else:
            save_config(cfg)
            print('update:' + yaml_path)
    print(cfg)

    registry = CollectorRegistry(auto_describe=False)
    linux_proc_error = Gauge('linux_proc_error', "LINUX_进程异常指标",
                             ["instance", "A00_iid", "iexe", "iparam", "icwd"], registry=registry)
    linux_proc_info = Gauge("linux_proc_info", "LINUX_进程信息指标", LINUX_PROC_INFO_LABELS, registry=registry)

    # metric name -> {pid: value}
    metric_dict = {name: {} for name in METRIC_LIST}

    instance = cfg['instance']
    A00_iid = cfg['instance']
    inum = 0  # number of apps for which metrics were collected
    for app in cfg['apps']:
        iexe = app['iexe']
        iparam = app['iparam']
        icwd = app['icwd']
        proc_app = find_app_processes(iexe, iparam, icwd)
        if not proc_app:
            # No matching process: report the app with a match count of 0.
            linux_proc_error.labels(instance, A00_iid, iexe, iparam, icwd).set(len(proc_app))
            continue
        try:
            appinfo = pick_main_process(proc_app, iexe, iparam)
            info_values, metrics = collect_process(appinfo)
        except psutil.Error as exc:
            # The process exited or could not be inspected mid-collection;
            # skip it so the remaining apps are still pushed.
            print(f'skip {iexe} {iparam}: {exc}')
            continue
        inum += 1
        linux_proc_info.labels(instance, A00_iid, iexe, iparam, icwd, *info_values).set(1)
        pid = info_values[0]
        for name, value in metrics.items():
            metric_dict.setdefault(name, {})[pid] = value

    if inum != 0:
        for mk, mv in metric_dict.items():
            linux_proc_metric = Gauge(f'linux_proc_{mk}', f"LINUX_进程指标:{mk}",
                                      ["instance", "A00_iid", "pid"], registry=registry)
            for ik, iv in mv.items():
                linux_proc_metric.labels(instance, A00_iid, ik).set(iv)

    push_to_gateway(PUSHGATEWAY_ADDR, job='push_linux_proc',
                    grouping_key={'instance': instance}, registry=registry)


if __name__ == '__main__':
    main()
-------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ 3 | { 4 | "name": "DS__VICTORIAMETRICS", 5 | "label": " VictoriaMetrics", 6 | "description": "", 7 | "type": "datasource", 8 | "pluginId": "prometheus", 9 | "pluginName": "Prometheus" 10 | } 11 | ], 12 | "__requires": [ 13 | { 14 | "type": "grafana", 15 | "id": "grafana", 16 | "name": "Grafana", 17 | "version": "7.5.11" 18 | }, 19 | { 20 | "type": "panel", 21 | "id": "graph", 22 | "name": "Graph", 23 | "version": "" 24 | }, 25 | { 26 | "type": "datasource", 27 | "id": "prometheus", 28 | "name": "Prometheus", 29 | "version": "1.0.0" 30 | }, 31 | { 32 | "type": "panel", 33 | "id": "table-old", 34 | "name": "Table (old)", 35 | "version": "" 36 | } 37 | ], 38 | "annotations": { 39 | "list": [ 40 | { 41 | "$$hashKey": "object:15", 42 | "builtIn": 1, 43 | "datasource": "-- Grafana --", 44 | "enable": true, 45 | "hide": true, 46 | "iconColor": "#e0752d", 47 | "limit": 100, 48 | "name": "Annotations & Alerts", 49 | "showIn": 0, 50 | "tags": [], 51 | "type": "dashboard" 52 | } 53 | ] 54 | }, 55 | "description": "", 56 | "editable": true, 57 | "gnetId": 7362, 58 | "graphTooltip": 0, 59 | "id": null, 60 | "iteration": 1655180724286, 61 | "links": [ 62 | { 63 | "$$hashKey": "object:1359", 64 | "asDropdown": true, 65 | "icon": "external link", 66 | "tags": [ 67 | "阿里云" 68 | ], 69 | "targetBlank": true, 70 | "type": "dashboards" 71 | } 72 | ], 73 | "panels": [ 74 | { 75 | "collapsed": false, 76 | "datasource": "${DS__VICTORIAMETRICS}", 77 | "gridPos": { 78 | "h": 1, 79 | "w": 24, 80 | "x": 0, 81 | "y": 0 82 | }, 83 | "id": 382, 84 | "panels": [], 85 | "repeat": null, 86 | "title": "总览", 87 | "type": "row" 88 | }, 89 | { 90 | "columns": [], 91 | "datasource": "${DS__VICTORIAMETRICS}", 92 | "fieldConfig": { 93 | "defaults": {}, 94 | "overrides": [] 95 | }, 96 | "fontSize": "80%", 97 | "gridPos": { 98 | "h": 6, 99 | "w": 24, 100 | "x": 0, 101 | "y": 1 102 | }, 103 | "id": 408, 
104 | "pageSize": 20, 105 | "showHeader": true, 106 | "sort": { 107 | "col": 9, 108 | "desc": true 109 | }, 110 | "styles": [ 111 | { 112 | "$$hashKey": "object:3621", 113 | "alias": "主机(连接到明细)", 114 | "align": "auto", 115 | "colorMode": null, 116 | "colors": [ 117 | "rgba(245, 54, 54, 0.9)", 118 | "rgba(237, 129, 40, 0.89)", 119 | "rgba(50, 172, 45, 0.97)" 120 | ], 121 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 122 | "decimals": 2, 123 | "link": true, 124 | "linkTargetBlank": true, 125 | "linkTooltip": "${__cell_2},实例:${__cell_1}", 126 | "linkUrl": "/d/1w7YjB_Zz/a-li-yun-ecsxi-tong-zi-yuan-jian-kong-ming-xi?orgId=1&var-A01_iaccount=All&var-A02_iname=All&var-A00_iid=${__cell_1}", 127 | "mappingType": 1, 128 | "pattern": "A02_iname", 129 | "thresholds": [], 130 | "type": "number", 131 | "unit": "short" 132 | }, 133 | { 134 | "$$hashKey": "object:3632", 135 | "alias": "所属组", 136 | "align": "auto", 137 | "colorMode": null, 138 | "colors": [ 139 | "rgba(245, 54, 54, 0.9)", 140 | "rgba(237, 129, 40, 0.89)", 141 | "rgba(50, 172, 45, 0.97)" 142 | ], 143 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 144 | "decimals": 2, 145 | "mappingType": 1, 146 | "pattern": "A03_igroup", 147 | "thresholds": [], 148 | "type": "number", 149 | "unit": "short" 150 | }, 151 | { 152 | "$$hashKey": "object:3643", 153 | "alias": "环境", 154 | "align": "auto", 155 | "colorMode": null, 156 | "colors": [ 157 | "rgba(245, 54, 54, 0.9)", 158 | "rgba(237, 129, 40, 0.89)", 159 | "rgba(50, 172, 45, 0.97)" 160 | ], 161 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 162 | "decimals": 2, 163 | "mappingType": 1, 164 | "pattern": "A05_ienv", 165 | "thresholds": [], 166 | "type": "number", 167 | "unit": "short" 168 | }, 169 | { 170 | "$$hashKey": "object:3665", 171 | "alias": "i路径", 172 | "align": "auto", 173 | "colorMode": null, 174 | "colors": [ 175 | "rgba(245, 54, 54, 0.9)", 176 | "rgba(237, 129, 40, 0.89)", 177 | "rgba(50, 172, 45, 0.97)" 178 | ], 179 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 180 | "decimals": 2, 181 | 
"mappingType": 1, 182 | "pattern": "icwd", 183 | "preserveFormat": false, 184 | "sanitize": false, 185 | "thresholds": [], 186 | "type": "string", 187 | "unit": "dateTimeFromNow" 188 | }, 189 | { 190 | "$$hashKey": "object:3676", 191 | "alias": "端口", 192 | "align": "auto", 193 | "colorMode": null, 194 | "colors": [ 195 | "rgba(245, 54, 54, 0.9)", 196 | "rgba(237, 129, 40, 0.89)", 197 | "rgba(50, 172, 45, 0.97)" 198 | ], 199 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 200 | "decimals": 2, 201 | "mappingType": 1, 202 | "pattern": "port", 203 | "thresholds": [], 204 | "type": "string", 205 | "unit": "short" 206 | }, 207 | { 208 | "$$hashKey": "object:3687", 209 | "alias": "进程名", 210 | "align": "auto", 211 | "colorMode": null, 212 | "colors": [ 213 | "rgba(245, 54, 54, 0.9)", 214 | "rgba(237, 129, 40, 0.89)", 215 | "rgba(50, 172, 45, 0.97)" 216 | ], 217 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 218 | "decimals": 2, 219 | "mappingType": 1, 220 | "pattern": "name", 221 | "thresholds": [], 222 | "type": "number", 223 | "unit": "short" 224 | }, 225 | { 226 | "$$hashKey": "object:3698", 227 | "alias": "i程序", 228 | "align": "auto", 229 | "colorMode": null, 230 | "colors": [ 231 | "rgba(245, 54, 54, 0.9)", 232 | "rgba(237, 129, 40, 0.89)", 233 | "rgba(50, 172, 45, 0.97)" 234 | ], 235 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 236 | "decimals": 2, 237 | "mappingType": 1, 238 | "pattern": "ifile", 239 | "thresholds": [], 240 | "type": "number", 241 | "unit": "short" 242 | }, 243 | { 244 | "$$hashKey": "object:3976", 245 | "alias": "PID/PPID", 246 | "align": "auto", 247 | "colorMode": null, 248 | "colors": [ 249 | "rgba(245, 54, 54, 0.9)", 250 | "rgba(237, 129, 40, 0.89)", 251 | "rgba(50, 172, 45, 0.97)" 252 | ], 253 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 254 | "decimals": 2, 255 | "link": true, 256 | "linkTooltip": "${__cell_17}", 257 | "mappingType": 1, 258 | "pattern": "pid", 259 | "thresholds": [], 260 | "type": "string", 261 | "unit": "none" 262 | }, 263 | { 264 | "$$hashKey": 
"object:1065", 265 | "alias": "持续时间", 266 | "align": "auto", 267 | "colorMode": "cell", 268 | "colors": [ 269 | "rgba(245, 54, 54, 0.9)", 270 | "rgba(237, 129, 40, 0.89)", 271 | "rgba(50, 172, 45, 0.97)" 272 | ], 273 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 274 | "decimals": 1, 275 | "mappingType": 1, 276 | "pattern": "Value", 277 | "thresholds": [ 278 | "3600", 279 | "36000" 280 | ], 281 | "type": "number", 282 | "unit": "s" 283 | }, 284 | { 285 | "$$hashKey": "object:1224", 286 | "alias": "IP", 287 | "align": "auto", 288 | "colorMode": null, 289 | "colors": [ 290 | "rgba(245, 54, 54, 0.9)", 291 | "rgba(237, 129, 40, 0.89)", 292 | "rgba(50, 172, 45, 0.97)" 293 | ], 294 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 295 | "decimals": 2, 296 | "mappingType": 1, 297 | "pattern": "iintip", 298 | "thresholds": [], 299 | "type": "string", 300 | "unit": "short" 301 | }, 302 | { 303 | "$$hashKey": "object:3185", 304 | "alias": "", 305 | "align": "right", 306 | "colorMode": null, 307 | "colors": [ 308 | "rgba(245, 54, 54, 0.9)", 309 | "rgba(237, 129, 40, 0.89)", 310 | "rgba(50, 172, 45, 0.97)" 311 | ], 312 | "decimals": 2, 313 | "pattern": "/.*/", 314 | "thresholds": [], 315 | "type": "hidden", 316 | "unit": "short" 317 | } 318 | ], 319 | "targets": [ 320 | { 321 | "expr": "total:linux_proc_info{A00_iid=~\"$A00_iid\",pid=~\"$pid\"} * on (A00_iid,pid) group_left linux_proc_durn", 322 | "format": "table", 323 | "hide": false, 324 | "instant": true, 325 | "interval": "", 326 | "legendFormat": "", 327 | "refId": "A" 328 | } 329 | ], 330 | "timeFrom": null, 331 | "timeShift": null, 332 | "title": "进程监控基础信息", 333 | "transform": "table", 334 | "type": "table-old" 335 | }, 336 | { 337 | "collapsed": false, 338 | "datasource": "${DS__VICTORIAMETRICS}", 339 | "gridPos": { 340 | "h": 1, 341 | "w": 24, 342 | "x": 0, 343 | "y": 7 344 | }, 345 | "id": 410, 346 | "panels": [], 347 | "repeat": null, 348 | "title": "进程信息明细", 349 | "type": "row" 350 | }, 351 | { 352 | "aliasColors": {}, 353 | "bars": 
false, 354 | "dashLength": 10, 355 | "dashes": false, 356 | "datasource": "${DS__VICTORIAMETRICS}", 357 | "decimals": null, 358 | "description": "", 359 | "editable": true, 360 | "error": false, 361 | "fieldConfig": { 362 | "defaults": { 363 | "links": [] 364 | }, 365 | "overrides": [] 366 | }, 367 | "fill": 0, 368 | "fillGradient": 0, 369 | "grid": {}, 370 | "gridPos": { 371 | "h": 8, 372 | "w": 15, 373 | "x": 0, 374 | "y": 8 375 | }, 376 | "hiddenSeries": false, 377 | "id": 406, 378 | "legend": { 379 | "alignAsTable": true, 380 | "avg": true, 381 | "current": true, 382 | "hideEmpty": true, 383 | "hideZero": false, 384 | "max": true, 385 | "min": false, 386 | "rightSide": true, 387 | "show": true, 388 | "sort": "current", 389 | "sortDesc": true, 390 | "total": false, 391 | "values": true 392 | }, 393 | "lines": true, 394 | "linewidth": 2, 395 | "links": [], 396 | "nullPointMode": "null", 397 | "options": { 398 | "alertThreshold": true 399 | }, 400 | "percentage": false, 401 | "pluginVersion": "7.5.11", 402 | "pointradius": 5, 403 | "points": false, 404 | "renderer": "flot", 405 | "seriesOverrides": [], 406 | "spaceLength": 10, 407 | "stack": false, 408 | "steppedLine": false, 409 | "targets": [ 410 | { 411 | "expr": "linux_proc_memory_rss{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 412 | "format": "time_series", 413 | "interval": "", 414 | "intervalFactor": 1, 415 | "legendFormat": "{{pid}}:常驻内存", 416 | "refId": "A", 417 | "step": 4 418 | }, 419 | { 420 | "expr": "linux_proc_memory_vms{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 421 | "format": "time_series", 422 | "interval": "", 423 | "intervalFactor": 1, 424 | "legendFormat": "{{pid}}:虚拟内存", 425 | "refId": "B", 426 | "step": 4 427 | }, 428 | { 429 | "expr": "linux_proc_memory_shared{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 430 | "format": "time_series", 431 | "interval": "", 432 | "intervalFactor": 1, 433 | "legendFormat": "{{pid}}:共享内存", 434 | "refId": "C", 435 | "step": 4 436 | }, 437 | { 438 | "expr": 
"linux_proc_memory_swap{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 439 | "format": "time_series", 440 | "interval": "", 441 | "intervalFactor": 1, 442 | "legendFormat": "{{pid}}:交换内存", 443 | "refId": "D", 444 | "step": 4 445 | }, 446 | { 447 | "expr": "linux_proc_memory_data{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 448 | "format": "time_series", 449 | "interval": "", 450 | "intervalFactor": 1, 451 | "legendFormat": "{{pid}}:除代码内存", 452 | "refId": "E", 453 | "step": 4 454 | }, 455 | { 456 | "expr": "linux_proc_memory_text{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 457 | "format": "time_series", 458 | "interval": "", 459 | "intervalFactor": 1, 460 | "legendFormat": "{{pid}}:代码内存", 461 | "refId": "F", 462 | "step": 4 463 | }, 464 | { 465 | "expr": "linux_proc_memory_percent{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 466 | "format": "time_series", 467 | "hide": true, 468 | "interval": "", 469 | "intervalFactor": 1, 470 | "legendFormat": "{{pid}}:内存占用比例", 471 | "refId": "G", 472 | "step": 4 473 | } 474 | ], 475 | "thresholds": [], 476 | "timeFrom": null, 477 | "timeRegions": [], 478 | "timeShift": null, 479 | "title": "进程分类内存占用量", 480 | "tooltip": { 481 | "msResolution": false, 482 | "shared": true, 483 | "sort": 2, 484 | "value_type": "individual" 485 | }, 486 | "type": "graph", 487 | "xaxis": { 488 | "buckets": null, 489 | "mode": "time", 490 | "name": null, 491 | "show": true, 492 | "values": [] 493 | }, 494 | "yaxes": [ 495 | { 496 | "$$hashKey": "object:542", 497 | "format": "bytes", 498 | "label": "", 499 | "logBase": 1, 500 | "max": null, 501 | "min": null, 502 | "show": true 503 | }, 504 | { 505 | "$$hashKey": "object:543", 506 | "decimals": null, 507 | "format": "percent", 508 | "label": "内存占用比例", 509 | "logBase": 1, 510 | "max": "100", 511 | "min": "0", 512 | "show": false 513 | } 514 | ], 515 | "yaxis": { 516 | "align": false, 517 | "alignLevel": null 518 | } 519 | }, 520 | { 521 | "aliasColors": {}, 522 | "bars": false, 523 | "dashLength": 10, 524 | "dashes": false, 525 | 
"datasource": "${DS__VICTORIAMETRICS}", 526 | "decimals": null, 527 | "description": "", 528 | "editable": true, 529 | "error": false, 530 | "fieldConfig": { 531 | "defaults": { 532 | "links": [] 533 | }, 534 | "overrides": [] 535 | }, 536 | "fill": 0, 537 | "fillGradient": 0, 538 | "grid": {}, 539 | "gridPos": { 540 | "h": 8, 541 | "w": 9, 542 | "x": 15, 543 | "y": 8 544 | }, 545 | "hiddenSeries": false, 546 | "id": 414, 547 | "legend": { 548 | "alignAsTable": true, 549 | "avg": true, 550 | "current": true, 551 | "hideEmpty": true, 552 | "hideZero": false, 553 | "max": true, 554 | "min": false, 555 | "rightSide": false, 556 | "show": true, 557 | "sort": "current", 558 | "sortDesc": true, 559 | "total": false, 560 | "values": true 561 | }, 562 | "lines": true, 563 | "linewidth": 2, 564 | "links": [], 565 | "nullPointMode": "null", 566 | "options": { 567 | "alertThreshold": true 568 | }, 569 | "percentage": false, 570 | "pluginVersion": "7.5.11", 571 | "pointradius": 5, 572 | "points": false, 573 | "renderer": "flot", 574 | "seriesOverrides": [], 575 | "spaceLength": 10, 576 | "stack": false, 577 | "steppedLine": false, 578 | "targets": [ 579 | { 580 | "expr": "linux_proc_memory_percent{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 581 | "format": "time_series", 582 | "interval": "", 583 | "intervalFactor": 1, 584 | "legendFormat": "{{pid}}:内存占比", 585 | "refId": "G", 586 | "step": 4 587 | } 588 | ], 589 | "thresholds": [], 590 | "timeFrom": null, 591 | "timeRegions": [], 592 | "timeShift": null, 593 | "title": "进程实际内存占用比例", 594 | "tooltip": { 595 | "msResolution": false, 596 | "shared": true, 597 | "sort": 2, 598 | "value_type": "individual" 599 | }, 600 | "type": "graph", 601 | "xaxis": { 602 | "buckets": null, 603 | "mode": "time", 604 | "name": null, 605 | "show": true, 606 | "values": [] 607 | }, 608 | "yaxes": [ 609 | { 610 | "$$hashKey": "object:542", 611 | "format": "percent", 612 | "label": "", 613 | "logBase": 1, 614 | "max": null, 615 | "min": null, 616 | "show": 
true 617 | }, 618 | { 619 | "$$hashKey": "object:543", 620 | "decimals": null, 621 | "format": "percent", 622 | "label": "内存占用比例", 623 | "logBase": 1, 624 | "max": "100", 625 | "min": "0", 626 | "show": false 627 | } 628 | ], 629 | "yaxis": { 630 | "align": false, 631 | "alignLevel": null 632 | } 633 | }, 634 | { 635 | "aliasColors": {}, 636 | "bars": false, 637 | "dashLength": 10, 638 | "dashes": false, 639 | "datasource": "${DS__VICTORIAMETRICS}", 640 | "decimals": null, 641 | "description": "", 642 | "editable": true, 643 | "error": false, 644 | "fieldConfig": { 645 | "defaults": { 646 | "links": [] 647 | }, 648 | "overrides": [] 649 | }, 650 | "fill": 0, 651 | "fillGradient": 0, 652 | "grid": {}, 653 | "gridPos": { 654 | "h": 8, 655 | "w": 15, 656 | "x": 0, 657 | "y": 16 658 | }, 659 | "hiddenSeries": false, 660 | "id": 411, 661 | "legend": { 662 | "alignAsTable": true, 663 | "avg": true, 664 | "current": true, 665 | "hideEmpty": true, 666 | "hideZero": true, 667 | "max": true, 668 | "min": false, 669 | "rightSide": true, 670 | "show": true, 671 | "sort": "current", 672 | "sortDesc": true, 673 | "total": false, 674 | "values": true 675 | }, 676 | "lines": true, 677 | "linewidth": 1, 678 | "links": [], 679 | "nullPointMode": "null", 680 | "options": { 681 | "alertThreshold": true 682 | }, 683 | "percentage": false, 684 | "pluginVersion": "7.5.11", 685 | "pointradius": 5, 686 | "points": false, 687 | "renderer": "flot", 688 | "seriesOverrides": [], 689 | "spaceLength": 10, 690 | "stack": false, 691 | "steppedLine": false, 692 | "targets": [ 693 | { 694 | "expr": "irate(linux_proc_cpu_user{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 695 | "format": "time_series", 696 | "interval": "", 697 | "intervalFactor": 1, 698 | "legendFormat": "{{pid}}:用户cpu占比", 699 | "refId": "A", 700 | "step": 4 701 | }, 702 | { 703 | "expr": "irate(linux_proc_cpu_system{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 704 | "format": "time_series", 705 | "interval": "", 706 | "intervalFactor": 
1, 707 | "legendFormat": "{{pid}}:系统cpu占比", 708 | "refId": "B", 709 | "step": 4 710 | }, 711 | { 712 | "expr": "irate(linux_proc_cpu_iowait{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 713 | "format": "time_series", 714 | "hide": false, 715 | "interval": "", 716 | "intervalFactor": 1, 717 | "legendFormat": "{{pid}}:iowait_cpu占比", 718 | "refId": "C", 719 | "step": 4 720 | }, 721 | { 722 | "expr": "irate(linux_proc_cpu_children_user{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 723 | "format": "time_series", 724 | "hide": false, 725 | "interval": "", 726 | "intervalFactor": 1, 727 | "legendFormat": "{{pid}}:子进程用户_cpu占比", 728 | "refId": "D", 729 | "step": 4 730 | }, 731 | { 732 | "expr": "irate(linux_proc_cpu_children_system{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 733 | "format": "time_series", 734 | "hide": false, 735 | "interval": "", 736 | "intervalFactor": 1, 737 | "legendFormat": "{{pid}}:子进程系统_cpu占比", 738 | "refId": "E", 739 | "step": 4 740 | }, 741 | { 742 | "expr": "linux_proc_cpu_percent{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}/100", 743 | "format": "time_series", 744 | "hide": false, 745 | "interval": "", 746 | "intervalFactor": 1, 747 | "legendFormat": "{{pid}}:总使用CPU占比", 748 | "refId": "F", 749 | "step": 4 750 | } 751 | ], 752 | "thresholds": [], 753 | "timeFrom": null, 754 | "timeRegions": [], 755 | "timeShift": null, 756 | "title": "进程CPU占用 (单核为100%计算,超过100%为使用多核)", 757 | "tooltip": { 758 | "msResolution": false, 759 | "shared": true, 760 | "sort": 2, 761 | "value_type": "individual" 762 | }, 763 | "type": "graph", 764 | "xaxis": { 765 | "buckets": null, 766 | "mode": "time", 767 | "name": null, 768 | "show": true, 769 | "values": [] 770 | }, 771 | "yaxes": [ 772 | { 773 | "$$hashKey": "object:542", 774 | "decimals": null, 775 | "format": "percentunit", 776 | "label": "", 777 | "logBase": 1, 778 | "max": null, 779 | "min": null, 780 | "show": true 781 | }, 782 | { 783 | "$$hashKey": "object:543", 784 | "format": "percent", 785 | "label": "总使用CPU占比", 786 | 
"logBase": 1, 787 | "max": null, 788 | "min": null, 789 | "show": false 790 | } 791 | ], 792 | "yaxis": { 793 | "align": false, 794 | "alignLevel": null 795 | } 796 | }, 797 | { 798 | "aliasColors": {}, 799 | "bars": false, 800 | "dashLength": 10, 801 | "dashes": false, 802 | "datasource": "${DS__VICTORIAMETRICS}", 803 | "fieldConfig": { 804 | "defaults": { 805 | "links": [] 806 | }, 807 | "overrides": [] 808 | }, 809 | "fill": 1, 810 | "fillGradient": 0, 811 | "gridPos": { 812 | "h": 8, 813 | "w": 9, 814 | "x": 15, 815 | "y": 16 816 | }, 817 | "hiddenSeries": false, 818 | "id": 417, 819 | "legend": { 820 | "avg": false, 821 | "current": false, 822 | "max": false, 823 | "min": false, 824 | "show": false, 825 | "total": false, 826 | "values": false 827 | }, 828 | "lines": true, 829 | "linewidth": 1, 830 | "links": [], 831 | "nullPointMode": "connected", 832 | "options": { 833 | "alertThreshold": true 834 | }, 835 | "percentage": false, 836 | "pluginVersion": "7.5.11", 837 | "pointradius": 5, 838 | "points": false, 839 | "renderer": "flot", 840 | "seriesOverrides": [], 841 | "spaceLength": 10, 842 | "stack": false, 843 | "steppedLine": false, 844 | "targets": [ 845 | { 846 | "expr": "ali_ecs_cpu_total{A00_iid=~\"$A00_iid\"}", 847 | "format": "time_series", 848 | "interval": "", 849 | "intervalFactor": 1, 850 | "legendFormat": "cpu_total", 851 | "refId": "A" 852 | } 853 | ], 854 | "thresholds": [], 855 | "timeFrom": null, 856 | "timeRegions": [], 857 | "timeShift": null, 858 | "title": "ECS CPU总使用率(所有核平均)", 859 | "tooltip": { 860 | "shared": true, 861 | "sort": 0, 862 | "value_type": "individual" 863 | }, 864 | "type": "graph", 865 | "xaxis": { 866 | "buckets": null, 867 | "mode": "time", 868 | "name": null, 869 | "show": true, 870 | "values": [] 871 | }, 872 | "yaxes": [ 873 | { 874 | "$$hashKey": "object:1462", 875 | "format": "percent", 876 | "label": null, 877 | "logBase": 1, 878 | "max": null, 879 | "min": null, 880 | "show": true 881 | }, 882 | { 883 | 
"$$hashKey": "object:1463", 884 | "format": "short", 885 | "label": null, 886 | "logBase": 1, 887 | "max": null, 888 | "min": null, 889 | "show": false 890 | } 891 | ], 892 | "yaxis": { 893 | "align": false, 894 | "alignLevel": null 895 | } 896 | }, 897 | { 898 | "aliasColors": {}, 899 | "bars": false, 900 | "dashLength": 10, 901 | "dashes": false, 902 | "datasource": "${DS__VICTORIAMETRICS}", 903 | "decimals": null, 904 | "description": "", 905 | "editable": true, 906 | "error": false, 907 | "fieldConfig": { 908 | "defaults": { 909 | "links": [] 910 | }, 911 | "overrides": [] 912 | }, 913 | "fill": 0, 914 | "fillGradient": 1, 915 | "grid": {}, 916 | "gridPos": { 917 | "h": 9, 918 | "w": 12, 919 | "x": 0, 920 | "y": 24 921 | }, 922 | "hiddenSeries": false, 923 | "id": 412, 924 | "legend": { 925 | "alignAsTable": true, 926 | "avg": true, 927 | "current": true, 928 | "hideEmpty": true, 929 | "hideZero": false, 930 | "max": true, 931 | "min": false, 932 | "rightSide": true, 933 | "show": true, 934 | "sort": "current", 935 | "sortDesc": true, 936 | "total": false, 937 | "values": true 938 | }, 939 | "lines": true, 940 | "linewidth": 2, 941 | "links": [], 942 | "nullPointMode": "null", 943 | "options": { 944 | "alertThreshold": true 945 | }, 946 | "percentage": false, 947 | "pluginVersion": "7.5.11", 948 | "pointradius": 5, 949 | "points": false, 950 | "renderer": "flot", 951 | "seriesOverrides": [], 952 | "spaceLength": 10, 953 | "stack": false, 954 | "steppedLine": false, 955 | "targets": [ 956 | { 957 | "expr": "irate(linux_proc_io_read_count{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 958 | "format": "time_series", 959 | "instant": false, 960 | "interval": "", 961 | "intervalFactor": 1, 962 | "legendFormat": "{{pid}}:读IO", 963 | "refId": "A", 964 | "step": 4 965 | }, 966 | { 967 | "expr": "irate(linux_proc_io_write_count{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 968 | "format": "time_series", 969 | "interval": "", 970 | "intervalFactor": 1, 971 | "legendFormat": 
"{{pid}}:写IO", 972 | "refId": "B", 973 | "step": 4 974 | } 975 | ], 976 | "thresholds": [], 977 | "timeFrom": null, 978 | "timeRegions": [], 979 | "timeShift": null, 980 | "title": "进程IOPS", 981 | "tooltip": { 982 | "msResolution": false, 983 | "shared": true, 984 | "sort": 2, 985 | "value_type": "individual" 986 | }, 987 | "type": "graph", 988 | "xaxis": { 989 | "buckets": null, 990 | "mode": "time", 991 | "name": null, 992 | "show": true, 993 | "values": [] 994 | }, 995 | "yaxes": [ 996 | { 997 | "$$hashKey": "object:542", 998 | "decimals": null, 999 | "format": "iops", 1000 | "label": "", 1001 | "logBase": 1, 1002 | "max": null, 1003 | "min": null, 1004 | "show": true 1005 | }, 1006 | { 1007 | "$$hashKey": "object:543", 1008 | "format": "Bps", 1009 | "label": "读写字节", 1010 | "logBase": 1, 1011 | "max": null, 1012 | "min": null, 1013 | "show": false 1014 | } 1015 | ], 1016 | "yaxis": { 1017 | "align": false, 1018 | "alignLevel": null 1019 | } 1020 | }, 1021 | { 1022 | "aliasColors": {}, 1023 | "bars": false, 1024 | "dashLength": 10, 1025 | "dashes": false, 1026 | "datasource": "${DS__VICTORIAMETRICS}", 1027 | "decimals": null, 1028 | "description": "", 1029 | "editable": true, 1030 | "error": false, 1031 | "fieldConfig": { 1032 | "defaults": { 1033 | "links": [] 1034 | }, 1035 | "overrides": [] 1036 | }, 1037 | "fill": 0, 1038 | "fillGradient": 1, 1039 | "grid": {}, 1040 | "gridPos": { 1041 | "h": 9, 1042 | "w": 12, 1043 | "x": 12, 1044 | "y": 24 1045 | }, 1046 | "hiddenSeries": false, 1047 | "id": 415, 1048 | "legend": { 1049 | "alignAsTable": true, 1050 | "avg": true, 1051 | "current": true, 1052 | "hideEmpty": true, 1053 | "hideZero": false, 1054 | "max": true, 1055 | "min": false, 1056 | "rightSide": true, 1057 | "show": true, 1058 | "sort": "current", 1059 | "sortDesc": true, 1060 | "total": false, 1061 | "values": true 1062 | }, 1063 | "lines": true, 1064 | "linewidth": 1, 1065 | "links": [], 1066 | "nullPointMode": "null", 1067 | "options": { 1068 | 
"alertThreshold": true 1069 | }, 1070 | "percentage": false, 1071 | "pluginVersion": "7.5.11", 1072 | "pointradius": 5, 1073 | "points": false, 1074 | "renderer": "flot", 1075 | "seriesOverrides": [], 1076 | "spaceLength": 10, 1077 | "stack": false, 1078 | "steppedLine": false, 1079 | "targets": [ 1080 | { 1081 | "expr": "irate(linux_proc_io_read_bytes{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 1082 | "format": "time_series", 1083 | "hide": false, 1084 | "interval": "", 1085 | "intervalFactor": 1, 1086 | "legendFormat": "{{pid}}:读字节", 1087 | "refId": "C", 1088 | "step": 4 1089 | }, 1090 | { 1091 | "expr": "irate(linux_proc_io_write_bytes{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}[5m])", 1092 | "format": "time_series", 1093 | "hide": false, 1094 | "interval": "", 1095 | "intervalFactor": 1, 1096 | "legendFormat": "{{pid}}:写字节", 1097 | "refId": "D", 1098 | "step": 4 1099 | } 1100 | ], 1101 | "thresholds": [], 1102 | "timeFrom": null, 1103 | "timeRegions": [], 1104 | "timeShift": null, 1105 | "title": "进程IO读写", 1106 | "tooltip": { 1107 | "msResolution": false, 1108 | "shared": true, 1109 | "sort": 2, 1110 | "value_type": "individual" 1111 | }, 1112 | "type": "graph", 1113 | "xaxis": { 1114 | "buckets": null, 1115 | "mode": "time", 1116 | "name": null, 1117 | "show": true, 1118 | "values": [] 1119 | }, 1120 | "yaxes": [ 1121 | { 1122 | "$$hashKey": "object:542", 1123 | "decimals": null, 1124 | "format": "Bps", 1125 | "label": "", 1126 | "logBase": 1, 1127 | "max": null, 1128 | "min": null, 1129 | "show": true 1130 | }, 1131 | { 1132 | "$$hashKey": "object:543", 1133 | "format": "Bps", 1134 | "label": "读写字节", 1135 | "logBase": 1, 1136 | "max": null, 1137 | "min": null, 1138 | "show": false 1139 | } 1140 | ], 1141 | "yaxis": { 1142 | "align": false, 1143 | "alignLevel": null 1144 | } 1145 | }, 1146 | { 1147 | "aliasColors": { 1148 | "2060:文件描述符上限": "dark-red" 1149 | }, 1150 | "bars": false, 1151 | "dashLength": 10, 1152 | "dashes": false, 1153 | "datasource": 
"${DS__VICTORIAMETRICS}", 1154 | "decimals": null, 1155 | "description": "", 1156 | "editable": true, 1157 | "error": false, 1158 | "fieldConfig": { 1159 | "defaults": { 1160 | "links": [] 1161 | }, 1162 | "overrides": [] 1163 | }, 1164 | "fill": 0, 1165 | "fillGradient": 0, 1166 | "grid": {}, 1167 | "gridPos": { 1168 | "h": 9, 1169 | "w": 24, 1170 | "x": 0, 1171 | "y": 33 1172 | }, 1173 | "hiddenSeries": false, 1174 | "id": 413, 1175 | "legend": { 1176 | "alignAsTable": true, 1177 | "avg": true, 1178 | "current": true, 1179 | "hideEmpty": true, 1180 | "hideZero": true, 1181 | "max": true, 1182 | "min": false, 1183 | "rightSide": true, 1184 | "show": true, 1185 | "sort": "current", 1186 | "sortDesc": true, 1187 | "total": false, 1188 | "values": true 1189 | }, 1190 | "lines": true, 1191 | "linewidth": 2, 1192 | "links": [], 1193 | "nullPointMode": "null", 1194 | "options": { 1195 | "alertThreshold": true 1196 | }, 1197 | "percentage": false, 1198 | "pluginVersion": "7.5.11", 1199 | "pointradius": 5, 1200 | "points": false, 1201 | "renderer": "flot", 1202 | "seriesOverrides": [ 1203 | { 1204 | "$$hashKey": "object:1269", 1205 | "alias": "/.*文件描述符上限占比/", 1206 | "color": "#FADE2A", 1207 | "lines": false, 1208 | "points": true, 1209 | "yaxis": 2 1210 | } 1211 | ], 1212 | "spaceLength": 10, 1213 | "stack": false, 1214 | "steppedLine": false, 1215 | "targets": [ 1216 | { 1217 | "expr": "linux_proc_num_open_files{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 1218 | "format": "time_series", 1219 | "instant": false, 1220 | "interval": "", 1221 | "intervalFactor": 1, 1222 | "legendFormat": "{{pid}}:打开文件数", 1223 | "refId": "A", 1224 | "step": 4 1225 | }, 1226 | { 1227 | "expr": "linux_proc_num_fds{A00_iid=~\"$A00_iid\",pid=~\"$pid\"} / linux_proc_num_fds_limit{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 1228 | "format": "time_series", 1229 | "interval": "30m", 1230 | "intervalFactor": 1, 1231 | "legendFormat": "{{pid}}:文件描述符上限占比", 1232 | "refId": "B", 1233 | "step": 4 1234 | }, 1235 | { 
1236 | "expr": "linux_proc_num_fds{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 1237 | "format": "time_series", 1238 | "hide": false, 1239 | "interval": "", 1240 | "intervalFactor": 1, 1241 | "legendFormat": "{{pid}}:打开文件描述符", 1242 | "refId": "C", 1243 | "step": 4 1244 | }, 1245 | { 1246 | "expr": "linux_proc_num_threads{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 1247 | "format": "time_series", 1248 | "hide": false, 1249 | "interval": "", 1250 | "intervalFactor": 1, 1251 | "legendFormat": "{{pid}}:线程数", 1252 | "refId": "D", 1253 | "step": 4 1254 | }, 1255 | { 1256 | "expr": "linux_proc_num_children{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 1257 | "format": "time_series", 1258 | "hide": false, 1259 | "interval": "", 1260 | "intervalFactor": 1, 1261 | "legendFormat": "{{pid}}:子进程", 1262 | "refId": "E", 1263 | "step": 4 1264 | }, 1265 | { 1266 | "expr": "linux_proc_num_fds_limit{A00_iid=~\"$A00_iid\",pid=~\"$pid\"}", 1267 | "format": "time_series", 1268 | "interval": "", 1269 | "intervalFactor": 1, 1270 | "legendFormat": "{{pid}}:文件描述符上限", 1271 | "refId": "F", 1272 | "step": 4 1273 | } 1274 | ], 1275 | "thresholds": [], 1276 | "timeFrom": null, 1277 | "timeRegions": [], 1278 | "timeShift": null, 1279 | "title": "文件描述符与线程", 1280 | "tooltip": { 1281 | "msResolution": false, 1282 | "shared": true, 1283 | "sort": 2, 1284 | "value_type": "individual" 1285 | }, 1286 | "type": "graph", 1287 | "xaxis": { 1288 | "buckets": null, 1289 | "mode": "time", 1290 | "name": null, 1291 | "show": true, 1292 | "values": [] 1293 | }, 1294 | "yaxes": [ 1295 | { 1296 | "$$hashKey": "object:542", 1297 | "decimals": null, 1298 | "format": "short", 1299 | "label": "", 1300 | "logBase": 1, 1301 | "max": null, 1302 | "min": null, 1303 | "show": true 1304 | }, 1305 | { 1306 | "$$hashKey": "object:543", 1307 | "decimals": null, 1308 | "format": "percentunit", 1309 | "label": "【黄点】文件描述符上限占比", 1310 | "logBase": 1, 1311 | "max": "1", 1312 | "min": "0", 1313 | "show": true 1314 | } 1315 | ], 1316 | "yaxis": { 1317 
| "align": false, 1318 | "alignLevel": null 1319 | } 1320 | } 1321 | ], 1322 | "refresh": false, 1323 | "schemaVersion": 27, 1324 | "style": "dark", 1325 | "tags": [ 1326 | "阿里云", 1327 | "Non-ali-API" 1328 | ], 1329 | "templating": { 1330 | "list": [ 1331 | { 1332 | "allValue": null, 1333 | "current": {}, 1334 | "datasource": "${DS__VICTORIAMETRICS}", 1335 | "definition": "label_values(total:linux_proc_info,A01_iaccount)", 1336 | "description": null, 1337 | "error": null, 1338 | "hide": 0, 1339 | "includeAll": true, 1340 | "label": "账号", 1341 | "multi": false, 1342 | "name": "A01_iaccount", 1343 | "options": [], 1344 | "query": { 1345 | "query": "label_values(total:linux_proc_info,A01_iaccount)", 1346 | "refId": " VictoriaMetrics-A01_iaccount-Variable-Query" 1347 | }, 1348 | "refresh": 1, 1349 | "regex": "", 1350 | "skipUrlSync": false, 1351 | "sort": 0, 1352 | "tagValuesQuery": "", 1353 | "tags": [], 1354 | "tagsQuery": "", 1355 | "type": "query", 1356 | "useTags": false 1357 | }, 1358 | { 1359 | "allValue": null, 1360 | "current": {}, 1361 | "datasource": "${DS__VICTORIAMETRICS}", 1362 | "definition": "label_values(total:linux_proc_info{A01_iaccount=~\"$A01_iaccount\"},A05_ienv)", 1363 | "description": null, 1364 | "error": null, 1365 | "hide": 0, 1366 | "includeAll": true, 1367 | "label": "环境", 1368 | "multi": false, 1369 | "name": "A05_ienv", 1370 | "options": [], 1371 | "query": { 1372 | "query": "label_values(total:linux_proc_info{A01_iaccount=~\"$A01_iaccount\"},A05_ienv)", 1373 | "refId": " VictoriaMetrics-A05_ienv-Variable-Query" 1374 | }, 1375 | "refresh": 1, 1376 | "regex": "", 1377 | "skipUrlSync": false, 1378 | "sort": 0, 1379 | "tagValuesQuery": "", 1380 | "tags": [], 1381 | "tagsQuery": "", 1382 | "type": "query", 1383 | "useTags": false 1384 | }, 1385 | { 1386 | "allValue": null, 1387 | "current": {}, 1388 | "datasource": "${DS__VICTORIAMETRICS}", 1389 | "definition": 
"label_values(total:linux_proc_info{A01_iaccount=~\"$A01_iaccount\",A05_ienv=~\"$A05_ienv\"},A03_igroup)", 1390 | "description": null, 1391 | "error": null, 1392 | "hide": 0, 1393 | "includeAll": true, 1394 | "label": "所属组", 1395 | "multi": false, 1396 | "name": "A03_igroup", 1397 | "options": [], 1398 | "query": { 1399 | "query": "label_values(total:linux_proc_info{A01_iaccount=~\"$A01_iaccount\",A05_ienv=~\"$A05_ienv\"},A03_igroup)", 1400 | "refId": " VictoriaMetrics-A03_igroup-Variable-Query" 1401 | }, 1402 | "refresh": 1, 1403 | "regex": "", 1404 | "skipUrlSync": false, 1405 | "sort": 5, 1406 | "tagValuesQuery": "", 1407 | "tags": [], 1408 | "tagsQuery": "", 1409 | "type": "query", 1410 | "useTags": false 1411 | }, 1412 | { 1413 | "allValue": null, 1414 | "current": {}, 1415 | "datasource": "${DS__VICTORIAMETRICS}", 1416 | "definition": "label_values(total:linux_proc_info{A01_iaccount=~\"$A01_iaccount\",A05_ienv=~\"$A05_ienv\",A03_igroup=~\"$A03_igroup\"},A02_iname)", 1417 | "description": null, 1418 | "error": null, 1419 | "hide": 0, 1420 | "includeAll": true, 1421 | "label": "主机名", 1422 | "multi": false, 1423 | "name": "A02_iname", 1424 | "options": [], 1425 | "query": { 1426 | "query": "label_values(total:linux_proc_info{A01_iaccount=~\"$A01_iaccount\",A05_ienv=~\"$A05_ienv\",A03_igroup=~\"$A03_igroup\"},A02_iname)", 1427 | "refId": " VictoriaMetrics-A02_iname-Variable-Query" 1428 | }, 1429 | "refresh": 1, 1430 | "regex": "", 1431 | "skipUrlSync": false, 1432 | "sort": 5, 1433 | "tagValuesQuery": "", 1434 | "tags": [], 1435 | "tagsQuery": "", 1436 | "type": "query", 1437 | "useTags": false 1438 | }, 1439 | { 1440 | "allValue": null, 1441 | "current": {}, 1442 | "datasource": "${DS__VICTORIAMETRICS}", 1443 | "definition": "label_values(total:linux_proc_info{A01_iaccount=~\"$A01_iaccount\",A05_ienv=~\"$A05_ienv\",A03_igroup=~\"$A03_igroup\",A02_iname=~\"$A02_iname\"},A00_iid)", 1444 | "description": null, 1445 | "error": null, 1446 | "hide": 0, 1447 | 
"includeAll": true, 1448 | "label": "实例ID", 1449 | "multi": false, 1450 | "name": "A00_iid", 1451 | "options": [], 1452 | "query": { 1453 | "query": "label_values(total:linux_proc_info{A01_iaccount=~\"$A01_iaccount\",A05_ienv=~\"$A05_ienv\",A03_igroup=~\"$A03_igroup\",A02_iname=~\"$A02_iname\"},A00_iid)", 1454 | "refId": " VictoriaMetrics-A00_iid-Variable-Query" 1455 | }, 1456 | "refresh": 1, 1457 | "regex": "", 1458 | "skipUrlSync": false, 1459 | "sort": 5, 1460 | "tagValuesQuery": "", 1461 | "tags": [], 1462 | "tagsQuery": "", 1463 | "type": "query", 1464 | "useTags": false 1465 | }, 1466 | { 1467 | "allValue": null, 1468 | "current": {}, 1469 | "datasource": "${DS__VICTORIAMETRICS}", 1470 | "definition": "label_values(total:linux_proc_info{A01_iaccount=~\"$A01_iaccount\",A05_ienv=~\"$A05_ienv\",A03_igroup=~\"$A03_igroup\",A02_iname=~\"$A02_iname\",A00_iid=~\"$A00_iid\"},pid)", 1471 | "description": null, 1472 | "error": null, 1473 | "hide": 0, 1474 | "includeAll": true, 1475 | "label": "PID", 1476 | "multi": false, 1477 | "name": "pid", 1478 | "options": [], 1479 | "query": { 1480 | "query": "label_values(total:linux_proc_info{A01_iaccount=~\"$A01_iaccount\",A05_ienv=~\"$A05_ienv\",A03_igroup=~\"$A03_igroup\",A02_iname=~\"$A02_iname\",A00_iid=~\"$A00_iid\"},pid)", 1481 | "refId": " VictoriaMetrics-pid-Variable-Query" 1482 | }, 1483 | "refresh": 1, 1484 | "regex": "", 1485 | "skipUrlSync": false, 1486 | "sort": 3, 1487 | "tagValuesQuery": "", 1488 | "tags": [], 1489 | "tagsQuery": "", 1490 | "type": "query", 1491 | "useTags": false 1492 | }, 1493 | { 1494 | "datasource": null, 1495 | "description": null, 1496 | "error": null, 1497 | "filters": [], 1498 | "hide": 0, 1499 | "label": "", 1500 | "name": "Filters", 1501 | "skipUrlSync": false, 1502 | "type": "adhoc" 1503 | } 1504 | ] 1505 | }, 1506 | "time": { 1507 | "from": "now-12h", 1508 | "to": "now" 1509 | }, 1510 | "timepicker": { 1511 | "collapse": false, 1512 | "enable": true, 1513 | "hidden": false, 1514 | 
"notice": false, 1515 | "now": true, 1516 | "refresh_intervals": [ 1517 | "1m", 1518 | "5m", 1519 | "15m", 1520 | "30m", 1521 | "1h", 1522 | "2h", 1523 | "1d" 1524 | ], 1525 | "status": "Stable", 1526 | "time_options": [ 1527 | "5m", 1528 | "15m", 1529 | "1h", 1530 | "6h", 1531 | "12h", 1532 | "24h", 1533 | "2d", 1534 | "7d", 1535 | "30d" 1536 | ], 1537 | "type": "timepicker" 1538 | }, 1539 | "timezone": "browser", 1540 | "title": "阿里云-进程监控", 1541 | "uid": "yoPfIkqWk", 1542 | "version": 9 1543 | } -------------------------------------------------------------------------------- /node_exporter/README.md: -------------------------------------------------------------------------------- 1 | #### English Version:https://grafana.com/grafana/dashboards/11074 2 | 3 | --- 4 | 5 | ### 🎉Node Exporter Grafana Dashboard 更新啦! 6 | 7 | #### **Node Exporter Dashboard分为2个版本** 8 | 9 | - **🌟TenSunS自动同步版:通过TenSunS来管理各云厂商的ECS监控**:支持在云厂商对资源增删改查后,自动同步到Prometheus(同时也支持自建主机的批量web管理与同步)。采集云厂商的ECS信息(包括到期日)与分组等信息,基于采集的数据实现了更友好、丰富的资源分组,以及云资源名称等多种云标签的搜索与展示。 10 | 11 | - **原基于Job分组的通用版**:对于不使用TenSunS同步资源的情况,资源标签信息较少,仅可以使用通用的JOB字段来分组。 12 | 13 | --- 14 | 15 | ### 2024.05.20更新说明: 16 | 1. 更新了看板的所有Panel支持最新样式,对大量图表重新做了美化,已兼容Grafana10.X版本。 17 | 2. 总览表优化ECS健康评分加载性能,增加了更多图表的说明描述。 18 | 3. 新增了整体资源消耗信息的一些图表,用于资源成本优化参考。 19 | 4. 使用了从云厂商获取的ECS名称字段和新的分组字段,并且能展示资源到期日。 20 | 5. 
优化重要指标展示,包含整体资源展示与资源明细图表:CPU 内存 磁盘 进程 网络等监控指标。 21 | 22 | ##### 注意:【最近7天P99资源使用率】图表需要在Prometheus增加记录规则(采集1小时后出数据): 23 | 24 | - P99:将数据集按升序排列后,处于第99百分位的数值。(即有99%的数据小于或等于该值) 25 | - 该表格需要在Prometheus增加记录规则(参考看板下载页) 26 | - 采集1小时后出数据 27 | - 时间范围[7d:1h]表示要查看过去 7 天内每小时的数据点。 28 | 29 | --- 30 | 31 | ##### TenSunS自动同步版增加记录规则 32 | 33 | ``` 34 | groups: #新rule文件需要加这行开头,追加旧的rule文件则不需要。 35 | - name: node_usage_record_rules 36 | interval: 1m 37 | rules: 38 | - record: cpu:usage:rate1m 39 | expr: (1 - avg(irate(node_cpu_seconds_total{mode="idle"}[3m])) by (instance,vendor,account,group,name)) * 100 40 | - record: mem:usage:rate1m 41 | expr: (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 42 | ``` 43 | 44 | ##### 基于Job分组的通用版增加记录规则 45 | - Job分组的通用版的数据源变量origin_prometheus,取自于Prometheus的外部系统标签:external_labels,可用于支持多个Prometheus接入VictoriaMetrics或Thanos等第三方存储使用remote_write方式的场景。(默认取值空,指标中无该标签不影响使用) 46 | ``` 47 | groups: #新rule文件需要加这行开头,追加旧的rule文件则不需要。 48 | - name: node_usage_record_rules 49 | interval: 1m 50 | rules: 51 | - record: cpu:usage:rate1m 52 | expr: (1 - avg(irate(node_cpu_seconds_total{mode="idle"}[3m])) by (instance,job)) * 100 53 | - record: mem:usage:rate1m 54 | expr: (1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 55 | ``` 56 | --- 57 | 58 | ### TenSunS自动同步版看板请配合TenSunS使用 59 | #### [📌点击进入【TenSunS介绍】https://github.com/starsliao/TenSunS](https://github.com/starsliao/TenSunS) 60 | #### [🥇最佳实践 https://github.com/starsliao/TenSunS?tab=readme-ov-file#最佳实践](https://github.com/starsliao/TenSunS?tab=readme-ov-file#%E6%9C%80%E4%BD%B3%E5%AE%9E%E8%B7%B5) 61 | - [应用场景1:如何优雅的基于Consul自动同步ECS主机监控](https://github.com/starsliao/ConsulManager/blob/main/docs/ECS%E4%B8%BB%E6%9C%BA%E7%9B%91%E6%8E%A7.md) 62 | - [应用场景2:如何优雅的使用Consul管理Blackbox站点监控](https://github.com/starsliao/ConsulManager/blob/main/docs/blackbox%E7%AB%99%E7%82%B9%E7%9B%91%E6%8E%A7.md) 63 | - 
[应用场景3:如何把云主机自动同步到JumpServer](https://github.com/starsliao/ConsulManager/blob/main/docs/%E5%A6%82%E4%BD%95%E6%8A%8A%E4%B8%BB%E6%9C%BA%E8%87%AA%E5%8A%A8%E5%90%8C%E6%AD%A5%E5%88%B0JumpServer.md) 64 | - [应用场景4:使用1个mysqld_exporter监控所有的MySQL实例](https://github.com/starsliao/ConsulManager/blob/main/docs/%E5%A6%82%E4%BD%95%E4%BC%98%E9%9B%85%E7%9A%84%E4%BD%BF%E7%94%A8%E4%B8%80%E4%B8%AAmysqld_exporter%E7%9B%91%E6%8E%A7%E6%89%80%E6%9C%89%E7%9A%84MySQL%E5%AE%9E%E4%BE%8B.md) 65 | - [应用场景5:使用1个redis_exporter监控所有的Redis实例](https://github.com/starsliao/ConsulManager/blob/main/docs/%E4%BD%BF%E7%94%A8%E4%B8%80%E4%B8%AAredis_exporter%E7%9B%91%E6%8E%A7%E6%89%80%E6%9C%89%E7%9A%84Redis%E5%AE%9E%E4%BE%8B.md) 66 | 67 | --- 68 | 69 | ### TenSunS部分功能描述 70 | #### 自建与云资源监控管理(ECS/RDS/Redis) 71 | >**基于Consul实现Prometheus监控目标的自动发现。** 72 | 73 | - ✔**当前已支持对接阿里云、腾讯云、华为云。** 74 | 75 | - ⭐支持多云ECS/RDS/Redis的**资源、分组、标签**自动同步到Consul并接入到Prometheus自动发现!(并提供云资源信息查询与自定义页面) 76 | - ⭐支持多云ECS信息自动同步到**JumpServer**。 77 | - ⭐支持多云**账户余额**与云资源**到期日**设置阈值告警通知。 78 | - ⭐支持作为Exporter接入Prometheus:Prometheus增加ConsulManager的JOB后可抓取云厂商的部分MySQL/Redis指标。(弥补原生Exporter无法获取部分云MySQL/Redis指标的问题) 79 | - ✔**支持自建主机/MySQL/Redis**接入WEB管理,支持增删改查、批量导入导出,自动同步到Consul并接入到Prometheus监控! 80 | - ✔提供了按需生成Prometheus配置与ECS/MySQL/Redis告警规则的功能。 81 | - ✔设计了多个支持同步的各字段展示的Node_Exporter、Mysqld_Exporter、Redis_Exporter Grafana看板。 82 | 83 | 截图: 84 | ![](https://grafana.com/api/dashboards/8919/images/16268/image) 85 | ![](https://grafana.com/api/dashboards/8919/images/16269/image) 86 | ![](https://grafana.com/api/dashboards/8919/images/16270/image) 87 | ![](https://grafana.com/api/dashboards/8919/images/16271/image) 88 | ![](https://grafana.com/api/dashboards/8919/images/16272/image) 89 | ![](https://grafana.com/api/dashboards/8919/images/16273/image) 90 | ![](https://grafana.com/api/dashboards/8919/images/16274/image) 91 | 92 | ### 赞赏与关注公众号【云原生DevOps】加入运维群交流,获取更多... 
93 | ![](https://starsl.cn/static/img/thanks.png) 94 | #### GitHub:[https://github.com/starsliao/TenSunS](https://github.com/starsliao/TenSunS) 95 | 96 | ### 看板下载 97 | 98 | **我的全部Grafana看板** 99 | - https://grafana.com/orgs/starsliao/dashboards 100 | 101 | **TenSunS自动同步版** 102 | - Grafana ID: 8919 103 | - https://grafana.com/grafana/dashboards/8919 104 | 105 | **通用Job分组版** 106 | - Grafana ID: 16098 107 | - https://grafana.com/grafana/dashboards/16098 108 | -------------------------------------------------------------------------------- /node_exporter/gf1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starsliao/Prometheus/1936cda22403a97af95048435591df84c97c7f08/node_exporter/gf1.png -------------------------------------------------------------------------------- /node_exporter/gf2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starsliao/Prometheus/1936cda22403a97af95048435591df84c97c7f08/node_exporter/gf2.png -------------------------------------------------------------------------------- /node_exporter/gf3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starsliao/Prometheus/1936cda22403a97af95048435591df84c97c7f08/node_exporter/gf3.png -------------------------------------------------------------------------------- /node_exporter/tss1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starsliao/Prometheus/1936cda22403a97af95048435591df84c97c7f08/node_exporter/tss1.png -------------------------------------------------------------------------------- /node_exporter/tss2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starsliao/Prometheus/1936cda22403a97af95048435591df84c97c7f08/node_exporter/tss2.png 
-------------------------------------------------------------------------------- /node_exporter/tss3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starsliao/Prometheus/1936cda22403a97af95048435591df84c97c7f08/node_exporter/tss3.png -------------------------------------------------------------------------------- /node_exporter/tss4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starsliao/Prometheus/1936cda22403a97af95048435591df84c97c7f08/node_exporter/tss4.png -------------------------------------------------------------------------------- /qr.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/starsliao/Prometheus/1936cda22403a97af95048435591df84c97c7f08/qr.jpg -------------------------------------------------------------------------------- /screenshot: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /windows_exporter/README.md: -------------------------------------------------------------------------------- 1 | ### Grafana v9 + windows_exporter 0.22.0测试通过 2 | #### Windows的Prometheus监控看板展示,增加了资源汇总展示,优化了明细展示。更新支持windows_exporter 0.22.0。 3 | ### 中文版:[https://grafana.com/grafana/dashboards/10467](https://grafana.com/grafana/dashboards/10467) 4 | 5 | 6 | #### 截图 7 | 8 | ![](https://github.com/starsliao/Prometheus/blob/master/windows_exporter/windows_exporter.png) 9 | ### 运维博客:[StarsL.cn](https://starsl.cn/) 10 | ### GitHub:[https://github.com/starsliao/Prometheus](https://github.com/starsliao/Prometheus) 11 | ### windows_exporter:[https://github.com/prometheus-community/windows_exporter](https://github.com/prometheus-community/windows_exporter) 12 | ### 关注公众号【**云原生DevOps**】加入运维群交流,获取更多... 
13 | ![](https://github.com/starsliao/Prometheus/blob/master/qr.jpg) 14 | -------------------------------------------------------------------------------- /windows_exporter/win_alert_rules.yml: -------------------------------------------------------------------------------- 1 | - name: Windows # metric names use the windows_ prefix exposed by windows_exporter; the legacy wmi_ prefix is no longer emitted 2 | rules: 3 | - alert: win_CPU使用率 4 | expr: 100 - (avg by (instance,A02_iname,intip,A05_ienv,A01_iaccount) (irate(windows_cpu_time_total{mode="idle"}[2m])) * 100) > 80 5 | for: 2m 6 | labels: 7 | alertype: win 8 | severity: warning 9 | annotations: 10 | description: "{{ $labels.A02_iname }}_{{$labels.intip}}:CPU使用率达到{{ $value | humanize }}%\n> {{ $labels.A01_iaccount}}-{{ $labels.A05_ienv }}" 11 | 12 | - alert: win_内存使用率 13 | expr: 100.0 - 100 * windows_os_physical_memory_free_bytes / windows_cs_physical_memory_bytes > 85 14 | for: 2m 15 | labels: 16 | alertype: win 17 | severity: warning 18 | annotations: 19 | description: "{{ $labels.A02_iname }}_{{$labels.intip}}:内存使用率达到{{ $value | humanize }}%\n> {{ $labels.A01_iaccount}}-{{ $labels.A05_ienv }}" 20 | 21 | - alert: win_磁盘使用率 22 | expr: 100.0 - 100 * ((windows_logical_disk_free_bytes{} / 1024 / 1024 ) / (windows_logical_disk_size_bytes{} / 1024 / 1024)) > 80 23 | for: 2m 24 | labels: 25 | alertype: win 26 | severity: warning 27 | annotations: 28 | description: "{{ $labels.A02_iname }}_{{$labels.intip}}_{{$labels.volume}}:使用率达到{{ $value | humanize }}%\n> {{ $labels.A01_iaccount}}-{{ $labels.A05_ienv }}" 29 | 30 | - alert: win_采集器状态 31 | expr: windows_exporter_collector_success == 0 32 | for: 2m 33 | labels: 34 | alertype: win 35 | severity: critical 36 | annotations: 37 | description: "{{ $labels.A02_iname }}_{{$labels.intip}}_{{ $labels.collector }}:异常\n> {{ $labels.A01_iaccount}}-{{ $labels.A05_ienv }}" 38 | 39 | -------------------------------------------------------------------------------- /windows_exporter/windows_exporter.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/starsliao/Prometheus/1936cda22403a97af95048435591df84c97c7f08/windows_exporter/windows_exporter.png --------------------------------------------------------------------------------