├── client
│   ├── stop_datacollector.sh
│   ├── run_datacollector.sh
│   ├── update_client.sh
│   ├── install_client.sh
│   ├── swarm.mtail
│   └── get_metrics.sh
├── screenshot1.png
├── screenshot2.png
├── server
│   ├── grafana
│   │   └── grafana.db
│   ├── update_server.sh
│   ├── docker-compose.yaml
│   ├── install_server.sh
│   └── prometheus.yml
├── .idea
│   └── vcs.xml
└── README.md

/client/stop_datacollector.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Stop the metric collectors and the mtail exporter started by run_datacollector.sh.
3 | sudo pkill -f get_metrics
4 | sudo pkill -f mtail
--------------------------------------------------------------------------------
/screenshot1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ytx1991/SwarmMonitoring/HEAD/screenshot1.png
--------------------------------------------------------------------------------
/screenshot2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ytx1991/SwarmMonitoring/HEAD/screenshot2.png
--------------------------------------------------------------------------------
/server/grafana/grafana.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ytx1991/SwarmMonitoring/HEAD/server/grafana/grafana.db
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/server/update_server.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Re-download the published Grafana dashboard DB and install it for the
2 | # grafana container (NOTE: overwrites any local dashboard edits).
2 | cd ~/swarmon_server || exit 1
3 | rm -f grafana.db
4 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/server/grafana/grafana.db -O grafana.db
5 | sudo cp grafana.db /root/swarmon/grafana/grafana.db
6 | sudo chmod -R 777 /root/swarmon/grafana
--------------------------------------------------------------------------------
/server/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | version: "3.9"
2 | services:
3 |   grafana:
4 |     container_name: grafana_swarm
5 |     image: grafana/grafana
6 |     network_mode: "host"
7 |     volumes:
8 |       - /root/swarmon/grafana:/var/lib/grafana
9 |     restart: always
10 |   prometheus:
11 |     container_name: prometheus
12 |     image: prom/prometheus
13 |     volumes:
14 |       - /root/swarmon/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
15 |     network_mode: "host"
16 |     restart: always
17 |
--------------------------------------------------------------------------------
/client/run_datacollector.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #One node per line, $1 = Remote Grafana host, $2 = node name, $3 = node debug api
3 | sudo pkill -f get_metrics
4 | sudo pkill -f mtail
5 | #Add your nodes debug api here
6 | nodes=(127.0.0.1:1635 127.0.0.1:1735)
7 | names=(bee1 bee2)
8 |
9 |
10 | total=${#nodes[*]}
11 | for (( i=0; i<total; i++ ))
12 | do
13 |   nohup watch -n 10 "~/swarmon_client/get_metrics.sh ${nodes[$i]} ${names[$i]} > ~/swarmon_client/logs/${names[$i]}.log" &
14 | done
15 |
16 | nohup ~/swarmon_client/mtail --progs ~/swarmon_client/progs --logs ~/swarmon_client/"logs/*.log" --poll_interval 10000ms &
--------------------------------------------------------------------------------
/client/update_client.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | sudo pkill -f get_metrics
3 | sudo pkill -f mtail
4 | cd ~/swarmon_client || exit 1
5 | cd progs || exit 1
6 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/swarm.mtail -O swarm.mtail
7 | cd ..
8 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/stop_datacollector.sh -O stop_datacollector.sh
9 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/get_metrics.sh -O get_metrics.sh
10 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/update_client.sh -O update_client.sh
11 | chmod +x stop_datacollector.sh
12 | chmod +x run_datacollector.sh
13 | chmod +x get_metrics.sh
14 | chmod +x update_client.sh
--------------------------------------------------------------------------------
/server/install_server.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | mkdir -p ~/swarmon_server
4 | sudo mkdir -p /root/swarmon/prometheus
5 | sudo mkdir -p /root/swarmon/grafana
6 |
7 | cd ~/swarmon_server || exit 1
8 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/server/grafana/grafana.db
9 | sudo cp grafana.db /root/swarmon/grafana/grafana.db
10 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/server/docker-compose.yaml
11 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/server/prometheus.yml
12 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/server/update_server.sh
13 | sudo cp prometheus.yml /root/swarmon/prometheus
14 | sudo chmod -R 777 /root/swarmon/grafana
15 | chmod +x update_server.sh
--------------------------------------------------------------------------------
/client/install_client.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #Only for Linux x86, change the mtail link if you are running on a different OS
3 | mkdir -p ~/swarmon_client
4 | cd ~/swarmon_client || exit 1
5 | mkdir -p logs
6 | mkdir -p progs
7 | cd progs || exit 1
8 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/swarm.mtail
9 | cd ..
10 | wget https://github.com/google/mtail/releases/download/v3.0.0-rc45/mtail_3.0.0-rc45_Linux_x86_64.tar.gz 11 | tar -xf mtail_3.0.0-rc45_Linux_x86_64.tar.gz 12 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/run_datacollector.sh 13 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/stop_datacollector.sh 14 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/get_metrics.sh 15 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/update_client.sh 16 | chmod +x stop_datacollector.sh 17 | chmod +x run_datacollector.sh 18 | chmod +x get_metrics.sh 19 | chmod +x update_client.sh -------------------------------------------------------------------------------- /server/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 10s # By default, scrape targets every 15 seconds. 3 | # A scrape configuration containing exactly one endpoint to scrape: 4 | # Here it's Prometheus itself. 5 | scrape_configs: 6 | # The job name is added as a label `job=` to any timeseries scraped from this config. 7 | - job_name: 'prometheus' 8 | 9 | # Override the global default and scrape targets from this job every 10 seconds. 
10 | scrape_interval: 10s 11 | 12 | static_configs: 13 | - targets: ['localhost:9090'] 14 | 15 | - job_name: 'swarm' 16 | static_configs: 17 | - targets: ['YOUR_HOST_IP1:3903'] 18 | labels: 19 | host: host1 20 | - targets: ['YOUR_HOST_IP2:3903'] 21 | labels: 22 | host: host2 23 | - targets: ['YOUR_HOST_IP3:3903'] 24 | labels: 25 | host: host3 26 | - targets: ['YOUR_HOST_IP4:3903'] 27 | labels: 28 | host: host4 -------------------------------------------------------------------------------- /client/swarm.mtail: -------------------------------------------------------------------------------- 1 | gauge peers by node 2 | gauge diskavail 3 | gauge diskfree 4 | gauge cpu 5 | gauge memory 6 | gauge upload_bandwidth 7 | gauge download_bandwidth 8 | gauge cheque by node 9 | gauge total_bzz by node 10 | gauge available_bzz by node 11 | gauge total_uncashed by node 12 | /^\{"name":"(?P.+)","peers":(?P\d+),"diskavail":(?P\d+),"diskfree":(?P\d+),"cheque":(?P\d+),"total_bzz":(?P\d+),"available_bzz":(?P\d+),"total_uncashed":(?P\d+)\}$/ { 13 | peers[$node] = $peers 14 | diskavail = $diskavail 15 | diskfree = $diskfree 16 | cheque[$node] = $cheque 17 | total_bzz[$node] = $total_bzz 18 | available_bzz[$node] = $available_bzz 19 | total_uncashed[$node] = $total_uncashed 20 | } 21 | /^CPU:(?P[\d\.]+)$/ { 22 | cpu = $cpu 23 | } 24 | /^MEM:(?P[\d\.]+)$/ { 25 | memory = $mem 26 | } 27 | /^Total send rate:[ \t]*(?P[\d\.]+)KB.*$/ { 28 | upload_bandwidth = $upload 29 | } 30 | /^Total receive rate:[ \t]*(?P[\d\.]+)KB.*$/ { 31 | download_bandwidth = $download 32 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![效果](screenshot1.png) 2 |
3 | ![效果](screenshot2.png) 4 |


5 | ### 请保证每个节点的名称是唯一的! 本方案兼容一机多节点及Docker
6 | ### 目前仅支持Linux系统
7 | ### 有问题欢迎到 Discord https://discord.gg/PJwJzCkT4W 咨询 8 | 9 | ### 如何更新 10 | #### 客户端需运行对应的update_client.sh脚本进行更新 11 | 12 | #### 服务端需运行对应的update_server.sh脚本进行更新 (注意原来的Dashboard会被覆盖,如有做修改请先备份grafana.db) 13 | 14 | ### 安装监控服务,仅需在主服务器上安装 15 | 16 | #### 安装Docker及Docker Compose (已安装的可跳过) 17 | 请确保3000和9090端口没被占用且可被外部访问 18 | 19 | 这里仅给出Ubuntu安装命令,其他系统可自行搜索安装教程 20 | ``` 21 | sudo apt-get update 22 | sudo apt install docker.io 23 | sudo systemctl start docker 24 | sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose 25 | sudo chmod +x /usr/local/bin/docker-compose 26 | sudo ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose 27 | ``` 28 | 29 | #### 下载安装集群监控服务端 30 | 1.执行以下命令 31 | ``` 32 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/server/install_server.sh 33 | chmod +x install_server.sh 34 | ./install_server.sh 35 | ``` 36 | 37 | 2.修改/root/swarmon/prometheus/prometheus.yml文件。 38 | 39 | 可使用sudo vim /root/swarmon/prometheus/prometheus.yml编辑 40 | 41 | 根据自己拥有的服务器数量和IP修改对应内容,端口号不要改变,默认示例包含四个服务器Host1 - Host4,可删掉多余的服务器 42 | ``` 43 | static_configs: 44 | - targets: ['YOUR_HOST_IP1:3903'] 45 | labels: 46 | host: host1 47 | - targets: ['YOUR_HOST_IP2:3903'] 48 | labels: 49 | host: host2 50 | - targets: ['YOUR_HOST_IP3:3903'] 51 | labels: 52 | host: host3 53 | - targets: ['YOUR_HOST_IP4:3903'] 54 | labels: 55 | host: host4 56 | ``` 57 | 3.修改完成保存后运行下面命令启动服务端 58 | cd ~/swarmon_server && sudo docker-compose up -d 59 | 60 | 此时你应该可以通过浏览器访问主服务器3000端口, 如192.168.1.2:3000 并看到Grafana登录界面。 61 | 62 | 使用用户名:admin 密码:swarmadmin 登录管理员账号,登录后可修改管理员密码。 63 | 64 | 监控面版及数据源应该已经预设好,直接打开便可使用。 65 | 66 | ### 下载安装集群监控客户端 (每个有节点的服务器都需要安装) 67 | 68 | 如果需要显示带宽数据请先安装iftop! 
69 | Ubuntu: sudo apt install iftop 70 | 71 | 请确保3903端口没被占用且可被外部访问 72 | 73 | 1.运行以下命令下载安装监控客户端 74 | ``` 75 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/install_client.sh 76 | chmod +x install_client.sh 77 | ./install_client.sh 78 | ``` 79 | 80 | 2.编辑run_datacollector.sh,添加当前服务器上的节点。 81 | 可使用 vim ~/swarmon_client/run_datacollector.sh 编辑 82 | 83 | ``` 84 | 修改下面两行,第一行为节点的debug api,用空格隔开 85 | 第二行为节点名称,需要确保节点名称是唯一的,不同服务器上不能有相同名称的节点 86 | 87 | nodes=(127.0.0.1:1635 127.0.0.1:1735) 88 | names=(bee1 bee2) 89 | ``` 90 | 修改完成后保存退出 91 | 92 | 3.运行以下命令: 93 | cd ~/swarmon_client && ./run_datacollector.sh 94 | 95 | 如需停止可运行: 96 | cd ~/swarmon_client && ./stop_datacollector.sh 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /client/get_metrics.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | [ -z ${DEBUG_API+x} ] && DEBUG_API=$1 3 | nodeName=$2 4 | totalAmount=0 5 | MIN_BZZ_UNIT=1000000000 6 | function getPeers(){ 7 | curl -s "$DEBUG_API/chequebook/cheque" | jq -r '.lastcheques | .[].peer' 8 | } 9 | 10 | function getCumulativePayout() { 11 | local peer=$1 12 | local cumulativePayout=$(curl -s "$DEBUG_API/chequebook/cheque/$peer" | jq '.lastreceived.payout') 13 | if [ $cumulativePayout == null ] 14 | then 15 | echo 0 16 | else 17 | echo $cumulativePayout 18 | fi 19 | } 20 | 21 | function getLastCashedPayout() { 22 | local peer=$1 23 | local cashout=$(curl -s "$DEBUG_API/chequebook/cashout/$peer" | jq '.cumulativePayout') 24 | if [ $cashout == null ] 25 | then 26 | echo 0 27 | else 28 | echo $cashout 29 | fi 30 | } 31 | 32 | function getUncashedAmount() { 33 | local peer=$1 34 | local cumulativePayout=$(getCumulativePayout $peer) 35 | if [ $cumulativePayout == 0 ] 36 | then 37 | echo 0 38 | return 39 | fi 40 | 41 | cashedPayout=$(getLastCashedPayout $peer) 42 | let 
uncashedAmount=$cumulativePayout-$cashedPayout
43 |   echo $uncashedAmount
44 | }
45 |
46 | # Sum the uncashed cheque amount over all known peers.
46 | function countUncashed() {
47 |   for peer in $(getPeers)
48 |   do
49 |     totalAmount=$((totalAmount + $(getUncashedAmount "$peer")))
50 |   done
51 |   echo $totalAmount
52 | }
53 |
54 | # Emit one JSON line per node for mtail to parse (see swarm.mtail).
54 | function makejson(){
55 |   echo "Populating $DEBUG_API data..."
56 |   peers=$(curl -s "$DEBUG_API/peers" | jq '.peers | length')
57 |   if [ -z "$peers" ]
58 |   then
59 |     peers=0
60 |   fi
61 |   diskavail=$(df -P . | awk 'NR==2{print $2}') # assumes the script's cwd is on the bee data disk — TODO confirm
62 |   diskfree=$(df -P . | awk 'NR==2{print $4}')
63 |   cheque=$(curl -s "$DEBUG_API/chequebook/cheque" | jq '.lastcheques | length')
64 |   if [ -z "$cheque" ]
65 |   then
66 |     cheque=0
67 |   fi
68 |   totalBZZ=$(curl -s "$DEBUG_API/chequebook/balance" | jq '.totalBalance')
69 |   totalBZZ=$(echo "$totalBZZ" | awk '{printf("%d",$0)}')
70 |   if [ -z "$totalBZZ" ]
71 |   then
72 |     totalBZZ=0
73 |   fi
74 |   availableBZZ=$(curl -s "$DEBUG_API/chequebook/balance" | jq '.availableBalance')
75 |   availableBZZ=$(echo "$availableBZZ" | awk '{printf("%d",$0)}')
76 |   if [ -z "$availableBZZ" ]
77 |   then
78 |     availableBZZ=0
79 |   fi
80 |   uncashedBZZ=$(countUncashed)
81 |   uncashedBZZ=$(echo "$uncashedBZZ" | awk '{printf("%d",$0)}')
82 |   if [ -z "$uncashedBZZ" ]
83 |   then
84 |     uncashedBZZ=0
85 |   fi
86 |   json='{"name":"'"$nodeName"'","peers":'$peers',"diskavail":'$diskavail',"diskfree":'$diskfree',"cheque":'$cheque',"total_bzz":'$(($totalBZZ/$MIN_BZZ_UNIT))',"available_bzz":'$(($availableBZZ/$MIN_BZZ_UNIT))',"total_uncashed":'$(($uncashedBZZ/$MIN_BZZ_UNIT))'}'
87 |   echo "$json"
88 | }
89 | #get CPU util
90 | top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" |awk '{print "CPU:"100 - $1}'
91 | #get Mem util
92 | free -t | awk 'NR == 2 {print "MEM:" $3/$2*100}'
93 | makejson
94 | #get Network util
95 | sudo iftop -tB -s 1 -L 1 | grep "Total send rate.*\|Total rec.*" > ~/swarmon_client/logs/network.log
96 |
--------------------------------------------------------------------------------