├── client
│   ├── stop_datacollector.sh
│   ├── run_datacollector.sh
│   ├── update_client.sh
│   ├── install_client.sh
│   ├── swarm.mtail
│   └── get_metrics.sh
├── screenshot1.png
├── screenshot2.png
├── server
│   ├── grafana
│   │   └── grafana.db
│   ├── update_server.sh
│   ├── docker-compose.yaml
│   ├── install_server.sh
│   └── prometheus.yml
├── .idea
│   └── vcs.xml
└── README.md

/client/stop_datacollector.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Stop the metric collectors and the mtail exporter started by run_datacollector.sh.
3 | sudo pkill -f get_metrics
4 | sudo pkill -f mtail
--------------------------------------------------------------------------------
/screenshot1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ytx1991/SwarmMonitoring/HEAD/screenshot1.png
--------------------------------------------------------------------------------
/screenshot2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ytx1991/SwarmMonitoring/HEAD/screenshot2.png
--------------------------------------------------------------------------------
/server/grafana/grafana.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ytx1991/SwarmMonitoring/HEAD/server/grafana/grafana.db
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/server/update_server.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Re-download the published Grafana dashboard DB and install it for the
2 | # grafana container (NOTE: overwrites any local dashboard edits).
2 | cd ~/swarmon_server || exit 1
3 | rm -f grafana.db
4 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/server/grafana/grafana.db -O grafana.db
5 | sudo cp grafana.db /root/swarmon/grafana/grafana.db
6 | sudo chmod -R 777 /root/swarmon/grafana
--------------------------------------------------------------------------------
/server/docker-compose.yaml:
--------------------------------------------------------------------------------
1 | version: "3.9"
2 | services:
3 |   grafana:
4 |     container_name: grafana_swarm
5 |     image: grafana/grafana
6 |     network_mode: "host"
7 |     volumes:
8 |       - /root/swarmon/grafana:/var/lib/grafana
9 |     restart: always
10 |   prometheus:
11 |     container_name: prometheus
12 |     image: prom/prometheus
13 |     volumes:
14 |       - /root/swarmon/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
15 |     network_mode: "host"
16 |     restart: always
17 |
--------------------------------------------------------------------------------
/client/run_datacollector.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #One node per line, $1 = Remote Grafana host, $2 = node name, $3 = node debug api
3 | sudo pkill -f get_metrics
4 | sudo pkill -f mtail
5 | #Add your nodes debug api here
6 | nodes=(127.0.0.1:1635 127.0.0.1:1735)
7 | names=(bee1 bee2)
8 |
9 |
10 | total=${#nodes[*]}
11 | for (( i=0; i<total; i++ ))
12 | do
13 |   nohup watch -n 10 "~/swarmon_client/get_metrics.sh ${nodes[$i]} ${names[$i]} > ~/swarmon_client/logs/${names[$i]}.log" &
14 | done
15 |
16 | nohup ~/swarmon_client/mtail --progs ~/swarmon_client/progs --logs ~/swarmon_client/"logs/*.log" --poll_interval 10000ms &
--------------------------------------------------------------------------------
/client/update_client.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | sudo pkill -f get_metrics
3 | sudo pkill -f mtail
4 | cd ~/swarmon_client || exit 1
5 | cd progs || exit 1
6 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/swarm.mtail -O swarm.mtail
7 | cd ..
8 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/stop_datacollector.sh -O stop_datacollector.sh
9 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/get_metrics.sh -O get_metrics.sh
10 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/update_client.sh -O update_client.sh
11 | chmod +x stop_datacollector.sh
12 | chmod +x run_datacollector.sh
13 | chmod +x get_metrics.sh
14 | chmod +x update_client.sh
--------------------------------------------------------------------------------
/server/install_server.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | mkdir -p ~/swarmon_server
4 | sudo mkdir -p /root/swarmon/prometheus
5 | sudo mkdir -p /root/swarmon/grafana
6 |
7 | cd ~/swarmon_server || exit 1
8 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/server/grafana/grafana.db
9 | sudo cp grafana.db /root/swarmon/grafana/grafana.db
10 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/server/docker-compose.yaml
11 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/server/prometheus.yml
12 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/server/update_server.sh
13 | sudo cp prometheus.yml /root/swarmon/prometheus
14 | sudo chmod -R 777 /root/swarmon/grafana
15 | chmod +x update_server.sh
--------------------------------------------------------------------------------
/client/install_client.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #Only for Linux x86, change the mtail link if you are running on a different OS
3 | mkdir -p ~/swarmon_client
4 | cd ~/swarmon_client || exit 1
5 | mkdir -p logs
6 | mkdir -p progs
7 | cd progs || exit 1
8 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/swarm.mtail
9 | cd ..
10 | wget https://github.com/google/mtail/releases/download/v3.0.0-rc45/mtail_3.0.0-rc45_Linux_x86_64.tar.gz 11 | tar -xf mtail_3.0.0-rc45_Linux_x86_64.tar.gz 12 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/run_datacollector.sh 13 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/stop_datacollector.sh 14 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/get_metrics.sh 15 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/update_client.sh 16 | chmod +x stop_datacollector.sh 17 | chmod +x run_datacollector.sh 18 | chmod +x get_metrics.sh 19 | chmod +x update_client.sh -------------------------------------------------------------------------------- /server/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 10s # By default, scrape targets every 15 seconds. 3 | # A scrape configuration containing exactly one endpoint to scrape: 4 | # Here it's Prometheus itself. 5 | scrape_configs: 6 | # The job name is added as a label `job=` to any timeseries scraped from this config. 7 | - job_name: 'prometheus' 8 | 9 | # Override the global default and scrape targets from this job every 10 seconds. 
10 | scrape_interval: 10s 11 | 12 | static_configs: 13 | - targets: ['localhost:9090'] 14 | 15 | - job_name: 'swarm' 16 | static_configs: 17 | - targets: ['YOUR_HOST_IP1:3903'] 18 | labels: 19 | host: host1 20 | - targets: ['YOUR_HOST_IP2:3903'] 21 | labels: 22 | host: host2 23 | - targets: ['YOUR_HOST_IP3:3903'] 24 | labels: 25 | host: host3 26 | - targets: ['YOUR_HOST_IP4:3903'] 27 | labels: 28 | host: host4 -------------------------------------------------------------------------------- /client/swarm.mtail: -------------------------------------------------------------------------------- 1 | gauge peers by node 2 | gauge diskavail 3 | gauge diskfree 4 | gauge cpu 5 | gauge memory 6 | gauge upload_bandwidth 7 | gauge download_bandwidth 8 | gauge cheque by node 9 | gauge total_bzz by node 10 | gauge available_bzz by node 11 | gauge total_uncashed by node 12 | /^\{"name":"(?P.+)","peers":(?P\d+),"diskavail":(?P\d+),"diskfree":(?P\d+),"cheque":(?P\d+),"total_bzz":(?P\d+),"available_bzz":(?P\d+),"total_uncashed":(?P\d+)\}$/ { 13 | peers[$node] = $peers 14 | diskavail = $diskavail 15 | diskfree = $diskfree 16 | cheque[$node] = $cheque 17 | total_bzz[$node] = $total_bzz 18 | available_bzz[$node] = $available_bzz 19 | total_uncashed[$node] = $total_uncashed 20 | } 21 | /^CPU:(?P[\d\.]+)$/ { 22 | cpu = $cpu 23 | } 24 | /^MEM:(?P[\d\.]+)$/ { 25 | memory = $mem 26 | } 27 | /^Total send rate:[ \t]*(?P[\d\.]+)KB.*$/ { 28 | upload_bandwidth = $upload 29 | } 30 | /^Total receive rate:[ \t]*(?P[\d\.]+)KB.*$/ { 31 | download_bandwidth = $download 32 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![效果](screenshot1.png) 2 |
3 | ![效果](screenshot2.png) 4 |


5 | ### 请保证每个节点的名称是唯一的! 本方案兼容一机多节点及Docker
6 | ### 目前仅支持Linux系统
7 | ### 有问题欢迎到 Discord https://discord.gg/PJwJzCkT4W 咨询 8 | 9 | ### 如何更新 10 | #### 客户端需运行对应的update_client.sh脚本进行更新 11 | 12 | #### 服务端需运行对应的update_server.sh脚本进行更新 (注意原来的Dashboard会被覆盖,如有做修改请先备份grafana.db) 13 | 14 | ### 安装监控服务,仅需在主服务器上安装 15 | 16 | #### 安装Docker及Docker Compose (已安装的可跳过) 17 | 请确保3000和9090端口没被占用且可被外部访问 18 | 19 | 这里仅给出Ubuntu安装命令,其他系统可自行搜索安装教程 20 | ``` 21 | sudo apt-get update 22 | sudo apt install docker.io 23 | sudo systemctl start docker 24 | sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose 25 | sudo chmod +x /usr/local/bin/docker-compose 26 | sudo ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose 27 | ``` 28 | 29 | #### 下载安装集群监控服务端 30 | 1.执行以下命令 31 | ``` 32 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/server/install_server.sh 33 | chmod +x install_server.sh 34 | ./install_server.sh 35 | ``` 36 | 37 | 2.修改/root/swarmon/prometheus/prometheus.yml文件。 38 | 39 | 可使用sudo vim /root/swarmon/prometheus/prometheus.yml编辑 40 | 41 | 根据自己拥有的服务器数量和IP修改对应内容,端口号不要改变,默认示例包含四个服务器Host1 - Host4,可删掉多余的服务器 42 | ``` 43 | static_configs: 44 | - targets: ['YOUR_HOST_IP1:3903'] 45 | labels: 46 | host: host1 47 | - targets: ['YOUR_HOST_IP2:3903'] 48 | labels: 49 | host: host2 50 | - targets: ['YOUR_HOST_IP3:3903'] 51 | labels: 52 | host: host3 53 | - targets: ['YOUR_HOST_IP4:3903'] 54 | labels: 55 | host: host4 56 | ``` 57 | 3.修改完成保存后运行下面命令启动服务端 58 | cd ~/swarmon_server && sudo docker-compose up -d 59 | 60 | 此时你应该可以通过浏览器访问主服务器3000端口, 如192.168.1.2:3000 并看到Grafana登录界面。 61 | 62 | 使用用户名:admin 密码:swarmadmin 登录管理员账号,登录后可修改管理员密码。 63 | 64 | 监控面版及数据源应该已经预设好,直接打开便可使用。 65 | 66 | ### 下载安装集群监控客户端 (每个有节点的服务器都需要安装) 67 | 68 | 如果需要显示带宽数据请先安装iftop! 
69 | Ubuntu: sudo apt install iftop 70 | 71 | 请确保3903端口没被占用且可被外部访问 72 | 73 | 1.运行以下命令下载安装监控客户端 74 | ``` 75 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/install_client.sh 76 | chmod +x install_client.sh 77 | ./install_client.sh 78 | ``` 79 | 80 | 2.编辑run_datacollector.sh,添加当前服务器上的节点。 81 | 可使用 vim ~/swarmon_client/run_datacollector.sh 编辑 82 | 83 | ``` 84 | 修改下面两行,第一行为节点的debug api,用空格隔开 85 | 第二行为节点名称,需要确保节点名称是唯一的,不同服务器上不能有相同名称的节点 86 | 87 | nodes=(127.0.0.1:1635 127.0.0.1:1735) 88 | names=(bee1 bee2) 89 | ``` 90 | 修改完成后保存退出 91 | 92 | 3.运行以下命令: 93 | cd ~/swarmon_client && ./run_datacollector.sh 94 | 95 | 如需停止可运行: 96 | cd ~/swarmon_client && ./stop_datacollector.sh 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /client/get_metrics.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | [ -z ${DEBUG_API+x} ] && DEBUG_API=$1 3 | nodeName=$2 4 | totalAmount=0 5 | MIN_BZZ_UNIT=1000000000 6 | function getPeers(){ 7 | curl -s "$DEBUG_API/chequebook/cheque" | jq -r '.lastcheques | .[].peer' 8 | } 9 | 10 | function getCumulativePayout() { 11 | local peer=$1 12 | local cumulativePayout=$(curl -s "$DEBUG_API/chequebook/cheque/$peer" | jq '.lastreceived.payout') 13 | if [ $cumulativePayout == null ] 14 | then 15 | echo 0 16 | else 17 | echo $cumulativePayout 18 | fi 19 | } 20 | 21 | function getLastCashedPayout() { 22 | local peer=$1 23 | local cashout=$(curl -s "$DEBUG_API/chequebook/cashout/$peer" | jq '.cumulativePayout') 24 | if [ $cashout == null ] 25 | then 26 | echo 0 27 | else 28 | echo $cashout 29 | fi 30 | } 31 | 32 | function getUncashedAmount() { 33 | local peer=$1 34 | local cumulativePayout=$(getCumulativePayout $peer) 35 | if [ $cumulativePayout == 0 ] 36 | then 37 | echo 0 38 | return 39 | fi 40 | 41 | cashedPayout=$(getLastCashedPayout $peer) 42 | let 
uncashedAmount=$cumulativePayout-$cashedPayout
43 |   echo $uncashedAmount
44 | }
45 |
46 | # Sum the uncashed cheque amount over all known peers.
46 | function countUncashed() {
47 |   for peer in $(getPeers)
48 |   do
49 |     totalAmount=$((totalAmount + $(getUncashedAmount "$peer")))
50 |   done
51 |   echo $totalAmount
52 | }
53 |
54 | # Emit one JSON line per node for mtail to parse (see swarm.mtail).
54 | function makejson(){
55 |   echo "Populating $DEBUG_API data..."
56 |   peers=$(curl -s "$DEBUG_API/peers" | jq '.peers | length')
57 |   if [ -z "$peers" ]
58 |   then
59 |     peers=0
60 |   fi
61 |   diskavail=$(df -P . | awk 'NR==2{print $2}') # assumes the script's cwd is on the bee data disk — TODO confirm
62 |   diskfree=$(df -P . | awk 'NR==2{print $4}')
63 |   cheque=$(curl -s "$DEBUG_API/chequebook/cheque" | jq '.lastcheques | length')
64 |   if [ -z "$cheque" ]
65 |   then
66 |     cheque=0
67 |   fi
68 |   totalBZZ=$(curl -s "$DEBUG_API/chequebook/balance" | jq '.totalBalance')
69 |   totalBZZ=$(echo "$totalBZZ" | awk '{printf("%d",$0)}')
70 |   if [ -z "$totalBZZ" ]
71 |   then
72 |     totalBZZ=0
73 |   fi
74 |   availableBZZ=$(curl -s "$DEBUG_API/chequebook/balance" | jq '.availableBalance')
75 |   availableBZZ=$(echo "$availableBZZ" | awk '{printf("%d",$0)}')
76 |   if [ -z "$availableBZZ" ]
77 |   then
78 |     availableBZZ=0
79 |   fi
80 |   uncashedBZZ=$(countUncashed)
81 |   uncashedBZZ=$(echo "$uncashedBZZ" | awk '{printf("%d",$0)}')
82 |   if [ -z "$uncashedBZZ" ]
83 |   then
84 |     uncashedBZZ=0
85 |   fi
86 |   json='{"name":"'"$nodeName"'","peers":'$peers',"diskavail":'$diskavail',"diskfree":'$diskfree',"cheque":'$cheque',"total_bzz":'$(($totalBZZ/$MIN_BZZ_UNIT))',"available_bzz":'$(($availableBZZ/$MIN_BZZ_UNIT))',"total_uncashed":'$(($uncashedBZZ/$MIN_BZZ_UNIT))'}'
87 |   echo "$json"
88 | }
89 | #get CPU util
90 | top -bn1 | grep "Cpu(s)" | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" |awk '{print "CPU:"100 - $1}'
91 | #get Mem util
92 | free -t | awk 'NR == 2 {print "MEM:" $3/$2*100}'
93 | makejson
94 | #get Network util
95 | sudo iftop -tB -s 1 -L 1 | grep "Total send rate.*\|Total rec.*" > ~/swarmon_client/logs/network.log
96 |
--------------------------------------------------------------------------------