├── client
├── stop_datacollector.sh
├── run_datacollector.sh
├── update_client.sh
├── install_client.sh
├── swarm.mtail
└── get_metrics.sh
├── screenshot1.png
├── screenshot2.png
├── server
├── grafana
│ └── grafana.db
├── update_server.sh
├── docker-compose.yaml
├── install_server.sh
└── prometheus.yml
├── .idea
└── vcs.xml
└── README.md
/client/stop_datacollector.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Stops the monitoring client by terminating every process whose command line
# mentions the metric collector or the mtail exporter.
# `pkill -f` matches the pattern against the full command line, so the `watch`
# wrappers that launch get_metrics.sh are matched as well.
for pattern in get_metrics mtail; do
  sudo pkill -f "$pattern"
done
--------------------------------------------------------------------------------
/screenshot1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ytx1991/SwarmMonitoring/HEAD/screenshot1.png
--------------------------------------------------------------------------------
/screenshot2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ytx1991/SwarmMonitoring/HEAD/screenshot2.png
--------------------------------------------------------------------------------
/server/grafana/grafana.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ytx1991/SwarmMonitoring/HEAD/server/grafana/grafana.db
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/server/update_server.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Refreshes the Grafana database (dashboards and data source) from the upstream
# repository and installs it into the directory mounted into the Grafana
# container.
# WARNING: the live /root/swarmon/grafana/grafana.db is overwritten; back it up
# first if the dashboards were customised.

work_dir=~/swarmon_server
db_url=https://github.com/ytx1991/SwarmMonitoring/raw/main/server/grafana/grafana.db

# Without this check a failed cd would run the download in whatever directory
# the caller happened to be in.
cd "$work_dir" || { echo "Cannot enter $work_dir" >&2; exit 1; }

# Download under a temporary name so a failed transfer never replaces (or gets
# copied over) a working database.
if ! wget -O grafana.db.new "$db_url"; then
  rm -f grafana.db.new
  echo "Download of grafana.db failed; existing database left untouched" >&2
  exit 1
fi
mv -f grafana.db.new grafana.db

sudo cp grafana.db /root/swarmon/grafana/grafana.db
# NOTE(review): world-writable permissions are presumably needed so the
# container's non-root user can write here; tighten if possible.
sudo chmod -R 777 /root/swarmon/grafana
--------------------------------------------------------------------------------
/server/docker-compose.yaml:
--------------------------------------------------------------------------------
# Monitoring server stack: Grafana (dashboards) and Prometheus (metric store).
# Both containers use the host network, so their ports are exposed directly on
# the server and Prometheus can be reached from Grafana via localhost.
version: "3.9"
services:
  grafana:
    container_name: grafana_swarm
    image: grafana/grafana
    network_mode: "host"
    volumes:
      # Host directory holding grafana.db (installed by install_server.sh).
      - /root/swarmon/grafana:/var/lib/grafana
    restart: always
  prometheus:
    container_name: prometheus
    image: prom/prometheus
    volumes:
      # Scrape configuration edited by the operator on the host.
      - /root/swarmon/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
    network_mode: "host"
    restart: always
--------------------------------------------------------------------------------
/client/run_datacollector.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Starts one metric collector per local Bee node plus the mtail exporter.
# The script takes no arguments: edit the two arrays below. They are parallel
# arrays, i.e. names[i] is the label used for the debug API at nodes[i].

# Stop collectors left over from a previous run so they are not duplicated.
sudo pkill -f get_metrics
sudo pkill -f mtail

#Add your nodes debug api here
nodes=(127.0.0.1:1635 127.0.0.1:1735)
names=(bee1 bee2)

# A length mismatch would start a collector with an empty node name (writing to
# "logs/.log") or silently ignore a name, so refuse to start instead.
if (( ${#nodes[@]} != ${#names[@]} )); then
  echo "run_datacollector.sh: nodes has ${#nodes[@]} entries but names has ${#names[@]}; they must match" >&2
  exit 1
fi

# The collectors redirect into this directory; make sure it exists.
mkdir -p ~/swarmon_client/logs

for (( i = 0; i < ${#nodes[@]}; i++ )); do
  # watch re-runs the collector every 10 seconds; the ">" truncates the node's
  # log on every run, so each log only ever holds the latest sample.
  nohup watch -n 10 "~/swarmon_client/get_metrics.sh ${nodes[$i]} ${names[$i]} > ~/swarmon_client/logs/${names[$i]}.log" &
done

# The glob is quoted on purpose: mtail expands "logs/*.log" itself.
nohup ~/swarmon_client/mtail --progs ~/swarmon_client/progs --logs ~/swarmon_client/"logs/*.log" --poll_interval 10000ms &
--------------------------------------------------------------------------------
/client/update_client.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Updates the monitoring client in ~/swarmon_client: stops the running
# collectors, then re-downloads the mtail program and the helper scripts.
# run_datacollector.sh is not re-downloaded, so the locally edited node list
# survives the update. Restart the collectors with run_datacollector.sh
# afterwards.
# Exit status: 0 when every download succeeded, 1 otherwise.

base_url=https://github.com/ytx1991/SwarmMonitoring/raw/main/client

# fetch URL DEST
# Downloads URL to DEST via a temporary file. The rename replaces DEST only
# after a complete download, which (a) keeps the old file when the transfer
# fails and (b) avoids rewriting update_client.sh in place while bash is still
# reading it.
fetch() {
  local url=$1 dest=$2
  if wget "$url" -O "$dest.new"; then
    mv -f "$dest.new" "$dest"
  else
    rm -f "$dest.new"
    echo "Failed to download $url; keeping existing $dest" >&2
    return 1
  fi
}

sudo pkill -f get_metrics
sudo pkill -f mtail

cd ~/swarmon_client || { echo "Cannot enter ~/swarmon_client; is the client installed?" >&2; exit 1; }

status=0
fetch "$base_url/swarm.mtail" progs/swarm.mtail || status=1
fetch "$base_url/stop_datacollector.sh" stop_datacollector.sh || status=1
fetch "$base_url/get_metrics.sh" get_metrics.sh || status=1
fetch "$base_url/update_client.sh" update_client.sh || status=1

# Freshly created files are not executable; stop_datacollector.sh is included
# for consistency with install_client.sh.
chmod +x run_datacollector.sh stop_datacollector.sh get_metrics.sh update_client.sh

exit "$status"
--------------------------------------------------------------------------------
/server/install_server.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Installs the monitoring server files:
#   ~/swarmon_server          docker-compose.yaml, update_server.sh, downloads
#   /root/swarmon/grafana     grafana.db (mounted into the Grafana container)
#   /root/swarmon/prometheus  prometheus.yml (mounted into Prometheus)
# NOTE: re-running this script copies a fresh prometheus.yml and grafana.db
# over the installed ones, discarding local edits.

base_url=https://github.com/ytx1991/SwarmMonitoring/raw/main/server

# -p creates the missing parent /root/swarmon (plain mkdir fails without it)
# and makes the script safe to re-run.
mkdir -p ~/swarmon_server
sudo mkdir -p /root/swarmon/prometheus /root/swarmon/grafana

cd ~/swarmon_server || { echo "Cannot enter ~/swarmon_server" >&2; exit 1; }

# -O pins the local file name, so a re-run overwrites the previous download
# instead of saving "grafana.db.1" and copying the stale file.
for file in grafana/grafana.db docker-compose.yaml prometheus.yml update_server.sh; do
  wget -O "${file##*/}" "$base_url/$file" || { echo "Failed to download $file" >&2; exit 1; }
done

sudo cp grafana.db /root/swarmon/grafana/grafana.db
sudo cp prometheus.yml /root/swarmon/prometheus
# NOTE(review): world-writable permissions are presumably needed so the
# container's non-root user can write here; tighten if possible.
sudo chmod -R 777 /root/swarmon/grafana
chmod +x update_server.sh
--------------------------------------------------------------------------------
/client/install_client.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Installs the monitoring client into ~/swarmon_client: the mtail binary, the
# mtail program (progs/swarm.mtail) and the collector scripts.
#Only for Linux x86, change the mtail link if you are running on a different OS

repo_url=https://github.com/ytx1991/SwarmMonitoring/raw/main/client
mtail_archive=mtail_3.0.0-rc45_Linux_x86_64.tar.gz
mtail_url=https://github.com/google/mtail/releases/download/v3.0.0-rc45/$mtail_archive

# download URL DEST
# Fetches URL into DEST and aborts the installation when the transfer fails.
# -O pins the file name so a re-run does not leave "name.1" copies behind.
download() {
  wget -O "$2" "$1" || { echo "Failed to download $1" >&2; exit 1; }
}

# -p makes the script re-runnable and creates the parent directory as needed.
mkdir -p ~/swarmon_client/logs ~/swarmon_client/progs
cd ~/swarmon_client || { echo "Cannot enter ~/swarmon_client" >&2; exit 1; }

download "$repo_url/swarm.mtail" progs/swarm.mtail
download "$mtail_url" "$mtail_archive"
tar -xf "$mtail_archive" || { echo "Failed to unpack $mtail_archive" >&2; exit 1; }

# run_datacollector.sh holds the operator's node list; never clobber an
# existing copy when the installer is run again.
if [ ! -e run_datacollector.sh ]; then
  download "$repo_url/run_datacollector.sh" run_datacollector.sh
fi

for script in stop_datacollector.sh get_metrics.sh update_client.sh; do
  download "$repo_url/$script" "$script"
done

chmod +x stop_datacollector.sh run_datacollector.sh get_metrics.sh update_client.sh
--------------------------------------------------------------------------------
/server/prometheus.yml:
--------------------------------------------------------------------------------
global:
  scrape_interval: 10s # Scrape targets every 10 seconds unless a job overrides it.

# Two scrape jobs: Prometheus itself and the monitored Swarm hosts.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'

    # Per-job interval; currently the same value as the global setting.
    scrape_interval: 10s

    static_configs:
      - targets: ['localhost:9090']

  # One entry per server running the monitoring client (port 3903).
  # Replace the YOUR_HOST_IPn placeholders and delete the entries you do not need.
  - job_name: 'swarm'
    static_configs:
      - targets: ['YOUR_HOST_IP1:3903']
        labels:
          host: host1
      - targets: ['YOUR_HOST_IP2:3903']
        labels:
          host: host2
      - targets: ['YOUR_HOST_IP3:3903']
        labels:
          host: host3
      - targets: ['YOUR_HOST_IP4:3903']
        labels:
          host: host4
--------------------------------------------------------------------------------
/client/swarm.mtail:
--------------------------------------------------------------------------------
# mtail program for the Swarm monitoring client.
# It parses the log lines written by get_metrics.sh and exposes them as gauges.
# Per-node gauges are keyed by the node name; host-level gauges have no key.
gauge peers by node
gauge diskavail
gauge diskfree
gauge cpu
gauge memory
gauge upload_bandwidth
gauge download_bandwidth
gauge cheque by node
gauge total_bzz by node
gauge available_bzz by node
gauge total_uncashed by node

# Per-node JSON line. Every named group below is referenced as $name in the
# action body, so the group names must stay in sync with those references.
/^\{"name":"(?P<node>.+)","peers":(?P<peers>\d+),"diskavail":(?P<diskavail>\d+),"diskfree":(?P<diskfree>\d+),"cheque":(?P<cheque>\d+),"total_bzz":(?P<total_bzz>\d+),"available_bzz":(?P<available_bzz>\d+),"total_uncashed":(?P<total_uncashed>\d+)\}$/ {
  peers[$node] = $peers
  diskavail = $diskavail
  diskfree = $diskfree
  cheque[$node] = $cheque
  total_bzz[$node] = $total_bzz
  available_bzz[$node] = $available_bzz
  total_uncashed[$node] = $total_uncashed
}

# "CPU:<percent>" line.
/^CPU:(?P<cpu>[\d\.]+)$/ {
  cpu = $cpu
}

# "MEM:<percent>" line.
/^MEM:(?P<mem>[\d\.]+)$/ {
  memory = $mem
}

# iftop totals. Only rates reported with a "KB" unit match these patterns.
/^Total send rate:[ \t]*(?P<upload>[\d\.]+)KB.*$/ {
  upload_bandwidth = $upload
}
/^Total receive rate:[ \t]*(?P<download>[\d\.]+)KB.*$/ {
  download_bandwidth = $download
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 |
3 | 
4 |
5 | ### 请保证每个节点的名称是唯一的! 本方案兼容一机多节点及Docker
6 | ### 目前仅支持Linux系统
7 | ### 有问题欢迎到 Discord https://discord.gg/PJwJzCkT4W 咨询
8 |
9 | ### 如何更新
10 | #### 客户端需运行对应的update_client.sh脚本进行更新
11 |
12 | #### 服务端需运行对应的update_server.sh脚本进行更新 (注意原来的Dashboard会被覆盖,如有做修改请先备份grafana.db)
13 |
14 | ### 安装监控服务,仅需在主服务器上安装
15 |
16 | #### 安装Docker及Docker Compose (已安装的可跳过)
17 | 请确保3000和9090端口没被占用且可被外部访问
18 |
19 | 这里仅给出Ubuntu安装命令,其他系统可自行搜索安装教程
20 | ```
21 | sudo apt-get update
22 | sudo apt install docker.io
23 | sudo systemctl start docker
24 | sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
25 | sudo chmod +x /usr/local/bin/docker-compose
26 | sudo ln -s /usr/local/bin/docker-compose /usr/bin/docker-compose
27 | ```
28 |
29 | #### 下载安装集群监控服务端
30 | 1.执行以下命令
31 | ```
32 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/server/install_server.sh
33 | chmod +x install_server.sh
34 | ./install_server.sh
35 | ```
36 |
37 | 2.修改/root/swarmon/prometheus/prometheus.yml文件。
38 |
39 | 可使用sudo vim /root/swarmon/prometheus/prometheus.yml编辑
40 |
41 | 根据自己拥有的服务器数量和IP修改对应内容,端口号不要改变,默认示例包含四个服务器Host1 - Host4,可删掉多余的服务器
42 | ```
43 | static_configs:
44 | - targets: ['YOUR_HOST_IP1:3903']
45 | labels:
46 | host: host1
47 | - targets: ['YOUR_HOST_IP2:3903']
48 | labels:
49 | host: host2
50 | - targets: ['YOUR_HOST_IP3:3903']
51 | labels:
52 | host: host3
53 | - targets: ['YOUR_HOST_IP4:3903']
54 | labels:
55 | host: host4
56 | ```
57 | 3.修改完成保存后运行下面命令启动服务端
58 | cd ~/swarmon_server && sudo docker-compose up -d
59 |
60 | 此时你应该可以通过浏览器访问主服务器3000端口, 如192.168.1.2:3000 并看到Grafana登录界面。
61 |
62 | 使用用户名:admin 密码:swarmadmin 登录管理员账号,登录后可修改管理员密码。
63 |
64 | 监控面板及数据源应该已经预设好,直接打开便可使用。
65 |
66 | ### 下载安装集群监控客户端 (每个有节点的服务器都需要安装)
67 |
68 | 如果需要显示带宽数据请先安装iftop!
69 | Ubuntu: sudo apt install iftop
70 |
71 | 请确保3903端口没被占用且可被外部访问
72 |
73 | 1.运行以下命令下载安装监控客户端
74 | ```
75 | wget https://github.com/ytx1991/SwarmMonitoring/raw/main/client/install_client.sh
76 | chmod +x install_client.sh
77 | ./install_client.sh
78 | ```
79 |
80 | 2.编辑run_datacollector.sh,添加当前服务器上的节点。
81 | 可使用 vim ~/swarmon_client/run_datacollector.sh 编辑
82 |
83 | ```
84 | 修改下面两行,第一行为节点的debug api,用空格隔开
85 | 第二行为节点名称,需要确保节点名称是唯一的,不同服务器上不能有相同名称的节点
86 |
87 | nodes=(127.0.0.1:1635 127.0.0.1:1735)
88 | names=(bee1 bee2)
89 | ```
90 | 修改完成后保存退出
91 |
92 | 3.运行以下命令:
93 | cd ~/swarmon_client && ./run_datacollector.sh
94 |
95 | 如需停止可运行:
96 | cd ~/swarmon_client && ./stop_datacollector.sh
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
--------------------------------------------------------------------------------
/client/get_metrics.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Collects metrics for one Bee node and prints them in the line formats parsed
# by swarm.mtail.
# Usage: get_metrics.sh <debug-api-address> <node-name>
# An exported DEBUG_API (even an empty one) takes precedence over the first
# argument.
if [ -z "${DEBUG_API+x}" ]; then
  DEBUG_API=$1
fi
# Divisor applied to the raw balance amounts before they are reported.
MIN_BZZ_UNIT=1000000000
totalAmount=0
nodeName=$2
# Prints the `peer` field of every entry in the node's `lastcheques` list, one
# per line.
# Globals: DEBUG_API (read)
function getPeers(){
  local chequeList
  chequeList=$(curl -s "$DEBUG_API/chequebook/cheque")
  jq -r '.lastcheques | .[].peer' <<<"$chequeList"
}
9 |
# Prints `.lastreceived.payout` of the cheque record for one peer.
# Globals:   DEBUG_API (read)
# Arguments: $1 - peer address
# Outputs:   the payout as a plain non-negative integer; 0 when the API returns
#            null, nothing at all (node unreachable) or anything that is not an
#            integer, so callers can always do arithmetic on the result.
function getCumulativePayout() {
  local peer=$1
  local cumulativePayout
  # -r strips the quotes when the API encodes the amount as a JSON string.
  cumulativePayout=$(curl -s "$DEBUG_API/chequebook/cheque/$peer" | jq -r '.lastreceived.payout')
  if [[ $cumulativePayout =~ ^[0-9]+$ ]]; then
    echo "$cumulativePayout"
  else
    echo 0
  fi
}
20 |
# Prints `.cumulativePayout` of the cashout record for one peer.
# Globals:   DEBUG_API (read)
# Arguments: $1 - peer address
# Outputs:   the amount as a plain non-negative integer; 0 when the API returns
#            null, nothing at all (node unreachable) or anything that is not an
#            integer, so callers can always do arithmetic on the result.
function getLastCashedPayout() {
  local peer=$1
  local cashout
  # -r strips the quotes when the API encodes the amount as a JSON string.
  cashout=$(curl -s "$DEBUG_API/chequebook/cashout/$peer" | jq -r '.cumulativePayout')
  if [[ $cashout =~ ^[0-9]+$ ]]; then
    echo "$cashout"
  else
    echo 0
  fi
}
31 |
# Prints the amount received from a peer that has not been cashed out yet:
# cumulative payout minus last cashed payout.
# Arguments: $1 - peer address
# Outputs:   the difference as an integer; 0 when the cumulative payout is 0.
#            Empty or non-numeric helper output is treated as 0 instead of
#            breaking the subtraction.
function getUncashedAmount() {
  local peer=$1
  local cumulativePayout cashedPayout

  cumulativePayout=$(getCumulativePayout "$peer")
  [[ $cumulativePayout =~ ^[0-9]+$ ]] || cumulativePayout=0
  if (( cumulativePayout == 0 )); then
    # Nothing received from this peer: skip the cashout lookup entirely.
    echo 0
    return
  fi

  cashedPayout=$(getLastCashedPayout "$peer")
  [[ $cashedPayout =~ ^[0-9]+$ ]] || cashedPayout=0
  echo $(( cumulativePayout - cashedPayout ))
}
45 |
# Prints the sum of the uncashed amounts over all peers returned by getPeers.
# Outputs: the total as an integer (0 when there are no peers).
# The sum is kept in a local variable, so repeated calls do not accumulate and
# no loop variable leaks into the caller; a peer whose amount cannot be read
# counts as 0 instead of aborting the whole sum.
function countUncashed() {
  local total=0 peer amount
  while read -r peer; do
    [ -n "$peer" ] || continue
    amount=$(getUncashedAmount "$peer")
    [[ $amount =~ ^-?[0-9]+$ ]] || amount=0
    total=$(( total + amount ))
  done < <(getPeers)
  echo "$total"
}
53 |
# Converts one numeric token printed by jq into a plain integer string.
# Plain integers pass through unchanged (no floating-point round trip); any
# other form (exponent notation, empty, "null", ...) goes through awk's %d
# conversion, which yields 0 for non-numeric input.
_toInteger() {
  local raw=$1
  if [[ $raw =~ ^-?[0-9]+$ ]]; then
    printf '%s' "$raw"
  else
    printf '%s\n' "$raw" | awk '{printf("%d",$0)}'
  fi
}

# Prints a "Populating ..." banner followed by one JSON line describing the
# node, in exactly the shape matched by swarm.mtail:
#   {"name":..,"peers":..,"diskavail":..,"diskfree":..,"cheque":..,
#    "total_bzz":..,"available_bzz":..,"total_uncashed":..}
# Globals: DEBUG_API, nodeName, MIN_BZZ_UNIT (all read)
# Every value falls back to 0 when it cannot be obtained, so the emitted line
# always has the expected shape.
function makejson(){
  local peers cheque diskavail diskfree balance
  local totalBZZ availableBZZ uncashedBZZ
  local unit=${MIN_BZZ_UNIT:-1000000000}

  echo "Populating $DEBUG_API data..."

  peers=$(curl -s "$DEBUG_API/peers" | jq '.peers | length')
  [ -n "$peers" ] || peers=0

  # Columns 2 and 4 of the data row of `df -P` for the current directory.
  read -r diskavail diskfree < <(df -P . | awk 'NR==2{print $2, $4}')
  diskavail=${diskavail:-0}
  diskfree=${diskfree:-0}

  cheque=$(curl -s "$DEBUG_API/chequebook/cheque" | jq '.lastcheques | length')
  [ -n "$cheque" ] || cheque=0

  # One request serves both balance fields; -r strips the quotes when the API
  # encodes the amounts as JSON strings.
  balance=$(curl -s "$DEBUG_API/chequebook/balance")
  totalBZZ=$(_toInteger "$(jq -r '.totalBalance' <<<"$balance")")
  availableBZZ=$(_toInteger "$(jq -r '.availableBalance' <<<"$balance")")
  uncashedBZZ=$(_toInteger "$(countUncashed)")

  printf '{"name":"%s","peers":%s,"diskavail":%s,"diskfree":%s,"cheque":%s,"total_bzz":%s,"available_bzz":%s,"total_uncashed":%s}\n' \
    "$nodeName" "$peers" "$diskavail" "$diskfree" "$cheque" \
    "$(( totalBZZ / unit ))" "$(( availableBZZ / unit ))" "$(( uncashedBZZ / unit ))"
}
# CPU utilisation: take the idle percentage from one batch-mode sample of top
# and print "CPU:<100 - idle>".
top -bn1 \
  | grep "Cpu(s)" \
  | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" \
  | awk '{print "CPU:"100 - $1}'

# Memory utilisation: column 3 over column 2 of the second line of `free -t`,
# printed as "MEM:<percent>".
free -t \
  | awk 'NR == 2 {print "MEM:" $3/$2*100}'

# Per-node JSON line.
makejson

# Network utilisation: keep only the two "Total ... rate" summary lines of a
# single iftop text-mode sample. These go to a separate log file rather than to
# stdout.
sudo iftop -tB -s 1 -L 1 \
  | grep "Total send rate.*\|Total rec.*" \
  > ~/swarmon_client/logs/network.log
96 |
--------------------------------------------------------------------------------