├── zabbix_agentd.conf ├── zabbix-agent-stress-test.py └── README.md /zabbix_agentd.conf: -------------------------------------------------------------------------------- 1 | DebugLevel=0 2 | EnableRemoteCommands=1 3 | Hostname=agent 4 | LogFileSize=0 5 | LogFile=/var/log/zabbix/zabbix_agentd.log 6 | PidFile=/var/run/zabbix/zabbix_agentd.pid 7 | Server=127.0.0.1 8 | StartAgents=4 9 | AllowRoot=1 10 | -------------------------------------------------------------------------------- /zabbix-agent-stress-test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ''' 4 | ** zabbix-agent-stress-test.py 5 | ** - script for zabbix agent stress testing - how many queries per second 6 | ** can be reached for defined item key from zabbix-agent in passive mode 7 | ** 8 | ** It's full sync multithreaded code => TODO: async code (twisted) 9 | ** 10 | ** Copyright (C) 2015 Jan Garaj - www.jangaraj.com 11 | ** 12 | ** This program is free software; you can redistribute it and/or modify 13 | ** it under the terms of the GNU General Public License as published by 14 | ** the Free Software Foundation; either version 2 of the License, or 15 | ** (at your option) any later version. 16 | ** 17 | ** This program is distributed in the hope that it will be useful, 18 | ** but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | ** GNU General Public License for more details. 21 | ** 22 | ** You should have received a copy of the GNU General Public License 23 | ** along with this program; if not, write to the Free Software 24 | ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 25 | ''' 26 | 27 | import sys, getopt 28 | import socket, struct, threading, multiprocessing 29 | from timeit import default_timer as timer 30 | 31 | # global variables 32 | zabbix_agent_host = '127.0.0.1' 33 | zabbix_agent_port = 10050 34 | threads = 1 35 | timeout = 10 36 | key = '' 37 | success = 0 38 | error = 0 39 | rate_avg = 0 40 | count = 0 41 | 42 | def str2packed(data): 43 | header_field = struct.pack('<4sBQ', 'ZBXD', 1, len(data)) 44 | return header_field + data 45 | 46 | def packed2str(packed_data): 47 | header, version, length = struct.unpack('<4sBQ', packed_data[:13]) 48 | (data, ) = struct.unpack('<%ds'%length, packed_data[13:13+length]) 49 | return data 50 | 51 | def zabbixconntest(): 52 | global success 53 | global error 54 | global zabbix_agent_host 55 | global zabbix_agent_port 56 | global timeout 57 | global key 58 | conoptions = [((socket.AF_INET), (zabbix_agent_host, zabbix_agent_port))] 59 | family, hostport = conoptions[0] 60 | s = socket.socket(family, socket.SOCK_STREAM) 61 | s.settimeout(timeout) 62 | try: 63 | s.connect(hostport) 64 | except socket.timeout: 65 | error = error + 1 66 | return "Timeout" 67 | except socket.error, err: 68 | error = error + 1 69 | return "Socket error (%s)" % str(err) 70 | s.sendall(str2packed(key)) 71 | 72 | data = '' 73 | while True: 74 | buff = s.recv(1024) 75 | if not buff: 76 | break 77 | data += buff 78 | packed2str(data) 79 | s.close() 80 | success = success + 1 81 | return data 82 | 83 | def worker(): 84 | global rate_avg 85 | global count 86 | while True: 87 | count = count + 1 88 | start = timer() 89 | zabbixconntest() 90 | end = timer() 91 | rate = 1/(end-start) 92 | rate_avg = (rate_avg + rate)/2 93 | #print "Success: %d\tError: %d\tCurrent rate: %.2f qps\tAvg rate: %.2f qps" % (success, error, rate, rate_avg) 94 | 95 | def main(argv): 96 | global zabbix_agent_host 97 | global zabbix_agent_port 98 | global timeout 99 | global key 100 | global threads 101 | try: 102 | opts, args = getopt.getopt(argv,"hs:p:k:t:",["host=", "port=", "key=", "threads="]) 103 | except getopt.GetoptError, e: 104 | print 'Error option parsing' + str(e) 105 | sys.exit(2) 106 | for opt, arg in opts: 107 | if opt == '-h': 108 | print 'Usage:\n' + __file__ + ' [-h] [-s ] [-p ] -k ' 109 | print """ 110 | Utility for stress testing of zabbix_agent - how many queries per second can be reached for defined item key. 111 | 112 | Options: 113 | -s, --host 114 | Specify host name or IP address of a host. Default value is 127.0.0.1 115 | 116 | -p, --port 117 | Specify port number of agent running on the host. Default value is 10050 118 | 119 | -k, --key 120 | Specify key of item to retrieve value for 121 | 122 | -t, --threads 123 | Specify number of worker threads 124 | 125 | -h, --help 126 | Display help information 127 | 128 | Example: ./zabbix-agent-stress-test.py -s 127.0.0.1 -p 10050 -k agent.ping 129 | """ 130 | sys.exit() 131 | elif opt in ("-s", "--host"): 132 | zabbix_agent_host = arg 133 | elif opt in ("-p", "--port"): 134 | zabbix_agent_port = arg 135 | elif opt in ("-k", "--key"): 136 | key = arg 137 | elif opt in ("-t", "--threads"): 138 | threads = int(arg) 139 | 140 | cpus=multiprocessing.cpu_count() 141 | if threads>cpus: 142 | print "Warning: you are starting more threads, than your system has available CPU cores (%s)!" % cpus 143 | print "Starting %d threads, host: %s:%d, key: %s" % (threads, zabbix_agent_host, zabbix_agent_port, key) 144 | for i in range(threads): 145 | t = threading.Thread(target=worker) 146 | t.setDaemon(True) 147 | t.start() 148 | 149 | import time 150 | startg = timer() 151 | try: 152 | while True: 153 | time.sleep(1) 154 | print "Success: %d\tErrors: %d\tAvg rate: %.2f qps\tExecution time: %.2f sec" % (success, error, rate_avg*threads, timer()-startg) 155 | except KeyboardInterrupt: 156 | total_time = timer()-startg 157 | print "\nSuccess: %d\tErrors: %d\tAvg rate: %.2f qps\tExecution time: %.2f sec" % (success, error, rate_avg*threads, total_time) 158 | print "Avg rate based on total execution time and success connections: %.2f qps" % (success/total_time) 159 | sys.exit(0) 160 | 161 | if __name__ == "__main__": 162 | main(sys.argv[1:]) 163 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Zabbix Agent Stress Test 2 | ======================== 3 | 4 | Script for Zabbix Agent stress testing - how many queries per second can 5 | be reached for defined item key from zabbix-agent in passive mode? 6 | 7 | [![Paypal donate button](http://jangaraj.com/img/github-donate-button02.png)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=8LB6J222WRUZ4) 8 | 9 | Zabbix Agent performance depends on: 10 | * zabbix-agent config: how many passive threads are started - config option StartAgents 11 | * item, items can be slower, if they need subshell or IOPs operation, e.g. UserParameters 12 | * HW (CPU, network, ...) 13 | 14 | Stress test code can be more precise and also async => provided results are only 15 | informative. Stress test is only for zabbix-agent in passive mode and maybe 16 | active mode can provide better performance (IDNK). 17 | 18 | Manual 19 | ====== 20 | 21 | $ ./zabbix-agent-stress-test.py -h 22 | Usage: 23 | ./zabbix-agent-stress-test.py [-h] [-s ] [-p ] -k 24 | 25 | Utility for stress testing of zabbix_agent - how many queries per second can be reached for defined item key. 26 | 27 | Options: 28 | -s, --host 29 | Specify host name or IP address of a host. Default value is 127.0.0.1 30 | 31 | -p, --port 32 | Specify port number of agent running on the host. Default value is 10050 33 | 34 | -k, --key 35 | Specify key of item to retrieve value for 36 | 37 | -t, --threads 38 | Specify number of worker threads 39 | 40 | -h, --help 41 | Display help information 42 | 43 | Example: ./zabbix-agent-stress-test.py -s 127.0.0.1 -p 10050 -k agent.ping 44 | 45 | Stress test examples 46 | ==================== 47 | 48 | Some examples for Zabbix agent 2.4.3 on localhost and StartAgents=4: 49 | 50 | Expected ~4 qps, because 4 agents threads are started and every execution needs 1 sec (sleep 1): 51 | 52 | $ ./zabbix-agent-stress-test.py -s 127.0.0.1 -k "system.run[sleep 1]" -t 20 53 | Warning: you are starting more threads, than your system has available CPU cores (2)! 54 | Starting 20 threads, host: 127.0.0.1:10050, key: system.run[sleep 1] 55 | Success: 4 Errors: 0 Avg rate: 18.55 qps Execution time: 1.00 sec 56 | Success: 7 Errors: 0 Avg rate: 11.04 qps Execution time: 2.00 sec 57 | Success: 11 Errors: 0 Avg rate: 7.13 qps Execution time: 3.00 sec 58 | Success: 12 Errors: 0 Avg rate: 6.88 qps Execution time: 4.00 sec 59 | Success: 16 Errors: 0 Avg rate: 5.10 qps Execution time: 5.01 sec 60 | Success: 20 Errors: 0 Avg rate: 4.05 qps Execution time: 6.01 sec 61 | Success: 24 Errors: 0 Avg rate: 3.98 qps Execution time: 7.01 sec 62 | Success: 28 Errors: 0 Avg rate: 3.97 qps Execution time: 8.01 sec 63 | Success: 32 Errors: 0 Avg rate: 3.96 qps Execution time: 9.01 sec 64 | Success: 36 Errors: 0 Avg rate: 3.96 qps Execution time: 10.02 sec 65 | Success: 40 Errors: 0 Avg rate: 3.96 qps Execution time: 11.02 sec 66 | Success: 44 Errors: 0 Avg rate: 3.96 qps Execution time: 12.02 sec 67 | Success: 48 Errors: 0 Avg rate: 3.96 qps Execution time: 13.02 sec 68 | Success: 52 Errors: 0 Avg rate: 3.97 qps Execution time: 14.03 sec 69 | Success: 56 Errors: 0 Avg rate: 3.98 qps Execution time: 15.03 sec 70 | ... 71 | 72 | Expected ~400 qps value, because 4 agents threads are started and execution needs ~0.01 sec (echo 1): 73 | 74 | $ ./zabbix-agent-stress-test.py -s 127.0.0.1 -k "system.run[echo 1]" -t 20 75 | Warning: you are starting more threads, than your system has available CPU cores (2)! 76 | Starting 20 threads, host: 127.0.0.1:10050, key: system.run[echo 1] 77 | Success: 596 Errors: 0 Avg rate: 525.18 qps Execution time: 1.00 sec 78 | Success: 1144 Errors: 0 Avg rate: 564.76 qps Execution time: 2.00 sec 79 | Success: 1673 Errors: 0 Avg rate: 479.72 qps Execution time: 3.00 sec 80 | Success: 2230 Errors: 0 Avg rate: 646.48 qps Execution time: 4.00 sec 81 | Success: 2808 Errors: 0 Avg rate: 577.59 qps Execution time: 5.01 sec 82 | Success: 3357 Errors: 0 Avg rate: 532.59 qps Execution time: 6.01 sec 83 | Success: 3950 Errors: 0 Avg rate: 589.85 qps Execution time: 7.01 sec 84 | Success: 4536 Errors: 0 Avg rate: 527.77 qps Execution time: 8.01 sec 85 | Success: 5112 Errors: 0 Avg rate: 595.04 qps Execution time: 9.01 sec 86 | Success: 5686 Errors: 0 Avg rate: 620.66 qps Execution time: 10.01 sec 87 | Success: 6247 Errors: 0 Avg rate: 600.07 qps Execution time: 11.01 sec 88 | Success: 6802 Errors: 0 Avg rate: 521.53 qps Execution time: 12.01 sec 89 | Success: 7362 Errors: 0 Avg rate: 548.17 qps Execution time: 13.01 sec 90 | Success: 7933 Errors: 0 Avg rate: 580.31 qps Execution time: 14.01 sec 91 | ... 92 | 93 | Probably maximum qps value, when 4 agents threads are started - item key is agent.ping, so no subshell 94 | executions or IOPs are needed for this item: 95 | 96 | $ ./zabbix-agent-stress-test.py -s 127.0.0.1 -k "agent.ping" -t 4 97 | Warning: you are starting more threads, than your system has available CPU cores (2)! 98 | Starting 4 threads, host: 127.0.0.1:10050, key: agent.ping 99 | Success: 3354 Errors: 0 Avg rate: 3406.18 qps Execution time: 1.00 sec 100 | Success: 6692 Errors: 0 Avg rate: 4054.38 qps Execution time: 2.00 sec 101 | Success: 9952 Errors: 0 Avg rate: 3347.73 qps Execution time: 3.00 sec 102 | Success: 13395 Errors: 0 Avg rate: 3476.23 qps Execution time: 4.00 sec 103 | Success: 16511 Errors: 0 Avg rate: 3946.44 qps Execution time: 5.00 sec 104 | Success: 20041 Errors: 0 Avg rate: 4049.99 qps Execution time: 6.01 sec 105 | Success: 23502 Errors: 0 Avg rate: 8685.35 qps Execution time: 7.01 sec 106 | Success: 26875 Errors: 0 Avg rate: 5739.68 qps Execution time: 8.01 sec 107 | Success: 30107 Errors: 0 Avg rate: 6029.05 qps Execution time: 9.01 sec 108 | Success: 33344 Errors: 0 Avg rate: 3814.14 qps Execution time: 10.01 sec 109 | ... 110 | 111 | Conclusion 112 | ========== 113 | 114 | Zabbix Agent can handle ~3k requests per second for in memory items. 115 | If you need shell execution for items, then it's ~0.5k requests per second. 116 | (Tested on 2.1GHz CPUs). 117 | 118 | Similar projects 119 | ================ 120 | 121 | Better implementation in Go: https://github.com/cavaliercoder/zabbix_agent_bench 122 | 123 | # Author 124 | 125 | [Devops Monitoring Expert](http://www.jangaraj.com 'DevOps / Docker / Kubernetes / AWS ECS / Google GCP / Zabbix / Zenoss / Terraform / Monitoring'), 126 | who loves monitoring systems and cutting/bleeding edge technologies: Docker, 127 | Kubernetes, ECS, AWS, Google GCP, Terraform, Lambda, Zabbix, Grafana, Elasticsearch, 128 | Kibana, Prometheus, Sysdig, ... 129 | 130 | Summary: 131 | * 2000+ [GitHub](https://github.com/monitoringartist/) stars 132 | * 10 000+ [Grafana dashboard](https://grafana.net/monitoringartist) downloads 133 | * 1 000 000+ [Docker image](https://hub.docker.com/u/monitoringartist/) pulls 134 | 135 | Professional devops / monitoring / consulting services: 136 | 137 | [![Monitoring Artist](http://monitoringartist.com/img/github-monitoring-artist-logo.jpg)](http://www.monitoringartist.com 'DevOps / Docker / Kubernetes / AWS ECS / Google GCP / Zabbix / Zenoss / Terraform / Monitoring') 138 | --------------------------------------------------------------------------------