├── zabbix_agentd.conf
├── zabbix-agent-stress-test.py
└── README.md


/zabbix_agentd.conf:
--------------------------------------------------------------------------------
 1 | DebugLevel=0
 2 | EnableRemoteCommands=1
 3 | Hostname=agent
 4 | LogFileSize=0
 5 | LogFile=/var/log/zabbix/zabbix_agentd.log
 6 | PidFile=/var/run/zabbix/zabbix_agentd.pid
 7 | Server=127.0.0.1
 8 | StartAgents=4
 9 | AllowRoot=1
10 | 


--------------------------------------------------------------------------------
/zabbix-agent-stress-test.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | 
  3 | '''
  4 |  ** zabbix-agent-stress-test.py
  5 |  ** - script for zabbix agent stress testing - how many queries per second 
  6 |  ** can be reached for defined item key from zabbix-agent in passive mode
  7 |  **
  8 |  ** It's full sync multithreaded code => TODO: async code (twisted)
  9 |  ** 
 10 |  ** Copyright (C) 2015 Jan Garaj - www.jangaraj.com 
 11 |  **
 12 |  ** This program is free software; you can redistribute it and/or modify
 13 |  ** it under the terms of the GNU General Public License as published by
 14 |  ** the Free Software Foundation; either version 2 of the License, or
 15 |  ** (at your option) any later version.
 16 |  **
 17 |  ** This program is distributed in the hope that it will be useful,
 18 |  ** but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 |  ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 20 |  ** GNU General Public License for more details.
 21 |  **
 22 |  ** You should have received a copy of the GNU General Public License
 23 |  ** along with this program; if not, write to the Free Software
 24 |  ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.     
 25 | '''
 26 | 
 27 | import sys, getopt
 28 | import socket, struct, threading, multiprocessing
 29 | from timeit import default_timer as timer
 30 | 
 31 | # global variables: settings overridden by main() and statistics shared by the worker threads
 32 | zabbix_agent_host = '127.0.0.1'  # agent address to connect to (-s/--host)
 33 | zabbix_agent_port = 10050  # agent TCP port (-p/--port)
 34 | threads = 1  # number of worker threads to start (-t/--threads)
 35 | timeout = 10  # socket timeout handed to settimeout() in zabbixconntest()
 36 | key = ''  # item key sent to the agent (-k/--key)
 37 | success = 0  # requests counted as successful by zabbixconntest()
 38 | error = 0  # requests counted as failed by zabbixconntest()
 39 | rate_avg = 0  # smoothed per-request rate (requests/second) maintained by worker()
 40 | count = 0  # number of requests started by worker()
 41 | 
 def str2packed(data):
     """Frame *data* as a Zabbix protocol packet.

     The packet consists of the 4-byte signature ``ZBXD``, one byte with
     the value ``1``, the payload length as a little-endian unsigned
     64-bit integer, and finally the payload itself.

     Args:
         data: Payload to send, e.g. an item key. Text is encoded as UTF-8
             so that the length counts bytes and the function works on
             both Python 2 and Python 3; byte strings are used unchanged.

     Returns:
         The 13-byte header followed by the payload, as a byte string.
     """
     if not isinstance(data, bytes):
         # Python 3 ``str`` / Python 2 ``unicode``: struct and sockets need bytes.
         data = data.encode('utf-8')
     header_field = struct.pack('<4sBQ', b'ZBXD', 1, len(data))
     return header_field + data
 45 | 
 def packed2str(packed_data):
     """Extract the payload from a Zabbix protocol packet.

     Args:
         packed_data: Raw bytes of a packet: a 13-byte header (``ZBXD``
             signature, one byte, little-endian unsigned 64-bit payload
             length) followed by the payload.

     Returns:
         The payload bytes announced by the header. Any bytes after the
         announced length are ignored.

     Raises:
         ValueError: If the packet is shorter than the header, does not
             start with ``ZBXD``, or holds fewer payload bytes than the
             header announces (e.g. the peer closed the connection early).
     """
     header_size = struct.calcsize('<4sBQ')
     if len(packed_data) < header_size:
         raise ValueError(
             'packet too short: got %d bytes, header needs %d'
             % (len(packed_data), header_size))
     header, _version, length = struct.unpack('<4sBQ', packed_data[:header_size])
     if header != b'ZBXD':
         raise ValueError('bad packet signature: %r' % (header,))
     data = packed_data[header_size:header_size + length]
     if len(data) != length:
         raise ValueError(
             'truncated packet: header announces %d bytes, got %d'
             % (length, len(data)))
     return data
 50 | 
 # Serialises updates of the ``success`` / ``error`` counters, which are
 # incremented concurrently by every worker thread.
 _counter_lock = threading.Lock()


 def zabbixconntest():
     """Send one request for ``key`` to the agent and read the whole reply.

     Connects to ``zabbix_agent_host:zabbix_agent_port`` with the
     module-level ``timeout``, sends ``key`` framed by ``str2packed`` and
     reads until the peer closes the connection. The reply is checked with
     ``packed2str``. The module-level ``success`` counter is incremented
     when a well-formed reply was received, ``error`` otherwise.

     Returns:
         The raw reply (header included) on success, otherwise a short
         text describing the failure.
     """
     global success
     global error

     # The port may still be a string when it came from the command line.
     port = int(zabbix_agent_port)
     failure = None
     data = b''
     s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     try:
         s.settimeout(timeout)
         s.connect((zabbix_agent_host, port))
         s.sendall(str2packed(key))
         chunks = []
         while True:
             buff = s.recv(1024)
             if not buff:
                 break
             chunks.append(buff)
         data = b''.join(chunks)
         # Result is not needed; this only verifies the reply is well formed.
         packed2str(data)
     except socket.timeout:
         failure = "Timeout"
     except socket.error as err:
         failure = "Socket error (%s)" % str(err)
     except (struct.error, ValueError) as err:
         # Empty or truncated reply, e.g. the agent dropped the connection.
         failure = "Invalid response (%s)" % str(err)
     finally:
         # Always release the descriptor, also on the failure paths.
         s.close()

     with _counter_lock:
         if failure is None:
             success = success + 1
         else:
             error = error + 1
     if failure is not None:
         return failure
     return data
 82 | 
 # Serialises updates of the shared ``count`` / ``rate_avg`` statistics.
 _rate_lock = threading.Lock()


 def worker():
     """Issue requests in an endless loop and maintain the rate statistics.

     Each iteration increments the module-level ``count``, times one call
     of ``zabbixconntest`` and folds the resulting per-request rate
     (1 / elapsed seconds) into the module-level ``rate_avg``. ``rate_avg``
     is a smoothed value: every new sample is averaged with the previous
     value, and the very first sample is taken as is.

     The function never returns; it is meant to run in a daemon thread.
     """
     global rate_avg
     global count
     while True:
         with _rate_lock:
             count = count + 1
         start = timer()
         zabbixconntest()
         elapsed = timer() - start
         if elapsed <= 0:
             # Timer resolution too coarse for this request; skip the
             # sample instead of dividing by zero.
             continue
         rate = 1 / elapsed
         with _rate_lock:
             if rate_avg == 0:
                 # No sample yet: do not average against the initial 0.
                 rate_avg = rate
             else:
                 rate_avg = (rate_avg + rate) / 2
 94 | 
 def main(argv):
     """Parse the command line, start the workers and print statistics.

     Args:
         argv: Command-line arguments without the program name.

     The parsed options are stored in the module-level settings
     (``zabbix_agent_host``, ``zabbix_agent_port``, ``key``, ``threads``).
     ``threads`` daemon threads running ``worker`` are started, then one
     statistics line is printed every second until Ctrl+C is pressed.

     Exits with status 2 on invalid options or a missing item key, and
     with status 0 after ``-h``/``--help`` or Ctrl+C.
     """
     global zabbix_agent_host
     global zabbix_agent_port
     global key
     global threads
     import time

     try:
         opts, args = getopt.getopt(
             argv, "hs:p:k:t:",
             ["help", "host=", "port=", "key=", "threads="])
     except getopt.GetoptError as e:
         print('Error option parsing: ' + str(e))
         sys.exit(2)
     for opt, arg in opts:
         if opt in ("-h", "--help"):
             _print_usage()
             sys.exit()
         elif opt in ("-s", "--host"):
             zabbix_agent_host = arg
         elif opt in ("-p", "--port"):
             # Must be an int: it is formatted with %d and passed to connect().
             zabbix_agent_port = _int_option(arg, 'port', 1, 65535)
         elif opt in ("-k", "--key"):
             key = arg
         elif opt in ("-t", "--threads"):
             threads = _int_option(arg, 'number of threads', 1)

     if not key:
         print('Error: item key is required (-k <key>), use -h for help')
         sys.exit(2)

     cpus = multiprocessing.cpu_count()
     if threads > cpus:
         print("Warning: you are starting more threads, than your system has available CPU cores (%s)!" % cpus)
     print("Starting %d threads, host: %s:%d, key: %s" % (threads, zabbix_agent_host, zabbix_agent_port, key))
     for _ in range(threads):
         t = threading.Thread(target=worker)
         # Daemon threads do not keep the process alive after Ctrl+C.
         t.daemon = True
         t.start()

     startg = timer()
     try:
         while True:
             time.sleep(1)
             print("Success: %d\tErrors: %d\tAvg rate: %.2f qps\tExecution time: %.2f sec" % (success, error, rate_avg*threads, timer()-startg))
     except KeyboardInterrupt:
         total_time = timer()-startg
         print("\nSuccess: %d\tErrors: %d\tAvg rate: %.2f qps\tExecution time: %.2f sec" % (success, error, rate_avg*threads, total_time))
         print("Avg rate based on total execution time and success connections: %.2f qps" % (success/total_time))
         sys.exit(0)


 def _int_option(value, name, minimum, maximum=None):
     """Return *value* as an int within the given bounds, or exit with status 2."""
     try:
         number = int(value)
     except ValueError:
         number = None
     too_small = number is not None and number < minimum
     too_large = number is not None and maximum is not None and number > maximum
     if number is None or too_small or too_large:
         print('Error: invalid %s: %r' % (name, value))
         sys.exit(2)
     return number


 def _print_usage():
     """Print the command-line help text."""
     print('Usage:\n' + __file__ + ' [-h] [-s <host name or IP>] [-p <port>] -k <key>')
     print("\n".join((
         "",
         "Utility for stress testing of zabbix_agent - how many queries per second can be reached for defined item key.",
         "",
         "Options:",
         "  -s, --host <host name or IP>",
         "    Specify host name or IP address of a host. Default value is 127.0.0.1",
         "",
         "  -p, --port <port>",
         "    Specify port number of agent running on the host. Default value is 10050",
         "",
         "  -k, --key <key of metric>",
         "    Specify key of item to retrieve value for",
         "",
         "  -t, --threads <number of thread>",
         "    Specify number of worker threads",
         "",
         "  -h, --help",
         "    Display help information",
         "",
         "Example: ./zabbix-agent-stress-test.py -s 127.0.0.1 -p 10050 -k agent.ping",
         "",
     )))
160 | 
161 | if __name__ == "__main__":  # run only when executed as a script, not on import
162 |     main(sys.argv[1:])  # pass the arguments without the program name
163 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Zabbix Agent Stress Test
  2 | ========================
  3 | 
  4 | Script for Zabbix Agent stress testing - how many queries per second can 
  5 | be reached for defined item key from zabbix-agent in passive mode?
  6 | 
  7 | [![Paypal donate button](http://jangaraj.com/img/github-donate-button02.png)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=8LB6J222WRUZ4)
  8 | 
  9 | Zabbix Agent performance depends on:
 10 | * zabbix-agent config: how many passive threads are started - config option StartAgents
 11 | * item type: items can be slower if they need a subshell or I/O operations, e.g. UserParameters
 12 | * HW (CPU, network, ...)
 13 |  
 14 | The stress test code could be more precise and asynchronous, so the results are only
 15 | indicative. The stress test covers zabbix-agent in passive mode only; active mode
 16 | may provide better performance (not verified).
 17 | 
 18 | Manual
 19 | ======
 20 | 
 21 |     $ ./zabbix-agent-stress-test.py -h
 22 |     Usage:
 23 |     ./zabbix-agent-stress-test.py [-h] [-s <host name or IP>] [-p <port>] -k <key>
 24 |     
 25 |     Utility for stress testing of zabbix_agent - how many queries per second can be reached for defined item key.
 26 |     
 27 |     Options:
 28 |       -s, --host <host name or IP>
 29 |         Specify host name or IP address of a host. Default value is 127.0.0.1
 30 |     
 31 |       -p, --port <port>
 32 |         Specify port number of agent running on the host. Default value is 10050
 33 |     
 34 |       -k, --key <key of metric>
 35 |         Specify key of item to retrieve value for
 36 |     
 37 |       -t, --threads <number of thread>
 38 |         Specify number of worker threads
 39 |     
 40 |       -h, --help
 41 |         Display help information
 42 |     
 43 |     Example: ./zabbix-agent-stress-test.py -s 127.0.0.1 -p 10050 -k agent.ping
 44 |    
 45 | Stress test examples
 46 | ====================
 47 | 
 48 | Some examples for Zabbix agent 2.4.3 on localhost and StartAgents=4:
 49 | 
 50 | Expected ~4 qps, because 4 agent threads are started and every execution takes 1 sec (sleep 1):
 51 | 
 52 |     $ ./zabbix-agent-stress-test.py -s 127.0.0.1 -k "system.run[sleep 1]" -t 20
 53 |     Warning: you are starting more threads, than your system has available CPU cores (2)!
 54 |     Starting 20 threads, host: 127.0.0.1:10050, key: system.run[sleep 1]
 55 |     Success: 4      Errors: 0       Avg rate: 18.55 qps    Execution time: 1.00 sec
 56 |     Success: 7      Errors: 0       Avg rate: 11.04 qps    Execution time: 2.00 sec
 57 |     Success: 11     Errors: 0       Avg rate: 7.13 qps     Execution time: 3.00 sec
 58 |     Success: 12     Errors: 0       Avg rate: 6.88 qps     Execution time: 4.00 sec
 59 |     Success: 16     Errors: 0       Avg rate: 5.10 qps     Execution time: 5.01 sec
 60 |     Success: 20     Errors: 0       Avg rate: 4.05 qps     Execution time: 6.01 sec
 61 |     Success: 24     Errors: 0       Avg rate: 3.98 qps     Execution time: 7.01 sec
 62 |     Success: 28     Errors: 0       Avg rate: 3.97 qps     Execution time: 8.01 sec
 63 |     Success: 32     Errors: 0       Avg rate: 3.96 qps     Execution time: 9.01 sec
 64 |     Success: 36     Errors: 0       Avg rate: 3.96 qps     Execution time: 10.02 sec
 65 |     Success: 40     Errors: 0       Avg rate: 3.96 qps     Execution time: 11.02 sec
 66 |     Success: 44     Errors: 0       Avg rate: 3.96 qps     Execution time: 12.02 sec
 67 |     Success: 48     Errors: 0       Avg rate: 3.96 qps     Execution time: 13.02 sec
 68 |     Success: 52     Errors: 0       Avg rate: 3.97 qps     Execution time: 14.03 sec
 69 |     Success: 56     Errors: 0       Avg rate: 3.98 qps     Execution time: 15.03 sec
 70 |     ...
 71 |     
 72 | Expected ~400 qps, because 4 agent threads are started and each execution takes ~0.01 sec (echo 1):
 73 | 
 74 |     $ ./zabbix-agent-stress-test.py -s 127.0.0.1 -k "system.run[echo 1]" -t 20
 75 |     Warning: you are starting more threads, than your system has available CPU cores (2)!
 76 |     Starting 20 threads, host: 127.0.0.1:10050, key: system.run[echo 1]
 77 |     Success: 596    Errors: 0       Avg rate: 525.18 qps   Execution time: 1.00 sec
 78 |     Success: 1144   Errors: 0       Avg rate: 564.76 qps   Execution time: 2.00 sec
 79 |     Success: 1673   Errors: 0       Avg rate: 479.72 qps   Execution time: 3.00 sec
 80 |     Success: 2230   Errors: 0       Avg rate: 646.48 qps   Execution time: 4.00 sec
 81 |     Success: 2808   Errors: 0       Avg rate: 577.59 qps   Execution time: 5.01 sec
 82 |     Success: 3357   Errors: 0       Avg rate: 532.59 qps   Execution time: 6.01 sec
 83 |     Success: 3950   Errors: 0       Avg rate: 589.85 qps   Execution time: 7.01 sec
 84 |     Success: 4536   Errors: 0       Avg rate: 527.77 qps   Execution time: 8.01 sec
 85 |     Success: 5112   Errors: 0       Avg rate: 595.04 qps   Execution time: 9.01 sec
 86 |     Success: 5686   Errors: 0       Avg rate: 620.66 qps   Execution time: 10.01 sec
 87 |     Success: 6247   Errors: 0       Avg rate: 600.07 qps   Execution time: 11.01 sec
 88 |     Success: 6802   Errors: 0       Avg rate: 521.53 qps   Execution time: 12.01 sec
 89 |     Success: 7362   Errors: 0       Avg rate: 548.17 qps   Execution time: 13.01 sec
 90 |     Success: 7933   Errors: 0       Avg rate: 580.31 qps   Execution time: 14.01 sec
 91 |     ...
 92 | 
 93 | Probably the maximum qps value when 4 agent threads are started: the item key is agent.ping, so no subshell
 94 | executions or I/O operations are needed for this item:
 95 |     
 96 |     $ ./zabbix-agent-stress-test.py -s 127.0.0.1 -k "agent.ping" -t 4
 97 |     Warning: you are starting more threads, than your system has available CPU cores (2)!
 98 |     Starting 4 threads, host: 127.0.0.1:10050, key: agent.ping
 99 |     Success: 3354   Errors: 0       Avg rate: 3406.18 qps  Execution time: 1.00 sec
100 |     Success: 6692   Errors: 0       Avg rate: 4054.38 qps  Execution time: 2.00 sec
101 |     Success: 9952   Errors: 0       Avg rate: 3347.73 qps  Execution time: 3.00 sec
102 |     Success: 13395  Errors: 0       Avg rate: 3476.23 qps  Execution time: 4.00 sec
103 |     Success: 16511  Errors: 0       Avg rate: 3946.44 qps  Execution time: 5.00 sec
104 |     Success: 20041  Errors: 0       Avg rate: 4049.99 qps  Execution time: 6.01 sec
105 |     Success: 23502  Errors: 0       Avg rate: 8685.35 qps  Execution time: 7.01 sec
106 |     Success: 26875  Errors: 0       Avg rate: 5739.68 qps  Execution time: 8.01 sec
107 |     Success: 30107  Errors: 0       Avg rate: 6029.05 qps  Execution time: 9.01 sec
108 |     Success: 33344  Errors: 0       Avg rate: 3814.14 qps  Execution time: 10.01 sec
109 |     ...
110 |     
111 | Conclusion
112 | ==========  
113 |     
114 | Zabbix Agent can handle ~3k requests per second for in-memory items.
115 | If you need shell execution for items, then it's ~0.5k requests per second. 
116 | (Tested on 2.1GHz CPUs).
117 |     
118 | Similar projects
119 | ================
120 | 
121 | Better implementation in Go: https://github.com/cavaliercoder/zabbix_agent_bench
122 | 
123 | # Author
124 | 
125 | [Devops Monitoring Expert](http://www.jangaraj.com 'DevOps / Docker / Kubernetes / AWS ECS / Google GCP / Zabbix / Zenoss / Terraform / Monitoring'),
126 | who loves monitoring systems and cutting/bleeding edge technologies: Docker,
127 | Kubernetes, ECS, AWS, Google GCP, Terraform, Lambda, Zabbix, Grafana, Elasticsearch,
128 | Kibana, Prometheus, Sysdig, ...
129 | 
130 | Summary:
131 | * 2000+ [GitHub](https://github.com/monitoringartist/) stars
132 | * 10 000+ [Grafana dashboard](https://grafana.net/monitoringartist) downloads
133 | * 1 000 000+ [Docker image](https://hub.docker.com/u/monitoringartist/) pulls
134 | 
135 | Professional devops / monitoring / consulting services:
136 | 
137 | [![Monitoring Artist](http://monitoringartist.com/img/github-monitoring-artist-logo.jpg)](http://www.monitoringartist.com 'DevOps / Docker / Kubernetes / AWS ECS / Google GCP / Zabbix / Zenoss / Terraform / Monitoring')
138 | 


--------------------------------------------------------------------------------