├── README.md ├── ngx-lua-ban ├── README.md └── ban.lua └── ngx-lua-stats ├── README.md ├── empty_dict.lua ├── output.lua ├── perf ├── bin │ ├── fuck_perf.py │ ├── lhlib.py │ └── perf-create.py ├── conf │ └── perf_conf.py └── log │ └── README ├── record.lua └── update_l7.sh /README.md: -------------------------------------------------------------------------------- 1 | ## 介绍 2 | 3 | 利用nginx-lua模块做一些运维相关的工具. 4 | 5 | ## 包括 6 | 7 | - nginx站点性能相关统计 8 | - nginx访问频率封禁 9 | 10 | ## 环境依赖 11 | 12 | - nginx + ngx_http_lua_module 13 | 14 | ## 安装 15 | 16 | ``` 17 | http://wiki.nginx.org/HttpLuaModule#Installation 18 | ``` 19 | 20 | 21 | ## Help! 22 | 联系 skyeydemon <skyeydemon@gmail.com> 23 | -------------------------------------------------------------------------------- /ngx-lua-ban/README.md: -------------------------------------------------------------------------------- 1 | ## 介绍 2 | 3 | 利用nginx+lua做基于ip频率的封禁. 4 | 5 | ## 功能 6 | 7 | - 可以进行多次封禁,每次封禁时间可配置. 8 | 9 | ## 环境依赖 10 | 11 | - nginx + ngx_http_lua_module 12 | 13 | ## 安装 14 | 15 | ``` 16 | http://wiki.nginx.org/HttpLuaModule#Installation 17 | ``` 18 | 19 | ## 使用方法 20 | 21 | ### 添加全局字典 22 | 23 | 在nginx的配置中添加dict的初始化, 类似如下 24 | 25 | ``` 26 | lua_shared_dict IpLastDict 20M; 27 | lua_shared_dict IpBansDict 20M; 28 | lua_shared_dict IpStatusDict 20M; 29 | lua_shared_dict QsIpDict 20M; 30 | ``` 31 | 32 | ### 为特定的location添加统计 33 | 34 | 只需要添加一句即可~~ 35 | 将lua脚本嵌套进nginx的配置中, 例如: 36 | 37 | ``` 38 | server { 39 | listen 8080; 40 | server_name xxxxx.com; 41 | access_log /home/work/nginx/logs/xxxxx.com.log milog; 42 | 43 | location / { 44 | proxy_set_header Host $host; 45 | proxy_set_header X-Forwarded-For $remote_addr; 46 | proxy_pass http://xxxxx.com_backend; 47 | 48 | log_by_lua_file ./site-enable/record.lua; 49 | } 50 | } 51 | 52 | ``` 53 | 54 | ## Help! 
联系 skyeydemon <skyeydemon@gmail.com>
-------------------------------------------------------------------------------- /ngx-lua-ban/ban.lua: --------------------------------------------------------------------------------
-- Per-IP request-rate banning for nginx (runs via *_by_lua_file).
-- Shared dicts (declared with lua_shared_dict in nginx.conf):
--   IpLastDict   - timestamp of the IP's most recent request
--   IpBansDict   - count of consecutive rule violations per IP
--   IpStatusDict - ban flag per IP (key present => currently banned)
--   QsIpDict     - request counter per IP inside the sliding window
local IpLastDict = ngx.shared.IpLastDict
local IpBansDict = ngx.shared.IpBansDict
local IpStatusDict = ngx.shared.IpStatusDict
local QsIpDict = ngx.shared.QsIpDict

-- Allow at most REQ_LIMIT requests per REQ_IN_TIME seconds.
local REQ_IN_TIME = 1
local REQ_LIMIT = 4
-- After BAN_LIMIT consecutive violations the long ban kicks in.
local BAN_LIMIT = 4
local BAN_LIMIT_TIME_1 = 3   -- short ban, seconds
local BAN_LIMIT_TIME_2 = 10  -- long ban, seconds

-- Client address; switch to ngx.var.http_x_forwarded_for when this
-- server sits behind another proxy.
local ip = ngx.var.remote_addr
-- userId cookie; currently unused, kept for a future whitelist.
local uid = ngx.var.cookie_userId

-- Current unix timestamp (number; arithmetic below relies on it).
local now = os.time()

-- Static assets (URIs starting with /static/) are never rate-limited.
-- BUG FIX: the original pattern "^(/static/)*" matched every URI (zero
-- repetitions) and only worked by inspecting the capture.
local m = ngx.re.match(ngx.var.uri, "^/static/")
if m then
    return
end

-- NOTE: shared-dict entries can be evicted under memory pressure, so
-- every get() below is defended with a default instead of assuming the
-- key exists (the original crashed on nil arithmetic in that case).
local ipbans = IpBansDict:get(ip) or 0
local ipisban = IpStatusDict:get(ip)

if ipisban then
    -- tonumber() tolerates older entries that stored the time as a string.
    local iplasttime = tonumber(IpLastDict:get(ip)) or 0
    if ipbans <= BAN_LIMIT then
        -- Short ban: reject until BAN_LIMIT_TIME_1 seconds of silence.
        if now - iplasttime <= BAN_LIMIT_TIME_1 then
            IpBansDict:incr(ip, 1)
            ngx.say("/ban1")
            IpLastDict:set(ip, now)
            return
        else
            -- Ban expired: record the visit and clear the ban flag.
            IpLastDict:set(ip, now)
            IpStatusDict:delete(ip)
            return
        end
    else
        -- Long ban after more than BAN_LIMIT violations.
        if now - iplasttime <= BAN_LIMIT_TIME_2 then
            ngx.say("/ban2")
            -- Reset the window counter while still banned.
            QsIpDict:delete(ip)
            IpLastDict:set(ip, now)
            return
        else
            IpStatusDict:delete(ip)
            IpLastDict:set(ip, now)
            return
        end
    end
end

-- Not banned: record the visit and count it against the window.
IpLastDict:set(ip, now)

local qsip = QsIpDict:get(ip)
if qsip then
    if qsip >= REQ_LIMIT then
        -- Over the per-window limit: start a ban cycle.
        IpBansDict:set(ip, 1)
        IpStatusDict:set(ip, 1)
        ngx.say("/ban3")
        return
    else
        QsIpDict:incr(ip, 1)
        return
    end
else
    -- First request in this window; key expires after REQ_IN_TIME seconds.
    QsIpDict:set(ip, 1, REQ_IN_TIME)
    return
end
-------------------------------------------------------------------------------- /ngx-lua-stats/README.md: --------------------------------------------------------------------------------
## 介绍

以前我们为nginx做统计,都是通过对日志的分析来完成.比较麻烦,现在基于ngx_lua插件,以及perf-counter系统.开发了实时统计站点状态的脚本,解放生产力.

## 功能

- 支持分不同虚拟主机统计, 同一个虚拟主机下可以分不同的location统计.
- 可以统计与query-times request-time status-code speed 相关的数据.
- 自带python脚本, 可以保存数据的历史值,方便与各种监控系统对接.
10 | 11 | 12 | ## 环境依赖 13 | 14 | - nginx + ngx_http_lua_module 15 | 16 | ## 安装 17 | 18 | ``` 19 | http://wiki.nginx.org/HttpLuaModule#Installation 20 | ``` 21 | 22 | ## 使用方法 23 | 24 | ### 添加全局字典 25 | 26 | 在nginx的配置中添加dict的初始化, 类似如下 27 | 28 | ``` 29 | lua_shared_dict log_dict 20M; 30 | lua_shared_dict result_dict 20M; 31 | ``` 32 | 33 | ### 为特定的location添加统计 34 | 35 | 只需要添加一句即可~~ 36 | 将lua脚本嵌套进nginx的配置中, 例如: 37 | 38 | ``` 39 | server { 40 | listen 8080; 41 | server_name xxxxx.com; 42 | access_log /home/work/nginx/logs/xxxxx.com.log milog; 43 | 44 | location / { 45 | proxy_set_header Host $host; 46 | proxy_set_header X-Forwarded-For $remote_addr; 47 | proxy_pass http://xxxxx.com_backend; 48 | 49 | log_by_lua_file ./site-enable/record.lua; 50 | } 51 | 52 | location wtr/ { 53 | proxy_set_header Host $host; 54 | proxy_set_header X-Forwarded-For $remote_addr; 55 | proxy_pass http://xxxxx.com_wtr_backend; 56 | 57 | set $xlocation 'wtr'; 58 | log_by_lua_file ./site-enable/record.lua; 59 | } 60 | } 61 | 62 | ``` 63 | 64 | 其中 set $xlocation 'xxx' 用来明确的根据指定的location分组最终的数据. 具体输出可以看后面的介绍. 65 | 66 | 67 | ### 输出结果 68 | 69 | 通过配置一个server, 使得可以通过curl获取到字典里的所有结果 70 | 71 | ``` 72 | server { 73 | listen 8080 default; 74 | server_name _; 75 | 76 | location / { 77 | return 404; 78 | } 79 | 80 | location /status { 81 | content_by_lua_file ./site-enable/output.lua; 82 | } 83 | 84 | location /empty_dict { 85 | content_by_lua_file ./site-enable/empty_dict.lua; 86 | } 87 | } 88 | ``` 89 | 90 | 可以通过如下命令获取 91 | 92 | ``` 93 | curl ip_addr:8080/status 94 | ``` 95 | 96 | ### 清理字典 97 | 运行一段时间之后, 字典会变大. 
可以通过如下接口清理 98 | 99 | ``` 100 | curl ip_addr:8080/empty_dict 101 | ``` 102 | 103 | ### 支持的统计数据说明 104 | 105 | 目前支持统计以下数据,返回的原始数据类似于,每一行都是一个json.方便数据处理 106 | 107 | ``` 108 | 109 | -------------------------- 110 | {"app.xxxxx.com__upstream_time_to_192.168.1.162:8088_counter":191.509} 111 | {"app.xxxxx.com__upstream_time_to_192.168.1.162:8088_nb_counter":4633} 112 | {"app.xxxxx.com_sts__status_code_4xx_counter":10140} 113 | {"app.xxxxx.com__query_counter":10140} 114 | {"app.xxxxx.com__request_time_counter":412.432} 115 | {"app.xxxxx.com__upstream_time_counter":401.90899999999} 116 | {"app.xxxxx.com__upstream_time_to_192.168.1.165:8088_counter":210.4} 117 | {"app.xxxxx.com__upstream_time_to_192.168.1.165:8088_nb_counter":5507} 118 | {"app.xxxxx.com__bytes_sent_counter":426873680} 119 | 120 | ``` 121 | 122 | 其中 __ 用来分割虚拟主机(包含prefix)与后面的数据项,便于数据处理. 123 | counter表示此值一直在累加 124 | nb表示次数 125 | 126 | 127 | 可以得到的数据包括: query次数 request_time bytes_sent upstream_time 128 | 其中 upstream_time_10.20.12.49:8250_counter 表示到某个特定后端的upstrea_time耗时累加 129 | upstream_time_10.20.12.49:8250_nb_counter 表示到到某个特定后端的upstrea_time次数累加 130 | 131 | 132 | ## 如何处理数据 133 | 134 | ``` 135 | 因为采集到的数据大多都是counter的累加值,需要将delta值得到. 136 | 自带的perf系列脚本可以将现有的数据存储,计算得到delta值。 137 | fuck_perf.py脚本里面已经将需要计算的值算好了. 138 | 修改fuck_perf.py中tag_string = "cop.xxxx_owt.xxx_pdl.com_srv.l7_idc.sd_grp.pub,grp.pay"以及url = ip + ":8080/status".就可以添加自己想要的数据. 139 | 140 | 有两个概念需要明确一下: 141 | counter -- `具体的值,对应于xperf中的counter的值,类型为COUNTER`.被计算为speed类型, (当前值 - 上次值)/(当前时间-上次时间) 142 | value -- `具体的值,对应于xperf中的counter的值,类型为GAUGE`.原值,上传什么就存储为什么 143 | 144 | 比如 delta(bytes_sent_counter)/delta(query_counter) 得到就是这段时间的http传输速度 145 | delta(upstream_time_10.20.12.49:8250_counter)/delta(upstream_time_10.20.12.49:8250_nb_counter) 得到的就是这个后端upstream_time的平均值 146 | 147 | 148 | ``` 149 | 150 | ## ToDo 151 | 152 | 对于percentile的支持是下一步的重点计划. 153 | 154 | 155 | ## Help! 
156 | 联系 skyeydemon <skyeydemon@gmail.com> 157 | -------------------------------------------------------------------------------- /ngx-lua-stats/empty_dict.lua: -------------------------------------------------------------------------------- 1 | ---- 2 | local log_dict = ngx.shared.log_dict 3 | local result_dict = ngx.shared.result_dict 4 | ---- 清空字典 5 | result_dict:flush_all() 6 | log_dict:flush_all() 7 | -------------------------------------------------------------------------------- /ngx-lua-stats/output.lua: -------------------------------------------------------------------------------- 1 | ---- 2 | local log_dict = ngx.shared.log_dict 3 | local result_dict = ngx.shared.result_dict 4 | ---- 将字典中所有的值输出出来 5 | for k,v in pairs(result_dict:get_keys(2048))do 6 | ngx.say("{\"", v,"\":",result_dict:get(v),"}") 7 | end 8 | -------------------------------------------------------------------------------- /ngx-lua-stats/perf/bin/fuck_perf.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python 2 | #-*- coding:utf-8 -*- 3 | import urllib2 4 | import json 5 | import os 6 | import sys 7 | import time 8 | import cPickle as pickle 9 | from hashlib import md5 10 | import traceback 11 | import logging 12 | 13 | BASE_DIR = "/".join(os.path.abspath(__file__).split("/")[0:-2]) 14 | DATA_DIR = BASE_DIR + "/data/" 15 | LOG_DIR = BASE_DIR + "/log/" 16 | CONF_DIR = BASE_DIR + "/conf/" 17 | CONF_FILE = CONF_DIR + "perf_conf.py" 18 | 19 | 20 | if not os.path.isfile(CONF_FILE): 21 | print "%s not found. 
Example:"%(CONF_FILE) 22 | print ''' 23 | #!/bin/env python\n 24 | #-*- coding:utf-8 -*-\n 25 | 26 | ## tag_string和对应的group\n 27 | tag_group = {"cop.xxxx_owt.xxx_pdl.com_srv.l7_idc.sd,idc.lg_grp.pub,grp.pay":["xxx-l7",":8080/status",":8080"]}\n 28 | ## xperf服务器的地址\n 29 | xperf_uri = "10.0.4.65:8088/ez"''' 30 | sys.exit(0) 31 | 32 | if not os.path.exists(DATA_DIR): 33 | os.system("mkdir -p %s" %DATA_DIR) 34 | if not os.path.exists(LOG_DIR): 35 | os.system("mkdir -p %s" %LOG_DIR) 36 | 37 | ISOTIMEFORMAT = '%Y%m%d' 38 | TODAY = time.strftime(ISOTIMEFORMAT,time.localtime(time.time())) 39 | NOW_M = time.strftime('%M',time.localtime(time.time())) 40 | 41 | log_file = LOG_DIR + "fuck_perf.log." + TODAY 42 | logging.basicConfig(filename=log_file, level=logging.DEBUG, format='%(asctime)s %(filename)s [line:%(lineno)d] %(levelname)s %(message)s') 43 | logger = logging.getLogger() 44 | 45 | ########################################################## 46 | #### 需要配置项目 47 | ## tag_string和对应的group 48 | #tag_group = {"cop.xxxx_owt.xxx_pdl.com_srv.l7_idc.sd,idc.lg_grp.pub,grp.pay":["xxx-l7",":8080/status",":8080"]} 49 | ## xperf服务器的地址 50 | #xperf_uri = "10.0.4.65:8088/ez" 51 | 52 | ########################################################## 53 | 54 | def get_json(url): 55 | 56 | try: 57 | 58 | url = "http://%s"%(url) 59 | ## 用来存储一些信息 60 | status_dict = {} 61 | cal_dict = {} 62 | status_list = [] 63 | site_list = [] 64 | 65 | js = urllib2.urlopen(url).read().split("\n") 66 | for i in js: 67 | if i: 68 | _d = json.loads( i) 69 | for k,v in _d.items(): 70 | ## 如果字典的k v 有空值则直接抛弃 71 | if not k or not v: 72 | pass 73 | else: 74 | tlist = k.split("__") 75 | ## 检查k是否是以 __ 分割的 76 | if len(tlist) == 2: 77 | if tlist[0] not in site_list: 78 | ## 顺便将site项目拿出来,单独放到一个list中 79 | site_list.append( str(tlist[0]) ) 80 | 81 | site = str(tlist[0]) 82 | counter = str(tlist[1]) 83 | value = float(v) 84 | 85 | ## 将数据拆分放入list中 86 | _list = [ site, counter, value ] 87 | status_list.append(_list) 88 | 89 | 
status_dict.update( {str(k) : float(v)} ) 90 | ## 生成一个比较复杂的字典 91 | if cal_dict.has_key(site): 92 | cal_dict[site].update( {counter : value} ) 93 | else: 94 | cal_dict.update( {site : {counter : value} } ) 95 | 96 | #return status_dict, site_list, status_list, cal_dict 97 | return cal_dict 98 | except Exception, e: 99 | logger.info ("Get %s stauts error. %s" %( url, e) ) 100 | 101 | 102 | def save_cache_by_arith(obj, url, interval): 103 | ## 通过对当前分钟取余数来区分不同的历史数据, 并且将其通过pickle持久化 104 | arithmetic = int(NOW_M) % interval 105 | url_md5 = md5(url).hexdigest() 106 | cache_file = "%s/%s.cache.%s" %(DATA_DIR, url_md5, arithmetic) 107 | logger.info("Save to:%s" %cache_file) 108 | f = open(cache_file, "w+") 109 | pickle.dump(obj, f) 110 | f.close() 111 | 112 | 113 | def load_cache_by_file(file_path): 114 | ## 通过指定文件路径来load一个pickle数据 115 | logger.info("Load from:%s" %file_path) 116 | if os.path.exists(file_path): 117 | f = open(file_path, 'rb') 118 | try: 119 | r = pickle.load(f) 120 | except Exception, e: 121 | logger.info("Load file %s error : %s" %( file_name, e)) 122 | return {} 123 | finally: 124 | f.close() 125 | return r 126 | return {} 127 | 128 | 129 | def cal_delta_dict(dict1 , dict2): 130 | # 将两个字典里面的相同项算出delta并且存在同样结构的字典中输出 131 | # detal是用dict2 减去 dict1 132 | try: 133 | delta_d = {} 134 | for site, c_dict1 in dict1.items(): 135 | delta_d[site] = {} 136 | if not dict2.has_key(site): 137 | pass 138 | c_dict2 = dict2[site] 139 | for counter, value in c_dict1.items(): 140 | if not c_dict2.has_key(counter): 141 | pass 142 | delta_v = c_dict2[counter] - c_dict1[counter] 143 | ## 如果得到的detal小于0则不作为结果 144 | if float(delta_v) < 0: 145 | logger.info("Delta dict %s and %s lt 0! %s - %s = %s" % ( dict1, dict2, c_dict2[counter], c_dict2[counter], delta_v)) 146 | delta_v = 0 147 | else: 148 | delta_d[site].update( {counter:delta_v } ) 149 | logger.info("Delta dict : %s"%(delta_d)) 150 | return delta_d 151 | except Exception, e: 152 | logger.info("Delta dict %s and %s error. 
%s" %( dict1, dict2, e) ) 153 | return {} 154 | 155 | 156 | def send_counter_by_part( now_dict, delta_dict): 157 | SEND_LIST = [] 158 | ## 有的数据需要发送原值,有的需要将delta进行计算之后发送 159 | ## 先处理现有counter上报的值 160 | COUNTER_TYPE_LIST = ['status_code_4xx_counter','status_code_5xx_counter','query_counter','bytes_sent_counter'] 161 | for site,c_dict_n in now_dict.items(): 162 | for COUNTER_TYPE in COUNTER_TYPE_LIST: 163 | if c_dict_n.has_key(COUNTER_TYPE): 164 | if float(c_dict_n[COUNTER_TYPE]) >0: 165 | counter_name = site + "__" + COUNTER_TYPE 166 | counter_value = int(c_dict_n[COUNTER_TYPE]) 167 | SEND_DATA = ["COUNTER", counter_name, counter_value] 168 | SEND_LIST.append(SEND_DATA) 169 | 170 | ## 处理需要做计算的数据 171 | for site,c_dict_d in delta_dict.items(): 172 | ## 计算speed平均值 173 | if c_dict_d.has_key("query_counter") and c_dict_d.has_key("bytes_sent_counter"): 174 | if float(c_dict_d["query_counter"]) > 0: 175 | speed_avg = c_dict_d["bytes_sent_counter"] / c_dict_d["query_counter"] 176 | if float(speed_avg) > 0: 177 | counter_name = site + "__" + "speed_avg" 178 | SEND_DATA = ["GAUGE", counter_name, speed_avg] 179 | SEND_LIST.append(SEND_DATA) 180 | ## 计算request_time平均值 181 | if c_dict_d.has_key("query_counter") and c_dict_d.has_key("request_time_counter"): 182 | if float(c_dict_d["query_counter"]) >0: 183 | request_time_avg = c_dict_d["request_time_counter"] / c_dict_d["query_counter"] 184 | if float(request_time_avg) > 0: 185 | counter_name = site + "__" + "request_time_avg" 186 | SEND_DATA = ["GAUGE", counter_name, request_time_avg] 187 | SEND_LIST.append(SEND_DATA) 188 | ## 计算upstream_time平均值 189 | if c_dict_d.has_key("query_counter") and c_dict_d.has_key("upstream_time_counter"): 190 | if float(c_dict_d["query_counter"]) >0: 191 | upstream_time_avg = c_dict_d["upstream_time_counter"] / c_dict_d["query_counter"] 192 | if float(upstream_time_avg) > 0: 193 | counter_name = site + "__" + "upstream_time_avg" 194 | SEND_DATA = ["GAUGE", counter_name, upstream_time_avg] 195 | 
SEND_LIST.append(SEND_DATA) 196 | ## 分别计算每个upstream_time_to_addr的平均值 197 | upstream_to_addr_prefix_list = [] 198 | for counter_name, value in c_dict_d.items(): 199 | if "upstream_time_to" in str(counter_name): 200 | try: 201 | upstream_to_addr_prefix = "_".join(str(counter_name).split("_")[0:-2]) 202 | if upstream_to_addr_prefix not in upstream_to_addr_prefix_list and upstream_to_addr_prefix != "upstream_time_to": 203 | upstream_to_addr_prefix_list.append(upstream_to_addr_prefix) 204 | except Exception, e: 205 | logger.info("Calcul upstream_time_to_addr error. %s . counter_name : %s" %(e, counter_name)) 206 | 207 | 208 | if upstream_to_addr_prefix_list: 209 | for upstream_to_addr_prefix in upstream_to_addr_prefix_list: 210 | nb_var = upstream_to_addr_prefix + "_nb_counter" 211 | cnt_var = upstream_to_addr_prefix + "_counter" 212 | 213 | if c_dict_d.has_key(nb_var) and c_dict_d.has_key(cnt_var): 214 | if float(c_dict_d[nb_var]) >0 and float(c_dict_d[cnt_var]) >0: 215 | counter_name = site + "__" + upstream_to_addr_prefix + "_avg" 216 | counter_value = c_dict_d[cnt_var] / c_dict_d[nb_var] 217 | SEND_DATA = ["GAUGE", counter_name, counter_value] 218 | SEND_LIST.append(SEND_DATA) 219 | 220 | return SEND_LIST 221 | 222 | 223 | def send_data_to_fuck(group, endpoint, SEND_LIST): 224 | ## 将数据发送到perf-counter 225 | for _list in SEND_LIST: 226 | if _list[0] == "GAUGE": 227 | counter_type = "value" 228 | elif _list[0] == "COUNTER": 229 | counter_type = "count" 230 | counter_name = _list[1] 231 | value = _list[2] 232 | cmd = '''curl -d "group=%s&stat=%s&email=%s&%s=%s" http://10.0.4.65:8088/ez''' \ 233 | %(group, counter_name, endpoint, counter_type, value) 234 | r = os.popen(cmd).read() 235 | #r = os.system(cmd) 236 | logger.info("Send data to perf-counter. CMD: %s. 
Result : %s" %(cmd, r)) 237 | 238 | 239 | def get_one(url): 240 | cal_dict = get_json(url) 241 | logger.info( "Get data from %s : %s" %(url, cal_dict)) 242 | if not cal_dict: 243 | logger.info( "Fail get data from %s." %(url)) 244 | return {} , {} 245 | 246 | ## 以5分钟存储为粒度,取余数之后得到上一次需要的数据位置 247 | interval = 5 248 | url_md5 = md5(url).hexdigest() 249 | arithmetic = int(NOW_M) % interval 250 | 251 | if arithmetic == 0: 252 | last_cache_file = "%s/%s.cache." %(DATA_DIR, url_md5) + "4" 253 | elif arithmetic == 1: 254 | last_cache_file = "%s/%s.cache." %(DATA_DIR, url_md5) + "0" 255 | elif arithmetic == 2: 256 | last_cache_file = "%s/%s.cache." %(DATA_DIR, url_md5) + "1" 257 | elif arithmetic == 3: 258 | last_cache_file = "%s/%s.cache." %(DATA_DIR, url_md5) + "2" 259 | else: 260 | last_cache_file = "%s/%s.cache." %(DATA_DIR, url_md5) + "3" 261 | 262 | ## 将本次数据持久化 263 | save_cache_by_arith(cal_dict, url, interval) 264 | ## 如果上次数据不存在则退出 265 | if not os.path.exists(last_cache_file): 266 | logger.info( "Last data : %s not exist." %(last_cache_file)) 267 | return {} , cal_dict 268 | 269 | ## 如果存在上一次数据,则进行计算 270 | last_data = load_cache_by_file(last_cache_file) 271 | logger.info( "Last data %s exist . %s " %( last_cache_file, last_data)) 272 | return last_data, cal_dict 273 | 274 | 275 | def for_one( url, endpoint, group): 276 | 277 | try: 278 | ## last_data, cal_dict = get_one("10.20.12.57:8080/status") 279 | last_data, cal_dict = get_one(url) 280 | 281 | if not last_data: 282 | return 283 | 284 | delta_dict = cal_delta_dict(last_data, cal_dict) 285 | 286 | SEND_LIST = send_counter_by_part( cal_dict, delta_dict) 287 | if not SEND_LIST: 288 | return 289 | 290 | send_data_to_fuck(group, endpoint, SEND_LIST) 291 | 292 | except Exception, e: 293 | logger.info("Something looks like fucked. 
%s" %(e) ) 294 | 295 | 296 | def main(): 297 | try: 298 | from lhip import lhip 299 | sys.path.insert(0,CONF_DIR) 300 | from perf_conf import tag_group, xperf_uri 301 | 302 | for k,v in tag_group.items(): 303 | tag_string = k 304 | group = v[0] 305 | target_prefix = v[1] 306 | endpoint_prefix = v[2] 307 | ip_list = lhip("x", tag_string) 308 | for ip in ip_list: 309 | url = ip + target_prefix 310 | endpoint = ip + endpoint_prefix 311 | for_one( url, endpoint, group) 312 | except Exception, e: 313 | logger.info("Something looks like fucked. %s" %(e) ) 314 | 315 | 316 | if __name__ == '__main__': 317 | main() 318 | -------------------------------------------------------------------------------- /ngx-lua-stats/perf/bin/lhlib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #-*- encoding:utf-8 3 | 4 | import socket 5 | 6 | def get_ip_by_host(host): 7 | ip = None 8 | try: 9 | r = socket.getaddrinfo(host.strip(),None) 10 | ip = r[0][4][0] 11 | except socket.gaierror: 12 | pass 13 | return ip 14 | 15 | if __name__ == '__main__': 16 | print get_ip_by_host("") 17 | -------------------------------------------------------------------------------- /ngx-lua-stats/perf/bin/perf-create.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python 2 | 3 | import urllib2, urllib 4 | import json, sys, time, re, os 5 | from lhip import * 6 | from perf_log import * 7 | 8 | HTTP_404_ERROR = 'HTTP Error 404: Not Found' 9 | CREATE_STR = """echo "create {endpoint} {counter} `date '+%s'` {value} COUNTER" | nc perfcounter.miliao.srv 4444""" 10 | 11 | class HostData(): 12 | def __init__(self, host_lst = []): 13 | self.host_lst = host_lst 14 | self.index = 0 15 | 16 | def get_json(self, endpoint): 17 | js, js_, url = None, {}, '''http://%s/status ''' %(endpoint) 18 | try: 19 | js = urllib2.urlopen(url).read() 20 | js_ = json.loads(js) 21 | except Exception, e: 22 | if str(e) == 
HTTP_404_ERROR: 23 | js_ = None 24 | else: 25 | if js != None: 26 | for i in js.split('key: '): 27 | if i != '': 28 | k, v, nil = i.split('\n') 29 | js_[k] = v 30 | else: 31 | js_ = None 32 | logger.info("[%s] Receive %s from host %s", __file__, str(js_), \ 33 | self.host_lst[self.index]) 34 | 35 | return endpoint, js_ 36 | 37 | def __iter__(self): 38 | return self 39 | 40 | def next(self): 41 | try: 42 | endpoint, res = self.get_json(self.host_lst[self.index]) 43 | except IndexError: 44 | raise StopIteration 45 | self.index += 1 46 | return endpoint, res 47 | 48 | 49 | if __name__ == "__main__": 50 | hd = HostData(main()) 51 | 52 | for e, res in hd: 53 | if res is not None: 54 | for k, v in res.items(): 55 | # print PUSH_STR.format(endpoint = e, counter = k, value = v) 56 | result = os.system(CREATE_STR.format(endpoint = e, counter = k, value = v)) 57 | logger.info("[%s] Exec %s and result %s", __file__,\ 58 | CREATE_STR.format(endpoint = e, counter = k, value = v), result) 59 | -------------------------------------------------------------------------------- /ngx-lua-stats/perf/conf/perf_conf.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python 2 | #-*- coding:utf-8 -*- 3 | 4 | ## tag_string和对应的group 5 | tag_group = {"cop.xxxx_owt.xxx_pdl.com_srv.l7_idc.sd,idc.lg_grp.pub,grp.pay":["nginx-l7",":8080/status",":8080"]} 6 | ## xperf服务器的地址 7 | xperf_uri = "10.0.4.65:8088/ez" 8 | -------------------------------------------------------------------------------- /ngx-lua-stats/perf/log/README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skyeydemon/ngx-lua-practise/a83cab03fd926244a868a1af83d3140b06088cae/ngx-lua-stats/perf/log/README -------------------------------------------------------------------------------- /ngx-lua-stats/record.lua: -------------------------------------------------------------------------------- 1 | ---- log_dict做临时记录用 
result_dict记录最终需要采集的数据
-- log_dict is scratch space; result_dict holds the counters that /status scrapes.
local log_dict = ngx.shared.log_dict
local result_dict = ngx.shared.result_dict


-- Atomically add `delta` to `key` in `dict`, creating the key on first use.
-- This replaces the original read-modify-write get()/set() pairs, which
-- could lose updates between concurrent nginx workers.
local function incr_by(dict, key, delta)
    local newval, err = dict:incr(key, delta)
    if not newval and err == "not found" then
        -- add() may lose a race with another worker; the retry incr() is
        -- correct either way.
        dict:add(key, 0)
        dict:incr(key, delta)
    end
end


-- Key prefix: "<server>_<xlocation>_" or "<server>_". Every counter name
-- below prepends another "_", yielding the "__" separator documented in
-- the README for downstream parsing.
local server_name = ngx.var.server_name
local xlocation = ngx.var.xlocation
local var_prefix
-- BUG FIX: the original condition used `or`, which is always true and
-- crashed on string concatenation whenever $xlocation was unset (nil).
if xlocation ~= nil and xlocation ~= "" then
    var_prefix = server_name .. "_" .. xlocation .. "_"
else
    var_prefix = server_name .. "_"
end


-- Status-code counters (4xx / 5xx). Such requests return immediately and
-- are deliberately excluded from the query/time/bytes counters below.
local status_code = tonumber(ngx.var.status)
if status_code and status_code >= 400 and status_code < 500 then
    incr_by(result_dict, var_prefix .. "_status_code_4xx_counter", 1)
    return
end
if status_code and status_code >= 500 then
    incr_by(result_dict, var_prefix .. "_status_code_5xx_counter", 1)
    return
end


-- Request count, counter.
incr_by(result_dict, var_prefix .. "_query_counter", 1)


-- request_time accumulator; bail out when the variable is unavailable.
local request_time = tonumber(ngx.var.request_time)
if not request_time then
    return
end
incr_by(result_dict, var_prefix .. "_request_time_counter", request_time)


-- upstream_response_time accumulator; nil means no upstream was involved.
local upstream_time = tonumber(ngx.var.upstream_response_time)
if not upstream_time then
    return
end
incr_by(result_dict, var_prefix .. "_upstream_time_counter", upstream_time)


-- bytes_sent accumulator (for speed calculations downstream).
-- BUG FIX: the original accumulated bytes_sent in two separate blocks,
-- double-counting the value for every request that reached the end.
local bytes_sent = tonumber(ngx.var.bytes_sent)
if not bytes_sent then
    return
end
incr_by(result_dict, var_prefix .. "_bytes_sent_counter", bytes_sent)


-- Per-upstream-address time accumulator and request counter.
local upstream_addr = ngx.var.upstream_addr
if not upstream_addr then
    return
end
incr_by(result_dict, var_prefix .. "_upstream_time_to_" .. upstream_addr .. "_counter", upstream_time)
incr_by(result_dict, var_prefix .. "_upstream_time_to_" .. upstream_addr .. "_nb_counter", 1)
-------------------------------------------------------------------------------- /ngx-lua-stats/update_l7.sh: --------------------------------------------------------------------------------
#!/bin/bash
####用来更新线上l7的ngx-lua-stats脚本
####适用于/home/work/nginx/site-enable/目录结构

set -x
set -e

mkdir -p /home/work/nginx/site-enable/
rm -rf /home/work/nginx/site-enable/empty_dict.lua /home/work/nginx/site-enable/output.lua /home/work/nginx/site-enable/record.lua
cp empty_dict.lua output.lua record.lua /home/work/nginx/site-enable/
--------------------------------------------------------------------------------