├── .gitignore ├── README.md ├── dopey.example.yaml ├── dopey.py ├── makefile ├── requirements.txt ├── test ├── confirm-test.py └── make-test-indices.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | *.pyc 3 | *.swo 4 | *.swp 5 | .ropeproject/ 6 | dopey.yaml 7 | env/ 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 改版了一下,批量处理, 现在快多了~很适合大集群~~ 2 | 3 | # dopey 4 | ES索引的维护脚本, 每天close freeze delete reallocate optimize索引 5 | 6 | # 依赖 7 | click==3.3 8 | elasticsearch==1.3.0 9 | elasticsearch-curator==3.2.1 10 | PyYAML==3.11 11 | urllib3==1.10 12 | wheel==0.24.0 13 | 14 | # 使用 15 | dopey.py -c dopey.yaml -l /var/log/dopey.log --level debug 16 | 17 | ## advanced 18 | dopey.py -c dopey.yaml --base-day -1 #以昨天做为基准日期计算 19 | 20 | dopey.py -c dopey.yaml --base-day 1 #以明天做为基准日期计算 21 | 22 | dopey.py -c dopey.yaml --base-day 2016-11-11 #以指定日期做为基准日期计算 23 | 24 | dopey.py -c dopey.yaml --action-filters u,c #只做update setting, close index. 一共有4种操作, d:delete, c:close, u:update settings,f:force merge. 不加这个参数代表全部都可以执行. 25 | 26 | dopey.py --help 27 | 28 | ## 下面这样可以实现: 按月建的索引, 在34天后删除, 按天建的索引, 2天后删除 29 | 30 | ``` 31 | .*-(?=\d{4}\.\d{2}$): 32 | - delete_indices: 33 | days: 34 34 | .*-(?=\d{4}\.\d{2}\.\d{2}$): 35 | - delete_indices: 36 | days: 2 37 | ``` 38 | 39 | 自定义索引名字中的日期部分: 40 | 41 | ``` 42 | test-(?P%Y%m%d%H)00-\d: 43 | - delete_indices: 44 | hours: 6 45 | ``` 46 | -------------------------------------------------------------------------------- /dopey.example.yaml: -------------------------------------------------------------------------------- 1 | #log: dopey.log 2 | eshost: http://user:secret@127.0.0.1:9200 3 | batch: 10 4 | sumary: 5 | log: null 6 | prints: null 7 | mail: 8 | mail_host: "mail.corp.com" 9 | from_who: "jia.liu@corp.com" 10 | to_list: ["jia.liu@corp.com"] 11 | login_user: 'admin' 12 | login_password: '123456' 13 | 14 | setup: 15 | - update_cluster_settings: 16 | transient: 17 | cluster: 18 | routing: 19 | rebalance.enable: none 20 | 21 | teardown: 22 | - update_cluster_settings: 23 | transient: 24 | cluster: 25 | routing: 26 | rebalance.enable: all 27 | indices: 28 | test1-: 29 | - delete_indices: 30 | days: 10 # <=10 days 31 | - close_indices: 32 | days: 7 # <= 7 days 33 | - freeze_indices: 34 | days: 3-6 35 | - optimize_indices: 36 | day: 2 37 | settings: 38 | flush: 'true' 39 | max_num_segments: 1 40 | - update_settings: 41 | day: 1 # == 1 day 42 | settings: 43 | index: 44 | routing: 45 | allocation: 46 | require: 47 | boxtype: "weak" 48 | number_of_replicas: "1" 49 | refresh_interval: "30s" 50 | test2-: 51 | - delete_indices: 52 | days: 10 # <=10 days 53 | - close_indices: 54 | days: 7 # <= 7 days 55 | - freeze_indices: 56 | days: 3-6 57 | - optimize_indices: 58 | day: 2 59 | settings: 60 | flush: 'true' 61 | max_num_segments: 2 62 | - update_settings: 63 | day: 1 # == 1 day 64 | settings: 65 | index: 66 | routing: 67 | allocation: 68 | require: 69 | boxtype: "weak" 70 | number_of_replicas: "1" 71 | refresh_interval: "30s" 72 | 73 | #按月建的索引, 在34天后删除, 按天建的索引, 8天后删除 74 | .*-(?=\d{4}\.\d{2}$): 75 | - delete_indices: 76 | days: 34 77 | 78 | .*-(?=\d{4}\.\d{2}\.\d{2}$): 79 | - delete_indices: 80 | days: 8 81 | 82 | test-(?P%Y%m%d%H)00-\d: 83 | - delete_indices: 84 | hours: 6 85 | - freeze_indices: 86 | hours: 4 87 | -------------------------------------------------------------------------------- /dopey.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import yaml 5 | import requests 6 | 7 | import json 8 | import datetime 9 | import argparse 10 | import smtplib 11 | from email.mime.text import MIMEText 12 | import logging.handlers 13 | import logging 14 | import logging.config 15 | 16 | import utils 17 | 18 | config = {} 19 | 20 | 21 | def initlog(level=None, log="-", disable_existing_loggers=True): 22 | if level is None: 23 | level = logging.DEBUG if __debug__ else logging.INFO 24 | if isinstance(level, basestring): 25 | if '|' in level: 26 | level, d = level.split('|') 27 | if 'FALSE'.startswith(d.upper()): 28 | disable_existing_loggers = False 29 | level = getattr(logging, level.upper()) 30 | 31 | class MyFormatter(logging.Formatter): 32 | 33 | def format(self, record): 34 | dformatter = '[%(asctime)s] %(name)s %(levelname)s %(pathname)s %(lineno)d [%(funcName)s] %(message)s' 35 | formatter = '[%(asctime)s] %(levelname)s %(name)s %(message)s' 36 | if record.levelno <= logging.DEBUG: 37 | self._fmt = dformatter 38 | else: 39 | self._fmt = formatter 40 | return super(MyFormatter, self).format(record) 41 | 42 | config = { 43 | "version": 1, 44 | "disable_existing_loggers": disable_existing_loggers, 45 | "formatters": { 46 | "custom": { 47 | '()': MyFormatter 48 | }, 49 | "simple": { 50 | "format": "%(asctime)s %(name)s %(levelname)s %(message)s" 51 | }, 52 | "verbose": { 53 | "format": "[%(asctime)s] %(name)s %(levelname)s %(pathname)s %(lineno)d [%(funcName)s] %(message)s" 54 | } 55 | }, 56 | "handlers": { 57 | }, 58 | 'root': { 59 | 'level': level, 60 | 'handlers': ['console'] 61 | } 62 | } 63 | console = { 64 | "class": "logging.StreamHandler", 65 | "level": "DEBUG", 66 | "formatter": "verbose", 67 | "stream": "ext://sys.stdout" 68 | } 69 | file_handler = { 70 | "class": "logging.handlers.RotatingFileHandler", 71 | "level": "DEBUG", 72 | "formatter": "verbose", 73 | "filename": log, 74 | "maxBytes": 10*1000**2, # 10M 75 | "backupCount": 5, 76 | "encoding": "utf8" 77 | } 78 | if log == "-": 79 | config["handlers"]["console"] = console 80 | config["root"]["handlers"] = ["console"] 81 | else: 82 | config["handlers"]["file_handler"] = file_handler 83 | config["root"]["handlers"] = ["file_handler"] 84 | logging.config.dictConfig(config) 85 | # end initlog 86 | 87 | 88 | class Sumary(object): 89 | 90 | def __init__(self): 91 | super(Sumary, self).__init__() 92 | self.records = [] 93 | 94 | def add(self, record): 95 | self.records.append( 96 | "[%s] %s" % 97 | (datetime.datetime.now().strftime("%Y.%m.%d %H:%M:%S"), record)) 98 | 99 | @property 100 | def sumary(self): 101 | return "\n".join(self.records) 102 | 103 | def prints(self): 104 | print self.sumary.encode("utf-8") 105 | 106 | def log(self): 107 | logging.getLogger("DopeySumary").info(self.sumary) 108 | 109 | def mail( 110 | self, 111 | mail_host=None, 112 | from_who=None, 113 | to_list=None, 114 | login_user=None, 115 | login_password=None, 116 | sub="dopey summary"): 117 | content = self.sumary 118 | content = content.encode("utf-8") 119 | 120 | msg = MIMEText(content) 121 | msg["Subject"] = sub 122 | msg["From"] = from_who 123 | msg["To"] = ";".join(to_list) 124 | try: 125 | s = smtplib.SMTP() 126 | s.connect(mail_host) 127 | if login_user is not None: 128 | s.login(login_user, login_password) 129 | s.sendmail(from_who, to_list, msg.as_string()) 130 | s.close() 131 | except Exception as e: 132 | logging.error(str(e)) 133 | 134 | 135 | dopey_summary = Sumary() 136 | 137 | _delete = [] 138 | _close = [] 139 | _optimize = [] 140 | _dealt = [] 141 | _update_settings = [] 142 | 143 | 144 | def update_cluster_settings(settings): 145 | """ 146 | :type settings: cluster settings 147 | :rtype: response 148 | """ 149 | global config 150 | logging.info("update cluster settings: %s" % settings) 151 | try: 152 | url = u"{}/_cluster/settings".format(config["eshost"]) 153 | r = requests.put( 154 | url, data=json.dumps(settings), params={ 155 | "master_timeout": "300s"}, headers={"content-type": "application/json"}) 156 | return r.ok 157 | except Exception as e: 158 | logging.error("failed to update cluster settings. %s" % e) 159 | return False 160 | 161 | 162 | def _get_base_day(base_day): 163 | try: 164 | int(base_day) 165 | except BaseException: 166 | return datetime.datetime.strptime(base_day, r"%Y-%m-%d") 167 | else: 168 | return datetime.datetime.now() + datetime.timedelta(int(base_day)) 169 | 170 | 171 | def _get_action_filters(action_filters_arg): 172 | action_filters_mapping = { 173 | "c": "close_indices", 174 | "d": "delete_indices", 175 | "u": "update_settings", 176 | "f": "optimize_indices", 177 | "fr": "freeze_indices", 178 | } 179 | if action_filters_arg == "": 180 | return action_filters_mapping.values() 181 | try: 182 | return [action_filters_mapping[k] 183 | for k in action_filters_arg.split(",")] 184 | except BaseException: 185 | raise Exception("unrecognizable action filters") 186 | 187 | 188 | def pre_process_index_config(index_config): 189 | """ 190 | type index_config: list[{}] 191 | """ 192 | action_weight = { 193 | "update_settings": 0, 194 | "delete_indices": 1, 195 | "close_indices": 2, 196 | "optimize_indices": 4, 197 | } 198 | index_config.sort(key=lambda x: action_weight[x.keys()[0]]) 199 | return index_config 200 | 201 | 202 | def main(): 203 | global logging 204 | 205 | parser = argparse.ArgumentParser() 206 | parser.add_argument("-c", default="dopey.yaml", help="yaml config file") 207 | parser.add_argument("--eshost", default="", help="eshost here will overwrite that in config file") 208 | parser.add_argument( 209 | "--base-day", default="0", 210 | help="number 0(today), 1(tommorow), -1(yestoday), or string line 2011-11-11") 211 | parser.add_argument( 212 | "--action-filters", 213 | default="", 214 | help="comma splited. d:delete, c:close, u:update settings, f:forcemerge, fr:freeze. \ 215 | leaving blank means do all the actions configuared in config file") 216 | parser.add_argument( 217 | "-l", 218 | default="-", 219 | help="log file") 220 | parser.add_argument("--level", default="info") 221 | args = parser.parse_args() 222 | 223 | global config 224 | config = yaml.load(open(args.c)) 225 | if args.eshost: 226 | config['eshost'] = args.eshost 227 | 228 | initlog(level=args.level, log=config["l"] if "log" in config else args.l) 229 | 230 | all_indices = utils.get_indices(config['eshost']) 231 | 232 | logging.debug(u"all_indices: {}".format(all_indices)) 233 | 234 | for action in config.get("setup", []): 235 | settings = action.values()[0] 236 | eval(action.keys()[0])(settings) 237 | 238 | base_day = _get_base_day(args.base_day) 239 | logging.info("base day is %s" % base_day) 240 | action_filters = _get_action_filters(args.action_filters) 241 | 242 | if 'delete_indices' in action_filters: 243 | to_delete_indices = utils.get_to_delete_indices( 244 | config, all_indices, base_day) 245 | logging.info('try to delete `{}`'.format(' '.join(e[0] for e in to_delete_indices))) 246 | utils.delete_indices(config, to_delete_indices) 247 | 248 | if 'close_indices' in action_filters: 249 | to_close_indices = utils.get_to_close_indices( 250 | config, all_indices, base_day) 251 | logging.info('try to close `{}`'.format(' '.join(e[0] for e in to_close_indices))) 252 | utils.close_indices(config, to_close_indices) 253 | 254 | if 'freeze_indices' in action_filters: 255 | to_freeze_indices = utils.get_to_freeze_indices( 256 | config, all_indices, base_day) 257 | logging.info('try to freeze `{}`'.format(' '.join(e[0] for e in to_freeze_indices))) 258 | utils.freeze_indices(config, to_freeze_indices) 259 | 260 | if 'update_settings' in action_filters: 261 | to_update_indices = utils.get_to_update_indices( 262 | config, all_indices, base_day) 263 | logging.info('(before settings diff filter)try to update `{}`'.format(' '.join(e[0] for e in to_update_indices))) 264 | utils.update_settings(config, to_update_indices) 265 | 266 | if 'optimize_indices' in action_filters: 267 | to_optimize_indices = utils.get_to_optimize_indices( 268 | config, all_indices, base_day) 269 | logging.info('try to forcemerge `{}`'.format(' '.join(e[0] for e in to_optimize_indices))) 270 | utils.optimize_indices(config, to_optimize_indices) 271 | 272 | # dopey_summary.add( 273 | # u"未处理:\n{}\n删除:\n{}\n关闭:\n{}\n优化:{}\n更新索配置:{}".format( 274 | # "\n".join(sorted(not_dealt)), 275 | # "\n".join(sorted(_delete)), 276 | # "\n".join(sorted(_close)), 277 | # "\n".join(sorted(_optimize)), 278 | # "\n".join(sorted(_update_settings)))) 279 | 280 | for action in config.get("teardown", []): 281 | settings = action.values()[0] 282 | eval(action.keys()[0])(settings) 283 | 284 | sumary_config = config.get("sumary") 285 | for action, kargs in sumary_config.items(): 286 | if kargs: 287 | getattr(dopey_summary, action)(**kargs) 288 | else: 289 | getattr(dopey_summary, action)() 290 | 291 | 292 | if __name__ == "__main__": 293 | main() 294 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | test: 2 | python make-test-indices.py 3 | python dopey.py -c dopey.example.yml 4 | python confirm-test.py 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | PyYAML>=4.2b1 2 | requests>=2.20.0 3 | -------------------------------------------------------------------------------- /test/confirm-test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import requests 5 | 6 | import json 7 | import datetime 8 | import argparse 9 | import logging 10 | import logging.config 11 | 12 | def initlog(level=None, log="-"): 13 | if level is None: 14 | level = logging.DEBUG if __debug__ else logging.INFO 15 | if isinstance(level, basestring): 16 | level = getattr(logging, level.upper()) 17 | 18 | class MyFormatter(logging.Formatter): 19 | 20 | def format(self, record): 21 | dformatter = '[%(asctime)s] %(name)s %(levelname)s %(pathname)s %(lineno)d [%(funcName)s] %(message)s' 22 | formatter = '[%(asctime)s] %(levelname)s %(name)s %(message)s' 23 | if record.levelno <= logging.DEBUG: 24 | self._fmt = dformatter 25 | else: 26 | self._fmt = formatter 27 | return super(MyFormatter, self).format(record) 28 | 29 | config = { 30 | "version": 1, 31 | "disable_existing_loggers": True, 32 | "formatters": { 33 | "custom": { 34 | '()': MyFormatter 35 | }, 36 | "simple": { 37 | "format": "%(asctime)s %(name)s %(levelname)s %(message)s" 38 | }, 39 | "verbose": { 40 | "format": "[%(asctime)s] %(name)s %(levelname)s %(pathname)s %(lineno)d [%(funcName)s] %(message)s" 41 | } 42 | }, 43 | "handlers": { 44 | }, 45 | 'root': { 46 | 'level': level, 47 | 'handlers': ['console'] 48 | } 49 | } 50 | console = { 51 | "class": "logging.StreamHandler", 52 | "level": "DEBUG", 53 | "formatter": "verbose", 54 | "stream": "ext://sys.stdout" 55 | } 56 | file_handler = { 57 | "class": "logging.handlers.RotatingFileHandler", 58 | "level": "DEBUG", 59 | "formatter": "verbose", 60 | "filename": log, 61 | "maxBytes": 10*1000**2, # 10M 62 | "backupCount": 5, 63 | "encoding": "utf8" 64 | } 65 | if log == "-": 66 | config["handlers"]["console"] = console 67 | config["root"]["handlers"] = ["console"] 68 | else: 69 | config["handlers"]["file_handler"] = file_handler 70 | config["root"]["handlers"] = ["file_handler"] 71 | logging.config.dictConfig(config) 72 | # end initlog 73 | 74 | 75 | def main(): 76 | parser = argparse.ArgumentParser() 77 | parser.add_argument("-l", default="-", help="log file") 78 | parser.add_argument("--level", default="info") 79 | parser.add_argument("--eshost") 80 | args = parser.parse_args() 81 | 82 | initlog(level=args.level, log=args.l) 83 | 84 | url = u'{}/_cat/indices?h=status,index,pri,rep,docs.count,pri.store.size,sc'.format(args.eshost) 85 | logging.debug(url) 86 | 87 | r = requests.get(url) 88 | logging.debug(r) 89 | 90 | all_indices = {} 91 | 92 | for l in [e.strip() for e in r.text.split('\n') if e.strip()]: 93 | if 'close' in l: 94 | status, index = l.split() 95 | all_indices[index] = {'status': status} 96 | else: 97 | status, index, pri, rep, docs_count, pri_store_size, sc = l.split() 98 | all_indices[index] = {'status': status, 'pri': pri, 'rep': rep, 99 | 'docs_count': docs_count, 'pri_store_size': pri_store_size, 'sc': sc} 100 | logging.info(json.dumps(all_indices, indent=2)) 101 | 102 | now = datetime.datetime.now() 103 | 104 | # test1-YYYY-mm-dd 105 | # delete 106 | for i in range(10, 15): 107 | date = now - datetime.timedelta(i) 108 | indexname = u'test1-{}'.format(date.strftime("%Y-%m-%d")) 109 | assert indexname not in all_indices 110 | # close 111 | for i in range(7, 10): 112 | date = now - datetime.timedelta(i) 113 | indexname = u'test1-{}'.format(date.strftime("%Y-%m-%d")) 114 | assert all_indices[indexname]['status'] == 'close' 115 | # freeze 116 | for i in range(3, 7): 117 | date = now - datetime.timedelta(i) 118 | indexname = u'test1-{}'.format(date.strftime("%Y-%m-%d")) 119 | assert all_indices[indexname]['sc'] == '0' 120 | for i in range(3): 121 | date = now - datetime.timedelta(i) 122 | indexname = u'test1-{}'.format(date.strftime("%Y-%m-%d")) 123 | assert all_indices[indexname]['sc'] != '0' 124 | 125 | # update settings 126 | date = now - datetime.timedelta(1) 127 | indexname = u'test1-{}'.format(date.strftime("%Y-%m-%d")) 128 | url = u'{}/{}/_settings'.format(args.eshost, indexname) 129 | logging.debug(url) 130 | settings = requests.get(url).json()[indexname]['settings'] 131 | logging.debug(settings) 132 | assert str(settings['index']['number_of_replicas']) == '1' 133 | assert settings['index']['refresh_interval'] == '30s' 134 | assert settings['index']['routing']['allocation']['require']['boxtype'] == 'weak' 135 | # merge 136 | date = now - datetime.timedelta(2) 137 | indexname = u'test1-{}'.format(date.strftime("%Y-%m-%d")) 138 | assert int(all_indices[indexname]['sc']) <= 1 139 | 140 | logging.info('test1-YYYY-mm-dd passed') 141 | 142 | # test1-YYYY.mm.dd 143 | # delete 144 | for i in range(8, 15): 145 | date = now - datetime.timedelta(i) 146 | indexname = u'test1-{}'.format(date.strftime("%Y.%m.%d")) 147 | assert indexname not in all_indices 148 | # close 149 | for i in range(7, 8): 150 | date = now - datetime.timedelta(i) 151 | indexname = u'test1-{}'.format(date.strftime("%Y.%m.%d")) 152 | assert all_indices[indexname]['status'] == 'close' 153 | # update settings 154 | date = now - datetime.timedelta(1) 155 | indexname = u'test1-{}'.format(date.strftime("%Y.%m.%d")) 156 | url = u'{}/{}/_settings'.format(args.eshost, indexname) 157 | logging.debug(url) 158 | settings = requests.get(url).json()[indexname]['settings'] 159 | logging.debug(settings) 160 | assert str(settings['index']['number_of_replicas']) == '1' 161 | assert settings['index']['refresh_interval'] == '30s' 162 | assert settings['index']['routing']['allocation']['require']['boxtype'] == 'weak' 163 | # merge 164 | date = now - datetime.timedelta(2) 165 | indexname = u'test1-{}'.format(date.strftime("%Y.%m.%d")) 166 | assert int(all_indices[indexname]['sc']) <= 1 167 | logging.info('test1-YYYY.mm.dd passed') 168 | 169 | # test2-YYYY.mm.dd 170 | # delete 171 | for i in range(8, 15): 172 | date = now - datetime.timedelta(i) 173 | indexname = u'test2-{}'.format(date.strftime("%Y.%m.%d")) 174 | assert indexname not in all_indices 175 | # close 176 | for i in range(7, 8): 177 | date = now - datetime.timedelta(i) 178 | indexname = u'test2-{}'.format(date.strftime("%Y.%m.%d")) 179 | assert all_indices[indexname]['status'] == 'close' 180 | # freeze 181 | for i in range(3, 7): 182 | date = now - datetime.timedelta(i) 183 | indexname = u'test2-{}'.format(date.strftime("%Y.%m.%d")) 184 | assert all_indices[indexname]['sc'] == '0' 185 | for i in range(3): 186 | date = now - datetime.timedelta(i) 187 | indexname = u'test2-{}'.format(date.strftime("%Y.%m.%d")) 188 | assert all_indices[indexname]['sc'] != '0' 189 | # update settings 190 | date = now - datetime.timedelta(1) 191 | indexname = u'test2-{}'.format(date.strftime("%Y.%m.%d")) 192 | url = u'{}/{}/_settings'.format(args.eshost, indexname) 193 | logging.debug(url) 194 | settings = requests.get(url).json()[indexname]['settings'] 195 | logging.debug(settings) 196 | assert str(settings['index']['number_of_replicas']) == '1' 197 | assert settings['index']['refresh_interval'] == '30s' 198 | assert settings['index']['routing']['allocation']['require']['boxtype'] == 'weak' 199 | # merge 200 | date = now - datetime.timedelta(2) 201 | indexname = u'test2-{}'.format(date.strftime("%Y.%m.%d")) 202 | assert int(all_indices[indexname]['sc']) <= 2 203 | logging.info('test2-YYYY.mm.dd passed') 204 | 205 | # test3 206 | # delete 207 | for i in range(8, 15): 208 | date = now - datetime.timedelta(i) 209 | indexname = u'test2-{}'.format(date.strftime("%Y.%m.%d")) 210 | assert indexname not in all_indices 211 | for i in range(8): 212 | date = now - datetime.timedelta(i) 213 | indexname = u'test2-{}'.format(date.strftime("%Y.%m.%d")) 214 | assert indexname in all_indices 215 | logging.info('test3-YYYY.mm.dd passed') 216 | 217 | # month-YYYY.mm 218 | # delete 219 | for i in range(34, 50): 220 | date = now - datetime.timedelta(days=i) 221 | indexname = u'month-{}'.format(date.strftime("%Y.%m")) 222 | assert indexname not in all_indices 223 | for i in range(1): 224 | date = now - datetime.timedelta(days=i) 225 | indexname = u'month-{}'.format(date.strftime("%Y.%m")) 226 | assert indexname in all_indices 227 | logging.info('month-YYYY.mm passed') 228 | 229 | # test-YYYYMMDDHHmm-1 230 | # delete 231 | for i in range(6, 15): 232 | date = now - datetime.timedelta(hours=i) 233 | indexname = u'test-{}00-1'.format(date.strftime("%Y%m%d%H")) 234 | assert indexname not in all_indices 235 | for i in range(6): 236 | date = now - datetime.timedelta(hours=i) 237 | indexname = u'test-{}00-1'.format(date.strftime("%Y%m%d%H")) 238 | assert indexname in all_indices 239 | for i in range(4,6): 240 | date = now - datetime.timedelta(hours=i) 241 | indexname = u'test-{}00-1'.format(date.strftime("%Y%m%d%H")) 242 | assert all_indices[indexname]['sc'] == '0' 243 | for i in range(4): 244 | date = now - datetime.timedelta(hours=i) 245 | indexname = u'test-{}00-1'.format(date.strftime("%Y%m%d%H")) 246 | assert all_indices[indexname]['sc'] != '0' 247 | logging.info('test-YYYYMMDDHHmm-1 passed') 248 | 249 | 250 | if __name__ == '__main__': 251 | main() 252 | -------------------------------------------------------------------------------- /test/make-test-indices.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | ''' 4 | test1-YYYY-MM-dd 15 days 5 | test1-YYYY.MM.dd 15 days 6 | test2-YYYY.MM.dd 15 days 7 | test3-YYYY.MM.dd 15 days 8 | test-YYYYMMDDHHmm-1 15 hours 9 | ''' 10 | 11 | import requests 12 | 13 | import datetime 14 | import json 15 | 16 | 17 | def main(): 18 | yn = raw_input('this will delete all indices. [y/n]') 19 | if yn != 'y': 20 | return 21 | 22 | url = u'http://127.0.0.1:9200/*' 23 | print url 24 | print requests.delete(url, headers={'content-type': 'application/json'}) 25 | 26 | now = datetime.datetime.now() 27 | settings = { 28 | "settings": { 29 | "index": { 30 | "number_of_shards": "1", 31 | "number_of_replicas": "0" 32 | } 33 | } 34 | } 35 | for i in range(15): 36 | date = now - datetime.timedelta(i) 37 | indexname = u'test1-{}'.format(date.strftime("%Y.%m.%d")) 38 | print indexname 39 | 40 | # create index 41 | url = u'http://127.0.0.1:9200/{}'.format(indexname) 42 | print url 43 | r = requests.put(url, data=json.dumps(settings), headers={'content-type': 'application/json'}) 44 | print r.text 45 | 46 | # write 10 docs to index and refresh 47 | for j in range(8): 48 | url = u'http://127.0.0.1:9200/{}/logs?refresh=true'.format(indexname) 49 | print url 50 | r = requests.post(url, data=json.dumps({"age": j}), headers={'content-type': 'application/json'}) 51 | print r.text 52 | 53 | indexname = u'test1-{}'.format(date.strftime("%Y-%m-%d")) 54 | print indexname 55 | url = u'http://127.0.0.1:9200/{}'.format(indexname) 56 | print url 57 | r = requests.put(url, data=json.dumps(settings), headers={'content-type': 'application/json'}) 58 | print r.text 59 | 60 | # write 10 docs to index and refresh 61 | for j in range(8): 62 | url = u'http://127.0.0.1:9200/{}/logs?refresh=true'.format(indexname) 63 | print url 64 | r = requests.post(url, data=json.dumps({"age": j}), headers={'content-type': 'application/json'}) 65 | print r.text 66 | 67 | date = now - datetime.timedelta(i) 68 | indexname = u'test2-{}'.format(date.strftime("%Y.%m.%d")) 69 | print indexname 70 | url = u'http://127.0.0.1:9200/{}'.format(indexname) 71 | print url 72 | r = requests.put(url, data=json.dumps(settings), headers={'content-type': 'application/json'}) 73 | print r.text 74 | 75 | # write 10 docs to index and refresh 76 | for j in range(8): 77 | url = u'http://127.0.0.1:9200/{}/logs?refresh=true'.format(indexname) 78 | print url 79 | r = requests.post(url, data=json.dumps({"age": j}), headers={'content-type': 'application/json'}) 80 | print r.text 81 | 82 | date = now - datetime.timedelta(i) 83 | indexname = u'test3-{}'.format(date.strftime("%Y.%m.%d")) 84 | print indexname 85 | url = u'http://127.0.0.1:9200/{}'.format(indexname) 86 | print url 87 | r = requests.put(url, data=json.dumps(settings), headers={'content-type': 'application/json'}) 88 | print r.text 89 | 90 | date = now - datetime.timedelta(hours=i) 91 | indexname = u'test-{}00-1'.format(date.strftime("%Y%m%d%H")) 92 | print indexname 93 | 94 | url = u'http://127.0.0.1:9200/{}'.format(indexname) 95 | print url 96 | r = requests.put(url, data=json.dumps(settings), headers={'content-type': 'application/json'}) 97 | print r.text 98 | 99 | # write 10 docs to index and refresh 100 | for j in range(8): 101 | url = u'http://127.0.0.1:9200/{}/logs?refresh=true'.format(indexname) 102 | print url 103 | r = requests.post(url, data=json.dumps({"age": j}), headers={'content-type': 'application/json'}) 104 | print r.text 105 | 106 | 107 | for i in range(50): 108 | date = now - datetime.timedelta(days=i) 109 | indexname = u'month-{}'.format(date.strftime("%Y.%m")) 110 | print indexname 111 | url = u'http://127.0.0.1:9200/{}'.format(indexname) 112 | print url 113 | r = requests.put(url, data=json.dumps(settings), headers={'content-type': 'application/json'}) 114 | print r.text 115 | 116 | 117 | if __name__ == '__main__': 118 | main() 119 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import requests 5 | 6 | import datetime 7 | import logging 8 | import re 9 | import json 10 | 11 | 12 | def _compare_index_settings(part, whole): 13 | """ 14 | return True if part is part of whole 15 | type part: dict or else 16 | type whole: dict or else 17 | rtype: boolean 18 | >>> whole={"index":{"routing":{"allocation":{"include":{"group":"4,5"},"total_shards_per_node":"2"}},"refresh_interval":"60s","number_of_shards":"20",\ 19 | "store":{"type":"niofs"},"number_of_replicas":"1"}} 20 | >>> part={"index":{"routing":{"allocation":{"include":{"group":"4,5"}}}}} 21 | >>> _compare_index_settings(part, whole) 22 | True 23 | >>> part={"index":{"routing":{"allocation":{"include":{"group":"5"}}}}} 24 | >>> _compare_index_settings(part, whole) 25 | False 26 | """ 27 | if part == whole: 28 | return True 29 | if part is None and whole is None: 30 | return True 31 | if part is None or whole is None: 32 | return (part, whole) 33 | if not isinstance(part, type(whole)): 34 | return (part, whole) 35 | if not isinstance(part, dict): 36 | return part == whole 37 | for k, v in part.items(): 38 | r = _compare_index_settings(v, whole.get(k)) 39 | if r is not True: 40 | return r 41 | return True 42 | 43 | 44 | def get_indices(eshost): 45 | all_indices = [] 46 | url = "{}/_cat/indices?h=i".format(eshost) 47 | logging.debug(u"get all indices from {}".format(url)) 48 | 49 | r = requests.get(url, headers={"content-type": "application/json"}) 50 | if not r.ok: 51 | logging.error(r.text) 52 | raise BaseException(u"could not get indices from {}:{}".format(url, r.status_code)) 53 | for i in r.text.split(): 54 | i = i.strip() 55 | if i == "": 56 | continue 57 | all_indices.append(i) 58 | return all_indices 59 | 60 | 61 | def cache(c): 62 | def wrapper(func): 63 | def inner(*args): 64 | r = c.get(tuple(args)) 65 | if r: 66 | return r 67 | r = func(*args) 68 | c[tuple(args)] = r 69 | return r 70 | return inner 71 | return wrapper 72 | 73 | 74 | @cache(c={}) 75 | def get_index_settings(eshost, indexname): 76 | url = u"{}/{}/_settings".format(eshost, indexname) 77 | try: 78 | return requests.get(url, headers={"content-type": "application/json"}).json()[indexname]['settings'] 79 | except Exception as e: 80 | logging.error( 81 | u"could not get {} settings: {}".format( 82 | indexname, str(e))) 83 | return {} 84 | 85 | 86 | @cache(c={}) 87 | def pick_date_from_indexname(indexname, index_prefix): 88 | patterns = ( 89 | (r"^%s(\d{4}\.\d{2}\.\d{2})$", "%Y.%m.%d"), 90 | (r"^%s(\d{4}\-\d{2}\-\d{2})$", "%Y-%m-%d"), 91 | (r"^%s(\d{4}\.\d{2})$", "%Y.%m"), 92 | (r"^%s(\d{4}\-\d{2})$", "%Y-%m"), 93 | ) 94 | for pattern_format, date_format in patterns: 95 | r = re.findall( 96 | pattern_format % index_prefix, 97 | indexname) 98 | if r: 99 | date = datetime.datetime.strptime(r[0], date_format) 100 | return date 101 | 102 | index_format = index_prefix 103 | r = re.findall(u'\(\?P([^)]+)\)', index_format) 104 | if len(r) != 1: 105 | return 106 | date_format = r[0] 107 | 108 | index_format = index_format.replace('%Y', r'\d{4}') 109 | index_format = index_format.replace('%y', r'\d{2}') 110 | index_format = index_format.replace('%m', r'\d{2}') 111 | index_format = index_format.replace('%d', r'\d{2}') 112 | index_format = index_format.replace('%H', r'\d{2}') 113 | index_format = index_format.replace('%M', r'\d{2}') 114 | index_format = index_format.replace('.', r'\.') 115 | 116 | r = re.findall(index_format, indexname) 117 | if r: 118 | date = datetime.datetime.strptime(r[0], date_format) 119 | return date 120 | 121 | 122 | def get_to_process_indices(to_select_action, config, all_indices, base_day): 123 | """ 124 | rtype: [(indexname, index_settings, dopey_index_settings)] 125 | """ 126 | rst = [] 127 | 128 | for index_prefix, index_config in config['indices'].items(): 129 | for indexname in all_indices: 130 | date = pick_date_from_indexname(indexname, index_prefix) 131 | if date is None: 132 | continue 133 | 134 | for e in index_config: 135 | action, configs = e.keys()[0], e.values()[0] 136 | if action != to_select_action: 137 | continue 138 | 139 | offset = base_day-date 140 | if "day" in configs and offset.days == configs["day"]: 141 | index_settings = get_index_settings(config['eshost'], indexname) 142 | rst.append((indexname, index_settings, configs.get('settings'))) 143 | continue 144 | if "days" in configs: 145 | days = configs["days"] 146 | if isinstance(days, basestring): 147 | if '-' in days: 148 | from_day, to_day = days.split('-') 149 | if offset.days < int(from_day) or offset.days > int(to_day): 150 | continue 151 | else: 152 | raise BaseException("invalid config {}".format(configs)) 153 | elif offset.days < int(days): 154 | continue 155 | index_settings = get_index_settings(config['eshost'], indexname) 156 | rst.append((indexname, index_settings, configs.get('settings'))) 157 | continue 158 | 159 | if "hour" in configs and offset.days*24+offset.seconds // 3600 == configs["hour"]: 160 | index_settings = get_index_settings(config['eshost'], indexname) 161 | rst.append((indexname, index_settings, configs.get('settings'))) 162 | continue 163 | if "hours" in configs: 164 | hour = offset.days*24 + offset.seconds//3600 165 | hours = configs["hours"] 166 | if isinstance(hours, basestring): 167 | if '-' in hours: 168 | from_hour, to_hour = hours.split('-') 169 | if hour < int(from_hour) or hour > int(to_hour): 170 | continue 171 | else: 172 | raise BaseException("invalid config {}".format(configs)) 173 | elif hour < int(hours): 174 | continue 175 | index_settings = get_index_settings(config['eshost'], indexname) 176 | rst.append((indexname, index_settings, configs.get('settings'))) 177 | continue 178 | if "minute" in configs and offset.days*24*60+offset.seconds // 60 == configs["minute"]: 179 | index_settings = get_index_settings(config['eshost'], indexname) 180 | rst.append((indexname, index_settings, configs.get('settings'))) 181 | continue 182 | if "minutes" in configs: 183 | minute = offset.days*24 + offset.seconds//60 184 | minutes = configs["minutes"] 185 | if isinstance(minutes, basestring) : 186 | if '-' in minutes: 187 | from_minute, to_minute = minutes.split('-') 188 | if minute < int(from_minute) or minute > int(to_minute): 189 | continue 190 | else: 191 | raise BaseException("invalid config {}".format(configs)) 192 | elif minute < int(minutes): 193 | continue 194 | index_settings = get_index_settings(config['eshost'], indexname) 195 | rst.append((indexname, index_settings, configs.get('settings'))) 196 | continue 197 | 198 | return rst 199 | 200 | 201 | def get_to_delete_indices(config, all_indices, base_day): 202 | return get_to_process_indices( 203 | 'delete_indices', config, all_indices, base_day) 204 | 205 | 206 | def get_to_close_indices(config, all_indices, base_day): 207 | return get_to_process_indices( 208 | 'close_indices', config, all_indices, base_day) 209 | 210 | 211 | def get_to_freeze_indices(config, all_indices, base_day): 212 | return get_to_process_indices( 213 | 'freeze_indices', config, all_indices, base_day) 214 | 215 | 216 | def get_to_update_indices(config, all_indices, base_day): 217 | return get_to_process_indices( 218 | 'update_settings', config, all_indices, base_day) 219 | 220 | 221 | def get_to_optimize_indices(config, all_indices, base_day): 222 | return get_to_process_indices( 223 | 'optimize_indices', config, all_indices, base_day) 224 | 225 | 226 | def delete_indices(config, indices): 227 | """ 228 | :type indices: list of (indexname,index_settings, dopey_index_settings) 229 | :rtype: None 230 | """ 231 | if not indices: 232 | return 233 | 234 | retry = config.get('retry', 3) 235 | batch = config.get('batch', 50) 236 | indices = [e[0] for e in indices] 237 | 238 | logging.debug(u"try to delete %s" % ",".join(indices)) 239 | while indices: 240 | to_delete_indices = indices[:batch] 241 | to_delete_indices_joined = ','.join(to_delete_indices) 242 | url = u"{}/{}".format( 243 | config['eshost'], to_delete_indices_joined) 244 | logging.info(u"delete: {}".format(url)) 245 | 246 | for _ in range(retry): 247 | try: 248 | r = requests.delete( 249 | url, timeout=300, params={ 250 | "master_timeout": "10m", "ignore_unavailable": 'true'}, headers={ 251 | "content-type": "application/json"}) 252 | if r.ok: 253 | logging.info(u"%s deleted" % to_delete_indices_joined) 254 | break 255 | else: 256 | logging.warn( 257 | u"%s deleted failed. %s" % 258 | (to_delete_indices_joined, r.text)) 259 | except BaseException as e: 260 | logging.info(e) 261 | indices = indices[batch:] 262 | 263 | 264 | def close_indices(config, indices): 265 | """ 266 | :type indices: list of (indexname,index_settings, dopey_index_settings) 267 | :rtype: None 268 | """ 269 | if not indices: 270 | return 271 | 272 | retry = config.get('retry', 3) 273 | batch = config.get('batch', 50) 274 | indices = [e[0] for e in indices] 275 | 276 | while indices: 277 | to_close_indices = indices[:batch] 278 | to_close_indices_joined = ','.join(to_close_indices) 279 | logging.debug(u"try to close %s" % to_close_indices_joined) 280 | 281 | url = u"{}/{}/_close".format( 282 | config['eshost'], to_close_indices_joined) 283 | logging.info(u"close: {}".format(url)) 284 | 285 | for _ in range(retry): 286 | try: 287 | r = requests.post( 288 | url, 289 | timeout=300, 290 | params={ 291 | "master_timeout": "10m", 292 | "ignore_unavailable": 'true'}, headers={"content-type": "application/json"}) 293 | 294 | if r.ok: 295 | logging.info(u"%s closed" % to_close_indices_joined) 296 | break 297 | else: 298 | logging.warn( 299 | u"%s closed failed. %s" % 300 | (to_close_indices_joined, r.text)) 301 | except BaseException as e: 302 | logging.info(e) 303 | indices = indices[batch:] 304 | 305 | 306 | def freeze_indices(config, indices): 307 | """ 308 | :type indices: list of (indexname,index_settings, dopey_index_settings) 309 | :rtype: None 310 | """ 311 | if not indices: 312 | return 313 | 314 | retry = config.get('retry', 3) 315 | batch = config.get('batch', 50) 316 | indices = [e[0] for e in indices] 317 | 318 | while indices: 319 | to_freeze_indices = indices[:batch] 320 | to_freeze_indices_joined = ','.join(to_freeze_indices) 321 | logging.debug(u"try to freeze %s" % to_freeze_indices_joined) 322 | 323 | url = u"{}/{}/_freeze".format( 324 | config['eshost'], to_freeze_indices_joined) 325 | logging.info(u"freeze: {}".format(url)) 326 | 327 | for _ in range(retry): 328 | try: 329 | r = requests.post( 330 | url, 331 | timeout=300, 332 | params={ 333 | "master_timeout": "10m", 334 | "ignore_unavailable": 'true'}, headers={"content-type": "application/json"}) 335 | 336 | if r.ok: 337 | logging.info(u"%s freezed" % to_freeze_indices_joined) 338 | break 339 | else: 340 | logging.warn( 341 | u"%s freezed failed. %s" % 342 | (to_freeze_indices_joined, r.text)) 343 | except BaseException as e: 344 | logging.info(e) 345 | indices = indices[batch:] 346 | 347 | 348 | def find_need_to_update_indices(indices): 349 | """ 350 | :type indices: [(indexname,index_settings, dopey_index_settings)] 351 | :rtype : [(indexname,index_settings, dopey_index_settings)] 352 | """ 353 | rst = [] 354 | for index, index_settings, dopey_index_settings in indices: 355 | if_same = _compare_index_settings(dopey_index_settings, index_settings) 356 | if if_same is True: 357 | logging.info(u"%s settings is unchanged , skip" % index) 358 | continue 359 | else: 360 | logging.info( 361 | u"%s settings need to be updated. %s" % (index, 362 | json.dumps(if_same))) 363 | rst.append((index, index_settings, dopey_index_settings)) 364 | 365 | return rst 366 | 367 | 368 | def arrange_indices_by_settings(indices): 369 | """ 370 | :type indices: [(indexname,index_settings, dopey_index_settings)] 371 | :rtype: [(dopey_index_settings,[indexname])] 372 | """ 373 | rst = [] 374 | for index, index_settings, dopey_index_settings in indices: 375 | for e in rst: 376 | if dopey_index_settings == e[0]: 377 | e[1].append(index) 378 | break 379 | else: 380 | rst.append((dopey_index_settings, [index])) 381 | 382 | return rst 383 | 384 | 385 | def update_settings_same_settings(config, indices, dopey_index_settings): 386 | """ 387 | :type indices: [indexname] 388 | :rtype: None 389 | """ 390 | retry = config.get('retry', 3) 391 | batch = config.get('batch', 50) 392 | while indices: 393 | to_update_indices = indices[:batch] 394 | to_update_indices_joined = ','.join(to_update_indices) 395 | 396 | url = u"{}/{}/_settings".format( 397 | config["eshost"], to_update_indices_joined) 398 | logging.debug(u"update settings: %s", url) 399 | 400 | for _ in range(retry): 401 | try: 402 | r = requests.put( 403 | url, 404 | timeout=300, 405 | params={ 406 | "master_timeout": "10m", 407 | "ignore_unavailable": 'true'}, 408 | data=json.dumps(dopey_index_settings), headers={"content-type": "application/json"}) 409 | 410 | if r.ok: 411 | logging.info(u"%s updated" % to_update_indices_joined) 412 | break 413 | else: 414 | logging.warn( 415 | u"%s updated failed. %s" % 416 | (to_update_indices_joined, r.text)) 417 | except BaseException as e: 418 | logging.info(e) 419 | 420 | indices = indices[batch:] 421 | 422 | 423 | def update_settings(config, indices): 424 | """ 425 | :type indices: [(indexname,index_settings, dopey_index_settings)] 426 | :rtype: None 427 | """ 428 | if not indices: 429 | return 430 | 431 | logging.debug(u"try to update index settings %s" % 432 | ','.join([e[0] for e in indices])) 433 | 434 | need_to_update_indices = find_need_to_update_indices(indices) 435 | logging.debug(u"need_to_update_indices: %s", need_to_update_indices) 436 | 437 | to_update_indices = arrange_indices_by_settings(need_to_update_indices) 438 | logging.debug(u"to_update_indices: %s", to_update_indices) 439 | 440 | for dopey_index_settings, indices in to_update_indices: 441 | update_settings_same_settings( 442 | config, indices, dopey_index_settings) 443 | 444 | 445 | def optimize_indices(config, indices): 446 | """ 447 | :type indices: [(indexname,index_settings, dopey_index_settings)] 448 | :rtype: None 449 | """ 450 | arranged_indices = arrange_indices_by_settings(indices) 451 | 452 | retry = config.get('retry', 1) 453 | batch = config.get('batch', 50) 454 | 455 | for dopey_index_settings, indices in arranged_indices: 456 | if not dopey_index_settings: 457 | dopey_index_settings = {} 458 | dopey_index_settings.setdefault("max_num_segments", 1) 459 | while indices: 460 | to_optimize_indices = indices[:batch] 461 | to_optimize_indices_joined = ','.join(to_optimize_indices) 462 | url = u"{}/{}/_forcemerge".format( 463 | config["eshost"], to_optimize_indices_joined) 464 | logging.debug(u"forcemerge: %s" % url) 465 | 466 | for _ in range(retry): 467 | try: 468 | r = requests.post(url, headers={"content-type": "application/json"}, params=dopey_index_settings) 469 | if r.ok: 470 | logging.info(u"%s forcemerged" % to_optimize_indices_joined) 471 | break 472 | else: 473 | logging.warn( 474 | u"%s forcemerge failed. %s" % 475 | (to_optimize_indices_joined, r.text)) 476 | except BaseException as e: 477 | logging.info(e) 478 | 479 | indices = indices[batch:] 480 | --------------------------------------------------------------------------------