├── README.md
├── SCHEDULER
├── elric
│   ├── __init__.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── exceptions.py
│   │   ├── job.py
│   │   ├── lock.py
│   │   ├── log.py
│   │   ├── settings.py
│   │   └── utils.py
│   ├── dupefilter
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── memoryfilter.py
│   │   └── redisfilter.py
│   ├── executor
│   │   ├── __init__.py
│   │   ├── base.py
│   │   └── pool.py
│   ├── jobqueue
│   │   ├── __init__.py
│   │   ├── base.py
│   │   └── rqueue.py
│   ├── jobstore
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── memory.py
│   │   └── mongodb.py
│   ├── master
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── rqbase.py
│   │   └── rqextend.py
│   ├── trigger
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── cron
│   │   │   ├── __init__.py
│   │   │   ├── expressions.py
│   │   │   └── fields.py
│   │   ├── date.py
│   │   ├── interval.py
│   │   └── tool.py
│   └── worker
│       ├── __init__.py
│       ├── base.py
│       └── rqueue.py
├── example
│   ├── __init__.py
│   ├── test_master.py
│   ├── test_worker.py
│   └── test_worker2.py
└── settings.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Elric
=====
A simple distributed job scheduler based on Redis and APScheduler.

Features:
- ***Master-Slave Architecture***
- ***Distributed Jobs Based on Redis***
- ***Supports Cron/Date/Interval/Immediate Jobs***

Usage
-----
Set up the environment in `settings.py`:

```python
DISTRIBUTED_LOCK_CONFIG = {
    'server': {
        'host': 'localhost',
        'port': 6379,
        'password': None,
        'db': 1,
    },
    'resource': 'elric_distributed_lock',
    'retry_count': 5,
    'retry_delay': 0.2,
}

JOB_QUEUE_CONFIG = {
    'server': {
        'host': 'localhost',
        'port': 6379,
        'password': None,
        'db': 1,
    },
    'max_length': 100000,
    'buffer_time': 10
}

FILTER_CONFIG = {
    'server': {
        'host': 'localhost',
        'port': 6379,
        'password': None,
        'db': 0,
    }
}

JOB_STORE_CONFIG = {
    'server': {},
    'maximum_records': 3
}
```

Create a master instance and start it:

```python
rq_master = RQMasterExtend()
rq_master.start()
```

Implement some jobs/functions:

```python
def test_job(language=None):
    print('my favorite language is {language}'.format(language=language))
```

Create a worker instance, specifying the worker's name and listen keys, then submit a job and start the worker:

```python
# the worker will only receive jobs from the listen_keys provided here
rq_worker = RQWorker(name='test', listen_keys=['job1', ])
# submit the job to the master
rq_worker.submit_job(test_job, 'job1', kwargs={'language': 'python'})
# start the worker; it will then receive and execute jobs from the master by
# listening to the job queues on the listen keys you provided
rq_worker.start()
```

Running the example code
------------------------

This example illustrates how to submit different types of jobs to the master.

Step 1. Set up the environment in settings.py

Step 2. Start the master
```
cd example
python test_master.py
```

Step 3. Start the worker
```
python test_worker.py
```
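Settings discovery
------------------
Elric loads its configuration from the module named by the `ELRIC_SETTINGS_MODULE` environment variable (see `elric/core/settings.py`), which must define `JOB_QUEUE_CONFIG`, `JOB_STORE_CONFIG`, `FILTER_CONFIG` and `DISTRIBUTED_LOCK_CONFIG`. A minimal launcher sketch, assuming your settings module is importable as `settings` (as in the bundled `example/` folder):

```python
import os

# Must be set before importing elric modules, because elric.core.settings
# resolves and validates the settings module at import time.
os.environ.setdefault('ELRIC_SETTINGS_MODULE', 'settings')

from elric.master.rqextend import RQMasterExtend

rq_master = RQMasterExtend()
rq_master.start()
```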

For more information
--------------------
Documentation is [described in this blog post](http://masutangu.com/2016/07/elric-documentation/).
--------------------------------------------------------------------------------
/SCHEDULER:
--------------------------------------------------------------------------------
2015.5.5
Project started; laid down the overall framework.

2015.5.7
Implemented Redis-based multi-process/multi-thread execution.

2015.5.10
Implemented scheduled (timed) jobs.
--------------------------------------------------------------------------------
/elric/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import (absolute_import, unicode_literals)
--------------------------------------------------------------------------------
/elric/core/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import (absolute_import, unicode_literals)

from elric.core.settings import settings
--------------------------------------------------------------------------------
/elric/core/exceptions.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import (absolute_import, unicode_literals)


class AlreadyRunningException(Exception):
    pass


class JobAlreadyExist(Exception):
    pass


class JobDoesNotExist(Exception):
    pass


class AddFilterFailed(Exception):
    pass


class WrongType(Exception):
    pass


class ParseConfigurationError(Exception):
    pass


class ImproperlyConfigured(Exception):
    pass
--------------------------------------------------------------------------------
/elric/core/job.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import (absolute_import, unicode_literals)

from uuid import uuid4
import six

try:
    import cPickle as pickle
except ImportError:
    import pickle

from elric.core.utils import ref_to_obj, obj_to_ref, check_callable_args
from elric.trigger.base import BaseTrigger
from elric.core.exceptions import WrongType


class Job(object):

    def __init__(self, **job_in_dict):
        id = job_in_dict.get('id', None)
        func = job_in_dict.get('func', None)
        args = job_in_dict.get('args', None)
        kwargs = job_in_dict.get('kwargs', None)
        trigger = job_in_dict.get('trigger', None)
        next_run_time = job_in_dict.get('next_run_time', None)
        job_key = job_in_dict.get('job_key', None)
        need_filter = job_in_dict.get('need_filter', False)
        replace_exist = job_in_dict.get('replace_exist', False)
        is_success = job_in_dict.get('is_success', None)
        details = job_in_dict.get('details', None)
        ref_to_func = None
        if isinstance(func, six.string_types):
            ref_to_func = func
        elif callable(func):
            ref_to_func = obj_to_ref(func)
        if trigger and not isinstance(trigger, BaseTrigger):
            raise WrongType
        if trigger:
            next_run_time = next_run_time or trigger.get_next_trigger_time(None)

        self.args = tuple(args) if args is not None else ()
        self.kwargs = dict(kwargs) if kwargs is not None else {}
        self.trigger = trigger
        self.next_run_time = next_run_time
        self.id = id or uuid4().hex
        self.__func = ref_to_func
        self.job_key = job_key
        self.need_filter = need_filter
        self.replace_exist = replace_exist
        self.is_success = is_success
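        # note: is_success and details (below) are execution-result fields; they stay None
        # at submit time and are filled in when a worker reports back (see finish_job /
        # save_execute_record)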
        self.details = details

    def serialize(self):
        """
        Return a pickled dict representation of the job.
        :return: pickled bytes
        """
        job_in_dict = {
            'id': self.id,
            'func': self.__func,
            'trigger': self.trigger,
            'next_run_time': self.next_run_time,
            'args': self.args,
            'kwargs': self.kwargs,
            'job_key': self.job_key,
            'need_filter': self.need_filter,
            'replace_exist': self.replace_exist,
            'is_success': self.is_success,
            'details': self.details
        }
        return pickle.dumps(job_in_dict, pickle.HIGHEST_PROTOCOL)

    def check(self):
        check_callable_args(self.func, self.args, self.kwargs)

    @property
    def func(self):
        return ref_to_obj(self.__func)

    @property
    def filter_key(self):
        return "%s:filter" % self.job_key

    @classmethod
    def deserialize(cls, serialization):
        job_in_dict = pickle.loads(serialization)
        return cls(**job_in_dict)

    @classmethod
    def get_serial_run_times(cls, job, now):
        run_times = []
        next_run_time = job.next_run_time
        while next_run_time and next_run_time <= now:
            run_times.append(next_run_time)
            next_run_time = job.trigger.get_next_trigger_time(next_run_time, now)

        return run_times

    @classmethod
    def get_next_trigger_time(cls, job, run_time):
        return job.trigger.get_next_trigger_time(run_time)
--------------------------------------------------------------------------------
/elric/core/lock.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import (absolute_import, unicode_literals)

import redlock
import time


class distributed_lock(object):
    def __init__(self, **config):
        self.config = config
        self.dlm = redlock.Redlock([config['server'], ],
                                   retry_count=config['retry_count'],
                                   retry_delay=config['retry_delay'])
        self.dlm_lock = None

    def __enter__(self):
        # keep retrying (sleeping between attempts) until the lock is acquired;
        # the lock is held with a 1000 ms TTL
        while not self.dlm_lock:
            self.dlm_lock = self.dlm.lock(self.config['resource'], 1000)
            if not self.dlm_lock:
                time.sleep(self.config['retry_delay'])

    def __exit__(self, type, value, traceback):
        self.dlm.unlock(self.dlm_lock)
        self.dlm_lock = None
--------------------------------------------------------------------------------
/elric/core/log.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import (absolute_import, unicode_literals)

from logging.config import dictConfig

from elric.core import settings


DEFAULT_LOGGING = {
    'version': 1,
    'disable_existing_loggers': False,
    'formatters': {
        'standard': {
            'format': '[%(asctime)s]-[%(levelname)s]-[%(filename)s %(funcName)s():line %(lineno)s]-[%(message)s]',
        }
    },
    'handlers': {
        'console': {
            'level': 'INFO',
            'class': 'logging.StreamHandler',
            'formatter': 'standard',
        },
    },
    'loggers': {
        'elric.master': {
            'handlers': ['console'],
            'level': 'INFO',
        },
        'elric.worker': {
            'handlers': ['console'],
            'level': 'INFO',
        },
    }
}


def init_logging_config():
    config = DEFAULT_LOGGING
    try:
        if settings.LOGGING_CONFIG:
            config = settings.LOGGING_CONFIG
    except AttributeError:
        pass

    dictConfig(config)
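# Example (hypothetical): a project can override DEFAULT_LOGGING by defining
# LOGGING_CONFIG in its settings module; init_logging_config() prefers it when
# present. The dict follows the standard logging.config.dictConfig schema:
#
# LOGGING_CONFIG = {
#     'version': 1,
#     'disable_existing_loggers': False,
#     'handlers': {
#         'console': {'class': 'logging.StreamHandler', 'level': 'DEBUG'},
#     },
#     'loggers': {
#         'elric.master': {'handlers': ['console'], 'level': 'DEBUG'},
#         'elric.worker': {'handlers': ['console'], 'level': 'DEBUG'},
#     },
# }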
-------------------------------------------------------------------------------- /elric/core/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | from __future__ import absolute_import, unicode_literals 3 | 4 | import importlib 5 | import os 6 | 7 | from elric.core.exceptions import ImproperlyConfigured 8 | 9 | ENVIRONMENT_VARIABLE = "ELRIC_SETTINGS_MODULE" 10 | 11 | 12 | class Settings(object): 13 | def _setup(self): 14 | settings_module = os.environ.get(ENVIRONMENT_VARIABLE) 15 | if not settings_module: 16 | raise ImproperlyConfigured("You must define the environment variable {}".format(ENVIRONMENT_VARIABLE)) 17 | 18 | self.SETTINGS_MODULE = settings_module 19 | 20 | s = importlib.import_module(self.SETTINGS_MODULE) 21 | required_settings = ('JOB_QUEUE_CONFIG', 'JOB_STORE_CONFIG', 'FILTER_CONFIG', 'DISTRIBUTED_LOCK_CONFIG') 22 | for setting in required_settings: 23 | if not getattr(s, setting, None): 24 | raise ImproperlyConfigured("'{}' setting is not defined properly".format(setting)) 25 | 26 | self.settings = s 27 | 28 | def __getattr__(self, item): 29 | return getattr(self.settings, item) 30 | 31 | def __init__(self): 32 | self._setup() 33 | 34 | settings = Settings() 35 | -------------------------------------------------------------------------------- /elric/core/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """note: this file contains utils function copy from apscheduler and rq""" 3 | 4 | from __future__ import (absolute_import, unicode_literals) 5 | 6 | import six 7 | import signal 8 | from inspect import isfunction, ismethod, getargspec 9 | from datetime import date, datetime, time, timedelta, tzinfo 10 | from pytz import timezone, utc 11 | import re 12 | from calendar import timegm 13 | try: 14 | from inspect import signature 15 | except ImportError: # pragma: nocover 16 | try: 17 | from funcsigs import signature 18 | except ImportError: 19 | signature = None 20 | 21 | 22 | _signames = dict((getattr(signal, signame), signame) 23 | for signame in dir(signal) 24 | if signame.startswith('SIG') and '_' not in signame) 25 | 26 | 27 | def get_callable_name(func): 28 | """ 29 | Returns the best available display name for the given function/callable. 30 | 31 | :rtype: str 32 | """ 33 | 34 | # the easy case (on Python 3.3+) 35 | if hasattr(func, '__qualname__'): 36 | return func.__qualname__ 37 | 38 | # class methods, bound and unbound methods 39 | f_self = getattr(func, '__self__', None) or getattr(func, 'im_self', None) 40 | if f_self and hasattr(func, '__name__'): 41 | f_class = f_self if isinstance(f_self, type) else f_self.__class__ 42 | else: 43 | f_class = getattr(func, 'im_class', None) 44 | 45 | if f_class and hasattr(func, '__name__'): 46 | return '%s.%s' % (f_class.__name__, func.__name__) 47 | 48 | # class or class instance 49 | if hasattr(func, '__call__'): 50 | # class 51 | if hasattr(func, '__name__'): 52 | return func.__name__ 53 | 54 | # instance of a class with a __call__ method 55 | return func.__class__.__name__ 56 | 57 | raise TypeError('Unable to determine a name for %r -- maybe it is not a callable?' % func) 58 | 59 | 60 | def obj_to_ref(obj): 61 | """ 62 | Returns the path to the given object. 
63 | 64 | :rtype: str 65 | """ 66 | 67 | try: 68 | ref = '%s:%s' % (obj.__module__, get_callable_name(obj)) 69 | obj2 = ref_to_obj(ref) 70 | if obj != obj2: 71 | raise ValueError 72 | except Exception: 73 | raise ValueError('Cannot determine the reference to %r' % obj) 74 | 75 | return ref 76 | 77 | 78 | def ref_to_obj(ref): 79 | """ 80 | Returns the object pointed to by ``ref``. 81 | 82 | :type ref: str 83 | """ 84 | 85 | if not isinstance(ref, six.string_types): 86 | raise TypeError('References must be strings') 87 | if ':' not in ref: 88 | raise ValueError('Invalid reference') 89 | 90 | modulename, rest = ref.split(':', 1) 91 | try: 92 | obj = __import__(modulename) 93 | except ImportError: 94 | raise LookupError('Error resolving reference %s: could not import module' % ref) 95 | 96 | try: 97 | for name in modulename.split('.')[1:] + rest.split('.'): 98 | obj = getattr(obj, name) 99 | return obj 100 | except Exception: 101 | raise LookupError('Error resolving reference %s: error looking up object' % ref) 102 | 103 | 104 | def check_callable_args(func, args, kwargs): 105 | """ 106 | Ensures that the given callable can be called with the given arguments. 107 | 108 | :type args: tuple 109 | :type kwargs: dict 110 | """ 111 | 112 | pos_kwargs_conflicts = [] # parameters that have a match in both args and kwargs 113 | positional_only_kwargs = [] # positional-only parameters that have a match in kwargs 114 | unsatisfied_args = [] # parameters in signature that don't have a match in args or kwargs 115 | unsatisfied_kwargs = [] # keyword-only arguments that don't have a match in kwargs 116 | unmatched_args = list(args) # args that didn't match any of the parameters in the signature 117 | unmatched_kwargs = list(kwargs) # kwargs that didn't match any of the parameters in the signature 118 | has_varargs = has_var_kwargs = False # indicates if the signature defines *args and **kwargs respectively 119 | 120 | if signature: 121 | try: 122 | sig = signature(func) 123 | except ValueError: 124 | return # signature() doesn't work against every kind of callable 125 | 126 | for param in six.itervalues(sig.parameters): 127 | if param.kind == param.POSITIONAL_OR_KEYWORD: 128 | if param.name in unmatched_kwargs and unmatched_args: 129 | pos_kwargs_conflicts.append(param.name) 130 | elif unmatched_args: 131 | del unmatched_args[0] 132 | elif param.name in unmatched_kwargs: 133 | unmatched_kwargs.remove(param.name) 134 | elif param.default is param.empty: 135 | unsatisfied_args.append(param.name) 136 | elif param.kind == param.POSITIONAL_ONLY: 137 | if unmatched_args: 138 | del unmatched_args[0] 139 | elif param.name in unmatched_kwargs: 140 | unmatched_kwargs.remove(param.name) 141 | positional_only_kwargs.append(param.name) 142 | elif param.default is param.empty: 143 | unsatisfied_args.append(param.name) 144 | elif param.kind == param.KEYWORD_ONLY: 145 | if param.name in unmatched_kwargs: 146 | unmatched_kwargs.remove(param.name) 147 | elif param.default is param.empty: 148 | unsatisfied_kwargs.append(param.name) 149 | elif param.kind == param.VAR_POSITIONAL: 150 | has_varargs = True 151 | elif param.kind == param.VAR_KEYWORD: 152 | has_var_kwargs = True 153 | else: 154 | if not isfunction(func) and not ismethod(func) and hasattr(func, '__call__'): 155 | func = func.__call__ 156 | 157 | try: 158 | argspec = getargspec(func) 159 | except TypeError: 160 | return # getargspec() doesn't work certain callables 161 | 162 | argspec_args = argspec.args if not ismethod(func) else argspec.args[1:] 163 | arg_defaults 
= dict(zip(reversed(argspec_args), argspec.defaults or ())) 164 | has_varargs = bool(argspec.varargs) 165 | has_var_kwargs = bool(argspec.keywords) 166 | for arg in argspec_args: 167 | if arg in unmatched_kwargs and unmatched_args: 168 | pos_kwargs_conflicts.append(arg) 169 | elif unmatched_args: 170 | del unmatched_args[0] 171 | elif arg in unmatched_kwargs: 172 | unmatched_kwargs.remove(arg) 173 | elif arg not in arg_defaults: 174 | unsatisfied_args.append(arg) 175 | 176 | # Make sure there are no conflicts between args and kwargs 177 | if pos_kwargs_conflicts: 178 | raise ValueError('The following arguments are supplied in both args and kwargs: %s' % 179 | ', '.join(pos_kwargs_conflicts)) 180 | 181 | # Check if keyword arguments are being fed to positional-only parameters 182 | if positional_only_kwargs: 183 | raise ValueError('The following arguments cannot be given as keyword arguments: %s' % 184 | ', '.join(positional_only_kwargs)) 185 | 186 | # Check that the number of positional arguments minus the number of matched kwargs matches the argspec 187 | if unsatisfied_args: 188 | raise ValueError('The following arguments have not been supplied: %s' % ', '.join(unsatisfied_args)) 189 | 190 | # Check that all keyword-only arguments have been supplied 191 | if unsatisfied_kwargs: 192 | raise ValueError('The following keyword-only arguments have not been supplied in kwargs: %s' % 193 | ', '.join(unsatisfied_kwargs)) 194 | 195 | # Check that the callable can accept the given number of positional arguments 196 | if not has_varargs and unmatched_args: 197 | raise ValueError('The list of positional arguments is longer than the target callable can handle ' 198 | '(allowed: %d, given in args: %d)' % (len(args) - len(unmatched_args), len(args))) 199 | 200 | # Check that the callable can accept the given keyword arguments 201 | if not has_var_kwargs and unmatched_kwargs: 202 | raise ValueError('The target callable does not accept the following keyword arguments: %s' % 203 | ', '.join(unmatched_kwargs)) 204 | 205 | 206 | def signal_name(signum): 207 | # Hackety-hack-hack: is there really no better way to reverse lookup the 208 | # signal name? If you read this and know a way: please provide a patch :) 209 | try: 210 | return _signames[signum] 211 | except KeyError: 212 | return 'SIG_UNKNOWN' 213 | 214 | 215 | 216 | def timedelta_seconds(delta): 217 | """ 218 | Converts the given timedelta to seconds. 219 | 220 | :type delta: timedelta 221 | :rtype: float 222 | """ 223 | return delta.days * 24 * 60 * 60 + delta.seconds + \ 224 | delta.microseconds / 1000000.0 225 | 226 | 227 | def astimezone(obj): 228 | """ 229 | Interprets an object as a timezone. 
    :rtype: tzinfo
    """

    if isinstance(obj, six.string_types):
        return timezone(obj)
    if isinstance(obj, tzinfo):
        if not hasattr(obj, 'localize') or not hasattr(obj, 'normalize'):
            raise TypeError('Only timezones from the pytz library are supported')
        if obj.zone == 'local':
            raise ValueError('Unable to determine the name of the local timezone -- use an explicit timezone instead')
        return obj
    if obj is not None:
        raise TypeError('Expected tzinfo, got %s instead' % obj.__class__.__name__)


_DATE_REGEX = re.compile(
    r'(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})'
    r'(?: (?P<hour>\d{1,2}):(?P<minute>\d{1,2}):(?P<second>\d{1,2})'
    r'(?:\.(?P<microsecond>\d{1,6}))?)?')


def convert_to_datetime(input, tz, arg_name):
    """
    Converts the given object to a timezone aware datetime object.
    If a timezone aware datetime object is passed, it is returned unmodified.
    If a native datetime object is passed, it is given the specified timezone.
    If the input is a string, it is parsed as a datetime with the given timezone.

    Date strings are accepted in three different forms: date only (Y-m-d),
    date with time (Y-m-d H:M:S) or with date+time with microseconds
    (Y-m-d H:M:S.micro).

    :param str|datetime input: the datetime or string to convert to a timezone aware datetime
    :param datetime.tzinfo tz: timezone to interpret ``input`` in
    :param str arg_name: the name of the argument (used in an error message)
    :rtype: datetime
    """

    if input is None:
        return
    elif isinstance(input, datetime):
        datetime_ = input
    elif isinstance(input, date):
        datetime_ = datetime.combine(input, time())
    elif isinstance(input, six.string_types):
        m = _DATE_REGEX.match(input)
        if not m:
            raise ValueError('Invalid date string')
        values = [(k, int(v or 0)) for k, v in m.groupdict().items()]
        values = dict(values)
        datetime_ = datetime(**values)
    else:
        raise TypeError('Unsupported type for %s: %s' % (arg_name, input.__class__.__name__))

    if datetime_.tzinfo is not None:
        return datetime_
    if tz is None:
        raise ValueError('The "tz" argument must be specified if %s has no timezone information' % arg_name)
    if isinstance(tz, six.string_types):
        tz = timezone(tz)

    try:
        return tz.localize(datetime_, is_dst=None)
    except AttributeError:
        raise TypeError('Only pytz timezones are supported (need the localize() and normalize() methods)')


def datetime_to_utc_timestamp(timeval):
    """
    Converts a datetime instance to a timestamp.

    :type timeval: datetime
    :rtype: float
    """

    if timeval is not None:
        return timegm(timeval.utctimetuple()) + float(timeval.microsecond) / 1000000


def utc_timestamp_to_datetime(timestamp):
    """
    Converts the given timestamp to a datetime instance.

    :type timestamp: float
    :rtype: datetime
    """

    if timestamp is not None:
        return datetime.fromtimestamp(timestamp, utc)


def datetime_repr(dateval):
    return dateval.strftime('%Y-%m-%d %H:%M:%S %Z') if dateval else 'None'


def datetime_ceil(dateval):
    """
    Rounds the given datetime object upwards.
329 | 330 | :type dateval: datetime 331 | """ 332 | 333 | if dateval.microsecond > 0: 334 | return dateval + timedelta(seconds=1, microseconds=-dateval.microsecond) 335 | return dateval 336 | 337 | 338 | def asint(text): 339 | """ 340 | Safely converts a string to an integer, returning None if the string is None. 341 | 342 | :type text: str 343 | :rtype: int 344 | """ 345 | 346 | if text is not None: 347 | return int(text) -------------------------------------------------------------------------------- /elric/dupefilter/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) -------------------------------------------------------------------------------- /elric/dupefilter/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | 4 | from abc import ABCMeta, abstractmethod 5 | 6 | 7 | class BaseFilter(object): 8 | """ 9 | baseclass of Filter 10 | """ 11 | __metaclass__ = ABCMeta 12 | 13 | @abstractmethod 14 | def exist(self, key, value): 15 | raise NotImplementedError 16 | 17 | @abstractmethod 18 | def add(self, key, value): 19 | raise NotImplementedError 20 | 21 | @abstractmethod 22 | def clear(self, key): 23 | raise NotImplementedError -------------------------------------------------------------------------------- /elric/dupefilter/memoryfilter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | 4 | from elric.dupefilter.base import BaseFilter 5 | 6 | 7 | class MemoryFilter(BaseFilter): 8 | 9 | def __init__(self): 10 | self.memory_filter = {} 11 | 12 | def exist(self, key, value): 13 | """ 14 | check if value already exist 15 | if exist return 1 16 | if not exist return 0 17 | """ 18 | return value in self.memory_filter.get(key, set()) 19 | 20 | def add(self, key, value): 21 | self.memory_filter.setdefault(key, set()).add(value) 22 | 23 | def clear(self, key): 24 | """Clears fingerprints data""" 25 | self.memory_filter.pop(key, set()) 26 | -------------------------------------------------------------------------------- /elric/dupefilter/redisfilter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | 4 | import redis 5 | 6 | from elric.dupefilter.base import BaseFilter 7 | 8 | 9 | class RedisFilter(BaseFilter): 10 | 11 | def __init__(self, **config): 12 | self.server = redis.Redis(**config['server']) 13 | 14 | def exist(self, key, value): 15 | """ 16 | check if value already exist 17 | if exist return 1 18 | if not exist return 0 19 | """ 20 | return self.server.sismember(key, value) 21 | 22 | def add(self, key, value): 23 | self.server.sadd(key, value) 24 | 25 | def clear(self, key): 26 | """Clears fingerprints data""" 27 | self.server.delete(key) -------------------------------------------------------------------------------- /elric/executor/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | -------------------------------------------------------------------------------- /elric/executor/base.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | 4 | from abc import ABCMeta, abstractmethod 5 | 6 | 7 | class BaseExecutor(object): 8 | __metaclass__ = ABCMeta 9 | 10 | def __init__(self, context): 11 | self.context = context 12 | 13 | @abstractmethod 14 | def execute_job(self, job): 15 | raise NotImplementedError 16 | 17 | @abstractmethod 18 | def shutdown(self): 19 | raise NotImplementedError 20 | -------------------------------------------------------------------------------- /elric/executor/pool.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | 4 | import concurrent.futures 5 | 6 | from elric.executor.base import BaseExecutor 7 | 8 | 9 | class ProcessPoolExecutor(BaseExecutor): 10 | 11 | def __init__(self, max_workers, context): 12 | BaseExecutor.__init__(self, context) 13 | self._pool = concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) 14 | self.context.log.debug('start executor..') 15 | 16 | def execute_job(self, job): 17 | def job_done(f): 18 | self.context.internal_job_queue.get(False) 19 | if f.exception(): 20 | self.context.log.error('job [%s] occurs error. exception info [%s]' % (job.id, f.exception_info())) 21 | else: 22 | self.context.log.debug('job [%s] finish, result=[%s]' % (job.id, f.result())) 23 | 24 | self.context.finish_job(job.id, False if f.exception() else True, 25 | str(f.exception_info()) if f.exception() else None, 26 | job.job_key, job.need_filter) 27 | future = self._pool.submit(job.func, *job.args, **job.kwargs) 28 | future.add_done_callback(job_done) 29 | 30 | def shutdown(self, wait=True): 31 | self._pool.shutdown(wait) 32 | -------------------------------------------------------------------------------- /elric/jobqueue/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | 4 | -------------------------------------------------------------------------------- /elric/jobqueue/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | 4 | from abc import ABCMeta, abstractmethod 5 | 6 | 7 | class JobQueue(object): 8 | """ 9 | baseclass of job queue 10 | redis job queue/zmq job queue or customer job queue should inherit from this class 11 | """ 12 | __metaclass__ = ABCMeta 13 | 14 | def __init__(self, context): 15 | self.context = context 16 | 17 | @abstractmethod 18 | def __len__(self, key): 19 | raise NotImplementedError 20 | 21 | @abstractmethod 22 | def enqueue(self, key, value): 23 | raise NotImplementedError 24 | 25 | @abstractmethod 26 | def dequeue(self, key, timeout=0): 27 | raise NotImplementedError 28 | 29 | @abstractmethod 30 | def dequeue_any(self, queue_keys, timeout=0): 31 | raise NotImplementedError 32 | 33 | @abstractmethod 34 | def is_full(self, key): 35 | raise NotImplementedError 36 | 37 | @abstractmethod 38 | def clear(self, key): 39 | raise NotImplementedError 40 | 41 | 42 | -------------------------------------------------------------------------------- /elric/jobqueue/rqueue.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import 
(absolute_import, unicode_literals) 3 | 4 | import redis 5 | 6 | from elric.jobqueue.base import JobQueue 7 | from elric.core.exceptions import WrongType 8 | 9 | 10 | class RedisJobQueue(JobQueue): 11 | def __init__(self, context, **config): 12 | JobQueue.__init__(self, context) 13 | self.server = redis.Redis(**config['server']) 14 | self.max_length = config['max_length'] 15 | 16 | def __len__(self, key): 17 | return self.server.llen(key) 18 | 19 | def enqueue(self, key, value): 20 | return self.server.lpush(key, value) 21 | 22 | def dequeue(self, key, timeout=0): 23 | data = self.server.brpop(key, timeout) 24 | if isinstance(data, tuple): 25 | data = data[1] 26 | if data: 27 | return data 28 | 29 | def dequeue_any(self, queue_keys, timeout=0): 30 | if not isinstance(queue_keys, (tuple, list)): 31 | raise WrongType('queue_keys: [%s] must be tuple or list' % queue_keys) 32 | result = self.server.brpop(queue_keys, timeout) 33 | if result: 34 | queue_key, data = result 35 | return queue_key, data 36 | 37 | def is_full(self, key): 38 | return self.server.llen(key) >= self.max_length 39 | 40 | def clear(self, key): 41 | self.server.delete(key) -------------------------------------------------------------------------------- /elric/jobstore/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | -------------------------------------------------------------------------------- /elric/jobstore/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | 4 | from abc import ABCMeta, abstractmethod 5 | 6 | 7 | class BaseJobStore(object): 8 | __metaclass__ = ABCMeta 9 | 10 | def __init__(self, context): 11 | self.context = context 12 | 13 | @abstractmethod 14 | def add_job(self, job): 15 | raise NotImplementedError('subclasses of BaseJobStore must provide a add_job() method') 16 | 17 | @abstractmethod 18 | def update_job(self, job): 19 | raise NotImplementedError('subclasses of BaseJobStore must provide a update_job() method') 20 | 21 | @abstractmethod 22 | def remove_job(self, job): 23 | raise NotImplementedError('subclasses of BaseJobStore must provide a remove_job() method') 24 | 25 | @abstractmethod 26 | def save_execute_record(self, job): 27 | raise NotImplementedError('subclasses of BaseJobStore must provide a save_execute_record() method') 28 | 29 | @abstractmethod 30 | def get_due_jobs(self, now): 31 | raise NotImplementedError('subclasses of BaseJobStore must provide a get_due_jobs() method') 32 | 33 | @abstractmethod 34 | def get_closest_run_time(self): 35 | raise NotImplementedError('subclasses of BaseJobStore must provide a get_closest_run_time() method') 36 | -------------------------------------------------------------------------------- /elric/jobstore/memory.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | 4 | from collections import deque 5 | import datetime 6 | 7 | from elric.jobstore.base import BaseJobStore 8 | from elric.core.utils import datetime_to_utc_timestamp, utc_timestamp_to_datetime 9 | from elric.core.exceptions import JobAlreadyExist, JobDoesNotExist 10 | 11 | class MemoryJobStore(BaseJobStore): 12 | 13 | def __init__(self, context, **config): 14 | BaseJobStore.__init__(self, context) 15 | self.job_info = {} 16 
| self.job_execute_records = {} 17 | self.job_run_time = [] 18 | self.max_preserve_records = config['maximum_records'] 19 | 20 | def add_job(self, job): 21 | """ 22 | add job 23 | :type job: Job 24 | """ 25 | if job.id in self.job_info: 26 | raise JobAlreadyExist("add job failed! job [%s] has already exist" % job.id) 27 | next_timestamp = datetime_to_utc_timestamp(job.next_run_time) 28 | index = self._get_job_index(job.id, next_timestamp) 29 | self.job_run_time.insert(index, (job.id, next_timestamp)) 30 | self.job_info[job.id] = {'serialized_job': job.serialize(), 'job_key': job.job_key, 31 | 'next_timestamp': next_timestamp} 32 | 33 | def update_job(self, job): 34 | """ 35 | update job 36 | :type job: Job 37 | """ 38 | if job.id not in self.job_info: 39 | raise JobDoesNotExist("update job failed! job [%s] does not exist" % job.id) 40 | job_info = self.job_info[job.id] 41 | if job.job_key is not None: 42 | job_info['job_key'] = job.job_key 43 | 44 | job_info['serialized_job'] = job.serialize() 45 | 46 | new_timestamp = datetime_to_utc_timestamp(job.next_run_time) 47 | old_timestamp = job_info['next_timestamp'] 48 | if new_timestamp != old_timestamp: 49 | old_index = self._get_job_index(job.id, old_timestamp) 50 | del self.job_run_time[old_index] 51 | new_index = self._get_job_index(job.id, new_timestamp) 52 | self.job_run_time.insert(new_index, (job.id, new_timestamp)) 53 | 54 | self.context.log.debug("job_run_time = %s" % self.job_run_time) 55 | 56 | def remove_job(self, job): 57 | """ 58 | remove job 59 | :type job: Job 60 | """ 61 | if job.id not in self.job_info: 62 | raise JobDoesNotExist("remove job failed! job [%s] does not exist" % job.id) 63 | job_info = self.job_info[job.id] 64 | index = self._get_job_index(job.id, job_info['next_timestamp']) 65 | del self.job_info[job.id] 66 | del self.job_run_time[index] 67 | 68 | def save_execute_record(self, job): 69 | """ 70 | save job's execute record 71 | :type job_id: str 72 | :type is_success: bool 73 | :type details: str 74 | """ 75 | if not self.job_execute_records.get(job.id): 76 | self.job_execute_records[job.id] = deque(maxlen=self.max_preserve_records) 77 | self.job_execute_records[job.id].append((job.is_success, job.details, datetime.datetime.now())) 78 | 79 | def get_due_jobs(self, now): 80 | """ 81 | Get due jobs. 
82 | :type now: datetime.datetime 83 | """ 84 | curr_timestamp = datetime_to_utc_timestamp(now) 85 | due_jobs = [] 86 | for job_id, timestamp in self.job_run_time: 87 | if timestamp is None or timestamp > curr_timestamp: 88 | break 89 | job_info = self.job_info[job_id] 90 | due_jobs.append((job_id, job_info['job_key'], job_info['serialized_job'])) 91 | 92 | return due_jobs 93 | 94 | def get_closest_run_time(self): 95 | return utc_timestamp_to_datetime(self.job_run_time[0][1]) if self.job_run_time else None 96 | 97 | def _get_job_index(self, job_id, timestamp): 98 | """ 99 | insert job_id and job_run_time into self.job_run_time sorted by job_run_time 100 | and return index 101 | :type job_id: str 102 | :type timestamp: float 103 | """ 104 | start, end = 0, len(self.job_run_time) 105 | timestamp = float('inf') if timestamp is None else timestamp 106 | while start < end: 107 | mid = (start + end) / 2 108 | mid_job_id, mid_timestamp = self.job_run_time[mid] 109 | mid_timestamp = float('inf') if mid_timestamp is None else mid_timestamp 110 | if mid_timestamp > timestamp: 111 | end = mid 112 | elif mid_timestamp < timestamp: 113 | start = mid + 1 114 | elif mid_job_id > job_id: 115 | end = mid 116 | elif mid_job_id < job_id: 117 | start = mid + 1 118 | else: 119 | return mid 120 | 121 | return start 122 | 123 | 124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /elric/jobstore/mongodb.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | 4 | import datetime 5 | 6 | from pymongo import MongoClient, errors 7 | import pymongo 8 | 9 | from elric.jobstore.base import BaseJobStore 10 | from elric.core.utils import datetime_to_utc_timestamp, utc_timestamp_to_datetime 11 | from elric.core.exceptions import JobAlreadyExist, JobDoesNotExist 12 | 13 | 14 | class MongoJobStore(BaseJobStore): 15 | def __init__(self, context, **config): 16 | BaseJobStore.__init__(self, context) 17 | self.client = MongoClient(**config['server']) 18 | self.db = self.client['elric'] 19 | self.max_preserve_records = config['maximum_records'] 20 | 21 | def add_job(self, job): 22 | """ 23 | add job 24 | :type job: Job 25 | """ 26 | next_timestamp = datetime_to_utc_timestamp(job.next_run_time) 27 | try: 28 | self.db.elric_jobs.insert_one( 29 | { 30 | "serialized_job": job.serialize().decode("raw_unicode_escape"), 31 | "job_key": job.job_key, 32 | "next_timestamp": next_timestamp, 33 | "_id": job.id, 34 | } 35 | ) 36 | except pymongo.errors.DuplicateKeyError: 37 | raise JobAlreadyExist("add job failed! job [%s] has already exist" % job.id) 38 | 39 | def update_job(self, job): 40 | """ 41 | update job 42 | :type job: Job 43 | """ 44 | update_job_info = {} 45 | if job.job_key is not None: 46 | update_job_info['job_key'] = job.job_key 47 | 48 | update_job_info['serialized_job'] = job.serialize().decode("raw_unicode_escape") 49 | update_job_info['next_timestamp'] = datetime_to_utc_timestamp(job.next_run_time) 50 | result = self.db.elric_jobs.update_one( 51 | {"_id": job.id}, 52 | { 53 | "$set": update_job_info, 54 | "$currentDate": {"lastModified": True} 55 | } 56 | ) 57 | if result.matched_count == 0: 58 | raise JobDoesNotExist("update job failed! 
job [%s] does not exist" % job.id) 59 | 60 | def remove_job(self, job): 61 | """ 62 | remove job 63 | :type job: Job 64 | """ 65 | result = self.db.elric_jobs.delete_one({"_id": job.id}) 66 | if result.deleted_count == 0: 67 | raise JobDoesNotExist("remove job failed! job [%s] does not exist" % job.id) 68 | 69 | def save_execute_record(self, job): 70 | execute_info = {"is_success": job.is_success, "details": job.details, 71 | "report_timestamp": datetime.datetime.now()} 72 | self.db.elric_execute_records.update( 73 | {"_id": job.id}, 74 | { 75 | "$push": { 76 | "execute_records": { 77 | "$each": [execute_info, ], 78 | "$sort": {"report_timestamp": 1}, 79 | "$slice": -self.max_preserve_records 80 | } 81 | } 82 | }, 83 | upsert=True, 84 | ) 85 | 86 | def get_due_jobs(self, now): 87 | """ 88 | Get due jobs. 89 | :type now: datetime.datetime 90 | """ 91 | curr_timestamp = datetime_to_utc_timestamp(now) 92 | due_jobs = [] 93 | cursor = self.db.elric_jobs.find({"next_timestamp": {"$lt": curr_timestamp}}) 94 | for job in cursor: 95 | due_jobs.append((job["_id"], job['job_key'], job['serialized_job'].encode("raw_unicode_escape"))) 96 | return due_jobs 97 | 98 | def get_closest_run_time(self): 99 | cursor = self.db.elric_jobs.find({}, {'next_timestamp': True, '_id': False}).sort([ #projection 100 | ("next_timestamp", pymongo.ASCENDING), 101 | ]).limit(1) 102 | if cursor.count() > 0: 103 | return utc_timestamp_to_datetime(cursor[0]['next_timestamp']) 104 | -------------------------------------------------------------------------------- /elric/master/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) -------------------------------------------------------------------------------- /elric/master/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | 4 | from abc import ABCMeta, abstractmethod 5 | import threading 6 | import logging 7 | 8 | from elric.core.log import init_logging_config 9 | 10 | 11 | class BaseMaster(object): 12 | __metaclass__ = ABCMeta 13 | 14 | def __init__(self): 15 | init_logging_config() 16 | self.log = logging.getLogger('elric.master') 17 | # TODO: avoid hard code 18 | self.func_map = { 19 | '__elric_submit_channel__': self.submit_job, 20 | '__elric_remove_channel__': self.remove_job, 21 | '__elric_finish_channel__': self.finish_job 22 | } 23 | 24 | @abstractmethod 25 | def start(self): 26 | raise NotImplementedError('subclasses of BaseMaster must provide a start() method') 27 | 28 | @abstractmethod 29 | def submit_job(self, job): 30 | raise NotImplementedError('subclasses of BaseMaster must provide a submit_job() method') 31 | 32 | @abstractmethod 33 | def remove_job(self, job): 34 | raise NotImplementedError('subclasses of BaseMaster must provide a remove_job() method') 35 | 36 | @abstractmethod 37 | def finish_job(self, job): 38 | raise NotImplementedError('subclasses of BaseMaster must provide a finish_job() method') 39 | 40 | @abstractmethod 41 | def subscribe_mq(self): 42 | raise NotImplementedError('subclasses of BaseMaster must provide a subscribe_mq() method') 43 | 44 | def start_subscribe_thread(self): 45 | """ 46 | Start a new thread to subscribe message queue 47 | :return: 48 | """ 49 | self.log.debug('start subscribe thread..') 50 | thd = threading.Thread(target=self.subscribe_mq) 51 | thd.setDaemon(True) 52 
| thd.start() 53 | -------------------------------------------------------------------------------- /elric/master/rqbase.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | 4 | 5 | from datetime import datetime 6 | from tzlocal import get_localzone 7 | from threading import Event, RLock 8 | import threading 9 | import time 10 | from Queue import Queue 11 | 12 | from elric.master.base import BaseMaster 13 | from elric.jobqueue.rqueue import RedisJobQueue 14 | from elric.core.exceptions import JobAlreadyExist, JobDoesNotExist, AlreadyRunningException 15 | # from jobstore.memory import MemoryJobStore 16 | from elric.jobstore.mongodb import MongoJobStore 17 | from elric.core.job import Job 18 | from elric.core.utils import timedelta_seconds 19 | from elric.core import settings 20 | from elric.core.lock import distributed_lock 21 | 22 | 23 | class RQMasterBase(BaseMaster): 24 | 25 | MIN_WAIT_TIME = 5 26 | 27 | def __init__(self, timezone=None): 28 | BaseMaster.__init__(self) 29 | self.jobqueue = RedisJobQueue(self, **settings.JOB_QUEUE_CONFIG) 30 | self.jobqueue_lock = RLock() 31 | self.timezone = timezone or get_localzone() 32 | self._event = Event() 33 | self._stopped = True 34 | self.jobstore = MongoJobStore(self, **settings.JOB_STORE_CONFIG) 35 | #self.jobstore = MemoryJobStore(self) 36 | self._internal_buffer_queues = {} 37 | self._job_maximum_buffer_time = settings.JOB_QUEUE_CONFIG['buffer_time'] 38 | 39 | def submit_job(self, job): 40 | """ 41 | process submit_job request from worker. 42 | :type job: Job 43 | """ 44 | self.log.debug('client submit job, id=%s, key=%s' % (job.id, job.job_key)) 45 | # if job doesn't contains trigger, then enqueue it into job queue immediately 46 | if not job.trigger: 47 | self._enqueue_job(job.job_key, job.serialize()) 48 | # else store job into job store first 49 | else: 50 | with distributed_lock(**settings.DISTRIBUTED_LOCK_CONFIG): 51 | try: 52 | self.jobstore.add_job(job) 53 | except JobAlreadyExist as e: 54 | if job.replace_exist: 55 | self.update_job(job) 56 | else: 57 | self.log.error(e) 58 | # wake up when new job has store into job store 59 | self.wake_up() 60 | 61 | def update_job(self, job): 62 | """ 63 | update job in jobstore 64 | :type job: Job 65 | """ 66 | try: 67 | self.jobstore.update_job(job) 68 | except JobDoesNotExist as e: 69 | self.log.error(e) 70 | 71 | def remove_job(self, job): 72 | """ 73 | remove job from jobstore 74 | :type job: Job 75 | """ 76 | try: 77 | self.jobstore.remove_job(job) 78 | except JobDoesNotExist: 79 | self.log.error('remove job error. 
job id %s does not exist' % job.id)

    def finish_job(self, job):
        """
        process finish_job request from worker
        :type job: Job
        """
        self.jobstore.save_execute_record(job)

    def _enqueue_buffer_queue(self, key, job):
        self.log.debug("job queue [%s] is full, put job into buffer queue" % key)
        try:
            self._internal_buffer_queues[key].put((job, datetime.now()))
        except KeyError:
            self._internal_buffer_queues[key] = Queue()
            self._internal_buffer_queues[key].put((job, datetime.now()))
            self.start_process_buffer_job(key)

    def _enqueue_job(self, key, job):
        """
        enqueue job into jobqueue
        :type key: str
        :type job: str or xmlrpc.Binary
        """
        self.log.debug('enqueue job key=[%s]' % key)
        with self.jobqueue_lock:
            # check whether job queue is full
            if not self.jobqueue.is_full(key):
                self.jobqueue.enqueue(key, job)
            else:
                self._enqueue_buffer_queue(key, job)

    def start(self):
        """
        Start the elric master. Select all due jobs from the jobstore and enqueue them
        into the redis queue, then update the due jobs' information in the jobstore.
        :return:
        """
        if self.running:
            raise AlreadyRunningException
        self._stopped = False
        self.log.debug('elric master start...')

        self.start_subscribe_thread()

        while True:
            now = datetime.now(self.timezone)
            wait_seconds = None
            with distributed_lock(**settings.DISTRIBUTED_LOCK_CONFIG):
                for job_id, job_key, serialized_job in self.jobstore.get_due_jobs(now):
                    # enqueue due job into redis queue
                    self._enqueue_job(job_key, serialized_job)
                    # update job's information, such as next_run_time
                    job = Job.deserialize(serialized_job)
                    last_run_time = Job.get_serial_run_times(job, now)
                    if last_run_time:
                        next_run_time = Job.get_next_trigger_time(job, last_run_time[-1])
                        if next_run_time:
                            job.next_run_time = next_run_time
                            self.update_job(job)
                        else:
                            # if job has no next run time, then remove it from jobstore
                            self.remove_job(job)

                # get the next closest run time from the jobstore and use it as the wake-up time
                closest_run_time = self.jobstore.get_closest_run_time()

            if closest_run_time is not None:
                # cap the sleep at MIN_WAIT_TIME so newly submitted jobs are noticed promptly
                wait_seconds = min(max(timedelta_seconds(closest_run_time - now), 0), self.MIN_WAIT_TIME)
                self.log.debug('Next wakeup is due at %s (in %f seconds)' % (closest_run_time, wait_seconds))
            self._event.wait(wait_seconds if wait_seconds is not None else self.MIN_WAIT_TIME)
            self._event.clear()

    def wake_up(self):
        self._event.set()

    @property
    def running(self):
        return not self._stopped

    def start_process_buffer_job(self, job_key):
        """
        Start a thread that drains the buffer queue for the given job key
        """
        self.log.debug('start process buffer job... job key=[%s]' % job_key)
        thd = threading.Thread(target=self.process_buffer_job, args=(job_key, ))
        thd.setDaemon(True)
        thd.start()

    def process_buffer_job(self, job_key):
        while True:
            job, buffer_time = self._internal_buffer_queues[job_key].get()
            with self.jobqueue_lock:
                if not self.jobqueue.is_full(job_key):
                    self.jobqueue.enqueue(job_key, job)
                    continue

            if (datetime.now() - buffer_time).total_seconds() < self._job_maximum_buffer_time:
                self.log.debug("requeue into buffer")
                self._internal_buffer_queues[job_key].put((job, buffer_time))
                time.sleep(1.0)
            else:
                self.log.warning("timeout, discard job...")

    def subscribe_mq(self):
        while self.running:
            try:
                # block until a message arrives on any of the master's control channels,
                # then dispatch it to the matching handler in self.func_map
                key, serialized_job = self.jobqueue.dequeue_any(['__elric_submit_channel__',
                                                                 '__elric_remove_channel__',
                                                                 '__elric_finish_channel__'])
                self.func_map[key](Job.deserialize(serialized_job))
            except TypeError as e:
                self.log.error(e)
                time.sleep(60)
                continue
--------------------------------------------------------------------------------
/elric/master/rqextend.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
from __future__ import (absolute_import, unicode_literals)

from threading import RLock

from elric.master.rqbase import RQMasterBase
from elric.dupefilter.redisfilter import RedisFilter
from elric.core.exceptions import JobAlreadyExist
from elric.core import settings
from elric.core.lock import distributed_lock


class RQMasterExtend(RQMasterBase):
    def __init__(self, timezone=None):
        RQMasterBase.__init__(self, timezone)
        self.filter = RedisFilter(**settings.FILTER_CONFIG)
        self.filter_lock = RLock()

    def submit_job(self, job):
        def exist(key, value):
            with self.filter_lock:
                return self.filter.exist(key, value)

        self.log.debug("client call submit job [%s]" % job.id)

        if job.need_filter:
            if exist(job.filter_key, job.id):
                self.log.debug("job [%s] has been filtered..." % job.id)
                return False

        if not job.trigger:
            self._enqueue_job(job.job_key, job.serialize())
        else:
            with distributed_lock(**settings.DISTRIBUTED_LOCK_CONFIG):
                try:
                    self.jobstore.add_job(job)
                except JobAlreadyExist:
                    if job.replace_exist:
                        self.jobstore.update_job(job)
                    else:
                        self.log.warn('job [%s] already exist' % job.id)
            self.wake_up()

        return True

    def finish_job(self, job):
        """
        Receive finish_job rpc request from worker.
        :type job: Job
        """
        self.log.debug("job_id [%s] finish" % job.id)
        RQMasterBase.finish_job(self, job)
        # add job into filter only when the job finished successfully
        if job.is_success and job.need_filter:
            self.filter.add(job.filter_key, job.id)
--------------------------------------------------------------------------------
/elric/trigger/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import (absolute_import, unicode_literals)
--------------------------------------------------------------------------------
/elric/trigger/base.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import (absolute_import, unicode_literals)

from abc import ABCMeta, abstractmethod


class BaseTrigger(object):
    __metaclass__ = ABCMeta

    def __init__(self, timezone):
        self.timezone = timezone

    @abstractmethod
    def get_next_trigger_time(self, previous_trigger_time):
        raise NotImplementedError

    @classmethod
    def create_trigger(cls, **trigger_args):
        return cls(**trigger_args)
--------------------------------------------------------------------------------
/elric/trigger/cron/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""this file is quoted from apscheduler.trigger"""

from __future__ import (absolute_import, unicode_literals)

from datetime import datetime, timedelta
import six
from tzlocal import get_localzone

from elric.core.utils import datetime_ceil, convert_to_datetime, datetime_repr, astimezone
from elric.trigger.base import BaseTrigger
from elric.trigger.cron.fields import BaseField, WeekField, DayOfMonthField, DayOfWeekField, DEFAULT_VALUES


class CronTrigger(BaseTrigger):
    """
    Triggers when current time matches all specified time constraints, similarly to how the UNIX cron scheduler works.

    :param int|str year: 4-digit year
    :param int|str month: month (1-12)
    :param int|str day: day of the month (1-31)
    :param int|str week: ISO week (1-53)
    :param int|str day_of_week: number or name of weekday (0-6 or mon,tue,wed,thu,fri,sat,sun)
    :param int|str hour: hour (0-23)
    :param int|str minute: minute (0-59)
    :param int|str second: second (0-59)
    :param datetime|str start_date: earliest possible date/time to trigger on (inclusive)
    :param datetime|str end_date: latest possible date/time to trigger on (inclusive)
    :param datetime.tzinfo|str timezone: time zone to use for the date/time calculations
        (defaults to scheduler timezone)

    .. note:: The first weekday is always **monday**.
33 | """ 34 | 35 | FIELD_NAMES = ('year', 'month', 'day', 'week', 'day_of_week', 'hour', 'minute', 'second') 36 | FIELDS_MAP = { 37 | 'year': BaseField, 38 | 'month': BaseField, 39 | 'week': WeekField, 40 | 'day': DayOfMonthField, 41 | 'day_of_week': DayOfWeekField, 42 | 'hour': BaseField, 43 | 'minute': BaseField, 44 | 'second': BaseField 45 | } 46 | 47 | __slots__ = 'timezone', 'start_date', 'end_date', 'fields' 48 | 49 | def __init__(self, year=None, month=None, day=None, week=None, day_of_week=None, hour=None, minute=None, 50 | second=None, start_date=None, end_date=None, timezone=None): 51 | if timezone: 52 | timezone = astimezone(timezone) 53 | elif start_date and start_date.tzinfo: 54 | timezone = start_date.tzinfo 55 | elif end_date and end_date.tzinfo: 56 | timezone = end_date.tzinfo 57 | else: 58 | timezone = get_localzone() 59 | BaseTrigger.__init__(self, timezone) 60 | self.start_date = convert_to_datetime(start_date, self.timezone, 'start_date') 61 | self.end_date = convert_to_datetime(end_date, self.timezone, 'end_date') 62 | 63 | values = dict((key, value) for (key, value) in six.iteritems(locals()) 64 | if key in self.FIELD_NAMES and value is not None) 65 | self.fields = [] 66 | assign_defaults = False 67 | for field_name in self.FIELD_NAMES: 68 | if field_name in values: 69 | exprs = values.pop(field_name) 70 | is_default = False 71 | assign_defaults = not values 72 | elif assign_defaults: 73 | exprs = DEFAULT_VALUES[field_name] 74 | is_default = True 75 | else: 76 | exprs = '*' 77 | is_default = True 78 | 79 | field_class = self.FIELDS_MAP[field_name] 80 | field = field_class(field_name, exprs, is_default) 81 | self.fields.append(field) 82 | 83 | def _increment_field_value(self, dateval, fieldnum): 84 | """ 85 | Increments the designated field and resets all less significant fields to their minimum values. 
86 | 87 | :type dateval: datetime 88 | :type fieldnum: int 89 | :return: a tuple containing the new date, and the number of the field that was actually incremented 90 | :rtype: tuple 91 | """ 92 | 93 | values = {} 94 | i = 0 95 | while i < len(self.fields): 96 | field = self.fields[i] 97 | if not field.REAL: 98 | if i == fieldnum: 99 | fieldnum -= 1 100 | i -= 1 101 | else: 102 | i += 1 103 | continue 104 | 105 | if i < fieldnum: 106 | values[field.name] = field.get_value(dateval) 107 | i += 1 108 | elif i > fieldnum: 109 | values[field.name] = field.get_min(dateval) 110 | i += 1 111 | else: 112 | value = field.get_value(dateval) 113 | maxval = field.get_max(dateval) 114 | if value == maxval: 115 | fieldnum -= 1 116 | i -= 1 117 | else: 118 | values[field.name] = value + 1 119 | i += 1 120 | 121 | difference = datetime(**values) - dateval.replace(tzinfo=None) 122 | return self.timezone.normalize(dateval + difference), fieldnum 123 | 124 | def _set_field_value(self, dateval, fieldnum, new_value): 125 | values = {} 126 | for i, field in enumerate(self.fields): 127 | if field.REAL: 128 | if i < fieldnum: 129 | values[field.name] = field.get_value(dateval) 130 | elif i > fieldnum: 131 | values[field.name] = field.get_min(dateval) 132 | else: 133 | values[field.name] = new_value 134 | 135 | difference = datetime(**values) - dateval.replace(tzinfo=None) 136 | return self.timezone.normalize(dateval + difference) 137 | 138 | def get_next_trigger_time(self, previous_fire_time, now=None): 139 | if not now: 140 | curr_time = datetime.now(self.timezone) 141 | else: 142 | curr_time = now 143 | if previous_fire_time: 144 | start_date = max(curr_time, previous_fire_time + timedelta(microseconds=1)) 145 | else: 146 | start_date = max(curr_time, self.start_date) if self.start_date else curr_time 147 | 148 | fieldnum = 0 149 | next_date = datetime_ceil(start_date).astimezone(self.timezone) 150 | while 0 <= fieldnum < len(self.fields): 151 | field = self.fields[fieldnum] 152 | curr_value = field.get_value(next_date) 153 | next_value = field.get_next_value(next_date) 154 | 155 | if next_value is None: 156 | # No valid value was found 157 | next_date, fieldnum = self._increment_field_value(next_date, fieldnum - 1) 158 | elif next_value > curr_value: 159 | # A valid, but higher than the starting value, was found 160 | if field.REAL: 161 | next_date = self._set_field_value(next_date, fieldnum, next_value) 162 | fieldnum += 1 163 | else: 164 | next_date, fieldnum = self._increment_field_value(next_date, fieldnum) 165 | else: 166 | # A valid value was found, no changes necessary 167 | fieldnum += 1 168 | 169 | # Return if the date has rolled past the end date 170 | if self.end_date and next_date > self.end_date: 171 | return None 172 | 173 | if fieldnum >= 0: 174 | return next_date 175 | 176 | def __str__(self): 177 | options = ["%s='%s'" % (f.name, f) for f in self.fields if not f.is_default] 178 | return 'cron[%s]' % (', '.join(options)) 179 | 180 | def __repr__(self): 181 | options = ["%s='%s'" % (f.name, f) for f in self.fields if not f.is_default] 182 | if self.start_date: 183 | options.append("start_date='%s'" % datetime_repr(self.start_date)) 184 | return '<%s (%s)>' % (self.__class__.__name__, ', '.join(options)) 185 | -------------------------------------------------------------------------------- /elric/trigger/cron/expressions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """this file quote from apscheduler.trigger""" 3 | 4 | from 
__future__ import (absolute_import, unicode_literals) 5 | 6 | from calendar import monthrange 7 | import re 8 | 9 | from elric.core.utils import asint 10 | 11 | __all__ = ('AllExpression', 'RangeExpression', 'WeekdayRangeExpression', 'WeekdayPositionExpression', 12 | 'LastDayOfMonthExpression') 13 | 14 | 15 | WEEKDAYS = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] 16 | 17 | 18 | class AllExpression(object): 19 | value_re = re.compile(r'\*(?:/(?P<step>\d+))?$') 20 | 21 | def __init__(self, step=None): 22 | self.step = asint(step) 23 | if self.step == 0: 24 | raise ValueError('Increment must be higher than 0') 25 | 26 | def get_next_value(self, date, field): 27 | start = field.get_value(date) 28 | minval = field.get_min(date) 29 | maxval = field.get_max(date) 30 | start = max(start, minval) 31 | 32 | if not self.step: 33 | next = start 34 | else: 35 | distance_to_next = (self.step - (start - minval)) % self.step 36 | next = start + distance_to_next 37 | 38 | if next <= maxval: 39 | return next 40 | 41 | def __str__(self): 42 | if self.step: 43 | return '*/%d' % self.step 44 | return '*' 45 | 46 | def __repr__(self): 47 | return "%s(%s)" % (self.__class__.__name__, self.step) 48 | 49 | 50 | class RangeExpression(AllExpression): 51 | value_re = re.compile( 52 | r'(?P<first>\d+)(?:-(?P<last>\d+))?(?:/(?P<step>\d+))?$') 53 | 54 | def __init__(self, first, last=None, step=None): 55 | AllExpression.__init__(self, step) 56 | first = asint(first) 57 | last = asint(last) 58 | if last is None and step is None: 59 | last = first 60 | if last is not None and first > last: 61 | raise ValueError('The minimum value in a range must not be higher than the maximum') 62 | self.first = first 63 | self.last = last 64 | 65 | def get_next_value(self, date, field): 66 | start = field.get_value(date) 67 | minval = field.get_min(date) 68 | maxval = field.get_max(date) 69 | 70 | # Apply range limits 71 | minval = max(minval, self.first) 72 | if self.last is not None: 73 | maxval = min(maxval, self.last) 74 | start = max(start, minval) 75 | 76 | if not self.step: 77 | next = start 78 | else: 79 | distance_to_next = (self.step - (start - minval)) % self.step 80 | next = start + distance_to_next 81 | 82 | if next <= maxval: 83 | return next 84 | 85 | def __str__(self): 86 | if self.last != self.first and self.last is not None: 87 | range = '%d-%d' % (self.first, self.last) 88 | else: 89 | range = str(self.first) 90 | 91 | if self.step: 92 | return '%s/%d' % (range, self.step) 93 | return range 94 | 95 | def __repr__(self): 96 | args = [str(self.first)] 97 | if (self.last != self.first and self.last is not None) or self.step: 98 | args.append(str(self.last)) 99 | if self.step: 100 | args.append(str(self.step)) 101 | return "%s(%s)" % (self.__class__.__name__, ', '.join(args)) 102 | 103 | 104 | class WeekdayRangeExpression(RangeExpression): 105 | value_re = re.compile(r'(?P<first>[a-z]+)(?:-(?P<last>[a-z]+))?', re.IGNORECASE) 106 | 107 | def __init__(self, first, last=None): 108 | try: 109 | first_num = WEEKDAYS.index(first.lower()) 110 | except ValueError: 111 | raise ValueError('Invalid weekday name "%s"' % first) 112 | 113 | if last: 114 | try: 115 | last_num = WEEKDAYS.index(last.lower()) 116 | except ValueError: 117 | raise ValueError('Invalid weekday name "%s"' % last) 118 | else: 119 | last_num = None 120 | 121 | RangeExpression.__init__(self, first_num, last_num) 122 | 123 | def __str__(self): 124 | if self.last != self.first and self.last is not None: 125 | return '%s-%s' % (WEEKDAYS[self.first], WEEKDAYS[self.last]) 126 | return
WEEKDAYS[self.first] 127 | 128 | def __repr__(self): 129 | args = ["'%s'" % WEEKDAYS[self.first]] 130 | if self.last != self.first and self.last is not None: 131 | args.append("'%s'" % WEEKDAYS[self.last]) 132 | return "%s(%s)" % (self.__class__.__name__, ', '.join(args)) 133 | 134 | 135 | class WeekdayPositionExpression(AllExpression): 136 | options = ['1st', '2nd', '3rd', '4th', '5th', 'last'] 137 | value_re = re.compile(r'(?P<option_name>%s) +(?P<weekday_name>(?:\d+|\w+))' % '|'.join(options), re.IGNORECASE) 138 | 139 | def __init__(self, option_name, weekday_name): 140 | try: 141 | self.option_num = self.options.index(option_name.lower()) 142 | except ValueError: 143 | raise ValueError('Invalid weekday position "%s"' % option_name) 144 | 145 | try: 146 | self.weekday = WEEKDAYS.index(weekday_name.lower()) 147 | except ValueError: 148 | raise ValueError('Invalid weekday name "%s"' % weekday_name) 149 | 150 | def get_next_value(self, date, field): 151 | # Figure out the weekday of the month's first day and the number 152 | # of days in that month 153 | first_day_wday, last_day = monthrange(date.year, date.month) 154 | 155 | # Calculate which day of the month is the first of the target weekdays 156 | first_hit_day = self.weekday - first_day_wday + 1 157 | if first_hit_day <= 0: 158 | first_hit_day += 7 159 | 160 | # Calculate what day of the month the target weekday would be 161 | if self.option_num < 5: 162 | target_day = first_hit_day + self.option_num * 7 163 | else: 164 | target_day = first_hit_day + ((last_day - first_hit_day) // 7) * 7 165 | 166 | if target_day <= last_day and target_day >= date.day: 167 | return target_day 168 | 169 | def __str__(self): 170 | return '%s %s' % (self.options[self.option_num], WEEKDAYS[self.weekday]) 171 | 172 | def __repr__(self): 173 | return "%s('%s', '%s')" % (self.__class__.__name__, self.options[self.option_num], WEEKDAYS[self.weekday]) 174 | 175 | 176 | class LastDayOfMonthExpression(AllExpression): 177 | value_re = re.compile(r'last', re.IGNORECASE) 178 | 179 | def __init__(self): 180 | pass 181 | 182 | def get_next_value(self, date, field): 183 | return monthrange(date.year, date.month)[1] 184 | 185 | def __str__(self): 186 | return 'last' 187 | 188 | def __repr__(self): 189 | return "%s()" % self.__class__.__name__ 190 | -------------------------------------------------------------------------------- /elric/trigger/cron/fields.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """this file is adapted from apscheduler.trigger""" 3 | 4 | from __future__ import (absolute_import, unicode_literals) 5 | 6 | from calendar import monthrange 7 | 8 | from elric.trigger.cron.expressions import ( 9 | AllExpression, RangeExpression, WeekdayPositionExpression, LastDayOfMonthExpression, WeekdayRangeExpression) 10 | 11 | 12 | __all__ = ('MIN_VALUES', 'MAX_VALUES', 'DEFAULT_VALUES', 'BaseField', 'WeekField', 'DayOfMonthField', 'DayOfWeekField') 13 | 14 | 15 | MIN_VALUES = {'year': 1970, 'month': 1, 'day': 1, 'week': 1, 'day_of_week': 0, 'hour': 0, 'minute': 0, 'second': 0} 16 | MAX_VALUES = {'year': 2 ** 63, 'month': 12, 'day': 31, 'week': 53, 'day_of_week': 6, 'hour': 23, 'minute': 59, 17 | 'second': 59} 18 | DEFAULT_VALUES = {'year': '*', 'month': 1, 'day': 1, 'week': '*', 'day_of_week': '*', 'hour': 0, 'minute': 0, 19 | 'second': 0} 20 | 21 | 22 | class BaseField(object): 23 | REAL = True 24 | COMPILERS = [AllExpression, RangeExpression] 25 | 26 | def __init__(self, name, exprs, is_default=False): 27 | self.name = name 28
| self.is_default = is_default 29 | self.compile_expressions(exprs) 30 | 31 | def get_min(self, dateval): 32 | return MIN_VALUES[self.name] 33 | 34 | def get_max(self, dateval): 35 | return MAX_VALUES[self.name] 36 | 37 | def get_value(self, dateval): 38 | return getattr(dateval, self.name) 39 | 40 | def get_next_value(self, dateval): 41 | smallest = None 42 | for expr in self.expressions: 43 | value = expr.get_next_value(dateval, self) 44 | if smallest is None or (value is not None and value < smallest): 45 | smallest = value 46 | 47 | return smallest 48 | 49 | def compile_expressions(self, exprs): 50 | self.expressions = [] 51 | 52 | # Split a comma-separated expression list, if any 53 | exprs = str(exprs).strip() 54 | if ',' in exprs: 55 | for expr in exprs.split(','): 56 | self.compile_expression(expr) 57 | else: 58 | self.compile_expression(exprs) 59 | 60 | def compile_expression(self, expr): 61 | for compiler in self.COMPILERS: 62 | match = compiler.value_re.match(expr) 63 | if match: 64 | compiled_expr = compiler(**match.groupdict()) 65 | self.expressions.append(compiled_expr) 66 | return 67 | 68 | raise ValueError('Unrecognized expression "%s" for field "%s"' % (expr, self.name)) 69 | 70 | def __str__(self): 71 | expr_strings = (str(e) for e in self.expressions) 72 | return ','.join(expr_strings) 73 | 74 | def __repr__(self): 75 | return "%s('%s', '%s')" % (self.__class__.__name__, self.name, self) 76 | 77 | 78 | class WeekField(BaseField): 79 | REAL = False 80 | 81 | def get_value(self, dateval): 82 | return dateval.isocalendar()[1] 83 | 84 | 85 | class DayOfMonthField(BaseField): 86 | COMPILERS = BaseField.COMPILERS + [WeekdayPositionExpression, LastDayOfMonthExpression] 87 | 88 | def get_max(self, dateval): 89 | return monthrange(dateval.year, dateval.month)[1] 90 | 91 | 92 | class DayOfWeekField(BaseField): 93 | REAL = False 94 | COMPILERS = BaseField.COMPILERS + [WeekdayRangeExpression] 95 | 96 | def get_value(self, dateval): 97 | return dateval.weekday() 98 | -------------------------------------------------------------------------------- /elric/trigger/date.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | this file is adapted from apscheduler.trigger 4 | """ 5 | from __future__ import (absolute_import, unicode_literals) 6 | from datetime import datetime 7 | from tzlocal import get_localzone 8 | 9 | from elric.trigger.base import BaseTrigger 10 | from elric.core.utils import convert_to_datetime, datetime_repr, astimezone 11 | 12 | 13 | class DateTrigger(BaseTrigger): 14 | 15 | def __init__(self, run_date=None, timezone=None): 16 | timezone = astimezone(timezone) or get_localzone() 17 | BaseTrigger.__init__(self, timezone) 18 | self.run_date = convert_to_datetime(run_date or datetime.now(), timezone, 'run_date') 19 | 20 | def get_next_trigger_time(self, previous_fire_time, now=None): 21 | return self.run_date if previous_fire_time is None else None 22 | 23 | def __str__(self): 24 | return 'date[%s]' % datetime_repr(self.run_date) 25 | 26 | def __repr__(self): 27 | return "<%s (run_date='%s')>" % (self.__class__.__name__, datetime_repr(self.run_date)) 28 | -------------------------------------------------------------------------------- /elric/trigger/interval.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | this file is adapted from apscheduler.trigger 4 | """ 5 | from __future__ import (absolute_import, unicode_literals) 6 | 7 | from
datetime import timedelta, datetime 8 | from tzlocal import get_localzone 9 | from math import ceil 10 | 11 | from elric.core.utils import timedelta_seconds, astimezone, convert_to_datetime 12 | from elric.trigger.base import BaseTrigger 13 | 14 | 15 | class IntervalTrigger(BaseTrigger): 16 | 17 | def __init__(self, weeks=0, days=0, hours=0, minutes=0, seconds=0, start_date=None, end_date=None, timezone=None): 18 | if timezone: 19 | timezone = astimezone(timezone) 20 | elif start_date and start_date.tzinfo: 21 | timezone = start_date.tzinfo 22 | elif end_date and end_date.tzinfo: 23 | timezone = end_date.tzinfo 24 | else: 25 | timezone = get_localzone() 26 | BaseTrigger.__init__(self, timezone) 27 | self.interval = timedelta(weeks=weeks, days=days, hours=hours, minutes=minutes, seconds=seconds) 28 | self.interval_length = timedelta_seconds(self.interval) 29 | if self.interval_length == 0: 30 | self.interval = timedelta(seconds=1) 31 | self.interval_length = 1 32 | start_date = start_date or (datetime.now(self.timezone) + self.interval) 33 | self.start_date = convert_to_datetime(start_date, self.timezone, 'start_date') 34 | self.end_date = convert_to_datetime(end_date, self.timezone, 'end_date') 35 | 36 | def get_next_trigger_time(self, previous_trigger_time, now=None): 37 | if not now: 38 | curr_time = datetime.now(self.timezone) 39 | else: 40 | curr_time = now 41 | if previous_trigger_time: 42 | next_trigger_time = previous_trigger_time + self.interval 43 | elif self.start_date > curr_time: 44 | next_trigger_time = self.start_date 45 | else: 46 | timediff_seconds = timedelta_seconds(curr_time - self.start_date) 47 | next_interval_num = int(ceil(timediff_seconds / self.interval_length)) 48 | next_trigger_time = self.start_date + self.interval * next_interval_num 49 | 50 | if not self.end_date or next_trigger_time <= self.end_date: 51 | return self.timezone.normalize(next_trigger_time) -------------------------------------------------------------------------------- /elric/trigger/tool.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | 4 | from elric.trigger.date import DateTrigger 5 | from elric.trigger.interval import IntervalTrigger 6 | from elric.trigger.cron import CronTrigger 7 | 8 | 9 | def create_trigger(trigger_name, trigger_args): 10 | trigger_class = { 11 | 'interval': IntervalTrigger, 12 | 'date': DateTrigger, 13 | 'cron': CronTrigger 14 | }[trigger_name] 15 | 16 | return trigger_class.create_trigger(**trigger_args) -------------------------------------------------------------------------------- /elric/worker/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /elric/worker/base.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | 4 | from abc import ABCMeta, abstractmethod 5 | import logging 6 | 7 | from elric.core.log import init_logging_config 8 | 9 | 10 | class BaseWorker(object): 11 | __metaclass__ = ABCMeta 12 | name = None 13 | 14 | def __init__(self, name, logger_name): 15 | init_logging_config() 16 | self.log = logging.getLogger(logger_name) 17 | self.name = name 18 | if not getattr(self, 
'name', None): 19 | raise ValueError("%s must have a name" % type(self).__name__) 20 | 21 | @abstractmethod 22 | def start(self): 23 | raise NotImplementedError('subclasses of BaseWorker must provide a start() method') 24 | 25 | @abstractmethod 26 | def stop(self): 27 | raise NotImplementedError('subclasses of BaseWorker must provide a stop() method') 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /elric/worker/rqueue.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | 4 | import time 5 | from multiprocessing import Queue 6 | from tzlocal import get_localzone 7 | 8 | from elric.jobqueue.rqueue import RedisJobQueue 9 | from elric.executor.pool import ProcessPoolExecutor 10 | from elric.trigger.tool import create_trigger 11 | from elric.core import settings 12 | from elric.core.job import Job 13 | from elric.core.exceptions import AlreadyRunningException 14 | from elric.worker.base import BaseWorker 15 | 16 | 17 | class RQWorker(BaseWorker): 18 | def __init__(self, name, listen_keys=None, worker_num=2, timezone=None, logger_name='elric.worker'): 19 | BaseWorker.__init__(self, name, logger_name) 20 | self.jobqueue = RedisJobQueue(self, **settings.JOB_QUEUE_CONFIG) 21 | self.listen_keys = [] 22 | if listen_keys: 23 | self.listen_keys = ['%s:%s' % (self.name, listen_key) for listen_key in listen_keys] 24 | self.timezone = timezone or get_localzone() 25 | self.internal_job_queue = Queue(maxsize=worker_num) 26 | self.executor = ProcessPoolExecutor(worker_num, self) 27 | self._stopped = True 28 | 29 | def start(self): 30 | if self.running: 31 | raise AlreadyRunningException 32 | 33 | self._stopped = False 34 | self.log.debug('elric worker running...') 35 | while self.running: 36 | try: 37 | # grab a job from the job queue only if internal_job_queue has space 38 | self.internal_job_queue.put("#", True) 39 | job_key, serialized_job = self.jobqueue.dequeue_any(self.listen_keys) 40 | job = Job.deserialize(serialized_job) 41 | self.log.debug('get job id=[%s] func=[%s] from key %s' % (job.id, job.func, job.job_key)) 42 | self.executor.execute_job(job) 43 | except TypeError as e: 44 | self.log.error(e) 45 | time.sleep(60) 46 | continue 47 | 48 | def submit_job(self, func, job_key, args=None, kwargs=None, trigger=None, job_id=None, 49 | replace_exist=False, need_filter=False, **trigger_args): 50 | """ 51 | submit a job to the master through the redis queue 52 | :type func: str or callable obj or unicode 53 | :type job_key: str or unicode 54 | :type args: tuple or list 55 | :type kwargs: dict 56 | :type trigger: str or unicode 57 | :type job_id: str or unicode 58 | :type replace_exist: bool 59 | :type trigger_args: dict 60 | """ 61 | # use the worker's timezone if the trigger doesn't provide a specific `timezone` configuration 62 | trigger_args['timezone'] = self.timezone 63 | job_in_dict = { 64 | 'id': "%s:%s" % (self.name, job_id) if job_id else None, 65 | 'func': func, 66 | 'args': args, 67 | 'trigger': create_trigger(trigger, trigger_args) if trigger else None, 68 | 'kwargs': kwargs, 69 | 'job_key': '%s:%s' % (self.name, job_key), 70 | 'need_filter': need_filter, 71 | 'replace_exist': replace_exist 72 | } 73 | job = Job(**job_in_dict) 74 | job.check() 75 | self.jobqueue.enqueue('__elric_submit_channel__', job.serialize()) 76 | 77 | def remove_job(self, job_id): 78 | """ 79 | send a remove-job request to the master through the redis queue 80 | :type
job_id: str 81 | """ 82 | job_in_dict = { 83 | 'id': "%s:%s" % (self.name, job_id) 84 | } 85 | job = Job(**job_in_dict) 86 | self.jobqueue.enqueue('__elric_remove_channel__', job.serialize()) 87 | 88 | def finish_job(self, job_id, is_success, details, job_key, need_filter): 89 | job_in_dict = { 90 | 'id': job_id, 91 | 'job_key': job_key, 92 | 'is_success': is_success, 93 | 'details': details, 94 | 'need_filter': need_filter 95 | } 96 | job = Job(**job_in_dict) 97 | self.jobqueue.enqueue('__elric_finish_channel__', job.serialize()) 98 | 99 | @property 100 | def running(self): 101 | return not self._stopped 102 | 103 | def stop(self): 104 | self.log.debug('Worker is quitting') 105 | self._stopped = True 106 | self.executor.shutdown() 107 | -------------------------------------------------------------------------------- /example/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | -------------------------------------------------------------------------------- /example/test_master.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | os.environ.setdefault('ELRIC_SETTINGS_MODULE', 'settings') 5 | 6 | from elric.master.rqextend import RQMasterExtend 7 | 8 | 9 | if __name__ == '__main__': 10 | rq_master = RQMasterExtend() 11 | rq_master.start() 12 | -------------------------------------------------------------------------------- /example/test_worker.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | import os 5 | os.environ.setdefault('ELRIC_SETTINGS_MODULE', 'settings') 6 | 7 | from elric.worker.rqueue import RQWorker 8 | 9 | 10 | def wrapper_job(): 11 | print 'run first job' 12 | rq_worker.submit_job(nest_job, 'job1', args=['hi i am nested job']) 13 | 14 | 15 | def nest_job(welcome): 16 | print welcome 17 | 18 | 19 | def test_job(language=None): 20 | print 'my favorite language is {language}'.format(language=language) 21 | 22 | 23 | def test_date_job(): 24 | print 'hello i am date job' 25 | 26 | 27 | def test_cron_job(): 28 | print 'hello i am crontab job' 29 | 30 | 31 | if __name__ == '__main__': 32 | rq_worker = RQWorker(name='test', listen_keys=['job1', 'job2']) 33 | rq_worker.submit_job(test_date_job, 'job1', trigger='date', run_date='2015-05-17 21:13:30') 34 | rq_worker.submit_job(wrapper_job, 'job1', trigger='interval', seconds=30) 35 | rq_worker.submit_job(test_job, 'job2', trigger='interval', seconds=8, kwargs={'language': 'python'}) 36 | rq_worker.submit_job(test_cron_job, 'job2', trigger='cron', second=7) 37 | rq_worker.submit_job(test_job, 'job2', need_filter=True, job_id='test_job') 38 | rq_worker.start() -------------------------------------------------------------------------------- /example/test_worker2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | import os 5 | os.environ.setdefault('ELRIC_SETTINGS_MODULE', 'settings') 6 | from elric.worker.rqueue import RQWorker 7 | 8 | 9 | def test_job(): 10 | print 'hello this is worker 2' 11 | 12 | if __name__ == '__main__': 13 | rq_worker = RQWorker(name='test2', listen_keys=['job1',]) 14 | rq_worker.submit_job(test_job, 'job1', trigger='interval', minutes=1) 15 | rq_worker.start() 16 |
-------------------------------------------------------------------------------- /settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (absolute_import, unicode_literals) 3 | 4 | 5 | DISTRIBUTED_LOCK_CONFIG = { 6 | 'server': { 7 | 'host': 'localhost', 8 | 'port': 6379, 9 | 'password': None, 10 | 'db': 1, 11 | }, 12 | 'resource': 'elric_distributed_lock', 13 | 'retry_count': 5, 14 | 'retry_delay': 0.2, 15 | } 16 | 17 | JOB_QUEUE_CONFIG = { 18 | 'server': { 19 | 'host': 'localhost', 20 | 'port': 6379, 21 | 'password': None, 22 | 'db': 1, 23 | }, 24 | 'max_length': 100000, 25 | 'buffer_time': 10 26 | } 27 | 28 | FILTER_CONFIG = { 29 | 'server': { 30 | 'host': 'localhost', 31 | 'port': 6379, 32 | 'password': None, 33 | 'db': 0, 34 | } 35 | } 36 | 37 | JOB_STORE_CONFIG = { 38 | 'server': {}, 39 | 'maximum_records': 3 40 | } 41 | 42 | LOGGING_CONFIG = { 43 | 'version': 1, 44 | 'disable_existing_loggers': False, 45 | 'formatters': { 46 | 'standard': { 47 | 'format': '[%(asctime)s]-[%(levelname)s]-[%(filename)s %(funcName)s():line %(lineno)s]-[%(message)s]', 48 | } 49 | }, 50 | 'handlers': { 51 | 'console': { 52 | 'level': 'DEBUG', 53 | 'class': 'logging.StreamHandler', 54 | 'formatter': 'standard', 55 | }, 56 | 'worker': { 57 | 'level': 'DEBUG', 58 | 'class': 'logging.handlers.RotatingFileHandler', 59 | 'formatter': 'standard', 60 | 'filename': 'worker.log', 61 | 'maxBytes': 1024*1024*1024, 62 | 'backupCount': 5, 63 | }, 64 | 'master': { 65 | 'level': 'DEBUG', 66 | 'class': 'logging.handlers.RotatingFileHandler', 67 | 'formatter': 'standard', 68 | 'filename': 'master.log', 69 | 'maxBytes': 1024*1024*1024, 70 | 'backupCount': 5, 71 | } 72 | }, 73 | 'loggers': { 74 | 'elric.master': { 75 | 'handlers': ['console', "master"], 76 | 'level': 'DEBUG', 77 | }, 78 | 'elric.worker': { 79 | 'handlers': ['console', "worker"], 80 | 'level': 'DEBUG', 81 | }, 82 | } 83 | } 84 | 85 | 86 | --------------------------------------------------------------------------------
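A note on the trigger API (editor's sketch, not a file in the repository): all three triggers dumped above expose the same `get_next_trigger_time(previous_fire_time, now=None)` method that `Job` and the master rely on. The snippet below exercises that interface directly, using only constructor kwargs visible in `cron/__init__.py` and `interval.py`; the standalone-script setup is an assumption.

```python
# -*- coding: utf-8 -*-
# Hedged sketch: walk a trigger's schedule forward without a master or worker.
from elric.trigger.cron import CronTrigger
from elric.trigger.interval import IntervalTrigger

cron = CronTrigger(hour=3, minute=30)    # fire every day at 03:30
interval = IntervalTrigger(seconds=8)    # fire every 8 seconds

# With no previous fire time, a trigger returns its first run time
# (this is exactly how Job computes next_run_time on submission).
first = interval.get_next_trigger_time(None)
print first
# Feeding the previous fire time back in yields the following run.
print interval.get_next_trigger_time(first)
print cron.get_next_trigger_time(None)
```

Because `DateTrigger.get_next_trigger_time` returns `None` once a previous fire time exists, the same loop naturally terminates one-shot jobs.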