├── .pre-commit-config.yaml ├── README.md ├── django_stator ├── __init__.py ├── exceptions.py ├── graph.py ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ └── runstator.py ├── models.py ├── runner.py └── timer.py ├── pyproject.toml └── tests ├── test_model.py ├── test_runner.py └── testapp ├── __init__.py ├── models.py └── settings.py /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: check-case-conflict 6 | - id: check-merge-conflict 7 | - id: check-yaml 8 | - id: end-of-file-fixer 9 | - id: file-contents-sorter 10 | args: ["--ignore-case", "--unique"] 11 | files: ^(\.gitignore|\.dockerignore|requirements[-\w]*.txt)$ 12 | - id: mixed-line-ending 13 | args: ["--fix=lf"] 14 | - id: pretty-format-json 15 | args: ["--autofix"] 16 | - id: trailing-whitespace 17 | 18 | - repo: https://github.com/asottile/pyupgrade 19 | rev: "v3.15.0" 20 | hooks: 21 | - id: pyupgrade 22 | args: [--py311-plus] 23 | 24 | - repo: https://github.com/psf/black-pre-commit-mirror 25 | rev: 23.11.0 26 | hooks: 27 | - id: black 28 | 29 | - repo: https://github.com/pycqa/isort 30 | rev: 5.12.0 31 | hooks: 32 | - id: isort 33 | args: ["--profile=black"] 34 | 35 | - repo: https://github.com/pycqa/flake8 36 | rev: 6.1.0 37 | hooks: 38 | - id: flake8 39 | args: ["--max-line-length=119"] 40 | 41 | - repo: https://github.com/pre-commit/mirrors-mypy 42 | rev: v1.6.1 43 | hooks: 44 | - id: mypy 45 | exclude: "^tests/" 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Django Stator 2 | 3 | *(this is an in-progress rewrite out of Takahē's stator; do not use yet)* 4 | 5 | ## Mechanics 6 | 7 | Any model that is tracked by Stator must have three columns (automatically 8 | defined when you 
inherit from `StatorModel`): 9 | 10 | * A `state` column, a string, representing the state it is currently in 11 | * A `state_changed` column, a datetime, representing when that state was entered 12 | * A `state_next` column, a datetime, representing when it should next be checked 13 | 14 | It may also define: 15 | 16 | * A `state_history` column, a nullable JSON list, which will have every state 17 | change appended to it as a `[state, timestamp]` pair. 18 | 19 | It must also have a defined State Graph, which outlines the valid values of 20 | `state` and how to transition between them. Each state must either have: 21 | 22 | * A *transition function*, which is run when the model instance is in that 23 | state to see if it should move to a new state. These are expected to be on 24 | the state graph class itself, and be called `check_statename`. 25 | 26 | * `externally_progressed` set, marking it as not moving out of that state due 27 | to Stator; some other process will move it out if required. 28 | 29 | It can also optionally have: 30 | 31 | * `start_after`, the number of seconds to wait after entering the state before 32 | trying the *transition function* for it. 33 | 34 | * `retry_after`, the number of seconds to wait between tries of the state 35 | transition function. 36 | 37 | * `delete_after`, the number of seconds to wait before deleting an instance 38 | in this state. 39 | 40 | When a Stator runner needs to find instances of the model it should run, it: 41 | 42 | * Finds a suitable batch of instances that have `state_next <= now`, and in one 43 | `UPDATE RETURNING` statement, updates `state_next` to be two minutes in 44 | the future (or whatever double the *task deadline duration* is) 45 | 46 | * Hands these instances to its worker threads, each of which runs one 47 | instance's transition function at a time to see if it should transition to a 48 | new state. 
49 | 50 | * If the function does trigger a transition, it updates `state` to the new 51 | state, `state_changed` to the current time, and `state_next` to be `start_after` 52 | seconds in the future, as defined on the state. 53 | 54 | * If the function does not trigger a transition, it updates `state_next` to be 55 | `retry_after` seconds in the future. 56 | 57 | * If the function takes longer than the *task deadline duration* to finish, it 58 | is killed and `state_next` is updated to be `retry_after` seconds in the 59 | future. 60 | 61 | * An entry is added to the `StatorLog` model every so often, summarising what 62 | -------------------------------------------------------------------------------- /django_stator/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.1" 2 | -------------------------------------------------------------------------------- /django_stator/exceptions.py: -------------------------------------------------------------------------------- 1 | class TryAgainLater(BaseException): 2 | """ 3 | Special exception that Stator will catch without error, 4 | leaving a state to have another attempt soon. 5 | 6 | Equivalent to the state transition check function returning None; this 7 | just allows it to be more easily done from inner calls. 8 | """ 9 | 10 | 11 | class TimeoutError(BaseException): 12 | """ 13 | Raised in threads to kill them when they time out 14 | """ 15 | 16 | pass 17 | -------------------------------------------------------------------------------- /django_stator/graph.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from collections.abc import Callable 3 | from typing import Any, ClassVar 4 | 5 | 6 | class StateGraph: 7 | """ 8 | Represents a graph of possible states and transitions to attempt on them. 9 | Does not support subclasses of existing graphs yet. 
10 | """ 11 | 12 | states: ClassVar[dict[str, "State"]] 13 | choices: ClassVar[list[tuple[object, str]]] 14 | initial_state: ClassVar["State"] 15 | terminal_states: ClassVar[set["State"]] 16 | automatic_states: ClassVar[set["State"]] 17 | deletion_states: ClassVar[set["State"]] 18 | timeout_states: ClassVar[set["State"]] 19 | 20 | def __init_subclass__(cls) -> None: 21 | # Collect state members 22 | cls.states = {} 23 | for name, value in cls.__dict__.items(): 24 | if name in ["__module__", "__doc__", "states"]: 25 | pass 26 | elif name in ["initial_state", "terminal_states", "choices"]: 27 | raise ValueError(f"Cannot name a state {name} - this is reserved") 28 | elif isinstance(value, State): 29 | value._add_to_graph(cls, name) 30 | elif callable(value) or isinstance(value, classmethod): 31 | pass 32 | else: 33 | raise ValueError( 34 | f"Graph has item {name} of unallowed type {type(value)}" 35 | ) 36 | # Check the graph layout 37 | initial_state = None 38 | terminal_states = set() 39 | automatic_states = set() 40 | deletion_states = set() 41 | timeout_states = set() 42 | for state in cls.states.values(): 43 | # Check for multiple initial states 44 | if state.initial: 45 | if initial_state: 46 | raise ValueError( 47 | f"The graph has more than one initial state: {initial_state} and {state}" 48 | ) 49 | initial_state = state 50 | # Collect states that require deletion or timeout handling (they can be terminal or not) 51 | if state.delete_after: 52 | deletion_states.add(state) 53 | if state.timeout_after: 54 | timeout_states.add(state) 55 | # Collect terminal states 56 | if state.terminal: 57 | state.externally_progressed = True 58 | terminal_states.add(state) 59 | # Ensure they do NOT have a handler 60 | try: 61 | state.handler 62 | except AttributeError: 63 | pass 64 | else: 65 | raise ValueError( 66 | f"Terminal state {state} should not have a check method ({state.transition_function})" 67 | ) 68 | else: 69 | # Ensure non-terminal/manual states have a try 
interval and a handler 70 | if not state.externally_progressed: 71 | if not state.retry_after: 72 | raise ValueError( 73 | f"State {state} has no retry_after and is not terminal or manual" 74 | ) 75 | try: 76 | state.handler 77 | except AttributeError: 78 | raise ValueError( 79 | f"State {state} does not have a check method ({state.transition_function})" 80 | ) 81 | if not inspect.ismethod(state.handler) and not hasattr( 82 | state.handler, "__self__" 83 | ): 84 | raise ValueError( 85 | f"State {state}'s check method ({state.transition_function}) is not a classmethod" 86 | ) 87 | automatic_states.add(state) 88 | if initial_state is None: 89 | raise ValueError("The graph has no initial state") 90 | cls.initial_state = initial_state 91 | cls.terminal_states = terminal_states 92 | cls.automatic_states = automatic_states 93 | cls.deletion_states = deletion_states 94 | # Generate choices 95 | cls.choices = [(name, name) for name in cls.states.keys()] 96 | 97 | 98 | class State: 99 | """ 100 | Represents an individual state 101 | """ 102 | 103 | def __init__( 104 | self, 105 | transition_function: str | None = None, 106 | externally_progressed: bool = False, 107 | start_after: float = 0, 108 | retry_after: float | None = None, 109 | delete_after: float | None = None, 110 | force_initial: bool = False, 111 | ): 112 | self.transition_function = transition_function 113 | self.externally_progressed = externally_progressed 114 | self.start_after = start_after 115 | self.retry_after = retry_after 116 | self.delete_after = delete_after 117 | self.force_initial = force_initial 118 | if self.start_after < 0: 119 | raise ValueError("start_after cannot be negative") 120 | if self.retry_after is not None and self.retry_after < 0: 121 | raise ValueError("retry_after cannot be negative") 122 | if self.delete_after is not None and self.delete_after < 0: 123 | raise ValueError("delete_after cannot be negative") 124 | # Add some initial values before the graph is built 125 | 
self._in_graph = False 126 | self.parents: set["State"] = set() 127 | self.children: set["State"] = set() 128 | self.timeout_state: State | None = None 129 | self.timeout_after: int | None = None 130 | 131 | def _add_to_graph(self, graph: type[StateGraph], name: str): 132 | """ 133 | Called by the StateGraph as it constructs itself 134 | """ 135 | self._in_graph = True 136 | self.graph = graph 137 | self.name = name 138 | self.graph.states[name] = self 139 | if self.transition_function is None: 140 | self.transition_function = f"check_{self.name}" 141 | 142 | def __repr__(self): 143 | return f"" 144 | 145 | def __str__(self): 146 | return self.name 147 | 148 | def __eq__(self, other): 149 | if isinstance(other, State): 150 | return self is other 151 | return self.name == other 152 | 153 | def __hash__(self): 154 | return hash(id(self)) 155 | 156 | def transitions_to(self, other: "State"): 157 | self.children.add(other) 158 | other.parents.add(other) 159 | 160 | def timeout_to(self, other: "State", seconds: int): 161 | if self.timeout_state is not None: 162 | raise ValueError("Timeout state already set!") 163 | self.timeout_state = other 164 | self.timeout_after = seconds 165 | self.children.add(other) 166 | other.parents.add(other) 167 | 168 | @property 169 | def initial(self): 170 | return self.force_initial or (not self.parents) 171 | 172 | @property 173 | def terminal(self): 174 | return not self.children 175 | 176 | @property 177 | def handler(self) -> Callable[[Any], str | None]: 178 | # Retrieve it by name off the graph 179 | if self.transition_function is None: 180 | raise AttributeError("No handler defined") 181 | return getattr(self.graph, self.transition_function) 182 | -------------------------------------------------------------------------------- /django_stator/management/__init__.py: -------------------------------------------------------------------------------- 
class Command(BaseCommand):
    """
    Management command that runs a StatorRunner over the selected models
    (all StatorModel subclasses by default) until interrupted or until the
    optional --run-for time has elapsed.
    """

    help = "Runs a Stator runner"

    def add_arguments(self, parser):
        parser.add_argument(
            "--concurrency",
            "-c",
            type=int,
            default=None,
            help="How many threads to provision",
        )
        parser.add_argument(
            "--liveness-file",
            type=str,
            default=None,
            help="A file to touch at least every 30 seconds to say the runner is alive",
        )
        parser.add_argument(
            "--run-for",
            "-r",
            type=int,
            # None means "run forever"; the runner treats 0 as "one pass then
            # exit", so 0 cannot be used as the infinite default.
            default=None,
            help="How long to run for before exiting (defaults to infinite)",
        )
        parser.add_argument(
            "--exclude",
            "-x",
            type=str,
            action="append",
            help="Model labels that should not be processed",
        )
        parser.add_argument("model_labels", nargs="*", type=str)

    def handle(
        self,
        model_labels: list[str],
        exclude: list[str] | None,
        run_for: int | None,
        liveness_file: str | None = None,
        concurrency: int | None = None,
        *args,
        **options,
    ):
        """
        Resolves the model labels, builds a StatorRunner and runs it.

        `concurrency` falls back to settings.STATOR_CONCURRENCY (default 10)
        when not given; `run_for` of None runs until interrupted.
        """
        # argparse always passes --concurrency through (as None when absent),
        # so the fallback has to be resolved here rather than in the signature
        if concurrency is None:
            concurrency = getattr(settings, "STATOR_CONCURRENCY", 10)
        # Configure logging
        logging.basicConfig(
            format="[%(asctime)s] %(levelname)8s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
            level=logging.INFO,
            force=True,
        )
        # Resolve the model labels into model classes
        models = cast(
            list[type[StatorModel]],
            [apps.get_model(label) for label in model_labels],
        )
        excluded = cast(
            list[type[StatorModel]],
            [apps.get_model(label) for label in (exclude or [])],
        )
        if not models:
            models = StatorModel.subclasses
        models = [model for model in models if model not in excluded]
        logger.info(
            "Running for models: " + " ".join(m._meta.label_lower for m in models)
        )
        # Run a runner
        runner = StatorRunner(
            models,
            concurrency=concurrency,
            liveness_file=liveness_file,
        )
        try:
            runner.run(run_for=run_for)
        except KeyboardInterrupt:
            logger.critical("Ctrl-C received")
class StatorModel(models.Model):
    """
    A model base class that has a state machine backing it, with tasks to work
    out when to move the state to the next one.

    You need to provide a "state" field as an instance of StateField on the
    concrete model yourself.
    """

    # Maximum number of rows removed per state per state_do_deletes() call
    STATOR_BATCH_SIZE = 500

    state: StateField

    # When the state last actually changed, or the date of instance creation
    state_changed = models.DateTimeField(auto_now_add=True, db_index=True)

    # When the next state change should be attempted
    # TODO: Ensure start_after works with fresh models
    state_next = models.DateTimeField(
        null=True, blank=True, auto_now_add=True, db_index=True
    )

    # Collection of subclasses of us
    subclasses: ClassVar[list[type["StatorModel"]]] = []

    class Meta:
        abstract = True

    def __init_subclass__(cls, **kwargs) -> None:
        super().__init_subclass__(**kwargs)
        if cls is not StatorModel:
            cls.subclasses.append(cls)

    @classproperty
    def state_graph(cls) -> type[StateGraph]:
        """The StateGraph class attached to this model's `state` field"""
        return cls._meta.get_field("state").graph

    @property
    def state_age(self) -> float:
        """Seconds elapsed since the instance entered its current state"""
        return (timezone.now() - self.state_changed).total_seconds()

    @classmethod
    def state_get_ready(cls, number: int, lock_period: int) -> list[Self]:
        """
        Finds up to `number` instances that are ready to be looked at, bumps
        their state_next by lock_period, and returns them.
        """
        with transaction.atomic():
            # Query for `number` rows that have a state_next that's in the past.
            # Rows that are for states that are not automatic SHOULD have a NULL
            # state_next date, but we can handle a few if they slip through.
            # Also sort by state_next for some semblance of FIFO ordering.
            selected = list(
                cls.objects.filter(state_next__lte=timezone.now())
                .order_by("state_next")[:number]
                .select_for_update(skip_locked=True, no_key=True)
            )
            # Push state_next out to double the lock period so no other runner
            # picks these rows up while we are working on them
            cls.objects.filter(pk__in=[i.pk for i in selected]).update(
                state_next=timezone.now() + datetime.timedelta(seconds=lock_period * 2)
            )
        return selected

    @classmethod
    def state_do_deletes(cls) -> int:
        """
        Finds instances of this model that need to be deleted and deletes them
        in small batches. Returns how many were deleted.
        """
        deleted = 0
        for state in cls.state_graph.deletion_states:
            # Only rows that are actually IN this state are eligible; without
            # the state filter every old row would be deleted regardless of
            # which state it is in.
            to_delete = cls.objects.filter(
                state=state,
                state_changed__lte=timezone.now()
                - datetime.timedelta(seconds=state.delete_after),
            )[: cls.STATOR_BATCH_SIZE]
            deleted += cls.objects.filter(pk__in=to_delete).delete()[0]
        return deleted

    @classmethod
    def state_count_pending(cls) -> int:
        """
        Returns how many instances are "pending", i.e. need a transition
        checked.
        """
        return cls.objects.filter(state_next__lte=timezone.now()).count()

    def state_transition_check(self) -> State | None:
        """
        Attempts to transition the current state by running its handler(s).
        Returns the new state it moved to, or None if no transition occurred.

        Raises ValueError if the handler returns a state that is not a
        declared child of the current one, or if the current state has no
        retry_after to reschedule with.
        """
        current_state: State = self.state_graph.states[self.state]

        # If it's a manual progression state don't even try
        # We shouldn't really be here, but it could be a race condition
        if current_state.externally_progressed:
            logger.warning(
                f"Warning: trying to progress externally progressed state {self.state}!"
            )
            self.state_next = None
            self.save(update_fields=["state_next"])
            return None

        # Try running its handler function
        try:
            if iscoroutinefunction(current_state.handler):
                next_state = async_to_sync(current_state.handler)(self)
            else:
                next_state = current_state.handler(self)
        except (TryAgainLater, TimeoutError):
            # Deliberate "not yet" signals - fall through to the retry logic
            pass
        except BaseException as e:
            # A failing handler is logged and treated as "no transition"
            logger.exception(e)
        else:
            if next_state:
                # Ensure it's a State object
                if isinstance(next_state, str):
                    next_state = self.state_graph.states[next_state]
                # Ensure it's a child
                if next_state not in current_state.children:
                    raise ValueError(
                        f"Cannot transition from {current_state} to {next_state} - not a declared transition"
                    )
                self.state_transition(next_state)
                return next_state

        # See if it timed out since its last state change
        if (
            current_state.timeout_state
            and current_state.timeout_after
            and current_state.timeout_after <= self.state_age
        ):
            self.state_transition(current_state.timeout_state)
            return current_state.timeout_state

        # Nothing happened, bump state_next to match retry_after
        if current_state.retry_after is None:
            raise ValueError(f"Invalid retry_after on state {current_state}!")
        # retry_after is in seconds; timedelta's first positional arg is days
        self.state_next = timezone.now() + datetime.timedelta(
            seconds=current_state.retry_after
        )
        self.save(update_fields=["state_next"])
        return None

    def state_transition(self, state: State | str):
        """
        Transitions the instance to the given state name, forcibly.
        """
        self.state_transition_queryset(
            self.__class__.objects.filter(pk=self.pk),
            state,
        )
        self.refresh_from_db()

    @classmethod
    def state_transition_queryset(
        cls,
        queryset: models.QuerySet,
        state: State | str,
    ):
        """
        Transitions every instance in the queryset to the given state, forcibly.

        `state` may be a State object or a state name; a KeyError is raised if
        the name is not in this model's graph.
        """
        # Really ensure we have the right state object (from OUR graph)
        state_name = state.name if isinstance(state, State) else state
        state_obj = cls.state_graph.states[state_name]
        # Update the state and its next transition attempt; externally
        # progressed states are never scheduled
        now = timezone.now()
        if state_obj.externally_progressed:
            next_attempt = None
        else:
            next_attempt = now + datetime.timedelta(seconds=state_obj.start_after)
        queryset.update(
            state=state_obj,
            state_changed=now,
            state_next=next_attempt,
        )
def run(self, run_for: int | None = None):
    """
    Starts the worker threads and runs the main scheduling loop.

    If `run_for` is None the loop runs until interrupted; otherwise it exits
    once more than `run_for` seconds have elapsed (so 0 means "one pass").
    On exit, waits for in-flight tasks to finish (killing any that pass
    their deadline), joins the workers and cancels the watchdog alarm.
    """
    self.handled: dict[str, int] = {}
    self.started = time.monotonic()

    self.loop_delay = self.minimum_loop_delay
    self.watchdog_timer = LoopingTimer(self.watchdog_interval)
    self.deletion_timer = LoopingTimer(self.delete_interval)
    # Spin up the worker threads
    self.workers: list[WorkerThread] = [
        WorkerThread(self) for _ in range(self.concurrency)
    ]
    for worker in self.workers:
        worker.start()
    # For the first time period, launch tasks
    logger.info("Running main task loop")
    try:
        while True:
            # See if we need to handle the watchdog
            if self.watchdog_timer.check():
                # Set up the watchdog timer (each time we do this the previous one is cancelled)
                signal.alarm(self.watchdog_interval * 2)
                # Write liveness file if configured
                if self.liveness_file:
                    with open(self.liveness_file, "w") as fh:
                        fh.write(str(int(time.time())))

            # Kill any overdue workers
            self.check_worker_deadlines()

            # See if we need to add deletion tasks
            if self.deletion_timer.check():
                self.add_deletion_tasks()

            # Fetch and run any new handlers we can fit
            self.add_transition_tasks()

            # Are we in limited run mode?
            if run_for is not None and (time.monotonic() - self.started) > run_for:
                break

            # Prevent busylooping, but also back off delay if we have
            # no tasks
            if self.busy_workers or (
                run_for is not None and run_for < self.maximum_loop_delay
            ):
                self.loop_delay = self.minimum_loop_delay
            else:
                self.loop_delay = min(
                    self.loop_delay * 1.5,
                    self.maximum_loop_delay,
                )
            time.sleep(self.loop_delay)
    except KeyboardInterrupt:
        pass

    # Wait for tasks to finish
    logger.info("Waiting for tasks to complete")
    for worker in self.workers:
        worker.shutdown = True
    for _ in range(self.task_deadline):
        if not any(w.task for w in self.workers):
            break
        self.check_worker_deadlines()
        time.sleep(1)
    for worker in self.workers:
        worker.join()

    # Cancel the pending watchdog alarm. Without this, a limited-time run
    # (from a test or a web view) leaves SIGALRM armed and the host process
    # is killed by alarm_handler a couple of minutes after we return.
    # Done last so the watchdog still covers a hung shutdown/join.
    signal.alarm(0)

    # We're done
    logger.info("Complete")
def add_deletion_tasks(self, call_inline=False):
    """
    Queues one "delete" task per model whose graph has deletion states,
    for as long as there is an idle worker to take it.
    """
    # TODO: Make sure these always get to run and don't get starved out
    for candidate in self.models:
        # Models with nothing to delete never need a task
        if not candidate.state_graph.deletion_states:
            continue
        # No spare worker right now; try again on the next deletion tick
        if not self.idle_workers:
            continue
        self.assign_to_worker(("delete", candidate))
def assign_to_worker(self, task: tuple[str, Any]):
    """
    Assigns the given task to the first live, idle worker.

    Raises ValueError if no such worker exists.
    """
    for worker in self.workers:
        # Skip dead threads: they would never pick the task up (this matches
        # what idle_workers counts as available capacity)
        if worker.task is None and worker.is_alive():
            # Set the deadline BEFORE publishing the task. The worker polls
            # for `task` and clears `deadline` when it finishes, so doing it
            # the other way round could leave a stale deadline behind on a
            # worker that has already finished a very quick task.
            worker.deadline = time.monotonic() + self.task_deadline
            worker.task = task
            break
    else:
        raise ValueError("Cannot assign task to any worker")
def run(self):
    """
    Thread main loop: polls for an assigned task, runs it, clears it, and
    repeats until shutdown is requested and no task is pending. Database
    connections are closed when the thread exits.
    """
    try:
        while not self.shutdown or self.task:
            # Wait for a task to be assigned
            if self.task is None:
                time.sleep(0.1)
                continue
            # Run the correct subtask
            try:
                if self.task[0] == "transition":
                    self.task_transition(self.task[1])
                elif self.task[0] == "delete":
                    self.task_delete(self.task[1])
                else:
                    logger.error(f"Unknown task type {self.task[0]}")
            except TimeoutError:
                # Injected by the runner when we pass our deadline
                continue
            except Exception:
                # One failing task must not kill the worker thread, or the
                # pool permanently loses capacity; log it and carry on
                logger.exception("Unhandled error running task %r", self.task)
            finally:
                # Clear the task
                self.task = None
                self.deadline = None
    finally:
        connections.close_all()
266 | """ 267 | # Loop, running deletions every second, until there are no more to do 268 | total_deleted = 0 269 | last_total = None 270 | while total_deleted != last_total: 271 | last_total = total_deleted 272 | total_deleted += model.state_do_deletes() 273 | logger.info(f"{model._meta.label_lower}: Deleted {total_deleted} stale items") 274 | self.runner.log_handled(model._meta.label_lower, total_deleted) 275 | -------------------------------------------------------------------------------- /django_stator/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | class LoopingTimer: 5 | """ 6 | Triggers check() to be true once every `interval`. 7 | """ 8 | 9 | next_run: float | None = None 10 | 11 | def __init__(self, interval: float, trigger_at_start=True): 12 | self.interval = interval 13 | self.trigger_at_start = trigger_at_start 14 | 15 | def check(self) -> bool: 16 | # See if it's our first time being called 17 | if self.next_run is None: 18 | # Set up the next call based on trigger_at_start 19 | if self.trigger_at_start: 20 | self.next_run = time.monotonic() 21 | else: 22 | self.next_run = time.monotonic() + self.interval 23 | # See if it's time to run the next call 24 | if time.monotonic() >= self.next_run: 25 | self.next_run = time.monotonic() + self.interval 26 | return True 27 | return False 28 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "django-stator" 3 | authors = [{ name = "Andrew Godwin", email = "andrew@aeracode.org" }] 4 | description = "A background process worker for Django based on state machines" 5 | readme = "README.rst" 6 | requires-python = ">=3.11" 7 | license = { text = "BSD-3-Clause" } 8 | classifiers = ["Programming Language :: Python :: 3"] 9 | dependencies = ["Django>=4.0"] 10 | dynamic = ["version"] 11 | 12 | 
@pytest.mark.django_db
def test_state_transition_check():
    """
    Drives a model through its state graph by hand, checking that the
    transition check returns None (no move) or the new state (move), and that
    state_next is kept in step with the state the instance ends up in.
    """
    subject = BasicModel.objects.create()

    # A fresh, unready instance stays where it is and is rescheduled
    outcome = subject.state_transition_check()
    assert outcome is None
    assert subject.state_next is not None
    assert subject.state_next > timezone.now()

    # Once marked ready it moves on to done, which needs no further checks
    subject.ready = True
    subject.save()
    outcome = subject.state_transition_check()
    assert outcome is BasicModel.state_graph.done
    assert subject.state_next is None

    # done is externally progressed, so a stray state_next on it is bogus;
    # running the check again should leave the state alone and clear it.
    subject.state_next = timezone.now()
    subject.save()
    outcome = subject.state_transition_check()
    assert outcome is None
    assert subject.state_next is None

    # A manual move to pending_delete should schedule the instance again
    subject.state_transition(BasicStates.pending_delete)
    assert subject.state_next is not None
    assert subject.state_next > timezone.now()

    # Lastly, a brand new instance that has sat in its state for a day
    # should take the timeout route
    stale = BasicModel.objects.create()
    stale.state_changed = timezone.now() - datetime.timedelta(days=1)
    stale.save()
    outcome = stale.state_transition_check()
    assert outcome is BasicStates.timed_out
    assert stale.state_next is None
deletion is done by the runner 37 | """ 38 | 39 | # Make one that should delete and one that should not 40 | instance_delete = BasicModel.objects.create() 41 | instance_delete.state_transition(BasicStates.deleted) 42 | instance_delete.state_changed = datetime.datetime(2000, 1, 1, tzinfo=datetime.UTC) 43 | instance_delete.save() 44 | instance_nodelete = BasicModel.objects.create() 45 | instance_nodelete.state_transition(BasicStates.deleted) 46 | 47 | # Make a runner and run it once 48 | runner = StatorRunner([BasicModel]) 49 | runner.run(run_for=0) 50 | 51 | # One should have deleted, one should not have 52 | assert BasicModel.objects.filter(pk=instance_delete.pk).count() == 0 53 | assert BasicModel.objects.filter(pk=instance_nodelete.pk).count() == 1 54 | 55 | 56 | @pytest.mark.django_db(transaction=True) 57 | def test_runner_deadline(): 58 | """ 59 | Tests that timing out tasks works, and does not render their worker threads 60 | useless (and that tasks get pushed back when they time out!) 
class BasicStates(StateGraph):
    """
    State graph used by the test suite.

    Declares six states, the allowed moves between them, and a `check_<state>`
    transition function for each of new, slow and pending_delete.
    """

    # States that are re-checked; retry_after is the number of seconds between
    # tries of the transition function
    new = State(retry_after=5)
    slow = State(retry_after=5)
    # Not moved out of this state by Stator itself
    done = State(externally_progressed=True)
    # delete_after is the number of seconds to wait before deleting an
    # instance in this state
    timed_out = State(delete_after=10)
    # start_after is the number of seconds to wait after entering the state
    # before first trying its transition function
    pending_delete = State(retry_after=5, start_after=5)
    deleted = State(delete_after=10)

    # Allowed moves between states
    new.transitions_to(done)
    new.transitions_to(slow)
    new.transitions_to(pending_delete)
    # NOTE(review): presumably moves instances that have stayed in `new` for
    # 10 seconds to timed_out - confirm against State.timeout_to
    new.timeout_to(timed_out, seconds=10)
    slow.transitions_to(done)
    done.transitions_to(pending_delete)
    pending_delete.transitions_to(deleted)

    @classmethod
    def check_new(cls, instance):
        """
        Returns the done state once the instance is flagged ready, else None.
        """
        if instance.ready:
            return cls.done

    @classmethod
    def check_slow(cls, instance):
        """
        Sleeps for two seconds, then returns the done state.
        """
        # Deliberately slow: the runner deadline test uses task_deadline=1 and
        # expects an instance in this state not to transition
        time.sleep(2)
        return cls.done

    @classmethod
    def check_pending_delete(cls, instance):
        """
        Returns the deleted state once the instance is flagged ready, else None.
        """
        if instance.ready:
            return cls.deleted


class BasicModel(StatorModel):
    """
    Minimal Stator-tracked model whose state follows BasicStates.
    """

    state = StateField(BasicStates)
    # Flag read by the check_new and check_pending_delete transition functions
    ready = models.BooleanField(default=False)