├── .editorconfig ├── .gitattributes ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .pre-commit.sh ├── .rustfmt.toml ├── .sqlx ├── query-0a6b2c09cbb30aad7624e1d21ea8cba43bf521bae77cecabe0e89ca461d29687.json ├── query-1a69b80bf0909e445dcbe33ef6f698fc6226e641b2216d4e1266e8f49bcaeed5.json ├── query-24d7ef3a1dc86b9e408c7c691cb9c981673f0139ec30884dead832f19bae36d9.json ├── query-2d2d8318e918473d99f96fed61ab197d8447bfe601667b8fba5690d8aa4b05d9.json ├── query-477fabc7c3f47465d5197844fb595c907b70d89a2922f9ffeb6062d71a1a96f9.json ├── query-565c545ebc779a7df338f4efa0813f3672bd5e1d5639fc2b06686bdbca16aa2d.json ├── query-c7988b4a7ce47ef3aed755e53df221a0d5307e83a5748b4c1ddc6e50832cfb45.json ├── query-ebc5a43458570f6f64356d4fdffb906b7b52ff09a55a3064ed7943558234b103.json └── query-f7824b0e7bc69b17d2c3de68b35dc382dd6fff214a80f93c4b9fd082bf24696c.json ├── .typos.toml ├── Cargo.toml ├── LICENSE ├── README.md ├── examples ├── counter.rs ├── delay.rs ├── tutorial.rs └── util.rs ├── migrations ├── 20230703122702_table.sql ├── 20230714025134_trigger.sql └── 20240630005452_auto-updated_at.sql └── src ├── error.rs ├── lib.rs ├── listener.rs ├── macros.rs ├── next_step.rs ├── task.rs ├── traits.rs ├── util.rs └── worker.rs /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 4 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | 10 | [*.toml] 11 | indent_size = 2 12 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Don't show `Cargo.lock` in `git diff` 2 | Cargo.lock -diff 3 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | 
branches: 5 | - main 6 | - ci 7 | pull_request: 8 | 9 | env: 10 | SQLX_OFFLINE: true 11 | 12 | jobs: 13 | rustfmt: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v3 18 | 19 | - name: Nightly Rust 20 | uses: actions-rs/toolchain@v1 21 | with: 22 | toolchain: nightly 23 | components: rustfmt 24 | 25 | - name: Rustfmt 26 | run: cargo fmt -- --check 27 | 28 | clippy: 29 | runs-on: ubuntu-latest 30 | steps: 31 | - name: Checkout 32 | uses: actions/checkout@v3 33 | 34 | - name: Stable Rust 35 | uses: actions-rs/toolchain@v1 36 | with: 37 | toolchain: stable 38 | components: clippy 39 | 40 | - name: Clippy 41 | run: cargo clippy --all-targets -- -D warnings 42 | 43 | test: 44 | runs-on: ubuntu-latest 45 | steps: 46 | - name: Checkout 47 | uses: actions/checkout@v3 48 | 49 | - name: Stable Rust 50 | uses: actions-rs/toolchain@v1 51 | with: 52 | toolchain: stable 53 | 54 | - name: Test all targets 55 | run: cargo test --all-targets 56 | 57 | - name: Test docs 58 | run: cargo test --doc 59 | 60 | typos: 61 | runs-on: ubuntu-latest 62 | steps: 63 | - name: Checkout 64 | uses: actions/checkout@v3 65 | 66 | - name: Check typos in `rust` folder 67 | uses: crate-ci/typos@master 68 | with: 69 | files: . 
70 | 71 | cargo_sort: 72 | runs-on: ubuntu-latest 73 | steps: 74 | - name: Checkout 75 | uses: actions/checkout@v3 76 | 77 | - name: Stable Rust 78 | uses: actions-rs/toolchain@v1 79 | with: 80 | toolchain: stable 81 | 82 | - name: Install cargo-sort 83 | run: cargo install --locked cargo-sort 84 | 85 | - name: Check `Cargo.toml` sort 86 | run: cargo sort -c 87 | 88 | machete: 89 | runs-on: ubuntu-latest 90 | steps: 91 | - name: Checkout 92 | uses: actions/checkout@v3 93 | 94 | - name: Stable Rust 95 | uses: actions-rs/toolchain@v1 96 | with: 97 | toolchain: stable 98 | 99 | - name: Install `cargo-machete` 100 | run: cargo install --locked cargo-machete 101 | 102 | - name: Check unused Cargo dependencies 103 | run: cargo machete 104 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | /.env 4 | /.todo.md 5 | -------------------------------------------------------------------------------- /.pre-commit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eu 4 | 5 | # Linking the script as the pre-commit hook 6 | SCRIPT_PATH=$(realpath "$0") 7 | HOOK_PATH=$(git rev-parse --git-dir)/hooks/pre-commit 8 | if [ "$(realpath "$HOOK_PATH")" != "$SCRIPT_PATH" ]; then 9 | read -p "Link this script as the git pre-commit hook to avoid further manual running? 
(y/N): " answer 10 | if [[ $answer =~ ^[Yy]$ ]]; then 11 | ln -sf "$SCRIPT_PATH" "$HOOK_PATH" 12 | fi 13 | fi 14 | 15 | set -x 16 | 17 | # Install tools 18 | cargo clippy --version &>/dev/null || rustup component add clippy 19 | cargo machete --version &>/dev/null || cargo install --locked cargo-machete 20 | cargo sort --version &>/dev/null || cargo install --locked cargo-sort 21 | cargo sqlx --version &>/dev/null || cargo install --locked sqlx-cli 22 | typos --version &>/dev/null || cargo install --locked typos-cli 23 | 24 | rustup toolchain list | grep -q 'nightly' || rustup toolchain install nightly 25 | cargo +nightly fmt --version &>/dev/null || rustup component add rustfmt --toolchain nightly 26 | 27 | # Checks 28 | typos . 29 | cargo machete 30 | cargo +nightly fmt -- --check 31 | cargo sort -c 32 | cargo test --all-targets 33 | cargo test --doc 34 | cargo sqlx prepare && git add .sqlx 35 | cargo clippy --all-targets -- -D warnings 36 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | wrap_comments = true 2 | imports_granularity = "Crate" 3 | -------------------------------------------------------------------------------- /.sqlx/query-0a6b2c09cbb30aad7624e1d21ea8cba43bf521bae77cecabe0e89ca461d29687.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "INSERT INTO pg_task (step, wakeup_at) VALUES ($1, $2) RETURNING id", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Uuid" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [ 14 | "Text", 15 | "Timestamptz" 16 | ] 17 | }, 18 | "nullable": [ 19 | false 20 | ] 21 | }, 22 | "hash": "0a6b2c09cbb30aad7624e1d21ea8cba43bf521bae77cecabe0e89ca461d29687" 23 | } 24 | -------------------------------------------------------------------------------- 
/.sqlx/query-1a69b80bf0909e445dcbe33ef6f698fc6226e641b2216d4e1266e8f49bcaeed5.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "UPDATE pg_task SET is_running = true WHERE id = $1", 4 | "describe": { 5 | "columns": [], 6 | "parameters": { 7 | "Left": [ 8 | "Uuid" 9 | ] 10 | }, 11 | "nullable": [] 12 | }, 13 | "hash": "1a69b80bf0909e445dcbe33ef6f698fc6226e641b2216d4e1266e8f49bcaeed5" 14 | } 15 | -------------------------------------------------------------------------------- /.sqlx/query-24d7ef3a1dc86b9e408c7c691cb9c981673f0139ec30884dead832f19bae36d9.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "UPDATE pg_task SET is_running = false WHERE is_running = true", 4 | "describe": { 5 | "columns": [], 6 | "parameters": { 7 | "Left": [] 8 | }, 9 | "nullable": [] 10 | }, 11 | "hash": "24d7ef3a1dc86b9e408c7c691cb9c981673f0139ec30884dead832f19bae36d9" 12 | } 13 | -------------------------------------------------------------------------------- /.sqlx/query-2d2d8318e918473d99f96fed61ab197d8447bfe601667b8fba5690d8aa4b05d9.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n UPDATE pg_task\n SET is_running = false,\n tried = tried + 1,\n wakeup_at = $2\n WHERE id = $1\n ", 4 | "describe": { 5 | "columns": [], 6 | "parameters": { 7 | "Left": [ 8 | "Uuid", 9 | "Timestamptz" 10 | ] 11 | }, 12 | "nullable": [] 13 | }, 14 | "hash": "2d2d8318e918473d99f96fed61ab197d8447bfe601667b8fba5690d8aa4b05d9" 15 | } 16 | -------------------------------------------------------------------------------- /.sqlx/query-477fabc7c3f47465d5197844fb595c907b70d89a2922f9ffeb6062d71a1a96f9.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n SELECT\n id,\n step,\n 
tried,\n wakeup_at\n FROM pg_task\n WHERE is_running = false\n AND error IS NULL\n ORDER BY wakeup_at\n LIMIT 1\n FOR UPDATE\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "id", 9 | "type_info": "Uuid" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "step", 14 | "type_info": "Text" 15 | }, 16 | { 17 | "ordinal": 2, 18 | "name": "tried", 19 | "type_info": "Int4" 20 | }, 21 | { 22 | "ordinal": 3, 23 | "name": "wakeup_at", 24 | "type_info": "Timestamptz" 25 | } 26 | ], 27 | "parameters": { 28 | "Left": [] 29 | }, 30 | "nullable": [ 31 | false, 32 | false, 33 | false, 34 | false 35 | ] 36 | }, 37 | "hash": "477fabc7c3f47465d5197844fb595c907b70d89a2922f9ffeb6062d71a1a96f9" 38 | } 39 | -------------------------------------------------------------------------------- /.sqlx/query-565c545ebc779a7df338f4efa0813f3672bd5e1d5639fc2b06686bdbca16aa2d.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n UPDATE pg_task\n SET is_running = false,\n tried = tried + 1,\n error = $2,\n wakeup_at = now()\n WHERE id = $1\n RETURNING tried, step::TEXT as \"step!\"\n ", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": "tried", 9 | "type_info": "Int4" 10 | }, 11 | { 12 | "ordinal": 1, 13 | "name": "step!", 14 | "type_info": "Text" 15 | } 16 | ], 17 | "parameters": { 18 | "Left": [ 19 | "Uuid", 20 | "Text" 21 | ] 22 | }, 23 | "nullable": [ 24 | false, 25 | false 26 | ] 27 | }, 28 | "hash": "565c545ebc779a7df338f4efa0813f3672bd5e1d5639fc2b06686bdbca16aa2d" 29 | } 30 | -------------------------------------------------------------------------------- /.sqlx/query-c7988b4a7ce47ef3aed755e53df221a0d5307e83a5748b4c1ddc6e50832cfb45.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "SELECT id FROM pg_task LIMIT 1", 4 | "describe": { 5 | "columns": [ 6 | { 7 | "ordinal": 0, 8 | "name": 
"id", 9 | "type_info": "Uuid" 10 | } 11 | ], 12 | "parameters": { 13 | "Left": [] 14 | }, 15 | "nullable": [ 16 | false 17 | ] 18 | }, 19 | "hash": "c7988b4a7ce47ef3aed755e53df221a0d5307e83a5748b4c1ddc6e50832cfb45" 20 | } 21 | -------------------------------------------------------------------------------- /.sqlx/query-ebc5a43458570f6f64356d4fdffb906b7b52ff09a55a3064ed7943558234b103.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "DELETE FROM pg_task WHERE id = $1", 4 | "describe": { 5 | "columns": [], 6 | "parameters": { 7 | "Left": [ 8 | "Uuid" 9 | ] 10 | }, 11 | "nullable": [] 12 | }, 13 | "hash": "ebc5a43458570f6f64356d4fdffb906b7b52ff09a55a3064ed7943558234b103" 14 | } 15 | -------------------------------------------------------------------------------- /.sqlx/query-f7824b0e7bc69b17d2c3de68b35dc382dd6fff214a80f93c4b9fd082bf24696c.json: -------------------------------------------------------------------------------- 1 | { 2 | "db_name": "PostgreSQL", 3 | "query": "\n UPDATE pg_task\n SET is_running = false,\n tried = 0,\n step = $2,\n wakeup_at = $3\n WHERE id = $1\n ", 4 | "describe": { 5 | "columns": [], 6 | "parameters": { 7 | "Left": [ 8 | "Uuid", 9 | "Text", 10 | "Timestamptz" 11 | ] 12 | }, 13 | "nullable": [] 14 | }, 15 | "hash": "f7824b0e7bc69b17d2c3de68b35dc382dd6fff214a80f93c4b9fd082bf24696c" 16 | } 17 | -------------------------------------------------------------------------------- /.typos.toml: -------------------------------------------------------------------------------- 1 | [default.extend-words] 2 | "nd" = "nd" # fn ordinal 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | description = "Resumable state machine based Postgres tasks" 3 | edition = "2021" 4 | license = "MIT" 5 | name = "pg_task" 6 | repository = 
"https://github.com/imbolc/pg_task" 7 | version = "0.2.1" 8 | 9 | [dependencies] 10 | async-trait = "0.1" 11 | chrono = { version = "0.4", features = ["std", "serde"] } 12 | code-path = "0.3" 13 | displaydoc = "0.2" 14 | num_cpus = "1" 15 | serde = { version = "1", features = ["derive"] } 16 | serde_json = "1" 17 | source-chain = "0.1" 18 | sqlx = { version = "0.8", features = [ 19 | "json", 20 | "chrono", 21 | "postgres", 22 | "runtime-tokio-rustls", 23 | "uuid", 24 | ] } 25 | thiserror = "2" 26 | tokio = "1" 27 | tracing = "0.1" 28 | 29 | [dev-dependencies] 30 | anyhow = "1" 31 | dotenv = "0.15" 32 | rusty-hook = "0.11" 33 | tokio = { version = "1", features = ["full"] } 34 | tracing-subscriber = { version = "0.3", features = ["env-filter"] } 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 imbolc 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pg_task 2 | 3 | [![License](https://img.shields.io/crates/l/pg_task.svg)](https://choosealicense.com/licenses/mit/) 4 | [![Crates.io](https://img.shields.io/crates/v/pg_task.svg)](https://crates.io/crates/pg_task) 5 | [![Docs.rs](https://docs.rs/pg_task/badge.svg)](https://docs.rs/pg_task) 6 | 7 | FSM-based Resumable Postgres tasks 8 | 9 | - **FSM-based** - each task is a granular state machine 10 | - **Resumable** - on error, after you fix the step logic or the external 11 | world, the task is able to pick up where it stopped 12 | - **Postgres** - a single table is enough to handle task scheduling, state 13 | transitions, and error processing 14 | 15 | ## Table of Contents 16 | 17 | - [Tutorial](#tutorial) 18 | - [Defining Tasks](#defining-tasks) 19 | - [Investigating Errors](#investigating-errors) 20 | - [Fixing the World](#fixing-the-world) 21 | - [Scheduling Tasks](#scheduling-tasks) 22 | - [Running Workers](#running-workers) 23 | - [Stopping Workers](#stopping-workers) 24 | - [Delaying Steps](#delaying-steps) 25 | - [Retrying Steps](#retrying-steps) 26 | 27 | ## Tutorial 28 | 29 | _The full runnable code is in [examples/tutorial.rs][tutorial-example]._ 30 | 31 | ### Defining Tasks 32 | 33 | We create a greeter task consisting of two steps: 34 | 35 | ```rust,ignore 36 | #[derive(Debug, Deserialize, Serialize)] 37 | pub struct ReadName { 38 | filename: String, 39 | } 40 | 41 | #[async_trait] 42 | impl Step for ReadName { 43 | const RETRY_LIMIT: i32 = 5; 44 | 45 | async fn step(self, _db: &PgPool) -> 
StepResult { 46 | let name = std::fs::read_to_string(&self.filename)?; 47 | NextStep::now(SayHello { name }) 48 | } 49 | } 50 | ``` 51 | 52 | The first step tries to read a name from a file: 53 | 54 | - `filename` - the only state we need in this step 55 | - `impl Step for ReadName` - our step is a part of a `Greeter` task 56 | - `RETRY_LIMIT` - the step is fallible, let's retry it a few times 57 | - `NextStep::now(SayHello { name })` - move our task to the `SayHello` step 58 | right now 59 | 60 | ```rust,ignore 61 | #[derive(Debug, Deserialize, Serialize)] 62 | pub struct SayHello { 63 | name: String, 64 | } 65 | #[async_trait] 66 | impl Step for SayHello { 67 | async fn step(self, _db: &PgPool) -> StepResult { 68 | println!("Hello, {}", self.name); 69 | NextStep::none() 70 | } 71 | } 72 | ``` 73 | 74 | The second step prints the greeting and finishes the task returning 75 | `NextStep::none()`. 76 | 77 | That's essentially all, except for some boilerplate you can find in the 78 | [full code][tutorial-example]. Let's run it: 79 | 80 | ```bash 81 | cargo run --example hello 82 | ``` 83 | 84 | ### Investigating Errors 85 | 86 | You'll see log messages about the 6 (first try + `RETRY_LIMIT`) attempts and 87 | the final error message. 
Let's look into the DB to find out what happened: 88 | 89 | ```bash 90 | ~$ psql pg_task -c 'table pg_task' 91 | -[ RECORD 1 ]------------------------------------------------ 92 | id | cddf7de1-1194-4bee-90c6-af73d9206ce2 93 | step | {"Greeter":{"ReadName":{"filename":"name.txt"}}} 94 | wakeup_at | 2024-06-30 09:32:27.703599+06 95 | tried | 6 96 | is_running | f 97 | error | No such file or directory (os error 2) 98 | created_at | 2024-06-30 09:32:22.628563+06 99 | updated_at | 2024-06-30 09:32:27.703599+06 100 | ``` 101 | 102 | - a non-null `error` field indicates that the task has errored and contains 103 | the error message 104 | - the `step` field provides you with the information about a particular step 105 | and its state when the error occurred 106 | 107 | ### Fixing the World 108 | 109 | In this case, the error is due to the external world state. Let's fix it by 110 | creating the file: 111 | 112 | ```bash 113 | echo 'Fixed World' > name.txt 114 | ``` 115 | 116 | To rerun the task, we just need to clear its `error`: 117 | 118 | ```bash 119 | psql pg_task -c 'update pg_task set error = null' 120 | ``` 121 | 122 | You'll see the log messages about rerunning the task and the greeting 123 | message of the final step. That's all 🎉. 124 | 125 | ## Scheduling Tasks 126 | 127 | Essentially scheduling a task is done by inserting a corresponding row into 128 | the `pg_task` table. You can do in by hands from `psql` or code in any 129 | language. 
There are also a few helpers
Using [`tokio::time::sleep`] 183 | before returning the next step creates a couple of issues: 184 | 185 | - if the process is crashed while sleeping it wont be considered done and 186 | will rerun on restart 187 | - you'd have to wait for the sleeping task to finish on [gracefulshutdown](#stopping-workers) 188 | 189 | Use [`NextStep::delay`] instead - it schedules the next step with the delay 190 | and finishes the current one right away. 191 | 192 | You can find a runnable example in the [examples/delay.rs][delay-example] 193 | 194 | ## Retrying Steps 195 | 196 | Use [`Step::RETRY_LIMIT`] and [`Step::RETRY_DELAY`] when you need to retry a 197 | task on errors: 198 | 199 | ```rust,ignore 200 | impl Step for ApiRequest { 201 | const RETRY_LIMIT: i32 = 5; 202 | const RETRY_DELAY: Duration = Duration::from_secs(5); 203 | 204 | async fn step(self, _db: &PgPool) -> StepResult { 205 | let result = api_request().await?; 206 | NextStep::now(ProcessResult { result }) 207 | } 208 | } 209 | ``` 210 | 211 | ## Contributing 212 | 213 | - please run [.pre-commit.sh] before sending a PR, it will check everything 214 | 215 | ## License 216 | 217 | This project is licensed under the [MIT license](LICENSE). 218 | 219 | [.pre-commit.sh]: https://github.com/imbolc/pg_task/blob/main/.pre-commit.sh 220 | [delay-example]: https://github.com/imbolc/pg_task/blob/main/examples/delay.rs 221 | [tutorial-example]: https://github.com/imbolc/pg_task/blob/main/examples/tutorial.rs 222 | -------------------------------------------------------------------------------- /examples/counter.rs: -------------------------------------------------------------------------------- 1 | //! 
A counter task gives some idea of the worker performance
let took = Utc::now() - self.started_at; 86 | let secs = num_seconds(took); 87 | let per_sec = self.up_to as f64 / secs; 88 | println!( 89 | "1..{}: done in {} secs, {} / sec", 90 | self.up_to, 91 | secs, 92 | per_sec.round() 93 | ); 94 | NextStep::none() 95 | } 96 | } 97 | 98 | fn num_seconds(duration: chrono::Duration) -> f64 { 99 | let seconds = duration.num_seconds(); 100 | let nanos = duration.num_nanoseconds().unwrap() % 1_000_000_000; 101 | seconds as f64 + (nanos as f64 / 1_000_000_000.0) 102 | } 103 | -------------------------------------------------------------------------------- /examples/delay.rs: -------------------------------------------------------------------------------- 1 | //! Scheduling delayed steps 2 | use async_trait::async_trait; 3 | use pg_task::{NextStep, Step, StepResult}; 4 | use serde::{Deserialize, Serialize}; 5 | use sqlx::PgPool; 6 | use std::time::Duration; 7 | 8 | mod util; 9 | 10 | // It wraps the task step into an enum which proxies necessary methods 11 | pg_task::task!(Sleeper { Sleep, Wakeup }); 12 | 13 | // Also we need a enum representing all the possible tasks 14 | pg_task::scheduler!(Tasks { Sleeper }); 15 | 16 | #[tokio::main] 17 | async fn main() -> anyhow::Result<()> { 18 | let db = util::init().await?; 19 | 20 | // Let's schedule a few tasks 21 | for delay in [3, 1, 2] { 22 | pg_task::enqueue(&db, &Tasks::Sleeper(Sleep(delay).into())).await?; 23 | } 24 | 25 | // And run a worker 26 | pg_task::Worker::::new(db).run().await?; 27 | 28 | Ok(()) 29 | } 30 | 31 | #[derive(Debug, Deserialize, Serialize)] 32 | pub struct Sleep(u64); 33 | #[async_trait] 34 | impl Step for Sleep { 35 | async fn step(self, _db: &PgPool) -> StepResult { 36 | println!("Sleeping for {} sec", self.0); 37 | NextStep::delay(Wakeup(self.0), Duration::from_secs(self.0)) 38 | } 39 | } 40 | 41 | #[derive(Debug, Deserialize, Serialize)] 42 | pub struct Wakeup(u64); 43 | #[async_trait] 44 | impl Step for Wakeup { 45 | async fn step(self, _db: &PgPool) -> 
StepResult { 46 | println!("Woke up after {} sec", self.0); 47 | NextStep::none() 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /examples/tutorial.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use pg_task::{NextStep, Step, StepResult}; 3 | use serde::{Deserialize, Serialize}; 4 | use sqlx::PgPool; 5 | 6 | mod util; 7 | 8 | // Creates a enum `Greeter` containing our task steps 9 | pg_task::task!(Greeter { ReadName, SayHello }); 10 | 11 | // Creates a enum `Tasks` representing all the possible tasks 12 | pg_task::scheduler!(Tasks { Greeter }); 13 | 14 | #[derive(Debug, Deserialize, Serialize)] 15 | pub struct ReadName { 16 | filename: String, 17 | } 18 | #[async_trait] 19 | impl Step for ReadName { 20 | const RETRY_LIMIT: i32 = 5; 21 | 22 | async fn step(self, _db: &PgPool) -> StepResult { 23 | let name = std::fs::read_to_string(self.filename)?; 24 | NextStep::now(SayHello { name }) 25 | } 26 | } 27 | 28 | #[derive(Debug, Deserialize, Serialize)] 29 | pub struct SayHello { 30 | name: String, 31 | } 32 | #[async_trait] 33 | impl Step for SayHello { 34 | async fn step(self, _db: &PgPool) -> StepResult { 35 | println!("Hello, {}", self.name); 36 | NextStep::none() 37 | } 38 | } 39 | 40 | #[tokio::main] 41 | async fn main() -> anyhow::Result<()> { 42 | let db = util::init().await?; 43 | 44 | // Let's schedule the task 45 | pg_task::enqueue( 46 | &db, 47 | &Tasks::Greeter( 48 | ReadName { 49 | filename: "name.txt".into(), 50 | } 51 | .into(), 52 | ), 53 | ) 54 | .await?; 55 | 56 | // And run a worker 57 | pg_task::Worker::::new(db).run().await?; 58 | 59 | Ok(()) 60 | } 61 | -------------------------------------------------------------------------------- /examples/util.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use sqlx::PgPool; 3 | use tracing_subscriber::{EnvFilter, 
FmtSubscriber}; 4 | 5 | pub async fn init() -> Result { 6 | dotenv::dotenv().ok(); 7 | init_logging()?; 8 | connect().await 9 | } 10 | 11 | async fn connect() -> Result { 12 | let db = sqlx::PgPool::connect(&std::env::var("DATABASE_URL")?).await?; 13 | sqlx::migrate!().run(&db).await?; 14 | Ok(db) 15 | } 16 | 17 | fn init_logging() -> Result<()> { 18 | let subscriber = FmtSubscriber::builder() 19 | .with_env_filter(EnvFilter::from_default_env()) 20 | .finish(); 21 | tracing::subscriber::set_global_default(subscriber)?; 22 | Ok(()) 23 | } 24 | 25 | // Make `cagro check --examples` happy 26 | #[allow(dead_code)] 27 | fn main() {} 28 | -------------------------------------------------------------------------------- /migrations/20230703122702_table.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE pg_task ( 2 | id UUID PRIMARY KEY DEFAULT gen_random_uuid(), 3 | step TEXT NOT NULL, 4 | wakeup_at timestamptz NOT NULL DEFAULT now(), 5 | tried INT NOT NULL DEFAULT 0, 6 | is_running BOOLEAN NOT NULL DEFAULT false, 7 | error TEXT, 8 | created_at timestamptz NOT NULL DEFAULT now(), 9 | updated_at timestamptz NOT NULL DEFAULT now() 10 | ); 11 | 12 | CREATE INDEX pg_task_wakeup_at_idx ON pg_task (wakeup_at); 13 | 14 | COMMENT ON TABLE pg_task IS 'pg_task tasks'; 15 | COMMENT ON COLUMN pg_task.step IS 'State of the current step'; 16 | COMMENT ON COLUMN pg_task.wakeup_at IS 'Scheduled time for the task to execute the current step'; 17 | COMMENT ON COLUMN pg_task.tried IS 'Number of times the current step has resulted in an error'; 18 | COMMENT ON COLUMN pg_task.is_running IS 'Indicates if the current step is running right now'; 19 | COMMENT ON COLUMN pg_task.error IS 'Indicates if the current step has resulted in an error and all retry attempts have been exhausted. 
Set this field to null for the step to resume.'; 20 | COMMENT ON COLUMN pg_task.created_at IS 'Time the task was created'; 21 | COMMENT ON COLUMN pg_task.updated_at IS 'Time the task was updated'; 22 | -------------------------------------------------------------------------------- /migrations/20230714025134_trigger.sql: -------------------------------------------------------------------------------- 1 | CREATE FUNCTION pg_task_notify_on_change() 2 | RETURNS trigger AS $$ 3 | BEGIN 4 | PERFORM pg_notify('pg_task_changed', ''); 5 | RETURN NEW; 6 | END; 7 | $$ LANGUAGE plpgsql; 8 | 9 | CREATE TRIGGER pg_task_changed 10 | AFTER INSERT OR UPDATE 11 | ON pg_task 12 | FOR EACH ROW 13 | EXECUTE PROCEDURE pg_task_notify_on_change(); 14 | -------------------------------------------------------------------------------- /migrations/20240630005452_auto-updated_at.sql: -------------------------------------------------------------------------------- 1 | CREATE OR REPLACE FUNCTION pg_task_before_update_refresh_updated_at() 2 | RETURNS TRIGGER AS $$ 3 | BEGIN 4 | NEW.updated_at = now(); 5 | RETURN NEW; 6 | END; 7 | $$ LANGUAGE plpgsql; 8 | 9 | COMMENT ON FUNCTION pg_task_before_update_refresh_updated_at 10 | IS 'Refreshes `updated_at` column of the task'; 11 | 12 | 13 | CREATE TRIGGER pg_task_before_update_refresh_updated_at_trigger 14 | BEFORE UPDATE ON pg_task 15 | FOR EACH ROW EXECUTE FUNCTION pg_task_before_update_refresh_updated_at(); 16 | 17 | COMMENT ON TRIGGER pg_task_before_update_refresh_updated_at_trigger 18 | ON pg_task 19 | IS 'Refreshes `updated_at` column of the task on change'; 20 | 21 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | use crate::NextStep; 2 | use std::{error::Error as StdError, result::Result as StdResult}; 3 | 4 | /// The crate error 5 | #[derive(Debug, displaydoc::Display, thiserror::Error)] 6 | pub enum Error { 7 | 
/// can't add task 8 | AddTask(#[source] sqlx::Error), 9 | /// can't serialize step: {1} 10 | SerializeStep(#[source] serde_json::Error, String), 11 | /** 12 | can't deserialize step (the task was likely changed between the 13 | scheduling and running of the step): {1} 14 | */ 15 | DeserializeStep(#[source] serde_json::Error, String), 16 | /// can't unlock stale tasks 17 | UnlockStaleTasks(#[source] sqlx::Error), 18 | /// waiter can't connect to the db 19 | ListenerConnect(#[source] sqlx::Error), 20 | /// can't start listening for tables changes 21 | ListenerListen(#[source] sqlx::Error), 22 | /// unreachable: worker semaphore is closed 23 | UnreachableWorkerSemaphoreClosed(#[source] tokio::sync::AcquireError), 24 | /// db error: {1} 25 | Db(#[source] sqlx::Error, String), 26 | } 27 | 28 | /// The crate result 29 | pub type Result = StdResult; 30 | 31 | /// Error of a task step 32 | pub type StepError = Box; 33 | 34 | /// Result returning from task steps 35 | pub type StepResult = StdResult, StepError>; 36 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![doc = include_str!("../README.md")] 2 | #![forbid(unsafe_code)] 3 | #![warn(clippy::all, missing_docs, nonstandard_style, future_incompatible)] 4 | 5 | mod error; 6 | mod listener; 7 | mod macros; 8 | mod next_step; 9 | mod task; 10 | mod traits; 11 | mod util; 12 | mod worker; 13 | 14 | pub use error::{Error, Result, StepError, StepResult}; 15 | pub use next_step::NextStep; 16 | pub use traits::{Scheduler, Step}; 17 | pub use worker::Worker; 18 | 19 | use chrono::{DateTime, Utc}; 20 | use sqlx::{types::Uuid, PgExecutor}; 21 | use std::time::Duration; 22 | 23 | const LOST_CONNECTION_SLEEP: Duration = Duration::from_secs(1); 24 | 25 | /// Enqueues the task to be run immediately 26 | pub async fn enqueue<'e>(db: impl PgExecutor<'e>, task: &impl Scheduler) -> Result { 27 | 
task.enqueue(db).await 28 | } 29 | 30 | /// Schedules a task to be run after a specified delay 31 | pub async fn delay<'e>( 32 | db: impl PgExecutor<'e>, 33 | task: &impl Scheduler, 34 | delay: Duration, 35 | ) -> Result { 36 | task.delay(db, delay).await 37 | } 38 | 39 | /// Schedules a task to run at a specified time in the future 40 | pub async fn schedule<'e>( 41 | db: impl PgExecutor<'e>, 42 | task: &impl Scheduler, 43 | at: DateTime, 44 | ) -> Result { 45 | task.schedule(db, at).await 46 | } 47 | -------------------------------------------------------------------------------- /src/listener.rs: -------------------------------------------------------------------------------- 1 | use crate::{util, LOST_CONNECTION_SLEEP}; 2 | use sqlx::{postgres::PgListener, PgPool}; 3 | use std::{ 4 | sync::{ 5 | atomic::{AtomicBool, Ordering}, 6 | Arc, 7 | }, 8 | time::Duration, 9 | }; 10 | use tokio::{ 11 | sync::{futures::Notified, Notify}, 12 | time::{sleep, timeout}, 13 | }; 14 | use tracing::{trace, warn}; 15 | 16 | const NOTIFICATION_CHANNEL: &str = "pg_task_changed"; 17 | const STOP_WORKER_NOTIFICATION: &str = "stop_worker"; 18 | 19 | /// Waits for tasks table to change 20 | pub struct Listener { 21 | notify: Arc, 22 | stop_worker: Arc, 23 | } 24 | 25 | /// Subscription to the [`Listener`] notifications 26 | pub struct Subscription<'a>(Notified<'a>); 27 | 28 | impl Listener { 29 | /// Creates a waiter 30 | pub fn new() -> Self { 31 | let notify = Arc::new(Notify::new()); 32 | let stop_worker = Arc::new(AtomicBool::new(false)); 33 | Self { 34 | notify, 35 | stop_worker, 36 | } 37 | } 38 | 39 | /// Connects to the db and starts to listen to tasks table changes 40 | pub async fn listen(&self, db: PgPool) -> crate::Result<()> { 41 | let mut listener = PgListener::connect_with(&db) 42 | .await 43 | .map_err(crate::Error::ListenerConnect)?; 44 | listener 45 | .listen(NOTIFICATION_CHANNEL) 46 | .await 47 | .map_err(crate::Error::ListenerListen)?; 48 | 49 | let notify = 
self.notify.clone(); 50 | let stop_worker = self.stop_worker.clone(); 51 | tokio::spawn(async move { 52 | loop { 53 | match listener.recv().await { 54 | Ok(msg) => { 55 | if msg.payload() == STOP_WORKER_NOTIFICATION { 56 | trace!("Got stop-worker notification"); 57 | stop_worker.store(true, Ordering::SeqCst); 58 | } 59 | } 60 | Err(e) => { 61 | warn!("Listening for the tasks table changes is interrupted (probably due to db connection loss):\n{}", source_chain::to_string(&e)); 62 | sleep(LOST_CONNECTION_SLEEP).await; 63 | util::wait_for_reconnection(&db, LOST_CONNECTION_SLEEP).await; 64 | warn!("Listening for the tasks table changes is probably restored"); 65 | } 66 | }; 67 | notify.notify_waiters(); 68 | } 69 | }); 70 | Ok(()) 71 | } 72 | 73 | /// Subscribes for notifications. 74 | /// 75 | /// Awaiting on the result ends on the first notification after the 76 | /// subscription, even if it happens between the subscription and awaiting. 77 | pub fn subscribe(&self) -> Subscription<'_> { 78 | Subscription(self.notify.notified()) 79 | } 80 | 81 | /// Returns true if a notification to stop the worker has been received 82 | pub fn time_to_stop_worker(&self) -> bool { 83 | self.stop_worker.load(Ordering::SeqCst) 84 | } 85 | } 86 | 87 | impl<'a> Subscription<'a> { 88 | pub async fn wait_for(self, period: Duration) { 89 | trace!("⌛Waiting for the tasks table to change for {period:?}"); 90 | match timeout(period, self.0).await { 91 | Ok(_) => trace!("⚡The tasks table has changed"), 92 | Err(_) => trace!("⏰The waiting timeout has expired"), 93 | } 94 | } 95 | 96 | pub async fn wait_forever(self) { 97 | trace!("⌛Waiting for the tasks table to change"); 98 | self.0.await; 99 | trace!("⚡The tasks table has changed"); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/macros.rs: -------------------------------------------------------------------------------- 1 | /// Implements enum wrapper for a single task containing all its steps
2 | #[macro_export] 3 | macro_rules! task { 4 | ($enum:ident { $($variant:ident),* $(,)? }) => { 5 | #[derive(Debug, serde::Deserialize, serde::Serialize)] 6 | pub enum $enum { 7 | $($variant($variant),)* 8 | } 9 | 10 | $( 11 | impl From<$variant> for $enum { 12 | fn from(inner: $variant) -> Self { 13 | Self::$variant(inner) 14 | } 15 | } 16 | )* 17 | 18 | #[async_trait::async_trait] 19 | impl $crate::Step<$enum> for $enum { 20 | async fn step(self, db: &sqlx::PgPool) -> $crate::StepResult<$enum> { 21 | match self { 22 | $(Self::$variant(inner) => inner.step(db).await.map(|next| 23 | match next { 24 | $crate::NextStep::None => $crate::NextStep::None, 25 | $crate::NextStep::Now(x) => $crate::NextStep::Now(x.into()), 26 | $crate::NextStep::Delayed(x, d) => $crate::NextStep::Delayed(x.into(), d), 27 | } 28 | ),)* 29 | } 30 | } 31 | 32 | fn retry_limit(&self) -> i32 { 33 | match self { 34 | $(Self::$variant(inner) => inner.retry_limit(),)* 35 | } 36 | } 37 | 38 | fn retry_delay(&self) -> std::time::Duration { 39 | match self { 40 | $(Self::$variant(inner) => inner.retry_delay(),)* 41 | } 42 | } 43 | } 44 | } 45 | } 46 | 47 | /// The macro implements the outer enum wrapper containing all the tasks 48 | #[macro_export] 49 | macro_rules! scheduler { 50 | ($enum:ident { $($variant:ident),* $(,)? 
}) => { 51 | $crate::task!($enum { $($variant),* }); 52 | impl $crate::Scheduler for $enum {} 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/next_step.rs: -------------------------------------------------------------------------------- 1 | use crate::StepResult; 2 | use std::time::Duration; 3 | 4 | /// Represents next step of the task 5 | pub enum NextStep { 6 | /// The task is done 7 | None, 8 | /// Run the next step immediately 9 | Now(T), 10 | /// Delay the next step 11 | Delayed(T, Duration), 12 | } 13 | 14 | impl NextStep { 15 | /// The task is done 16 | pub fn none() -> StepResult { 17 | Ok(Self::None) 18 | } 19 | 20 | /// Run the next step immediately 21 | pub fn now(step: impl Into) -> StepResult { 22 | Ok(Self::Now(step.into())) 23 | } 24 | 25 | /// Delay the next step 26 | pub fn delay(step: impl Into, delay: Duration) -> StepResult { 27 | Ok(Self::Delayed(step.into(), delay)) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/task.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | util::{chrono_duration_to_std, db_error, ordinal, std_duration_to_chrono}, 3 | Error, NextStep, Result, Step, StepError, 4 | }; 5 | use chrono::{DateTime, Utc}; 6 | use serde::Serialize; 7 | use sqlx::{ 8 | postgres::{PgConnection, PgPool}, 9 | types::Uuid, 10 | }; 11 | use std::{fmt, time::Duration}; 12 | use tracing::{debug, error, info, trace}; 13 | 14 | #[derive(Debug)] 15 | pub struct Task { 16 | pub id: Uuid, 17 | step: String, 18 | tried: i32, 19 | pub wakeup_at: DateTime, 20 | } 21 | 22 | impl Task { 23 | /// Returns a delay before running the task 24 | pub fn wait_before_running(&self) -> Option { 25 | let delay = self.wakeup_at - Utc::now(); 26 | if delay <= chrono::Duration::zero() { 27 | None 28 | } else { 29 | Some(chrono_duration_to_std(delay)) 30 | } 31 | } 32 | 33 | /// Fetches the closest task to 
run 34 | pub async fn fetch_closest(con: &mut PgConnection) -> Result> { 35 | trace!("Fetching the closest task to run"); 36 | sqlx::query_as!( 37 | Task, 38 | r#" 39 | SELECT 40 | id, 41 | step, 42 | tried, 43 | wakeup_at 44 | FROM pg_task 45 | WHERE is_running = false 46 | AND error IS NULL 47 | ORDER BY wakeup_at 48 | LIMIT 1 49 | FOR UPDATE 50 | "#, 51 | ) 52 | .fetch_optional(con) 53 | .await 54 | .map_err(db_error!()) 55 | } 56 | 57 | /// Marks the task running 58 | pub async fn mark_running(&self, con: &mut PgConnection) -> Result<()> { 59 | trace!("[{}] mark running", self.id); 60 | sqlx::query!( 61 | "UPDATE pg_task SET is_running = true WHERE id = $1", 62 | self.id 63 | ) 64 | .execute(con) 65 | .await 66 | .map_err(db_error!())?; 67 | Ok(()) 68 | } 69 | 70 | /// Runs the current step of the task to completion 71 | pub async fn run_step>(&self, db: &PgPool) -> Result<()> { 72 | info!( 73 | "[{id}]{attempt} run step {step}", 74 | id = self.id, 75 | attempt = if self.tried > 0 { 76 | format!(" {} attempt to", ordinal(self.tried + 1)) 77 | } else { 78 | "".into() 79 | }, 80 | step = self.step 81 | ); 82 | let step: S = match serde_json::from_str(&self.step) 83 | .map_err(|e| Error::DeserializeStep(e, format!("{:?}", self.step))) 84 | { 85 | Ok(x) => x, 86 | Err(e) => { 87 | self.save_error(db, e.into()).await.ok(); 88 | return Ok(()); 89 | } 90 | }; 91 | 92 | let retry_limit = step.retry_limit(); 93 | let retry_delay = step.retry_delay(); 94 | match step.step(db).await { 95 | Err(e) => { 96 | if self.tried < retry_limit { 97 | self.retry(db, self.tried, retry_limit, retry_delay, e) 98 | .await?; 99 | } else { 100 | self.save_error(db, e).await?; 101 | } 102 | } 103 | Ok(NextStep::None) => self.complete(db).await?, 104 | Ok(NextStep::Now(step)) => self.save_next_step(db, step, Duration::ZERO).await?, 105 | Ok(NextStep::Delayed(step, delay)) => self.save_next_step(db, step, delay).await?, 106 | }; 107 | Ok(()) 108 | } 109 | 110 | /// Saves the task error 111 | 
async fn save_error(&self, db: &PgPool, err: StepError) -> Result<()> { 112 | let err_str = source_chain::to_string(&*err); 113 | 114 | let (tried, step) = sqlx::query!( 115 | r#" 116 | UPDATE pg_task 117 | SET is_running = false, 118 | tried = tried + 1, 119 | error = $2, 120 | wakeup_at = now() 121 | WHERE id = $1 122 | RETURNING tried, step::TEXT as "step!" 123 | "#, 124 | self.id, 125 | &err_str, 126 | ) 127 | .fetch_one(db) 128 | .await 129 | .map(|r| (r.tried, r.step)) 130 | .map_err(db_error!())?; 131 | 132 | error!( 133 | "[{id}] resulted in an error at step {step} on {attempt} attempt: {err_str}", 134 | id = self.id, 135 | attempt = ordinal(tried + 1) 136 | ); 137 | 138 | Ok(()) 139 | } 140 | 141 | /// Updates the tasks step 142 | async fn save_next_step( 143 | &self, 144 | db: &PgPool, 145 | step: impl Serialize + fmt::Debug, 146 | delay: Duration, 147 | ) -> Result<()> { 148 | let step = match serde_json::to_string(&step) 149 | .map_err(|e| Error::SerializeStep(e, format!("{:?}", step))) 150 | { 151 | Ok(x) => x, 152 | Err(e) => return self.save_error(db, e.into()).await, 153 | }; 154 | debug!("[{}] moved to the next step {step}", self.id); 155 | 156 | sqlx::query!( 157 | " 158 | UPDATE pg_task 159 | SET is_running = false, 160 | tried = 0, 161 | step = $2, 162 | wakeup_at = $3 163 | WHERE id = $1 164 | ", 165 | self.id, 166 | step, 167 | Utc::now() + std_duration_to_chrono(delay), 168 | ) 169 | .execute(db) 170 | .await 171 | .map_err(db_error!())?; 172 | Ok(()) 173 | } 174 | 175 | /// Removes the finished task 176 | async fn complete(&self, db: &PgPool) -> Result<()> { 177 | info!("[{}] is successfully completed", self.id); 178 | sqlx::query!("DELETE FROM pg_task WHERE id = $1", self.id) 179 | .execute(db) 180 | .await 181 | .map_err(db_error!())?; 182 | Ok(()) 183 | } 184 | 185 | /// Schedules the task for retry 186 | async fn retry( 187 | &self, 188 | db: &PgPool, 189 | tried: i32, 190 | retry_limit: i32, 191 | delay: Duration, 192 | err: StepError, 
193 | ) -> Result<()> { 194 | let delay = std_duration_to_chrono(delay); 195 | debug!( 196 | "[{id}] scheduled {attempt} of {retry_limit} retries in {delay:?} on error: {err}", 197 | id = self.id, 198 | attempt = ordinal(tried + 1), 199 | err = source_chain::to_string(&*err), 200 | ); 201 | 202 | sqlx::query!( 203 | " 204 | UPDATE pg_task 205 | SET is_running = false, 206 | tried = tried + 1, 207 | wakeup_at = $2 208 | WHERE id = $1 209 | ", 210 | self.id, 211 | Utc::now() + delay, 212 | ) 213 | .execute(db) 214 | .await 215 | .map_err(db_error!())?; 216 | 217 | Ok(()) 218 | } 219 | } 220 | -------------------------------------------------------------------------------- /src/traits.rs: -------------------------------------------------------------------------------- 1 | use crate::{util::std_duration_to_chrono, Error, StepResult}; 2 | use async_trait::async_trait; 3 | use chrono::{DateTime, Utc}; 4 | use serde::{de::DeserializeOwned, Serialize}; 5 | use sqlx::{types::Uuid, PgExecutor, PgPool}; 6 | use std::{fmt, time::Duration}; 7 | 8 | /// A trait to implement on each task step 9 | #[async_trait] 10 | pub trait Step 11 | where 12 | Task: Sized, 13 | Self: Into + Send + Sized + fmt::Debug + DeserializeOwned + Serialize, 14 | { 15 | /// How many times to retry the step on an error 16 | const RETRY_LIMIT: i32 = 0; 17 | 18 | /// The time to wait between retries 19 | const RETRY_DELAY: Duration = Duration::from_secs(1); 20 | 21 | /// Processes the current step and returns the next if any 22 | async fn step(self, db: &PgPool) -> StepResult; 23 | 24 | /// Proxies the `RETRY_LIMIT` const, isn't meant to be changed in impls 25 | fn retry_limit(&self) -> i32 { 26 | Self::RETRY_LIMIT 27 | } 28 | 29 | /// Proxies the `RETRY_DELAY` const, isn't meant to be changed in impls 30 | fn retry_delay(&self) -> Duration { 31 | Self::RETRY_DELAY 32 | } 33 | } 34 | 35 | /// A trait to implement on the outer enum wrapper containing all the tasks 36 | #[async_trait] 37 | pub trait Scheduler:
fmt::Debug + DeserializeOwned + Serialize + Sized + Sync { 38 | /// Enqueues the task to be run immediately 39 | async fn enqueue<'e>(&self, db: impl PgExecutor<'e>) -> crate::Result { 40 | self.schedule(db, Utc::now()).await 41 | } 42 | 43 | /// Schedules a task to be run after a specified delay 44 | async fn delay<'e>(&self, db: impl PgExecutor<'e>, delay: Duration) -> crate::Result { 45 | let delay = std_duration_to_chrono(delay); 46 | self.schedule(db, Utc::now() + delay).await 47 | } 48 | 49 | /// Schedules a task to run at a specified time in the future 50 | async fn schedule<'e>( 51 | &self, 52 | db: impl PgExecutor<'e>, 53 | at: DateTime, 54 | ) -> crate::Result { 55 | let step = serde_json::to_string(self) 56 | .map_err(|e| Error::SerializeStep(e, format!("{self:?}")))?; 57 | sqlx::query!( 58 | "INSERT INTO pg_task (step, wakeup_at) VALUES ($1, $2) RETURNING id", 59 | step, 60 | at 61 | ) 62 | .map(|r| r.id) 63 | .fetch_one(db) 64 | .await 65 | .map_err(Error::AddTask) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | /// Converts a chrono duration to std, it uses absolute value of the chrono 2 | /// duration 3 | pub fn chrono_duration_to_std(chrono_duration: chrono::Duration) -> std::time::Duration { 4 | let seconds = chrono_duration.num_seconds(); 5 | let nanos = chrono_duration.num_nanoseconds().unwrap_or(0) % 1_000_000_000; 6 | std::time::Duration::new(seconds.unsigned_abs(), nanos.unsigned_abs() as u32) 7 | } 8 | 9 | /// Converts a std duration to chrono 10 | pub fn std_duration_to_chrono(std_duration: std::time::Duration) -> chrono::Duration { 11 | chrono::Duration::from_std(std_duration).unwrap_or(chrono::Duration::MAX) 12 | } 13 | 14 | /// Returns the ordinal string of a given integer 15 | pub fn ordinal(n: i32) -> String { 16 | match n.abs() { 17 | 11..=13 => format!("{}th", n), 18 | _ => match n % 10 
{ 19 | 1 => format!("{}st", n), 20 | 2 => format!("{}nd", n), 21 | 3 => format!("{}rd", n), 22 | _ => format!("{}th", n), 23 | }, 24 | } 25 | } 26 | 27 | /// Waits for the db reconnection 28 | pub async fn wait_for_reconnection(db: &sqlx::PgPool, sleep: std::time::Duration) { 29 | while let Err(sqlx::Error::Io(_)) = sqlx::query!("SELECT id FROM pg_task LIMIT 1") 30 | .fetch_optional(db) 31 | .await 32 | { 33 | tracing::trace!("Waiting for db reconnection"); 34 | tokio::time::sleep(sleep).await; 35 | } 36 | } 37 | 38 | /// A helper to construct db error 39 | macro_rules! db_error { 40 | () => { 41 | |e| $crate::Error::Db(e, code_path::code_path!().into()) 42 | }; 43 | ($desc:expr) => { 44 | |e| $crate::Error::Db(e, format!("{} {}", code_path::code_path!(), $desc)) 45 | }; 46 | } 47 | 48 | pub(crate) use db_error; 49 | -------------------------------------------------------------------------------- /src/worker.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | listener::Listener, 3 | task::Task, 4 | util::{db_error, wait_for_reconnection}, 5 | Error, Result, Step, LOST_CONNECTION_SLEEP, 6 | }; 7 | use sqlx::postgres::PgPool; 8 | use std::{marker::PhantomData, sync::Arc, time::Duration}; 9 | use tokio::{sync::Semaphore, time::sleep}; 10 | use tracing::{debug, error, info, trace, warn}; 11 | 12 | /// A worker for processing tasks 13 | pub struct Worker { 14 | db: PgPool, 15 | listener: Listener, 16 | tasks: PhantomData, 17 | concurrency: usize, 18 | } 19 | 20 | impl> Worker { 21 | /// Creates a new worker 22 | pub fn new(db: PgPool) -> Self { 23 | let listener = Listener::new(); 24 | let concurrency = num_cpus::get(); 25 | Self { 26 | db, 27 | listener, 28 | concurrency, 29 | tasks: PhantomData, 30 | } 31 | } 32 | 33 | /// Sets the number of concurrent tasks, default is the number of CPU cores 34 | pub fn with_concurrency(mut self, concurrency: usize) -> Self { 35 | self.concurrency = concurrency; 36 | self 37 | } 
38 | 39 | /// Runs all ready tasks to completion and waits for new ones 40 | pub async fn run(&self) -> Result<()> { 41 | self.unlock_stale_tasks().await?; 42 | self.listener.listen(self.db.clone()).await?; 43 | 44 | let semaphore = Arc::new(Semaphore::new(self.concurrency)); 45 | 46 | loop { 47 | match self.recv_task().await { 48 | Ok(Some(task)) => { 49 | let permit = semaphore 50 | .clone() 51 | .acquire_owned() 52 | .await 53 | .map_err(Error::UnreachableWorkerSemaphoreClosed)?; 54 | let db = self.db.clone(); 55 | tokio::spawn(async move { 56 | if let Err(e) = task.run_step::(&db).await { 57 | error!("[{}] {}", task.id, source_chain::to_string(&e)); 58 | }; 59 | drop(permit); 60 | }); 61 | } 62 | Ok(None) => { 63 | self.wait_for_steps_to_finish(semaphore.clone()).await; 64 | info!("Stopped"); 65 | return Ok(()); 66 | } 67 | Err(e) => { 68 | warn!( 69 | "Can't fetch a task (probably due to db connection loss):\n{}", 70 | source_chain::to_string(&e) 71 | ); 72 | sleep(LOST_CONNECTION_SLEEP).await; 73 | wait_for_reconnection(&self.db, LOST_CONNECTION_SLEEP).await; 74 | warn!("Task fetching is probably restored"); 75 | } 76 | } 77 | } 78 | } 79 | 80 | /// Unlocks all tasks. This is intended to run at the start of the worker as 81 | /// some tasks could remain locked as running indefinitely if the 82 | /// previous run ended due to some kind of crash. 83 | async fn unlock_stale_tasks(&self) -> Result<()> { 84 | let unlocked = 85 | sqlx::query!("UPDATE pg_task SET is_running = false WHERE is_running = true") 86 | .execute(&self.db) 87 | .await 88 | .map_err(Error::UnlockStaleTasks)? 89 | .rows_affected(); 90 | if unlocked == 0 { 91 | debug!("No stale tasks to unlock") 92 | } else { 93 | debug!("Unlocked {} stale tasks", unlocked) 94 | } 95 | Ok(()) 96 | } 97 | 98 | /// Waits until the next task is ready, marks it running and returns it. 
99 | /// Returns `None` if the worker is stopped 100 | async fn recv_task(&self) -> Result> { 101 | trace!("Receiving the next task"); 102 | 103 | loop { 104 | if self.listener.time_to_stop_worker() { 105 | return Ok(None); 106 | } 107 | 108 | let table_changes = self.listener.subscribe(); 109 | let mut tx = self.db.begin().await.map_err(db_error!("begin"))?; 110 | 111 | let Some(task) = Task::fetch_closest(&mut tx).await? else { 112 | // No tasks, waiting for the tasks table changes 113 | tx.commit().await.map_err(db_error!("no tasks"))?; 114 | table_changes.wait_forever().await; 115 | continue; 116 | }; 117 | 118 | if let Some(delay) = task.wait_before_running() { 119 | // Waiting until a task is ready or for the tasks table to change 120 | tx.commit().await.map_err(db_error!("wait"))?; 121 | table_changes.wait_for(delay).await; 122 | continue; 123 | }; 124 | 125 | task.mark_running(&mut tx).await?; 126 | tx.commit().await.map_err(db_error!("mark running"))?; 127 | return Ok(Some(task)); 128 | } 129 | } 130 | 131 | async fn wait_for_steps_to_finish(&self, semaphore: Arc) { 132 | let mut logged_tasks_left = None; 133 | loop { 134 | let tasks_left = self.concurrency - semaphore.available_permits(); 135 | if tasks_left == 0 { 136 | break; 137 | } 138 | if let Some(logged) = logged_tasks_left { 139 | if logged != tasks_left { 140 | trace!("Waiting for the current steps of {tasks_left} tasks to finish..."); 141 | } 142 | } else { 143 | info!("Waiting for the current steps of {tasks_left} tasks to finish..."); 144 | } 145 | logged_tasks_left = Some(tasks_left); 146 | sleep(Duration::from_secs_f32(0.1)).await; 147 | } 148 | if logged_tasks_left.is_some() { 149 | trace!("The current step of every task is done") 150 | } 151 | } 152 | } 153 | --------------------------------------------------------------------------------