├── .editorconfig ├── .github └── workflows │ └── hedwig.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── examples ├── googlepubsub.rs └── redis.rs ├── hedwig_core ├── Cargo.toml └── src │ ├── lib.rs │ ├── message.rs │ └── topic.rs └── src ├── backends ├── googlepubsub │ ├── consumer.rs │ ├── mod.rs │ └── publisher.rs ├── mock.rs ├── mod.rs └── redis │ ├── consumer.rs │ ├── mod.rs │ └── publisher.rs ├── consumer.rs ├── lib.rs ├── publisher.rs ├── tests ├── google.rs ├── json.rs └── mod.rs └── validators ├── json_schema.rs ├── mod.rs └── prost.rs /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | charset = utf-8 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 4 10 | max_line_length = 120 11 | 12 | [*.md] 13 | trim_trailing_whitespace = false 14 | -------------------------------------------------------------------------------- /.github/workflows/hedwig.yml: -------------------------------------------------------------------------------- 1 | name: Test hedwig 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - '*.md' 7 | - 'LICENSE' 8 | branches: 9 | - master 10 | pull_request: 11 | types: [opened, reopened, synchronize] 12 | 13 | jobs: 14 | lint: 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | timeout-minutes: 10 19 | steps: 20 | - uses: actions/checkout@v2 21 | - name: Install Rust 22 | uses: actions-rs/toolchain@v1 23 | with: 24 | toolchain: stable 25 | profile: minimal 26 | components: clippy 27 | default: true 28 | - name: Clippy 29 | uses: actions-rs/cargo@v1 30 | with: 31 | command: clippy 32 | args: --all-features -- -Dclippy::correctness -Dclippy::complexity -Dclippy::perf -Dunsafe_code -Dunreachable_pub -Dunused 33 | 34 | doc: 35 | runs-on: ubuntu-latest 36 | timeout-minutes: 10 37 | steps: 38 | - uses: actions/checkout@v2 39 | - name: Install Rust 40 | uses: actions-rs/toolchain@v1 41 | with: 42 | toolchain: nightly 43 | profile: minimal 44 | default: true 45 | - name: Doc 46 | uses: actions-rs/cargo@v1 47 | with: 48 | command: doc 49 | args: --all-features --manifest-path=Cargo.toml --no-deps 50 | env: 51 | RUSTDOCFLAGS: --cfg docsrs -Dmissing_docs -Drustdoc::broken_intra_doc_links 52 | 53 | test: 54 | runs-on: ${{ matrix.os }} 55 | strategy: 56 | fail-fast: false 57 | matrix: 58 | rust_toolchain: [nightly, stable, 1.81.0] 59 | os: [ubuntu-latest] 60 | timeout-minutes: 20 61 | steps: 62 | - uses: actions/checkout@v2 63 | - name: Install Rust ${{ matrix.rust_toolchain }} 64 | uses: actions-rs/toolchain@v1 65 | with: 66 | toolchain: ${{ matrix.rust_toolchain }} 67 | profile: minimal 68 | default: true 69 | - name: Build without features 70 | uses: actions-rs/cargo@v1 71 | with: 72 | command: build 73 | args: --no-default-features --manifest-path=Cargo.toml 74 | - name: Test with all features 75 | uses: actions-rs/cargo@v1 76 | with: 77 | command: test 78 | args: --manifest-path=Cargo.toml --all-features -- --nocapture 79 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binary files 2 | /target/ 3 | 4 | # These are backup files generated by rustfmt 5 | **/*.rs.bk 6 | 7 | # Private examples 8 | examples/private 9 | -------------------------------------------------------------------------------- /Cargo.toml:
-------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["hedwig_core"] 3 | 4 | [package] 5 | name = "hedwig" 6 | version = "7.2.0" 7 | authors = [ 8 | "Aniruddha Maru ", 9 | "Simonas Kazlauskas ", 10 | "Renar Narubin ", 11 | "Alessandro Pezzato ", 12 | ] 13 | edition = "2021" 14 | repository = "https://github.com/standard-ai/hedwig-rust.git" 15 | homepage = "https://github.com/standard-ai/hedwig-rust" 16 | readme = "README.md" 17 | description = "A simple message bus for Rust" 18 | license = "Apache-2.0" 19 | keywords = ["pubsub", "messagebus", "microservices", "redis"] 20 | categories = ["asynchronous", "web-programming"] 21 | rust-version = "1.81" 22 | 23 | [badges] 24 | maintenance = { status = "actively-developed" } 25 | 26 | [features] 27 | default = [] 28 | 29 | # Backends 30 | google = ["dep:ya-gcp", "dep:tracing", "dep:parking_lot"] 31 | redis = ["dep:tracing", "dep:async-channel", "dep:parking_lot", "dep:redis", "dep:tokio", "dep:base64"] 32 | mock = ["dep:async-channel", "dep:parking_lot"] 33 | 34 | # Validators 35 | json-schema = ["valico", "serde_json", "serde"] 36 | protobuf = ["prost"] 37 | 38 | [[example]] 39 | name = "googlepubsub" 40 | required-features = ["google", "protobuf"] 41 | 42 | [[example]] 43 | name = "redis" 44 | required-features = ["redis", "protobuf"] 45 | 46 | [dependencies] 47 | async-trait = { version = "0.1" } 48 | bytes = "1" 49 | either = { version = "1", features = ["use_std"], default-features = false } 50 | futures-util = { version = "0.3.17", features = [ 51 | "std", 52 | "sink", 53 | ], default-features = false } 54 | hedwig_core = { version = "0.1", path = "./hedwig_core" } 55 | pin-project = "1" 56 | smallstr = { version = "0.3.0", features = ["union"] } 57 | thiserror = { version = "1", default-features = false } 58 | url = { version = "2", default-features = false } 59 | uuid = { version = "1.6", features = ["v4"], default-features = false } 60 | 61 | async-channel = { version = "1.6", optional = true } 62 | serde = { version = "^1.0", optional = true, default-features = false } 63 | serde_json = { version = "^1", features = [ 64 | "std", 65 | ], optional = true, default-features = false } 66 | parking_lot = { version = "0.11", optional = true } 67 | prost = { version = "0.12", optional = true, features = [ 68 | "std", 69 | ], default-features = false } 70 | tracing = { version = "0.1.37", optional = true } 71 | valico = { version = "^3.2", optional = true, default-features = false } 72 | ya-gcp = { version = "0.11", features = ["pubsub"], optional = true } 73 | redis = { version = "0.29.0", features = ["streams", "tokio-comp", "connection-manager"], optional = true } 74 | base64 = { version = "0.22.1", optional = true } 75 | tokio = { version = "1", features = ["macros", "rt"], optional = true} 76 | 77 | [dev-dependencies] 78 | async-channel = { version = "1.6" } 79 | futures-channel = "0.3.17" 80 | parking_lot = { version = "0.11" } 81 | prost = { version = "0.12", features = ["std", "prost-derive"] } 82 | tokio = { version = "1", features = ["macros", "rt"] } 83 | tonic = "0.10" 84 | serde = { version = "1", features = ["derive"] } 85 | ya-gcp = { version = "0.11", features = ["pubsub", "emulators"] } 86 | structopt = "0.3" 87 | tracing-subscriber = "0.3.19" 88 | 89 | [package.metadata.docs.rs] 90 | all-features = true 91 | rustdoc-args = ["--cfg", "docsrs"] 92 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hedwig library for Rust 2 | 3 | [![Build Status](https://travis-ci.com/standard-ai/hedwig-rust.svg?branch=master)](https://travis-ci.com/standard-ai/hedwig-rust) 4 | [![Latest Version](https://img.shields.io/crates/v/hedwig.svg?style=flat-square)](https://crates.io/crates/hedwig) 5 | [![Docs](https://docs.rs/hedwig/badge.svg)](https://docs.rs/hedwig) 6 | 7 | ## What is it? 8 | 9 | Hedwig is an inter-service communication bus that works with backends such as Google Cloud Pub/Sub and 10 | Redis, while keeping things simple and straightforward. 11 | 12 | It allows validation of the message payloads before they are sent, helping to catch cross-component 13 | incompatibilities early. 14 | 15 | Hedwig allows separation of concerns between consumers and publishers so your services are loosely 16 | coupled, and the contract is enforced by the message payload validation. Hedwig may also be used to 17 | build asynchronous APIs. 18 | 19 | ## Quick Start 20 | 21 | ### Installation 22 | 23 | Add to Cargo.toml: 24 | 25 | ```toml 26 | [dependencies] 27 | hedwig = "7" 28 | ``` 29 | 30 | You may also need to enable additional features in order to use the optional backends or 31 | validators, like this: 32 | 33 | ```toml 34 | [dependencies] 35 | hedwig = { version = "7", features = ["google"] } 36 | ``` 37 | 38 | ### Usage 39 | 40 | See [the examples](https://github.com/standard-ai/hedwig-rust/tree/master/examples) in the 41 | repository as well as [the crate-level documentation](https://docs.rs/hedwig/) for 42 | usage examples. 43 | 44 | ## Getting Help 45 | 46 | We use GitHub issues for tracking bugs and feature requests. 47 | 48 | * If you believe you have found a bug, please [open an 49 | issue](https://github.com/standard-ai/hedwig-rust/issues/new) 50 | -------------------------------------------------------------------------------- /examples/googlepubsub.rs: -------------------------------------------------------------------------------- 1 | //! An example of ingesting messages from a PubSub subscription, applying a 2 | //! transformation, then submitting those transformations to another PubSub topic.
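//!
//! This example requires the `google` and `protobuf` features (see the `[[example]]`
//! entries in `Cargo.toml`). A typical invocation might look like
//! `cargo run --example googlepubsub --features google,protobuf -- --project-name my-project`,
//! where `my-project` stands in for your own GCP project name.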
3 | 4 | use futures_util::{SinkExt, StreamExt, TryFutureExt}; 5 | use hedwig::{ 6 | googlepubsub::{ 7 | AuthFlow, ClientBuilder, ClientBuilderConfig, PubSubConfig, PubSubMessage, PublishError, 8 | ServiceAccountAuth, StreamSubscriptionConfig, SubscriptionConfig, SubscriptionName, 9 | TopicConfig, TopicName, 10 | }, 11 | validators, Consumer, DecodableMessage, EncodableMessage, Headers, Publisher, 12 | }; 13 | use std::{error::Error as StdError, time::SystemTime}; 14 | use structopt::StructOpt; 15 | 16 | const USER_CREATED_TOPIC: &str = "user.created"; 17 | const USER_UPDATED_TOPIC: &str = "user.updated"; 18 | 19 | /// The input data, representing some user being created with the given name 20 | #[derive(PartialEq, Eq, prost::Message)] 21 | struct UserCreatedMessage { 22 | #[prost(string, tag = "1")] 23 | name: String, 24 | } 25 | 26 | impl EncodableMessage for UserCreatedMessage { 27 | type Error = validators::ProstValidatorError; 28 | type Validator = validators::ProstValidator; 29 | fn topic(&self) -> hedwig::Topic { 30 | USER_CREATED_TOPIC.into() 31 | } 32 | fn encode(&self, validator: &Self::Validator) -> Result<hedwig::ValidatedMessage, Self::Error> { 33 | validator.validate( 34 | uuid::Uuid::new_v4(), 35 | SystemTime::now(), 36 | "user.created/1.0", 37 | Headers::new(), 38 | self, 39 | ) 40 | } 41 | } 42 | 43 | impl DecodableMessage for UserCreatedMessage { 44 | type Error = validators::ProstDecodeError; 45 | type Decoder = 46 | validators::ProstDecoder<validators::prost::ExactSchemaMatcher<UserCreatedMessage>>; 47 | 48 | fn decode(msg: hedwig::ValidatedMessage, decoder: &Self::Decoder) -> Result<Self, Self::Error> { 49 | decoder.decode(msg) 50 | } 51 | } 52 | 53 | /// The output data, where the given user has now been assigned an ID and some metadata 54 | #[derive(PartialEq, Eq, prost::Message)] 55 | struct UserUpdatedMessage { 56 | #[prost(string, tag = "1")] 57 | name: String, 58 | 59 | #[prost(int64, tag = "2")] 60 | id: i64, 61 | 62 | #[prost(string, tag = "3")] 63 | metadata: String, 64 | } 65 | 66 | /// The output message will carry an ack token from the input message, to ack when the output is 67 | /// successfully published, or nack on failure 68 | #[derive(Debug)] 69 | struct TransformedMessage(PubSubMessage<UserUpdatedMessage>); 70 | 71 | impl EncodableMessage for TransformedMessage { 72 | type Error = validators::ProstValidatorError; 73 | type Validator = validators::ProstValidator; 74 | 75 | fn topic(&self) -> hedwig::Topic { 76 | USER_UPDATED_TOPIC.into() 77 | } 78 | 79 | fn encode(&self, validator: &Self::Validator) -> Result<hedwig::ValidatedMessage, Self::Error> { 80 | validator.validate( 81 | uuid::Uuid::new_v4(), 82 | SystemTime::now(), 83 | "user.updated/1.0", 84 | Headers::new(), 85 | &self.0.message, 86 | ) 87 | } 88 | } 89 | 90 | #[derive(Debug, StructOpt)] 91 | struct Args { 92 | /// The name of the pubsub project 93 | #[structopt(long)] 94 | project_name: String, 95 | 96 | /// Load credentials from an authorized user secret, such as the one created when running `gcloud auth 97 | /// application-default login` 98 | #[structopt(long)] 99 | user_account_credentials: Option<std::path::PathBuf>, 100 | 101 | /// Assume topics already exist and do not create them 102 | #[structopt(long)] 103 | assume_topics_exist: bool, 104 | 105 | /// Do not clean up created topics on exit 106 | #[structopt(long)] 107 | keep_topics: bool, 108 | 109 | /// Assume subscriptions already exist and do not create them 110 | #[structopt(long)] 111 | assume_subscriptions_exist: bool, 112 | 113 | /// Do not clean up created subscriptions on exit 114 | #[structopt(long)] 115 | keep_subscriptions: bool, 116 | } 117 | 118 | #[tokio::main(flavor = "current_thread")] 119 | async fn main() ->
Result<(), Box<dyn StdError>> { 120 | tracing_subscriber::fmt::init(); 121 | 122 | let args = Args::from_args(); 123 | 124 | println!("Building PubSub clients"); 125 | 126 | let auth_flow = if let Some(user) = args.user_account_credentials { 127 | AuthFlow::UserAccount(user) 128 | } else { 129 | AuthFlow::ServiceAccount(ServiceAccountAuth::EnvVar) 130 | }; 131 | 132 | let builder = ClientBuilder::new( 133 | ClientBuilderConfig::new().auth_flow(auth_flow), 134 | PubSubConfig::default(), 135 | ) 136 | .await?; 137 | 138 | let input_topic_name = TopicName::new(USER_CREATED_TOPIC); 139 | let subscription_name = SubscriptionName::new("user-metadata-updaters"); 140 | 141 | let output_topic_name = TopicName::new(USER_UPDATED_TOPIC); 142 | const APP_NAME: &str = "user-metadata-updater"; 143 | 144 | let mut publisher_client = builder 145 | .build_publisher(&args.project_name, APP_NAME) 146 | .await?; 147 | let mut consumer_client = builder.build_consumer(&args.project_name, APP_NAME).await?; 148 | 149 | if !args.assume_topics_exist { 150 | for topic_name in [&input_topic_name, &output_topic_name] { 151 | println!("Creating topic {:?}", topic_name); 152 | 153 | let _ = publisher_client 154 | .create_topic(TopicConfig { 155 | name: topic_name.clone(), 156 | ..TopicConfig::default() 157 | }) 158 | .await; 159 | } 160 | } 161 | 162 | println!("Creating subscription {:?}", &subscription_name); 163 | 164 | if !args.assume_subscriptions_exist { 165 | let _ = consumer_client 166 | .create_subscription(SubscriptionConfig { 167 | topic: input_topic_name.clone(), 168 | name: subscription_name.clone(), 169 | ..SubscriptionConfig::default() 170 | }) 171 | .await; 172 | } 173 | 174 | println!( 175 | "Synthesizing input messages for topic {:?}", 176 | &input_topic_name 177 | ); 178 | 179 | { 180 | let validator = validators::ProstValidator::new(); 181 | let mut input_sink = 182 | Publisher::<UserCreatedMessage>::publish_sink(publisher_client.publisher(), validator); 183 | 184 | for i in 1..=10 { 185 | let message = UserCreatedMessage { 186 | name: format!("Example Name #{}", i), 187 | }; 188 | 189 | input_sink.feed(message).await?; 190 | } 191 | input_sink.flush().await?; 192 | } 193 | 194 | println!("Ingesting input messages, applying transformations, and publishing to destination"); 195 | 196 | let mut read_stream = consumer_client 197 | .stream_subscription( 198 | subscription_name.clone(), 199 | StreamSubscriptionConfig::default(), 200 | ) 201 | .consume::<UserCreatedMessage>(hedwig::validators::ProstDecoder::new( 202 | hedwig::validators::prost::ExactSchemaMatcher::new("user.created/1.0"), 203 | )); 204 | 205 | let mut output_sink = Publisher::<TransformedMessage, _>::publish_sink_with_responses( 206 | publisher_client.publisher(), 207 | validators::ProstValidator::new(), 208 | futures_util::sink::unfold((), |_, message: TransformedMessage| async move { 209 | // if the output is successfully sent, ack the input to mark it as processed 210 | message.0.ack().await.map(|_success| ()) 211 | }), 212 | ); 213 | 214 | for i in 1..=10 { 215 | let PubSubMessage { ack_token, message } = read_stream 216 | .next() 217 | .await 218 | .expect("stream should have 10 elements")?; 219 | 220 | if message.name != format!("Example Name #{}", i) { 221 | println!("Unexpected message received: {:?}", &message.name); 222 | } else { 223 | println!("Received: {:?}", &message.name); 224 | } 225 | 226 | let transformed = TransformedMessage(PubSubMessage { 227 | ack_token, 228 | message: UserUpdatedMessage { 229 | name: message.name, 230 | id: random_id(), 231 | metadata: "some metadata".into(), 232 | },
}); 234 | 235 | output_sink 236 | .feed(transformed) 237 | .or_else(|publish_error| async move { 238 | // if publishing fails, nack the failed messages to allow later retries 239 | Err(match publish_error { 240 | PublishError::Publish { cause, messages } => { 241 | for failed_transform in messages { 242 | failed_transform.0.nack().await?; 243 | } 244 | Box::<dyn StdError>::from(cause) 245 | } 246 | err => Box::<dyn StdError>::from(err), 247 | }) 248 | }) 249 | .await? 250 | } 251 | output_sink.flush().await?; 252 | 253 | println!("All messages matched and published successfully!"); 254 | 255 | println!("Deleting subscription {:?}", &subscription_name); 256 | 257 | if !args.keep_subscriptions { 258 | let _ = consumer_client.delete_subscription(subscription_name).await; 259 | } 260 | 261 | if !args.keep_topics { 262 | for topic_name in [input_topic_name, output_topic_name] { 263 | println!("Deleting topic {:?}", &topic_name); 264 | 265 | let _ = publisher_client.delete_topic(topic_name).await; 266 | } 267 | } 268 | 269 | println!("Done"); 270 | 271 | Ok(()) 272 | } 273 | 274 | fn random_id() -> i64 { 275 | 4 // chosen by fair dice roll. 276 | // guaranteed to be random. 277 | } 278 | -------------------------------------------------------------------------------- /examples/redis.rs: -------------------------------------------------------------------------------- 1 | use futures_util::{SinkExt, StreamExt, TryFutureExt}; 2 | use hedwig::{ 3 | redis::{ClientBuilder, ClientBuilderConfig, Group, GroupName, RedisMessage, StreamName}, 4 | validators, Consumer, DecodableMessage, EncodableMessage, Headers, Publisher, 5 | }; 6 | use std::{error::Error as StdError, time::SystemTime}; 7 | use structopt::StructOpt; 8 | use tracing::warn; 9 | 10 | const USER_CREATED_TOPIC: &str = "user.created"; 11 | const USER_UPDATED_TOPIC: &str = "user.updated"; 12 | 13 | /// The input data, representing some user being created with the given name 14 | #[derive(PartialEq, Eq, prost::Message)] 15 | struct UserCreatedMessage { 16 | #[prost(string, tag = "1")] 17 | name: String, 18 | } 19 | 20 | impl EncodableMessage for UserCreatedMessage { 21 | type Error = validators::ProstValidatorError; 22 | type Validator = validators::ProstValidator; 23 | fn topic(&self) -> hedwig::Topic { 24 | USER_CREATED_TOPIC.into() 25 | } 26 | fn encode(&self, validator: &Self::Validator) -> Result<hedwig::ValidatedMessage, Self::Error> { 27 | validator.validate( 28 | uuid::Uuid::new_v4(), 29 | SystemTime::now(), 30 | "user.created/1.0", 31 | Headers::new(), 32 | self, 33 | ) 34 | } 35 | } 36 | 37 | impl DecodableMessage for UserCreatedMessage { 38 | type Error = validators::ProstDecodeError; 39 | type Decoder = 40 | validators::ProstDecoder<validators::prost::ExactSchemaMatcher<UserCreatedMessage>>; 41 | 42 | fn decode(msg: hedwig::ValidatedMessage, decoder: &Self::Decoder) -> Result<Self, Self::Error> { 43 | decoder.decode(msg) 44 | } 45 | } 46 | 47 | /// The output data, where the given user has now been assigned an ID and some metadata 48 | #[derive(PartialEq, Eq, prost::Message)] 49 | struct UserUpdatedMessage { 50 | #[prost(string, tag = "1")] 51 | name: String, 52 | 53 | #[prost(int64, tag = "2")] 54 | id: i64, 55 | 56 | #[prost(string, tag = "3")] 57 | metadata: String, 58 | } 59 | 60 | /// The output message will carry an ack token from the input message, to ack when the output is 61 | /// successfully published, or nack on failure 62 | #[derive(Debug)] 63 | struct TransformedMessage(RedisMessage<UserUpdatedMessage>); 64 | 65 | impl EncodableMessage for TransformedMessage { 66 | type Error = validators::ProstValidatorError; 67 | type Validator = validators::ProstValidator; 68 | 69 | fn
topic(&self) -> hedwig::Topic { 70 | USER_UPDATED_TOPIC.into() 71 | } 72 | 73 | fn encode(&self, validator: &Self::Validator) -> Result<hedwig::ValidatedMessage, Self::Error> { 74 | validator.validate( 75 | uuid::Uuid::new_v4(), 76 | SystemTime::now(), 77 | "user.updated/1.0", 78 | Headers::new(), 79 | &self.0.message, 80 | ) 81 | } 82 | } 83 | 84 | #[derive(Debug, StructOpt)] 85 | struct Args { 86 | #[structopt(long, default_value = "redis://localhost:6379")] 87 | endpoint: String, 88 | } 89 | 90 | #[tokio::main(flavor = "current_thread")] 91 | async fn main() -> Result<(), Box<dyn StdError>> { 92 | tracing_subscriber::fmt::init(); 93 | 94 | let args = Args::from_args(); 95 | 96 | println!("Building Redis clients"); 97 | 98 | let config = ClientBuilderConfig { 99 | endpoint: args.endpoint, 100 | }; 101 | 102 | let builder = ClientBuilder::new(config).await?; 103 | 104 | let input_topic_name = StreamName::from_topic(USER_CREATED_TOPIC); 105 | let input_consumer_group = Group::new(GroupName::new(APP_NAME), input_topic_name.clone()); 106 | 107 | const APP_NAME: &str = "user-metadata-updater"; 108 | 109 | let publisher_client = builder.build_publisher(APP_NAME).await?; 110 | let mut consumer_client = builder.build_consumer(APP_NAME).await?; 111 | 112 | let _ = consumer_client 113 | .create_consumer_group(&input_consumer_group) 114 | .await 115 | .inspect_err(|err| { 116 | warn!(err = err.to_string(), "cannot create consumer group"); 117 | }); 118 | 119 | println!( 120 | "Synthesizing input messages for topic {:?}", 121 | &input_topic_name 122 | ); 123 | 124 | { 125 | let validator = validators::ProstValidator::new(); 126 | let mut input_sink = Publisher::<UserCreatedMessage>::publish_sink( 127 | publisher_client.publisher().await, 128 | validator, 129 | ); 130 | 131 | for i in 1..=10 { 132 | let message = UserCreatedMessage { 133 | name: format!("Example Name #{}", i), 134 | }; 135 | 136 | println!("Sending message {:?}", message.name); 137 | 138 | input_sink.feed(message).await.unwrap(); 139 | } 140 | 141 | input_sink.flush().await.unwrap(); 142 | } 143 | 144 | println!("Ingesting input messages, applying transformations, and publishing to destination"); 145 | 146 | let mut read_stream = consumer_client 147 | .stream_subscription(input_consumer_group.clone()) 148 | .await 149 | .consume::<UserCreatedMessage>(hedwig::validators::ProstDecoder::new( 150 | hedwig::validators::prost::ExactSchemaMatcher::new("user.created/1.0"), 151 | )); 152 | 153 | let mut output_sink = Publisher::<TransformedMessage, _>::publish_sink_with_responses( 154 | publisher_client.publisher().await, 155 | validators::ProstValidator::new(), 156 | futures_util::sink::unfold((), |_, message: TransformedMessage| async move { 157 | // if the output is successfully sent, ack the input to mark it as processed 158 | message.0.ack().await.map(|_success| ()) 159 | }), 160 | ); 161 | 162 | for i in 1..=10 { 163 | let RedisMessage { ack_token, message } = read_stream 164 | .next() 165 | .await 166 | .expect("stream should have 10 elements")?; 167 | 168 | if message.name != format!("Example Name #{}", i) { 169 | println!("Unexpected message received: {:?}", &message.name); 170 | } else { 171 | println!("Received: {:?}", &message.name); 172 | } 173 | 174 | let transformed = TransformedMessage(RedisMessage { 175 | ack_token, 176 | message: UserUpdatedMessage { 177 | name: message.name, 178 | id: i, 179 | metadata: "some metadata".into(), 180 | }, 181 | }); 182 | 183 | let _ = output_sink 184 | .feed(transformed) 185 | .inspect_err(|publish_error| { 186 | println!("Error: {:?}", publish_error); 187 | }) 188 | .or_else(|publish_error| async move {
189 | // if publishing fails, nack the failed messages to allow later retries 190 | Err(match publish_error { 191 | hedwig::redis::PublishError::Publish { cause: _, messages } => { 192 | for failed_transform in messages { 193 | failed_transform.0.nack().await?; 194 | } 195 | Box::<dyn StdError>::from("Cannot publish message") 196 | } 197 | err => Box::<dyn StdError>::from(err), 198 | }) 199 | }) 200 | .await; 201 | } 202 | 203 | if (output_sink.flush().await).is_err() { 204 | panic!() 205 | } 206 | 207 | println!("All messages matched and published successfully!"); 208 | 209 | println!("Done"); 210 | 211 | Ok(()) 212 | } 213 | -------------------------------------------------------------------------------- /hedwig_core/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "hedwig_core" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | bytes = "1" 8 | smallstr = { version = "0.3.0", features = ["union"] } 9 | uuid = { version = "1.6", features = ["v4"], default-features = false } 10 | -------------------------------------------------------------------------------- /hedwig_core/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! The core set of traits and types used in the hedwig format. 2 | //! 3 | //! This crate aims to provide better version stability over the primary batteries-included 4 | //! `hedwig` crate. Top-level applications should typically use `hedwig`, while crates that define 5 | //! message types should use `hedwig_core` 6 | 7 | mod topic; 8 | pub use topic::Topic; 9 | pub mod message; 10 | 11 | /// Custom headers associated with a message. 12 | pub type Headers = std::collections::BTreeMap<String, String>; 13 | 14 | /// A validated message. 15 | pub type ValidatedMessage = message::ValidatedMessage<bytes::Bytes>; 16 | 17 | -------------------------------------------------------------------------------- /hedwig_core/src/message.rs: -------------------------------------------------------------------------------- 1 | //! Message related types 2 | 3 | use bytes::Bytes; 4 | use std::{borrow::Cow, time::SystemTime}; 5 | use uuid::Uuid; 6 | 7 | use crate::{Headers, Topic}; 8 | 9 | /// A validated message. 10 | /// 11 | /// These are created by validators after encoding a user message, or when pulling messages from 12 | /// the message service. 13 | #[derive(Debug, Clone)] 14 | // derive Eq only in tests so that users can't foot-shoot an expensive == over data 15 | #[cfg_attr(test, derive(PartialEq, Eq))] 16 | pub struct ValidatedMessage<M> { 17 | /// Unique message identifier. 18 | pub(crate) id: Uuid, 19 | /// The timestamp when message was created in the publishing service. 20 | pub(crate) timestamp: SystemTime, 21 | /// URI of the schema validating this message. 22 | /// 23 | /// E.g. `https://hedwig.domain.xyz/schemas#/schemas/user.created/1.0` 24 | pub(crate) schema: Cow<'static, str>, 25 | /// Custom message headers. 26 | /// 27 | /// This may be used to track request_id, for example. 28 | pub(crate) headers: Headers, 29 | /// The message data. 30 | pub(crate) data: M, 31 | } 32 | 33 | impl ValidatedMessage<Bytes> { 34 | /// Create a new validated message 35 | pub fn new<S, D>(id: Uuid, timestamp: SystemTime, schema: S, headers: Headers, data: D) -> Self 36 | where 37 | S: Into<Cow<'static, str>>, 38 | D: Into<Bytes>, 39 | { 40 | Self { 41 | id, 42 | timestamp, 43 | schema: schema.into(), 44 | headers, 45 | data: data.into(), 46 | } 47 | } 48 | } 49 | 50 | impl<M> ValidatedMessage<M> { 51 | /// Unique message identifier.
52 | pub fn uuid(&self) -> &Uuid { 53 | &self.id 54 | } 55 | 56 | /// The timestamp when message was created in the publishing service. 57 | pub fn timestamp(&self) -> &SystemTime { 58 | &self.timestamp 59 | } 60 | 61 | /// URI of the schema validating this message. 62 | /// 63 | /// E.g. `https://hedwig.domain.xyz/schemas#/schemas/user.created/1.0` 64 | pub fn schema(&self) -> &str { 65 | &self.schema 66 | } 67 | 68 | /// Custom message headers. 69 | /// 70 | /// This may be used to track request_id, for example. 71 | pub fn headers(&self) -> &Headers { 72 | &self.headers 73 | } 74 | 75 | /// Mutable access to the message headers 76 | pub fn headers_mut(&mut self) -> &mut Headers { 77 | &mut self.headers 78 | } 79 | 80 | /// The message data. 81 | pub fn data(&self) -> &M { 82 | &self.data 83 | } 84 | 85 | /// Destructure this message into just the contained data 86 | pub fn into_data(self) -> M { 87 | self.data 88 | } 89 | } 90 | 91 | /// Messages which can be decoded from a [`ValidatedMessage`] stream. 92 | pub trait DecodableMessage { 93 | /// The error returned when a message fails to decode 94 | type Error; 95 | 96 | /// The decoder used to decode a validated message 97 | type Decoder; 98 | 99 | /// Decode the given message, using the given decoder, into its structured type 100 | fn decode(msg: ValidatedMessage<Bytes>, decoder: &Self::Decoder) -> Result<Self, Self::Error> 101 | where 102 | Self: Sized; 103 | } 104 | 105 | impl<M> DecodableMessage for ValidatedMessage<M> 106 | where 107 | M: DecodableMessage, 108 | { 109 | /// The error returned when a message fails to decode 110 | type Error = M::Error; 111 | 112 | /// The decoder used to decode a validated message 113 | type Decoder = M::Decoder; 114 | 115 | /// Decode the given message, using the given decoder, into its structured type 116 | fn decode(msg: ValidatedMessage<Bytes>, decoder: &Self::Decoder) -> Result<Self, Self::Error> 117 | where 118 | Self: Sized, 119 | { 120 | let message = M::decode(msg.clone(), decoder)?; 121 | Ok(Self { 122 | id: msg.id, 123 | timestamp: msg.timestamp, 124 | schema: msg.schema, 125 | headers: msg.headers, 126 | data: message, 127 | }) 128 | } 129 | } 130 | 131 | /// Types that can be encoded and published. 132 | pub trait EncodableMessage { 133 | /// The errors that can occur when calling the [`EncodableMessage::encode`] method. 134 | /// 135 | /// Will typically match the errors returned by the [`EncodableMessage::Validator`]. 136 | type Error; 137 | 138 | /// The validator to use for this message. 139 | type Validator; 140 | 141 | /// Topic into which this message shall be published. 142 | fn topic(&self) -> Topic; 143 | 144 | /// Encode the message payload.
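    ///
    /// A typical implementation delegates to the validator, as the examples in this
    /// repository do (a sketch; the prost validator and the schema URI are illustrative,
    /// not mandated by this trait):
    ///
    /// ```ignore
    /// fn encode(&self, validator: &Self::Validator) -> Result<ValidatedMessage<Bytes>, Self::Error> {
    ///     validator.validate(
    ///         uuid::Uuid::new_v4(),
    ///         std::time::SystemTime::now(),
    ///         "user.created/1.0", // schema URI identifying this message type/version
    ///         Headers::new(),
    ///         self,
    ///     )
    /// }
    /// ```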
145 | fn encode(&self, validator: &Self::Validator) -> Result<ValidatedMessage<Bytes>, Self::Error>; 146 | } 147 | -------------------------------------------------------------------------------- /hedwig_core/src/topic.rs: -------------------------------------------------------------------------------- 1 | use smallstr::SmallString; 2 | 3 | /// A message queue topic name to which messages can be published 4 | // A survey of common topics found lengths between 16 and 35 bytes 5 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] 6 | pub struct Topic(SmallString<[u8; 36]>); 7 | 8 | impl Default for Topic { 9 | fn default() -> Self { 10 | Topic(SmallString::new()) 11 | } 12 | } 13 | 14 | impl std::fmt::Display for Topic { 15 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 16 | std::fmt::Display::fmt(self.0.as_str(), f) 17 | } 18 | } 19 | 20 | impl<'a> From<&'a str> for Topic { 21 | fn from(s: &'a str) -> Topic { 22 | Topic(s.into()) 23 | } 24 | } 25 | 26 | impl From<String> for Topic { 27 | fn from(s: String) -> Topic { 28 | Topic(s.into()) 29 | } 30 | } 31 | 32 | impl AsRef<str> for Topic { 33 | fn as_ref(&self) -> &str { 34 | self.0.as_ref() 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/backends/googlepubsub/consumer.rs: -------------------------------------------------------------------------------- 1 | //! A [`Consumer`](crate::Consumer) implementation for Google's [PubSub][0] service 2 | //! 3 | //! [0]: https://cloud.google.com/pubsub/ 4 | 5 | use crate::{Headers, ValidatedMessage}; 6 | use async_trait::async_trait; 7 | use futures_util::stream; 8 | use pin_project::pin_project; 9 | use std::{ 10 | borrow::Cow, 11 | fmt::Display, 12 | ops::Bound, 13 | pin::Pin, 14 | str::FromStr, 15 | task::{Context, Poll}, 16 | time::{Duration, SystemTime}, 17 | }; 18 | use tracing::debug; 19 | use uuid::Uuid; 20 | use ya_gcp::{ 21 | grpc::{Body, BoxBody, Bytes, DefaultGrpcImpl, GrpcService, StdError}, 22 | pubsub, 23 | }; 24 | 25 | use super::{ 26 | retry_policy, AcknowledgeError, BoxError, ModifyAcknowledgeError, PubSubError, 27 | StreamSubscriptionConfig, TopicName, 28 | }; 29 | 30 | /// A PubSub subscription name. 31 | /// 32 | /// This will be used to internally construct the expected 33 | /// `projects/{project}/subscriptions/hedwig-{queue}-{subscription_name}` format for API calls 34 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] 35 | pub struct SubscriptionName<'s>(Cow<'s, str>); 36 | 37 | impl<'s> SubscriptionName<'s> { 38 | /// Create a new `SubscriptionName` 39 | pub fn new(subscription: impl Into<Cow<'s, str>>) -> Self { 40 | Self(subscription.into()) 41 | } 42 | 43 | /// Create a new `SubscriptionName` for a cross-project subscription, i.e. a subscription that subscribes to a topic 44 | /// from another project.
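    ///
    /// A minimal sketch (both names are placeholders):
    ///
    /// ```ignore
    /// let name = SubscriptionName::with_cross_project("other-project", "my-subscription");
    /// ```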
45 | pub fn with_cross_project( 46 | project: impl Into<Cow<'s, str>>, 47 | subscription: impl Into<Cow<'s, str>>, 48 | ) -> Self { 49 | // the cross-project is effectively part of a compound subscription name 50 | Self(format!("{}-{}", project.into(), subscription.into()).into()) 51 | } 52 | 53 | /// Construct a full project and subscription name with this name 54 | fn into_project_subscription_name( 55 | self, 56 | project_name: impl Display, 57 | queue_name: impl Display, 58 | ) -> pubsub::ProjectSubscriptionName { 59 | pubsub::ProjectSubscriptionName::new( 60 | project_name, 61 | std::format_args!( 62 | "hedwig-{queue}-{subscription}", 63 | queue = queue_name, 64 | subscription = self.0 65 | ), 66 | ) 67 | } 68 | } 69 | 70 | /// A client through which PubSub consuming operations can be performed. 71 | /// 72 | /// This includes managing subscriptions and reading data from subscriptions. Created using 73 | /// [`build_consumer`](super::ClientBuilder::build_consumer) 74 | #[derive(Debug, Clone)] 75 | pub struct ConsumerClient<S = DefaultGrpcImpl> { 76 | client: pubsub::SubscriberClient<S>, 77 | project: String, 78 | queue: String, 79 | } 80 | 81 | impl<S> ConsumerClient<S> { 82 | /// Create a new consumer from an existing pubsub client. 83 | /// 84 | /// This function is useful for client customization; most callers should typically use the 85 | /// defaults provided by [`build_consumer`](super::ClientBuilder::build_consumer) 86 | pub fn from_client( 87 | client: pubsub::SubscriberClient<S>, 88 | project: String, 89 | queue: String, 90 | ) -> Self { 91 | ConsumerClient { 92 | client, 93 | project, 94 | queue, 95 | } 96 | } 97 | 98 | fn project(&self) -> &str { 99 | &self.project 100 | } 101 | 102 | fn queue(&self) -> &str { 103 | &self.queue 104 | } 105 | 106 | /// Construct a fully formatted project and subscription name for the given subscription 107 | pub fn format_subscription( 108 | &self, 109 | subscription: SubscriptionName<'_>, 110 | ) -> pubsub::ProjectSubscriptionName { 111 | subscription.into_project_subscription_name(self.project(), self.queue()) 112 | } 113 | 114 | /// Construct a fully formatted project and topic name for the given topic 115 | pub fn format_topic(&self, topic: TopicName<'_>) -> pubsub::ProjectTopicName { 116 | topic.into_project_topic_name(self.project()) 117 | } 118 | 119 | /// Get a reference to the underlying pubsub client 120 | pub fn inner(&self) -> &pubsub::SubscriberClient<S> { 121 | &self.client 122 | } 123 | 124 | /// Get a mutable reference to the underlying pubsub client 125 | pub fn inner_mut(&mut self) -> &mut pubsub::SubscriberClient<S> { 126 | &mut self.client 127 | } 128 | } 129 | 130 | impl<S> ConsumerClient<S> 131 | where 132 | S: GrpcService<BoxBody>, 133 | S::Error: Into<StdError>, 134 | S::ResponseBody: Body<Data = Bytes> + Send + 'static, 135 | <S::ResponseBody as Body>::Error: Into<StdError> + Send, 136 | { 137 | /// Create a new PubSub subscription 138 | /// 139 | /// See the GCP documentation on subscriptions [here](https://cloud.google.com/pubsub/docs/subscriber) 140 | pub async fn create_subscription( 141 | &mut self, 142 | config: SubscriptionConfig<'_>, 143 | ) -> Result<(), PubSubError> { 144 | let subscription = SubscriptionConfig::into_subscription(config, &*self); 145 | 146 | self.client 147 | .raw_api_mut() 148 | .create_subscription(subscription) 149 | .await?; 150 | 151 | Ok(()) 152 | } 153 | 154 | /// Delete an existing PubSub subscription.
155 | /// 156 | /// See the GCP documentation on subscriptions [here](https://cloud.google.com/pubsub/docs/subscriber) 157 | pub async fn delete_subscription( 158 | &mut self, 159 | subscription: SubscriptionName<'_>, 160 | ) -> Result<(), PubSubError> { 161 | let subscription = self.format_subscription(subscription).into(); 162 | 163 | self.client 164 | .raw_api_mut() 165 | .delete_subscription({ 166 | let mut r = pubsub::api::DeleteSubscriptionRequest::default(); 167 | r.subscription = subscription; 168 | r 169 | }) 170 | .await?; 171 | 172 | Ok(()) 173 | } 174 | 175 | /// Connect to PubSub and start streaming messages from the given subscription 176 | pub fn stream_subscription( 177 | &mut self, 178 | subscription: SubscriptionName<'_>, 179 | stream_config: StreamSubscriptionConfig, 180 | ) -> PubSubStream<S> 181 | where 182 | S: Clone, 183 | { 184 | let subscription = self.format_subscription(subscription); 185 | 186 | PubSubStream(self.client.stream_subscription(subscription, stream_config)) 187 | } 188 | 189 | /// Seeks the subscription to the given timestamp. 190 | /// All messages prior to the timestamp are marked as acknowledged, and all messages 191 | /// after the timestamp are marked as not acknowledged. 192 | pub async fn seek( 193 | &mut self, 194 | subscription: SubscriptionName<'_>, 195 | timestamp: pubsub::api::Timestamp, 196 | ) -> Result<(), PubSubError> { 197 | let request = { 198 | let mut r = pubsub::api::SeekRequest::default(); 199 | r.subscription = self.format_subscription(subscription).into(); 200 | r.target = Some(pubsub::api::seek_request::Target::Time(timestamp)); 201 | r 202 | }; 203 | self.client.raw_api_mut().seek(request).await?; 204 | Ok(()) 205 | } 206 | 207 | // TODO list_subscriptions (paginated, nontrivial) 208 | // TODO update_subscriptions (field mask necessary?) 209 | // TODO get_subscription (impl From<Subscription> for SubscriptionConfig) 210 | // TODO snapshots? 211 | } 212 | 213 | match_fields! { 214 | pubsub::api::Subscription => 215 | 216 | /// Configuration describing a PubSub subscription.
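    ///
    /// Only the `name` and `topic` typically need to be set explicitly; the remaining fields
    /// mirror the corresponding protobuf fields and can be left at their defaults, as the
    /// `googlepubsub` example does (a sketch; the names are placeholders):
    ///
    /// ```ignore
    /// let config = SubscriptionConfig {
    ///     name: SubscriptionName::new("user-metadata-updaters"),
    ///     topic: TopicName::new("user.created"),
    ///     ..SubscriptionConfig::default()
    /// };
    /// ```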
217 | // TODO incorporate standard_config 218 | #[derive(Debug, Clone)] 219 | pub struct SubscriptionConfig<'s> { 220 | pub name: SubscriptionName<'s>, 221 | pub topic: TopicName<'s>, 222 | pub ack_deadline_seconds: u16, 223 | pub retain_acked_messages: bool, 224 | pub message_retention_duration: Option<pubsub::api::Duration>, 225 | pub labels: std::collections::HashMap<String, String>, 226 | pub enable_message_ordering: bool, 227 | pub expiration_policy: Option<pubsub::api::ExpirationPolicy>, 228 | pub filter: String, 229 | pub dead_letter_policy: Option<pubsub::api::DeadLetterPolicy>, 230 | pub retry_policy: Option<pubsub::api::RetryPolicy>, 231 | 232 | @except: 233 | push_config, 234 | detached, 235 | topic_message_retention_duration, 236 | bigquery_config, 237 | cloud_storage_config, 238 | enable_exactly_once_delivery, 239 | // FIXME check state 240 | } 241 | } 242 | 243 | impl SubscriptionConfig<'_> { 244 | fn into_subscription<S>(self, client: &ConsumerClient<S>) -> pubsub::api::Subscription { 245 | let mut sub = pubsub::api::Subscription::default(); 246 | 247 | sub.name = client.format_subscription(self.name).into(); 248 | sub.topic = client.format_topic(self.topic).into(); 249 | sub.ack_deadline_seconds = self.ack_deadline_seconds.into(); 250 | sub.retain_acked_messages = self.retain_acked_messages; 251 | sub.message_retention_duration = self.message_retention_duration; 252 | sub.labels = self.labels; 253 | sub.enable_message_ordering = self.enable_message_ordering; 254 | sub.expiration_policy = self.expiration_policy; 255 | sub.filter = self.filter; 256 | sub.dead_letter_policy = self.dead_letter_policy; 257 | sub.retry_policy = self.retry_policy; 258 | sub.push_config = None; // push delivery isn't used, it's streaming pull 259 | sub.detached = false; // set by the server on gets/listing 260 | sub.topic_message_retention_duration = None; // Output only, set by the server 261 | 262 | sub 263 | } 264 | } 265 | 266 | // TODO replace with a builder? 267 | impl Default for SubscriptionConfig<'_> { 268 | fn default() -> Self { 269 | Self { 270 | name: SubscriptionName::new(String::new()), 271 | topic: TopicName::new(String::new()), 272 | ack_deadline_seconds: 0, 273 | retain_acked_messages: false, 274 | message_retention_duration: None, 275 | labels: std::collections::HashMap::default(), 276 | enable_message_ordering: false, 277 | expiration_policy: None, 278 | filter: "".into(), 279 | dead_letter_policy: None, 280 | retry_policy: None, 281 | } 282 | } 283 | } 284 | 285 | // TODO match_fields! on ExpirationPolicy, DeadLetterPolicy, RetryPolicy 286 | 287 | /// A message received from PubSub. 288 | /// 289 | /// This includes the message itself, and an [`AcknowledgeToken`](crate::AcknowledgeToken) used to 290 | /// inform the message service when this message has been processed.
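///
/// A consuming loop will typically ack messages that were processed successfully and
/// nack those that failed (a sketch; `process` stands in for application logic):
///
/// ```ignore
/// while let Some(next) = stream.next().await {
///     let PubSubMessage { ack_token, message } = next?;
///     match process(&message) {
///         Ok(()) => ack_token.ack().await?,
///         Err(_) => ack_token.nack().await?,
///     }
/// }
/// ```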
291 | #[cfg_attr(docsrs, doc(cfg(feature = "google")))] 292 | pub type PubSubMessage<T> = crate::consumer::AcknowledgeableMessage<pubsub::AcknowledgeToken, T>; 293 | 294 | /// Errors encountered while streaming messages from PubSub 295 | #[derive(Debug, thiserror::Error)] 296 | #[cfg_attr(docsrs, doc(cfg(feature = "google")))] 297 | pub enum PubSubStreamError { 298 | /// An error from the underlying stream 299 | #[error(transparent)] 300 | Stream(#[from] PubSubError), 301 | 302 | /// An error from a missing hedwig attribute 303 | #[error("missing expected attribute: {key}")] 304 | MissingAttribute { 305 | /// the missing attribute 306 | key: &'static str, 307 | }, 308 | 309 | /// An error from a hedwig attribute with an invalid value 310 | #[error("invalid attribute value for {key}: {invalid_value}")] 311 | InvalidAttribute { 312 | /// the invalid attribute 313 | key: &'static str, 314 | /// the invalid value 315 | invalid_value: String, 316 | /// the error describing the invalidity 317 | #[source] 318 | source: BoxError, 319 | }, 320 | } 321 | 322 | #[async_trait] 323 | impl crate::consumer::AcknowledgeToken for pubsub::AcknowledgeToken { 324 | type AckError = AcknowledgeError; 325 | type ModifyError = ModifyAcknowledgeError; 326 | type NackError = AcknowledgeError; 327 | 328 | async fn ack(self) -> Result<(), Self::AckError> { 329 | self.ack().await 330 | } 331 | 332 | async fn nack(self) -> Result<(), Self::NackError> { 333 | self.nack().await 334 | } 335 | 336 | async fn modify_deadline(&mut self, seconds: u32) -> Result<(), Self::ModifyError> { 337 | self.modify_deadline(seconds).await 338 | } 339 | } 340 | 341 | /// A stream of messages from a subscription in PubSub. 342 | /// 343 | /// Created by [`ConsumerClient::stream_subscription`] 344 | #[pin_project] 345 | #[cfg_attr(docsrs, doc(cfg(feature = "google")))] 346 | pub struct PubSubStream< 347 | S = DefaultGrpcImpl, 348 | R = retry_policy::ExponentialBackoff<PubSubError>, 349 | >(#[pin] pubsub::StreamSubscription<S, R>); 350 | 351 | impl<S, OldR> PubSubStream<S, OldR> { 352 | /// Set the [`RetryPolicy`](retry_policy::RetryPolicy) to use for this streaming subscription. 353 | /// 354 | /// The stream will be reconnected if the policy indicates that an encountered error should be 355 | /// retried 356 | // Because `poll_next` requires `Pin<&mut Self>`, this function cannot be called after the 357 | // stream has started because it moves `self`.
That means that the retry policy can only be 358 | // changed before the polling starts, and is fixed from that point on 359 | pub fn with_retry_policy<R>(self, retry_policy: R) -> PubSubStream<S, R> 360 | where 361 | R: retry_policy::RetryPolicy<(), PubSubError>, 362 | { 363 | PubSubStream(self.0.with_retry_policy(retry_policy)) 364 | } 365 | } 366 | 367 | impl<S, R> stream::Stream for PubSubStream<S, R> 368 | where 369 | S: GrpcService<BoxBody> + Send + 'static, 370 | S::Future: Send + 'static, 371 | S::Error: Into<StdError>, 372 | S::ResponseBody: Body<Data = Bytes> + Send + 'static, 373 | <S::ResponseBody as Body>::Error: Into<StdError> + Send, 374 | R: retry_policy::RetryPolicy<(), PubSubError> + Send + 'static, 375 | R::RetryOp: Send + 'static, 376 | <R::RetryOp as retry_policy::RetryOperation<(), PubSubError>>::Sleep: Send + 'static, 377 | { 378 | type Item = Result<PubSubMessage<ValidatedMessage>, PubSubStreamError>; 379 | 380 | fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> { 381 | self.project().0.poll_next(cx).map(|opt| { 382 | opt.map(|res| { 383 | let (ack_token, message) = res?; 384 | Ok(PubSubMessage { 385 | ack_token, 386 | message: pubsub_to_hedwig(message)?, 387 | }) 388 | }) 389 | }) 390 | } 391 | } 392 | 393 | impl<S, R> crate::consumer::Consumer for PubSubStream<S, R> 394 | where 395 | S: GrpcService<BoxBody> + Send + 'static, 396 | S::Future: Send + 'static, 397 | S::Error: Into<StdError>, 398 | S::ResponseBody: Body<Data = Bytes> + Send + 'static, 399 | <S::ResponseBody as Body>::Error: Into<StdError> + Send, 400 | R: retry_policy::RetryPolicy<(), PubSubError> + Send + 'static, 401 | R::RetryOp: Send + 'static, 402 | <R::RetryOp as retry_policy::RetryOperation<(), PubSubError>>::Sleep: Send + 'static, 403 | { 404 | type AckToken = pubsub::AcknowledgeToken; 405 | type Error = PubSubStreamError; 406 | type Stream = PubSubStream<S, R>; 407 | 408 | fn stream(self) -> Self::Stream { 409 | self 410 | } 411 | } 412 | 413 | /// The namespace of all the hedwig-internal attributes applied to messages 414 | // the backtick '`' is one greater than the underscore '_' in ascii, which makes it the next 415 | // greatest for Ord. Having this as the excluded upper bound makes the range contain every 416 | // string prefixed by "hedwig_". 417 | // 418 | // This uses explicit Bounds instead of the Range syntax because impl RangeBounds<T> for Range<&T> 419 | // requires T: Sized for some reason 420 | const HEDWIG_NAME_RANGE: (Bound<&str>, Bound<&str>) = 421 | (Bound::Included("hedwig_"), Bound::Excluded("hedwig`")); 422 | 423 | /// convert a pubsub message into a hedwig message 424 | fn pubsub_to_hedwig( 425 | msg: pubsub::api::PubsubMessage, 426 | ) -> Result<ValidatedMessage, PubSubStreamError> { 427 | let mut headers = msg.attributes; 428 | 429 | // extract the hedwig attributes from the attribute map.
430 | // any remaining attributes were ones inserted by the user 431 | fn take_attr( 432 | map: &mut Headers, 433 | key: &'static str, 434 | parse: F, 435 | ) -> Result 436 | where 437 | F: FnOnce(String) -> Result, 438 | { 439 | let value = map 440 | .remove(key) 441 | .ok_or(PubSubStreamError::MissingAttribute { key })?; 442 | 443 | parse(value).map_err( 444 | |(invalid_value, source)| PubSubStreamError::InvalidAttribute { 445 | key, 446 | invalid_value, 447 | source, 448 | }, 449 | ) 450 | } 451 | 452 | let id = take_attr(&mut headers, crate::HEDWIG_ID, |string| { 453 | Uuid::from_str(&string).map_err(|e| (string, BoxError::from(e))) 454 | })?; 455 | 456 | let timestamp = take_attr(&mut headers, crate::HEDWIG_MESSAGE_TIMESTAMP, |string| { 457 | // match instead of map_err to keep ownership of string 458 | let millis_since_epoch = match u64::from_str(&string) { 459 | Err(err) => return Err((string, BoxError::from(err))), 460 | Ok(t) => t, 461 | }; 462 | SystemTime::UNIX_EPOCH 463 | .checked_add(Duration::from_millis(millis_since_epoch)) 464 | .ok_or_else(|| { 465 | ( 466 | string, 467 | BoxError::from(format!( 468 | "time stamp {} is too large for SystemTime", 469 | millis_since_epoch 470 | )), 471 | ) 472 | }) 473 | })?; 474 | let schema = take_attr(&mut headers, crate::HEDWIG_SCHEMA, Ok::)?; 475 | 476 | // these attributes we don't actually use, but we check for their existence as defensive 477 | // validation, and remove them so that the user doesn't see them among the headers 478 | take_attr(&mut headers, crate::HEDWIG_PUBLISHER, |_| Ok(()))?; 479 | take_attr(&mut headers, crate::HEDWIG_FORMAT_VERSION, |_| Ok(()))?; 480 | 481 | // for forwards compatibility with future hedwig formats, remove any other "hedwig_*" 482 | // attributes that might exist, so that the user doesn't witness them. 483 | headers 484 | .range::(HEDWIG_NAME_RANGE) 485 | .map(|(k, _v)| k.clone()) // clone b/c there isn't a remove_range, and we can't borrow + remove 486 | .collect::>() 487 | .into_iter() 488 | .for_each(|k| { 489 | debug!(message = "removing unknown hedwig attribute", key = &k[..]); 490 | headers.remove(&k); 491 | }); 492 | 493 | Ok(ValidatedMessage::new( 494 | id, timestamp, schema, headers, msg.data, 495 | )) 496 | } 497 | 498 | #[cfg(test)] 499 | mod test { 500 | use super::*; 501 | use crate::{ 502 | HEDWIG_FORMAT_VERSION, HEDWIG_ID, HEDWIG_MESSAGE_TIMESTAMP, HEDWIG_PUBLISHER, HEDWIG_SCHEMA, 503 | }; 504 | use pubsub::api::PubsubMessage; 505 | use std::collections::BTreeMap; 506 | 507 | #[derive(Debug, Clone)] 508 | struct EqValidatedMessage(ValidatedMessage); 509 | 510 | impl std::ops::Deref for EqValidatedMessage { 511 | type Target = ValidatedMessage; 512 | 513 | fn deref(&self) -> &Self::Target { 514 | &self.0 515 | } 516 | } 517 | 518 | impl PartialEq for EqValidatedMessage { 519 | fn eq(&self, other: &ValidatedMessage) -> bool { 520 | self.uuid() == other.uuid() 521 | && self.timestamp() == other.timestamp() 522 | && self.schema() == other.schema() 523 | && self.headers() == other.headers() 524 | && self.data() == other.data() 525 | } 526 | } 527 | 528 | macro_rules! string_btree { 529 | ($($key:expr => $val:expr),* $(,)?) => { 530 | { 531 | #[allow(unused_mut)] 532 | let mut map = BTreeMap::new(); 533 | $( 534 | map.insert(($key).to_string(), ($val).to_string()); 535 | )* 536 | map 537 | } 538 | } 539 | } 540 | 541 | /// Check that the data in headers is deserialized appropriately 542 | #[test] 543 | fn headers_parsed() { 544 | let user_attrs = string_btree! 
{ 545 | "aaa" => "aaa_value", 546 | "zzz" => "zzz_value", 547 | "some_longer_string" => "the value for the longer string", 548 | }; 549 | 550 | let hedwig_attrs = string_btree! { 551 | HEDWIG_ID => Uuid::nil(), 552 | HEDWIG_MESSAGE_TIMESTAMP => 1000, 553 | HEDWIG_SCHEMA => "my-test-schema", 554 | HEDWIG_PUBLISHER => "my-test-publisher", 555 | HEDWIG_FORMAT_VERSION => "1", 556 | }; 557 | 558 | let data = "foobar"; 559 | 560 | let mut attributes = user_attrs.clone(); 561 | attributes.extend(hedwig_attrs); 562 | 563 | let message = { 564 | let mut m = PubsubMessage::default(); 565 | m.data = data.into(); 566 | m.attributes = attributes; 567 | m.message_id = String::from("some_unique_id"); 568 | m.publish_time = Some(pubsub::api::Timestamp { 569 | seconds: 15, 570 | nanos: 42, 571 | }); 572 | m.ordering_key = String::new(); 573 | m 574 | }; 575 | 576 | let validated_message = pubsub_to_hedwig(message).unwrap(); 577 | 578 | assert_eq!( 579 | EqValidatedMessage(ValidatedMessage::new( 580 | Uuid::nil(), 581 | SystemTime::UNIX_EPOCH + Duration::from_millis(1000), 582 | "my-test-schema", 583 | user_attrs, 584 | data 585 | )), 586 | validated_message 587 | ); 588 | } 589 | 590 | /// Check that parsing headers fails if a hedwig attribute is missing 591 | #[test] 592 | fn headers_error_on_missing() { 593 | let full_hedwig_attrs = string_btree! { 594 | HEDWIG_ID => Uuid::nil(), 595 | HEDWIG_MESSAGE_TIMESTAMP => 1000, 596 | HEDWIG_SCHEMA => "my-test-schema", 597 | HEDWIG_PUBLISHER => "my-test-publisher", 598 | HEDWIG_FORMAT_VERSION => "1", 599 | }; 600 | 601 | for &missing_header in [ 602 | HEDWIG_ID, 603 | HEDWIG_MESSAGE_TIMESTAMP, 604 | HEDWIG_SCHEMA, 605 | HEDWIG_PUBLISHER, 606 | HEDWIG_FORMAT_VERSION, 607 | ] 608 | .iter() 609 | { 610 | let mut attributes = full_hedwig_attrs.clone(); 611 | attributes.remove(missing_header); 612 | 613 | let res = pubsub_to_hedwig({ 614 | let mut m = PubsubMessage::default(); 615 | m.attributes = attributes; 616 | m 617 | }); 618 | 619 | match res { 620 | Err(PubSubStreamError::MissingAttribute { key }) => assert_eq!(key, missing_header), 621 | _ => panic!( 622 | "result did not fail on missing attribute {}: {:?}", 623 | missing_header, res 624 | ), 625 | } 626 | } 627 | } 628 | 629 | /// Check that unknown hedwig headers are removed from the user-visible message, under the 630 | /// assumption that they are from some hedwig format change 631 | #[test] 632 | fn forward_compat_headers_removed() { 633 | let hedwig_attrs = string_btree! { 634 | HEDWIG_ID => Uuid::nil(), 635 | HEDWIG_MESSAGE_TIMESTAMP => 1000, 636 | HEDWIG_SCHEMA => "my-test-schema", 637 | HEDWIG_PUBLISHER => "my-test-publisher", 638 | HEDWIG_FORMAT_VERSION => "1", 639 | "hedwig_some_new_flag" => "boom!", 640 | "hedwig_another_change_from_the_future" => "kablam!", 641 | }; 642 | 643 | let user_attrs = string_btree! { 644 | "abc" => "123", 645 | "foo" => "bar", 646 | "aaaaaaaaaaaaaaaaaaaaaaaaa" => "bbbbbbbbbbbbbbbbbbbb", 647 | // hedwig attributes are restricted to the "hedwig_" prefix by producers. 
It should 648 | // then be valid for a user to have the word "hedwig" prefixed for their own keys 649 | "hedwig-key-but-with-hyphens" => "assumes the restricted format always uses underscores", 650 | "hedwigAsAPrefixToSomeString" => "camelCase", 651 | }; 652 | 653 | let mut attributes = user_attrs.clone(); 654 | attributes.extend(hedwig_attrs); 655 | 656 | let validated_message = pubsub_to_hedwig({ 657 | let mut m = PubsubMessage::default(); 658 | m.attributes = attributes; 659 | m 660 | }) 661 | .unwrap(); 662 | 663 | assert_eq!(&user_attrs, validated_message.headers()); 664 | } 665 | 666 | #[test] 667 | fn project_subscription_name() { 668 | let subscription_name = 669 | SubscriptionName::with_cross_project("other_project", "my_subscription"); 670 | 671 | assert_eq!( 672 | String::from( 673 | subscription_name.into_project_subscription_name("my_project", "some_queue") 674 | ), 675 | "projects/my_project/subscriptions/hedwig-some_queue-other_project-my_subscription" 676 | ); 677 | } 678 | } 679 | -------------------------------------------------------------------------------- /src/backends/googlepubsub/mod.rs: -------------------------------------------------------------------------------- 1 | //! Adapters for using GCP's PubSub as a message service for hedwig 2 | 3 | #![macro_use] 4 | 5 | use std::{borrow::Cow, fmt::Display}; 6 | 7 | pub use ya_gcp::{ 8 | self as gcp, 9 | grpc::StatusCodeSet, 10 | pubsub::{ 11 | AcknowledgeError, AcknowledgeToken, BuildError, Error as PubSubError, 12 | ModifyAcknowledgeError, PubSubConfig, PubSubRetryCheck, SinkError, 13 | StreamSubscriptionConfig, Uri, 14 | }, 15 | retry_policy, AuthFlow, ClientBuilderConfig, CreateBuilderError, ServiceAccountAuth, 16 | }; 17 | 18 | type BoxError = Box; 19 | 20 | /// Create a new struct with the same fields as another struct, with the annotated exceptions 21 | /// 22 | /// This is used to create a narrowed-down API type, with irrelevant fields removed and other fields 23 | /// replaced with richer types. 24 | macro_rules! match_fields { 25 | ( 26 | $target:path => 27 | 28 | $(#[$struct_attr:meta])* 29 | pub struct $struct_name:ident $(<$struct_generics:tt>)? { 30 | $( 31 | $(#[$field_attr:meta])* 32 | pub $field_name:ident : $field_type:ty, 33 | )*$(,)? 34 | 35 | // fields which exist in the target but not in the struct. 36 | // used to ensure names are listed exhaustively 37 | @except: 38 | $( 39 | $target_except_field:ident, 40 | )*$(,)? 41 | } 42 | ) => { 43 | $(#[$struct_attr])* 44 | // nested cfg_attr prevents older compilers from parsing the new doc = EXPR syntax 45 | #[cfg_attr(docsrs, cfg_attr(docsrs, 46 | doc = "", // newline 47 | doc = concat!("This is a more ergonomic wrapper over [`", stringify!($target), "`]") 48 | ))] 49 | #[cfg_attr(not(docsrs), allow(missing_docs))] 50 | pub struct $struct_name $(<$struct_generics>)? { 51 | $( 52 | #[cfg_attr(docsrs, cfg_attr(docsrs, doc = concat!( 53 | "See [`", stringify!($field_name), "`]", 54 | "(", stringify!($target), "::", stringify!($field_name), ")" 55 | )))] 56 | $(#[$field_attr])* 57 | pub $field_name : $field_type, 58 | )* 59 | } 60 | 61 | impl$(<$struct_generics>)? $struct_name $(<$struct_generics>)? { 62 | const _MATCH_CHECK: () = { 63 | match None { 64 | Some($target { 65 | $( 66 | $field_name: _, 67 | )* 68 | $( 69 | $target_except_field: _, 70 | )* 71 | .. 
72 | }) => {}, 73 | None => {} 74 | }; 75 | }; 76 | } 77 | }; 78 | } 79 | 80 | mod consumer; 81 | mod publisher; 82 | 83 | pub use consumer::*; 84 | pub use publisher::*; 85 | 86 | /// A PubSub topic name. 87 | /// 88 | /// This will be used to internally construct the expected 89 | /// `projects/{project}/topics/hedwig-{topic}` format for API calls 90 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] 91 | pub struct TopicName<'s>(Cow<'s, str>); 92 | 93 | impl<'s> TopicName<'s> { 94 | /// Create a new `TopicName` 95 | pub fn new(name: impl Into>) -> Self { 96 | Self(name.into()) 97 | } 98 | 99 | /// Construct a full project and topic name with this name 100 | fn into_project_topic_name( 101 | self, 102 | project_name: impl Display, 103 | ) -> ya_gcp::pubsub::ProjectTopicName { 104 | ya_gcp::pubsub::ProjectTopicName::new( 105 | project_name, 106 | std::format_args!("hedwig-{topic}", topic = self.0), 107 | ) 108 | } 109 | } 110 | 111 | /// A builder used to create [`ConsumerClient`] and [`PublisherClient`] instances 112 | /// 113 | /// Note that the builder is not consumed when creating clients, and many clients can be built 114 | /// using the same builder. This may allow some resource re-use across the clients 115 | pub struct ClientBuilder { 116 | inner: ya_gcp::ClientBuilder, 117 | pubsub_config: PubSubConfig, 118 | } 119 | 120 | impl ClientBuilder { 121 | /// Create a new client builder using the default HTTPS connector based on the crate's 122 | /// enabled features 123 | pub async fn new( 124 | config: ClientBuilderConfig, 125 | pubsub_config: PubSubConfig, 126 | ) -> Result { 127 | Ok(ClientBuilder { 128 | inner: ya_gcp::ClientBuilder::new(config).await?, 129 | pubsub_config, 130 | }) 131 | } 132 | } 133 | 134 | impl ClientBuilder { 135 | /// Create a new [`ConsumerClient`] for consuming messages from PubSub subscriptions within the 136 | /// given project, identified by the given queue name. 137 | pub async fn build_consumer( 138 | &self, 139 | project: impl Into, 140 | queue: impl Into, 141 | ) -> Result { 142 | Ok(ConsumerClient::from_client( 143 | self.inner 144 | .build_pubsub_subscriber(self.pubsub_config.clone()) 145 | .await?, 146 | project.into(), 147 | queue.into(), 148 | )) 149 | } 150 | 151 | /// Create a new [`PublisherClient`] for publishing messages to PubSub topics within the given 152 | /// project. 153 | /// 154 | /// Each published message will have an attribute labelling the publisher with the given 155 | /// identifier. 
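Putting the builder to work might look like the following sketch; the identifiers are placeholders, the default transport type parameters are assumed, and the `ClientBuilderConfig` construction is left to `ya-gcp`:

```rust
// One builder can produce many clients without being consumed.
async fn build_clients(
    config: ClientBuilderConfig,
) -> Result<(ConsumerClient, PublisherClient), Box<dyn std::error::Error>> {
    let builder = ClientBuilder::new(config, PubSubConfig::default()).await?;

    let consumer = builder.build_consumer("my-project", "my-queue").await?;
    let publisher = builder.build_publisher("my-project", "my-service").await?;

    Ok((consumer, publisher))
}
```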
156 | pub async fn build_publisher( 157 | &self, 158 | project: impl Into, 159 | publisher_id: impl Into, 160 | ) -> Result { 161 | Ok(PublisherClient::from_client( 162 | self.inner 163 | .build_pubsub_publisher(self.pubsub_config.clone()) 164 | .await?, 165 | project.into(), 166 | publisher_id.into(), 167 | )) 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /src/backends/googlepubsub/publisher.rs: -------------------------------------------------------------------------------- 1 | use crate::{EncodableMessage, Topic, ValidatedMessage}; 2 | use futures_util::{ 3 | ready, 4 | sink::{Sink, SinkExt}, 5 | }; 6 | use pin_project::pin_project; 7 | use std::{ 8 | collections::{BTreeMap, VecDeque}, 9 | fmt, 10 | pin::Pin, 11 | task::{Context, Poll}, 12 | time::SystemTime, 13 | }; 14 | use ya_gcp::{ 15 | grpc::{Body, BoxBody, Bytes, DefaultGrpcImpl, GrpcService, StdError}, 16 | pubsub, 17 | }; 18 | 19 | use super::{ 20 | retry_policy::{ 21 | exponential_backoff::Config as ExponentialBackoffConfig, ExponentialBackoff, 22 | RetryOperation, RetryPolicy, 23 | }, 24 | PubSubError, TopicName, 25 | }; 26 | 27 | use message_translate::{TopicSink, TopicSinkError}; 28 | 29 | /// A thread-safe analog to Rc> 30 | /// 31 | /// There are a few components in the publishing sink which are shared between layers and 32 | /// exclusively borrowed, but not in a way the compiler can recognize. These can't use references 33 | /// because the layers need ownership (some are passed to other libs like into gcp). In principle 34 | /// they could use raw pointers, aided by Pin preventing moves; but the unsafety is unnerving, so 35 | /// checked sharing is used instead. 36 | /// 37 | /// Note the element is never actually borrowed across threads, or even across `await` points; all 38 | /// calls happen in a single call stack of `poll_*` functions. Send + Sync are required to ensure 39 | /// the containing top-level sink can be held across awaits (or actually sent) without an unsafe 40 | /// Send+Sync declaration 41 | #[derive(Debug)] 42 | struct Shared(std::sync::Arc>); 43 | 44 | impl Shared { 45 | fn new(t: T) -> Self { 46 | Self(std::sync::Arc::new(parking_lot::Mutex::new(t))) 47 | } 48 | 49 | fn borrow_mut(&self) -> impl std::ops::DerefMut + '_ { 50 | self.0 51 | .try_lock() 52 | .unwrap_or_else(|| panic!("unexpected overlapping borrow of shared state")) 53 | } 54 | } 55 | 56 | impl Clone for Shared { 57 | fn clone(&self) -> Self { 58 | Self(std::sync::Arc::clone(&self.0)) 59 | } 60 | } 61 | 62 | /// A client through which PubSub publishing operations can be performed. 63 | /// 64 | /// This includes managing topics and writing data to topics. Created using 65 | /// [`build_publisher`](super::ClientBuilder::build_publisher) 66 | #[derive(Debug, Clone)] 67 | pub struct PublisherClient { 68 | client: pubsub::PublisherClient, 69 | project: String, 70 | identifier: String, 71 | } 72 | 73 | impl PublisherClient { 74 | /// Create a new publisher from an existing pubsub client. 
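The discipline `Shared` encodes (fail loudly on overlap instead of blocking) can be seen with `parking_lot` directly; a toy standalone sketch:

```rust
use std::sync::Arc;

fn checked_sharing_demo() {
    let shared = Arc::new(parking_lot::Mutex::new(Vec::<u32>::new()));

    {
        // try_lock never blocks: an unexpected overlapping borrow surfaces
        // as a panic rather than a deadlock
        let mut guard = shared.try_lock().expect("no overlapping borrow");
        guard.push(1);
    } // guard dropped; the lock is free again

    assert_eq!(shared.try_lock().expect("lock free again").len(), 1);
}
```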
75 | /// 76 | /// This function is useful for client customization; most callers should typically use the 77 | /// defaults provided by [`build_publisher`](super::ClientBuilder::build_publisher) 78 | pub fn from_client( 79 | client: pubsub::PublisherClient, 80 | project: String, 81 | identifier: String, 82 | ) -> Self { 83 | PublisherClient { 84 | client, 85 | project, 86 | identifier, 87 | } 88 | } 89 | 90 | fn project(&self) -> &str { 91 | &self.project 92 | } 93 | 94 | fn identifier(&self) -> &str { 95 | &self.identifier 96 | } 97 | 98 | /// Construct a fully formatted project and topic name for the given topic 99 | pub fn format_topic(&self, topic: TopicName<'_>) -> pubsub::ProjectTopicName { 100 | topic.into_project_topic_name(self.project()) 101 | } 102 | 103 | /// Get a reference to the underlying pubsub client 104 | pub fn inner(&self) -> &pubsub::PublisherClient { 105 | &self.client 106 | } 107 | 108 | /// Get a mutable reference to the underlying pubsub client 109 | pub fn inner_mut(&mut self) -> &mut pubsub::PublisherClient { 110 | &mut self.client 111 | } 112 | } 113 | 114 | /// Errors which can occur while publishing a message 115 | #[derive(Debug)] 116 | pub enum PublishError { 117 | /// An error from publishing 118 | Publish { 119 | /// The cause of the error 120 | cause: PubSubError, 121 | 122 | /// The batch of messages which failed to be published 123 | messages: Vec, 124 | }, 125 | 126 | /// An error from submitting a successfully published message to the user-provided response 127 | /// sink 128 | Response(E), 129 | 130 | /// An error from validating the given message 131 | InvalidMessage { 132 | /// The cause of the error 133 | cause: M::Error, 134 | 135 | /// The message which failed to be validated 136 | message: M, 137 | }, 138 | } 139 | 140 | impl fmt::Display for PublishError 141 | where 142 | M::Error: fmt::Display, 143 | E: fmt::Display, 144 | { 145 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 146 | match self { 147 | PublishError::Publish { messages, .. } => f.write_fmt(format_args!( 148 | "could not publish {} messages", 149 | messages.len() 150 | )), 151 | PublishError::Response(..) => f.write_str( 152 | "could not forward response for a successfully published message to the sink", 153 | ), 154 | PublishError::InvalidMessage { .. } => f.write_str("could not validate message"), 155 | } 156 | } 157 | } 158 | 159 | impl std::error::Error for PublishError 160 | where 161 | M: fmt::Debug, 162 | M::Error: std::error::Error + 'static, 163 | E: std::error::Error + 'static, 164 | { 165 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { 166 | match self { 167 | PublishError::Publish { cause, .. } => Some(cause as &_), 168 | PublishError::Response(cause) => Some(cause as &_), 169 | PublishError::InvalidMessage { cause, .. } => Some(cause as &_), 170 | } 171 | } 172 | } 173 | 174 | impl From> for PublishError { 175 | fn from(from: TopicSinkError) -> Self { 176 | match from { 177 | TopicSinkError::Publish(cause, messages) => PublishError::Publish { cause, messages }, 178 | TopicSinkError::Response(err) => PublishError::Response(err), 179 | } 180 | } 181 | } 182 | 183 | impl PublisherClient 184 | where 185 | C: GrpcService, 186 | C::Error: Into, 187 | C::ResponseBody: Body + Send + 'static, 188 | ::Error: Into + Send, 189 | { 190 | /// Create a new PubSub topic. 
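Calling code might unpack this error along the following lines (a sketch: the generic parameters are written out explicitly, the `Display` bounds exist only for printing, and handing failed messages back for retry is just one possible policy):

```rust
use std::fmt;

// Returns any messages the caller may want to re-enqueue.
fn handle_publish_error<M, E>(err: PublishError<M, E>) -> Vec<M>
where
    M: EncodableMessage + fmt::Debug,
    M::Error: fmt::Display,
    E: fmt::Display,
{
    match err {
        PublishError::Publish { cause, messages } => {
            eprintln!("publish failed ({}); {} messages affected", cause, messages.len());
            messages
        }
        PublishError::Response(cause) => {
            eprintln!("response sink failed: {}", cause);
            Vec::new()
        }
        PublishError::InvalidMessage { cause, message } => {
            eprintln!("dropping invalid message {:?}: {}", message, cause);
            Vec::new()
        }
    }
}
```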
191 | /// 192 | /// See the GCP documentation on topics [here](https://cloud.google.com/pubsub/docs/admin) 193 | pub async fn create_topic(&mut self, topic: TopicConfig<'_>) -> Result<(), PubSubError> { 194 | let topic = topic.into_topic(self); 195 | self.client.raw_api_mut().create_topic(topic).await?; 196 | 197 | Ok(()) 198 | } 199 | 200 | /// Delete an existing PubSub topic. 201 | /// 202 | /// See the GCP documentation on topics [here](https://cloud.google.com/pubsub/docs/admin) 203 | pub async fn delete_topic(&mut self, topic: TopicName<'_>) -> Result<(), PubSubError> { 204 | let topic = topic.into_project_topic_name(self.project()).into(); 205 | 206 | self.client 207 | .raw_api_mut() 208 | .delete_topic({ 209 | let mut r = pubsub::api::DeleteTopicRequest::default(); 210 | r.topic = topic; 211 | r 212 | }) 213 | .await?; 214 | 215 | Ok(()) 216 | } 217 | 218 | /// Create a new [`Publisher`] instance for publishing messages. 219 | /// 220 | /// Multiple publishers can be created using the same client, for example to use different 221 | /// validators. They may share some underlying resources for greater efficiency than creating 222 | /// multiple clients. 223 | pub fn publisher(&self) -> Publisher<C> 224 | where 225 | C: Clone, 226 | { 227 | Publisher { 228 | client: self.clone(), 229 | retry_policy: ExponentialBackoff::new( 230 | pubsub::PubSubRetryCheck::default(), 231 | ExponentialBackoffConfig::default(), 232 | ), 233 | publish_config: pubsub::PublishConfig::default(), 234 | } 235 | } 236 | 237 | // TODO list_topics (paginated, nontrivial) 238 | // TODO list_topic_subscriptions (same) 239 | // TODO list_topic_snapshots (same) 240 | // TODO update_topic 241 | // TODO get_topic 242 | // TODO detach_subscription 243 | } 244 | 245 | /// A publisher for sending messages to PubSub topics 246 | pub struct Publisher<C, R = ExponentialBackoff<pubsub::PubSubRetryCheck>> { 247 | client: PublisherClient<C>, 248 | retry_policy: R, 249 | publish_config: pubsub::PublishConfig, 250 | } 251 | 252 | impl<C, OldR> Publisher<C, OldR> { 253 | /// Set the retry policy for this `Publisher`. 254 | /// 255 | /// If a publishing operation encounters an error, the given retry policy will be consulted to 256 | /// possibly retry the operation, or otherwise propagate the error to the caller. 257 | pub fn with_retry_policy<R, M>(self, retry_policy: R) -> Publisher<C, R> 258 | where 259 | R: RetryPolicy<[M], PubSubError> + Clone, 260 | M: EncodableMessage, 261 | { 262 | Publisher { 263 | retry_policy, 264 | client: self.client, 265 | publish_config: self.publish_config, 266 | } 267 | } 268 | 269 | /// Set the publishing configuration for this `Publisher`.
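A sketch of topic administration feeding into publisher creation; the topic name is a placeholder, the default transport parameter is assumed, and note that the service typically rejects `create_topic` for a topic that already exists:

```rust
use ya_gcp::grpc::DefaultGrpcImpl;

async fn create_topic_and_publisher(
    mut client: PublisherClient<DefaultGrpcImpl>,
) -> Result<Publisher<DefaultGrpcImpl>, PubSubError> {
    client
        .create_topic(TopicConfig {
            // becomes "projects/<project>/topics/hedwig-user.created" internally
            name: TopicName::new("user.created"),
            ..TopicConfig::default()
        })
        .await?;

    // default retry policy and publish configuration
    Ok(client.publisher())
}
```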
270 | pub fn with_config(self, publish_config: pubsub::PublishConfig) -> Self { 271 | Self { 272 | publish_config, 273 | ..self 274 | } 275 | } 276 | } 277 | 278 | impl crate::publisher::Publisher for Publisher 279 | where 280 | C: GrpcService + Clone + Send + 'static, 281 | C::Future: Send + 'static, 282 | C::Error: Into, 283 | C::ResponseBody: Body + Send + 'static, 284 | ::Error: Into + Send, 285 | M: EncodableMessage + Send + 'static, 286 | S: Sink + Send + 'static, 287 | R: RetryPolicy<[M], PubSubError> + Clone + 'static, 288 | R::RetryOp: Send + 'static, 289 | >::Sleep: Send + 'static, 290 | { 291 | type PublishError = PublishError; 292 | type PublishSink = PublishSink; 293 | 294 | fn publish_sink_with_responses( 295 | self, 296 | validator: M::Validator, 297 | response_sink: S, 298 | ) -> Self::PublishSink { 299 | PublishSink { 300 | topic_sinks: BTreeMap::new(), 301 | validator, 302 | buffer: None, 303 | client: self.client, 304 | retry_policy: self.retry_policy, 305 | response_sink: Shared::new(Box::pin(response_sink)), 306 | publish_config: self.publish_config, 307 | _p: std::marker::PhantomPinned, 308 | } 309 | } 310 | } 311 | 312 | match_fields! { 313 | pubsub::api::Topic => 314 | 315 | /// Configuration describing a PubSub topic. 316 | #[derive(Debug, Clone)] 317 | pub struct TopicConfig<'s> { 318 | pub name: TopicName<'s>, 319 | pub labels: std::collections::HashMap, 320 | pub message_storage_policy: Option, 321 | pub kms_key_name: String, 322 | pub message_retention_duration: Option, 323 | 324 | @except: 325 | schema_settings, 326 | satisfies_pzs, 327 | } 328 | } 329 | 330 | impl TopicConfig<'_> { 331 | fn into_topic(self, client: &PublisherClient) -> pubsub::api::Topic { 332 | let mut t = pubsub::api::Topic::default(); 333 | t.name = self.name.into_project_topic_name(client.project()).into(); 334 | t.labels = self.labels; 335 | t.message_storage_policy = self.message_storage_policy; 336 | t.kms_key_name = self.kms_key_name; 337 | t.message_retention_duration = self.message_retention_duration; 338 | t 339 | } 340 | } 341 | 342 | impl Default for TopicConfig<'_> { 343 | fn default() -> Self { 344 | Self { 345 | name: TopicName::new(String::new()), 346 | labels: std::collections::HashMap::new(), 347 | message_storage_policy: None, 348 | kms_key_name: String::new(), 349 | message_retention_duration: None, 350 | } 351 | } 352 | } 353 | 354 | /// A sink for publishing messages to pubsub topics. 355 | /// 356 | /// Created by [`Publisher::publish_sink`](crate::Publisher::publish_sink) 357 | #[pin_project] 358 | pub struct PublishSink, R> { 359 | // The underlying sinks operate on a single topic. The incoming messages could have varying 360 | // topics, so this map holds a lazily initialized set of underlying sinks 361 | #[allow(clippy::type_complexity)] // mostly from Pin+Box 362 | topic_sinks: BTreeMap>>>, 363 | 364 | // The validator for the messages 365 | validator: M::Validator, 366 | 367 | // In order to know which sink to check in `poll_ready`, we need a message's topic; but we 368 | // won't know the topic until looking at the element in `start_send`, which contractually must 369 | // always be preceded by a `poll_ready`. 370 | // 371 | // Work around this chicken-egg problem by deferring readiness checking by 1 message. 372 | // The first `poll_ready` will always be Ready, and the first value will be seeded in this 373 | // buffer. 
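Driving the sink from user code might look like this sketch; the publisher, validator, and message are assumed to exist, and the response channel is how successful publishes are observed:

```rust
use futures_channel::mpsc;
use futures_util::{pin_mut, Sink, SinkExt, StreamExt};

// Intended wiring (sketch):
//   let (tx, rx) = mpsc::channel(1);
//   let sink = publisher.publish_sink_with_responses(validator, tx);
//   publish_and_confirm(sink, message, rx).await;
async fn publish_and_confirm<M, S>(
    sink: S,
    message: M,
    mut responses: mpsc::Receiver<M>,
) -> Option<M>
where
    S: Sink<M>,
{
    pin_mut!(sink); // the sink is !Unpin (PhantomPinned), so pin it before use
    sink.send(message).await.ok()?; // poll_ready, start_send, then a full flush
    responses.next().await // the message is handed back once actually published
}
```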
Subsequent `poll_ready`s will check the *previous* message in the buffer, and try to 374 | // send it to its corresponding underlying sink 375 | buffer: Option, 376 | 377 | // Because the sinks will be generated lazily, we need a client, retry policy, and 378 | // destination sink to create new per-topic sinks 379 | client: PublisherClient, 380 | retry_policy: R, 381 | 382 | // The sink where user messages are sent once published, to inform the user that the message 383 | // was successfully sent. 384 | // 385 | // Boxing this sink isn't strictly necessary because it's already in an Arc which does half the 386 | // job of preventing moves by putting it on the heap; unfortunately there's no pin projection 387 | // through mutexes, so we can't mark it pinned without some unsafe shenanigans. If we go 388 | // unsafe, we should ditch the Arc sharing altogether and pass pointers, which should be mostly 389 | // fine due to the outer pinning 390 | response_sink: Shared>>, 391 | 392 | publish_config: pubsub::PublishConfig, 393 | 394 | // enable future !Unpin without breaking changes 395 | _p: std::marker::PhantomPinned, 396 | } 397 | 398 | impl Sink for PublishSink 399 | where 400 | C: GrpcService + Clone + Send + 'static, 401 | C::Future: Send + 'static, 402 | C::Error: Into, 403 | C::ResponseBody: Body + Send + 'static, 404 | ::Error: Into + Send, 405 | M: EncodableMessage + Send + 'static, 406 | S: Sink + Send + 'static, 407 | R: RetryPolicy<[M], PubSubError> + Clone + 'static, 408 | R::RetryOp: Send + 'static, 409 | >::Sleep: Send + 'static, 410 | { 411 | type Error = PublishError; 412 | 413 | fn poll_ready(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 414 | let this = self.project(); 415 | let client = this.client; 416 | 417 | // Given the buffered (topic, message), find the sink corresponding to the topic 418 | match this.buffer.as_ref() { 419 | Some(msg) => { 420 | let topic = msg.topic(); 421 | // look up the sink by topic. 
If a sink doesn't exist, initialize one 422 | let sink = { 423 | let retry_policy = this.retry_policy; 424 | let response_sink = this.response_sink; 425 | 426 | // avoid cloning the topic if the key exists 427 | match this.topic_sinks.get_mut(&topic) { 428 | Some(existing) => existing, 429 | None => this.topic_sinks.entry(topic.clone()).or_insert(Box::pin( 430 | TopicSink::new( 431 | client.client.publish_topic_sink( 432 | TopicName::new(topic.as_ref()) 433 | .into_project_topic_name(client.project()), 434 | *this.publish_config, 435 | ), 436 | retry_policy.clone(), 437 | Shared::clone(response_sink), 438 | ), 439 | )), 440 | } 441 | }; 442 | 443 | // poll the sink to see if it's ready 444 | ready!(sink.poll_ready_unpin(cx))?; 445 | 446 | // only take out of the buffer when we know the sink is ready 447 | let message = this.buffer.take().expect("already check Some"); 448 | 449 | // validate the message with the validator 450 | let validated = match message.encode(this.validator) { 451 | Ok(validated_msg) => validated_msg, 452 | Err(err) => { 453 | return Poll::Ready(Err(PublishError::InvalidMessage { 454 | cause: err, 455 | message, 456 | })) 457 | } 458 | }; 459 | 460 | // convert the validated message to pubsub's message type 461 | let api_message = match hedwig_to_pubsub(validated, client.identifier()) { 462 | Ok(api_message) => api_message, 463 | Err(err) => { 464 | return Poll::Ready(Err(PublishError::Publish { 465 | cause: err, 466 | messages: vec![message], 467 | })) 468 | } 469 | }; 470 | 471 | // now send the message to the sink 472 | sink.start_send_unpin((message, api_message))?; 473 | Poll::Ready(Ok(())) 474 | } 475 | 476 | // The buffer could be empty on the first ever poll_ready or after explicit flushes. 477 | // In that case the sink is immediately ready for an element 478 | None => Poll::Ready(Ok(())), 479 | } 480 | } 481 | 482 | fn start_send(self: Pin<&mut Self>, item: M) -> Result<(), Self::Error> { 483 | // try to put the item into the buffer. 
484 | // If an item is already in the buffer, the user must not have called `poll_ready` 485 | if self.project().buffer.replace(item).is_some() { 486 | panic!("each `start_send` must be preceded by a successful call to `poll_ready`") 487 | } 488 | 489 | Ok(()) 490 | } 491 | 492 | fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 493 | // first send any element in the buffer by checking readiness 494 | ready!(self.as_mut().poll_ready(cx))?; 495 | 496 | // then flush all of the underlying sinks 497 | let mut all_ready = true; 498 | for sink in self.topic_sinks.values_mut() { 499 | all_ready &= sink.poll_flush_unpin(cx)?.is_ready(); 500 | } 501 | 502 | if all_ready { 503 | Poll::Ready(Ok(())) 504 | } else { 505 | Poll::Pending 506 | } 507 | } 508 | 509 | fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 510 | // first initiate a flush as required by the Sink contract 511 | ready!(self.as_mut().poll_flush(cx))?; 512 | 513 | // then close all of the underlying sinks 514 | let mut all_ready = true; 515 | for sink in self.topic_sinks.values_mut() { 516 | all_ready &= sink.poll_close_unpin(cx)?.is_ready(); 517 | } 518 | 519 | if all_ready { 520 | Poll::Ready(Ok(())) 521 | } else { 522 | Poll::Pending 523 | } 524 | } 525 | } 526 | 527 | /// convert a hedwig message into a pubsub message 528 | fn hedwig_to_pubsub( 529 | mut msg: ValidatedMessage, 530 | publisher_id: &str, 531 | ) -> Result { 532 | let mut attributes = std::mem::take(msg.headers_mut()); 533 | 534 | if let Some(invalid_key) = attributes.keys().find(|key| key.starts_with("hedwig_")) { 535 | return Err(PubSubError::invalid_argument(format!( 536 | "keys starting with \"hedwig_\" are reserved: {}", 537 | invalid_key 538 | ))); 539 | } 540 | 541 | attributes.insert(crate::HEDWIG_ID.into(), msg.uuid().to_string()); 542 | attributes.insert( 543 | crate::HEDWIG_MESSAGE_TIMESTAMP.into(), 544 | msg.timestamp() 545 | .duration_since(SystemTime::UNIX_EPOCH) 546 | .map_err(|_| { 547 | PubSubError::invalid_argument(format!( 548 | "timestamp should be after UNIX epoch: {:?}", 549 | msg.timestamp() 550 | )) 551 | })? 552 | .as_millis() 553 | .to_string(), 554 | ); 555 | attributes.insert(crate::HEDWIG_SCHEMA.into(), msg.schema().into()); 556 | attributes.insert(crate::HEDWIG_PUBLISHER.into(), publisher_id.into()); 557 | attributes.insert(crate::HEDWIG_FORMAT_VERSION.into(), "1.0".into()); 558 | 559 | let mut m = pubsub::api::PubsubMessage::default(); 560 | m.data = msg.into_data(); 561 | m.attributes = attributes; 562 | 563 | Ok(m) 564 | } 565 | 566 | /// Translation mechanisms for converting between user messages and api messages. 567 | /// 568 | /// While the user submits messages of arbitrary type `M` to the publisher, that information is 569 | /// transformed (first by the generic validator, then a pubsub-specific conversion) into a concrete 570 | /// type (`pubsub::api::PubsubMessage`) to actually communicate with the remote service. Some 571 | /// operations then require user input based on messages in the api type (for example, checking 572 | /// whether a retry is necessary) but the api type is meaningless to the user, they only understand 573 | /// `M`. 574 | /// 575 | /// This module provides several means of translating from the api type back into the type `M` 576 | /// (without explicit de-transformation). 577 | mod message_translate { 578 | use super::*; 579 | 580 | /// A buffer which will hold un-encoded user messages while the encoded version of the message is 581 | /// published. 
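The reserved-prefix rejection above is easy to pin down with a unit test; a sketch (schema string and payload are arbitrary):

```rust
#[test]
fn user_headers_may_not_use_the_reserved_prefix() {
    let mut headers = crate::Headers::new();
    headers.insert("hedwig_id".to_owned(), "spoofed".to_owned());

    let message = ValidatedMessage::new(
        uuid::Uuid::new_v4(),
        std::time::SystemTime::now(),
        "user.created/1.0",
        headers,
        "payload",
    );

    // the conversion refuses user attributes that collide with hedwig's
    // own metadata namespace
    assert!(hedwig_to_pubsub(message, "some-publisher").is_err());
}
```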
After publishing (or on encountering an error) the encoded version is mapped back to 582 | /// this user message so that success (or errors) can be reported in terms of the user's familiar 583 | /// type, rather than an opaque encoded/serialized version. 584 | /// 585 | /// The actual mapping mechanism is ordering-based synchronization. This buffer will 586 | /// maintain a queue of `M` which is implicitly in the same order as the pubsub library's 587 | /// internal buffer; one `M` will be pushed here for every corresponding api message pushed to 588 | /// the lib's buffer, and conversely popped when the corresponding api messages are published 589 | /// in order. This relies on the pubsub lib's documented preservation of FIFO order. 590 | /// 591 | /// This ordering is also preserved after errors. The pubsub sink will report errors along with 592 | /// the affected messages; this buffer will remove user messages for each error-carried message 593 | /// to relay back to the user. 594 | struct TranslateBuffer { 595 | buf: VecDeque, 596 | } 597 | 598 | impl TranslateBuffer { 599 | /// The maximum number of messages that could be inserted before a publisher flushes. 600 | /// 601 | /// This is defined by the pubsub service 602 | const PUBLISH_BUFFER_SIZE: usize = 1000; 603 | 604 | fn new() -> Self { 605 | Self { 606 | buf: VecDeque::with_capacity(Self::PUBLISH_BUFFER_SIZE), 607 | } 608 | } 609 | 610 | fn add_message(&mut self, user_message: M) { 611 | self.buf.push_back(user_message) 612 | } 613 | 614 | fn remove_success(&mut self, _api_message: pubsub::api::PubsubMessage) -> M { 615 | self.buf 616 | .pop_front() 617 | .expect("translate buffer should be in sync with publish buffer") 618 | } 619 | 620 | fn remove_errors( 621 | &mut self, 622 | error: pubsub::PublishError, 623 | ) -> (PubSubError, impl Iterator + '_) { 624 | (error.source, self.buf.drain(0..error.messages.len())) 625 | } 626 | 627 | fn view_messages(&mut self, api_messages: &[pubsub::api::PubsubMessage]) -> &[M] { 628 | // When a publishing request fails, a retry may be attempted; that retry policy will 629 | // check on the request payload and the user may choose to retry or not. That payload 630 | // needs to be translated back into user messages for retry assessment. 631 | // 632 | // Ideally we could return a subrange of the vecdeque, but the retry policy API 633 | // provides the user with `&T` of the failed request, so we can only return a reference 634 | // and not an instantiated struct. We _can_ get slices of the underlying queue, 635 | // but a vecdeque might be split into two segments so it wouldn't be a single reference. 636 | // 637 | // This call moves elements within the queue such that it all exists in a contiguous 638 | // segment (while preserving order); then we can return just a single slice. This only 639 | // happens on publishing errors, so all the moves aren't in the common path and 640 | // probably won't be a big problem in practice. 641 | // 642 | // There is a crate https://crates.io/crates/slice-deque that can create a slice 643 | // without this data movement (by using clever virtual memory tricks). 
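The `make_contiguous` behavior that `view_messages` leans on, in miniature (standard library only):

```rust
use std::collections::VecDeque;

#[test]
fn make_contiguous_preserves_fifo_order() {
    let mut queue: VecDeque<u32> = VecDeque::with_capacity(4);
    queue.extend([1, 2, 3, 4]);

    // pop from the front and push to the back so the storage may wrap around
    queue.pop_front();
    queue.push_back(5);

    // the queue can now be split across two internal segments, but
    // make_contiguous moves elements so a single slice covers everything,
    // with FIFO order intact
    assert_eq!(queue.make_contiguous(), [2, 3, 4, 5]);
}
```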
That's an ideal 644 | // candidate for this use case (long-lived buffer, ideally contiguous) but its 645 | // (un)safety makes me nervous, whereas std's vecdeque has more eyes on it 646 | &self.buf.make_contiguous()[0..api_messages.len()] 647 | } 648 | } 649 | 650 | /// A wrapper over the pubsub sink which holds the user message buffer and provides message 651 | /// translation for the response sink and retry policy 652 | #[pin_project] 653 | pub(super) struct TopicSink, R> { 654 | user_messages: Shared>, 655 | #[pin] 656 | pubsub_sink: pubsub::PublishTopicSink, TranslateSink>, 657 | } 658 | 659 | pub(super) enum TopicSinkError { 660 | Publish(PubSubError, Vec), 661 | Response(E), 662 | } 663 | 664 | impl, R> TopicSink 665 | where 666 | S: Sink, 667 | R: RetryPolicy<[M], PubSubError>, 668 | { 669 | pub(super) fn new( 670 | pubsub_sink: pubsub::PublishTopicSink, 671 | retry_policy: R, 672 | response_sink: Shared>>, 673 | ) -> Self { 674 | let user_messages = Shared::new(TranslateBuffer::new()); 675 | Self { 676 | user_messages: Shared::clone(&user_messages), 677 | pubsub_sink: pubsub_sink 678 | .with_retry_policy(TranslateRetryPolicy { 679 | user_messages: Shared::clone(&user_messages), 680 | user_retry: retry_policy, 681 | }) 682 | .with_response_sink(TranslateSink { 683 | user_messages, 684 | user_sink: response_sink, 685 | }), 686 | } 687 | } 688 | 689 | /// Translate the error type of a poll_x function into one holding user messages instead of 690 | /// api messages 691 | fn translate_poll_fn( 692 | self: Pin<&mut Self>, 693 | poll_fn: F, 694 | cx: &mut Context, 695 | ) -> Poll>> 696 | where 697 | F: FnOnce( 698 | Pin< 699 | &mut pubsub::PublishTopicSink< 700 | C, 701 | TranslateRetryPolicy, 702 | TranslateSink, 703 | >, 704 | >, 705 | &mut Context, 706 | ) -> Poll>>, 707 | { 708 | let this = self.project(); 709 | let user_messages = this.user_messages; 710 | 711 | poll_fn(this.pubsub_sink, cx).map_err(|err| match err { 712 | pubsub::SinkError::Publish(publish_error) => { 713 | let mut user_messages = user_messages.borrow_mut(); 714 | let (source, messages) = user_messages.remove_errors(publish_error); 715 | TopicSinkError::Publish(source, messages.collect()) 716 | } 717 | pubsub::SinkError::Response(response_error) => { 718 | TopicSinkError::Response(response_error) 719 | } 720 | }) 721 | } 722 | } 723 | 724 | impl, R> Sink<(M, pubsub::api::PubsubMessage)> for TopicSink 725 | where 726 | C: GrpcService + Clone + Send + 'static, 727 | C::Future: Send + 'static, 728 | C::Error: Into, 729 | C::ResponseBody: Body + Send + 'static, 730 | ::Error: Into + Send, 731 | R: RetryPolicy<[M], PubSubError> + 'static, 732 | R::RetryOp: Send + 'static, 733 | >::Sleep: Send + 'static, 734 | S: Sink + Send + 'static, 735 | M: EncodableMessage + Send + 'static, 736 | { 737 | type Error = TopicSinkError; 738 | 739 | fn poll_ready(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 740 | self.translate_poll_fn(pubsub::PublishTopicSink::poll_ready, cx) 741 | } 742 | 743 | fn start_send( 744 | self: Pin<&mut Self>, 745 | (user_message, api_message): (M, pubsub::api::PubsubMessage), 746 | ) -> Result<(), Self::Error> { 747 | let this = self.project(); 748 | 749 | // try to send the api message to the sink. 
Only if successful will it be added to the 750 | // buffer; if it fails some argument check, the buffer does not need to be popped for 751 | // translation 752 | match this.pubsub_sink.start_send(api_message) { 753 | Ok(()) => { 754 | this.user_messages.borrow_mut().add_message(user_message); 755 | Ok(()) 756 | } 757 | Err(err) => Err(match err { 758 | pubsub::SinkError::Publish(publish_error) => { 759 | assert_eq!(publish_error.messages.len(), 1); 760 | TopicSinkError::Publish(publish_error.source, vec![user_message]) 761 | } 762 | pubsub::SinkError::Response(_) => { 763 | unreachable!("response sink should not be used in start_send") 764 | } 765 | }), 766 | } 767 | } 768 | 769 | fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 770 | self.translate_poll_fn(pubsub::PublishTopicSink::poll_flush, cx) 771 | } 772 | fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 773 | self.translate_poll_fn(pubsub::PublishTopicSink::poll_close, cx) 774 | } 775 | } 776 | 777 | /// A retry policy which can be used by pubsub to retry api messages, but will provide the user 778 | /// with user messages to assess retry-worthyness 779 | struct TranslateRetryPolicy { 780 | user_messages: Shared>, 781 | user_retry: R, 782 | } 783 | 784 | impl RetryPolicy for TranslateRetryPolicy 785 | where 786 | R: RetryPolicy<[M], PubSubError>, 787 | { 788 | type RetryOp = TranslateRetryOp; 789 | 790 | fn new_operation(&mut self) -> Self::RetryOp { 791 | TranslateRetryOp { 792 | user_messages: Shared::clone(&self.user_messages), 793 | user_retry_op: self.user_retry.new_operation(), 794 | } 795 | } 796 | } 797 | 798 | struct TranslateRetryOp { 799 | user_messages: Shared>, 800 | user_retry_op: O, 801 | } 802 | 803 | impl RetryOperation for TranslateRetryOp 804 | where 805 | O: RetryOperation<[M], PubSubError>, 806 | { 807 | type Sleep = O::Sleep; 808 | 809 | fn check_retry( 810 | &mut self, 811 | failed_value: &pubsub::api::PublishRequest, 812 | error: &PubSubError, 813 | ) -> Option { 814 | // Given a failed request with api messages, translate it into user messages 815 | let mut user_messages = self.user_messages.borrow_mut(); 816 | let failed_messages = user_messages.view_messages(&failed_value.messages); 817 | 818 | self.user_retry_op.check_retry(failed_messages, error) 819 | } 820 | } 821 | 822 | /// A sink used to translate successful publishing responses from api messages back to user 823 | /// messages for consumption by the user's response sink 824 | struct TranslateSink> { 825 | user_messages: Shared>, 826 | user_sink: Shared>>, 827 | } 828 | 829 | impl Sink for TranslateSink 830 | where 831 | S: Sink, 832 | { 833 | type Error = S::Error; 834 | 835 | fn poll_ready(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 836 | self.user_sink.borrow_mut().poll_ready_unpin(cx) 837 | } 838 | fn start_send( 839 | self: Pin<&mut Self>, 840 | api_message: pubsub::api::PubsubMessage, 841 | ) -> Result<(), Self::Error> { 842 | let user_message = self.user_messages.borrow_mut().remove_success(api_message); 843 | self.user_sink.borrow_mut().start_send_unpin(user_message) 844 | } 845 | fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 846 | self.user_sink.borrow_mut().poll_flush_unpin(cx) 847 | } 848 | fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 849 | self.user_sink.borrow_mut().poll_close_unpin(cx) 850 | } 851 | } 852 | } 853 | -------------------------------------------------------------------------------- /src/backends/mock.rs: 
-------------------------------------------------------------------------------- 1 | //! In-memory messaging implementations, meant to imitate distributed messaging services for test 2 | //! purposes. 3 | //! 4 | //! See [`MockPublisher`] for an entry point to the mock system. 5 | 6 | use crate::{consumer::AcknowledgeableMessage, EncodableMessage, Topic, ValidatedMessage}; 7 | use async_channel as mpmc; 8 | use futures_util::{ 9 | sink, 10 | stream::{self, StreamExt}, 11 | }; 12 | use parking_lot::Mutex; 13 | use pin_project::pin_project; 14 | use std::{ 15 | collections::BTreeMap, 16 | error::Error as StdError, 17 | pin::Pin, 18 | sync::Arc, 19 | task::{Context, Poll}, 20 | }; 21 | 22 | /// Errors originating from mock publisher and consumer operations 23 | #[derive(Debug, thiserror::Error)] 24 | #[error(transparent)] 25 | pub struct Error { 26 | /// The underlying source of the error 27 | pub cause: Box, 28 | } 29 | 30 | impl Error { 31 | fn from(from: E) -> Self 32 | where 33 | Box: From, 34 | { 35 | Self { cause: from.into() } 36 | } 37 | } 38 | 39 | type Topics = BTreeMap; 40 | type Subscriptions = BTreeMap>; 41 | 42 | /// An in-memory publisher. 43 | /// 44 | /// Consumers for the published data can be created using the `new_consumer` method. 45 | /// 46 | /// Messages are published to particular [`Topics`](crate::Topic). Each topic may have multiple 47 | /// `Subscriptions`, and every message for a topic will be sent to each of its subscriptions. A 48 | /// subscription, in turn, may have multiple consumers; consumers will take messages from the 49 | /// subscription on a first-polled-first-served basis. 50 | /// 51 | /// This publisher can be cloned, allowing multiple publishers to send messages to the same set of 52 | /// topics and subscriptions. Any consumer created with `new_consumer` will receive all on-topic 53 | /// and on-subscription messages from all the associated publishers, regardless of whether the 54 | /// consumer was created from a cloned instance. 
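A round trip through the mock, sketched for any message type; the validator and message come from the caller, and the subscription name is arbitrary:

```rust
use futures_channel::mpsc;
use futures_util::{SinkExt, StreamExt};

use crate::{consumer::AcknowledgeToken as _, EncodableMessage, Publisher as _};

async fn mock_round_trip<M>(validator: M::Validator, message: M)
where
    M: EncodableMessage,
    M::Error: std::error::Error + 'static,
{
    let publisher = MockPublisher::new();

    // consumers must exist before publishing, or the message is dropped
    let mut consumer = publisher.new_consumer(message.topic(), "test-subscription");

    let (tx, mut responses) = mpsc::channel(1);
    let mut sink = Box::pin(publisher.publish_sink_with_responses(validator, tx));
    sink.send(message).await.expect("mock publish should not fail");
    assert!(responses.next().await.is_some()); // publish confirmed

    let received = consumer
        .next()
        .await
        .expect("channel is open")
        .expect("mock consumption is infallible");
    received.ack_token.ack().await.expect("mock ack is a no-op");
}
```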
55 | #[derive(Debug, Clone)] 56 | pub struct MockPublisher { 57 | topics: Arc>, 58 | } 59 | 60 | impl MockPublisher { 61 | /// Create a new `MockPublisher` 62 | pub fn new() -> Self { 63 | MockPublisher { 64 | topics: Arc::new(Mutex::new(BTreeMap::new())), 65 | } 66 | } 67 | 68 | /// Create a new consumer which will listen for messages published to the given topic and 69 | /// subscription by this publisher (or any of its clones) 70 | pub fn new_consumer( 71 | &self, 72 | topic: impl Into, 73 | subscription: impl Into, 74 | ) -> MockConsumer { 75 | let mut topics = self.topics.lock(); 76 | let subscriptions = topics.entry(topic.into()).or_default(); 77 | 78 | let channel = subscriptions 79 | .entry(subscription.into()) 80 | .or_insert_with(|| { 81 | let (sender, receiver) = mpmc::unbounded(); 82 | Channel { sender, receiver } 83 | }) 84 | .clone(); 85 | 86 | MockConsumer { 87 | subscription_messages: channel.receiver, 88 | subscription_resend: channel.sender, 89 | } 90 | } 91 | } 92 | 93 | impl Default for MockPublisher { 94 | fn default() -> Self { 95 | Self::new() 96 | } 97 | } 98 | 99 | impl crate::Publisher for MockPublisher 100 | where 101 | M: crate::EncodableMessage, 102 | M::Error: StdError + 'static, 103 | S: sink::Sink, 104 | S::Error: StdError + 'static, 105 | { 106 | type PublishError = Error; 107 | type PublishSink = MockSink; 108 | 109 | fn publish_sink_with_responses( 110 | self, 111 | validator: M::Validator, 112 | response_sink: S, 113 | ) -> Self::PublishSink { 114 | MockSink { 115 | topics: self.topics, 116 | validator, 117 | response_sink, 118 | } 119 | } 120 | } 121 | 122 | /// The sink used by the `MockPublisher` 123 | #[pin_project] 124 | #[derive(Debug)] 125 | pub struct MockSink { 126 | topics: Arc>, 127 | validator: M::Validator, 128 | #[pin] 129 | response_sink: S, 130 | } 131 | 132 | #[derive(Debug, Clone)] 133 | struct Channel { 134 | sender: mpmc::Sender, 135 | receiver: mpmc::Receiver, 136 | } 137 | 138 | impl sink::Sink for MockSink 139 | where 140 | M: EncodableMessage, 141 | M::Error: StdError + 'static, 142 | S: sink::Sink, 143 | S::Error: StdError + 'static, 144 | { 145 | type Error = Error; 146 | 147 | fn poll_ready(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 148 | self.project() 149 | .response_sink 150 | .poll_ready(cx) 151 | .map_err(Error::from) 152 | } 153 | 154 | fn start_send(self: Pin<&mut Self>, message: M) -> Result<(), Self::Error> { 155 | let this = self.project(); 156 | 157 | let topic = message.topic(); 158 | let validated_message = message.encode(this.validator).map_err(Error::from)?; 159 | 160 | // lock critical section 161 | { 162 | let mut topics = this.topics.lock(); 163 | 164 | // send the message to every subscription listening on the given topic 165 | 166 | // find the subscriptions for this topic 167 | let subscriptions = topics.entry(topic).or_default(); 168 | 169 | // Send to every subscription that still has consumers. 
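The disconnect detection that the retain below relies on can be seen against `async-channel` directly; a standalone test:

```rust
#[test]
fn try_send_reports_closed_once_all_receivers_drop() {
    let (sender, receiver) = async_channel::unbounded::<u32>();

    assert!(sender.try_send(1).is_ok());

    drop(receiver); // the only consumer goes away

    // an unbounded channel is never Full, so Closed is the only error left
    assert!(matches!(
        sender.try_send(2),
        Err(async_channel::TrySendError::Closed(2))
    ));
}
```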
If a subscription's consumers are 170 | // all dropped, the channel will have been closed and should be removed from the list 171 | subscriptions.retain(|_subscription_name, channel| { 172 | match channel.sender.try_send(validated_message.clone()) { 173 | // if successfully sent, retain the channel 174 | Ok(()) => true, 175 | // if the channel has disconnected due to drops, remove it from the list 176 | Err(mpmc::TrySendError::Closed(_)) => false, 177 | Err(mpmc::TrySendError::Full(_)) => { 178 | unreachable!("unbounded channel should never be full") 179 | } 180 | } 181 | }); 182 | } 183 | 184 | // notify the caller that the message has been sent successfully 185 | this.response_sink 186 | .start_send(message) 187 | .map_err(Error::from)?; 188 | 189 | Ok(()) 190 | } 191 | 192 | fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 193 | self.project() 194 | .response_sink 195 | .poll_flush(cx) 196 | .map_err(Error::from) 197 | } 198 | 199 | fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 200 | self.project() 201 | .response_sink 202 | .poll_close(cx) 203 | .map_err(Error::from) 204 | } 205 | } 206 | 207 | /// An opaque identifier for individual subscriptions to a [`MockPublisher`] 208 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] 209 | pub struct MockSubscription(String); 210 | 211 | impl From for MockSubscription 212 | where 213 | S: Into, 214 | { 215 | fn from(string: S) -> Self { 216 | MockSubscription(string.into()) 217 | } 218 | } 219 | 220 | /// A consumer for messages from a particular subscription to a [`MockPublisher`] 221 | #[derive(Debug, Clone)] 222 | pub struct MockConsumer { 223 | // channel receiver to get messages from the subscription 224 | subscription_messages: mpmc::Receiver, 225 | 226 | // channel sender to resend messages to the subscription on nack 227 | subscription_resend: mpmc::Sender, 228 | } 229 | 230 | impl crate::Consumer for MockConsumer { 231 | type AckToken = MockAckToken; 232 | type Error = Error; 233 | type Stream = Self; 234 | 235 | fn stream(self) -> Self::Stream { 236 | self 237 | } 238 | } 239 | 240 | impl stream::Stream for MockConsumer { 241 | type Item = Result, Error>; 242 | 243 | fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 244 | self.subscription_messages 245 | .poll_next_unpin(cx) 246 | .map(|opt_message| { 247 | opt_message.map(|message| { 248 | Ok(AcknowledgeableMessage { 249 | ack_token: MockAckToken { 250 | message: message.clone(), 251 | subscription_resend: self.subscription_resend.clone(), 252 | }, 253 | message, 254 | }) 255 | }) 256 | }) 257 | } 258 | } 259 | 260 | /// An acknowledge token associated with a particular message from a [`MockConsumer`]. 261 | /// 262 | /// When `nack` is called for a particular message's token, that message will be re-submitted to 263 | /// consumers of the corresponding subscription. 
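That redelivery behavior in use, assuming a consumer wired up as in the round-trip sketch above:

```rust
use futures_util::StreamExt;

use crate::consumer::AcknowledgeToken as _;

async fn nack_then_receive_again(mut consumer: MockConsumer) {
    let first = consumer.next().await.expect("open").expect("infallible");
    let uuid = *first.message.uuid();

    // nack re-queues the message on the same subscription...
    first.ack_token.nack().await.expect("a consumer still exists");

    // ...so the next poll sees it again
    let redelivered = consumer.next().await.expect("open").expect("infallible");
    assert_eq!(*redelivered.message.uuid(), uuid);
}
```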
Messages otherwise do not have any timeout 264 | /// behavior, so a message is only re-sent to consumers if it is explicitly nack'ed; `ack` and 265 | /// `modify_deadline` have no effect 266 | #[derive(Debug)] 267 | pub struct MockAckToken { 268 | message: ValidatedMessage, 269 | subscription_resend: mpmc::Sender, 270 | } 271 | 272 | #[async_trait::async_trait] 273 | impl crate::consumer::AcknowledgeToken for MockAckToken { 274 | type AckError = Error; 275 | type NackError = Error; 276 | type ModifyError = Error; 277 | 278 | async fn ack(self) -> Result<(), Self::AckError> { 279 | Ok(()) 280 | } 281 | 282 | async fn nack(self) -> Result<(), Self::NackError> { 283 | self.subscription_resend 284 | .send(self.message) 285 | .await 286 | .map_err(|mpmc::SendError(_message)| Error { 287 | cause: "Could not nack message because all consumers have been dropped".into(), 288 | }) 289 | } 290 | 291 | async fn modify_deadline(&mut self, _seconds: u32) -> Result<(), Self::ModifyError> { 292 | // currently does nothing 293 | Ok(()) 294 | } 295 | } 296 | -------------------------------------------------------------------------------- /src/backends/mod.rs: -------------------------------------------------------------------------------- 1 | /// The Google Pub/Sub backend 2 | #[cfg(feature = "google")] 3 | pub mod googlepubsub; 4 | 5 | #[cfg(any(test, feature = "mock"))] 6 | pub mod mock; 7 | 8 | /// The Redis backend 9 | #[cfg(feature = "redis")] 10 | pub mod redis; 11 | -------------------------------------------------------------------------------- /src/backends/redis/consumer.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use futures_util::stream; 3 | use hedwig_core::Topic; 4 | use pin_project::pin_project; 5 | use redis::{ 6 | aio::{ConnectionManager, MultiplexedConnection}, 7 | streams::{StreamReadOptions, StreamReadReply}, 8 | AsyncCommands, RedisResult, 9 | }; 10 | use std::{ 11 | pin::Pin, 12 | task::{Context, Poll}, 13 | time::SystemTime, 14 | }; 15 | use tracing::warn; 16 | 17 | use crate::{ 18 | redis::{ID_KEY, PAYLOAD_KEY, SCHEMA_KEY}, 19 | Headers, ValidatedMessage, 20 | }; 21 | 22 | use super::{EncodedMessage, StreamName}; 23 | 24 | /// Redis consumer client 25 | #[derive(Debug, Clone)] 26 | pub struct ConsumerClient { 27 | client: redis::Client, 28 | } 29 | 30 | async fn xgroup_create_mkstream( 31 | con: &mut MultiplexedConnection, 32 | stream_name: &StreamName, 33 | group_name: &GroupName, 34 | ) -> RedisResult<()> { 35 | // The special ID $ is the ID of the last entry in the stream 36 | let id = "$"; 37 | 38 | con.xgroup_create_mkstream(&stream_name.0, &group_name.0, id) 39 | .await 40 | } 41 | 42 | async fn xread( 43 | con: &mut ConnectionManager, 44 | stream_name: &StreamName, 45 | stream_read_options: &StreamReadOptions, 46 | ) -> RedisResult { 47 | con.xread_options(&[&stream_name.0], &[">"], stream_read_options) 48 | .await 49 | } 50 | 51 | impl ConsumerClient { 52 | /// Create a consumer client from a redis Client 53 | pub fn from_client(client: redis::Client) -> Self { 54 | ConsumerClient { client } 55 | } 56 | 57 | /// Create a consumer group 58 | pub async fn create_consumer_group(&mut self, config: &Group) -> RedisResult<()> { 59 | let mut con = self.client.get_multiplexed_async_connection().await?; 60 | let stream_name = &config.stream_name; 61 | let group_name = &config.group_name; 62 | xgroup_create_mkstream(&mut con, stream_name, group_name).await 63 | } 64 | 65 | /// Create a stream, given the 
subscription 66 | pub async fn stream_subscription(&mut self, subscription: Group) -> RedisStream { 67 | let stream_name = subscription.stream_name; 68 | let group_name = subscription.group_name; 69 | let consumer_name = ConsumerName::new(); 70 | 71 | let stream_name = stream_name.clone(); 72 | 73 | let client = self.client.clone(); 74 | 75 | // TODO Implement reliability (use ack token) 76 | // The NOACK subcommand can be used to avoid adding the message to the PEL in cases where reliability is not 77 | // a requirement and the occasional message loss is acceptable. This is equivalent to acknowledging the 78 | // message when it is read. 79 | let stream_read_options = StreamReadOptions::default() 80 | .group(&group_name.0, &consumer_name.0) 81 | // Block for up to 1 second (default behavior is to return immediately) for a single message. This does not 82 | // block until the batch is complete, just for a single message. 83 | .block(1_000) 84 | .noack(); 85 | 86 | let (tx, rx) = tokio::sync::mpsc::channel(1); 87 | 88 | tokio::spawn(async move { 89 | loop { 90 | if tx.is_closed() { 91 | break; 92 | } 93 | 94 | let con_res = ConnectionManager::new_with_config( 95 | client.clone(), 96 | super::connection_manager_config(), 97 | ) 98 | .await; 99 | 100 | if let Ok(mut con) = con_res { 101 | loop { 102 | if tx.is_closed() { 103 | break; 104 | } 105 | 106 | // Read from the stream 107 | 108 | let result: RedisResult = 109 | xread(&mut con, &stream_name, &stream_read_options).await; 110 | 111 | match result { 112 | Ok(entry) => { 113 | for stream_key in entry.keys { 114 | for message in stream_key.ids { 115 | if let ( 116 | Some(redis::Value::BulkString(b64_data)), 117 | Some(redis::Value::BulkString(schema)), 118 | Some(redis::Value::BulkString(id)), 119 | ) = ( 120 | message.map.get(PAYLOAD_KEY), 121 | message.map.get(SCHEMA_KEY), 122 | message.map.get(ID_KEY), 123 | ) { 124 | let schema = String::from_utf8(schema.clone()) 125 | .expect("Expecting utf8 encoded schema") 126 | .into(); 127 | let topic = Topic::from(stream_name.as_topic()); 128 | let b64_data = String::from_utf8(b64_data.clone()) 129 | .expect("Expecting utf8 encoded payload"); 130 | let id = String::from_utf8(id.clone()) 131 | .expect("Expecting utf8 encoded id"); 132 | 133 | if let Err(err) = tx 134 | .send(EncodedMessage { 135 | id, 136 | schema, 137 | topic, 138 | b64_data, 139 | }) 140 | .await 141 | { 142 | warn!(err = ?err, "Internal error"); 143 | } 144 | } else { 145 | // TODO Handle error instead of warn 146 | warn!(message = ?message, "Invalid message"); 147 | } 148 | } 149 | } 150 | } 151 | Err(err) => { 152 | warn!(err = ?err, "Stream error"); 153 | if err.is_io_error() { 154 | break; 155 | } 156 | } 157 | } 158 | } 159 | } 160 | } 161 | }); 162 | 163 | RedisStream { receiver: rx } 164 | } 165 | } 166 | 167 | /// A message received from Redis 168 | pub type RedisMessage = crate::consumer::AcknowledgeableMessage; 169 | 170 | /// Errors encountered while streaming messages 171 | #[derive(Debug, thiserror::Error)] 172 | pub enum RedisStreamError { 173 | /// An error from the underlying stream 174 | #[error(transparent)] 175 | Stream(#[from] redis::RedisError), 176 | 177 | /// An error from a missing hedwig attribute 178 | #[error("missing expected attribute: {key}")] 179 | MissingAttribute { 180 | /// the missing attribute 181 | key: &'static str, 182 | }, 183 | 184 | /// An error from a hedwig attribute with an invalid value 185 | #[error("invalid attribute value for {key}: {invalid_value}")] 186 | InvalidAttribute { 187 
| /// the invalid attribute 188 | key: &'static str, 189 | /// the invalid value 190 | invalid_value: String, 191 | /// the error describing the invalidity 192 | #[source] 193 | source: BoxError, 194 | }, 195 | 196 | /// The message is malformed 197 | #[error("malformed message")] 198 | MalformedMessage, 199 | } 200 | 201 | type BoxError = Box<dyn std::error::Error + Send + Sync>; 202 | 203 | /// A stream of messages from a subscription 204 | /// 205 | /// Created by [`ConsumerClient::stream_subscription`] 206 | #[pin_project] 207 | pub struct RedisStream { 208 | receiver: tokio::sync::mpsc::Receiver<EncodedMessage>, 209 | } 210 | 211 | impl stream::Stream for RedisStream { 212 | type Item = Result<RedisMessage<ValidatedMessage>, RedisStreamError>; 213 | 214 | fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> { 215 | let this = self.as_mut().project(); 216 | this.receiver.poll_recv(cx).map(|opt| { 217 | opt.map(|encoded_message| { 218 | let validated_message = redis_to_hedwig(encoded_message)?; 219 | Ok(RedisMessage { 220 | ack_token: AcknowledgeToken, 221 | message: validated_message, 222 | }) 223 | }) 224 | }) 225 | } 226 | } 227 | 228 | fn redis_to_hedwig(encoded_message: EncodedMessage) -> Result<ValidatedMessage, RedisStreamError> { 229 | use base64::Engine; 230 | 231 | let b64_data = &encoded_message.b64_data; 232 | let schema = encoded_message.schema; 233 | 234 | let data = base64::engine::general_purpose::STANDARD 235 | .decode(b64_data) 236 | .map_err(|_| RedisStreamError::MalformedMessage)?; 237 | 238 | let id = uuid::Uuid::new_v4(); 239 | let timestamp = SystemTime::now(); 240 | let headers = Headers::new(); 241 | 242 | Ok(ValidatedMessage::new(id, timestamp, schema, headers, data)) 243 | } 244 | 245 | /// An acknowledgement token for a message 246 | #[derive(Debug)] 247 | pub struct AcknowledgeToken; 248 | 249 | /// Errors encountered while acknowledging a message 250 | #[derive(Debug, Clone, Eq, PartialEq, thiserror::Error)] 251 | #[error("failed to ack/nack/modify")] 252 | pub struct AcknowledgeError; 253 | 254 | #[async_trait] 255 | impl crate::consumer::AcknowledgeToken for AcknowledgeToken { 256 | type AckError = AcknowledgeError; 257 | type ModifyError = AcknowledgeError; 258 | type NackError = AcknowledgeError; 259 | 260 | async fn ack(self) -> Result<(), Self::AckError> { 261 | // no-op because noack option is enabled 262 | // TODO Implement reliability 263 | Ok(()) 264 | } 265 | 266 | async fn nack(self) -> Result<(), Self::NackError> { 267 | // no-op because noack option is enabled 268 | // TODO Implement reliability 269 | Ok(()) 270 | } 271 | 272 | async fn modify_deadline(&mut self, _seconds: u32) -> Result<(), Self::ModifyError> { 273 | // no-op because noack option is enabled 274 | // TODO Implement reliability 275 | Ok(()) 276 | } 277 | } 278 | 279 | impl crate::consumer::Consumer for RedisStream { 280 | type AckToken = AcknowledgeToken; 281 | type Error = RedisStreamError; 282 | type Stream = RedisStream; 283 | 284 | fn stream(self) -> Self::Stream { 285 | self 286 | } 287 | } 288 | 289 | struct ConsumerName(String); 290 | 291 | impl ConsumerName { 292 | fn new() -> Self { 293 | Self(uuid::Uuid::new_v4().to_string()) 294 | } 295 | } 296 | 297 | /// A consumer group name 298 | #[derive(Debug, Clone)] 299 | pub struct GroupName(String); 300 | 301 | impl GroupName { 302 | /// Create a new group name 303 | pub fn new(name: impl Into<String>) -> Self { 304 | Self(name.into()) 305 | } 306 | } 307 | 308 | /// A consumer group 309 | #[derive(Debug, Clone)] 310 | pub struct Group { 311 | group_name: GroupName, 312 | stream_name: StreamName, 313 | } 314 | 315 | impl Group {
316 | /// Create a new consumer group 317 | pub fn new(name: GroupName, stream_name: StreamName) -> Self { 318 | Self { 319 | group_name: name, 320 | stream_name, 321 | } 322 | } 323 | } 324 | -------------------------------------------------------------------------------- /src/backends/redis/mod.rs: -------------------------------------------------------------------------------- 1 | mod consumer; 2 | mod publisher; 3 | 4 | use std::time::Duration; 5 | 6 | pub use consumer::*; 7 | pub use publisher::*; 8 | use redis::aio::ConnectionManagerConfig; 9 | 10 | use hedwig_core::Topic; 11 | 12 | const ID_KEY: &str = "hedwig_id"; 13 | const PAYLOAD_KEY: &str = "hedwig_payload"; 14 | const SCHEMA_KEY: &str = "hedwig_schema"; 15 | const MESSAGE_TIMESTAMP_KEY: &str = "hedwig_message_timestamp"; 16 | const PUBLISHER_KEY: &str = "hedwig_publisher"; 17 | 18 | const ENCODING_ATTR: (&str, &str) = ("hedwig_encoding", "base64"); 19 | const FORMAT_VERSION_ATTR: (&str, &str) = ("hedwig_format_version", "1.0"); 20 | 21 | // TODO Keep BACKOFF_MAX_DELAY as the default, but make it a configuration option 22 | const BACKOFF_MAX_DELAY: Duration = Duration::from_secs(60); 23 | 24 | fn connection_manager_config() -> ConnectionManagerConfig { 25 | ConnectionManagerConfig::new() 26 | // Note: although the ConnectionManagerConfig documentation says that a factor of 1000 means 1 sec, that is 27 | // not accurate: the factor is what the delay is multiplied by on each retry, starting from 1s (1s, 2s, 4s, ...). 28 | .set_factor(2) 29 | // The maximum delay is expressed in milliseconds, not seconds 30 | .set_max_delay(BACKOFF_MAX_DELAY.as_millis() as u64) 31 | } 32 | 33 | /// Error type for Redis backend 34 | #[derive(Debug, thiserror::Error)] 35 | pub enum RedisError { 36 | /// Unexpected disconnection 37 | #[error("data store disconnected")] 38 | ClientError(#[from] redis::RedisError), 39 | /// Deadline exceeded 40 | #[error("deadline exceeded")] 41 | DeadlineExceeded, 42 | /// Generic error 43 | #[error(transparent)] 44 | GenericError(Box<dyn std::error::Error + Send + Sync>), 45 | } 46 | 47 | /// Stream name 48 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] 49 | pub struct StreamName(String); 50 | 51 | impl StreamName { 52 | /// Create a new stream name from a topic 53 | pub fn from_topic(topic: impl std::fmt::Display) -> Self { 54 | StreamName(format!("hedwig:{topic}")) 55 | } 56 | 57 | /// Get the topic from the stream name 58 | pub fn as_topic(&self) -> &str { 59 | &self.0.as_str()["hedwig:".len()..]
60 | } 61 | } 62 | 63 | impl From for StreamName { 64 | fn from(topic: hedwig_core::Topic) -> Self { 65 | StreamName(format!("hedwig:{topic}")) 66 | } 67 | } 68 | 69 | /// Configuration for the Redis client 70 | #[derive(Debug, Clone)] 71 | pub struct ClientBuilderConfig { 72 | /// Redis endpoint 73 | pub endpoint: String, 74 | } 75 | 76 | /// Builder for the Redis client 77 | pub struct ClientBuilder { 78 | config: ClientBuilderConfig, 79 | } 80 | 81 | /// Redis client builder 82 | impl ClientBuilder { 83 | /// Create a new client builder 84 | pub async fn new(config: ClientBuilderConfig) -> Result { 85 | Ok(ClientBuilder { config }) 86 | } 87 | } 88 | 89 | impl ClientBuilder { 90 | /// Build a new consumer client 91 | pub async fn build_consumer( 92 | &self, 93 | // TODO 94 | _queue: impl Into, 95 | ) -> Result { 96 | let client = redis::Client::open(self.config.endpoint.as_str())?; 97 | Ok(ConsumerClient::from_client(client)) 98 | } 99 | 100 | /// Build a new publisher client 101 | pub async fn build_publisher( 102 | &self, 103 | publisher_id: impl Into, 104 | ) -> Result { 105 | let client = redis::Client::open(self.config.endpoint.as_str())?; 106 | Ok(PublisherClient::from_client(client, publisher_id)) 107 | } 108 | } 109 | 110 | struct EncodedMessage { 111 | id: String, 112 | topic: Topic, 113 | schema: std::borrow::Cow<'static, str>, 114 | b64_data: String, 115 | } 116 | -------------------------------------------------------------------------------- /src/backends/redis/publisher.rs: -------------------------------------------------------------------------------- 1 | use base64::Engine; 2 | use core::fmt; 3 | use futures_util::sink::Sink; 4 | use pin_project::pin_project; 5 | use redis::{ 6 | aio::ConnectionManager, 7 | streams::{StreamTrimStrategy, StreamTrimmingMode}, 8 | AsyncCommands, RedisResult, 9 | }; 10 | use std::{ 11 | pin::Pin, 12 | task::{Context, Poll}, 13 | }; 14 | use tracing::warn; 15 | 16 | use crate::{redis::EncodedMessage, EncodableMessage}; 17 | 18 | use super::{ 19 | RedisError, FORMAT_VERSION_ATTR, ID_KEY, MESSAGE_TIMESTAMP_KEY, PAYLOAD_KEY, PUBLISHER_KEY, 20 | SCHEMA_KEY, 21 | }; 22 | use super::{StreamName, ENCODING_ATTR}; 23 | 24 | /// Publisher client 25 | #[derive(Debug, Clone)] 26 | pub struct PublisherClient { 27 | client: redis::Client, 28 | publisher_id: PublisherId, 29 | } 30 | 31 | impl PublisherClient { 32 | /// Create a new publisher client from a Redis client 33 | pub fn from_client(client: redis::Client, publisher_id: impl Into) -> Self { 34 | let publisher_id = PublisherId::new(publisher_id); 35 | PublisherClient { 36 | client, 37 | publisher_id, 38 | } 39 | } 40 | } 41 | 42 | /// Errors which can occur while publishing a message 43 | #[derive(Debug)] 44 | pub enum PublishError { 45 | /// An error from publishing 46 | Publish { 47 | /// The cause of the error 48 | cause: RedisError, 49 | 50 | /// The batch of messages which failed to be published 51 | messages: Vec, 52 | }, 53 | 54 | /// An error from submitting a successfully published message to the user-provided response 55 | /// sink 56 | Response(E), 57 | 58 | /// An error from validating the given message 59 | InvalidMessage { 60 | /// The cause of the error 61 | cause: M::Error, 62 | 63 | /// The message which failed to be validated 64 | message: M, 65 | }, 66 | } 67 | 68 | impl fmt::Display for PublishError 69 | where 70 | M::Error: fmt::Display, 71 | E: fmt::Display, 72 | { 73 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 74 | match self { 75 | PublishError::Publish { 
messages, .. } => f.write_fmt(format_args!( 76 | "could not publish {} messages", 77 | messages.len() 78 | )), 79 | PublishError::Response(..) => f.write_str( 80 | "could not forward response for a successfully published message to the sink", 81 | ), 82 | PublishError::InvalidMessage { .. } => f.write_str("could not validate message"), 83 | } 84 | } 85 | } 86 | 87 | impl std::error::Error for PublishError 88 | where 89 | M: fmt::Debug, 90 | M::Error: std::error::Error + 'static, 91 | E: std::error::Error + 'static, 92 | { 93 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { 94 | match self { 95 | PublishError::Publish { cause, .. } => Some(cause), 96 | PublishError::Response(cause) => Some(cause as &_), 97 | PublishError::InvalidMessage { cause, .. } => Some(cause as &_), 98 | } 99 | } 100 | } 101 | 102 | /// Topic configuration 103 | pub struct TopicConfig { 104 | /// The topic name 105 | pub name: StreamName, 106 | } 107 | 108 | impl PublisherClient { 109 | /// Create a new publisher 110 | pub async fn publisher(&self) -> Publisher { 111 | let client = self.client.clone(); 112 | let publisher_id = self.publisher_id.clone(); 113 | 114 | let (tx, mut rx) = tokio::sync::mpsc::channel(1); 115 | 116 | tokio::spawn(async move { 117 | loop { 118 | if rx.is_closed() { 119 | break; 120 | } 121 | 122 | let con_res = ConnectionManager::new_with_config( 123 | client.clone(), 124 | super::connection_manager_config(), 125 | ) 126 | .await; 127 | 128 | if let Ok(mut con) = con_res { 129 | if rx.is_closed() { 130 | break; 131 | } 132 | 133 | while let Some(EncodedMessage { 134 | id, 135 | topic, 136 | b64_data, 137 | schema, 138 | }) = rx.recv().await 139 | { 140 | let key = StreamName::from(topic); 141 | let b64_data = b64_data.as_str(); 142 | 143 | let res = 144 | push(&mut con, &key, b64_data, &schema, &id, &publisher_id.0).await; 145 | 146 | if let Err(err) = res { 147 | warn!("{:?}", err); 148 | if err.is_io_error() { 149 | break; 150 | } 151 | } 152 | } 153 | } 154 | } 155 | }); 156 | 157 | Publisher { sender: tx } 158 | } 159 | } 160 | 161 | async fn push( 162 | con: &mut ConnectionManager, 163 | key: &StreamName, 164 | payload: &str, 165 | schema: &str, 166 | hedwig_id: &str, 167 | publisher_id: &str, 168 | ) -> RedisResult<()> { 169 | // TODO trimming mode should not be needed if everything is set up correctly 170 | // Workaround to prevent increasing indefinitely the queue 171 | let options = redis::streams::StreamAddOptions::default().trim(StreamTrimStrategy::maxlen( 172 | StreamTrimmingMode::Approx, 173 | 1_000, 174 | )); 175 | 176 | let message_timestamp: String = std::time::SystemTime::now() 177 | .duration_since(std::time::UNIX_EPOCH) 178 | .unwrap_or_default() 179 | .as_millis() 180 | .to_string(); 181 | 182 | con.xadd_options( 183 | &key.0, 184 | "*", 185 | &[ 186 | (PAYLOAD_KEY, payload), 187 | FORMAT_VERSION_ATTR, 188 | (ID_KEY, hedwig_id), 189 | (MESSAGE_TIMESTAMP_KEY, &message_timestamp), 190 | (PUBLISHER_KEY, publisher_id), 191 | (SCHEMA_KEY, schema), 192 | ENCODING_ATTR, 193 | ], 194 | &options, 195 | ) 196 | .await 197 | } 198 | 199 | #[derive(Debug, Clone)] 200 | struct PublisherId(String); 201 | 202 | impl PublisherId { 203 | fn new(s: impl Into) -> Self { 204 | Self(s.into()) 205 | } 206 | } 207 | 208 | /// Redis publisher 209 | #[derive(Clone)] 210 | pub struct Publisher { 211 | sender: tokio::sync::mpsc::Sender, 212 | } 213 | 214 | impl crate::publisher::Publisher for Publisher 215 | where 216 | M: EncodableMessage + Send + 'static, 217 | S: Sink + Send + 
'static, 218 | { 219 | type PublishError = PublishError; 220 | type PublishSink = PublishSink; 221 | 222 | // TODO For reliability, implement response sink, so users can ack messages 223 | fn publish_sink_with_responses( 224 | self, 225 | validator: M::Validator, 226 | _response_sink: S, 227 | ) -> Self::PublishSink { 228 | PublishSink { 229 | validator, 230 | sender: self.sender.clone(), 231 | _m: std::marker::PhantomData, 232 | buffer: None, 233 | } 234 | } 235 | } 236 | 237 | /// Publish sink 238 | #[pin_project] 239 | pub struct PublishSink> { 240 | validator: M::Validator, 241 | sender: tokio::sync::mpsc::Sender, 242 | _m: std::marker::PhantomData<(M, S)>, 243 | buffer: Option, 244 | } 245 | 246 | impl Sink for PublishSink 247 | where 248 | M: EncodableMessage + Send + 'static, 249 | S: Sink + Send + 'static, 250 | { 251 | type Error = PublishError; 252 | 253 | fn poll_ready(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { 254 | self.poll_flush_buffered_message(cx) 255 | } 256 | 257 | fn start_send(mut self: Pin<&mut Self>, message: M) -> Result<(), Self::Error> { 258 | let this = self.as_mut().project(); 259 | 260 | if this.buffer.replace(message).is_some() { 261 | panic!("each `start_send` must be preceded by a successful call to `poll_ready`"); 262 | } 263 | 264 | Ok(()) 265 | } 266 | 267 | fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { 268 | self.poll_flush_buffered_message(cx) 269 | } 270 | 271 | fn poll_close(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll> { 272 | Poll::Ready(Ok(())) 273 | } 274 | } 275 | 276 | fn encode_message( 277 | validator: &M::Validator, 278 | message: M, 279 | ) -> Result> 280 | where 281 | M: EncodableMessage + Send + 'static, 282 | { 283 | let validated = match message.encode(validator) { 284 | Ok(validated_msg) => validated_msg, 285 | Err(err) => { 286 | return Err(PublishError::InvalidMessage { 287 | cause: err, 288 | message, 289 | }) 290 | } 291 | }; 292 | 293 | let bytes = validated.data(); 294 | let schema = validated.schema().to_string().into(); 295 | 296 | // Encode as base64, because Redis needs it 297 | let b64_data = base64::engine::general_purpose::STANDARD.encode(bytes); 298 | let id = validated.uuid().to_string(); 299 | 300 | Ok(EncodedMessage { 301 | id, 302 | schema, 303 | topic: message.topic(), 304 | b64_data, 305 | }) 306 | } 307 | 308 | impl PublishSink 309 | where 310 | M: EncodableMessage + Send + 'static, 311 | S: Sink + Send + 'static, 312 | { 313 | fn poll_flush_buffered_message( 314 | self: Pin<&mut Self>, 315 | cx: &mut Context<'_>, 316 | ) -> Poll>> { 317 | let this = self.project(); 318 | 319 | if this.sender.capacity() == 0 { 320 | cx.waker().wake_by_ref(); 321 | return Poll::Pending; 322 | } 323 | 324 | let Some(message) = this.buffer.take() else { 325 | // Nothing pending 326 | return Poll::Ready(Ok(())); 327 | }; 328 | 329 | let Ok(encoded_message) = encode_message(this.validator, message) else { 330 | // TODO Handle errors 331 | return Poll::Ready(Ok(())); 332 | }; 333 | 334 | // Cannot fail here, we checked capacity before 335 | this.sender.try_send(encoded_message).unwrap(); 336 | Poll::Ready(Ok(())) 337 | } 338 | } 339 | -------------------------------------------------------------------------------- /src/consumer.rs: -------------------------------------------------------------------------------- 1 | //! Types, traits, and functions necessary to consume messages using hedwig 2 | //! 3 | //! See the [`Consumer`] trait. 
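As orientation for the definitions below, here is a minimal sketch of a consumer wired to this crate's Redis backend. It assumes the `redis` and `protobuf` features; the endpoint, queue, group, and schema names are placeholder values, and `UserCreatedMessage` mirrors the message type from the crate-level example:

```rust
use futures_util::StreamExt;
use hedwig::{
    redis::{ClientBuilder, ClientBuilderConfig, Group, GroupName, StreamName},
    validators::{prost::ExactSchemaMatcher, ProstDecoder},
    Consumer, DecodableMessage,
};

#[derive(Clone, PartialEq, prost::Message)]
struct UserCreatedMessage {
    #[prost(string, tag = "1")]
    user_id: String,
}

impl DecodableMessage for UserCreatedMessage {
    type Error =
        hedwig::validators::ProstDecodeError<hedwig::validators::prost::SchemaMismatchError>;
    type Decoder = ProstDecoder<ExactSchemaMatcher<UserCreatedMessage>>;

    fn decode(msg: hedwig::ValidatedMessage, decoder: &Self::Decoder) -> Result<Self, Self::Error> {
        decoder.decode(msg)
    }
}

async fn consume_user_created() -> Result<(), Box<dyn std::error::Error>> {
    let builder = ClientBuilder::new(ClientBuilderConfig {
        endpoint: "redis://127.0.0.1:6379".into(), // placeholder endpoint
    })
    .await?;

    // The queue name is currently unused by the Redis backend.
    let mut client = builder.build_consumer("example_queue").await?;

    // A consumer group over the stream backing the "user.created" topic.
    let group = Group::new(
        GroupName::new("example_group"),
        StreamName::from_topic("user.created"),
    );

    let messages = client
        .stream_subscription(group)
        .await
        .consume::<UserCreatedMessage>(ProstDecoder::new(ExactSchemaMatcher::new(
            "user.created/1.0",
        )));
    futures_util::pin_mut!(messages);

    while let Some(received) = messages.next().await {
        match received {
            // ack is currently a no-op for Redis (NOACK is set), but it keeps
            // the code portable across backends.
            Ok(msg) => println!("user created: {}", msg.ack().await?.user_id),
            Err(err) => eprintln!("receive error: {err:?}"),
        }
    }
    Ok(())
}
```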
4 | 5 | use crate::message::ValidatedMessage; 6 | use async_trait::async_trait; 7 | use bytes::Bytes; 8 | use either::Either; 9 | use futures_util::stream; 10 | use pin_project::pin_project; 11 | use std::{ 12 | pin::Pin, 13 | task::{Context, Poll}, 14 | }; 15 | 16 | pub use hedwig_core::message::DecodableMessage; 17 | 18 | /// Message consumers ingest messages from a queue service and present them to the user application 19 | /// as a [`Stream`](futures_util::stream::Stream). 20 | /// 21 | /// ## Message Decoding 22 | /// 23 | /// Messages pulled from the service are assumed to have been created by some [hedwig 24 | /// publisher](crate::Publisher) and therefore were validated against the included schema 25 | /// when publishing. It is the decoder's responsibility (when provided to functions like 26 | /// [`consume`](Consumer::consume)) to check this schema and the accompanying payload for validity. 27 | /// 28 | /// ## Acknowledging Messages 29 | /// Typically message services deliver messages with a particular delivery time window, during 30 | /// which this message won't be sent to other consumers. In AWS SQS this is called the [visibility 31 | /// timeout][AWS], and in GCP PubSub this is the [ack deadline][GCP]. 32 | /// 33 | /// If a message is successfully acknowledged within this time, it will be considered processed and 34 | /// not delivered to other consumers (and possibly deleted depending on the service's 35 | /// configuration). A message can conversely be negatively-acknowledged, to indicate e.g. 36 | /// processing has failed and the message should be delivered again to some consumer. This time 37 | /// window can also be modified for each message, to allow for longer or shorter message processing 38 | /// than the default configured time window. 39 | /// 40 | /// Implementations of this trait do not ack/nack/modify messages themselves, and instead present 41 | /// this functionality to users with the [`AcknowledgeableMessage`] type. Message processors are 42 | /// responsible for handling message acknowledgement, including extensions for processing time as 43 | /// necessary. 44 | /// 45 | /// Bear in mind that message delivery and acknowledgement are all best-effort in distributed 46 | /// message services. An acknowledged or extended message may still be re-delivered for any number 47 | /// of reasons, and applications should be made resilient to such events. 48 | /// 49 | /// [AWS]: https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-visibility-timeout.html 50 | /// [GCP]: https://cloud.google.com/pubsub/docs/subscriber 51 | // If we had async drop, sending nacks on drop would be nice. Alas, rust isn't there yet 52 | pub trait Consumer { 53 | /// The type of acknowledgement tokens produced by the underlying service implementation 54 | type AckToken: AcknowledgeToken; 55 | /// Errors encountered while streaming messages 56 | type Error; 57 | /// The stream returned by [`stream`] 58 | type Stream: stream::Stream< 59 | Item = Result<AcknowledgeableMessage<Self::AckToken, ValidatedMessage<Bytes>>, Self::Error>, 60 | >; 61 | 62 | /// Begin pulling messages from the backing message service. 63 | /// 64 | /// The messages produced by this stream have not been decoded yet. Users should typically call 65 | /// [`consume`](Consumer::consume) instead, to produce decoded messages. 66 | fn stream(self) -> Self::Stream; 67 | 68 | /// Create a stream of decoded messages from this consumer, using a decoder for the given 69 | /// [decodable](DecodableMessage) message type.
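The ack/nack/deadline workflow that the documentation above describes can be written generically over any consumer stream. A sketch follows; the `handle` callback and the 60 second deadline are hypothetical stand-ins for application logic:

```rust
use futures_util::{Stream, StreamExt};
use hedwig::{AcknowledgeToken, AcknowledgeableMessage};

// Drain a consumer stream, extending the deadline before potentially slow
// work, then ack on success or nack on failure.
async fn pump<S, A, M, E>(messages: S, handle: impl Fn(&M) -> bool)
where
    S: Stream<Item = Result<AcknowledgeableMessage<A, M>, E>>,
    A: AcknowledgeToken,
    E: std::fmt::Debug,
{
    futures_util::pin_mut!(messages);
    while let Some(received) = messages.next().await {
        match received {
            Ok(mut msg) => {
                // Ask for more processing time; failures here are ignored since
                // the message would simply be redelivered later.
                let _ = msg.modify_deadline(60).await;
                if handle(&msg) {
                    let _ = msg.ack().await; // processed: prevent re-delivery
                } else {
                    let _ = msg.nack().await; // failed: request re-delivery
                }
            }
            Err(err) => eprintln!("stream error: {err:?}"),
        }
    }
}
```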
70 | fn consume<M>(self, decoder: M::Decoder) -> MessageStream<Self::Stream, M::Decoder, M> 71 | where 72 | Self: Sized, 73 | M: DecodableMessage, 74 | { 75 | MessageStream { 76 | stream: self.stream(), 77 | decoder, 78 | _message_type: std::marker::PhantomData, 79 | } 80 | } 81 | } 82 | 83 | /// A received message which can be acknowledged to prevent re-delivery by the backing message 84 | /// service. 85 | /// 86 | /// See the documentation for acknowledging messages on [`Consumer`] 87 | #[derive(Debug)] 88 | #[must_use = "Messages should be ack'ed to prevent repeated delivery, or nack'ed to improve responsiveness"] 89 | pub struct AcknowledgeableMessage<A, M> { 90 | /// The acknowledgement token which executes the ack/nack/modify operations 91 | pub ack_token: A, 92 | 93 | /// The underlying message 94 | pub message: M, 95 | } 96 | 97 | impl<A, M> AcknowledgeableMessage<A, M> 98 | where 99 | A: AcknowledgeToken, 100 | { 101 | /// Acknowledge this message, declaring that processing was successful and the message should 102 | /// not be re-delivered to consumers. 103 | pub async fn ack(self) -> Result<M, A::AckError> { 104 | self.ack_token.ack().await?; 105 | Ok(self.message) 106 | } 107 | 108 | /// Negatively acknowledge this message, declaring that processing was unsuccessful and the 109 | /// message should be re-delivered to consumers. 110 | pub async fn nack(self) -> Result<M, A::NackError> { 111 | self.ack_token.nack().await?; 112 | Ok(self.message) 113 | } 114 | 115 | /// Modify the acknowledgement deadline for this message to the given number of seconds. 116 | /// 117 | /// The new deadline will typically be this number of seconds after the service receives this 118 | /// modification request, though users should check their implementation's documented 119 | /// behavior. 120 | pub async fn modify_deadline(&mut self, seconds: u32) -> Result<(), A::ModifyError> { 121 | self.ack_token.modify_deadline(seconds).await 122 | } 123 | } 124 | 125 | impl<A, M> std::ops::Deref for AcknowledgeableMessage<A, M> { 126 | type Target = M; 127 | 128 | fn deref(&self) -> &M { 129 | &self.message 130 | } 131 | } 132 | 133 | impl<A, M> std::ops::DerefMut for AcknowledgeableMessage<A, M> { 134 | fn deref_mut(&mut self) -> &mut M { 135 | &mut self.message 136 | } 137 | } 138 | 139 | /// A token associated with some message received from a message service, used to issue an 140 | /// ack/nack/modify request 141 | /// 142 | /// See the documentation for acknowledging messages on [`Consumer`] 143 | #[async_trait] 144 | #[must_use = "Messages should be ack'ed to prevent repeated delivery, or nack'ed to improve responsiveness"] 145 | pub trait AcknowledgeToken { 146 | /// Errors returned by [`ack`](AcknowledgeToken::ack) 147 | type AckError; 148 | /// Errors returned by [`nack`](AcknowledgeToken::nack) 149 | type NackError; 150 | /// Errors returned by [`modify_deadline`](AcknowledgeToken::modify_deadline) 151 | type ModifyError; 152 | 153 | /// Acknowledge the associated message 154 | async fn ack(self) -> Result<(), Self::AckError>; 155 | 156 | /// Negatively acknowledge the associated message 157 | async fn nack(self) -> Result<(), Self::NackError>; 158 | 159 | /// Change the associated message's acknowledge deadline to the given number of seconds 160 | // uses u32 seconds instead of e.g.
Duration because SQS and PubSub both have second 161 | // granularity; Duration::from_millis(999) would truncate to 0, which might be surprising 162 | async fn modify_deadline(&mut self, seconds: u32) -> Result<(), Self::ModifyError>; 163 | } 164 | 165 | /// The stream returned by the [`consume`](Consumer::consume) function 166 | #[pin_project] 167 | #[derive(Debug)] 168 | pub struct MessageStream<S, D, M> { 169 | #[pin] 170 | stream: S, 171 | decoder: D, 172 | _message_type: std::marker::PhantomData<M>, 173 | } 174 | 175 | impl<S, M, AckToken, StreamError> stream::Stream for MessageStream<S, M::Decoder, M> 176 | where 177 | S: stream::Stream< 178 | Item = Result<AcknowledgeableMessage<AckToken, ValidatedMessage<Bytes>>, StreamError>, 179 | >, 180 | M: DecodableMessage, 181 | { 182 | #[allow(clippy::type_complexity)] // it is what it is, aliases would all be generic anyway 183 | type Item = Result<AcknowledgeableMessage<AckToken, M>, Either<StreamError, M::Error>>; 184 | 185 | fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> { 186 | let this = self.project(); 187 | let decoder = this.decoder; 188 | this.stream.poll_next(cx).map(|opt| { 189 | opt.map(|res| { 190 | res.map_err(Either::Left).and_then( 191 | |AcknowledgeableMessage { ack_token, message }| { 192 | Ok(AcknowledgeableMessage { 193 | ack_token, 194 | message: M::decode(message, decoder).map_err(Either::Right)?, 195 | }) 196 | }, 197 | ) 198 | }) 199 | }) 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Hedwig is a message bus library that works with arbitrary pubsub services such as AWS SNS/SQS 2 | //! or Google Cloud Pubsub. Messages are validated before they are published. The publisher and 3 | //! consumer are de-coupled and fan-out is supported out of the box. 4 | //! 5 | //! The Rust library supports both publishing and consuming messages. 6 | //! 7 | //! # Examples 8 | //! 9 | //! Publish a message. Payload encoded with JSON and validated using a JSON Schema. 10 | //! 11 | //! ``` 12 | //! use hedwig::{validators, Publisher, Consumer}; 13 | //! # use uuid::Uuid; 14 | //! # use std::{path::Path, time::SystemTime}; 15 | //! # use futures_util::{sink::SinkExt, stream::StreamExt}; 16 | //! # #[cfg(not(all(feature = "protobuf", feature = "mock")))] 17 | //! # fn main() {} 18 | //! # #[cfg(all(feature = "protobuf", feature = "mock"))] // example uses a protobuf validator. 19 | //! # #[tokio::main(flavor = "current_thread")] 20 | //! # async fn main() -> Result<(), Box<dyn std::error::Error>> { 21 | //! 22 | //! #[derive(Clone, PartialEq, Eq, prost::Message)] 23 | //! struct UserCreatedMessage { 24 | //! #[prost(string, tag = "1")] 25 | //! user_id: String, 26 | //! } 27 | //! 28 | //! impl<'a> hedwig::EncodableMessage for UserCreatedMessage { 29 | //! type Error = validators::ProstValidatorError; 30 | //! type Validator = validators::ProstValidator; 31 | //! fn topic(&self) -> hedwig::Topic { 32 | //! "user.created".into() 33 | //! } 34 | //! fn encode(&self, validator: &Self::Validator) -> Result<hedwig::ValidatedMessage, Self::Error> { 35 | //! Ok(validator.validate( 36 | //! uuid::Uuid::new_v4(), 37 | //! SystemTime::now(), 38 | //! "user.created/1.0", 39 | //! Default::default(), 40 | //! self, 41 | //! )?) 42 | //! } 43 | //! } 44 | //! 45 | //! impl hedwig::DecodableMessage for UserCreatedMessage { 46 | //! type Error = validators::ProstDecodeError<validators::prost::SchemaMismatchError>; 47 | //! type Decoder = 48 | //! validators::ProstDecoder<validators::prost::ExactSchemaMatcher<UserCreatedMessage>>; 49 | //! 50 | //! fn decode(msg: hedwig::ValidatedMessage, decoder: &Self::Decoder) -> Result<Self, Self::Error> { 51 | //! decoder.decode(msg) 52 | //! } 53 | //! } 54 | //! 55 | //! 56 |
let publisher = /* Some publisher */ 57 | //! # hedwig::mock::MockPublisher::new(); 58 | //! let consumer = /* Consumer associated to that publisher */ 59 | //! # publisher.new_consumer("user.created", "example_subscription"); 60 | //! 61 | //! let mut publish_sink = Publisher::::publish_sink(publisher, validators::ProstValidator::new()); 62 | //! let mut consumer_stream = consumer.consume::( 63 | //! validators::ProstDecoder::new(validators::prost::ExactSchemaMatcher::new("user.created/1.0")), 64 | //! ); 65 | //! 66 | //! publish_sink.send(UserCreatedMessage { user_id: String::from("U_123") }).await?; 67 | //! 68 | //! assert_eq!( 69 | //! "U_123", 70 | //! consumer_stream.next().await.unwrap()?.ack().await?.user_id 71 | //! ); 72 | //! 73 | //! # Ok(()) 74 | //! # } 75 | //! ``` 76 | #![cfg_attr(docsrs, feature(doc_cfg))] 77 | #![deny(missing_docs)] 78 | 79 | pub use hedwig_core::{message, Headers, Topic, ValidatedMessage}; 80 | 81 | mod backends; 82 | mod consumer; 83 | mod publisher; 84 | mod tests; 85 | pub mod validators; 86 | 87 | #[allow(unused_imports)] 88 | pub use backends::*; 89 | 90 | pub use consumer::*; 91 | pub use publisher::*; 92 | 93 | // TODO make these public somewhere? 94 | #[cfg(feature = "google")] 95 | pub(crate) const HEDWIG_ID: &str = "hedwig_id"; 96 | #[cfg(feature = "google")] 97 | pub(crate) const HEDWIG_MESSAGE_TIMESTAMP: &str = "hedwig_message_timestamp"; 98 | #[cfg(feature = "google")] 99 | pub(crate) const HEDWIG_SCHEMA: &str = "hedwig_schema"; 100 | #[cfg(feature = "google")] 101 | pub(crate) const HEDWIG_PUBLISHER: &str = "hedwig_publisher"; 102 | #[cfg(feature = "google")] 103 | pub(crate) const HEDWIG_FORMAT_VERSION: &str = "hedwig_format_version"; 104 | 105 | /// All errors that may be returned when operating top level APIs. 106 | #[derive(Debug, thiserror::Error)] 107 | #[non_exhaustive] 108 | pub enum Error { 109 | /// Unable to encode message payload 110 | #[error("Unable to encode message payload")] 111 | EncodeMessage(#[source] Box), 112 | } 113 | -------------------------------------------------------------------------------- /src/publisher.rs: -------------------------------------------------------------------------------- 1 | //! Types, traits, and functions necessary to publish messages using hedwig 2 | 3 | use futures_util::sink; 4 | use std::{ 5 | pin::Pin, 6 | task::{Context, Poll}, 7 | }; 8 | 9 | pub use hedwig_core::message::EncodableMessage; 10 | 11 | /// Message publishers. 12 | /// 13 | /// Message publishers validate, encode, and deliver messages to an endpoint, possibly a remote 14 | /// one. Message publishers may also additionally validate a message for publisher-specific 15 | /// requirements (e.g. size). 16 | pub trait Publisher = Drain> { 17 | /// The error type that may be encountered when publishing a message 18 | type PublishError; 19 | /// The [`Sink`](futures_util::sink::Sink) type provided by the publisher to accept messages, 20 | /// validate them, then publish them to the destination. 21 | type PublishSink: sink::Sink; 22 | 23 | /// Create a new sink to accept messages. 24 | /// 25 | /// The sink will use the given validator to validate and/or encode messages, possibly batch 26 | /// them together, then publish them to their destination. The details of the internal encoding 27 | /// and batching may vary by `Publisher` implementation. 
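Concretely, a sink against this crate's Redis backend could be set up as sketched below; the endpoint and publisher id are placeholders, `UserCreatedMessage` mirrors the crate-level example, and the `redis` and `protobuf` features are assumed:

```rust
use futures_util::SinkExt;
use hedwig::{
    redis::{ClientBuilder, ClientBuilderConfig},
    validators::ProstValidator,
    EncodableMessage, Publisher,
};

#[derive(Clone, PartialEq, prost::Message)]
struct UserCreatedMessage {
    #[prost(string, tag = "1")]
    user_id: String,
}

impl EncodableMessage for UserCreatedMessage {
    type Error = hedwig::validators::ProstValidatorError;
    type Validator = ProstValidator;

    fn topic(&self) -> hedwig::Topic {
        "user.created".into()
    }

    fn encode(&self, validator: &Self::Validator) -> Result<hedwig::ValidatedMessage, Self::Error> {
        validator.validate(
            uuid::Uuid::new_v4(),
            std::time::SystemTime::now(),
            "user.created/1.0",
            Default::default(),
            self,
        )
    }
}

async fn publish_user_created(user_id: String) -> Result<(), Box<dyn std::error::Error>> {
    let builder = ClientBuilder::new(ClientBuilderConfig {
        endpoint: "redis://127.0.0.1:6379".into(), // placeholder endpoint
    })
    .await?;
    let client = builder.build_publisher("example_publisher").await?;

    // The sink validates and encodes each message, then hands it to a
    // background task that writes to the Redis stream for its topic.
    let mut sink = Publisher::<UserCreatedMessage>::publish_sink(
        client.publisher().await,
        ProstValidator::new(),
    );
    sink.send(UserCreatedMessage { user_id }).await?;
    Ok(())
}
```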
28 | fn publish_sink(self, validator: M::Validator) -> Self::PublishSink 29 | where 30 | Self: Sized, 31 | S: Default, 32 | { 33 | self.publish_sink_with_responses(validator, S::default()) 34 | } 35 | 36 | /// Create a new sink to accept messages. 37 | /// 38 | /// This creates a sink like [`publish_sink`](Publisher::publish_sink) while additionally 39 | /// listening for successful responses; after a message has been successfully published, it 40 | /// will be passed to the given response sink to complete any necessary work (e.g. 41 | /// acknowledging success or collecting metrics) 42 | fn publish_sink_with_responses( 43 | self, 44 | validator: M::Validator, 45 | response_sink: S, 46 | ) -> Self::PublishSink; 47 | } 48 | 49 | /// Like [`futures_util::sink::Drain`] but implements `Default` 50 | #[derive(Debug)] 51 | pub struct Drain(std::marker::PhantomData); 52 | 53 | impl Default for Drain { 54 | fn default() -> Self { 55 | Self(std::marker::PhantomData) 56 | } 57 | } 58 | 59 | impl sink::Sink for Drain { 60 | type Error = futures_util::never::Never; 61 | 62 | fn poll_ready(self: Pin<&mut Self>, _: &mut Context) -> Poll> { 63 | Poll::Ready(Ok(())) 64 | } 65 | fn start_send(self: Pin<&mut Self>, _: T) -> Result<(), Self::Error> { 66 | Ok(()) 67 | } 68 | fn poll_flush(self: Pin<&mut Self>, _: &mut Context) -> Poll> { 69 | Poll::Ready(Ok(())) 70 | } 71 | fn poll_close(self: Pin<&mut Self>, _: &mut Context) -> Poll> { 72 | Poll::Ready(Ok(())) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/tests/google.rs: -------------------------------------------------------------------------------- 1 | #![cfg(all(feature = "google", feature = "protobuf"))] 2 | 3 | type BoxError = Box; 4 | use crate::{ 5 | googlepubsub::{ 6 | retry_policy::{RetryOperation, RetryPolicy}, 7 | AuthFlow, ClientBuilder, ClientBuilderConfig, PubSubConfig, PubSubError, PublishError, 8 | StreamSubscriptionConfig, SubscriptionConfig, SubscriptionName, TopicConfig, TopicName, 9 | }, 10 | message, 11 | validators::{ 12 | prost::{ExactSchemaMatcher, SchemaMismatchError}, 13 | ProstDecodeError, ProstDecoder, ProstValidator, ProstValidatorError, 14 | }, 15 | Consumer, DecodableMessage, EncodableMessage, Headers, Publisher, Topic, ValidatedMessage, 16 | }; 17 | use futures_util::{pin_mut, SinkExt, StreamExt, TryFutureExt, TryStreamExt}; 18 | use std::{ 19 | sync::mpsc, 20 | task::{Context, Poll}, 21 | }; 22 | use ya_gcp::pubsub::emulator::Emulator; 23 | 24 | const SCHEMA: &str = "test-schema"; 25 | const TOPIC: &str = "test-topic"; 26 | 27 | #[derive(Clone, PartialEq, Eq, prost::Message)] 28 | struct TestMessage { 29 | #[prost(string, tag = "1")] 30 | payload: String, 31 | } 32 | 33 | impl EncodableMessage for TestMessage { 34 | type Error = ProstValidatorError; 35 | type Validator = ProstValidator; 36 | 37 | fn topic(&self) -> Topic { 38 | TOPIC.into() 39 | } 40 | 41 | fn encode(&self, validator: &Self::Validator) -> Result { 42 | validator.validate( 43 | uuid::Uuid::nil(), 44 | std::time::SystemTime::UNIX_EPOCH, 45 | SCHEMA, 46 | Headers::from([(String::from("key"), String::from("value"))]), 47 | self, 48 | ) 49 | } 50 | } 51 | 52 | impl DecodableMessage for TestMessage { 53 | type Decoder = ProstDecoder>; 54 | type Error = ProstDecodeError; 55 | 56 | fn decode(msg: ValidatedMessage, validator: &Self::Decoder) -> Result { 57 | validator.decode(msg) 58 | } 59 | } 60 | 61 | #[test] 62 | fn decode_with_headers() -> Result<(), BoxError> { 63 | let orig_message = TestMessage { 64 | 
payload: "foobar".into(), 65 | }; 66 | 67 | let encoded = orig_message.encode(&ProstValidator::new())?; 68 | 69 | let decoded = message::ValidatedMessage::::decode( 70 | encoded, 71 | &ProstDecoder::new(ExactSchemaMatcher::new(SCHEMA)), 72 | )?; 73 | 74 | let headers = Headers::from([(String::from("key"), String::from("value"))]); 75 | 76 | assert_eq!(decoded.headers(), &headers); 77 | 78 | Ok(()) 79 | } 80 | 81 | #[tokio::test] 82 | #[ignore = "pubsub emulator is finicky, run this test manually"] 83 | async fn roundtrip_protobuf() -> Result<(), BoxError> { 84 | let project_name = "test-project"; 85 | let topic_name = TopicName::new(TOPIC); 86 | let subscription_name = SubscriptionName::new("test-subscription"); 87 | 88 | let emulator = Emulator::new().project(project_name).await?; 89 | 90 | let client_builder = ClientBuilder::new( 91 | ClientBuilderConfig::new().auth_flow(AuthFlow::NoAuth), 92 | PubSubConfig::new().endpoint(emulator.endpoint()), 93 | ) 94 | .await?; 95 | 96 | let mut publisher_client = client_builder 97 | .build_publisher(project_name, "test_publisher") 98 | .await?; 99 | 100 | publisher_client 101 | .create_topic(TopicConfig { 102 | name: topic_name.clone(), 103 | ..TopicConfig::default() 104 | }) 105 | .await?; 106 | 107 | let mut consumer_client = client_builder 108 | .build_consumer(project_name, "test_queue") 109 | .await?; 110 | 111 | consumer_client 112 | .create_subscription(SubscriptionConfig { 113 | name: subscription_name.clone(), 114 | topic: topic_name.clone(), 115 | ..SubscriptionConfig::default() 116 | }) 117 | .await?; 118 | 119 | let mut publisher = 120 | Publisher::::publish_sink(publisher_client.publisher(), ProstValidator::new()); 121 | 122 | publisher 123 | .send(TestMessage { 124 | payload: "foobar".into(), 125 | }) 126 | .await?; 127 | 128 | let consumer = consumer_client 129 | .stream_subscription(subscription_name, StreamSubscriptionConfig::default()) 130 | .consume::(ProstDecoder::new(ExactSchemaMatcher::new(SCHEMA))); 131 | 132 | pin_mut!(consumer); 133 | 134 | assert_eq!( 135 | TestMessage { 136 | payload: "foobar".into() 137 | }, 138 | Option::unwrap(consumer.next().await)?.ack().await? 
139 | ); 140 | Ok(()) 141 | } 142 | 143 | /// Test that the publisher-side response sink receives elements when the publisher publishes 144 | #[tokio::test] 145 | #[ignore = "pubsub emulator is finicky, run this test manually"] 146 | async fn response_sink_responses() -> Result<(), BoxError> { 147 | let project_name = "test-project"; 148 | let topic_name = TopicName::new(TOPIC); 149 | let subscription_name = SubscriptionName::new("test-subscription"); 150 | 151 | let emulator = Emulator::new().project(project_name).await?; 152 | 153 | let client_builder = ClientBuilder::new( 154 | ClientBuilderConfig::new().auth_flow(AuthFlow::NoAuth), 155 | PubSubConfig::new().endpoint(emulator.endpoint()), 156 | ) 157 | .await?; 158 | 159 | let mut publisher_client = client_builder 160 | .build_publisher(project_name, "test_publisher") 161 | .await?; 162 | 163 | publisher_client 164 | .create_topic(TopicConfig { 165 | name: topic_name.clone(), 166 | ..TopicConfig::default() 167 | }) 168 | .await?; 169 | 170 | let mut consumer_client = client_builder 171 | .build_consumer(project_name, "test_queue") 172 | .await?; 173 | 174 | consumer_client 175 | .create_subscription(SubscriptionConfig { 176 | name: subscription_name.clone(), 177 | topic: topic_name.clone(), 178 | ..SubscriptionConfig::default() 179 | }) 180 | .await?; 181 | 182 | let (response_sink, mut responses) = futures_channel::mpsc::unbounded(); 183 | let mut cx = Context::from_waker(futures_util::task::noop_waker_ref()); 184 | 185 | let mut publisher = Publisher::::publish_sink_with_responses( 186 | publisher_client.publisher(), 187 | ProstValidator::new(), 188 | response_sink, 189 | ); 190 | 191 | let consumer = consumer_client 192 | .stream_subscription(subscription_name, StreamSubscriptionConfig::default()) 193 | .consume::(ProstDecoder::new(ExactSchemaMatcher::new(SCHEMA))); 194 | 195 | pin_mut!(consumer); 196 | 197 | { 198 | let message = TestMessage { 199 | payload: "foobar".into(), 200 | }; 201 | 202 | publisher.feed(message.clone()).await?; 203 | 204 | // the response sink should not be populated until a flush 205 | assert_eq!(Poll::Pending, responses.poll_next_unpin(&mut cx)); 206 | publisher.flush().await?; 207 | assert_eq!( 208 | Poll::Ready(Some(message.clone())), 209 | responses.poll_next_unpin(&mut cx) 210 | ); 211 | 212 | assert_eq!(message, Option::unwrap(consumer.next().await)?.ack().await?); 213 | } 214 | 215 | { 216 | let message1 = TestMessage { 217 | payload: "one".into(), 218 | }; 219 | let message2 = TestMessage { 220 | payload: "two".into(), 221 | }; 222 | let message3 = TestMessage { 223 | payload: "three".into(), 224 | }; 225 | // create a message that will exceed the message limits (~10MB) and therefore error 226 | let invalid_message4 = TestMessage { 227 | payload: "4".repeat(10 * 1_000_000 + 1), 228 | }; 229 | let message5 = TestMessage { 230 | payload: "five".into(), 231 | }; 232 | 233 | publisher.feed(message1.clone()).await?; 234 | publisher.feed(message2.clone()).await?; 235 | publisher.feed(message3.clone()).await?; 236 | 237 | // buffering the invalid message (via feed) actually works, its validity is checked later 238 | // when submitted to the underlying sink with the next poll_ready 239 | publisher.feed(invalid_message4.clone()).await?; 240 | match publisher.poll_ready_unpin(&mut cx) { 241 | Poll::Ready(Err(PublishError::Publish { cause, messages })) => { 242 | assert_eq!(vec![invalid_message4], messages); 243 | assert_eq!(tonic::Code::InvalidArgument, cause.code()); 244 | } 245 | other => panic!("expected 
invalid arg error, was {:?}", other), 246 | } 247 | 248 | publisher.feed(message5.clone()).await?; 249 | 250 | // no responses are sent yet 251 | assert_eq!(Poll::Pending, responses.poll_next_unpin(&mut cx)); 252 | 253 | // the flush can still happen despite the error and the non-error values should come through 254 | publisher.flush().await?; 255 | assert_eq!( 256 | vec![ 257 | message1.clone(), 258 | message2.clone(), 259 | message3.clone(), 260 | message5.clone() 261 | ], 262 | responses.by_ref().take(4).collect::>().await 263 | ); 264 | 265 | assert_eq!( 266 | vec![ 267 | message1.clone(), 268 | message2.clone(), 269 | message3.clone(), 270 | message5.clone() 271 | ], 272 | consumer 273 | .by_ref() 274 | .take(4) 275 | .map_err(BoxError::from) 276 | .and_then(|msg| msg.ack().map_err(BoxError::from)) 277 | .try_collect::>() 278 | .await? 279 | ); 280 | } 281 | 282 | { 283 | let message6 = TestMessage { 284 | payload: "six".into(), 285 | }; 286 | let message7 = TestMessage { 287 | payload: "seven".into(), 288 | }; 289 | // create a message that will *not* exceed the message limits, but will exceed the total 290 | // request limits even when it's the only message in a request. This induces an error later 291 | // in the process, at the time of flush instead of insertion 292 | let invalid_message8 = TestMessage { 293 | payload: "8".repeat(10 * 1_000_000 - 6), 294 | }; 295 | let message9 = TestMessage { 296 | payload: "nine".into(), 297 | }; 298 | 299 | publisher.feed(message6.clone()).await?; 300 | publisher.feed(message7.clone()).await?; 301 | 302 | publisher.feed(invalid_message8.clone()).await?; 303 | // the error doesn't happen here because the invalid message was only just submitted to the 304 | // sub-sink by this ready check. The buffer will first note that it's over capacity, and 305 | // induce a flush. 306 | assert!(matches!( 307 | publisher.poll_ready_unpin(&mut cx), 308 | Poll::Ready(Ok(())) 309 | )); 310 | // to actually poll that flush, we need a new element in the hedwig buffer to forward 311 | // readiness checks to the pubsub sink (we're avoiding a manual `flush` call to test the 312 | // path where flushes happen unprompted) 313 | publisher.start_send_unpin(message9.clone())?; 314 | 315 | // now readiness checking will drive the flush, and eventually find the invalid message and 316 | // return an error 317 | match futures_util::future::poll_fn(|cx| publisher.poll_ready_unpin(cx)).await { 318 | Err(PublishError::Publish { cause, messages }) => { 319 | assert_eq!(vec![invalid_message8], messages); 320 | assert_eq!(tonic::Code::InvalidArgument, cause.code()); 321 | } 322 | other => panic!("expected invalid arg error, was {:?}", other), 323 | } 324 | 325 | // flushing did allow two messages through before the error 326 | assert_eq!( 327 | vec![message6.clone(), message7.clone()], 328 | responses.by_ref().take(2).collect::>().await 329 | ); 330 | 331 | // then a manual flush can send the last message submitted after the invalid message 332 | publisher.flush().await?; 333 | assert_eq!( 334 | vec![message9.clone()], 335 | responses.by_ref().take(1).collect::>().await 336 | ); 337 | 338 | // all the sent messages eventually arrive to the consumer 339 | assert_eq!( 340 | vec![message6.clone(), message7.clone(), message9.clone()], 341 | consumer 342 | .by_ref() 343 | .take(3) 344 | .map_err(BoxError::from) 345 | .and_then(|msg| msg.ack().map_err(BoxError::from)) 346 | .try_collect::>() 347 | .await? 
348 | ); 349 | } 350 | Ok(()) 351 | } 352 | 353 | /// Check to see that the retry policy will translate from api messages to user messages 354 | #[tokio::test] 355 | #[ignore = "pubsub emulator is finicky, run this test manually"] 356 | async fn retry_message_translate() -> Result<(), BoxError> { 357 | let project_name = "roundtrip-test-project"; 358 | let topic_name = TopicName::new(TOPIC); 359 | 360 | let emulator = Emulator::new().project(project_name).await?; 361 | 362 | let client_builder = ClientBuilder::new( 363 | ClientBuilderConfig::new().auth_flow(AuthFlow::NoAuth), 364 | PubSubConfig::new().endpoint(emulator.endpoint()), 365 | ) 366 | .await?; 367 | 368 | let mut publisher_client = client_builder 369 | .build_publisher(project_name, "roundtrip_test_publisher") 370 | .await?; 371 | 372 | publisher_client 373 | .create_topic(TopicConfig { 374 | name: topic_name.clone(), 375 | ..TopicConfig::default() 376 | }) 377 | .await?; 378 | 379 | // Create a retry policy which will send the failure values to a channel (for manual 380 | // inspection) then fail the operation without retrying 381 | #[derive(Clone)] 382 | struct TestRetryPolicy { 383 | sender: mpsc::Sender>, 384 | } 385 | 386 | struct TestRetryOperation { 387 | sender: mpsc::Sender>, 388 | } 389 | 390 | impl RetryPolicy<[TestMessage], PubSubError> for TestRetryPolicy { 391 | type RetryOp = TestRetryOperation; 392 | 393 | fn new_operation(&mut self) -> Self::RetryOp { 394 | TestRetryOperation { 395 | sender: self.sender.clone(), 396 | } 397 | } 398 | } 399 | 400 | impl RetryOperation<[TestMessage], PubSubError> for TestRetryOperation { 401 | type Sleep = futures_util::future::Ready<()>; 402 | 403 | fn check_retry( 404 | &mut self, 405 | failed_value: &[TestMessage], 406 | _error: &PubSubError, 407 | ) -> Option { 408 | self.sender 409 | .send(failed_value.to_owned()) 410 | .expect("receiver should not be dropped while senders in use"); 411 | None 412 | } 413 | } 414 | 415 | // construct messages such that the first two will buffer and the third will force a flush of 416 | // the first two. 
The request limit is 10MB, so 2+2MB start the buffer and an additional 8MB 417 | // will trigger a flush 418 | let message1 = TestMessage { 419 | payload: "1".repeat(2 * 1_000_000), 420 | }; 421 | let message2 = TestMessage { 422 | payload: "2".repeat(2 * 1_000_000), 423 | }; 424 | let message3 = TestMessage { 425 | payload: "3".repeat(8 * 1_000_000), 426 | }; 427 | let message4 = TestMessage { 428 | payload: "4".into(), 429 | }; 430 | 431 | let (retry_tx, retry_rx) = mpsc::channel(); 432 | let mut publisher = Publisher::::publish_sink( 433 | publisher_client 434 | .publisher() 435 | .with_retry_policy(TestRetryPolicy { sender: retry_tx }), 436 | ProstValidator::new(), 437 | ); 438 | 439 | publisher.feed(message1.clone()).await?; 440 | publisher.feed(message2.clone()).await?; 441 | publisher.feed(message3.clone()).await?; 442 | publisher.feed(message4.clone()).await?; 443 | 444 | // flushing (and thus errors/retries) should not have been triggered yet 445 | assert_eq!(Err(mpsc::TryRecvError::Empty), retry_rx.try_recv()); 446 | 447 | // drop the emulator to kill the process and trigger errors on publishing 448 | std::mem::drop(emulator); 449 | 450 | // flushing still hasn't happened 451 | assert_eq!(Err(mpsc::TryRecvError::Empty), retry_rx.try_recv()); 452 | 453 | // check readiness to trigger the capacity flush (less than a full flush though, only enough to 454 | // make room for a new request) 455 | match futures_util::future::poll_fn(|cx| publisher.poll_ready_unpin(cx)).await { 456 | Err(PublishError::Publish { cause: _, messages }) => { 457 | assert_eq!(vec![message1.clone(), message2.clone()], messages); 458 | } 459 | other => panic!("expected publish error, was {:?}", other), 460 | } 461 | 462 | //now the retry attempts of the first flush should be visible 463 | assert_eq!(Ok(vec![message1, message2]), retry_rx.try_recv()); 464 | // nothing else has attempted flushing though 465 | assert_eq!(Err(mpsc::TryRecvError::Empty), retry_rx.try_recv()); 466 | 467 | // flush the rest 468 | match publisher.flush().await { 469 | Err(PublishError::Publish { cause: _, messages }) => { 470 | assert_eq!(vec![message3.clone(), message4.clone()], messages); 471 | } 472 | other => panic!("expected publish error, was {:?}", other), 473 | } 474 | 475 | // witness the retries are of everything left 476 | assert_eq!(Ok(vec![message3, message4]), retry_rx.try_recv()); 477 | 478 | Ok(()) 479 | } 480 | -------------------------------------------------------------------------------- /src/tests/json.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "json-schema")] 2 | 3 | use crate::{ 4 | mock::{Error as MockError, MockPublisher}, 5 | validators, 6 | validators::JsonSchemaValidatorError, 7 | Consumer, DecodableMessage, EncodableMessage, Headers, Publisher, Topic, ValidatedMessage, 8 | }; 9 | 10 | use futures_util::{sink::SinkExt, stream::StreamExt}; 11 | use std::time::SystemTime; 12 | use uuid::Uuid; 13 | 14 | pub(crate) const SCHEMA: &str = r#"{ 15 | "$id": "https://hedwig.corp/schema", 16 | "$schema": "https://json-schema.org/draft-04/schema#", 17 | "description": "Example Schema", 18 | "schemas": { 19 | "user.created": { 20 | "1.*": { 21 | "description": "A new user was created", 22 | "type": "object", 23 | "x-versions": [ 24 | "1.0" 25 | ], 26 | "required": [ 27 | "user_id" 28 | ], 29 | "properties": { 30 | "user_id": { 31 | "$ref": "https://hedwig.corp/schema#/definitions/UserId/1.0" 32 | } 33 | } 34 | } 35 | }, 36 | "invalid.route": { 37 | "1.*": {} 
38 | } 39 | }, 40 | "definitions": { 41 | "UserId": { 42 | "1.0": { 43 | "type": "string" 44 | } 45 | } 46 | } 47 | }"#; 48 | 49 | #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] 50 | pub(crate) struct JsonUserCreatedMessage { 51 | #[serde(skip)] 52 | pub(crate) uuid: uuid::Uuid, 53 | #[serde(skip)] 54 | pub(crate) schema: &'static str, 55 | #[serde(skip)] 56 | pub(crate) headers: Headers, 57 | #[serde(skip, default = "SystemTime::now")] 58 | pub(crate) time: SystemTime, 59 | pub(crate) user_id: I, 60 | } 61 | 62 | impl JsonUserCreatedMessage { 63 | pub(crate) fn new_valid>(id: V) -> Self { 64 | JsonUserCreatedMessage { 65 | uuid: Uuid::new_v4(), 66 | schema: "https://hedwig.corp/schema#/schemas/user.created/1.0", 67 | user_id: id.into(), 68 | headers: Default::default(), 69 | time: SystemTime::now(), 70 | } 71 | } 72 | } 73 | 74 | impl<'a, I: serde::Serialize> EncodableMessage for JsonUserCreatedMessage { 75 | type Error = validators::JsonSchemaValidatorError; 76 | type Validator = validators::JsonSchemaValidator; 77 | 78 | fn topic(&self) -> Topic { 79 | "user.created".into() 80 | } 81 | fn encode(&self, validator: &Self::Validator) -> Result { 82 | validator.validate( 83 | self.uuid, 84 | self.time, 85 | self.schema, 86 | self.headers.clone(), 87 | self, 88 | ) 89 | } 90 | } 91 | 92 | impl DecodableMessage for JsonUserCreatedMessage { 93 | type Error = serde_json::Error; 94 | type Decoder = (); 95 | 96 | fn decode(msg: ValidatedMessage, _: &()) -> Result { 97 | Ok(JsonUserCreatedMessage { 98 | uuid: *msg.uuid(), 99 | headers: msg.headers().clone(), 100 | schema: "https://hedwig.corp/schema#/schemas/user.created/1.0", 101 | time: *msg.timestamp(), 102 | ..serde_json::from_slice(msg.data())? 103 | }) 104 | } 105 | } 106 | 107 | #[tokio::test] 108 | async fn publish_messages() -> Result<(), Box> { 109 | let publisher = MockPublisher::new(); 110 | let message_one = JsonUserCreatedMessage::new_valid("U123"); 111 | let message_two = JsonUserCreatedMessage::new_valid("U124"); 112 | let message_three = JsonUserCreatedMessage::new_valid("U126"); 113 | let message_invalid = JsonUserCreatedMessage { 114 | uuid: Uuid::new_v4(), 115 | schema: "https://hedwig.corp/schema#/schemas/user.created/1.0", 116 | user_id: 125u64, 117 | time: SystemTime::now(), 118 | headers: Headers::new(), 119 | }; 120 | let mut responses = Vec::new(); 121 | 122 | // prepare a consumer to read any sent messages 123 | let mut consumer = publisher 124 | .new_consumer(message_one.topic(), "subscription1") 125 | .consume::>(()); 126 | 127 | // publishing the message with a u64 id should error on trying to send 128 | let mut publish_sink = >>::publish_sink( 129 | publisher.clone(), 130 | crate::validators::JsonSchemaValidator::new(SCHEMA).unwrap(), 131 | ); 132 | assert!(matches!( 133 | publish_sink 134 | .send(message_invalid) 135 | .await 136 | .map_err(|MockError { cause }| cause 137 | .downcast::() 138 | .map(|boxed| *boxed)), 139 | Err(Ok(JsonSchemaValidatorError::ValidateData { .. 
})) 140 | )); 141 | 142 | // publishing the type with string ids should work 143 | let mut publish_sink = 144 | , _>>::publish_sink_with_responses( 145 | publisher.clone(), 146 | crate::validators::JsonSchemaValidator::new(SCHEMA).unwrap(), 147 | &mut responses, 148 | ); 149 | 150 | assert!(publish_sink.send(message_one.clone()).await.is_ok()); 151 | assert!(publish_sink.send(message_two.clone()).await.is_ok()); 152 | assert!(publish_sink.send(message_three.clone()).await.is_ok()); 153 | 154 | // if the sink uses buffering, the user should be informed of successful publishes in the 155 | // response sink. 156 | assert_eq!( 157 | vec![ 158 | message_one.clone(), 159 | message_two.clone(), 160 | message_three.clone() 161 | ], 162 | responses 163 | ); 164 | 165 | // Now actually read from the consumer. 166 | // The ordering doesn't necessarily need to be preserved, but for the purpose of this test we 167 | // know that `MockPublisher` does. 168 | assert_eq!( 169 | message_one, 170 | consumer.next().await.unwrap().unwrap().ack().await.unwrap() 171 | ); 172 | assert_eq!( 173 | message_two, 174 | consumer.next().await.unwrap().unwrap().ack().await.unwrap() 175 | ); 176 | assert_eq!( 177 | message_three, 178 | consumer.next().await.unwrap().unwrap().ack().await.unwrap() 179 | ); 180 | 181 | Ok(()) 182 | } 183 | 184 | #[test] 185 | fn publish_sink_is_send() { 186 | let publisher = MockPublisher::new(); 187 | let sink = >>::publish_sink( 188 | publisher, 189 | crate::validators::JsonSchemaValidator::new(SCHEMA).unwrap(), 190 | ); 191 | crate::tests::assert_send_val(&sink); 192 | } 193 | -------------------------------------------------------------------------------- /src/tests/mod.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | 3 | pub(crate) mod google; 4 | pub(crate) mod json; 5 | 6 | pub(crate) fn assert_error() {} 7 | pub(crate) fn assert_send_val(_: &T) {} 8 | -------------------------------------------------------------------------------- /src/validators/json_schema.rs: -------------------------------------------------------------------------------- 1 | use std::time::SystemTime; 2 | use uuid::Uuid; 3 | use valico::json_schema::Scope; 4 | 5 | use crate::{Headers, ValidatedMessage}; 6 | 7 | /// Errors that may occur when validating messages using a JSON schema. 8 | #[derive(Debug, thiserror::Error)] 9 | #[non_exhaustive] 10 | #[cfg_attr(docsrs, doc(cfg(feature = "json-schema")))] 11 | pub enum JsonSchemaValidatorError { 12 | /// Unable to deserialize the schema 13 | #[error("unable to deserialize the schema")] 14 | SchemaDeserialize(#[source] serde_json::Error), 15 | /// Unable to compile the schema 16 | #[error("unable to compile the schema")] 17 | SchemaCompile(#[source] valico::json_schema::SchemaError), 18 | /// Could not parse a schema URL 19 | #[error("could not parse `{1}` as a schema URL")] 20 | SchemaUrlParse(#[source] url::ParseError, String), 21 | /// Could not resolve the schema URL 22 | #[error("could not resolve `{0}` to a schema")] 23 | SchemaUrlResolve(url::Url), 24 | /// Could not serialize message data 25 | #[error("could not serialize the message data")] 26 | SerializeData(#[source] serde_json::Error), 27 | /// Could not validate message data 28 | #[error("message data does not validate per the schema: {0}")] 29 | ValidateData(String), 30 | } 31 | 32 | /// Validator that validates JSON payloads according to a provided [JSON Schema]. 
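For a sense of how this validator is used end to end, here is a sketch against a pared-down variant of the schema found in `src/tests/json.rs`; the schema body, URLs, and payload are illustrative only, and the `json-schema` feature is assumed:

```rust
use std::time::SystemTime;
use hedwig::{validators::JsonSchemaValidator, Headers};

// A trimmed-down schema in the same shape as the repository's test schema.
const SCHEMA: &str = r#"{
    "$id": "https://hedwig.corp/schema",
    "$schema": "https://json-schema.org/draft-04/schema#",
    "schemas": {
        "user.created": {
            "1.*": {
                "type": "object",
                "required": ["user_id"],
                "properties": { "user_id": { "type": "string" } }
            }
        }
    }
}"#;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let validator = JsonSchemaValidator::new(SCHEMA)?;
    // The trailing "1.0" is rewritten internally to match the "1.*" entry.
    let validated = validator.validate(
        uuid::Uuid::new_v4(),
        SystemTime::now(),
        "https://hedwig.corp/schema#/schemas/user.created/1.0",
        Headers::new(),
        &serde_json::json!({ "user_id": "U_123" }),
    )?;
    println!("validated message for schema {}", validated.schema());
    Ok(())
}
```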
33 | /// 34 | /// [JSON Schema]: https://json-schema.org/ 35 | #[cfg_attr(docsrs, doc(cfg(feature = "json-schema")))] 36 | pub struct JsonSchemaValidator { 37 | scope: Scope, 38 | } 39 | 40 | impl JsonSchemaValidator { 41 | /// Construct a new JSON schema validator. 42 | /// 43 | /// The `schema` argument must contain the JSON-encoded JSON-schema. 44 | pub fn new(schema: &str) -> Result { 45 | Self::from_reader(std::io::Cursor::new(schema)) 46 | } 47 | 48 | /// Construct a new JSON schema validator. 49 | pub fn from_reader(schema: R) -> Result 50 | where 51 | R: std::io::Read, 52 | { 53 | Self::from_json( 54 | serde_json::from_reader(schema).map_err(JsonSchemaValidatorError::SchemaDeserialize)?, 55 | ) 56 | } 57 | 58 | /// Construct a new JSON schema validator. 59 | pub fn from_json( 60 | schema: serde_json::Value, 61 | ) -> Result { 62 | let mut scope = Scope::new(); 63 | scope 64 | .compile(schema, false) 65 | .map_err(JsonSchemaValidatorError::SchemaCompile)?; 66 | Ok(JsonSchemaValidator { scope }) 67 | } 68 | 69 | /// Validate the JSON payload using JSON schema and construct a [`ValidatedMessage`]. 70 | pub fn validate( 71 | &self, 72 | id: Uuid, 73 | timestamp: SystemTime, 74 | schema: &'static str, 75 | headers: Headers, 76 | data: &M, 77 | ) -> Result { 78 | // convert user.created/1.0 -> user.created/1.* 79 | let msg_schema_trimmed = schema.trim_end_matches(char::is_numeric); 80 | let msg_schema_url = if msg_schema_trimmed != schema { 81 | let wildcard_url = String::from(msg_schema_trimmed) + "*"; 82 | url::Url::parse(&wildcard_url) 83 | .map_err(|e| JsonSchemaValidatorError::SchemaUrlParse(e, wildcard_url))? 84 | } else { 85 | url::Url::parse(schema) 86 | .map_err(|e| JsonSchemaValidatorError::SchemaUrlParse(e, schema.into()))? 87 | }; 88 | let msg_schema = self 89 | .scope 90 | .resolve(&msg_schema_url) 91 | .ok_or(JsonSchemaValidatorError::SchemaUrlResolve(msg_schema_url))?; 92 | let value = serde_json::to_value(data).map_err(JsonSchemaValidatorError::SerializeData)?; 93 | let validation_state = msg_schema.validate(&value); 94 | if !validation_state.is_strictly_valid() { 95 | return Err(JsonSchemaValidatorError::ValidateData(format!( 96 | "{:?}", 97 | validation_state 98 | ))); 99 | } 100 | Ok(ValidatedMessage::new( 101 | id, 102 | timestamp, 103 | schema, 104 | headers, 105 | serde_json::to_vec(&value).map_err(JsonSchemaValidatorError::SerializeData)?, 106 | )) 107 | } 108 | } 109 | 110 | #[cfg(test)] 111 | mod tests { 112 | use super::*; 113 | use crate::{tests::json::*, EncodableMessage}; 114 | use uuid::Uuid; 115 | 116 | #[test] 117 | fn invalid_data_type() { 118 | let validator = JsonSchemaValidator::new(SCHEMA).unwrap(); 119 | let message = JsonUserCreatedMessage { 120 | uuid: Uuid::new_v4(), 121 | schema: "https://hedwig.corp/schema#/schemas/user.created/1.0", 122 | user_id: 123u64, 123 | time: SystemTime::now(), 124 | headers: Headers::new(), 125 | }; 126 | assert!(matches!( 127 | message.encode(&validator).err(), 128 | Some(JsonSchemaValidatorError::ValidateData(_)) 129 | )); 130 | } 131 | 132 | #[test] 133 | fn missing_schema() { 134 | let validator = JsonSchemaValidator::new(SCHEMA).unwrap(); 135 | let message = JsonUserCreatedMessage { 136 | uuid: Uuid::new_v4(), 137 | schema: "https://hedwig.corp/schema#/schemas/user.created/2.0", 138 | user_id: String::from("123"), 139 | time: SystemTime::now(), 140 | headers: Headers::new(), 141 | }; 142 | assert!(matches!( 143 | message.encode(&validator).err(), 144 | Some(JsonSchemaValidatorError::SchemaUrlResolve(_)) 145 | )); 
146 |     }
147 | 
148 |     #[test]
149 |     fn overbroad_schema_url() {
150 |         let validator = JsonSchemaValidator::new(SCHEMA).unwrap();
151 |         let message = JsonUserCreatedMessage {
152 |             uuid: Uuid::new_v4(),
153 |             schema: "https://hedwig.corp/schema#/schemas/user.created/*",
154 |             user_id: String::from("123"),
155 |             time: SystemTime::now(),
156 |             headers: Headers::new(),
157 |         };
158 |         assert!(matches!(
159 |             message.encode(&validator).err(),
160 |             Some(JsonSchemaValidatorError::SchemaUrlResolve(_))
161 |         ));
162 |     }
163 | 
164 |     #[test]
165 |     fn exact_schema_url_wildcard() {
166 |         let validator = JsonSchemaValidator::new(SCHEMA).unwrap();
167 |         let message = JsonUserCreatedMessage {
168 |             uuid: Uuid::new_v4(),
169 |             schema: "https://hedwig.corp/schema#/schemas/user.created/1.*",
170 |             user_id: String::from("123"),
171 |             time: SystemTime::now(),
172 |             headers: Headers::new(),
173 |         };
174 |         message.encode(&validator).expect("should work");
175 |     }
176 | 
177 |     #[test]
178 |     fn invalid_schema_url() {
179 |         let validator = JsonSchemaValidator::new(SCHEMA).unwrap();
180 |         let message = JsonUserCreatedMessage {
181 |             uuid: Uuid::new_v4(),
182 |             schema: "hedwig.corp/schema#/schemas/user.created/1.*",
183 |             user_id: String::from("123"),
184 |             time: SystemTime::now(),
185 |             headers: Headers::new(),
186 |         };
187 |         assert!(matches!(
188 |             message.encode(&validator).err(),
189 |             Some(JsonSchemaValidatorError::SchemaUrlParse(..))
190 |         ));
191 |     }
192 | 
193 |     #[test]
194 |     fn errors_send_sync() {
195 |         crate::tests::assert_error::<JsonSchemaValidatorError>();
196 |     }
197 | 
198 |     #[test]
199 |     fn validation_retains_timestamp() {
200 |         let validator = JsonSchemaValidator::new(SCHEMA).unwrap();
201 |         let timestamp = SystemTime::UNIX_EPOCH + std::time::Duration::from_secs(42);
202 |         let validated = validator
203 |             .validate(
204 |                 Uuid::new_v4(),
205 |                 timestamp,
206 |                 "https://hedwig.corp/schema#/schemas/user.created/1.*",
207 |                 Headers::new(),
208 |                 &serde_json::json!({ "user_id": "123" }),
209 |             )
210 |             .expect("ok");
211 |         assert_eq!(validated.timestamp(), &timestamp);
212 |     }
213 | 
214 |     #[test]
215 |     fn validation_retains_headers() {
216 |         let validator = JsonSchemaValidator::new(SCHEMA).unwrap();
217 |         let headers = vec![("hello", "world"), ("123", "456")]
218 |             .into_iter()
219 |             .map(|(k, v)| (k.into(), v.into()))
220 |             .collect::<Headers>();
221 |         let validated = validator
222 |             .validate(
223 |                 Uuid::new_v4(),
224 |                 SystemTime::UNIX_EPOCH,
225 |                 "https://hedwig.corp/schema#/schemas/user.created/1.*",
226 |                 headers.clone(),
227 |                 &serde_json::json!({ "user_id": "123" }),
228 |             )
229 |             .expect("ok");
230 |         assert_eq!(validated.headers(), &headers);
231 |     }
232 | 
233 |     #[test]
234 |     fn validation_retains_uuid() {
235 |         let validator = JsonSchemaValidator::new(SCHEMA).unwrap();
236 |         let uuid = Uuid::new_v4();
237 |         let validated = validator
238 |             .validate(
239 |                 uuid,
240 |                 SystemTime::UNIX_EPOCH,
241 |                 "https://hedwig.corp/schema#/schemas/user.created/1.*",
242 |                 Headers::new(),
243 |                 &serde_json::json!({ "user_id": "123" }),
244 |             )
245 |             .expect("ok");
246 |         assert_eq!(validated.uuid(), &uuid);
247 |     }
248 | }
249 | 
--------------------------------------------------------------------------------
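Before the module plumbing below, a minimal end-to-end sketch of `JsonSchemaValidator` (requires the `json-schema` feature). The schema document, URL, and payload are illustrative stand-ins, not fixtures from this repository:

```rust
use std::time::SystemTime;

use hedwig::{validators::JsonSchemaValidator, Headers};
use uuid::Uuid;

// Illustrative schema document: laid out so that the URL fragment
// "#/schemas/user.created/1.*" resolves to the subschema below.
const SCHEMA: &str = r#"{
    "id": "https://hedwig.corp/schema",
    "$schema": "http://json-schema.org/draft-04/schema#",
    "schemas": {
        "user.created": {
            "1.*": {
                "type": "object",
                "required": ["user_id"],
                "properties": { "user_id": { "type": "string" } }
            }
        }
    }
}"#;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let validator = JsonSchemaValidator::new(SCHEMA)?;

    // validate() rewrites the trailing ".../1.0" to ".../1.*" before schema
    // lookup, so any 1.x message resolves to the wildcard subschema above.
    let validated = validator.validate(
        Uuid::new_v4(),
        SystemTime::now(),
        "https://hedwig.corp/schema#/schemas/user.created/1.0",
        Headers::new(),
        &serde_json::json!({ "user_id": "123" }),
    )?;

    // The validated message retains the schema string it was created with.
    assert_eq!(
        validated.schema(),
        "https://hedwig.corp/schema#/schemas/user.created/1.0"
    );
    Ok(())
}
```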
/src/validators/mod.rs:
--------------------------------------------------------------------------------
1 | //! Implementations of validators.
2 | //!
3 | //! Validators are responsible for ensuring the message payload is valid according to some description
4 | //! and then constructing instances of [`ValidatedMessage`] that contain the encoded data in some
5 | //! on-wire format.
6 | //!
7 | //! [`ValidatedMessage`]: crate::ValidatedMessage
8 | 
9 | #[cfg(feature = "json-schema")]
10 | mod json_schema;
11 | #[cfg(feature = "json-schema")]
12 | pub use self::json_schema::*;
13 | 
14 | #[cfg(feature = "prost")]
15 | pub mod prost;
16 | #[cfg(feature = "prost")]
17 | pub use self::prost::{ProstDecodeError, ProstDecoder, ProstValidator, ProstValidatorError};
18 | 
--------------------------------------------------------------------------------
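As a quick orientation, the gates and re-exports above yield the following downstream import paths (each line assumes the corresponding Cargo feature is enabled):

```rust
// With the "json-schema" feature:
use hedwig::validators::JsonSchemaValidator;

// With prost-based protobuf support, via the public submodule...
use hedwig::validators::prost::{ExactSchemaMatcher, SchemaMatcher};
// ...or via the re-exports at the module root:
use hedwig::validators::{ProstDecodeError, ProstDecoder, ProstValidator};
```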
/src/validators/prost.rs:
--------------------------------------------------------------------------------
1 | //! Validation and decoding for messages encoded with protobuf using [`prost`](::prost)
2 | //!
3 | //! ```
4 | //! use hedwig::validators::prost::{ProstValidator, ProstDecoder, ExactSchemaMatcher};
5 | //! # use uuid::Uuid;
6 | //! # use std::time::SystemTime;
7 | //!
8 | //! #[derive(Clone, PartialEq, ::prost::Message)]
9 | //! struct MyMessage {
10 | //!     #[prost(string, tag = "1")]
11 | //!     payload: String,
12 | //! }
13 | //! let schema = "my-message.proto";
14 | //!
15 | //! let message = MyMessage {
16 | //!     payload: "foobar".to_owned(),
17 | //! };
18 | //!
19 | //! // Demonstrate a message making a round trip through the validator and the decoder
20 | //!
21 | //! let validator = ProstValidator::new();
22 | //! let validated_message = validator.validate(
23 | //!     Uuid::new_v4(),
24 | //!     SystemTime::now(),
25 | //!     schema,
26 | //!     hedwig::Headers::default(),
27 | //!     &message,
28 | //! )?;
29 | //!
30 | //! let decoder = ProstDecoder::new(
31 | //!     ExactSchemaMatcher::<MyMessage>::new(schema)
32 | //! );
33 | //! let decoded_message = decoder.decode(validated_message)?;
34 | //!
35 | //! assert_eq!(message, decoded_message);
36 | //!
37 | //! # Ok::<_, Box<dyn std::error::Error>>(())
38 | //! ```
39 | 
40 | use std::time::SystemTime;
41 | use uuid::Uuid;
42 | 
43 | use crate::{Headers, ValidatedMessage};
44 | 
45 | /// Errors that may occur when validating ProtoBuf messages.
46 | #[derive(thiserror::Error, Debug)]
47 | #[error("unable to encode the protobuf payload")]
48 | #[cfg_attr(docsrs, doc(cfg(feature = "prost")))]
49 | pub struct ProstValidatorError(#[source] prost::EncodeError);
50 | 
51 | /// Errors that may occur when decoding ProtoBuf messages.
52 | #[derive(thiserror::Error, Debug, PartialEq, Eq)]
53 | #[cfg_attr(docsrs, doc(cfg(feature = "prost")))]
54 | pub enum ProstDecodeError<E: std::error::Error + 'static> {
55 |     /// The message's schema did not match the decoded message type
56 |     #[error("invalid schema for decoded message type")]
57 |     InvalidSchema(#[source] E),
58 | 
59 |     /// The message failed to decode from protobuf
60 |     #[error(transparent)]
61 |     Decode(#[from] prost::DecodeError),
62 | }
63 | 
64 | #[derive(Default)]
65 | struct UseNewToConstruct;
66 | 
67 | /// Validator that encodes data into protobuf payloads using [`prost`].
68 | #[derive(Default)]
69 | #[cfg_attr(docsrs, doc(cfg(feature = "prost")))]
70 | pub struct ProstValidator(UseNewToConstruct);
71 | 
72 | impl ProstValidator {
73 |     /// Construct a new validator.
74 |     pub fn new() -> Self {
75 |         ProstValidator(UseNewToConstruct)
76 |     }
77 | 
78 |     /// Validate and construct a [`ValidatedMessage`] with a protobuf payload.
79 |     pub fn validate<M, S>(
80 |         &self,
81 |         id: Uuid,
82 |         timestamp: SystemTime,
83 |         schema: S,
84 |         headers: Headers,
85 |         data: &M,
86 |     ) -> Result<ValidatedMessage<bytes::Bytes>, ProstValidatorError>
87 |     where
88 |         M: prost::Message,
89 |         S: Into<std::borrow::Cow<'static, str>>,
90 |     {
91 |         let mut bytes = bytes::BytesMut::new();
92 |         data.encode(&mut bytes).map_err(ProstValidatorError)?;
93 |         Ok(ValidatedMessage::new(id, timestamp, schema, headers, bytes))
94 |     }
95 | }
96 | 
97 | /// Validator that decodes data from protobuf payloads using [`prost`].
98 | pub struct ProstDecoder<S> {
99 |     schema_matcher: S,
100 | }
101 | 
102 | impl<S> ProstDecoder<S> {
103 |     /// Create a new decoder with the given [`SchemaMatcher`]
104 |     pub fn new(schema_matcher: S) -> Self {
105 |         Self { schema_matcher }
106 |     }
107 | 
108 |     /// Decode the given protobuf-encoded message into its structured data
109 |     pub fn decode<M>(
110 |         &self,
111 |         msg: ValidatedMessage<bytes::Bytes>,
112 |     ) -> Result<M, ProstDecodeError<S::InvalidSchemaError>>
113 |     where
114 |         S: SchemaMatcher<M>,
115 |         S::InvalidSchemaError: std::error::Error + 'static,
116 |         M: prost::Message + Default,
117 |     {
118 |         self.schema_matcher
119 |             .try_match_schema(msg.schema())
120 |             .map_err(ProstDecodeError::InvalidSchema)?;
121 | 
122 |         Ok(M::decode(msg.into_data())?)
123 |     }
124 | }
125 | 
126 | /// A means of asserting that an incoming message's [`schema`](ValidatedMessage::schema) matches
127 | /// a given message type's deserialized format.
128 | ///
129 | ///```
130 | /// use hedwig::validators::prost::SchemaMatcher;
131 | ///
132 | /// struct MyMessage {
133 | ///     // ...
134 | /// }
135 | ///
136 | /// // SchemaMatcher has a blanket impl over closures
137 | /// let my_matcher = |schema: &str| {
138 | ///     // imagine some rudimentary version check
139 | ///     if schema.starts_with("messages/my-message/my-schema-")
140 | ///         && (schema.ends_with("my-schema-v1.proto") ||
141 | ///             schema.ends_with("my-schema-v2.proto")) {
142 | ///         Ok(())
143 | ///     } else {
144 | ///         Err(format!("incompatible schema: {}", schema))
145 | ///     }
146 | /// };
147 | ///
148 | /// assert_eq!(
149 | ///     Ok(()),
150 | ///     SchemaMatcher::<MyMessage>::try_match_schema(
151 | ///         &my_matcher,
152 | ///         "messages/my-message/my-schema-v2.proto"
153 | ///     )
154 | /// );
155 | ///
156 | /// assert_eq!(
157 | ///     Err("incompatible schema: messages/my-message/my-schema-v3.proto".to_owned()),
158 | ///     SchemaMatcher::<MyMessage>::try_match_schema(
159 | ///         &my_matcher,
160 | ///         "messages/my-message/my-schema-v3.proto"
161 | ///     )
162 | /// );
163 | ///```
164 | pub trait SchemaMatcher<M> {
165 |     /// The error returned when a given schema does not match the message type
166 |     type InvalidSchemaError;
167 | 
168 |     /// Check whether messages with the given schema are valid for deserializing into the trait's
169 |     /// generic message type.
170 |     ///
171 |     /// Returns an error if the schema does not match
172 |     fn try_match_schema(&self, schema: &str) -> Result<(), Self::InvalidSchemaError>;
173 | }
174 | 
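// For illustration (not part of this module): SchemaMatcher can also be
// implemented by hand on a custom type instead of through the closure blanket
// impl below, e.g. to accept any schema under a fixed prefix. The matcher
// type and its String error are hypothetical:
//
//     struct PrefixMatcher(&'static str);
//
//     impl<M> SchemaMatcher<M> for PrefixMatcher {
//         type InvalidSchemaError = String;
//
//         fn try_match_schema(&self, schema: &str) -> Result<(), Self::InvalidSchemaError> {
//             if schema.starts_with(self.0) {
//                 Ok(())
//             } else {
//                 Err(format!("schema {} is not under prefix {}", schema, self.0))
//             }
//         }
//     }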
175 | // blanket impl SchemaMatcher over closures for convenience
176 | impl<F, M, E> SchemaMatcher<M> for F
177 | where
178 |     F: Fn(&str) -> Result<(), E>,
179 | {
180 |     type InvalidSchemaError = E;
181 | 
182 |     fn try_match_schema(&self, schema: &str) -> Result<(), Self::InvalidSchemaError> {
183 |         (self)(schema)
184 |     }
185 | }
186 | 
187 | /// An error indicating that a received message had a schema which did not match the deserialized
188 | /// message type
189 | #[derive(Debug, Clone, Eq, PartialEq, thiserror::Error)]
190 | #[error("deserialized schema {encountered} does not match expected schema {expected} for type {message_type}")]
191 | pub struct SchemaMismatchError {
192 |     expected: &'static str,
193 |     encountered: String,
194 |     message_type: &'static str,
195 | }
196 | 
197 | impl SchemaMismatchError {
198 |     /// Create a new error for the given message type
199 |     pub fn new<M>(expected: &'static str, encountered: String) -> Self {
200 |         SchemaMismatchError {
201 |             expected,
202 |             encountered,
203 |             message_type: std::any::type_name::<M>(),
204 |         }
205 |     }
206 | }
207 | 
208 | /// A [`SchemaMatcher`] which expects all incoming schemas to match exactly one string for the
209 | /// given message type
210 | ///
211 | /// ```
212 | /// use hedwig::validators::prost::{ExactSchemaMatcher, SchemaMatcher, SchemaMismatchError};
213 | ///
214 | /// struct MyMessage {
215 | ///     // ...
216 | /// }
217 | /// let schema = "messages/my-message/my-schema-v1.proto";
218 | ///
219 | /// let my_matcher = ExactSchemaMatcher::<MyMessage>::new(schema);
220 | ///
221 | /// assert_eq!(Ok(()), my_matcher.try_match_schema(schema));
222 | ///
223 | /// let bad_schema = "messages/my-message/my-schema-v2.proto";
224 | /// assert_eq!(
225 | ///     Err(SchemaMismatchError::new::<MyMessage>(
226 | ///         schema,
227 | ///         bad_schema.to_owned()
228 | ///     )),
229 | ///     my_matcher.try_match_schema(bad_schema)
230 | /// );
231 | ///```
232 | pub struct ExactSchemaMatcher<M> {
233 |     expected_schema: &'static str,
234 |     _message_type: std::marker::PhantomData<fn(M)>, // instead of <M> to make Send + Sync unconditional
235 | }
236 | 
237 | impl<M> ExactSchemaMatcher<M> {
238 |     /// Create a new schema matcher with the given expected schema
239 |     pub fn new(expected_schema: &'static str) -> Self {
240 |         Self {
241 |             expected_schema,
242 |             _message_type: std::marker::PhantomData,
243 |         }
244 |     }
245 | }
246 | 
247 | impl<M> SchemaMatcher<M> for ExactSchemaMatcher<M> {
248 |     type InvalidSchemaError = SchemaMismatchError;
249 | 
250 |     fn try_match_schema(&self, schema: &str) -> Result<(), Self::InvalidSchemaError> {
251 |         if self.expected_schema == schema {
252 |             Ok(())
253 |         } else {
254 |             Err(SchemaMismatchError::new::<M>(
255 |                 self.expected_schema,
256 |                 schema.to_owned(),
257 |             ))
258 |         }
259 |     }
260 | }
261 | 
--------------------------------------------------------------------------------
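Finally, a sketch combining the pieces above: a closure-based matcher driving `ProstDecoder`, with the schema check failing before any protobuf decoding happens. Since `decode` requires the matcher's error type to implement `std::error::Error`, the closure returns a `std::io::Error` rather than a plain `String`; the message type and schema names are illustrative:

```rust
use std::time::SystemTime;

use hedwig::validators::prost::{ProstDecodeError, ProstDecoder, ProstValidator};
use uuid::Uuid;

#[derive(Clone, PartialEq, ::prost::Message)]
struct MyMessage {
    #[prost(string, tag = "1")]
    payload: String,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Encode a message under a schema the decoder below will not accept.
    let validated = ProstValidator::new().validate(
        Uuid::new_v4(),
        SystemTime::now(),
        "my-schema-v3.proto",
        hedwig::Headers::default(),
        &MyMessage { payload: "foobar".to_owned() },
    )?;

    // decode() bounds the matcher's error type by std::error::Error, so this
    // closure wraps its message in a std::io::Error.
    let decoder = ProstDecoder::new(|schema: &str| {
        if schema.ends_with("v1.proto") || schema.ends_with("v2.proto") {
            Ok(())
        } else {
            Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                format!("incompatible schema: {}", schema),
            ))
        }
    });

    // The schema check runs first and surfaces as InvalidSchema.
    match decoder.decode::<MyMessage>(validated) {
        Err(ProstDecodeError::InvalidSchema(e)) => {
            assert_eq!(e.to_string(), "incompatible schema: my-schema-v3.proto");
        }
        _ => panic!("expected a schema mismatch"),
    }
    Ok(())
}
```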