├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── doc ├── best_practices.md ├── performance.md ├── producers.md ├── rationale.md ├── serialization.md └── terminology.md ├── project.clj ├── src └── franzy │ ├── clients │ ├── cluster.clj │ ├── codec.clj │ ├── connect │ │ └── schema.clj │ ├── consumer │ │ ├── callbacks.clj │ │ ├── client.clj │ │ ├── defaults.clj │ │ ├── partitioners.clj │ │ ├── protocols.clj │ │ ├── results.clj │ │ ├── schema.clj │ │ └── types.clj │ ├── partitions.clj │ └── producer │ │ ├── callbacks.clj │ │ ├── client.clj │ │ ├── defaults.clj │ │ ├── partitioners.clj │ │ ├── protocols.clj │ │ ├── schema.clj │ │ └── types.clj │ └── serialization │ ├── deserializers.clj │ └── serializers.clj └── test └── franzy ├── clients ├── cluster_tests.clj ├── consumer │ ├── client_tests.clj │ └── schema_tests.clj ├── decoding_tests.clj ├── partitions_tests.clj └── producer │ └── schema_test.clj ├── core_test.clj └── serialization └── serialization_tests.clj /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | /data 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | onyx.log-[0-9]* 12 | hs_err_pid* 13 | *.log 14 | .#* 15 | .DS_Store 16 | *.jfr 17 | log_artifact 18 | /doc/api 19 | log4j.properties 20 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/). 3 | 4 | ## [0.0.2] - 2016-03-12 5 | 6 | ### Added 7 | 8 | - Parsers for connection data/connection strings 9 | - Cluster Metadata support and associated functionality 10 | - Helpers for working with topic partitions 11 | - Ability to create a partition destination to test/develop or for special use-cases for partitioners 12 | - Tweaked schema slightly for better performance and got rid of some calls to deprecated schema functions 13 | 14 | ### Fixed 15 | 16 | - Fixed decode/encode flipped in one of the artities for consumer constructor 17 | 18 | ## [0.0.1] - 2016-03-11 19 | ### Added 20 | 21 | - Initial Release 22 | 23 | [0.0.1]: https://github.com/ymilky/franzy/compare/0.0.1...0.0.1 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Eclipse Public License - v 1.0 2 | 3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 4 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 5 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 6 | 7 | 1. DEFINITIONS 8 | 9 | "Contribution" means: 10 | 11 | a) in the case of the initial Contributor, the initial code and documentation 12 | distributed under this Agreement, and 13 | b) in the case of each subsequent Contributor: 14 | i) changes to the Program, and 15 | ii) additions to the Program; 16 | 17 | where such changes and/or additions to the Program originate from and are 18 | distributed by that particular Contributor. A Contribution 'originates' 19 | from a Contributor if it was added to the Program by such Contributor 20 | itself or anyone acting on such Contributor's behalf. 
Contributions do not 21 | include additions to the Program which: (i) are separate modules of 22 | software distributed in conjunction with the Program under their own 23 | license agreement, and (ii) are not derivative works of the Program. 24 | 25 | "Contributor" means any person or entity that distributes the Program. 26 | 27 | "Licensed Patents" mean patent claims licensable by a Contributor which are 28 | necessarily infringed by the use or sale of its Contribution alone or when 29 | combined with the Program. 30 | 31 | "Program" means the Contributions distributed in accordance with this 32 | Agreement. 33 | 34 | "Recipient" means anyone who receives the Program under this Agreement, 35 | including all Contributors. 36 | 37 | 2. GRANT OF RIGHTS 38 | a) Subject to the terms of this Agreement, each Contributor hereby grants 39 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 40 | reproduce, prepare derivative works of, publicly display, publicly 41 | perform, distribute and sublicense the Contribution of such Contributor, 42 | if any, and such derivative works, in source code and object code form. 43 | b) Subject to the terms of this Agreement, each Contributor hereby grants 44 | Recipient a non-exclusive, worldwide, royalty-free patent license under 45 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 46 | transfer the Contribution of such Contributor, if any, in source code and 47 | object code form. This patent license shall apply to the combination of 48 | the Contribution and the Program if, at the time the Contribution is 49 | added by the Contributor, such addition of the Contribution causes such 50 | combination to be covered by the Licensed Patents. The patent license 51 | shall not apply to any other combinations which include the Contribution. 52 | No hardware per se is licensed hereunder. 53 | c) Recipient understands that although each Contributor grants the licenses 54 | to its Contributions set forth herein, no assurances are provided by any 55 | Contributor that the Program does not infringe the patent or other 56 | intellectual property rights of any other entity. Each Contributor 57 | disclaims any liability to Recipient for claims brought by any other 58 | entity based on infringement of intellectual property rights or 59 | otherwise. As a condition to exercising the rights and licenses granted 60 | hereunder, each Recipient hereby assumes sole responsibility to secure 61 | any other intellectual property rights needed, if any. For example, if a 62 | third party patent license is required to allow Recipient to distribute 63 | the Program, it is Recipient's responsibility to acquire that license 64 | before distributing the Program. 65 | d) Each Contributor represents that to its knowledge it has sufficient 66 | copyright rights in its Contribution, if any, to grant the copyright 67 | license set forth in this Agreement. 68 | 69 | 3. 
REQUIREMENTS 70 | 71 | A Contributor may choose to distribute the Program in object code form under 72 | its own license agreement, provided that: 73 | 74 | a) it complies with the terms and conditions of this Agreement; and 75 | b) its license agreement: 76 | i) effectively disclaims on behalf of all Contributors all warranties 77 | and conditions, express and implied, including warranties or 78 | conditions of title and non-infringement, and implied warranties or 79 | conditions of merchantability and fitness for a particular purpose; 80 | ii) effectively excludes on behalf of all Contributors all liability for 81 | damages, including direct, indirect, special, incidental and 82 | consequential damages, such as lost profits; 83 | iii) states that any provisions which differ from this Agreement are 84 | offered by that Contributor alone and not by any other party; and 85 | iv) states that source code for the Program is available from such 86 | Contributor, and informs licensees how to obtain it in a reasonable 87 | manner on or through a medium customarily used for software exchange. 88 | 89 | When the Program is made available in source code form: 90 | 91 | a) it must be made available under this Agreement; and 92 | b) a copy of this Agreement must be included with each copy of the Program. 93 | Contributors may not remove or alter any copyright notices contained 94 | within the Program. 95 | 96 | Each Contributor must identify itself as the originator of its Contribution, 97 | if 98 | any, in a manner that reasonably allows subsequent Recipients to identify the 99 | originator of the Contribution. 100 | 101 | 4. COMMERCIAL DISTRIBUTION 102 | 103 | Commercial distributors of software may accept certain responsibilities with 104 | respect to end users, business partners and the like. While this license is 105 | intended to facilitate the commercial use of the Program, the Contributor who 106 | includes the Program in a commercial product offering should do so in a manner 107 | which does not create potential liability for other Contributors. Therefore, 108 | if a Contributor includes the Program in a commercial product offering, such 109 | Contributor ("Commercial Contributor") hereby agrees to defend and indemnify 110 | every other Contributor ("Indemnified Contributor") against any losses, 111 | damages and costs (collectively "Losses") arising from claims, lawsuits and 112 | other legal actions brought by a third party against the Indemnified 113 | Contributor to the extent caused by the acts or omissions of such Commercial 114 | Contributor in connection with its distribution of the Program in a commercial 115 | product offering. The obligations in this section do not apply to any claims 116 | or Losses relating to any actual or alleged intellectual property 117 | infringement. In order to qualify, an Indemnified Contributor must: 118 | a) promptly notify the Commercial Contributor in writing of such claim, and 119 | b) allow the Commercial Contributor to control, and cooperate with the 120 | Commercial Contributor in, the defense and any related settlement 121 | negotiations. The Indemnified Contributor may participate in any such claim at 122 | its own expense. 123 | 124 | For example, a Contributor might include the Program in a commercial product 125 | offering, Product X. That Contributor is then a Commercial Contributor. 
If 126 | that Commercial Contributor then makes performance claims, or offers 127 | warranties related to Product X, those performance claims and warranties are 128 | such Commercial Contributor's responsibility alone. Under this section, the 129 | Commercial Contributor would have to defend claims against the other 130 | Contributors related to those performance claims and warranties, and if a 131 | court requires any other Contributor to pay any damages as a result, the 132 | Commercial Contributor must pay those damages. 133 | 134 | 5. NO WARRANTY 135 | 136 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN 137 | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR 138 | IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, 139 | NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each 140 | Recipient is solely responsible for determining the appropriateness of using 141 | and distributing the Program and assumes all risks associated with its 142 | exercise of rights under this Agreement , including but not limited to the 143 | risks and costs of program errors, compliance with applicable laws, damage to 144 | or loss of data, programs or equipment, and unavailability or interruption of 145 | operations. 146 | 147 | 6. DISCLAIMER OF LIABILITY 148 | 149 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 150 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 151 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 152 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 153 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 154 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 155 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 156 | OF SUCH DAMAGES. 157 | 158 | 7. GENERAL 159 | 160 | If any provision of this Agreement is invalid or unenforceable under 161 | applicable law, it shall not affect the validity or enforceability of the 162 | remainder of the terms of this Agreement, and without further action by the 163 | parties hereto, such provision shall be reformed to the minimum extent 164 | necessary to make such provision valid and enforceable. 165 | 166 | If Recipient institutes patent litigation against any entity (including a 167 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 168 | (excluding combinations of the Program with other software or hardware) 169 | infringes such Recipient's patent(s), then such Recipient's rights granted 170 | under Section 2(b) shall terminate as of the date such litigation is filed. 171 | 172 | All Recipient's rights under this Agreement shall terminate if it fails to 173 | comply with any of the material terms or conditions of this Agreement and does 174 | not cure such failure in a reasonable period of time after becoming aware of 175 | such noncompliance. If all Recipient's rights under this Agreement terminate, 176 | Recipient agrees to cease use and distribution of the Program as soon as 177 | reasonably practicable. However, Recipient's obligations under this Agreement 178 | and any licenses granted by Recipient relating to the Program shall continue 179 | and survive. 
180 | 181 | Everyone is permitted to copy and distribute copies of this Agreement, but in 182 | order to avoid inconsistency the Agreement is copyrighted and may only be 183 | modified in the following manner. The Agreement Steward reserves the right to 184 | publish new versions (including revisions) of this Agreement from time to 185 | time. No one other than the Agreement Steward has the right to modify this 186 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The 187 | Eclipse Foundation may assign the responsibility to serve as the Agreement 188 | Steward to a suitable separate entity. Each new version of the Agreement will 189 | be given a distinguishing version number. The Program (including 190 | Contributions) may always be distributed subject to the version of the 191 | Agreement under which it was received. In addition, after a new version of the 192 | Agreement is published, Contributor may elect to distribute the Program 193 | (including its Contributions) under the new version. Except as expressly 194 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 195 | licenses to the intellectual property of any Contributor under this Agreement, 196 | whether expressly, by implication, estoppel or otherwise. All rights in the 197 | Program not expressly granted under this Agreement are reserved. 198 | 199 | This Agreement is governed by the laws of the State of New York and the 200 | intellectual property laws of the United States of America. No party to this 201 | Agreement will bring a legal action under this Agreement more than one year 202 | after the cause of action arose. Each party waives its rights to a jury trial in 203 | any resulting litigation. 204 | -------------------------------------------------------------------------------- /doc/best_practices.md: -------------------------------------------------------------------------------- 1 | # Best Practices 2 | 3 | Some general best practices: 4 | 5 | * Decide early on a data format and serializer/deserializer. It will be difficult to change later on without replaying the complete logs. 6 | * Decide as early as possible on your partitioning and clustering strategies. This will influence how and where you run a Franzy producer or consumer. 7 | * Limit the creation of data structures and objects when possible. This is particularly recommended for your actual consumer and producer instances, as well as callbacks. 8 | * Creating and re-creating producers, consumers, and callbacks is usually an anti-pattern. 9 | * Creating and re-creating objects will lead to lots of garbage collection in addition to frequent reconnects, rebalances, and so on. 10 | * Prefer records over maps, particularly for consumption. You can use records to produce as well if you prefer them over maps, or simply use explicit arities of production functions. 11 | * Keep in mind that most of what you do, ideally, should be called in a loop over and over. Keep this loop predictable and efficient, as any garbage, cache thrashing, etc. will cut your throughput consuming or producing. 12 | * Use transducers to extract and process your results 13 | * Do explore and read about the Kafka config settings. Tweaking them can make a huge difference in performance, but these settings will be relative to your use-case. 14 | * Store your configuration as EDN or using some kind of configuration management library. Minimum dynamically build it or inject it, rather than defining it globally. 
15 | * Wrap your consumers and producers in a library like [Component](https://github.com/stuartsierra/component) or [Mount](https://github.com/tolitius/mount). 16 | * Examples: coming soon 17 | * Use log4j and create a log4j.properties or a compatible alternative for Kafka if you need more information about what is going on (or going wrong) with Kafka. Likewise, as Kafka uses Zookeeper, you may elect to also separately process Zookeeper logs. 18 | * For general logging, I highly recommend [Timbre](https://github.com/ptaoussanis/timbre). 19 | 20 | ## Consumers and Producers 21 | 22 | * Define keys up-front. Many serializers will require you to explicitly provide a key for producer records. 23 | * Use core.async, manifold, or a similar library for asynchronous producing and consuming. Generally, Kafka recommends that you limit consumers and producers to a single thread each. If you have multiple consumers, you could for example have a thread for each. 24 | * Always close your consumer and producer. You can use with-open or a custom macro to close your consumers and producers, but be sure that you do so in try/catch/finally style. This is automatic when using with-open. 25 | * When closing a consumer, be sure to attempt to wake it up from another thread when shutting it down. This prevents the consumer from delaying or deadlocking shutdown when stuck in a long poll. 26 | * Remember to assign your consumers a meaningful `:group.id`. 27 | * Distribute consumers and producers across machines if possible. If you use a thread per consumer for example, it is not hard to imagine that with many consumers you may exhaust your JVM threads/thread pools. 28 | * Assign your producer or consumer a `:client.id` in its configuration. This helps trace problems when they happen and will make it easier for you to understand what is going on in Zookeeper. 29 | * Set your `:metric.num.samples` and `:metric.sample.window.ms` for more useful and possibly better performing metrics to fit your use-case. 30 | * Pass your serializers/deserializers explicitly rather than as config property strings. Although strings are fully supported in Kafka, it is far easier to maintain code that will fail to compile/run because of a changed or missing namespace. 31 | 32 | ### Consumers 33 | 34 | * Set `:fetch.min.bytes` and `:max.partition.fetch.bytes` where possible to prevent consumers from getting stuck or taking long CPU/network cycles. The more predictable your fetches, the better. Be aware that if you have large messages, too small a max setting could leave the consumer stuck on a message. 35 | * If your partitioning is very predictable and constant, that is, you know the layout of your data will not change, you can optimize your consumer by providing a `:partition.assignment.strategy` that fits your use-case. You may use one of the provided strategies or implement your own using the tools provided by Franzy. 36 | * Set `:retry.backoff.ms` to avoid failed fetches hammering the server. For example, if you set a high number of retries per fetch, you can bog down the consumer in a tight loop. 37 | * Set `:fetch.max.wait.ms` to avoid repeated network IO from failed/empty polls. The consumer will block if there isn't enough data as specified by `:fetch.min.bytes`, but will answer as soon as it is able to retrieve enough data. For a topic partition that isn't seeing much production and where the consumer is near the end of the log, however, this may be a bad idea. A combined config sketch follows this list.
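Here is that sketch, written in the keyword-keyed config map style used throughout these docs. The values are illustrative placeholders, not recommendations; tune them against your own message sizes and latency requirements.

```clojure
;; Illustrative consumer config; every value here is a placeholder.
(def consumer-config
  {:bootstrap.servers         ["127.0.0.1:9092"]
   :group.id                  "my-consumer-group"
   :client.id                 "analytics-consumer-1"
   :fetch.min.bytes           65536      ;block until at least 64KB is available...
   :fetch.max.wait.ms         500        ;...or 500ms have elapsed, whichever comes first
   :max.partition.fetch.bytes 1048576    ;must exceed your largest expected message
   :retry.backoff.ms          200
   :metric.num.samples        2
   :metric.sample.window.ms   30000})
```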
38 | 39 | ### Producers 40 | 41 | * Use batching when possible via `:batch.size` for better performance. Be aware of the trade-offs between small and large batches, and try to make your batches predictable. 42 | 43 | ## Offsets 44 | 45 | * When dealing with offset management and processing in general, try to create idempotent functions and solutions. Kafka in many cases will call callbacks multiple times with the same data. Additionally, it may be useful to have an idempotent design when things go wrong and you need to reset the consumer's position to replay the logs. 46 | * Do not subscribe and attempt to manually assign offsets at the same time. They are mutually exclusive. You have been warned, again. 47 | * Do not assume offset 0 is the beginning of a topic. As the log grows, messages may be removed over time. 48 | * If manually managing offsets, ensure you use a high-performance, fault-tolerant data store. Fault-tolerance should be stressed in this situation, as losing an offset could cause data loss. 49 | * Ensure you commit offsets in the same thread you are polling. 50 | * It is often a more workable solution for those manually committing offsets to use batching. Take a few items from your poll in a batch, process them, and commit when you know your batch has ack'd successfully in your application code. A consumer poll can return thousands of records; if you cannot afford to miss data, assuming autocommit will always work requires great confidence in whatever is processing your batch. 51 | * One way to get more robust capabilities when doing manual offset committing is to pass batches to a stream processor such as Onyx that can ack when it succeeds. When the job succeeds, you can then commit your offsets to Kafka safely. Some advantages are that many stream processors have at-least-once or at-most-once semantics, retries, job durability, job state management, and more to make your life easier. If storing your offsets externally or in a separate process from the consumer, you can consider using the job itself to commit offsets as the last/completion step. 52 | 53 | 54 | -------------------------------------------------------------------------------- /doc/performance.md: -------------------------------------------------------------------------------- 1 | # Performance 2 | 3 | ## General 4 | 5 | Franzy attempts to make some compromises regarding elegant code, while still trying to maintain general Clojure best practices. 6 | 7 | Performance tuning is an ongoing concern and development activity. Relative to the native Java producer/consumer interfaces it wraps, Franzy will by nature be somewhat slower. Some operations, such as creating results for production and consumption when interfacing with the client, may be as efficient as or more efficient than comparable Java, but this will vary on a per-implementation basis. 8 | 9 | ## Serialization 10 | 11 | Serializers are built on well-tested libraries that offer good performance relative to most options. All provided serializers either use `deftype` for performance and usability elsewhere, or are pure Java. 12 | 13 | Nippy serialization currently offers a good balance of usability, compression, and performance. 14 | 15 | ## Conversions 16 | 17 | Performance of consumers and producers can be further tweaked by extending and overriding built-in implementations as desired. A compromise was made between accurate/future-proof conversions and performance. Originally, all conversions were hand-rolled, but this led to less extensibility and much uglier code.
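Because the conversions are protocol-based (the `FranzCodec` protocol with its `encode`/`decode` functions lives in `src/franzy/clients/codec.clj`, shown later in this repository), user code can extend or override them without patching Franzy. A minimal, hypothetical sketch that overrides decoding for `TopicPartition`; the exact decode shape you return is up to you:

```clojure
(ns my.app.codec-tweaks
  (:require [franzy.clients.codec :as codec])
  (:import (org.apache.kafka.common TopicPartition)))

;; Override how TopicPartition objects are decoded, e.g. to keywordize topic names.
(extend-protocol codec/FranzCodec
  TopicPartition
  (encode [tp] tp)
  (decode [tp]
    {:topic     (keyword (.topic tp))
     :partition (.partition tp)}))
```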
Originally, all conversions were hand-rolled, but this lead to less extensibility and much uglier code. 18 | 19 | Conversions are primarily done at the time of writing using protocols, with hand-rolled methods only as necessary. Clever methods of using reflection, undocumented metadata, etc. have been intentionally avoided. Likewise, using add-on libraries that make possibly accurate claims about fixing some of the underlying issues are actively avoided for now. 20 | 21 | ## Callbacks 22 | 23 | All provided callbacks are implemented using `deftype` or `reify` for performance and usability. This was done to avoid any extra overhead imposed and to free up optimizing callbacks as a purely user-based exercise. 24 | 25 | More specifically, if your callbacks are slow, it is probably your fault. If your callback is a bottleneck, there is probably an issue with your design. 26 | 27 | ## Construction 28 | 29 | Extreme care and notes are made in the code to avoid recreating objects where possible to avoid excessive GC and allocations. For example, consumers may have callbacks passed on creation to help avoid the temptation to create callbacks in a loop. 30 | 31 | You should always avoid creating/recreating consumers, producers, callbacks, etc. in loops. Instead, pool your objects, close over them, cache, etc. 32 | 33 | ## Validation 34 | 35 | Validation may be enabled/disabled at any time to avoid any performance overhead. See [Schema](https://github.com/plumatic/schema) for more details. 36 | 37 | ## Offset Management 38 | 39 | Franzy offers automatic offset management via Kafka. This is a high-performance and user-friendly option. 40 | 41 | You may also elect to manually manage offsets. If you decide to do so, ensure your data store is both high-performance and reliable. The trade-offs between these are per-application specific. For example, you can still use Zookeeper if desired, or you may elect to use Redis, Aerospike, etc. for offset management. Understand that some datastores are less reliable than others. 42 | 43 | If you are ensure about this choice, please send me a message and I will be happy to discuss it with you. I will not bash specific data stores here, but will happily do so in private. 44 | 45 | ## Partitioning 46 | 47 | Understand Kafka partitioning fully if you want to tweak your implementation. 48 | 49 | Here is a good starting place - [How to Choose the Number of Topics and Partitions in a Kafka Cluster](http://www.confluent.io/blog/how-to-choose-the-number-of-topicspartitions-in-a-kafka-cluster/) 50 | 51 | ## Data Types 52 | 53 | Favor record implementations over maps if possible. Records will provide a better memory footprint and will shine if your operations require large collections of map structures. For instance, if you are creating a large list of topic partitions to make an API call, create them as records. They will work just the same as records. 54 | 55 | Understand that records behave slightly differently than maps. If you don't know why or how this is, please take the time to research the issue. 
56 | 57 | ## Crude Producer Benchmark 58 | 59 | Non-scientific benchmark based off another Kafka client benchmark to test on your machine: 60 | 61 | ```clojure 62 | (defn nippy-producer-bench [] 63 | (let [pc {:bootstrap.servers ["127.0.0.1:9092"]} 64 | key-serializer (serializers/keyword-serializer) 65 | value-serializer (nippy-serializers/nippy-serializer) 66 | options (pd/make-default-producer-options) 67 | pr (pt/->ProducerRecord "test-nippy" 0 "key" "data")] 68 | (with-open [p (producer/make-producer pc key-serializer value-serializer options)] 69 | (time 70 | (doseq [i (range 1000000)] 71 | (send-async! p pr)))))) 72 | ``` 73 | 74 | Many factors may affect this and results will vary highly. As stated, extremely non-scientific. 75 | 76 | If you have real-world benchmarks using this library, please let me know. 77 | -------------------------------------------------------------------------------- /doc/producers.md: -------------------------------------------------------------------------------- 1 | # Producers 2 | 3 | For production use, ensure you are not creating/recreating the producer constantly. Additionally, be sure you are not holding any stray references before shutting down the producer. The examples given in Franzy and Franzy-examples are only for getting a feel for what is possible in the API, actual usage will vary greatly depending on your data flow. 4 | 5 | A common pattern for producers is to either directly accept input for production or to take values off of some queuing mechanism, for example a core.async channel. Your durability and latency requirements should reflect this choice. Once you have a value, at the simplest level, production involves sending your value with some information about where it should go - a topic, partition, and a key. 6 | 7 | Kafka guarantees ordering within a partition, but not between partitions. You can manually select a partition to send data to, rely on a partitioning algorithm you've configured via the producer configuration, or call some other function in your code that will select a partition. The `default-partitioner' function in the Franzy producer package is one example of a partitioner (built-in). The key you provide will help a partitioner decide in what partition to place your value. 8 | 9 | In practice, it is generally best to know up front where you are sending your data and why. For example, to provide parallelism to scale consumers, you might elect to place different user data in different partitions. Selecting a partition per user would usually be a bad idea and is a common misconception for beginners. Among many reasons, you will eventually hit scalability limits that are linked more to the underlying file descriptors that need to be allocated than anything else in Kafka. Instead, when deciding how to partition your data when producing, you should think how you can bucket data in ways that make sense for both your ordering and scalability needs. If you have for example a few super users who cause logjams in your consumers, you might elect to spread their data into different partitions. 10 | 11 | A producer can produce data by sending a producer record to Kafka. A producer record can be passed to the `send-sync!` or `send-async!` protocols as a map, producer record type, or as explicit parameters. You should explicitly provide the topic, partition, key, and value if you know up front where your data should go. 
If you are a more advanced user and want to let a Kafka partitioner do the job, you may provide only the topic and value, or the topic, key, and value, depending on the partitioner implementation and data format. 12 | 13 | ## Partitioners 14 | 15 | ### Default Partitioner 16 | 17 | The default partitioning strategy, via `DefaultPartitioner`, is as follows: 18 | 19 | * If a partition is specified in the record, use it 20 | * If no partition is specified but a key is present, choose a partition based on a hash of the key 21 | * If no partition or key is present, choose a partition in a round-robin fashion 22 | 23 | Take special notice of the case when the key is present - your key will be hashed with murmur2. 24 | 25 | ### Rolling Your Own Partitioner 26 | 27 | If you want to avoid calling your own partition function each time to calculate a partition before you produce, you can provide a `partitioner.class` key in your producer configuration. This class should be discoverable on your classpath, and the configuration value should be its fully-qualified class name as a string. 28 | 29 | You will need to implement the Partitioner interface. You can of course roll this in pure Java, or simply use deftype/gen-class and, if needed, AOT compilation. There's nothing preventing you from instead just manually calling your own function, but the advantage of providing it via configuration is that it prevents any of your client code from missing this crucial step, which would lead to undesirable effects in your partitioning strategy given an error. 30 | 31 | As a best practice, you should decide your partitioning strategy before creating your topic and adding data to partitions. Failure to do so may lead to situations where your pre-existing data is not partitioned according to the same strategy as future data. Since your partitioning strategy will play a role in how the data is consumed and in what order (per partition), this can have some dire consequences in some systems. If you forget, a simple fix is to replay your log until the point in time where you changed your partitioning strategy, and then write the old records into new partitions if possible, or replay all the data into a new topic. The former is doable only if the old data does not need to be ordered before existing data or you are going to rewrite all the data. The latter solution is usually cleaner and easier to implement. 32 | 33 | ## Encapsulating a Producer 34 | 35 | Typically you will want to encapsulate your producer somehow to be able to maintain a reference to it and avoid recreating it as previously discussed. 36 | 37 | It is highly recommended not to globally declare your producer using def or defonce as a singleton if possible. This often creates subtle bugs and shutdown issues, and can result in multiple instantiations of the same producer due to the namespace being evaluated at different points. 38 | 39 | The most common patterns to safely manage your producer include (but are not limited to): 40 | 41 | * Component 42 | * Mount 43 | * Atom/Maps 44 | 45 | Usually you should construct the producer inside whatever owns it, via the config, and pass any supporting data into a construction method as well, such as configuration for core.async channel sizes, producer options, topics, partitioning strategies, and other dependencies. In the case of Component, this usually just means a simple make/new function for the component in conjunction with the component's start protocol implementation, as in the sketch below.
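A minimal Component sketch, reusing the `make-producer` arity and serializer constructors shown in the benchmark in doc/performance.md; the EDN serializer constructor name and the exact require paths are assumptions based on the source tree, not verified API:

```clojure
(ns my.app.kafka-producer
  (:require [com.stuartsierra.component :as component]
            [franzy.clients.producer.client :as producer]
            [franzy.clients.producer.defaults :as pd]
            [franzy.serialization.serializers :as serializers]))

(defrecord ProducerComponent [config kafka-producer]
  component/Lifecycle
  (start [this]
    (if kafka-producer
      this                                            ;already started, keep start idempotent
      (assoc this :kafka-producer
             (producer/make-producer config
                                     (serializers/keyword-serializer)  ;keys
                                     (serializers/edn-serializer)      ;values (assumed constructor)
                                     (pd/make-default-producer-options)))))
  (stop [this]
    (when kafka-producer
      (.close ^java.io.Closeable kafka-producer))     ;producers are Closeable; always close them
    (assoc this :kafka-producer nil)))

(defn new-producer-component [config]
  (map->ProducerComponent {:config config}))
```

The start/stop pair keeps construction and cleanup in one place, which also lines up with the shutdown ordering discussed in the next section.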
46 | 47 | ## Possible Flow 48 | 49 | The component strategy has many permutations and is similar to the other strategies. A common pattern I have used is to create a component with a thread or go-loop stored under one or more keys in the component. You will also have a few channels for input, output, errors, and control (kill/pause/etc.) as keys in the component. The thread/go-loop will take values from an inbox, usually a core.async channel, process them, and write output such as acks to one or more output channels. 50 | 51 | The acks can then be used to notify other parts of your application, such as a UI, that the write to Kafka succeeded. Moreover, you may have other threads and go-loops that manage the ack data to write this information to another store, for example Redis or Cassandra, or simply to notify other parts of your application more directly. 52 | 53 | If production fails, you can retry either by looping again before taking another inbox value or by using the retry mechanisms provided by Kafka itself. If you cannot proceed on a failure due to durability requirements or a network outage, then simply close the producer and act accordingly in your application. It is very important that you always consider how you will clean up your resources. In a component, this is done during the stop protocol implementation. Your actual implementation may vary a lot depending on how you manage this phase. For example, you may elect to take values from your loops to block until they shut down. If you have high durability requirements, you may need to flush your channels completely before a clean shutdown can happen. In other words, if you are using an input channel, you need to make sure it has no pending values left before shutting down the production loop. 54 | 55 | For shutdown, always be sure you clean up things in the proper order and be sure to close: 56 | 57 | * Threads/Channels that you own, i.e. not from the consumer-side of the channel 58 | * The producer, via close. Note that close can take an optional timeout via the producer options or directly in the 1-arity close method. 59 | * Any other resources such as open connections to databases, file systems, etc. 60 | 61 | Finally, again remember that if you are queueing values, it is your responsibility to decide whether a shutdown requires emptying the queue first or discarding any unprocessed data. Getting this wrong is a common mistake I've come across in production code for Kafka. 62 | 63 | 64 | ...more soon. 65 | -------------------------------------------------------------------------------- /doc/rationale.md: -------------------------------------------------------------------------------- 1 | # Rationale 2 | 3 | Franzy was created to be a self-serving Kafka client with a few specific goals in mind that differ from existing Kafka clients in various ways. It turns out that selfishness is sometimes useful to other people. With that in mind, some of the useful bits from a larger app using this code base were extracted, while the more specific bits continue to be selfish. Maybe one day they will find a home here. 4 | 5 | Many people might say, why another Clojure client? There are many reasons, some outlined below. The simple version is that I wrote a lot of code for my own project and was nice enough to open-source it. You can be nice enough to either use it or not use it, but don't complain. 6 | 7 | I started really getting annoyed with certain clients, including the official clients.
At one point I needed something very high-performance, so I started writing a low-level client from scratch in Rust. While it ran awesome for the basics I implemented, I decided I had no interest in maintaining it. Thus, I decided to suck it up and accept that wrappers while crude free me from dealing with most issues while still allowing a decent interface. 8 | 9 | As the Kafka 0.9 client release was announced, I started developing a wrapper directly from their git repo. Months later, I open sourced it and it's what you see now. I have a lot of other projects on my plate, and my interest was a feature complete Kafka platform in addition to the consumer and producer. I hope that's what I have provided and that others will step in to help improve and maintain it. I have a lot of suggestions and ideas, so there's much room for improvement. 10 | 11 | ## General 12 | 13 | * Pure Clojure data structures, no Kafka Java API objects or otherwise floating around for users of this library to worry about managing 14 | * Mostly Clojure style conventions - keyword keys, lowercase functions and keys, Clojure data structures as parameters 15 | * Transparent results regardless of format to/from Kafka - Clojure in, Clojure out 16 | * Do not reinvent the wheel too much - accomplished via wrapping the existing, battle-tested Java APIs as much as possible 17 | * Franzy originally started implementing the from scratch including the protocol, and while this proved to be faster with more elegant concurrency possibilities, I have no resources to support such a large endeavour. If you plan to do this, I would be happy to collaborate and add to Franzy. 18 | * Although writing a low-level client is fast and interesting, it has been my experience as well as many others that these implementations are often lacking features and worse, highly problematic and incorrect. 19 | * As of this writing, I feel Kafka is a bit in flux on some things and better to let the core developers implement it rather than always being a few steps behind. 20 | * Give the option of either storing offsets in Kafka or in your own datastore 21 | * Avoid forcing a particular datastore on users such as Zookeeper or Redis, within reason 22 | * Extensibility 23 | * Allow people to easily add serializers, useful callbacks, offset management strategies, partitioning strategies, etc. 24 | * Use what is already done toward the goal of extensibility in the official Java client, but wrap it in a more Clojure-friendly way 25 | * Easy integration with core.async, manifold, built-in Java/Clojure libraries, or any other async libraries 26 | * Make data available as persistent collections and support transducers toward this goal 27 | 28 | ## Compatibility 29 | 30 | * Provide a Kafka client with 0.9 (and hopefully above) compatibility 31 | * Support a few important APIs that may be deprecated, but are at least important in the short-term to getting things done 32 | * Clojure 1.8 (and above) support 33 | 34 | ## Performance 35 | 36 | * Reasonable performance, bearing in mind overhead of conversions and wrapping Java in Clojure. 37 | * Avoid realizing lazy results when unnecessary, likewise avoiding lazy overhead when unnecessary. 38 | * No neat Clojure tricks that add needless overhead to key functions. For example, no extra apply tricks and giant cond blocks for producer and consumer related operations for the sake of allowing many ways to call the same thing. 39 | * Ability to toggle validation on/off as required. 
Sometimes, we like to swim in shark infested waters. 40 | 41 | ## Stability and Usage 42 | 43 | * Validation of configurations and important API data structures. 44 | * No guessing what a map, vector, etc. is supposed to contain or having to refer to the code just to know a parameter type. 45 | * Reasonable amount of schema/type hints. 46 | 47 | ## Integrations 48 | 49 | * Ability to be integrated easily into plug-ins and add-ons for stream processing related libraries and other things that typically interact closely with Kafka 50 | * Easy [Onyx](https://github.com/onyx-platform/onyx) input/output task integration via plug-in 51 | * Granular control of serialization, deserialization options, batch size, offset seek, and more via catalog 52 | * No unnecessary dependencies pulled into Onyx 53 | * No forcing a particular format such as json to/from Onyx 54 | 55 | ## Other Clients 56 | 57 | There are a number of other excellent clients that already exist and I suggest you happily use them instead of this one if it makes sense for you. This includes clients I could have used for Java, Clojure, Scala, and Node.js among other languages. 58 | 59 | For Clojure, I found the following clients to be useful in various ways, please thank the authors and/or use their clients instead of this one: 60 | 61 | * [clj-kafka](https://github.com/pingles/clj-kafka) - Great client that I used in many other projects 62 | * [Kinsky](https://github.com/pyr/kinsky) - 0.9 and basic async support/example 63 | * [Kafka-Fast](https://github.com/gerritjvv/kafka-fast) - Low-level, excellent, fast client that uses Redis for offset management 64 | 65 | ## Why Another? 66 | 67 | Unfortunately, no existing clients met the various points listed in the sections above. The following section is by no means directed at/only at Clojure clients, but rather clients as a whole. Anything listed here should also apply to Franzy as well (if not already fixed/doesn't apply) and be an issue or work-item. 68 | 69 | Generally, a lot of authors, especially those that wrap clients have a nasty habit of dropping functionality. Key areas include security, partitioning, and many of the more advanced settings that users should be using. Kafka makes it very easy to create a configuration and client, then make it work, however this does not mean it will work optimally. Some examples of dropped behavior include overloads, important constructors, etc. While this is understandable, there is often a reason why the source implementations provide this functionality and it is often not just syntatic sugar. 70 | 71 | Without getting overly specific, many other clients did one or more of the following to make themselves less suitable to my needs: 72 | 73 | * No recent commits or activity otherwise 74 | * Outdated dependencies such as Clojure 75 | * Indecisive about supporting new versions of Kafka 76 | * Slow, too much overhead, or too much extra code for inter-op with Clojure 77 | * Broken on Kafka 0.9 and above or no support for 0.9 consumer API 78 | * Did not follow the Kafka protocol spec correctly - relevant only to any non-wrapped clients 79 | * Mistakes in consumer rebalancing, threading, clustering, and otherwise doing things that made life impossible 80 | * Missing key information in result sets like offsets, metadata, etc. 
and no ability to fetch them easily without breaking the abstraction or spinning what amounted to an entirely new client anyway 81 | * Slow/neutered/incorrect/baked-in serialization - many clients force unnecessary dependencies for serializers, don't use them efficiently, neglect streams, etc. 82 | * Realizing lazy data eagerly when not necessary or even consistent with Kafka itself 83 | * No support for both synchronous and asynchronous consumption 84 | * Flaws in key algorithms such as offset management and consumer positioning 85 | * API warts such as requiring inline conversions to byte arrays for producers 86 | * Crippled security settings in consumer, producer, admin, etc. SSL support at a minimum should be a non-starter, even if you are not currently required to use it 87 | * Memory leaks, connection leaks, bizarre semantics that are non-standard to the interfaces the client is written against, etc. 88 | * Dropped data, especially metadata from many API method requests/responses. While the author may not have needed that data, many of us do. 89 | -------------------------------------------------------------------------------- /doc/serialization.md: -------------------------------------------------------------------------------- 1 | # Serialization 2 | 3 | ## Usage 4 | 5 | Serializers/deserializers can be passed in the factory function when creating a consumer or producer. 6 | 7 | Your usage of serializer and options should always be symmetrical. That is, whatever you encode, you must be able to decode. Do not for example mix a nippy serializer and a fressian deserializer. This applies at the partition level, but it is recommended you also do not mix across partitions as your consumer will be unable to handle the results of an encoding it is not prepared to decode. Likewise, the same applies to a producer. 8 | 9 | Alternatively, you may specify the serializer/deserializer via qualified class reference by using the Kafka config properties: 10 | 11 | | Name | Description | Example | 12 | |----------------------|--------------------------|--------------------------------------------------------------| 13 | | `value.serializer` | serializes your values | `"org.apache.kafka.common.serialization.StringSerializer"` | 14 | | `key.serializer` | serializes your keys | `"org.apache.kafka.common.serialization.LongSerializer"` | 15 | | `value.deserializer` | deserializes your values | `"org.apache.kafka.common.serialization.StringDeserializer"` | 16 | | `key.deserializer` | deserializes your keys | `"org.apache.kafka.common.serialization.LongDeserializer"` | 17 | 18 | ## Serializers/Deserializers 19 | 20 | Please create a work item if there is a serializer you need that is missing. One of the goals of Franzy is to provide comprehensive serialization solutions that can be added as libraries to minimize the impact of serializer related dependencies and development roadblocks. 
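Whichever library you pick, the wiring is the same as in the Usage section above: either pass serializer instances to the factory function, or name the classes in the config itself. A sketch of the config-property style, using the class names from the table above; the keyword-keyed map simply mirrors the `:bootstrap.servers` style used elsewhere in these docs:

```clojure
;; Config-property style: Kafka instantiates the named classes itself.
{:bootstrap.servers ["127.0.0.1:9092"]
 :key.serializer    "org.apache.kafka.common.serialization.LongSerializer"
 :value.serializer  "org.apache.kafka.common.serialization.StringSerializer"}
```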
21 | 22 | The following serializers/deserializers are currently available or planned for your use: 23 | 24 | | Name | Speed | Compression | Ease of Use | 25 | |----------------------|----------------------------|--------------------------|----------------------------------------------------------------------------------| 26 | | EDN | great | minimal, but can compose | great | 27 | | String | great | none | poor, useful for keys or simple data only | 28 | | Keyword | great | none | poor, useful for keys or simple data only | 29 | | Integer | great | none | poor, useful for keys or simple data only | 30 | | Long | great | none | poor, useful for keys or simple data only | 31 | | Byte Array | great | none, but can compose | worst, hard in-line code, but versatile | 32 | | Simple EDN | good, great for small data | minimal, but can compose | great, but can lead to easy OOM error | 33 | | JSON | good | none/native json | good, but requires settings/handlers for complex types/preservations of types | 34 | | JSON Smile | ok | good | good, but same as JSON with possible risk of Smile-issues | 35 | | Nippy | very good | great | great, probably the best current balance of speed and compression | 36 | | Fressian | good | very good | good, but prefer nippy in most cases unless domain reasons | 37 | | Debug | poor/depends | depends | ok, not intended for production use, but can compose anything and pass back info | 38 | | Transit | great | good | great, but should match use case, otherwise prefer nippy | 39 | | Avro | great | great | poor, a bit pedantic and requires up-front schema | 40 | | UUID/SQUID (planned) | great | none | poor, useful for keys or simple data only | 41 | | Gzip (planned) | great | very good | prefer nippy, unless specific domain reason | 42 | 43 | # Available Serializers 44 | 45 | * EDN - built-in, good for Clojure data values 46 | * Simple EDN - built-in, good for small Clojure values 47 | * Integer - built-in, good for keys 48 | * Long - built-in, good for keys 49 | * String built-in, good for keys 50 | * Byte Array - built-in, good for values, use if you want to manually handle de/serialization for some strange reason 51 | * Keyword - built-in, good for keys 52 | * Debug - built-in, good for debugging, a shocker - can compose other serializers and log the output 53 | * [Franzy-JSON](https://github.com/ymilky/franzy-json) - JSON Serialization with optional Smile support 54 | * [Franzy-Nippy](https://github.com/ymilky/franzy-nippy) - Nippy serialization - *highly recommended* 55 | * [Franzy-Fressian](https://github.com/ymilky/franzy-fressian) - [Fressian](https://github.com/Datomic/fressian) serialization, especially useful for those integrating with [Datomic](http://www.datomic.com) 56 | * [Franzy Transit](https://github.com/ymilky/franzy-transit) - [Transit]() with support for JSON, JSON-verbose, msgpack 57 | * [Franzy Avro](https://github.com/ymilky/franzy-avro) - [Avro](https://avro.apache.org/) with support for EDN 58 | -------------------------------------------------------------------------------- /doc/terminology.md: -------------------------------------------------------------------------------- 1 | # Basic Terminology 2 | 3 | The following terminology is used heavily in this library, documentation, and code comments: 4 | 5 | * Topic - feeds of messages in categories 6 | * Partition - Divides topics, for consumption, load balancing, etc. Each partition may exist on a separate machine, or the same machine. Multiple consumers may read from multiple partitions. 
7 | * Offset - A position in a particular topic partition, used on a per-consumer basis 8 | * Partitioner/Partition Strategy - How data will be distributed within Kafka. 9 | * Consumer Group - a logical grouping of consumers, used to implement queuing and publish-subscribe semantics, depending on the consumer. 10 | * Assignment - The topic, partition, and offset a consumer is assigned to 11 | * Consumer Rebalance - Occurs when consumers come to a consensus on which consumer is consuming which partitions. Triggered on each addition or removal of both broker nodes and other consumers within the same consumer group. 12 | * Broker - A node in a Kafka cluster. These will be specified in your connection parameters when you use a producer, consumer, or other APIs that need to interact with the Kafka cluster. 13 | * Producer - a process that publishes messages. 14 | * Consumer - a process that consumes messages. 15 | * Producer Record - a message that will be sent by the producer to a topic. 16 | * Consumer Record - a message that will be returned to a consumer from a topic. 17 | * Topic Partition - a logical grouping of a topic and any valid partition. 18 | * Leader - each partition has one server that acts as the leader, and zero or more servers as followers. 19 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject ymilky/franzy "0.0.2-SNAPSHOT" 2 | :description "Clojure Kafka client with support for Kafka producer, consumer, rebalancing, administration, and validation." 3 | :url "https://github.com/ymilky/franzy" 4 | :author "ymilky and others, but see README" 5 | :license {:name "Eclipse Public License" 6 | :url "http://www.eclipse.org/legal/epl-v10.html"} 7 | :repositories {"snapshots" {:url "https://clojars.org/repo" 8 | :username :env 9 | :password :env 10 | :sign-releases false} 11 | "releases" {:url "https://clojars.org/repo" 12 | :username :env 13 | :password :env 14 | :sign-releases false}} 15 | :dependencies [[org.clojure/clojure "1.8.0"] 16 | [prismatic/schema "1.0.5"] 17 | [org.apache.kafka/kafka-clients "0.9.0.1"] 18 | [ymilky/franzy-common "0.0.2-SNAPSHOT"]] 19 | :plugins [[lein-codox "0.9.4"]] 20 | :codox {:metadata {:doc/format :markdown} 21 | :doc-paths ["README.md"] 22 | :output-path "doc/api"} 23 | :profiles {:dev 24 | {:dependencies [[midje "1.7.0"] 25 | [com.taoensso/timbre "4.3.1"]] 26 | :plugins [[lein-midje "3.2"] 27 | [lein-set-version "0.4.1"] 28 | [lein-update-dependency "0.1.2"] 29 | [lein-pprint "1.1.1"]]} 30 | :reflection-check {:global-vars {*warn-on-reflection* true 31 | *assert* false 32 | *unchecked-math* :warn-on-boxed}}}) 33 | -------------------------------------------------------------------------------- /src/franzy/clients/cluster.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.cluster 2 | "Helpers and useful functions for working with Cluster metadata." 3 | (:require [franzy.clients.codec :as codec] 4 | [franzy.clients.partitions :as partitions]) 5 | (:import (org.apache.kafka.common Cluster) 6 | (java.net InetSocketAddress))) 7 | 8 | (defn bootstrap-cluster-hosts 9 | "Bootstraps a cluster from a collection of maps of host names and ports for InetAddresses. 10 | 11 | ex: `(bootstrap-cluster-hosts [{:host-name \"localhost\" :port 9092}])`" 12 | [host-map-coll] 13 | (->> (map (fn [{:keys [^String host-name port]}] (InetSocketAddress.
host-name (int port))) host-map-coll) 14 | (Cluster/bootstrap))) 15 | 16 | (defn bootstrap-cluster 17 | "Bootstraps a cluster from a collection of InetAddresses." 18 | [addresses] 19 | (Cluster/bootstrap addresses)) 20 | 21 | (defn empty-cluster [] 22 | "Creates an empty cluster with no nodes and no topic-partitions." 23 | (Cluster/empty)) 24 | 25 | (defn make-cluster 26 | "Creates a cluster from a given collection of nodes and partition info and an optional set of unauthorized topics. 27 | 28 | If no parameters are provided, an empty cluster is created." 29 | (^Cluster [] (Cluster/empty)) 30 | (^Cluster [nodes partition-info] 31 | (make-cluster nodes partition-info nil)) 32 | (^Cluster [nodes partition-info unauthorized-topics] 33 | (let [node-coll (map codec/map->node nodes) 34 | partition-coll (map codec/map->partition-info partition-info) 35 | unauthorized-topic-set (into #{} unauthorized-topics)] 36 | (Cluster. node-coll partition-coll unauthorized-topic-set)))) 37 | 38 | (defn mock-nodes 39 | "Creates a mock number of nodes based on the provided node count." 40 | [node-count] 41 | (map (fn [n] {:id n :host "127.0.0.1" :port 9092}) (range 1 (inc node-count)))) 42 | 43 | (defn mock-cluster 44 | "Creates a mock cluster for testing, dev, and as dummy data for Kafka functions requiring clusters such as partitioners." 45 | [node-count topic-partitions unauthorized-topics] 46 | (let [nodes (mock-nodes node-count) 47 | partitions (partitions/mock-partition-info topic-partitions)] 48 | (make-cluster nodes partitions unauthorized-topics))) 49 | 50 | (defn available-partitions 51 | "Retrieve a collection of available partitions for a topic in a cluster." 52 | [^Cluster cluster ^String topic] 53 | (->> (.availablePartitionsForTopic cluster topic) 54 | (codec/decode))) 55 | 56 | (defn leader-for 57 | "Retrives the partition leader for a given topic partition." 58 | ([^Cluster cluster {:keys [topic partition]}] 59 | (leader-for cluster topic partition)) 60 | ([^Cluster cluster topic partition] 61 | (-> 62 | (.leaderFor cluster (codec/map->topic-partition topic partition)) 63 | (codec/decode)))) 64 | 65 | (defn node-by-id 66 | "Retrieves a node by its node id." 67 | [^Cluster cluster node-id] 68 | (some-> (.nodeById cluster (int node-id)) 69 | (codec/decode))) 70 | 71 | (defn nodes 72 | "Retrieves a collection of nodes in the cluster." 73 | [^Cluster cluster] 74 | (->> (.nodes cluster) 75 | (codec/decode))) 76 | 77 | (defn partition-info-for-topic 78 | "Retrieves the partition info for a given topic partition." 79 | ([^Cluster cluster {:keys [topic partition]}] 80 | (partition-info-for-topic cluster topic partition)) 81 | ([^Cluster cluster topic partition] 82 | (->> (codec/map->topic-partition topic partition) 83 | (.partition cluster) 84 | (codec/decode)))) 85 | 86 | (defn partition-count 87 | "Retrives the partition count for a given topic." 88 | [^Cluster cluster ^String topic] 89 | (.partitionCountForTopic cluster topic)) 90 | 91 | (defn partitions-for-node 92 | "Retrieves a collection of partitions with a leader matching the given node id." 93 | [^Cluster cluster node-id] 94 | (->> (.partitionsForNode cluster (int node-id)) 95 | (codec/decode))) 96 | 97 | (defn partitions-for-topic 98 | "Retrieves a collection of partitions for a given topic." 99 | [^Cluster cluster ^String topic] 100 | (->> (.partitionsForTopic cluster topic) 101 | (codec/decode))) 102 | 103 | (defn topics 104 | "Retrieves a collection of topics in this cluster." 
105 | [^Cluster cluster] 106 | (->> 107 | (.topics cluster) 108 | (codec/decode))) -------------------------------------------------------------------------------- /src/franzy/clients/codec.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.codec 2 | "Encodes and decodes Java and Clojure types used with org.apache.kafka.common 3 | 4 | See http://kafka.apache.org/090/javadoc/org/apache/kafka/common/package-summary.html" 5 | (:require [franzy.clients.consumer.types :as ct]) 6 | (:import (java.util Map Set Collection List) 7 | (org.apache.kafka.common MetricName PartitionInfo Node TopicPartition Cluster) 8 | (org.apache.kafka.clients.producer ProducerRecord RecordMetadata) 9 | (org.apache.kafka.clients.consumer OffsetAndMetadata ConsumerRecord ConsumerRecords$ConcatenatedIterable OffsetResetStrategy ConsumerRecords) 10 | (org.apache.kafka.common.metrics KafkaMetric))) 11 | 12 | (declare decode-xf) 13 | 14 | (defprotocol FranzCodec 15 | "Protocol used to encode and decode values between Franzy and the Kafka Java client. 16 | 17 | Extend this protocol to implement conversions between types and modify existing conversions as needed." 18 | (encode [v]) 19 | (decode [v])) 20 | 21 | (defn map->topic-partition 22 | "Convert a map of topic and partition to a Kafka TopicPartition. 23 | 24 | Example Usage: 25 | 26 | `(map->topic-partition {:topic \"pontifications\" :partition 613})`" 27 | (^TopicPartition [topic partition] 28 | (TopicPartition. (name topic) (int partition))) 29 | (^TopicPartition [{:keys [topic partition]}] 30 | (TopicPartition. (name topic) (int partition)))) 31 | 32 | (defn maps->topic-partitions 33 | "Converts a collection of maps to topic partitions." 34 | [topic-partitions] 35 | (->> 36 | topic-partitions 37 | (map map->topic-partition))) 38 | 39 | ;;TODO: get rid of this, can probably manage without this extra fn and combine with the previous conversion fn 40 | (defn maps->topic-partition-array 41 | "Converts a collection of maps to topic partition arrays." 42 | [topic-partitions] 43 | (->> topic-partitions 44 | (maps->topic-partitions) 45 | (into []) 46 | (into-array TopicPartition))) 47 | 48 | (defn map->node 49 | "Converts a node map to a Kafka Node." 50 | ^Node 51 | [{:keys [id host port]}] 52 | (Node. id host port)) 53 | 54 | (defn node-maps->node-array 55 | "Converts a collection of node maps to a Kafka Node[]" 56 | [nodes] 57 | (->> nodes 58 | (map map->node) 59 | (into-array Node))) 60 | 61 | (defn map->partition-info 62 | ^PartitionInfo 63 | [{:keys [topic partition leader replicas in-sync-replicas]}] 64 | (let [leader-node (map->node leader) 65 | replica-nodes (node-maps->node-array replicas) 66 | in-sync-replica-nodes (node-maps->node-array in-sync-replicas)] 67 | (PartitionInfo. topic partition leader-node replica-nodes in-sync-replica-nodes))) 68 | 69 | (defn map->producer-record 70 | ^ProducerRecord 71 | ([{:keys [topic partition key value]}] 72 | (map->producer-record topic partition key value)) 73 | ([^String topic partition key value] 74 | "Creates a record used by a producer when sending data to Kafka. 75 | 76 | * topic - The topic the record will be appended to (required) 77 | * partition - The partition to which the record should be sent (optional) 78 | * key - The key that will be included in the record (optional) 79 | * value - The record contents (required)" 80 | (if (nil? 
partition) 81 | ;Note: Producer Record has 3 constructors, but just proxies with a null key if key is null, so it's not needed here 82 | (ProducerRecord. topic key value) 83 | ;Note: we could assume that a nil partition should :or to 1, but we let Kafka handle this behavio rather than force it 84 | (ProducerRecord. topic (int partition) key value)))) 85 | 86 | (defn map->offset-metadata 87 | ^OffsetAndMetadata 88 | [{:keys [offset metadata]}] 89 | ;;TODO: schema 90 | (OffsetAndMetadata. offset metadata)) 91 | 92 | (defn map->topic-partition-offsets-map 93 | "Takes a Clojure map where the keys are topic partition maps and the values are offset metaedata maps, then converts it to a java.util.Map made of TopicPartition as keys and OffsetAndMetadata as values. 94 | 95 | Example: 96 | 97 | `(map->topic-partition-offsets-map 98 | {{:topic \"theweather\" :partition 0} {:offset 89 :metadata \"seti alpha 6\"} 99 | {:topic \"thegovernment\" :partition 1} {:offset 2112 :metadata \"1984\"} 100 | {:topic \"popsongs\" :partition 1} {:offset 69 :metadata \"All I need is a miracle\"}})`" 101 | [m] 102 | (->> m 103 | (reduce 104 | (fn [m [k v]] (assoc! m (map->topic-partition k) (map->offset-metadata v))) (transient {})) 105 | (persistent!))) 106 | 107 | (defn map->consumer-record 108 | "Convert a map of to a Kafka ConsumerRecord" 109 | ^ConsumerRecord 110 | [{:keys [key offset partition topic value]}] 111 | ;;TODO: not sure why/when some of these might be optional (topic is required) per the Java Constructor - Need to deal with this vs. schema 112 | (ConsumerRecord. (name topic) (some-> partition int) (some-> offset long) key value)) 113 | 114 | (defn consumer-record->map 115 | "Converts a Kafka Java API ConsumerRecord to a map." 116 | [^ConsumerRecord consumer-record] 117 | {:topic (.topic consumer-record) 118 | :partition (.partition consumer-record) 119 | :offset (.offset consumer-record) 120 | :key (.key consumer-record) 121 | :value (.value consumer-record)}) 122 | 123 | (defn consumer-records->map 124 | "Converts a Kafka Java API ConsumerRecords to a map." 125 | [^ConsumerRecords consumer-records] 126 | (map map->consumer-record (iterator-seq (.iterator consumer-records)))) 127 | 128 | ;;I have no idea why they chose not just send in the same topic partition+offset data structures as the client, but so be it.... 129 | (defn map->topic-partition-offset-number 130 | "Converts a map where the keys are topic partitions and the values are offset positions (Long) to a map of Map 131 | 132 | Example: 133 | 134 | `(map->topic-partition-offset-number 135 | {{:topic \"fixins\" :partition 0} 0 136 | {:topic \"fixins\" :partition 1} 0} 137 | {:topic \"expired-condiments\" :partition 55} 23})`" 138 | [m] 139 | (->> m 140 | (reduce 141 | (fn [m [k v]] (assoc! m (map->topic-partition k) v)) (transient {})) 142 | (persistent!))) 143 | 144 | (defn map->Cluster 145 | "Converts a map of cluster metadata to a Cluster Java metadata object." 146 | [{:keys [nodes partitions unauthorized-topics]}] 147 | ((Cluster. (map map->node nodes) (map map->partition-info partitions) (into #{} unauthorized-topics)))) 148 | 149 | (defn lazy-consumer-records 150 | "Creates a lazy wrapper around Java ConsumerRecordsConcatenatedIterables. 151 | Useful if you want to create a wrapper or consume existing Java Kafka client code." 
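;; A small round-trip sketch for the conversion helpers above; topic names, offsets,
;; and metadata strings are made up for illustration.
(comment
  ;; map -> TopicPartition -> map (decode is provided by the FranzCodec protocol below)
  (-> {:topic "events" :partition 3}
      (map->topic-partition)
      (decode))
  ;; build the java.util.Map of TopicPartition -> OffsetAndMetadata that the
  ;; consumer commit APIs expect
  (map->topic-partition-offsets-map
    {{:topic "events" :partition 3} {:offset 42 :metadata "checkpoint"}}))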
152 | [^ConsumerRecords$ConcatenatedIterable iterable] 153 | (lazy-seq 154 | (when-let [s (iterator-seq (.iterator iterable))] 155 | (cons (first s) (rest s))))) 156 | 157 | ;;TODO: strategy for better dealing with parsing enums 158 | (defn keyword->offset-reset-strategy 159 | ^OffsetResetStrategy [offset-reset-strategy] 160 | (some->> offset-reset-strategy 161 | (name) 162 | (.toUpperCase) 163 | (OffsetResetStrategy/valueOf))) 164 | 165 | (extend-protocol FranzCodec 166 | TopicPartition 167 | (encode [topic-partition] topic-partition) 168 | (decode [topic-partition] 169 | {:topic (.topic topic-partition) 170 | :partition (.partition topic-partition)}) 171 | 172 | ConsumerRecord 173 | (encode [consumer-record] consumer-record) 174 | (decode [consumer-record] 175 | ;;here we are preferring the record form over the map form 176 | ;;you can replace this with consumer-record->map if you want maps 177 | ;;doto just sounds mean 178 | (ct/->ConsumerRecord (.topic consumer-record) 179 | (.partition consumer-record) 180 | (.offset consumer-record) 181 | (.key consumer-record) 182 | (.value consumer-record))) 183 | 184 | ;;Prefer reified consume records, leaving this here for perf testing 185 | ConsumerRecords 186 | (encode [consumer-records] consumer-records) 187 | (decode [consumer-records] 188 | (map decode (iterator-seq (.iterator consumer-records)))) 189 | 190 | OffsetAndMetadata 191 | (encode [offset-metadata] offset-metadata) 192 | (decode [offset-metadata] 193 | {:offset (.offset offset-metadata) 194 | :metadata (.metadata offset-metadata)}) 195 | Node 196 | (encode [node] node) 197 | (decode [node] 198 | {:id (.id node) 199 | :host (.host node) 200 | :port (.port node)}) 201 | 202 | PartitionInfo 203 | (encode [partition-info] partition-info) 204 | (decode [partition-info] 205 | {:topic (.topic partition-info) 206 | :partition (.partition partition-info) 207 | :leader (-> (.leader partition-info) 208 | (decode)) 209 | ;;TODO: better hanlding of this issue w/ protocol: http://dev.clojure.org/jira/browse/CLJ-1790 210 | :replicas (->> (.replicas partition-info) 211 | (into [] decode-xf)) 212 | :in-sync-replicas (->> (.inSyncReplicas partition-info) 213 | (into [] decode-xf))}) 214 | 215 | ProducerRecord 216 | (encode [producer-record] producer-record) 217 | (decode [producer-record] 218 | {:topic (.topic producer-record) 219 | :partition (.partition producer-record) 220 | :key (.key producer-record) 221 | :value (.value producer-record)}) 222 | 223 | RecordMetadata 224 | (encode [record-metadata] record-metadata) 225 | (decode [record-metadata] 226 | {:topic (.topic record-metadata) 227 | :partition (.partition record-metadata) 228 | :offset (.offset record-metadata)}) 229 | 230 | MetricName 231 | (encode [metric-name] metric-name) 232 | (decode [metric-name] 233 | {:name (.name metric-name) 234 | :description (.description metric-name) 235 | :group (.group metric-name) 236 | :tags (->> (.tags metric-name) 237 | (decode))}) 238 | 239 | KafkaMetric 240 | (encode [metric] metric) 241 | (decode [metric] 242 | (when metric 243 | (let [metric-value (.value metric)] 244 | {:metric-name (-> (.metricName metric) 245 | (decode)) 246 | ;;some weirdness here with things like -Infinity/Infinity when they probably didn't want to use a nullable double, 247 | ;;might want to drop it from the map, return nil for the entire map, or just leave it as -Infinity? 
248 | :value (if (Double/isInfinite metric-value) nil metric-value)}))) 249 | 250 | Cluster 251 | (encode [cluster] cluster) 252 | ;;Note: this object is a bit richer than just returning a map, so for querying the extra data, don't decode it, 253 | ;instead used the provided cluster protocol 254 | (decode [cluster] 255 | {:nodes (->> (.nodes cluster) 256 | (decode)) 257 | :topics (->> (.topics cluster) 258 | (decode)) 259 | :unauthorized-topics (->> (.unauthorizedTopics cluster) 260 | (decode))}) 261 | 262 | 263 | 264 | List 265 | (encode [v] v) 266 | (decode [v] 267 | (when (seq v) 268 | (into [] decode-xf v))) 269 | 270 | Collection 271 | (encode [v] v) 272 | (decode [v] 273 | (when (seq v) 274 | (into [] decode-xf v))) 275 | 276 | Set 277 | (encode [v] v) 278 | (decode [v] 279 | (when (seq v) 280 | (into #{} decode-xf v))) 281 | 282 | ;;might want to toss this out 283 | Iterable 284 | (encode [it] it) 285 | (decode [it] 286 | (map decode (iterator-seq (.iterator it)))) 287 | 288 | Map 289 | (encode [v] v) 290 | (decode [v] 291 | (->> v 292 | (reduce (fn [m [k val]] 293 | (assoc! m (as-> (decode k) dk 294 | (if (string? dk) (keyword dk) dk)) 295 | (decode val))) 296 | (transient {})) 297 | persistent!)) 298 | 299 | nil 300 | (encode [v] v) 301 | (decode [v] v) 302 | 303 | Object 304 | (encode [v] v) 305 | (decode [v] v)) 306 | 307 | (def decode-xf 308 | "Transducer, applied on decode of collections that may be overriden using alter-var-root for example." 309 | (map decode)) 310 | -------------------------------------------------------------------------------- /src/franzy/clients/connect/schema.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.connect.schema 2 | (:require [schema.core :as s] 3 | [franzy.common.schema :as fs] 4 | [franzy.common.models.schema :as fms])) 5 | 6 | ;;TODO: more restrictive schema + review 7 | (def KafkaConnectConfig 8 | "Schema for a Kafka Connect Config. 9 | 10 | Note: Although Kafka Connect itself is not yet (or ever - rumblings about deprecation, also prefer Onyx, Storm, Spark, etc.), supported by this system, the configuration is provided here for integration with other Kafka clients or any application validation users may require. 
11 | 12 | See http://kafka.apache.org/documentation.html#connectconfigs" 13 | {(s/required-key :bootstrap.servers) fs/NonEmptyStringOrStringList ;;TODO: more strict schema 14 | (s/optional-key :group.id) s/Str 15 | (s/optional-key :internal.key.converter) s/Str ;TODO: class 16 | (s/optional-key :internal.value.converter) s/Str ;TODO: class 17 | (s/optional-key :key.converter) s/Str ;TODO: class 18 | (s/optional-key :value.converter) s/Str ;TODO: class 19 | (s/optional-key :cluster) s/Str 20 | (s/optional-key :heartbeat.interval.ms) fs/PosInt 21 | (s/optional-key :session.timeout.ms) fs/SPosInt 22 | (s/optional-key :ssl.key.password) s/Str 23 | (s/optional-key :ssl.keystroke.location) s/Str 24 | (s/optional-key :ssl.keystore.password) s/Str 25 | (s/optional-key :ssl.truststore.location) s/Str 26 | (s/optional-key :ssl.truststore.password) s/Str 27 | (s/optional-key :connections.max.idle.ms) fs/SPosLong 28 | (s/optional-key :receive.buffer.bytes) fs/SPosInt 29 | (s/optional-key :request.timeout.ms) fs/SPosInt 30 | (s/optional-key :sasl.kerberos.service.name) s/Str ;;TODO: list 31 | (s/optional-key :security.protocol) fms/SecurityProtocolEnum 32 | (s/optional-key :send.buffer.bytes) fs/SPosInt 33 | (s/optional-key :ssl.enabled.protocols) fs/StringOrStringList 34 | (s/optional-key :ssl.keystore.type) s/Str 35 | (s/optional-key :ssl.protocol) s/Str 36 | (s/optional-key :ssl.provider) s/Str 37 | (s/optional-key :ssl.truststore.type) s/Str 38 | (s/optional-key :worker.sync.timeout.ms) fs/SPosInt 39 | (s/optional-key :worker.unsync.backoff.ms) fs/SPosInt 40 | (s/optional-key :metadata.max.age.ms) fs/SPosLong 41 | (s/optional-key :metric.reporters) fs/StringOrStringList 42 | (s/optional-key :metric.num.samples) fs/PosInt 43 | (s/optional-key :metrics.sample.window.ms) fs/SPosLong 44 | (s/optional-key :offset.flush.interval.ms) fs/SPosLong 45 | (s/optional-key :offset.flush.timeout.ms) fs/SPosLong 46 | (s/optional-key :reconnect.backoff.ms) fs/SPosLong 47 | (s/optional-key :rest.advertised.host.name) s/Str 48 | (s/optional-key :rest.advertised.host.port) s/Str 49 | (s/optional-key :rest.host.name) s/Str 50 | (s/optional-key :rest.host.port) s/Str 51 | (s/optional-key :retry.backoff.ms) fs/SPosLong 52 | (s/optional-key :sasl.kerberos.kinit.cmd) s/Str 53 | (s/optional-key :sasl.kerberos.min.time.before.relogin) fs/SPosLong 54 | (s/optional-key :sasl.kerberos.ticket.renew.jitter) fs/SPosDouble 55 | (s/optional-key :sasl.kerberos.ticket.renew.window.factor) fs/SPosDouble 56 | (s/optional-key :ssl.cipher.suites) fs/StringOrStringList 57 | (s/optional-key :ssl.endpoint.identification.algorithm) s/Str 58 | (s/optional-key :ssl.keymanager.algorithm) s/Str 59 | (s/optional-key :ssl.trustmanager.algorithm) s/Str 60 | (s/optional-key :task.shutdown.graceful.timeout.ms) fs/SPosLong}) 61 | 62 | -------------------------------------------------------------------------------- /src/franzy/clients/consumer/callbacks.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.consumer.callbacks 2 | (:require [franzy.clients.codec :as codec]) 3 | (:import (org.apache.kafka.clients.consumer ConsumerRebalanceListener OffsetCommitCallback))) 4 | 5 | (deftype NoOpConsumerRebalanceListener [] 6 | ConsumerRebalanceListener 7 | (onPartitionsAssigned [_ _]) 8 | (onPartitionsRevoked [_ _])) 9 | 10 | (defn ^NoOpConsumerRebalanceListener no-op-consumer-rebalance-listener [] 11 | "Creates a no-op consumer rebalance listener." 
12 | (NoOpConsumerRebalanceListener.)) 13 | 14 | (defn ^NoOpConsumerRebalanceListener no-op-consumer-rebalance-listener 15 | ;;doing nothing has never been so good... 16 | "Creates a no-op consumer rebalance listener 17 | This callback is implemented as a concrete type, which you may use for introspection for debugging, testing, etc." 18 | ^ConsumerRebalanceListener [] 19 | (NoOpConsumerRebalanceListener.)) 20 | 21 | (defn consumer-rebalance-listener 22 | "Creates a ConsumerRebalanceListener from Clojure function(s) that will receive a list of topic partitions assigned or 23 | revoked, subject to the codec. 24 | 25 | For the 2-arity version of this function, you must provide a 1-arity function receiving a map of topic partitions. 26 | The partitions-assigned-fn will be called when a partition is assigned and will receive any 27 | topic partitions assigned. Likewise, when a partition is revoked, the partitions-revoked-fn will be called. 28 | 29 | The 1-arity version of this function works in a similar fashion, but you must provide a 2-arity function that will 30 | receive topic partitions as its first arugment, and a keyword as its second argument. 31 | When partitions are assigned, the :assigned keyword will be passed to your function. 32 | Likewise, when partitons are revoked, the :revoked keyword will be passed. 33 | 34 | The 0-arity version of this function will create a NoOpConsumerRebalanceListener, which you can use for testing, 35 | defaults, etc. 36 | 37 | You may use this callback to trigger custom actions when the set of partitions assigned to the consumer changes. 38 | For example, if you want to save offsets to your own datastore, you may do so either when the partition is assigned 39 | or revoked. 40 | 41 | It is recommended that if this datastore is not Kafka itself, it should be high-performance and fault-tolerant. 42 | Any consumers manually managing offsets should either use this function to create and later register this callback, 43 | or you should manually implement the ConsumerRebalanceListener interface yourself on a reified object or via deftype. 44 | 45 | See https://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/ConsumerRebalanceListener.html for more information." 46 | (^ConsumerRebalanceListener [] 47 | "Creates a no-op consumer rebalance listener, useful for testing, defaults, etc." 48 | (no-op-consumer-rebalance-listener)) 49 | (^ConsumerRebalanceListener [partition-rebalancer-fn] 50 | ;;per user-request - useful for multi-methods 51 | (reify ConsumerRebalanceListener 52 | (onPartitionsAssigned [_ topic-partitions] 53 | (partition-rebalancer-fn (->> topic-partitions 54 | (codec/decode)) :assigned)) 55 | (onPartitionsRevoked [_ topic-partitions] 56 | (partition-rebalancer-fn (->> topic-partitions 57 | (codec/decode)) :revoked)))) 58 | (^ConsumerRebalanceListener [partitions-assigned-fn partitions-revoked-fn] 59 | (reify ConsumerRebalanceListener 60 | (onPartitionsAssigned [_ topic-partitions] 61 | (partitions-assigned-fn (->> topic-partitions 62 | (codec/decode)))) 63 | (onPartitionsRevoked [_ topic-partitions] 64 | (partitions-revoked-fn (->> topic-partitions 65 | (codec/decode))))))) 66 | 67 | (deftype NoOpOffsetCommitCallback [] 68 | OffsetCommitCallback 69 | ;;looks like an atari enemy 70 | (onComplete [_ _ _])) 71 | 72 | (defn ^NoOpOffsetCommitCallback no-op-offset-commit-callback 73 | "Creates a no-op offset commit callack. 74 | This callback is implemented as a concrete type, which you may use for introspection, testing, logging, etc." 
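;; Sketches of building rebalance listeners from plain functions, matching the arities
;; documented above; the println bodies are placeholders for real handlers.
(comment
  ;; 2-arity: separate assigned/revoked handlers, each receiving decoded topic partitions
  (consumer-rebalance-listener
    (fn [topic-partitions] (println "assigned:" topic-partitions))
    (fn [topic-partitions] (println "revoked:" topic-partitions)))
  ;; 1-arity: a single handler receiving the partitions and :assigned or :revoked
  (consumer-rebalance-listener
    (fn [topic-partitions event] (println event topic-partitions))))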
75 | [] 76 | (NoOpOffsetCommitCallback.)) 77 | 78 | (defn offset-commit-callback 79 | "Creates an OffsetCommitCallback from an optional Clojure function(s) and passes an exceptions and offset metadata 80 | created while committing offsets. 81 | 82 | There are 3 arities you may use to create this callback 83 | 84 | The single arity version will create a callback given a 2-arity offset commit function. 85 | The first argument passed to your function will be a map of offset metadata. 86 | The second argument will be any exceptions. These arguments are mutually exclusive. 87 | 88 | If you prefer a version that separates errors and offsets, use the 2-arity version of this function. 89 | The 2-arity version receives an offset commit success function of a single arity and will receive the offsets. 90 | The offset-commit failure version of this function will receive an exception. 91 | 92 | If neither of these suit your use-case or you need something for testing, convenience, defaults, etc, the 0-arity 93 | version of this function will generate a NoOpOffsetCommitCallback for you. Like the name, this is a no-op. 94 | 95 | An Offset commit callback is used to provide asynchronous handling of offset commit request completion. 96 | For example, when you wish to add metadata to particular offsets you commit, you can listen for the completition of 97 | the offset metadata storage using this callback. 98 | This function will be called when the offset commit request sent to the server has been acknowledged. 99 | 100 | See https://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/OffsetCommitCallback.html for more information." 101 | (^OffsetCommitCallback [] 102 | (no-op-offset-commit-callback)) 103 | (^OffsetCommitCallback [offset-commit-fn] 104 | (reify OffsetCommitCallback 105 | (onComplete [_ offsets e] 106 | (offset-commit-fn (->> offsets (codec/decode)) e)))) 107 | (^OffsetCommitCallback [offset-commit-success-fn offset-commit-failure-fn] 108 | (reify OffsetCommitCallback 109 | (onComplete [_ offsets e] 110 | (if offsets 111 | (->> offsets (codec/decode) (offset-commit-success-fn)) 112 | (offset-commit-failure-fn e)))))) 113 | -------------------------------------------------------------------------------- /src/franzy/clients/consumer/client.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.consumer.client 2 | (:require [schema.core :as s] 3 | [franzy.clients.consumer.schema :as cs] 4 | [franzy.common.metadata.protocols :refer [KafkaMeasurable TopicMetadataProvider PartitionMetadataProvider]] 5 | [franzy.clients.consumer.protocols :refer :all] 6 | [franzy.clients.codec :as codec] 7 | [franzy.clients.consumer.callbacks :as callbacks] 8 | [franzy.clients.consumer.results :as consumer-results] 9 | [franzy.common.configuration.codec :as config-codec] 10 | [franzy.clients.consumer.defaults :as defaults]) 11 | (:import (org.apache.kafka.clients.consumer KafkaConsumer ConsumerRebalanceListener Consumer) 12 | (org.apache.kafka.common.serialization Deserializer) 13 | (java.util List Properties) 14 | (java.util.regex Pattern) 15 | (java.io Closeable))) 16 | 17 | (deftype FranzConsumer 18 | [^Consumer consumer consumer-options] 19 | FranzyConsumer 20 | (pause! [_ topic-partitions] 21 | "Suspend fetching from the requested partitions. 22 | Future calls to poll, i.e. `(poll! c {:poll-timeout-ms 1000})` will not return any records from these partitions 23 | until they have been resumed using 24 | 25 | `(resume c topic-partitions)`. 
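;; Sketches of offset-commit callbacks built from functions via the callbacks namespace
;; (required above as `callbacks`); the logging bodies are placeholders. Either callback
;; could be supplied through the :offset-commit-callback consumer option.
(comment
  ;; single handler: offsets and exception are mutually exclusive
  (callbacks/offset-commit-callback
    (fn [offsets e]
      (if e (println "commit failed:" e) (println "committed:" offsets))))
  ;; separate success/failure handlers
  (callbacks/offset-commit-callback
    (fn [offsets] (println "committed:" offsets))
    (fn [e] (println "commit failed:" e))))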
26 | 27 | Note that this method does not affect partition subscription. In particular, it does not cause a group rebalance 28 | when automatic assignment is used." 29 | (->> 30 | topic-partitions 31 | (codec/maps->topic-partition-array) 32 | (.pause consumer))) 33 | (poll! [this] 34 | (poll! this nil)) 35 | (poll! [_ {:keys [consumer-records-fn poll-timeout-ms] 36 | :or {consumer-records-fn (get consumer-options :consumer-records-fn seq) 37 | poll-timeout-ms (:poll-timeout-ms consumer-options)}}] 38 | "Polls for all topics or topic partitions specified by assign/subscribe and returns a lazy sequence of consumer record maps. 39 | 40 | It is an error to not have subscribed to any topics or partitions before polling for data. 41 | 42 | On each poll, consumer will try to use the last consumed offset as the starting offset and fetch sequentially. 43 | The last consumed offset can be manually set through `(seek! c topic-partition offset)` or automatically set 44 | as the last committed offset for the subscribed list of partitions" 45 | ;;TODO: move last step into codec 46 | (some->> (.poll consumer poll-timeout-ms) 47 | (consumer-results/consumer-records) 48 | ;(consumer-records-fn) 49 | )) 50 | (resume! [_ topic-partitions] 51 | "Resume specified partitions which have been paused with (pause c topic-partitions). 52 | New calls to (poll c timeout) will return records from these partitions if there are any to be fetched. 53 | If the partitions were not previously paused, this method is a no-op." 54 | (->> topic-partitions 55 | (codec/maps->topic-partition-array) 56 | (.resume consumer))) 57 | (wakeup! [_] 58 | "Wakeup the consumer. This method is thread-safe and is useful in particular to abort a long poll. The thread which is blocking in an operation will throw WakeupException." 59 | (.wakeup consumer)) 60 | Closeable 61 | (close [_] 62 | (.close consumer)) 63 | TopicMetadataProvider 64 | (list-topics [_] 65 | "Get metadata about partitions for all topics that the user is authorized to view." 66 | (->> (.listTopics consumer) 67 | (codec/decode))) 68 | PartitionMetadataProvider 69 | (partitions-for [_ topic] 70 | "Get metadata about the partitions for a given topic." 71 | (->> topic 72 | (.partitionsFor consumer) 73 | (codec/decode))) 74 | SeekableLog 75 | (next-offset [_ topic-partition] 76 | "Gets the offsets of the next record that will be fetched in the given topic partition. 77 | 78 | Example: 79 | 80 | `(next-offset {:topic \"linked-in-profiles\" :partition 43252})` 81 | -> {:offset 23432 :metadata \"this headhunter has great opportunities for only me, adding to rolodex.\"}" 82 | (->> topic-partition 83 | (codec/map->topic-partition) 84 | (.position consumer))) 85 | (seek-to-offset! [_ topic-partition offset] 86 | "Given a topic partition and an offset number, seeks to an offset in the given partition for that topic. 87 | 88 | Overrides the fetch offsets that the consumer will use on the next poll, ex: (poll! c). 89 | If this API is invoked for the same partition more than once, the latest offset will be used on the next poll. 90 | 91 | Example: `(seek-to-offset! {:topic \"history-of-world\" :partition 0} 0)` - seeks to the beginning of time, in this dimension. 92 | 93 | > Note: that you may lose data if this API is arbitrarily used in the middle of consumption, to reset the fetch offsets. 94 | 95 | Use with extreme care, realizing that this API statefully repositions where Kafka will seek from. 96 | 97 | > Note: You may not seek to an unassigned partition. 
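;; A sketch of a basic poll/pause/resume/seek flow against a FranzConsumer `c`, which is
;; assumed to exist and to already have the partition shown assigned (or subscribed).
(comment
  (let [tp {:topic "events" :partition 0}]
    ;; fetch whatever is available, waiting up to 1000ms
    (poll! c {:poll-timeout-ms 1000})
    ;; temporarily stop fetching from this partition without triggering a rebalance
    (pause! c [tp])
    ;; later polls will again return records from the partition
    (resume! c [tp])
    ;; where will the next fetch for this partition start?
    (next-offset c tp)
    ;; reposition the next fetch for this partition (only valid once assigned)
    (seek-to-offset! c tp 0)))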
For example, if you attempt to seek before/while subscribing, an exception will be thrown by design. This is because the consumer has not yet been assigned a topic and partition that you can seek into. The solution is to either manually assign a partition to your consumer, or if using subscriptions, seek after subscription. 98 | 99 | You can assume that you have been assigned a valid topic and partition when receiving a valid event inside a consumer rebalance callback, and thus seek inside the callback." 100 | (as-> (codec/map->topic-partition topic-partition) tp 101 | (.seek consumer tp offset))) 102 | (seek-to-beginning-offset! [_ topic-partitions] 103 | "Seeks to the beginning of a given topic and partition. 104 | Typically identical to calling (seek-to-offset c topic-partition 0), but makes semantic intent clear and leaves the implementation detail of the beginning position to Kafka." 105 | (->> topic-partitions 106 | (codec/maps->topic-partition-array) 107 | (.seekToBeginning consumer))) 108 | (seek-to-end-offset! [_ topic-partitions] 109 | "Seeks to the last offset for each of the given partitions. 110 | This function evaluates lazily, seeking to the final offset in all partitions only when `(poll! c)` or `(next-offset c topic-partition)` are called. 111 | 112 | > Note: Do not attempt to manually seek to the end offset via seek-to-offset!, instead call this function to ensure correct behavior. Though your seek may succeed, it will not be guaranteed to yield the next offset correctly as Kafka is distributed." 113 | (->> topic-partitions 114 | (codec/maps->topic-partition-array) 115 | (.seekToEnd consumer))) 116 | OffsetCommiter 117 | (commit-offsets-async! [_] 118 | "Same as `(commit-offsets-async! c options)`, but behaves as a fire-and-forget commit. 119 | If you require notification when a commit completes, you must pass a commit offset callback via a different arity of this function." 120 | (.commitAsync consumer)) 121 | (commit-offsets-async! [_ {:keys [offset-commit-callback] 122 | :or {offset-commit-callback (:offset-commit-callback consumer-options)}}] 123 | "Commits a list of offsets to Kafka, returned on the last poll, ex: `(poll! c)`. 124 | 125 | You may optionally pass an offset commit callback implement org.apache.kafka.clients.consumer.OffsetCommitCallback. 126 | The callback will be invoked when the commit completes. 127 | If you always need the same offset commit callback, prefer setting this via the offset-commit-callback key in the consumer options. 128 | The function must be a 2 arity function, in the form of `(fn [topic-partition offset-metadata])`. 129 | You may construct the callback from a Clojure function by calling `(offset-commit-callback my-offset-commit-processing-fn)`. 130 | You should avoid creating callbacks anew each call, and instead cache, reify, deftype, defrecord, impl Java interface, etc. instead. 131 | 132 | This commits offsets only to Kafka. The offsets committed using this API will be used on the first fetch after every rebalance and also on startup. 133 | As such, if you need to store offsets in anything other than Kafka, this API should not be used. 134 | 135 | This is an asynchronous call and will not block. Any errors encountered are either passed to the callback (if provided) or discarded." 136 | (->> 137 | (.commitAsync consumer offset-commit-callback))) 138 | (commit-offsets-async! 
[_ offsets {:keys [offset-commit-callback] 139 | :or {offset-commit-callback (:offset-commit-callback consumer-options)}}] 140 | "Commits a list of offsets to kafka given a map where the keys are topic partitions and the values are offset metadataof offset metadata. 141 | 142 | This commits offsets to Kafka. The offsets committed using this API will be used on the first fetch after every rebalance and also on startup. 143 | As such, if you need to store offsets in anything other than Kafka, this API should not be used. 144 | The committed offset should be the next message your application will consume, i.e. last-processed-offset + 1. 145 | 146 | This is an asynchronous call and will not block. 147 | Any errors encountered are either passed to the callback (if provided) or discarded. 148 | Example: 149 | 150 | `(commit-offsets-async! 151 | {{:topic \"failed-startups\" :partition 102} {:offset 124, :metadata \"uber for spoiled people\"} 152 | {:topic \"failed-startups\" :partition 103} {:offset 2006, :metadata \"starting a music and fashion startup was a great decision\"}} 153 | {:offset-commit-callback call-me-maybe-offsets-callback-fn)`" 154 | (.commitAsync consumer (codec/map->topic-partition-offsets-map offsets) offset-commit-callback)) 155 | (commit-offsets-sync! [_] 156 | "Synchronous version of `(commit-offsets-async! c)`. 157 | 158 | If you require a blocking commit of offsets to Kafka, you can either call this function or manually implement blocking using the alternative async version of this function. 159 | Any exceptions that occur during a commit will be returned to the caller on the calling thread." 160 | (.commitSync consumer)) 161 | (commit-offsets-sync! [_ offsets] 162 | "Commit the specified offsets for the specified map of topics and partitions. 163 | 164 | This commits offsets to Kafka. The offsets committed using this API will be used on the first fetch after every rebalance and also on startup. 165 | As such, if you need to store offsets in anything other than Kafka, this API should not be used. 166 | The committed offset should be the next message your application will consume, i.e. lastProcessedMessageOffset + 1. 167 | 168 | This is a synchronous commits and will block until either the commit succeeds or an unrecoverable error is encountered (in which case it is thrown to the caller). 169 | 170 | Example: 171 | 172 | `(commit-offsets! 173 | {{:topic \"failed-startups\" :partition 25} {:offset 14, :metadata \"flooz was my best idea\"} 174 | {:topic \"failed-startups\" :partition 25} {:offset 4791, :metadata \"billions for yet another chat app\"}})`" 175 | (->> offsets 176 | (codec/map->topic-partition-offsets-map) 177 | (.commitSync consumer))) 178 | (committed-offsets [_ topic-partition] 179 | "Get the last committed offset for the given partition (whether the commit was issued consumer or another). 180 | 181 | Returns offset metadata. 182 | 183 | Example: 184 | `(committed-offsets {:topic \"words-of-advice-for-young-people\" :partition 67})` 185 | 186 | `{:offset 96 :metadata \"Though it is hard to commit, it is easy to buy a fortune cookie.\"}`" 187 | (->> 188 | topic-partition 189 | (codec/map->topic-partition) 190 | (.committed consumer) 191 | (codec/decode))) 192 | PartitionAssignable 193 | (assigned-partitions [_] 194 | "Gets the partitions currently assigned to this consumer. 195 | 196 | Returns a set of topic partitions. 
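;; A sketch of committing and reading back offsets for a consumer `c` (assumed to exist
;; and to have the partition assigned); topic, offset, and metadata values are made up.
(comment
  (let [tp {:topic "events" :partition 0}]
    ;; blocking commit: the committed offset should be last-processed-offset + 1
    (commit-offsets-sync! c {tp {:offset 43 :metadata "nightly checkpoint"}})
    ;; read back the last committed offset and metadata for the partition
    (committed-offsets c tp)))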
197 | 198 | Example: 199 | 200 | `(assigned-partitions c)` 201 | 202 | `#{{:topic \"michael-ironside-action-credits\" :partition 55} {:topic \"michael-ironside-action-credits\" :partition 56} {:topic \"gigantic-collection-of-illegible-logentries\" :partition 93243}}`" 203 | ;;TODO: rewrite to speed up 204 | (->> 205 | (.assignment consumer) 206 | (codec/decode))) 207 | AutomaticPartitionAssignor 208 | (subscribe-to-partitions! [this topics] 209 | "Given a collection of topics, dynamically assigns partitions to those topics. 210 | 211 | Example: 212 | 213 | `(subscribe-to-partitions! [\"extremely-repetitive-music-recommendations\" \"small-data-called-big-data\" \"cats-watching-cat-videos\"])`" 214 | (subscribe-to-partitions! this topics nil)) 215 | (subscribe-to-partitions! [_ topics {:keys [^ConsumerRebalanceListener rebalance-listener-callback] 216 | :or {rebalance-listener-callback (:rebalance-listener-callback consumer-options)}}] 217 | "Subscribe to the given list of topics to get dynamically assigned partitions. 218 | 219 | Optionally, you can pass a map of consumer rebalance functions if you want to handle when a partition is assigned and/or revoked. 220 | You can specify a function for either case of revoke, both, or none at all. 221 | 222 | Example: 223 | 224 | `(subscribe! [\"air-guitar-players\" \"bad-hair-days\"] {:rebalance-listener-callback best-callback-on-the-high-seas})` 225 | 226 | Topic subscriptions are not incremental. 227 | This list will replace the current assignment (if there is one). 228 | Note that it is not possible to combine topic subscription with group management with manual partition assignment through (assign topics). 229 | If the given list of topics is empty, it is treated the same as (unsubscribe! c). 230 | 231 | As part of group management, the consumer will keep track of the list of consumers that belong to a particular group and will trigger a rebalance operation if one of the following events trigger 232 | 233 | * Number of partitions change for any of the subscribed list of topics 234 | * Topic is created or deleted 235 | * An existing member of the consumer group dies 236 | * A new member is added to an existing consumer group via the join API 237 | 238 | When any of these events are triggered, the provided listener will be invoked first to indicate that the consumer's assignment has been revoked, and then again when the new assignment has been received. 239 | Note that this listener will immediately override any listener set in a previous call to subscribe. 240 | It is guaranteed, however, that the partitions revoked/assigned through this interface are from topics subscribed in this call. See ConsumerRebalanceListener for more details." 241 | ;;at least in the time of writing, there appears to be a case where if somehow this callback is null, explosions happen 242 | ;;here we are being extra-safe, but this let can probably be removed in the future 243 | (let [^ConsumerRebalanceListener listener (or rebalance-listener-callback (callbacks/consumer-rebalance-listener))] 244 | ;;1) We could bind topics in the let statement using this cond, and call subscribe just once below, this is an experiment to avoid reflection 245 | ;;2) I am not sure I really like the idea of this cond vs. simply having distinct functions, however this de-clutters the protocol a bit. 246 | ;; Since the Java API has arities that are not distinct and by type, it is either this or more functions on the protocol. You saw nothing. 
247 | (cond 248 | ;;this is like one of those cards in the back of a magazine... 249 | (sequential? topics) 250 | (.subscribe consumer ^List topics listener) 251 | (instance? Pattern topics) 252 | (.subscribe consumer ^Pattern topics listener) 253 | (string? topics) 254 | (.subscribe consumer ^List (vec [topics]) listener) 255 | :else (throw (ex-info "topics must be a sequence of topic strings, a topic string, or a regular expression pattern." {:topics topics}))))) 256 | (partition-subscriptions [_] 257 | "Returns a set of the names of any currently subscribed topics. 258 | Will return the same topics used in the most recent call to `(subscribe-to-partitions! c topics)`, or an empty set if no such call has been made." 259 | ;;FIXME: I think it might be worth parsing the results out here instead of returning these nasty subscription strings. Le sigh. 260 | (->> (.subscription consumer) 261 | (codec/decode))) 262 | (clear-subscriptions! [_] 263 | "Clears any subscriptions, and thus currently assigned partitions to this consumer." 264 | (.unsubscribe consumer)) 265 | ManualPartitionAssignor 266 | (assign-partitions! [_ topic-partitions] 267 | "Manually assign a list of topic partitions to this consumer. 268 | 269 | > Note: It is an error to both subscribe and assign partitions manually using the same consumer. 270 | 271 | * Do: Assign a topic partition and manually seek to an offset if desired. 272 | * Don't: Subscribe to a topic partition, then assign the consumer to another topic partition, the same partition. Seriously, don't do it. 273 | 274 | Example: 275 | 276 | `(assign-partitions! c [{:topic \"piles-of-logs\" :partition 0} {:topic \"piles-of-logs\" :partition 1} {:topic \"overpriced-things\" :partition 999}])`" 277 | ;;It remains a mystery to me why the Java method accepts a list of topic partitions rather than a set 278 | ;;Conversely, the assignments call returns a set of topic partitions (probably correct), but alright, let's just live on the wild side and not force passing a set here. 279 | (->> 280 | topic-partitions 281 | (codec/maps->topic-partitions) 282 | (.assign consumer))) 283 | (clear-partition-assignments! [_] 284 | "Clears any currently assigned partitions to this consumer." 285 | ;;this may seem weird since we're not subscribing in the manual partition assignment case, 286 | ;; but calling this actually clears assigned partitions, it's a bad method name in Java 287 | (.unsubscribe consumer)) 288 | KafkaMeasurable 289 | (metrics [_] 290 | (->> 291 | (.metrics consumer) 292 | (codec/decode)))) 293 | 294 | (s/defn make-consumer :- FranzConsumer 295 | "Create a Kafka Consumer from a configuration, with optional deserializers and optional consumer options. 296 | If a callback is given, call it when stopping the consumer. 297 | If deserializers are provided, use them, otherwise expect deserializers via class name in the config map. 298 | 299 | This consumer is a wrapper of Kafka Java Consumer API. 300 | It provides a Clojure (ish) wrapper, with Clojure data structures to/from Kafka, and implements various protocols to 301 | allow more specialized consumers following this implementation. 302 | If you prefer a lower-level implementation or wish to test your consumer, you may wish to browse this implementation 303 | and implement one or all the protocols provided. 304 | 305 | This consumer provides implementations for both a manual and automatic consumer. You must not mix and match 306 | automatic and manual consumption. If you do violate this rule, an exception will be thrown. 
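;; Subscription vs. manual assignment for a consumer `c` (assumed); remember that the
;; two styles must not be mixed on the same consumer instance. Topic names are examples.
(comment
  ;; dynamic assignment: a vector of topics, a single topic string, or a regex all work
  (subscribe-to-partitions! c ["events" "audit"])
  (subscribe-to-partitions! c #"events-.*")
  (partition-subscriptions c)
  (clear-subscriptions! c)
  ;; ...or manual assignment of explicit topic partitions
  (assign-partitions! c [{:topic "events" :partition 0}
                         {:topic "events" :partition 1}])
  (clear-partition-assignments! c))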
Generally, this means 307 | you either need to subscribe to a specific topic partition to receive an automatic assignment, or manually assign 308 | yourself. 309 | 310 | Moreover, it is important to note that the offset position will be determined by your consumer configuration and 311 | whether or not you are saving offsets in Kafka itself, or an external location. If you need to manually reset or position 312 | the consumer offset in a particular partition, you can seek to it directly. Seeking will only work after partition assignment. 313 | For a subscription-based consumer, it is an error to seek before being assigned a partition. 314 | If you want to seek on assignmen for a subscription-based consumer, please do so using a callback to guarantee you 315 | have been assigned a valid partition. 316 | 317 | For per function documentation, please see the source for extensive comments, usage examples, etc. 318 | 319 | > Note: This implementation stresses a reasonable compromise between raw performance, extensibility, and usability, all things considered as: 320 | 321 | 1. A wrapper 322 | 2. Clojure 323 | 324 | Consumer options serve the following purposes: 325 | 326 | * Avoid repeated/inconvenient passing of defaults to various methods requiring options such as timeouts. Many consumers do not need per-call options. 327 | * Long-term extensibility as more features are added to this client, mitigating signature changes and excessive arities 328 | * Cheaper lookups and smaller memory footprint as the options are created in final form as records. 329 | * Dynamic construction of consumer options via stream processors, back-off logic, etc. 330 | * Reduction in garbage collection for consumers that do not need per-call options. Overall, less intermediate maps and reified objects. 331 | * Avoid slow memory allocations for the aforementioned cases. 332 | * Mitigate Kafka Java API changes. The API has often been in flux and sometimes it is necessary for extra options to handle weirdness from Java API bugs. 333 | 334 | > Note: Consumer options are distinct from the Kafka Consumer Configuration." 335 | ([config :- cs/ConsumerConfig] 336 | (make-consumer config nil)) 337 | ([config :- cs/ConsumerConfig 338 | options :- (s/maybe cs/ConsumerOptions)] 339 | (-> config 340 | ^Properties (config-codec/encode) 341 | (KafkaConsumer.) 342 | (FranzConsumer. (defaults/make-default-consumer-options options)))) 343 | ([config :- cs/ConsumerConfig 344 | key-deserializer :- Deserializer 345 | value-deserializer :- Deserializer] 346 | (make-consumer config key-deserializer value-deserializer nil)) 347 | ([config :- cs/ConsumerConfig 348 | key-deserializer :- Deserializer 349 | value-deserializer :- Deserializer 350 | options :- (s/maybe cs/ConsumerOptions)] 351 | (-> config 352 | ^Properties (config-codec/encode) 353 | (KafkaConsumer. key-deserializer value-deserializer) 354 | (FranzConsumer. (defaults/make-default-consumer-options options))))) 355 | -------------------------------------------------------------------------------- /src/franzy/clients/consumer/defaults.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.consumer.defaults 2 | (:require [schema.core :as s] 3 | [franzy.clients.consumer.schema :as cs] 4 | [franzy.clients.consumer.callbacks :as callbacks] 5 | [franzy.clients.consumer.types :as ct])) 6 | 7 | (s/defn default-consumer-options [] :- cs/ConsumerOptions 8 | "Default consumer options." 
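;; A sketch of building consumer options with overridden defaults and passing them,
;; together with a config map, to make-consumer. Assumes franzy.clients.consumer.client
;; is required as `client`; the broker address, group id, and deserializer class names
;; (standard Kafka string deserializers, configured by class name) are illustrative.
(comment
  (let [config  {:bootstrap.servers   ["127.0.0.1:9092"]
                 :group.id            "example-group"
                 :enable.auto.commit  false
                 :auto.offset.reset   :earliest
                 :key.deserializer    "org.apache.kafka.common.serialization.StringDeserializer"
                 :value.deserializer  "org.apache.kafka.common.serialization.StringDeserializer"}
        options (make-default-consumer-options {:poll-timeout-ms 500})]
    (with-open [c (client/make-consumer config options)]
      ;; subscribe/assign, poll!, commit offsets, etc. with `c` here
      )))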
9 | {:consumer-records-fn seq 10 | :poll-timeout-ms 1000 11 | :offset-commit-callback (callbacks/offset-commit-callback) 12 | :rebalance-listener-callback (callbacks/no-op-consumer-rebalance-listener)}) 13 | 14 | (s/defn make-default-consumer-options :- cs/ConsumerOptions 15 | "Creates default consumer options, merging any provided options accordingly." 16 | ([] 17 | (make-default-consumer-options nil)) 18 | ([options :- (s/maybe cs/ConsumerOptions)] 19 | (ct/make-consumer-options (merge (default-consumer-options) options)))) 20 | -------------------------------------------------------------------------------- /src/franzy/clients/consumer/partitioners.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.consumer.partitioners 2 | (:require [franzy.clients.consumer.protocols :refer :all]) 3 | (:import 4 | (org.apache.kafka.clients.consumer.internals PartitionAssignor) 5 | (org.apache.kafka.clients.consumer RangeAssignor RoundRobinAssignor))) 6 | 7 | ;;Note: instead of doing this, we could probably do something better with https://github.com/ztellman/potemkin 8 | ;;For now, prefer not to add more complexity and dependencies unless it is really needed as implemented Java interfaces directly is fine in most cases. 9 | ;;Probably a macro could also replace this...thoughts? 10 | ;;This is here for a colleague for now... 11 | (defn make-partition-assignor 12 | "Convenience wrapper for implementors that prefer implementing a Clojure protocol (ex: FranzPartitionAssignor) 13 | that maps to the PartitionAssignor Java interface, rather than implementing the interface directly. 14 | It is recommended for performance to implement the Java PartitionAssignor interface directly, use gen-class, 15 | and any of the previous with a combination of extend-type, however this may be inconvenient in existing code-bases and 16 | in cases where it is more convenient to use a protocol to add some extra partition related methods." 17 | ^PartitionAssignor [partition-assignor] 18 | (reify 19 | PartitionAssignor 20 | (subscription [_ topics] 21 | (partition-subscription partition-assignor topics)) 22 | (assign [_ partitions-per-topic subscriptions] 23 | (assign-partition! partition-assignor partitions-per-topic subscriptions)) 24 | (onAssignment [_ assignment] 25 | (partition-assigned partition-assignor assignment)) 26 | (name [_] 27 | (partition-assignor-name partition-assignor)))) 28 | 29 | (defn range-assignor 30 | "The range assignor works on a per-topic basis. For each topic, we lay out the available partitions in numeric order and the consumers in lexicographic order. We then divide the number of partitions by the total number of consumers to determine the number of partitions to assign to each consumer. If it does not evenly divide, then the first few consumers will have one extra partition. 31 | 32 | For example, suppose there are two consumers C0 and C1, two topics t0 and t1, and each topic has 3 partitions, resulting in partitions: 33 | 34 | > t0p0, t0p1, t0p2, t1p0, t1p1, and t1p2. 35 | 36 | The assignment will be: 37 | 38 | > C0: [t0p0, t0p1, t1p0, t1p1] C1: [t0p2, t1p2]" 39 | ^RangeAssignor [] 40 | (RangeAssignor.)) 41 | 42 | (defn round-robin-assignor 43 | "The roundrobin assignor lays out all the available partitions and all the available consumers. 44 | It then proceeds to do a roundrobin assignment from partition to consumer. 45 | 46 | If the subscriptions of all consumer instances are identical, then the partitions will be uniformly distributed. 
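;; The built-in assignors can be constructed directly (e.g. for tests or mock clusters),
;; though in normal use an assignor is selected through the :partition.assignment.strategy
;; key of the consumer config using the imported Java class names, as sketched below.
(comment
  (range-assignor)
  (round-robin-assignor)
  {:partition.assignment.strategy ["org.apache.kafka.clients.consumer.RangeAssignor"
                                   "org.apache.kafka.clients.consumer.RoundRobinAssignor"]})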
47 | (i.e., the partition ownership counts will be within a delta of exactly one across all consumers.) 48 | 49 | For example, suppose there are two consumers C0 and C1, two topics t0 and t1, and each topic has 3 partitions, 50 | resulting in partitions: 51 | 52 | > t0p0, t0p1, t0p2, t1p0, t1p1, and t1p2. 53 | 54 | The assignment will be: 55 | 56 | > C0: [t0p0, t0p2, t1p1] C1: [t0p1, t1p0, t1p2]" 57 | ^RoundRobinAssignor [] 58 | (RoundRobinAssignor.)) 59 | -------------------------------------------------------------------------------- /src/franzy/clients/consumer/protocols.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.consumer.protocols) 2 | 3 | (defprotocol KafkaConsumerRecords 4 | "Protocol for behaviors of Kafka result sets (ConsumerRecords)" 5 | (record-count [this]) 6 | (record-partitions [this]) 7 | (records-by-topic [this topic]) 8 | (records-by-topic-partition 9 | [this topic partition] 10 | [this topic-partition])) 11 | 12 | (defprotocol OffsetCommiter 13 | "Commits Kafka offsets, typically to Kafka itself or a highly reliable, fast datastore. 14 | For example, an implementor may choose with great reservation to commit to Zookeeper. Then divorce. 15 | One day someone will appreciate that you can commit." 16 | (commit-offsets-async! 17 | [this] 18 | [this opts] 19 | [this offsets opts]) 20 | (commit-offsets-sync! 21 | [this] 22 | [this offsets]) 23 | (committed-offsets [this topic-partition])) 24 | 25 | (defprotocol SeekableLog 26 | "Protocol for a log, such as Kafka that is positionally seekable." 27 | (next-offset [this topic-partition]) 28 | (seek-to-offset! [this topic-partition offset]) 29 | (seek-to-beginning-offset! [this topic-partitions]) 30 | (seek-to-end-offset! [this topic-partitions])) 31 | 32 | ;;Partitionable? 33 | (defprotocol PartitionAssignable 34 | "Capable of being assigned, and thus auditing assigned partitions." 35 | ;;Like a space captain, a captain in space. 36 | (assigned-partitions [this])) 37 | 38 | ;;TODO: these 2 protocols should really be either/or 39 | ;;For now, we wanted 1 consumer that given the choice per the Java Client rather than re-implementing things 40 | ;;and splitting things up. As such, these need different names to avoid collisions, but it should be noted they are functionally 41 | ;;very different with very different characteristics that may not be managable with the same # of functions. 42 | (defprotocol ManualPartitionAssignor 43 | "Manually assigns topic partitions to consumers." 44 | (assign-partitions! [this topic-partitions]) 45 | ;;see clear-subscriptions! commentary 46 | (clear-partition-assignments! [this])) 47 | 48 | (defprotocol AutomaticPartitionAssignor 49 | "Automatically assigns topic partitions to consumers." 50 | (subscribe-to-partitions! 51 | [this topics] 52 | [this topics opts]) 53 | (partition-subscriptions [this]) 54 | ;;essentially in the automatic and manual case, these do the same things due to the Java client, 55 | ;;however in practice, manually assigning and subscribing might require very different semantics 56 | ;;there may be a future method introduced into the Java client to separate these due to some discussions, thus we separate here 57 | (clear-subscriptions! [this])) 58 | 59 | (defprotocol FranzyConsumer 60 | "Protocol for implementing a Kafka consumer. 
61 | 62 | For more details regarding Kafka Consumers, see: 63 | https://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/Consumer.html 64 | 65 | For an example of the Java implementation of this interface for Kafka 0.9 and above, see: 66 | https://kafka.apache.org/090/javadoc/index.html?org/apache/kafka/clients/consumer/KafkaConsumer.html" 67 | (pause! [this topic-partitions]) 68 | (poll! 69 | [this] 70 | [this opts]) 71 | (resume! [this topic-partitions]) 72 | (wakeup! [this]) 73 | ;;via ICloseable instead of shutdown 74 | ;(close [this]) 75 | ) 76 | 77 | (defprotocol FranzPartitionAssignor 78 | "Protocol used for implementors that need a specialized algorithm for assigning partitions to Kafka. 79 | Example strategies include by range or round-robin. 80 | See franzy.clients.consumer.partitioners examples." 81 | (partition-subscription [this topics]) 82 | (assign-partition! [this partitions-per-topic subscriptions]) 83 | (partition-assigned [this assignment]) 84 | (partition-assignor-name [this])) 85 | -------------------------------------------------------------------------------- /src/franzy/clients/consumer/results.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.consumer.results 2 | "Kafka result set (ConsumerRecords) implementations." 3 | (:require [franzy.clients.codec :as codec] 4 | [franzy.clients.consumer.protocols :as proto]) 5 | (:import (clojure.lang IReduceInit Seqable) 6 | (org.apache.kafka.clients.consumer ConsumerRecords))) 7 | 8 | (defn consumer-records 9 | "Implementation of a Kafka result set. 10 | As Kafka result sets are stateful and provide various behaviors as well as shapes of information, it is not suitable for many advanced clients to receive only maps as results. 11 | 12 | This implementation guarantees the following: 13 | 14 | * Lazy/Non-lazy chunked/unchunked access to results from Kafka, with optional transducers applied without excessive intermediate objects. 15 | * Full fidelity of the results returned from Kafka (by topic, partition, all, record count, and future additions from the Java side). Nothing lost, much gained. 16 | * Ability to slice records via transducer or by calling built-in functions to slice on topic or topic partition. 17 | * Preservation of the result type from Kafka. No inadvetent consumption of iterators or eagerly realizing things if not desired. 18 | * Ability of sequence operations to be applied to result set via Seqable, and return only Clojure types consistent with the rest of the API. 19 | * Ability to reduce the result set itself in a high performance way via IReduceInit, and return only Clojure types consistent with the rest of the API. 20 | * Frees client implementations, testing, etc. from dealing wtih this behavior - no complecting the client implementation with handling the result set behavior." 21 | [^ConsumerRecords records] 22 | (reify 23 | proto/KafkaConsumerRecords 24 | (record-count [_] 25 | "Counts the number of records in the topic" 26 | (.count records)) 27 | (record-partitions [_] 28 | "Lists the topic partitions present in the results." 29 | (->> 30 | (.partitions records) 31 | (codec/decode))) 32 | (records-by-topic [_ topic] 33 | "Returns consumer records by topic name." 34 | (->> (.records records ^String topic) 35 | (codec/lazy-consumer-records) 36 | (map codec/decode))) 37 | (records-by-topic-partition [_ topic partition] 38 | "Returns consumer records from a specific topic partition. 39 | 40 | > Note: The results are realized eagerly." 
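;; A sketch of working with the result set returned by poll!; a FranzConsumer `c` is
;; assumed to exist and to be subscribed or assigned, and the topic name is illustrative.
;; Uses the `proto` alias required above.
(comment
  (let [results (proto/poll! c {:poll-timeout-ms 1000})]
    ;; cheap count of records in the result set
    (proto/record-count results)
    ;; slice the results by topic...
    (proto/records-by-topic results "events")
    ;; ...or reduce the whole set directly via IReduceInit, without intermediate seqs
    (reduce (fn [acc {:keys [offset]}] (max acc offset)) -1 results)))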
41 | (->> (codec/map->topic-partition topic partition) 42 | (.records records) 43 | (codec/decode))) 44 | (records-by-topic-partition [this {:keys [topic partition]}] 45 | (proto/records-by-topic-partition this topic partition)) 46 | Seqable 47 | (seq [_] 48 | "Creates a lazy, seqable compliant wrapper around the results and ensures the results are mapped to the correct type." 49 | (->> records 50 | (codec/lazy-consumer-records) 51 | (map codec/decode))) 52 | IReduceInit 53 | (reduce [_ f init] 54 | "Allows the results to be reduced and mapped efficiently to the correct type." 55 | ;;TODO: iterator-seq or just use java iterator? Need to make sure this is safe and may not want to hang on to this iterator here. 56 | (when-let [iter (.iterator records)] 57 | (loop [ret init] 58 | (if (.hasNext iter) 59 | (let [record (->> (.next iter) 60 | (codec/decode)) 61 | ret (f ret record)] 62 | (if (reduced? ret) 63 | @ret 64 | (recur ret))) 65 | ret)))))) 66 | -------------------------------------------------------------------------------- /src/franzy/clients/consumer/schema.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.consumer.schema 2 | "Schemas for Kafka Consumers and related types. 3 | 4 | For some context, see http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/package-frame.html" 5 | (:require [schema.core :as s] 6 | [franzy.common.schema :as fs] 7 | [franzy.common.models.schema :as fms]) 8 | (:import (org.apache.kafka.clients.consumer OffsetResetStrategy OffsetCommitCallback ConsumerRebalanceListener) 9 | (java.nio ByteBuffer))) 10 | 11 | ;;TODO: cleaner enum handling 12 | (def OffsetResetStrategyEnum 13 | "Schema for a Kafka Offset Reset Strategy 14 | 15 | See http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/OffsetResetStrategy.html" 16 | ;;TODO: switch back to keywords via coercer + java.util.EnumSet/allOf per enum 17 | ;;LATEST, EARLIEST, NONE - must be lower, roar my lion! 18 | ;;Note the Java implementation of this enum is incomplete (roar), standard tricks don't apply based on what the app is expecting 19 | (apply s/enum (map (comp keyword str clojure.string/lower-case) (OffsetResetStrategy/values)))) 20 | 21 | (def ConsumerOptions 22 | "Schema for options for a Kafka Franzy-specific consumer." 23 | {(s/optional-key :consumer-records-fn) fs/Function ;;TODO: more restrictive check? 24 | (s/optional-key :poll-timeout-ms) fs/SPosInt 25 | (s/optional-key :offset-commit-callback) (s/maybe OffsetCommitCallback) 26 | (s/optional-key :rebalance-listener-callback) (s/maybe ConsumerRebalanceListener)}) 27 | 28 | (def ConsumerConfig 29 | "Schema for a Kafka Consumer configuration, passed as properties to Kafka. 
30 |
31 |   See http://kafka.apache.org/documentation.html#consumerconfigs"
32 |   {(s/required-key :bootstrap.servers) fs/NonEmptyStringOrStringList ;;TODO: more strict schema
33 |    (s/optional-key :key.deserializer) s/Str
34 |    (s/optional-key :value.deserializer) s/Str
35 |    (s/optional-key :fetch.min.bytes) fs/PosInt
36 |    (s/optional-key :group.id) s/Str
37 |    (s/optional-key :heartbeat.interval.ms) fs/PosInt
38 |    (s/optional-key :max.partition.fetch.bytes) fs/PosInt
39 |    (s/optional-key :session.timeout.ms) fs/SPosInt
40 |    (s/optional-key :ssl.key.password) s/Str
41 |    (s/optional-key :ssl.keystore.location) s/Str
42 |    (s/optional-key :ssl.keystore.password) s/Str
43 |    (s/optional-key :ssl.truststore.location) s/Str
44 |    (s/optional-key :ssl.truststore.password) s/Str
45 |    (s/optional-key :auto.offset.reset) OffsetResetStrategyEnum
46 |    (s/optional-key :connections.max.idle.ms) fs/SPosLong
47 |    (s/optional-key :enable.auto.commit) s/Bool
48 |    (s/optional-key :partition.assignment.strategy) fs/StringOrStringList
49 |    (s/optional-key :receive.buffer.bytes) fs/SPosInt
50 |    (s/optional-key :request.timeout.ms) fs/SPosInt
51 |    (s/optional-key :sasl.kerberos.service.name) s/Str
52 |    (s/optional-key :security.protocol) fms/SecurityProtocolEnum
53 |    (s/optional-key :send.buffer.bytes) fs/SPosInt
54 |    (s/optional-key :ssl.enabled.protocols) fs/StringOrStringList
55 |    (s/optional-key :ssl.keystore.type) s/Str
56 |    (s/optional-key :ssl.protocol) s/Str
57 |    (s/optional-key :ssl.provider) s/Str
58 |    (s/optional-key :ssl.truststore.type) s/Str
59 |    (s/optional-key :auto.commit.interval.ms) fs/SPosLong
60 |    (s/optional-key :check.crcs) s/Bool
61 |    (s/optional-key :client.id) s/Str
62 |    (s/optional-key :fetch.max.wait.ms) fs/PosInt
63 |    (s/optional-key :metadata.max.age.ms) fs/SPosLong
64 |    (s/optional-key :metric.reporters) fs/StringOrStringList
65 |    (s/optional-key :metric.num.samples) fs/PosInt
66 |    (s/optional-key :metrics.sample.window.ms) fs/SPosLong
67 |    (s/optional-key :reconnect.backoff.ms) fs/SPosLong
68 |    (s/optional-key :retry.backoff.ms) fs/SPosLong
69 |    (s/optional-key :sasl.kerberos.kinit.cmd) s/Str
70 |    (s/optional-key :sasl.kerberos.min.time.before.relogin) fs/SPosLong
71 |    (s/optional-key :sasl.kerberos.ticket.renew.jitter) fs/SPosDouble
72 |    (s/optional-key :sasl.kerberos.ticket.renew.window.factor) fs/SPosDouble
73 |    (s/optional-key :ssl.cipher.suites) fs/StringOrStringList
74 |    (s/optional-key :ssl.endpoint.identification.algorithm) s/Str
75 |    (s/optional-key :ssl.keymanager.algorithm) s/Str
76 |    (s/optional-key :ssl.trustmanager.algorithm) s/Str})
77 |
78 | (def ConsumerRecord
79 |   "Schema for a Kafka Consumer Record.
80 |
81 |   See http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/ConsumerRecord.html"
82 |   {(s/required-key :topic) fs/NonEmptyString
83 |    (s/required-key :partition) fs/SPosInt
84 |    (s/required-key :offset) fs/SPosLong
85 |    (s/required-key :key) fs/AnyButNil
86 |    (s/required-key :value) fs/AnyButNil})
87 |
88 | ;;TODO: more strict
89 | (def ConsumerRebalanceListenerCallbackFn
90 |   "Schema for a Kafka Consumer Rebalance Listener Callback.
91 |
92 |   See http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/ConsumerRebalanceListener.html"
93 |   (s/make-fn-schema s/Any [[s/Any s/Any]]))
94 |
95 | ;;checks for the right keys, but the fn schema is purely descriptive per schema docs. meh. Probably can be fixed.
96 | (def ConsumerRebalanceListenerCallbacks
97 |   "Schema for creating consumer rebalance callbacks from Clojure functions."
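  ;; For illustration only (not part of the original schema definition): a map intended to
  ;; satisfy this schema might look like the following, assuming each callback fn receives
  ;; the collection of topic partitions involved in the rebalance:
  ;;   {:partitions-assigned-fn (fn [topic-partitions] (println "assigned:" topic-partitions))
  ;;    :partitions-revoked-fn  (fn [topic-partitions] (println "revoked:" topic-partitions))}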
98 | {(s/optional-key :partitions-assigned-fn) ConsumerRebalanceListenerCallbackFn 99 | (s/optional-key :partitions-revoked-fn) ConsumerRebalanceListenerCallbackFn}) 100 | 101 | (def ConsumerRebalanceListenerCallback 102 | "Schema for a consumer rebalance callback." 103 | (s/pred (partial instance? ConsumerRebalanceListener) 'consumer-rebalance-listener?)) 104 | 105 | (def OffsetAndMetadata 106 | "Schema for Kafka offset commit metadata. 107 | 108 | See http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/consumer/OffsetAndMetadata.html" 109 | {(s/required-key :offset) fs/SPosLong 110 | (s/required-key :metadata) (s/maybe s/Str)}) 111 | 112 | ;;TODO: possibly deprecate as this really needs to be an object with methods, not just data - keeping for now to use to return partition assignment data 113 | (def PartitionAssignment 114 | "Schema for a Kafka partition assignment." 115 | {(s/required-key :topics) (s/maybe fs/StringOrStringList) 116 | (s/required-key :user-data) (s/maybe ByteBuffer)}) 117 | -------------------------------------------------------------------------------- /src/franzy/clients/consumer/types.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.consumer.types 2 | (:require [schema.core :as s] 3 | [franzy.clients.consumer.schema :as cs])) 4 | 5 | (defrecord ConsumerOptions 6 | [consumer-records-fn poll-timeout-ms offset-commit-callback rebalance-listener-callback]) 7 | 8 | (s/defn make-consumer-options :- cs/ConsumerOptions 9 | [m] 10 | (map->ConsumerOptions m)) 11 | 12 | (defrecord ConsumerRecord 13 | [topic partition offset key value]) 14 | 15 | (s/defn make-consumer-record [m] :- cs/ConsumerRecord 16 | (map->ConsumerRecord m)) 17 | 18 | (defrecord PartitionAssignment 19 | [topics user-data]) 20 | 21 | (s/defn make-partition-assignment :- cs/PartitionAssignment 22 | [m] 23 | (map->PartitionAssignment m)) 24 | 25 | (defrecord OffsetMetadata 26 | [offset metadata]) 27 | 28 | (s/defn make-offset-metadata :- cs/OffsetAndMetadata 29 | [m] 30 | (map->OffsetMetadata m)) 31 | -------------------------------------------------------------------------------- /src/franzy/clients/partitions.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.partitions 2 | "Helpers and useful functions for working with topic partitions." 3 | (:require [franzy.common.models.types :as mt])) 4 | 5 | (defn mock-partition-info 6 | "Creates mock partition info for use with a mock cluster based on a collection of topic partition maps/records." 7 | [topic-partitions] 8 | (map (fn [topic-partition] 9 | {:topic (:topic topic-partition) 10 | :partition (:partition topic-partition) 11 | :leader {:host "127.0.0.1" :id 1 :port 2181} 12 | :replicas [{:host "127.0.0.1" :id 1 :port 2181}] 13 | :in-sync-replicas [{:host "127.0.0.1" :id 1 :port 2181}] 14 | }) topic-partitions)) 15 | 16 | (defn topic-partition-range 17 | "Creates a linear amount of topic partitions based on a given topic and partition count." 18 | [topic partitions] 19 | (map (fn [n] (mt/->TopicPartition topic n)) (range 0 partitions))) 20 | 21 | (defn topics-from-partitions 22 | "Creates a set of all the topics found in a collection of topic partitions." 23 | [topic-partitions] 24 | (into #{} (map :topic) topic-partitions)) 25 | 26 | (defn topic-partition-info->topic-partition 27 | "Creates a topic partition from partition info." 
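  ;; For illustration only (hypothetical data): given partition info shaped like the maps
  ;; returned by mock-partition-info above, only :topic and :partition are kept, e.g.
  ;;   (topic-partition-info->topic-partition
  ;;     {:topic "fried-side-items" :partition 50 :leader {:host "127.0.0.1" :id 1 :port 2181}})
  ;;   ;;=> {:topic "fried-side-items" :partition 50}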
28 | [topic-partition-info] 29 | (select-keys topic-partition-info [:topic :partition])) -------------------------------------------------------------------------------- /src/franzy/clients/producer/callbacks.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.producer.callbacks 2 | (:require [franzy.clients.codec :as codec]) 3 | (:import (org.apache.kafka.clients.producer Callback))) 4 | 5 | ;;deftype for debugging/meta purposes 6 | (deftype NoOpSendCallback [] 7 | Callback 8 | (onCompletion [_ _ _])) 9 | 10 | (defn ^NoOpSendCallback no-op-send-callback [] 11 | "Creates a no-op send callback, for testing, defaults, etc." 12 | (NoOpSendCallback.)) 13 | 14 | (defn send-callback 15 | "Creates a Kafka Java compatible callback for use with a producer send function. 16 | 17 | The callback will execute when the request is complete. 18 | This callback will generally execute in the background I/O thread so it should be fast, taking minimal time to execute. 19 | 20 | You may pass a Clojure function to create this callback, however it must be of 2 arity. 21 | The first argument will be record metadata as a map (converted from Java), and the second argument will be an exception. 22 | Your callback will receive one or the other as a value and should respond accordingly. 23 | 24 | Example: 25 | (send-callback (fn naming-me-might-help-debug [record-metadata e] 26 | (println \"Record metadata:\" record-metadata) 27 | (println \"Kafka said no, here's why:\" e)) 28 | 29 | See https://kafka.apache.org/090/javadoc/org/apache/kafka/clients/producer/Callback.html for more details." 30 | (^Callback [] 31 | "Creates a no-op callback." 32 | (no-op-send-callback)) 33 | (^Callback [send-callback-fn] 34 | (reify Callback 35 | (onCompletion [_ record-metadata exception] 36 | (println "calling send callback...") 37 | (send-callback-fn (codec/decode record-metadata) exception)))) 38 | (^Callback [record-metadata-fn exception-handler-fn] 39 | "Takes 2 functions, 1 to process record metadata, and another to process exceptions. 40 | This is a convenience function for those that prefer to separately handle record metadata and exceptions. 41 | You may alternatively prefer the 1-arity version and a function that closes over 2 functions." 
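   ;; For illustration only, this two-function arity might be used roughly as:
   ;;   (send-callback (fn [record-metadata] (println "sent:" record-metadata))
   ;;                  (fn [e] (println "send failed:" e)))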
42 | (reify Callback 43 | (onCompletion [_ record-metadata exception] 44 | (when record-metadata 45 | (record-metadata-fn (codec/decode record-metadata))) 46 | ;;rather than an if, an extra when to be a bit more bullet-proof because this was a bug for at least 1 build of Kafka 47 | ;;normally, these should be mutually exclusive, but trust is for the young 48 | (when exception 49 | (exception-handler-fn exception)))))) 50 | 51 | -------------------------------------------------------------------------------- /src/franzy/clients/producer/client.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.producer.client 2 | (:require [schema.core :as s] 3 | [franzy.clients.producer.schema :as ps] 4 | [franzy.common.configuration.codec :as config-codec] 5 | [franzy.clients.codec :as codec] 6 | [franzy.common.metadata.protocols :refer [KafkaMeasurable PartitionMetadataProvider]] 7 | [franzy.clients.producer.protocols :refer :all] 8 | [franzy.common.async.wrappers :as async-wrappers] 9 | [franzy.clients.producer.defaults :as defaults]) 10 | (:import (org.apache.kafka.clients.producer KafkaProducer Callback Producer) 11 | (org.apache.kafka.common.serialization Serializer) 12 | (java.util Properties) 13 | (java.io Closeable))) 14 | 15 | (deftype FranzProducer 16 | [^Producer producer producer-options] 17 | FranzyProducer 18 | (flush! [_] 19 | "Invoking this function makes all buffered records immediately available to send (even if linger.ms is greater than 0) and blocks on the completion of the requests associated with these records." 20 | (.flush producer)) 21 | (send-async! [this m] 22 | (send-async! this m nil)) 23 | (send-async! [this {:keys [topic partition key value]} options] 24 | {:pre [(not (nil? topic)) 25 | (not (nil? value))]} 26 | (send-async! this topic partition key value options)) 27 | (send-async! [_ topic partition k v {:keys [send-callback] 28 | :or {send-callback (:send-callback producer-options)}}] 29 | "Asynchronously sends a record to a topic, and invokes the provided callback when the send has been acknowledged. 30 | 31 | You must provide a topic, partition, key, and value. Keys are optional for some serializers that can automatically 32 | create a key for you. This behavior is strongly discouraged. 33 | You should always specify your keys if possible to be explicit about your intentions. 34 | Failure to provide a key for many serializers, example binary-based will results in a CRC error if CRC checking is enabled. 35 | 36 | You may provide a send-callback either via the ProducerOptions or by passing it in the options map at the call-site. 37 | You can use `(callbacks/send-callback my-2-arity-callback)` to create a callback from a 2-arity Clojure function. 38 | Alternatively, you can implement your own callback via the Java type. 39 | You are strongly discouraged from creating new callbacks every call, rather deftype, defrecord, cache, bind, close over, or otherwise store your callback. 40 | For more information on callbacks, see franzy.clients.producer.callbacks 41 | 42 | Kafka sends asynchronously. This function will return immediately once the record has been stored in the buffer of records waiting to be sent. 43 | This allows sending many records in parallel without blocking to wait for the response after each one. 44 | 45 | The result of the send is a map constructed from Kafka RecordMetadata specifying the partition the record was sent to and the offset it was assigned. 
46 |    This map is available via the send function callback.
47 |    If a send callback function is specified, the callback will be invoked with the map of RecordMetadata and/or any exception if present.
48 |
49 |    The RecordMetadata will be returned as a map, for example:
50 |    {:topic \"80s-movies\" :partition 0 :offset 1024}
51 |
52 |    Since the send call is asynchronous, it returns a Future for the RecordMetadata that will be assigned to this record.
53 |    Dereferencing the future will block, returning the record metadata map.
54 |
55 |    For more information, please see: https://kafka.apache.org/090/javadoc/org/apache/kafka/clients/producer/KafkaProducer.html#send(org.apache.kafka.clients.producer.ProducerRecord,%20org.apache.kafka.clients.producer.Callback)"
56 |     (-> producer
57 |         (.send (codec/map->producer-record topic partition k v) ^Callback send-callback)
58 |         (async-wrappers/wrap-future codec/decode)))
59 |   (send-sync! [this m]
60 |     (send-sync! this m nil))
61 |   (send-sync! [this m options]
62 |     "Blocking version of `(send-async! p)`.
63 |
64 |     Returns when the associated send future completes.
65 |
66 |     > Note: If you have enabled any callbacks via the producer options, they will be invoked asynchronously when the send completes.
67 |     Be aware that this may produce unwanted side-effects if you are running the same logic synchronously after this call completes.
68 |     Either don't set the producer send-callback option, or pass {:send-callback nil} to override it in these cases."
69 |     (deref (send-async! this m options)))
70 |   (send-sync! [this topic partition k v options]
71 |     (deref (send-async! this topic partition k v options)))
72 |   (close [_ {:keys [close-timeout close-timeout-unit]
73 |              :or   {close-timeout (:close-timeout producer-options)
74 |                     close-timeout-unit (:close-timeout-unit producer-options)}}]
75 |     (.close producer close-timeout close-timeout-unit))
76 |   Closeable
77 |   (close [_]
78 |     (.close producer))
79 |   PartitionMetadataProvider
80 |   (partitions-for [_ topic]
81 |     "Get metadata about the partitions for a given topic."
82 |     (->> topic
83 |          (.partitionsFor producer)
84 |          (codec/decode)))
85 |   KafkaMeasurable
86 |   (metrics [_]
87 |     (->>
88 |       (.metrics producer)
89 |       (codec/decode))))
90 |
91 | (s/defn make-producer :- FranzProducer
92 |   "Create a Kafka Producer from a configuration, with optional serializers and optional producer options.
93 |   If producer options are given, they are merged with the defaults and used for this producer.
94 |   If serializers are provided, use them, otherwise expect serializers via class name in the config map.
95 |
96 |   This producer implementation wraps the Kafka Java Producer API.
97 |   It provides a Clojure (ish) wrapper, with Clojure data structures to/from Kafka,
98 |   and implements various protocols to allow more specialized producers following this implementation.
99 |   If you prefer a lower-level implementation or wish to test your producer, you may wish to browse this implementation
100 |   and implement one or all the protocols provided.
101 |
102 |   For per-function documentation, please see the source for extensive comments, usage examples, etc.
103 |
104 |   > Note: This implementation stresses a reasonable compromise between raw performance, extensibility, and usability, all things considered as:
105 |
106 |   1. A wrapper
107 |   2. Clojure
108 |
109 |   Producer options serve the following purposes:
110 |
111 |   * Avoid repeated/inconvenient passing of defaults to various methods requiring options such as timeouts. Many producers do not need per-call options.
112 |   * Long-term extensibility as more features are added to this client, mitigating signature changes and excessive arities
113 |   * Cheaper lookups and smaller memory footprint as the options are created in final form as records.
114 |   * Dynamic construction of producer options via stream processors, back-off logic, etc.
115 |   * Reduction in garbage collection for producers that do not need per-call options. Overall, less intermediate maps and reified objects.
116 |   * Avoid slow memory allocations for the aforementioned cases.
117 |   * Mitigate Kafka Java API changes. The API has often been in flux and sometimes it is necessary for extra options to handle weirdness from Java API bugs.
118 |
119 |   > Note: Producer options are distinct from the Kafka Producer Configuration."
120 |   ([config :- ps/ProducerConfig]
121 |    (make-producer config nil))
122 |   ([config :- ps/ProducerConfig
123 |     options :- (s/maybe ps/ProducerOptions)]
124 |    (-> config
125 |        ^Properties (config-codec/encode)
126 |        (KafkaProducer.)
127 |        (FranzProducer. (defaults/make-default-producer-options options))))
128 |   ([config :- ps/ProducerConfig
129 |     key-serializer :- Serializer
130 |     value-serializer :- Serializer]
131 |    (make-producer config key-serializer value-serializer nil))
132 |   ([config :- ps/ProducerConfig
133 |     key-serializer :- Serializer
134 |     value-serializer :- Serializer
135 |     options :- (s/maybe ps/ProducerOptions)]
136 |    (-> config
137 |        ^Properties (config-codec/encode)
138 |        (KafkaProducer. key-serializer value-serializer)
139 |        (FranzProducer. (defaults/make-default-producer-options options)))))
140 |
--------------------------------------------------------------------------------
/src/franzy/clients/producer/defaults.clj:
--------------------------------------------------------------------------------
1 | (ns franzy.clients.producer.defaults
2 |   (:require [schema.core :as s]
3 |             [franzy.clients.producer.schema :as ps]
4 |             [franzy.clients.producer.types :as pt]
5 |             [franzy.clients.producer.callbacks :as callbacks])
6 |   (:import (java.util.concurrent TimeUnit)))
7 |
8 | (s/defn default-producer-options :- ps/ProducerOptions
9 |   "Default producer options."
10 |   []
11 |   {:close-timeout 3000
12 |    :close-timeout-unit TimeUnit/MILLISECONDS
13 |    :send-callback (callbacks/send-callback)})
14 |
15 | (s/defn make-default-producer-options :- ps/ProducerOptions
16 |   "Creates default producer options, merging any provided options accordingly."
17 |   ([]
18 |    (make-default-producer-options nil))
19 |   ([options :- (s/maybe ps/ProducerOptions)]
20 |    (pt/make-producer-options (merge (default-producer-options) options))))
21 |
--------------------------------------------------------------------------------
/src/franzy/clients/producer/partitioners.clj:
--------------------------------------------------------------------------------
1 | (ns franzy.clients.producer.partitioners
2 |   "Partitioners used to determine which topic partition a record is assigned to.
3 |
4 |   Partitioners should implement the `Partitioner` interface directly to be used with Kafka configurations."
5 |   (:require [franzy.clients.partitions :as partitions]
6 |             [franzy.clients.cluster :as cluster])
7 |   (:import (org.apache.kafka.clients.producer.internals DefaultPartitioner)
8 |            (org.apache.kafka.clients.producer Partitioner)
9 |            (org.apache.kafka.common.serialization Serializer)))
10 |
11 | (defn make-partitioner [partitioner-fn close-fn]
12 |   "Simple wrapper, usually used for testing to create a custom topic partitioner on-demand.
13 |
14 |   For real partitioner implementations, prefer to implement via deftype, defrecord, or gen-class."
15 |   (reify
16 |     Partitioner
17 |     (partition [_ topic key key-bytes value value-bytes cluster]
18 |       (partitioner-fn topic key key-bytes value value-bytes cluster))
19 |     (close [_] (close-fn))))
20 |
21 | (defn default-partitioner []
22 |   "Creates a default partitioner for partitioning topics.
23 |
24 |   This is the default implementation used by Kafka.
25 |
26 |   Useful for defaults, swapping partitioning implementations, or testing."
27 |   (DefaultPartitioner.))
28 |
29 | (defn calculate-partition
30 |   "Calculates the hypothetical partition for a given topic of n partitions and value to partition, with an optional key.
31 |
32 |   The calculated partition depends on the serializer used and the partitioning algorithm, specified as a partitioner
33 |   interface implementor. If no partitioner is provided, the default partitioner is assumed.
34 |
35 |   This function allows a deterministic way of figuring out which partition your data will go to from a given
36 |   producer input, even when disconnected from Kafka.
37 |
38 |   Note that the partition will change for partitioners that depend on some random runtime state. If this is the case,
39 |   ensure you pass the partitioner itself with any required state inside it so your results are reproducible.
40 |
41 |   Additionally, some partitioners may not return the same result in the future if the number of partitions increases.
42 |   Be mindful of the partitioning algorithm.
43 |
44 |   Most good partitioners should return predictable results; however, since the implementation is open, there is no guarantee."
45 |   ([{:keys [topic key value]} partitions key-serializer value-serializer]
46 |    (calculate-partition topic key value partitions key-serializer value-serializer nil))
47 |   ([{:keys [topic key value]} partitions key-serializer value-serializer partitioner]
48 |    (calculate-partition topic key value partitions key-serializer value-serializer partitioner))
49 |   ([^String topic key value partitions ^Serializer key-serializer ^Serializer value-serializer ^Partitioner partitioner]
50 |    (let [key-bytes (.serialize key-serializer topic key)
51 |          value-bytes (.serialize value-serializer topic value)
52 |          cluster (cluster/mock-cluster 1 (partitions/topic-partition-range topic partitions) #{})]
53 |      (.partition (or partitioner (default-partitioner)) topic key key-bytes value value-bytes cluster))))
--------------------------------------------------------------------------------
/src/franzy/clients/producer/protocols.clj:
--------------------------------------------------------------------------------
1 | (ns franzy.clients.producer.protocols)
2 |
3 | (defprotocol FranzyProducer
4 |   "Protocol for implementing a Kafka Producer."
5 |   (flush! [producer])
6 |   (send-async!
7 |     [this m]
8 |     [this m opts]
9 |     [this topic partition k v opts])
10 |   (send-sync!
11 |     [this m]
12 |     [this m options]
13 |     [this topic partition k v opts])
14 |   (close
15 |     ;[this]
16 |     [this opts]))
17 |
--------------------------------------------------------------------------------
/src/franzy/clients/producer/schema.clj:
--------------------------------------------------------------------------------
1 | (ns franzy.clients.producer.schema
2 |   "Schemas for Kafka Producers and related types.
3 |
4 |   For some context, see http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/producer/package-frame.html"
5 |   (:require [schema.core :as s]
6 |             [franzy.common.schema :as fs]
7 |             [franzy.common.models.schema :as fms])
8 |   (:import (org.apache.kafka.clients.producer Callback)))
9 |
10 | ;;;TODO: more restrictive schema
11 | ;(def ProducerCallback
12 | ;  (s/make-fn-schema s/Any [s/Any s/Any]))
13 |
14 | ;;TODO: more restrictive schema, callback schema
15 | (def ProducerOptions
16 |   "Schema for options for a Franzy-specific Kafka Producer."
17 |   {(s/optional-key :close-timeout) fs/SPosInt
18 |    (s/optional-key :close-timeout-unit) fms/TimeUnitEnum
19 |    (s/optional-key :send-callback) Callback})
20 |
21 | ;;TODO: more restrictive schema
22 | (def ProducerConfig
23 |   "Schema for a Kafka Producer Configuration.
24 |
25 |   http://kafka.apache.org/documentation.html#producerconfigs"
26 |   {(s/required-key :bootstrap.servers) fs/NonEmptyStringOrStringList ;;TODO: more strict schema
27 |    (s/optional-key :key.serializer) s/Str
28 |    (s/optional-key :value.serializer) s/Str
29 |    (s/optional-key :acks) fms/KafkaAck
30 |    (s/optional-key :buffer.memory) fs/SPosLong
31 |    (s/optional-key :compression.type) s/Str
32 |    (s/optional-key :retries) fs/SPosInt
33 |    (s/optional-key :ssl.key.password) s/Str
34 |    (s/optional-key :ssl.keystore.location) s/Str
35 |    (s/optional-key :ssl.keystore.password) s/Str
36 |    (s/optional-key :ssl.truststore.location) s/Str
37 |    (s/optional-key :ssl.truststore.password) s/Str
38 |    (s/optional-key :batch.size) fs/SPosInt
39 |    (s/optional-key :client.id) s/Str
40 |    (s/optional-key :connections.max.idle.ms) fs/SPosLong
41 |    (s/optional-key :linger.ms) fs/SPosLong
42 |    (s/optional-key :max.block.ms) fs/SPosLong
43 |    (s/optional-key :max.request.size) fs/SPosInt
44 |    (s/optional-key :partitioner.class) s/Str
45 |    (s/optional-key :receive.buffer.bytes) fs/SPosInt
46 |    (s/optional-key :request.timeout.ms) fs/SPosInt
47 |    (s/optional-key :sasl.kerberos.service.name) s/Str
48 |    (s/optional-key :security.protocol) fms/SecurityProtocolEnum
49 |    (s/optional-key :send.buffer.bytes) fs/SPosInt
50 |    (s/optional-key :ssl.enabled.protocols) fs/StringOrStringList
51 |    (s/optional-key :ssl.keystore.type) s/Str
52 |    (s/optional-key :ssl.protocol) s/Str
53 |    (s/optional-key :ssl.provider) s/Str
54 |    (s/optional-key :ssl.truststore.type) s/Str
55 |    (s/optional-key :timeout.ms) fs/SPosInt
56 |    (s/optional-key :block.on.buffer.full) s/Bool
57 |    (s/optional-key :max.in.flight.requests.per.connection) fs/PosInt
58 |    (s/optional-key :metadata.fetch.timeout.ms) fs/SPosLong
59 |    (s/optional-key :metadata.max.age.ms) fs/SPosLong
60 |    (s/optional-key :metric.reporters) fs/StringOrStringList
61 |    (s/optional-key :metric.num.samples) fs/PosInt
62 |    (s/optional-key :metrics.sample.window.ms) fs/SPosLong
63 |    (s/optional-key :reconnect.backoff.ms) fs/SPosLong
64 |    (s/optional-key :retry.backoff.ms) fs/SPosLong
65 |    (s/optional-key :sasl.kerberos.kinit.cmd) s/Str
66 |    (s/optional-key :sasl.kerberos.min.time.before.relogin) fs/SPosLong
67 |    (s/optional-key :sasl.kerberos.ticket.renew.jitter) fs/SPosDouble
68 |    (s/optional-key :sasl.kerberos.ticket.renew.window.factor) fs/SPosDouble
69 |    (s/optional-key :ssl.cipher.suites) fs/StringOrStringList
70 |    (s/optional-key :ssl.endpoint.identification.algorithm) s/Str
71 |    (s/optional-key :ssl.keymanager.algorithm) s/Str
72 |    (s/optional-key :ssl.trustmanager.algorithm) s/Str})
73 |
74 | (def ProducerRecord
75 |   "Schema for a Kafka Producer Record.
76 | 77 | See http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/producer/ProducerRecord.html" 78 | {(s/required-key :topic) fs/NonEmptyString 79 | ;Optional for now, but always need to check and set it to zero in that case. 80 | ;I prefer explicit behavior of which partition to send data to and probably an awful idea to default data to a partition in a system like Kafka 81 | (s/optional-key :partition) fs/SPosInt 82 | ;Optional, but a really bad idea to omit, unless using a string-based for a key. Kafka itself considered axing this behavior 83 | ;Making key required though might break a lot of existing code. Make required? TBD... 84 | (s/optional-key :key) fs/AnyButNil 85 | (s/required-key :value) fs/AnyButNil}) 86 | 87 | (def RecordMetadata 88 | "Schema for Kafka Record Metadata 89 | 90 | See http://kafka.apache.org/090/javadoc/org/apache/kafka/clients/producer/RecordMetadata.html" 91 | {(s/required-key :topic) fs/NonEmptyString 92 | (s/required-key :partition) fs/SPosInt 93 | (s/required-key :offset) fs/SPosLong}) 94 | -------------------------------------------------------------------------------- /src/franzy/clients/producer/types.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.producer.types 2 | (:require [schema.core :as s] 3 | [franzy.clients.producer.schema :as ps]) 4 | (:import (java.util.concurrent TimeUnit))) 5 | 6 | (defrecord ProducerOptions 7 | [close-timeout close-timeout-unit send-callback]) 8 | 9 | (s/defn make-producer-options :- ps/ProducerOptions 10 | "Creates a producer options record. 11 | 12 | The default close-timeout-unit if not provided is milliseconds." 13 | ([m] 14 | (map->ProducerOptions m)) 15 | ([close-timeout send-callback] 16 | (make-producer-options close-timeout TimeUnit/MILLISECONDS send-callback)) 17 | ([close-timeout close-timeout-unit send-callback] 18 | (->ProducerOptions close-timeout (or close-timeout-unit TimeUnit/MILLISECONDS) send-callback))) 19 | 20 | (defrecord RecordMetadata 21 | [topic partition offset]) 22 | 23 | (s/defn make-record-metadata :- ps/RecordMetadata 24 | "Creates a record metadata record." 25 | ([m] 26 | (map->RecordMetadata m)) 27 | ([topic partition offset] 28 | (->RecordMetadata topic partition offset))) 29 | 30 | (defrecord ProducerRecord 31 | [topic partition key value]) 32 | 33 | (s/defn make-producer-record :- ps/ProducerRecord 34 | "Creates a producer record (record). 35 | 36 | You must provide one of the following: 37 | 38 | * Topic and Value - Will use partitioner in producer config to decide which partition. 39 | * Topic, Partition, Key, Value - Will use explicit arguments. 40 | * Topic, Key, Value - Will use the key and partitioner to decide which partition." 41 | ([m] 42 | (map->ProducerRecord m)) 43 | ([topic value] 44 | (make-producer-record topic nil nil value)) 45 | ([topic key value] 46 | (make-producer-record topic nil key value)) 47 | ([topic partition key value] 48 | (->ProducerRecord topic partition key value))) 49 | 50 | -------------------------------------------------------------------------------- /src/franzy/serialization/deserializers.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.serialization.deserializers 2 | "Implementations of core Apache Kafka and Franzy deserializers. 3 | For more serializers, see Franzy docs." 
4 | (:require [clojure.edn :as edn]) 5 | (:import (org.apache.kafka.common.serialization Deserializer StringDeserializer LongDeserializer IntegerDeserializer ByteArrayDeserializer) 6 | (java.io PushbackReader ByteArrayInputStream))) 7 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 8 | ;; Implementors - Please read notes in serializers.clj 9 | ;; 10 | ;; A general word of caution: 11 | ;; Many applications and developers have a tendancy to serialize things directly from user input to Kafka. 12 | ;; Be aware that this is a potential attack vector, especially during deserialization. Always validate your inputs! 13 | ;; Consider yourself warned. Not that anyone really wants YOUR data anyway, however someone may send cat pictures to your 14 | ;; Storm job as a result. Or worse. 15 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 16 | 17 | ;;Options vs. just passing raw params? 18 | ;; for performance, it seems better to close over any functions passed rather than pass a map with function lookups here 19 | ;; despite the cumbersome arity/impl requirements - most implementors don't need all fns 20 | (defn deserializer 21 | ^Deserializer 22 | ([deserialize-fn] 23 | (deserializer deserialize-fn nil nil)) 24 | ^Deserializer 25 | ([deserialize-fn configure-fn close-fn] 26 | (reify 27 | Deserializer 28 | (configure [_ configs is-key?] 29 | "Configures a deserializer as necessary, for any stateful configuration. 30 | Typically Clojure-based serializers will not need an implementation for this function. 31 | A common use-case is to differentiate between deserializing a key vs. a value. 32 | 33 | See the source of org.apache.kafka.common.serialization.StringDeserializer for an example." 34 | (when configure-fn 35 | (configure-fn configs is-key?))) 36 | (deserialize [_ topic data] 37 | "Main deserialization function. All deserializers must implement this function." 38 | (deserialize-fn topic data)) 39 | (close [_] 40 | "Closes the deserializer. 41 | Any stateful deserializers should implement close. 42 | Close may be called multiple times and thus must be idempotent." 43 | (when close-fn 44 | (close-fn)))))) 45 | 46 | (defn byte-array-deserializer 47 | "Kafka raw byte array deserializer. 48 | Useful for value deserialization." 49 | ^Deserializer [] 50 | (ByteArrayDeserializer.)) 51 | 52 | (defn integer-deserializer 53 | "Kafka integer deserializer. 54 | Useful for key deserialization." 55 | ^Deserializer [] 56 | (IntegerDeserializer.)) 57 | 58 | (defn long-deserializer 59 | "Kafka long deserializer. 60 | Useful for key deserialization." 61 | ^Deserializer [] 62 | (LongDeserializer.)) 63 | 64 | (defn string-deserializer 65 | "Kafka string deserializer. 66 | Useful for key deserialization." 67 | ^Deserializer [] 68 | (StringDeserializer.)) 69 | 70 | (deftype EdnDeserializer [opts] 71 | Deserializer 72 | (configure [_ _ _]) 73 | (deserialize [_ _ data] 74 | (when data 75 | (with-open [r (PushbackReader. (clojure.java.io/reader (ByteArrayInputStream. data)))] 76 | ;;Can't remember if this binding is needed anymore with safer edn/read, but we like safe(r/ish) via edn/read 77 | ;;Hey you're sending raw EDN over the network, you like to live on the wild side, friend! 78 | (binding [*read-eval* false] 79 | (edn/read (or opts {}) r))))) 80 | (close [_])) 81 | 82 | (defn edn-deserializer 83 | "An EDN deserializer for Kafka. 
84 | Contents of each item serialized must fit in memory. 85 | 86 | > Note: Any users of EDN deserializers should note the usual serialization/deserialization attack vectors. 87 | You should always validate any data before it is serialized so that an attack may not be executed on deserialization. 88 | Although EDN facilities try to protect you against this, nothing in this life is ever for sure. Be vigilant." 89 | (^EdnDeserializer [] (edn-deserializer nil)) 90 | (^EdnDeserializer [opts] 91 | (EdnDeserializer. opts))) 92 | 93 | (deftype SimpleEdnDeserializer [opts] 94 | Deserializer 95 | (configure [_ _ _]) 96 | (deserialize [_ _ data] 97 | (edn/read-string (or opts {}) (String. ^bytes data "UTF-8"))) 98 | (close [_])) 99 | 100 | (defn simple-edn-deserializer 101 | "A Simple EDN deserializer for Kafka. 102 | Useful for value deserialization." 103 | ^SimpleEdnDeserializer 104 | ([] (simple-edn-deserializer nil)) 105 | ^SimpleEdnDeserializer 106 | ([opts] 107 | (SimpleEdnDeserializer. opts))) 108 | 109 | (deftype KeywordDeserializer [] 110 | Deserializer 111 | (configure [_ _ _]) 112 | (deserialize [_ _ data] 113 | (when data 114 | (keyword (String. ^bytes data "UTF-8")))) 115 | (close [_])) 116 | 117 | (defn keyword-deserializer 118 | "A deserializer that deserializes string values as keywords. 119 | Useful for key deserializers." 120 | ^Deserializer [] 121 | (KeywordDeserializer.)) 122 | 123 | (deftype DebugDeserializer [logging-fn ^Deserializer deserializer] 124 | Deserializer 125 | (configure [_ configs is-key] 126 | (logging-fn {:deserializer deserializer 127 | :fn :configure 128 | :configs configs 129 | :is-key is-key}) 130 | (.configure deserializer configs is-key)) 131 | (deserialize [_ topic data] 132 | (logging-fn {:deserializer deserializer 133 | :fn :serialize 134 | :topic topic 135 | :data data}) 136 | (.deserialize deserializer topic data)) 137 | (close [_] 138 | (logging-fn {:deserializer deserializer 139 | :fn :close}) 140 | (.close deserializer))) 141 | 142 | (defn debug-deserializer 143 | "Simple debug serializer that wraps your deserializer and desired logging function. 144 | The logging function should take at least a single arity. 145 | The function will receive a map of state information with the following possible keys, which you may choose to destructure accordingly: 146 | 147 | * :deserializer - An instance of the deserializer itself 148 | * :fn - Keyword name of the function being logged. Possible values `[:configure :serialize :close]` 149 | * :configs - Optional, present when configuring serializer, and only applicable for certain types of serializers 150 | * :is-key - Optional, present when configuring the serializer, and only applicable for certain types of serializers 151 | * :topic The topic being serialized, when calling serialize. 152 | * :data The data being serialized, when calling serialize. 153 | 154 | Example usage: 155 | 156 | `(debug-deserializer 157 | (fn [{:keys [fn deserializer configs is-key topic data] :as m}] 158 | (timbre/debug \"full debug map:\" m) 159 | (when data 160 | (timbre/info \"data:\" data))) 161 | (edn-deserializer))`" 162 | ^DebugDeserializer [logging-fn ^Deserializer deserializer] 163 | (DebugDeserializer. logging-fn deserializer)) 164 | 165 | ;;TODO: composite deserializer? 
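;; A brief usage sketch, added for illustration only. It assumes nothing beyond the
;; `deserializer` helper and `edn-deserializer` defined above; the var names are hypothetical.
(comment
  ;; reify-based deserializer built via the helper fn: upper-cases incoming UTF-8 strings
  (def shouting-deserializer
    (deserializer (fn [_topic data]
                    (when data
                      (.toUpperCase (String. ^bytes data "UTF-8"))))))

  ;; EDN deserializer with reader opts, e.g. a :default handler for unknown tagged literals
  (def lenient-edn-deserializer
    (edn-deserializer {:default (fn [tag value] [tag value])})))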
166 | -------------------------------------------------------------------------------- /src/franzy/serialization/serializers.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.serialization.serializers 2 | "Implementations of core Apache Kafka and Franzy serializers. 3 | For more serializers, see Franzy docs." 4 | (:import (org.apache.kafka.common.serialization LongSerializer Serializer IntegerSerializer StringSerializer ByteArraySerializer) 5 | (java.io ByteArrayOutputStream))) 6 | 7 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 8 | ;; Notes: 9 | ;; 10 | ;; These serializers (and deserializers) are used to send your data to/from Kafka. As such, any implementors should 11 | ;; be very sensitive to performance, state, and memory usage. Your serializer should ideally be barebones, stateless, 12 | ;; and close using the provided close method or each time within the serialize call (if the overhead to open/close is minimal). 13 | ;; The serializers/deserializers provided use deftype as an optimization and for JVM friendliness. 14 | ;; A convenience method that reifies a type given some set of related functions is available if you do not want/cannot, or do not 15 | ;; need the semantics of deftype. You should prefer deftype, however, for performance critical applications. 16 | ;; 17 | ;; 2 ways of constructing serializers that are acceptable 18 | ;; 19 | ;; 1. Using the serializer convenience function to reify a type - use this if you are lazy, want a quick-off, etc. 20 | ;; 2. Using deftype, implementing "Serializer". Use this if you want the possibility of using your serializer from 21 | ;; other JVM languages via a named type. 22 | ;; 23 | ;; Quite often in Kafka, your data will be consumed from a variety of places. If you're not 100% Clojure, I strongly 24 | ;; advise using deftype and enabling AOT compilation so your serializer can be used easily from Java, Scala, Groovy, 25 | ;; etc. Of course you should probably be writing this logic in Clojure, because you are that girl/guy. 26 | ;; 27 | ;; All these serializers have to fit in memory. If you need something more, you'll have to write some lower-level Kafka encoding/decoding 28 | ;; You can then register them on the server. This is close to the way the old Storm Kafka serialization worked, but all has changed in Kafka 0.9 29 | ;; These serializers piggy-back on the byte array serializer built-in, which is the most flexible way, albeit with the following caveats: 30 | ;; 31 | ;; 1. Your data must fit into memory as it sends a raw byte array across the wire. 32 | ;; 2. Your data must not be some weird byte format that Kafka doesn't understand or it will throw CRC errors if you have those checks turned on. 33 | ;; Stick to conventional formats,m your are not special. (why are you always my former co-workers?) 34 | ;; 3. You probably shouldn't be sending a gig or something to/from Kafka. Chances are no one likes your data anyway. 35 | ;; The network overhead alone of bringing down a single record of your monstrosity will negate the reason for using Kafka. 36 | ;; 37 | ;; Nevertheless, stupid people do indeed live among us. Before you implement your own serializer, think why. 38 | ;; If you wish to serialize something to leverage better compression, closer to your use-case, etc, you're doing it right. 
39 | ;; Bearing in mind these warnings/diatribes, if you do really need large data, I recommend chunking it across several records. 40 | ;; You should do so atomically and block writes until you are sure those records have comitted in that partition so you know the order, otherwise it will 41 | ;; be in your unfortunate hands to re-assemble chunks in the correct order, when they arrive, if they arrive when you poll for records. 42 | ;; 43 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 44 | 45 | ;;TODO: refactor + schema 46 | ;;Options vs. just passing raw params? 47 | ;; for performance, it seems better to close over the values rather than pass a map with function lookups here despite the cumbersome arity/impl requirements 48 | (defn serializer 49 | ^Serializer 50 | ([serializer-fn] 51 | (serializer serializer-fn nil nil)) 52 | ^Serializer 53 | ([serializer-fn configure-fn close-fn] 54 | (reify 55 | Serializer 56 | (configure [_ configs is-key?] 57 | "Configures a serializer as necessary, for any stateful configuration. 58 | Typically Clojure-based serializers will not need an implementation for this function. 59 | A common use-case is to differentiate between serializing a key vs. a value. 60 | 61 | See the source of org.apache.kafka.common.serialization.StringSerializer for an example." 62 | (when configure-fn 63 | (configure-fn configs is-key?))) 64 | (serialize [_ topic data] 65 | "Main deserialization function. All deserializers must implement this function." 66 | (serializer-fn topic data)) 67 | (close [_] 68 | "Closes the serializer. 69 | Any stateful serializers should implement close. 70 | Close may be called multiple times and thus must be idempotent." 71 | 72 | (when close-fn (close-fn)))))) 73 | 74 | (defn byte-array-serializer 75 | "Kafka raw byte array serializer. 76 | Useful for value serialization." 77 | ^Serializer [] 78 | (ByteArraySerializer.)) 79 | 80 | (defn string-serializer 81 | "Kafka string serializer. 82 | This serializer allows serializing values without a key." 83 | ^Serializer [] 84 | (StringSerializer.)) 85 | 86 | (defn integer-serializer 87 | "Kafka integer serializer. 88 | Useful for key serialization." 89 | ^Serializer [] 90 | (IntegerSerializer.)) 91 | 92 | (defn long-serializer 93 | "Kafka long serializer. 94 | Useful for key serialization." 95 | ^Serializer [] 96 | (LongSerializer.)) 97 | 98 | (deftype EdnSerializer [opts] 99 | Serializer 100 | (configure [_ _ _]) 101 | (serialize [_ _ data] 102 | ;;TODO: process + inject more options? better defaults via configure or opts? 103 | ;;no reason to close bos, but we do so to keep clean 104 | (with-open [bos (ByteArrayOutputStream. 1024)] 105 | (with-open [w (if opts (clojure.java.io/writer bos opts) (clojure.java.io/writer bos))] 106 | (binding [*print-length* false 107 | *out* w] 108 | (pr data))) 109 | ;;death to efficiency, but easiest way without writing something low-level to encode a stream directly into Kafka 110 | (.toByteArray bos))) 111 | (close [_])) 112 | 113 | (defn edn-serializer 114 | (^EdnSerializer [] (edn-serializer nil)) 115 | (^EdnSerializer [opts] 116 | (EdnSerializer. opts))) 117 | 118 | (deftype SimpleEdnSerializer [] 119 | Serializer 120 | (configure [_ _ _]) 121 | (serialize [_ _ data] 122 | (some-> data pr-str .getBytes)) 123 | (close [_])) 124 | 125 | (defn simple-edn-serializer 126 | "A simple EDN deserializer for small amounts of data for Kafka. 127 | Useful for value serialization." 
128 | ^SimpleEdnSerializer [] 129 | (SimpleEdnSerializer.)) 130 | 131 | (deftype KeywordSerializer [] 132 | Serializer 133 | (configure [_ _ _]) 134 | (serialize [_ _ data] 135 | (some-> data name .getBytes)) 136 | (close [_])) 137 | 138 | (defn keyword-serializer 139 | "A serializer that serializers string values as keywords. 140 | Useful for key serializers." 141 | ^KeywordSerializer [] 142 | (KeywordSerializer.)) 143 | 144 | (deftype DebugSerializer [logging-fn ^Serializer serializer] 145 | Serializer 146 | (configure [_ configs is-key] 147 | (logging-fn {:serializer serializer 148 | :fn :configure 149 | :configs configs 150 | :is-key is-key}) 151 | (.configure serializer configs is-key)) 152 | (serialize [_ topic data] 153 | (logging-fn {:serializer serializer 154 | :fn :serialize 155 | :topic topic 156 | :data data}) 157 | (.serialize serializer topic data)) 158 | (close [_] 159 | (logging-fn {:serializer serializer 160 | :fn :close}) 161 | (.close serializer))) 162 | 163 | (defn debug-serializer 164 | "Simple debug serializer that wraps your serializer and desired logging function. 165 | The logging function should take at least a single arity. 166 | The function will receive a map of state information with the following possible keys, which you may choose to destructure accordingly: 167 | 168 | * :serializer - An instance of the serializer itself 169 | * :fn - Keyword name of the function being logged. Possible values `[:configure :serialize :close]` 170 | * :configs - Optional, present when configuring serializer, and only applicable for certain types of serializers 171 | * :is-key - Optional, present when configuring the serializer, and only applicable for certain types of serializers 172 | * :topic The topic being serialized, when calling serialize. 173 | * :data The data being serialized, when calling serialize. 174 | 175 | Example usage: 176 | 177 | `(debug-serializer 178 | (fn [{:keys [fn serializer configs is-key topic data] :as m}] 179 | (timbre/debug \"full debug map:\" m) 180 | (when data 181 | (timbre/info \"data:\" data))) 182 | (edn-serializer))`" 183 | ^DebugSerializer 184 | [logging-fn ^Serializer serializer] 185 | (DebugSerializer. logging-fn serializer)) 186 | 187 | ;;TODO: composite serializer? 188 | -------------------------------------------------------------------------------- /test/franzy/clients/cluster_tests.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.cluster-tests 2 | (:require [midje.sweet :refer :all] 3 | [franzy.common.models.types :as mt] 4 | [franzy.clients.cluster :as cluster] 5 | [schema.core :as s] 6 | [franzy.common.models.schema :as fs]) 7 | (:import (org.apache.kafka.common Cluster))) 8 | 9 | (def mock-node-count 5) 10 | 11 | (def mock-topic-partitions 12 | [{:topic "excel-database" :partition 0} 13 | {:topic "excel-database" :partition 1} 14 | {:topic "consulting-fees-from-replacing-nodejs" :partition 1010} 15 | {:topic "fried-side-items" :partition 50}]) 16 | 17 | (defn create-mock-cluster [] 18 | (cluster/mock-cluster mock-node-count mock-topic-partitions #{"michael-bolton"})) 19 | 20 | (def mock-cluster (create-mock-cluster)) 21 | 22 | 23 | (fact 24 | "The nodes in a cluster may be queried." 
25 | (let [nodes (cluster/nodes mock-cluster)] 26 | (count nodes) => 5 27 | (s/check [fs/Node] nodes) => nil)) 28 | 29 | (fact 30 | "The partitions for a topic may be queried" 31 | (cluster/partitions-for-topic mock-cluster "fried-side-items") => [{:topic "fried-side-items", 32 | :partition 50, 33 | :leader {:id 1, :host "127.0.0.1", :port 2181}, 34 | :replicas [{:id 1, :host "127.0.0.1", :port 2181}], 35 | :in-sync-replicas [{:id 1, :host "127.0.0.1", :port 2181}]}]) 36 | 37 | (fact 38 | "Available partitions in the cluster can be listed." 39 | (let [partitions-info (cluster/available-partitions mock-cluster "excel-database")] 40 | (s/check [fs/PartitionInfo] partitions-info) => nil 41 | partitions-info => [{:topic "excel-database", 42 | :partition 0, 43 | :leader {:id 1, :host "127.0.0.1", :port 2181}, 44 | :replicas [{:id 1, :host "127.0.0.1", :port 2181}], 45 | :in-sync-replicas [{:id 1, :host "127.0.0.1", :port 2181}]} 46 | {:topic "excel-database", 47 | :partition 1, 48 | :leader {:id 1, :host "127.0.0.1", :port 2181}, 49 | :replicas [{:id 1, :host "127.0.0.1", :port 2181}], 50 | :in-sync-replicas [{:id 1, :host "127.0.0.1", :port 2181}]}])) 51 | 52 | (fact 53 | "Clusters canc be bootstrapped from a list of inet addresses or inet address maps" 54 | (let [boot-cluster (cluster/bootstrap-cluster-hosts [{:host-name "127.0.0.1" :port 9092} {:host-name "127.0.0.1" :port 9093}])] 55 | (instance? Cluster boot-cluster) => true 56 | (count (cluster/nodes boot-cluster)) => 2)) 57 | 58 | (fact 59 | "Empty clusters can be created." 60 | (let [empty-cluster (cluster/empty-cluster)] 61 | (instance? Cluster empty-cluster) => true 62 | (count (cluster/nodes empty-cluster)) => 0)) 63 | 64 | (fact 65 | "Partition leaders for the cluster can be queried for by topic partition." 66 | (let [leader (cluster/leader-for mock-cluster "consulting-fees-from-replacing-nodejs" 1010)] 67 | (s/check fs/Node leader) => nil 68 | leader => {:id 1, :host "127.0.0.1", :port 2181})) 69 | 70 | (fact 71 | "Cluster nodes can be queried by id." 72 | (let [node (cluster/node-by-id mock-cluster 1)] 73 | (s/check fs/Node node) => nil 74 | node => {:id 1, :host "127.0.0.1", :port 9092})) 75 | 76 | (fact 77 | "The partition count for a topic in the cluster can be queried." 78 | (cluster/partition-count mock-cluster "excel-database") => 2) 79 | 80 | (fact 81 | "The partition info for a topic partition can be queried." 82 | (let [partition-info (cluster/partition-info-for-topic mock-cluster "excel-database" 1)] 83 | (s/check fs/PartitionInfo partition-info) => nil 84 | partition-info => {:topic "excel-database", 85 | :partition 1, 86 | :leader {:id 1, :host "127.0.0.1", :port 2181}, 87 | :replicas [{:id 1, :host "127.0.0.1", :port 2181}], 88 | :in-sync-replicas [{:id 1, :host "127.0.0.1", :port 2181}]})) 89 | 90 | (fact 91 | "The partition info for a node in the cluster can be queried." 92 | (let [partition-info-coll (cluster/partitions-for-node mock-cluster 1)] 93 | (nil? partition-info-coll) => false 94 | (coll? partition-info-coll) => true 95 | (empty? 
partition-info-coll) => false 96 | (count partition-info-coll) => (count mock-topic-partitions) 97 | (s/check [fs/PartitionInfo] partition-info-coll) => nil 98 | partition-info-coll => [{:topic "excel-database", 99 | :partition 0, 100 | :leader {:id 1, :host "127.0.0.1", :port 2181}, 101 | :replicas [{:id 1, :host "127.0.0.1", :port 2181}], 102 | :in-sync-replicas [{:id 1, :host "127.0.0.1", :port 2181}]} 103 | {:topic "excel-database", 104 | :partition 1, 105 | :leader {:id 1, :host "127.0.0.1", :port 2181}, 106 | :replicas [{:id 1, :host "127.0.0.1", :port 2181}], 107 | :in-sync-replicas [{:id 1, :host "127.0.0.1", :port 2181}]} 108 | {:topic "consulting-fees-from-replacing-nodejs", 109 | :partition 1010, 110 | :leader {:id 1, :host "127.0.0.1", :port 2181}, 111 | :replicas [{:id 1, :host "127.0.0.1", :port 2181}], 112 | :in-sync-replicas [{:id 1, :host "127.0.0.1", :port 2181}]} 113 | {:topic "fried-side-items", 114 | :partition 50, 115 | :leader {:id 1, :host "127.0.0.1", :port 2181}, 116 | :replicas [{:id 1, :host "127.0.0.1", :port 2181}], 117 | :in-sync-replicas [{:id 1, :host "127.0.0.1", :port 2181}]}])) 118 | 119 | (fact 120 | "The partition info for a topic can be queried." 121 | (let [partition-info-coll (cluster/partitions-for-topic mock-cluster "excel-database")] 122 | (nil? partition-info-coll) => false 123 | (coll? partition-info-coll) => true 124 | (empty? partition-info-coll) => false 125 | (count partition-info-coll) => 2 126 | (s/check [fs/PartitionInfo] partition-info-coll) => nil 127 | partition-info-coll => [{:topic "excel-database", 128 | :partition 0, 129 | :leader {:id 1, :host "127.0.0.1", :port 2181}, 130 | :replicas [{:id 1, :host "127.0.0.1", :port 2181}], 131 | :in-sync-replicas [{:id 1, :host "127.0.0.1", :port 2181}]} 132 | {:topic "excel-database", 133 | :partition 1, 134 | :leader {:id 1, :host "127.0.0.1", :port 2181}, 135 | :replicas [{:id 1, :host "127.0.0.1", :port 2181}], 136 | :in-sync-replicas [{:id 1, :host "127.0.0.1", :port 2181}]}])) 137 | 138 | (fact 139 | "The topics in a cluster can be queried." 140 | (let [topics (cluster/topics mock-cluster)] 141 | (nil? topics) => false 142 | (coll? topics) => true 143 | (empty? topics) => false 144 | (count topics) => 3 145 | (s/check #{s/Str} topics) => nil 146 | topics => #{"excel-database" "fried-side-items" "consulting-fees-from-replacing-nodejs"})) -------------------------------------------------------------------------------- /test/franzy/clients/consumer/client_tests.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.consumer.client-tests 2 | (:require [midje.sweet :refer :all] 3 | [franzy.clients.consumer.client :as cl])) 4 | 5 | (facts "Clients should instantiate properly." 6 | (fact "Invoking make-consumer with just a config map should not throw a ClassCastException when converting config map to Properties instance." 
7 | (let [config {:bootstrap.servers "127.0.0.1" 8 | :value.deserializer "org.apache.kafka.common.serialization.ByteArrayDeserializer" 9 | :value.serializer "org.apache.kafka.common.serialization.ByteArrayDeserializer"}] 10 | (cl/make-consumer config) =not=> (throws ClassCastException "clojure.lang.PersistentArrayMap cannot be cast to java.util.Properties")))) -------------------------------------------------------------------------------- /test/franzy/clients/consumer/schema_tests.clj: -------------------------------------------------------------------------------- 1 | (ns franzy.clients.consumer.schema-tests 2 | (:require [midje.sweet :refer :all] 3 | [schema.core :as s] 4 | [franzy.clients.consumer.schema :as cs] 5 | [franzy.clients.consumer.types :as ct] 6 | [franzy.clients.consumer.defaults :as defaults] 7 | [franzy.clients.consumer.callbacks :as callbacks]) 8 | (:import (java.nio ByteBuffer))) 9 | 10 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 11 | ;; Tests to ensure that was the schema involves, we don't murder Kafka. 12 | ;; While these tests are perhaps repetitive and overlapping, we test schema more and more aggressively as time goes on. 13 | ;; In the real-world, someone can and will make a giant mistake. The problem is Kafka, especially as features are added 14 | ;; will happily accept our bad data. Putting a distributed system in an undefined is not fun, and thus, this mess. 15 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 16 | 17 | (facts 18 | "ConsumerOptions must pass schema validation." 19 | (let [valid-options {:consumer-records-fn seq 20 | :poll-timeout-ms 3000 21 | :offset-commit-callback (callbacks/offset-commit-callback) 22 | :rebalance-listener-callback (callbacks/consumer-rebalance-listener)}] 23 | (fact 24 | "Consumer options that are valid pass validation." 25 | (s/check cs/ConsumerOptions valid-options) => nil 26 | (s/check cs/ConsumerOptions {}) => nil 27 | (s/check cs/ConsumerOptions (dissoc valid-options :consumer-records-fn)) => nil 28 | (s/check cs/ConsumerOptions (dissoc valid-options :poll-timeout-ms)) => nil 29 | (s/check cs/ConsumerOptions (dissoc valid-options :offset-commit-callback)) => nil 30 | (s/check cs/ConsumerOptions (dissoc valid-options :rebalance-listener-callback)) => nil 31 | (s/check cs/ConsumerOptions (assoc valid-options :offset-commit-callback nil)) => nil) 32 | (fact 33 | "Consumer option defaults must pass validation." 34 | (s/check cs/ConsumerOptions (defaults/default-consumer-options)) => nil) 35 | (fact 36 | "Consumer options that are not valid must fail validation." 37 | (s/check cs/ConsumerOptions (assoc valid-options :consumer-records-fn "number muncher")) =not=> nil 38 | (s/check cs/ConsumerOptions (assoc valid-options :poll-timeout-ms "unnecessary kitchen accessory")) =not=> nil 39 | (s/check cs/ConsumerOptions (assoc valid-options :poll-timeout-ms nil)) =not=> nil 40 | (s/check cs/ConsumerOptions (assoc valid-options :consumer-records-fn nil)) =not=> nil 41 | (s/check cs/ConsumerOptions (assoc valid-options :rebalance-listener-callback "foolery")) =not=> nil 42 | (s/check cs/ConsumerOptions (assoc valid-options :rebalance-listener-callback identity)) =not=> nil 43 | 44 | (s/check cs/ConsumerOptions (assoc valid-options :offset-commit-callback identity)) =not=> nil))) 45 | 46 | ;; (set! Soviet Paranoia) 47 | (facts 48 | "ConsumerRecord must pass schema validation." 
49 | (let [valid-cr {:topic "My book about me" 50 | :partition 79 51 | :offset 4 52 | :key 15 53 | :value {:type "Spaghetti Monster" 54 | :description "Pastafarian" 55 | :other "Touched by his noodly appendage" 56 | :win-api-enum-values [true false 0 "FileNotFound" {:yes "certainly!"}]}}] 57 | (fact 58 | "Valid consumer records must pass schema validation." 59 | (s/check cs/ConsumerRecord valid-cr) => nil 60 | (s/check cs/ConsumerRecord (assoc valid-cr :value [1 2 3])) => nil 61 | (s/check cs/ConsumerRecord (assoc valid-cr :topic 32)) =not=> nil 62 | (s/check cs/ConsumerRecord (assoc valid-cr :partition Integer/MAX_VALUE)) => nil 63 | (s/check cs/ConsumerRecord (assoc valid-cr :offset Long/MAX_VALUE)) => nil 64 | (s/check cs/ConsumerRecord (assoc valid-cr :offset 9)) => nil) 65 | (fact 66 | "Invalid consumer records must fail schema validation." 67 | (s/check cs/ConsumerRecord (dissoc valid-cr :topic)) =not=> nil 68 | (s/check cs/ConsumerRecord (dissoc valid-cr :partition)) =not=> nil 69 | (s/check cs/ConsumerRecord (dissoc valid-cr :offset)) =not=> nil 70 | (s/check cs/ConsumerRecord (dissoc valid-cr :key)) =not=> nil 71 | (s/check cs/ConsumerRecord (dissoc valid-cr :value)) =not=> nil 72 | (s/check cs/ConsumerRecord (assoc valid-cr :topic 32)) =not=> nil 73 | (s/check cs/ConsumerRecord (assoc valid-cr :topic nil)) =not=> nil 74 | (s/check cs/ConsumerRecord (assoc valid-cr :topic [])) =not=> nil 75 | (s/check cs/ConsumerRecord (assoc valid-cr :topic #{})) =not=> nil 76 | (s/check cs/ConsumerRecord (assoc valid-cr :topic '())) =not=> nil 77 | (s/check cs/ConsumerRecord (assoc valid-cr :topic {})) =not=> nil 78 | (s/check cs/ConsumerRecord (assoc valid-cr :partition "99")) =not=> nil 79 | (s/check cs/ConsumerRecord (assoc valid-cr :partition [23])) =not=> nil 80 | (s/check cs/ConsumerRecord (assoc valid-cr :partition nil)) =not=> nil 81 | (s/check cs/ConsumerRecord (assoc valid-cr :partition (+ 1 Integer/MAX_VALUE))) =not=> nil 82 | (s/check cs/ConsumerRecord (assoc valid-cr :partition -1)) =not=> nil 83 | (s/check cs/ConsumerRecord (assoc valid-cr :partition Double/MAX_VALUE)) =not=> nil 84 | (s/check cs/ConsumerRecord (assoc valid-cr :offset nil)) =not=> nil 85 | (s/check cs/ConsumerRecord (assoc valid-cr :offset [23])) =not=> nil 86 | (s/check cs/ConsumerRecord (assoc valid-cr :offset Double/NaN)) =not=> nil 87 | (s/check cs/ConsumerRecord (assoc valid-cr :offset (+ Long/MAX_VALUE 1))) =not=> nil 88 | (s/check cs/ConsumerRecord (assoc valid-cr :offset -1)) =not=> nil 89 | (s/check cs/ConsumerRecord (assoc valid-cr :key nil)) =not=> nil))) 90 | 91 | (facts 92 | "OffsetMetadata must pass schema validation." 93 | (let [offset-metadata {:offset 1 94 | :metadata "what network is not social?"}] 95 | (fact 96 | "Valid offset metadata must pass schema validation." 97 | (s/check cs/OffsetAndMetadata offset-metadata) => nil 98 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :offset Long/MAX_VALUE)) => nil 99 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :offset 0)) => nil 100 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :metadata nil)) => nil 101 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :metadata "")) => nil) 102 | (fact 103 | "Invalid offset metadata must fail schema validation." 
104 | (s/check cs/OffsetAndMetadata {}) =not=> nil 105 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :offset -1)) =not=> nil 106 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :offset nil)) =not=> nil 107 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :offset (+ Long/MAX_VALUE 1))) =not=> nil 108 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :offset [])) =not=> nil 109 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :offset #{})) =not=> nil 110 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :offset '())) =not=> nil 111 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :offset {})) =not=> nil 112 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :offset "1")) =not=> nil 113 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :key 123)) =not=> nil 114 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :metadata 1)) =not=> nil 115 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :metadata [1 2 3])) =not=> nil 116 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :metadata {})) =not=> nil 117 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :metadata #{})) =not=> nil 118 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :metadata '())) =not=> nil 119 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :metadata true)) =not=> nil 120 | (s/check cs/OffsetAndMetadata (dissoc offset-metadata :metadata)) =not=> nil 121 | (s/check cs/OffsetAndMetadata (assoc offset-metadata :meat-data "meat on meat sandwich")) =not=> nil))) 122 | 123 | (facts 124 | "OffsetResetStrategyEnum must pass schema validation." 125 | (let [strat :earliest 126 | strategies [:latest :earliest :none]] 127 | (fact 128 | "Valid offset reset strategies pass schema validation." 129 | (s/check cs/OffsetResetStrategyEnum strat) => nil 130 | (doseq [reset-strat strategies] 131 | (s/check cs/OffsetResetStrategyEnum reset-strat) => nil)) 132 | (fact 133 | "Invalid offset reset strategies must fail schema validation." 134 | (s/check cs/OffsetResetStrategyEnum "") =not=> nil 135 | (s/check cs/OffsetResetStrategyEnum "the beginning of time") =not=> nil 136 | (s/check cs/OffsetResetStrategyEnum nil) =not=> nil 137 | (s/check cs/OffsetResetStrategyEnum [:earliest]) =not=> nil 138 | (s/check cs/OffsetResetStrategyEnum "earliest") =not=> nil))) 139 | 140 | (facts 141 | "PartitionAssignment values must pass schema validation." 142 | (let [assignment {:topics ["userdata-to-sell" "modified-privacy policies" "cheesy-buttons"] 143 | :user-data (ByteBuffer/allocate 1)}] 144 | (fact 145 | "PartitionAssignment values that are valid must pass schema validation." 146 | (s/check cs/PartitionAssignment assignment) => nil 147 | (s/check cs/PartitionAssignment (assoc assignment :user-data nil)) => nil 148 | (s/check cs/PartitionAssignment (assoc assignment :topics [])) => nil 149 | (s/check cs/PartitionAssignment (assoc assignment :topics nil)) => nil 150 | (s/check cs/PartitionAssignment (assoc assignment :topics "laughing to the bank")) => nil) 151 | (fact 152 | "PartitionAssignment values that are invalid must not pass schema validation." 153 | (s/check cs/PartitionAssignment nil) =not=> nil 154 | (s/check cs/PartitionAssignment {}) =not=> nil 155 | (s/check cs/PartitionAssignment (assoc assignment :topic "funding schools for a tax dodge")) =not=> nil 156 | (s/check cs/PartitionAssignment (assoc assignment :user-data (byte-array 1))) =not=> nil))) 157 | 158 | (facts 159 | "ConsumerRebalanceListener callbacks must pass schema validation." 
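  ;; The schema appears to require an actual ConsumerRebalanceListener instance, such as the ones
  ;; produced by franzy.clients.consumer.callbacks, so a bare Clojure fn like `identity` is expected
  ;; to fail validation. Presumably a constructed listener also passes, e.g.
  ;;   (s/check cs/ConsumerRebalanceListenerCallback (callbacks/consumer-rebalance-listener)) ;=> nil, presumably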
160 |   (fact
161 |     "Valid callbacks pass schema validation."
162 |     (s/check cs/ConsumerRebalanceListenerCallback (callbacks/no-op-consumer-rebalance-listener)) => nil)
163 |   (fact
164 |     "Invalid callbacks fail schema validation."
165 |     (s/check cs/ConsumerRebalanceListenerCallback identity) =not=> nil))
166 |
167 | ;;TODO: more validations
168 | (facts
169 |   "Consumer configurations must pass schema validation."
170 |   (let [cc {:bootstrap.servers ["127.0.0.1:8080"]}]
171 |     (fact
172 |       "Consumer configurations that are valid pass schema validation."
173 |       (s/check cs/ConsumerConfig cc) => nil)
174 |     (fact
175 |       "Consumer configurations that are invalid fail schema validation."
176 |       (s/check cs/ConsumerConfig {}) =not=> nil
177 |       (s/check cs/ConsumerConfig (assoc cc :format.harddrive true)) =not=> nil
178 |       (s/check cs/ConsumerConfig (dissoc cc :bootstrap.servers)) =not=> nil
179 |       (s/check cs/ConsumerConfig (assoc cc :bootstrap.servers [])) =not=> nil
180 |       (s/check cs/ConsumerConfig (assoc cc :bootstrap.servers nil)) =not=> nil)))
181 |
182 | ;;cold, hard
183 | (facts
184 |   "ConsumerRecord records must pass schema validation."
185 |   (let [cr (ct/->ConsumerRecord "coifs" 12 0 "abc"
186 |                                 {:vicious-bytes (byte-array 1)
187 |                                  :bad-remakes ["I am Legend" "Star Trek 2" "Hitchhiker's Guide to the Galaxy"]})]
188 |     (fact
189 |       "Valid consumer records pass validation."
190 |       (s/check cs/ConsumerRecord cr) => nil
191 |       (s/check cs/ConsumerRecord (ct/map->ConsumerRecord {:topic "hot-dog-bun-allocation" :partition 0 :offset 12 :key 123 :value 0})) => nil)
192 |     (fact
193 |       "Invalid consumer records fail validation."
194 |       (s/check cs/ConsumerRecord (assoc cr :topic nil)) =not=> nil
195 |       (s/check cs/ConsumerRecord (assoc cr :partition nil)) =not=> nil
196 |       (s/check cs/ConsumerRecord (assoc cr :offset nil)) =not=> nil
197 |       ;;I have become....map
198 |       (s/check cs/ConsumerRecord (dissoc cr :key)) =not=> nil)))
199 |
200 |
--------------------------------------------------------------------------------
/test/franzy/clients/decoding_tests.clj:
--------------------------------------------------------------------------------
1 | (ns franzy.clients.decoding-tests
2 |   (:require [midje.sweet :refer :all]
3 |             [schema.core :as s]
4 |             [franzy.clients.consumer.schema :as cs]
5 |             [franzy.clients.consumer.types :as ct]
6 |             [franzy.clients.codec :as codec])
7 |   (:import (org.apache.kafka.clients.consumer ConsumerRecord OffsetAndMetadata)))
8 |
9 | (facts
10 |   "ConsumerRecord objects should be properly decoded to Clojure."
11 |   (let [jcr (ConsumerRecord. "murdered-starks" 99 500 :sean-bean 1)
12 |         cr (ct/->ConsumerRecord "murdered-starks" 99 500 :sean-bean 1)]
13 |     (fact
14 |       ;;Department of Redundancy Lives On!
15 |       "ConsumerRecord objects should decode properly to ConsumerRecord Records."
16 |       (codec/decode jcr) => cr
17 |       (s/check cs/ConsumerRecord (codec/decode jcr)) => nil)))
18 |
19 | (facts
20 |   "OffsetAndMetadata objects should be properly decoded to Clojure."
21 |   (let [jomd (OffsetAndMetadata. 33 "What is dead may never die, but can be flayed.")
22 |         omd (ct/->OffsetMetadata 33 "What is dead may never die, but can be flayed.")
23 |         omd-map {:offset 33 :metadata "What is dead may never die, but can be flayed."}]
24 |     ;;TODO: if switching to record, use this instead, and change map version to call function instead of codec decode protocol
25 |     ;(fact
26 |     ;  "OffsetAndMetadata objects should be decoded to OffsetMetadata records."
27 |   ; (codec/decode jomd) => omd
28 |   ; (s/check cs/OffsetAndMetadata (codec/decode jomd)) => nil)
29 |     (fact
30 |       "OffsetAndMetadata objects should be decoded properly to Clojure maps."
31 |       (codec/decode jomd) => omd-map
32 |       (s/check cs/OffsetAndMetadata (codec/decode jomd)) => nil)))
33 |
--------------------------------------------------------------------------------
/test/franzy/clients/partitions_tests.clj:
--------------------------------------------------------------------------------
1 | (ns franzy.clients.partitions-tests
2 |   (:require [midje.sweet :refer :all]
3 |             [franzy.clients.partitions :as partitions]
4 |             [franzy.common.models.schema :as fs]
5 |             [schema.core :as s]))
6 |
7 | (fact
8 |   "A collection of topic partitions can be generated given a topic and number of partitions"
9 |   (let [topic-partitions (partitions/topic-partition-range "socks-with-sandals-say-no" 5)]
10 |     (nil? topic-partitions) => false
11 |     (coll? topic-partitions) => true
12 |     (empty? topic-partitions) => false
13 |     (count topic-partitions) => 5
14 |     (s/check [fs/TopicPartition] topic-partitions) => nil))
15 |
16 | (fact
17 |   "A set of topics can be extracted from a collection of topic partitions."
18 |   (let [topics (partitions/topics-from-partitions [{:topic "sjw" :partition 99} {:topic "sjw" :partition 0} {:topic "too-much-time" :partition 5}])]
19 |     (nil? topics) => false
20 |     (coll? topics) => true
21 |     (empty? topics) => false
22 |     (set? topics) => true
23 |     (count topics) => 2
24 |     topics => (contains "sjw" "too-much-time")))
25 |
26 | (fact
27 |   "A topic partition can be converted from topic partition info."
28 |   (let [topic-partition (partitions/topic-partition-info->topic-partition {:topic "excel-database",
29 |                                                                            :partition 15,
30 |                                                                            :leader {:id 1, :host "127.0.0.1", :port 2181},
31 |                                                                            :replicas [{:id 1, :host "127.0.0.1", :port 2181}],
32 |                                                                            :in-sync-replicas [{:id 1, :host "127.0.0.1", :port 2181}]})]
33 |     (nil? topic-partition) => false
34 |     (coll? topic-partition) => true
35 |     (empty? topic-partition) => false
36 |     (map? topic-partition) => true
37 |     (s/check fs/TopicPartition topic-partition) => nil
38 |     topic-partition => {:topic "excel-database" :partition 15}))
39 |
40 | (fact
41 |   "Partition info can be created from a topic partition."
42 |   (let [mock-partition-info (partitions/mock-partition-info [{:topic "excel-database" :partition 15}])]
43 |     (nil? mock-partition-info) => false
44 |     (coll? mock-partition-info) => true
45 |     (empty? mock-partition-info) => false
46 |     (s/check [fs/PartitionInfo] mock-partition-info) => nil
47 |     (first mock-partition-info) => {:topic "excel-database",
48 |                                     :partition 15,
49 |                                     :leader {:id 1, :host "127.0.0.1", :port 2181},
50 |                                     :replicas [{:id 1, :host "127.0.0.1", :port 2181}],
51 |                                     :in-sync-replicas [{:id 1, :host "127.0.0.1", :port 2181}]}))
52 |
--------------------------------------------------------------------------------
/test/franzy/clients/producer/schema_test.clj:
--------------------------------------------------------------------------------
1 | (ns franzy.clients.producer.schema-test
2 |   (:require [midje.sweet :refer :all]
3 |             [schema.core :as s]
4 |             [franzy.clients.producer.schema :as ps]
5 |             [franzy.clients.producer.defaults :as defaults]
6 |             [franzy.clients.producer.callbacks :as callbacks])
7 |   (:import (java.util.concurrent TimeUnit)))
8 |
9 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10 | ;; Tests to ensure that as the schema evolves, we don't murder Kafka.
11 | ;; While these tests are perhaps repetitive and overlapping, we test schema more and more aggressively as time goes on.
12 | ;; In the real world, someone can and will make a giant mistake. The problem is that Kafka, especially as features are added,
13 | ;; will happily accept our bad data. Putting a distributed system in an undefined state is not fun, and thus, this mess.
14 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15 |
16 | (facts
17 |   "ProducerOptions must pass schema validation."
18 |   (let [valid-options {:close-timeout 1000
19 |                        :close-timeout-unit TimeUnit/MILLISECONDS
20 |                        :send-callback (callbacks/send-callback)}]
21 |     (fact
22 |       "ProducerOptions that are valid pass schema validation."
23 |       (s/check ps/ProducerOptions valid-options) => nil
24 |       (s/check ps/ProducerOptions {}) => nil
25 |       (s/check ps/ProducerOptions (dissoc valid-options :close-timeout)) => nil
26 |       (s/check ps/ProducerOptions (dissoc valid-options :close-timeout-unit)) => nil
27 |       (s/check ps/ProducerOptions (dissoc valid-options :send-callback)) => nil)
28 |     (fact
29 |       "ProducerOptions that are invalid must fail schema validation."
30 |       (s/check ps/ProducerOptions (assoc valid-options :close-timeout "5000")) =not=> nil
31 |       (s/check ps/ProducerOptions (assoc valid-options :close-timeout nil)) =not=> nil
32 |       (s/check ps/ProducerOptions (assoc valid-options :send-callback "pickle party")) =not=> nil)
33 |     (fact
34 |       "Producer option defaults must pass schema validation."
35 |       (s/check ps/ProducerOptions (defaults/default-producer-options)) => nil)))
36 |
37 | (facts
38 |   "ProducerRecords must pass schema validation."
39 |   (let [valid-pr {:topic "people-at-the-beach-at-2-pm"
40 |                   :partition 12
41 |                   :key "master"
42 |                   :value {:dana "only zuul"}}]
43 |     (fact
44 |       "ProducerRecords that are valid pass schema validation, despite the best intentions of..."
45 |       (s/check ps/ProducerRecord valid-pr) => nil
46 |       (s/check ps/ProducerRecord (dissoc valid-pr :partition)) => nil ;;I don't like that this works, but according to Kafka it does
47 |       (s/check ps/ProducerRecord (dissoc valid-pr :key)) => nil ;;as does this bad idea
48 |       ;;below we test with extreme paranoia, as screwing up keys and values is about as bad as we can do
49 |       (s/check ps/ProducerRecord (assoc valid-pr :value "bran is at home with a raisin")) => nil
50 |       (s/check ps/ProducerRecord (assoc valid-pr :value 1234)) => nil
51 |       (s/check ps/ProducerRecord (assoc valid-pr :value [1 2 3 4])) => nil
52 |       (s/check ps/ProducerRecord (assoc valid-pr :value '("cats" "will" "break" "your" "glass" "in" "the" "morning"))) => nil
53 |       (s/check ps/ProducerRecord (assoc valid-pr :value #{99 "years" "is" "a" "longgg" "long" "time"})) => nil
54 |       (s/check ps/ProducerRecord (assoc valid-pr :value "bran is at home with a raisin")) => nil
55 |       (s/check ps/ProducerRecord (assoc valid-pr :key "leftover chinese never reheats well")) => nil
56 |       (s/check ps/ProducerRecord (assoc valid-pr :key 1234)) => nil
57 |       (s/check ps/ProducerRecord (assoc valid-pr :key [1 2 3 4])) => nil
58 |       (s/check ps/ProducerRecord (assoc valid-pr :key '("only" "love" "can" "break" "your" "heart"))) => nil
59 |       (s/check ps/ProducerRecord (assoc valid-pr :key #{92 "goals" "had" "the" "Gretzky"})) => nil)
60 |     (fact
61 |       "ProducerRecords that are invalid fail schema validation."
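      ;; Reading the valid cases above together with the invalid ones below: :partition and :key may
      ;; be omitted (Kafka will pick a partition and treat the key as absent), but :topic and :value
      ;; must be present and non-nil, and :topic must be a non-empty string.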
62 | (s/check ps/ProducerRecord {}) =not=> nil 63 | (s/check ps/ProducerRecord (assoc valid-pr :topic nil)) =not=> nil 64 | (s/check ps/ProducerRecord (assoc valid-pr :topic "")) =not=> nil 65 | (s/check ps/ProducerRecord (assoc valid-pr :topic 2314)) =not=> nil 66 | (s/check ps/ProducerRecord (dissoc valid-pr :topic)) =not=> nil 67 | (s/check ps/ProducerRecord (assoc valid-pr :partition nil)) =not=> nil 68 | (s/check ps/ProducerRecord (assoc valid-pr :partition "remaking ghostbusters")) =not=> nil 69 | (s/check ps/ProducerRecord (assoc valid-pr :value nil)) =not=> nil 70 | 71 | (s/check ps/ProducerRecord (assoc valid-pr :key nil)) =not=> nil 72 | (s/check ps/ProducerRecord (dissoc valid-pr :value)) =not=> nil))) 73 | 74 | (facts 75 | "RecordMetadata must pass schema validation." 76 | (let [valid-metadata {:topic "a-boy-his-blob-and-a-trashbag-of-illicit-substances" 77 | :partition 1 78 | :offset 99}] 79 | (fact 80 | "Valid record metadata must pass schema validation." 81 | (s/check ps/RecordMetadata valid-metadata) => nil 82 | (s/check ps/RecordMetadata (assoc valid-metadata :partition Integer/MAX_VALUE)) => nil 83 | (s/check ps/RecordMetadata (assoc valid-metadata :offset Long/MAX_VALUE)) => nil) 84 | (fact 85 | "Invalid record metadata must fail schema validation." 86 | (s/check ps/RecordMetadata {}) =not=> nil 87 | (s/check ps/RecordMetadata (assoc valid-metadata :topic nil)) =not=> nil 88 | (s/check ps/RecordMetadata (assoc valid-metadata :topic "")) =not=> nil 89 | (s/check ps/RecordMetadata (assoc valid-metadata :partition nil)) =not=> nil 90 | (s/check ps/RecordMetadata (assoc valid-metadata :offset nil)) =not=> nil 91 | (s/check ps/RecordMetadata (assoc valid-metadata :topic nil)) =not=> nil 92 | (s/check ps/RecordMetadata (dissoc valid-metadata :topic)) =not=> nil 93 | (s/check ps/RecordMetadata (dissoc valid-metadata :partition)) =not=> nil 94 | (s/check ps/RecordMetadata (dissoc valid-metadata :offset)) =not=> nil 95 | (s/check ps/RecordMetadata (assoc valid-metadata :offset Double/NaN)) =not=> nil 96 | (s/check ps/RecordMetadata (assoc valid-metadata :partition Double/NaN)) =not=> nil 97 | (s/check ps/RecordMetadata (assoc valid-metadata :partition Double/NEGATIVE_INFINITY)) =not=> nil 98 | (s/check ps/RecordMetadata (assoc valid-metadata :partition Double/POSITIVE_INFINITY)) =not=> nil))) 99 | 100 | ;;TODO: more tests 101 | (facts 102 | "ProducerConfig must pass schema validation." 103 | (let [pc {:bootstrap.servers ["127.0.0.1:2181"]}] 104 | (fact 105 | "Valid producer configurations pass schema validation." 106 | (s/check ps/ProducerConfig pc) => nil) 107 | (fact 108 | "Invalid producer configurations fail schema validation." 
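      ;; The minimal valid configuration above suggests ProducerConfig only hard-requires a non-empty
      ;; :bootstrap.servers; the checks below confirm that empty maps, missing or empty server lists,
      ;; and made-up keys (e.g. :format.harddrive) all fail validation.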
109 |       (s/check ps/ProducerConfig {}) =not=> nil
110 |       (s/check ps/ProducerConfig (assoc pc :format.harddrive true)) =not=> nil
111 |       (s/check ps/ProducerConfig (dissoc pc :bootstrap.servers)) =not=> nil
112 |       (s/check ps/ProducerConfig (assoc pc :bootstrap.servers [])) =not=> nil
113 |       (s/check ps/ProducerConfig (assoc pc :bootstrap.servers nil)) =not=> nil)))
114 |
--------------------------------------------------------------------------------
/test/franzy/core_test.clj:
--------------------------------------------------------------------------------
1 | (ns franzy.core-test
2 |   (:require [midje.sweet :refer :all]))
3 |
--------------------------------------------------------------------------------
/test/franzy/serialization/serialization_tests.clj:
--------------------------------------------------------------------------------
1 | (ns franzy.serialization.serialization-tests
2 |   (:require [midje.sweet :refer :all]
3 |             [franzy.serialization.serializers :as serializers]
4 |             [franzy.serialization.deserializers :as deserializers])
5 |   (:import (org.apache.kafka.common.serialization Deserializer Serializer)))
6 |
7 | (facts
8 |   "String serializers serialize strings." :serializers
9 |   (let [serializer (serializers/string-serializer)
10 |         deserializer (deserializers/string-deserializer)
11 |         topic "wild-kingdom"
12 |         data "A quick brown fox jumped over the fence and then was hit by a gas guzzling SUV."]
13 |     (fact
14 |       "A string serializer should be able to produce the same string in a round trip." :serializers
15 |       (->> (.serialize serializer topic data)
16 |            (.deserialize deserializer topic)) => data)))
17 |
18 | (facts
19 |   "Long serializers correctly serialize longs." :serializers
20 |   (let [serializer (serializers/long-serializer)
21 |         deserializer (deserializers/long-deserializer)
22 |         topic "machine-settings"
23 |         data Long/MAX_VALUE]
24 |     (fact
25 |       "A long serializer should be able to produce the same long in a round trip."
26 |       (->> (.serialize serializer topic data)
27 |            (.deserialize deserializer topic)) => data)))
28 |
29 | (facts
30 |   "Integer serializers serialize integers." :serializers
31 |   (let [serializer (serializers/integer-serializer)
32 |         deserializer (deserializers/integer-deserializer)
33 |         topic "liters-of-carbonated-sugar"
34 |         data Integer/MAX_VALUE]
35 |     (fact
36 |       "An integer serializer should be able to produce the same integer in a round trip."
37 |       (->> (.serialize serializer topic data)
38 |            (.deserialize deserializer topic)) => data)))
39 |
40 | (facts
41 |   "Byte array serializers serialize byte arrays." :serializers
42 |   (let [serializer (serializers/byte-array-serializer)
43 |         deserializer (deserializers/byte-array-deserializer)
44 |         topic "not-safe-for-work"
45 |         data (byte-array 55)]
46 |     (fact
47 |       "A byte array serializer should be able to produce the same byte array in a round trip."
48 |       (->> (.serialize serializer topic data)
49 |            (.deserialize deserializer topic)) => data)))
50 |
51 | ;;TODO: more tests here
52 | (facts
53 |   "EDN serializers serialize EDN."
:serializers 54 | (let [serializer (serializers/edn-serializer) 55 | deserializer (deserializers/edn-deserializer) 56 | simple-serializer (serializers/simple-edn-serializer) 57 | simple-deserializer (deserializers/simple-edn-deserializer) 58 | topic "coffee-talk" 59 | data {:clo-to-my-jure 60 | {:string-key "can be a string" 61 | :key-master :gatekeeper 62 | :long-key Long/MAX_VALUE 63 | :int-key Integer/MAX_VALUE 64 | :double-key Double/MAX_VALUE 65 | :short-key Short/MIN_VALUE 66 | :vector-key ["A good year is not a tire." 56.2 nil {:the-key "is bbq"}] 67 | :map-key {:nested {:cool-runnings '("alligators" "with" "nailguns" "cranially" "mounted")} 68 | :michael-stipe "loves the swim move with his hands" 69 | :have-you-a-frequency-kenneth? false 70 | :frequency-of-kenneth nil} 71 | :set-key #{"the set" "a last bastion" "of unique values" "traveling through the internets"} 72 | :list-key '("listerine" "is" "recommended" "when standing" "over" "my" "desk") 73 | :zero-key 0 74 | :nil-key nil}}] 75 | (fact 76 | "An edn serializer should be able to produce the same edn in a round trip." 77 | (->> (.serialize serializer topic data) 78 | (.deserialize deserializer topic)) => data) 79 | (fact 80 | "A simple edn serializer should be able to produce the same edn in a round trip." 81 | (->> (.serialize simple-serializer topic data) 82 | (.deserialize simple-deserializer topic)) => data) 83 | (fact 84 | "For smaller results, a simple edn serializer/deserializer should produce the same results as its big brother." 85 | (->> (.serialize simple-serializer topic data) 86 | (.deserialize simple-deserializer topic)) => (->> (.serialize serializer topic data) 87 | (.deserialize deserializer topic))) 88 | (fact 89 | "Mixing and matching edn serializers for small results should produce the same, misguided results." 90 | ;;I expect stupidity because I've met myself 91 | (->> (.serialize serializer topic data) 92 | (.deserialize simple-deserializer topic)) => data 93 | (->> (.serialize simple-serializer topic data) 94 | (.deserialize deserializer topic)) => data) 95 | (fact 96 | "Large results serialize." :high-memory 97 | ;;TODO: go bigger, but this is annoying for running tests when gc overhead limit exceeds... 98 | (let [large-data (vec (repeat 4096 Long/MAX_VALUE)) ;(vec (repeat Long/MAX_VALUE UUID/randomUUID)) 99 | ] 100 | (->> (.serialize serializer topic large-data) 101 | (.deserialize deserializer topic)) => large-data)))) 102 | 103 | ;;TODO: more tests here 104 | (facts 105 | "Keyword serializers serialize keywords." 
:serializers
106 |   (let [serializer (serializers/keyword-serializer)
107 |         deserializer (deserializers/keyword-deserializer)
108 |         topic "not-safe-for-work"
109 |         ;;we test a "big" keyword to be sure, because we like big
110 |         ;;credit: JAMC - bside version, a soundtrack for this test - https://www.youtube.com/watch?v=rZjDdXRC5N8
111 |         data :unlike-the-mole-im-not-in-a-hole-and-I-cant-see-anyway-just-like-a-doll-im-one-foot-tall-but-dolls-cant-see-anyway-the-frozen-stare-the-clothes-and-hair-these-make-me-taste-like-a-man-tied-to-a-door-chained-to-a-floor-an-hour-glass-grain-of-sand-Life-in-a-sack-Is-coming-back-Im-like-the-clock-Im-like-the-clock-Im-like-the-clock-On-the-wall-On-the-wall-On-the-wall-Swim-in-the-sea-Swim-inside-me-But-you-cant-swim-far-away-I-never-grew-Covered-up-by-you-And-nothing-grows-anyway-Life-in-a-sack-is-coming-back-Im-like-the-clock-Im-like-the-clock-Im-like-the-clock-On-the-wall-On-the-wall-On-the-wall
112 |         ]
113 |     (fact
114 |       "A keyword serializer should be able to produce the same keyword in a round trip."
115 |       (->> (.serialize serializer topic data)
116 |            (.deserialize deserializer topic)) => data)))
117 |
118 | (facts
119 |   "Debug serializers wrap serializers." :serializers
120 |   (let [serializer (serializers/edn-serializer)
121 |         deserializer (deserializers/edn-deserializer)
122 |         logging-fn identity
123 |         debug-ser (serializers/debug-serializer logging-fn serializer)
124 |         debug-deser (deserializers/debug-deserializer logging-fn deserializer)
125 |         topic "not-safe-for-work"
126 |         data {:recommended-combinations [["machego quince"] ["cheese" "chocolate"] ["burritos" "all the time"]
127 |                                          ["Michael Bay movies" "garbage can"]]}]
128 |     (fact
129 |       "A debug serializer should proxy its data."
130 |       (->> (.serialize debug-ser topic data)
131 |            (.deserialize deserializer topic)) => data)
132 |     (fact
133 |       "A debug deserializer should proxy its data."
134 |       (->> (.serialize serializer topic data)
135 |            (.deserialize debug-deser topic)) => data)
136 |     (fact
137 |       "A debug serializer and deserializer should proxy data."
138 |       (->> (.serialize debug-ser topic data)
139 |            (.deserialize debug-deser topic)) => data)
140 |     (fact
141 |       "A debug serializer should pass its data in a map during serialization."
142 |       (let [log-fn (fn [m]
143 |                      (:data m) => data
144 |                      (:serializer m) => serializer)
145 |             debug-data-ser (serializers/debug-serializer log-fn serializer)]
146 |         (->> (.serialize debug-data-ser topic data))))))
147 |
148 | (facts
149 |   "Custom serializers can be created by reifying and closing over Clojure functions" :serializers
150 |   (fact
151 |     "Custom serializers that are created with reify satisfy the Serializer interface."
152 |     (instance? Serializer (serializers/serializer (fn [_ _]))) => true)
153 |   (fact
154 |     "Custom deserializers that are created with reify satisfy the Deserializer interface."
155 |     (instance? Deserializer (deserializers/deserializer (fn [_ _]))) => true)
156 |   (let [^Deserializer deserializer (deserializers/deserializer (fn [_ ^bytes data]
157 |                                                                  (some-> ^bytes data (String. "UTF-8"))))
158 |         ^Serializer serializer (serializers/serializer (fn [_ ^String data]
159 |                                                           (some-> data name .getBytes)))
160 |         topic "shoegazers"
161 |         data "https://www.youtube.com/watch?v=U2qbMP4YSu0"]
162 |     (fact
163 |       "A reified string serializer can round-trip serialize strings to Kafka."
164 | (->> (.serialize serializer topic data) 165 | (.deserialize deserializer topic)) => data))) 166 | --------------------------------------------------------------------------------