72 | final Set<String> names = np.stringPropertyNames();
73 | for (final String name : names) {
74 | final String value = np.getProperty(name);
75 | this.set_prop(name, value);
76 | }
77 | }
78 | }
79 |
80 | /**
81 | * Transform properties into a JSON string.
82 | *
83 | * @return JSON string
84 | */
85 | public String to_json() {
86 | final Gson gson = new Gson();
87 | final String props_json = gson.toJson(this.props);
88 | return props_json;
89 | }
90 |
91 | /**
92 | * Transform properties into a pretty-printed JSON string.
93 | *
94 | * @return pretty-printed JSON string
95 | */
96 | public String to_json_pretty() {
97 | final Gson gson = new GsonBuilder().setPrettyPrinting().create();
98 | final String props_json = gson.toJson(this.props);
99 | return props_json;
100 | }
101 |
102 | /**
103 | * Set properties from a JSON string.
104 | *
105 | * @param json [String] JSON string with properties in format key:value
106 | */
107 | public void from_json(final String json) {
108 | final Gson gson = new Gson();
109 | final Properties new_props = gson.fromJson(json, Properties.class);
110 | this.merge_props(new_props);
111 | }
112 |
113 | /**
114 | * Set properties from a JSON string.
115 | *
116 | * Allows deciding whether properties stored already should be discarded first.
117 | *
118 | * @param json [String] JSON string with properties in format key:value
119 | * @param replace [Boolean] if true, discard all previously stored properties before merging
120 | */
121 | public void from_json(final String json, final Boolean replace) {
122 |
123 | // handle replace option
124 | if (replace) {
125 | // discard all previously stored properties
126 | this.props = new Properties();
127 | }
128 |
129 | // parse JSON and add to this.props
130 | final Gson gson = new Gson();
131 | final Properties new_props = gson.fromJson(json, Properties.class);
132 | this.merge_props(new_props);
133 | }
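// Usage sketch (hypothetical keys/values): starting from {"a":"1"},
// from_json("{\"b\":\"2\"}", false) keeps "a" and adds "b", while
// from_json("{\"b\":\"2\"}", true) discards "a" before adding "b".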
134 |
135 | /**
136 | * toString override delegating to to_json_pretty()
137 | */
138 | @Override
139 | public String toString() {
140 | return this.to_json_pretty();
141 | }
142 |
143 |
144 | /**
145 | * Example usage
146 | *
147 | */
148 | public static void main(final String... args) throws Exception {
149 |
150 | final Kafka_props props = new Kafka_props();
151 |
152 | props.set_prop("this key", "value");
153 |
154 | System.out.println();
155 | System.out.println(props.props());
156 |
157 | System.out.println();
158 | System.out.println(props.to_json());
159 |
160 | System.out.println();
161 | System.out.println(props);
162 |
163 | props.from_json("{'test_from_json':'ok', 'test_from_json_2':'also ok'}");
164 | System.out.println();
165 | System.out.println(props);
166 | }
167 |
168 | }
169 |
--------------------------------------------------------------------------------
/man/kafka_records_class.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/kafka_records.R
3 | \name{kafka_records_class}
4 | \alias{kafka_records_class}
5 | \title{R6 Class for Kafka Records}
6 | \description{
7 | R6 Class for Kafka Records
8 |
9 | R6 Class for Kafka Records
10 | }
11 | \details{
12 | This class handles kafka records.
13 | It manages polling for new messages, retrieval of messages from the JVM,
14 | local storage of message batches, and iteration over and forwarding of
15 | messages or message batches for consumption.
16 |
17 | It abstracts storage, polling and forwarding into an iterable interface where
18 | messages can be accessed via \code{next_record()} and \code{next_record_batch()}.
19 |
20 | The main problem here is that message consumption is not trivial for a couple
21 | of reasons: (1) The R interface has to be in sync with the Java side of things
22 | - there is a records object at the Java side.
23 | (2) Kafka fetches messages in batches. While batches might be as small as
24 | 0 or 1 message, the default is to consume messages in batches of 500. This
25 | makes consuming single messages a non-trivial process, since the batch size
26 | is determined by how the consumer options were set, e.g. timeouts and max
27 | fetch sizes, and by the number of available messages on the topic - all things
28 | outside the specific call to the poll method which executes data retrieval.
29 | (3) Extra processing has to be done to translate records from Java into R.
30 | }
31 | \section{Methods}{
32 | \subsection{Public methods}{
33 | \itemize{
34 | \item \href{#method-new}{\code{kafka_records_class$new()}}
35 | \item \href{#method-next_record}{\code{kafka_records_class$next_record()}}
36 | \item \href{#method-next_record_batch}{\code{kafka_records_class$next_record_batch()}}
37 | \item \href{#method-clone}{\code{kafka_records_class$clone()}}
38 | }
39 | }
40 | \if{html}{\out{<hr>}}
41 | \if{html}{\out{<a id="method-new"></a>}}
42 | \if{latex}{\out{\hypertarget{method-new}{}}}
43 | \subsection{Method \code{new()}}{
44 | Create a new instance of the class.
45 | \subsection{Usage}{
46 | \if{html}{\out{<div class="r">}}\preformatted{kafka_records_class$new(parent)}\if{html}{\out{</div>}}
47 | }
48 |
49 | \subsection{Arguments}{
50 | \if{html}{\out{<div class="arguments">}}
51 | \describe{
52 | \item{\code{parent}}{enclosing consumer object}
53 | }
54 | \if{html}{\out{</div>}}
55 | }
56 | }
57 | \if{html}{\out{<hr>}}
58 | \if{html}{\out{<a id="method-next_record"></a>}}
59 | \if{latex}{\out{\hypertarget{method-next_record}{}}}
60 | \subsection{Method \code{next_record()}}{
61 | Returns the next record ready for consumption. If the last poll returned
62 | a batch of messages, those will be returned one at a time.
63 | If all of them have been returned already, a new poll will be initiated.
64 |
65 | If the poll does not return any records a new poll will be initiated
66 | until data is returned.
67 | \subsection{Usage}{
68 | \if{html}{\out{<div class="r">}}\preformatted{kafka_records_class$next_record(timeout_ms = Inf)}\if{html}{\out{</div>}}
69 | }
70 |
71 | \subsection{Arguments}{
72 | \if{html}{\out{<div class="arguments">}}
73 | \describe{
74 | \item{\code{timeout_ms}}{defaults to `Inf`.
75 | Time for which the poll will wait for data;
76 | passed through to kafka_consumer$poll()}
77 | }
78 | \if{html}{\out{</div>}}
79 | }
80 | }
81 | \if{html}{\out{<hr>}}
82 | \if{html}{\out{<a id="method-next_record_batch"></a>}}
83 | \if{latex}{\out{\hypertarget{method-next_record_batch}{}}}
84 | \subsection{Method \code{next_record_batch()}}{
85 | Returns all available, unconsumed messages. If no unconsumed messages
86 | are available it will poll for a new batch and return it.
87 |
88 | If the poll does not return any records a new poll will be initiated
89 | until data is returned.
90 | \subsection{Usage}{
91 | \if{html}{\out{<div class="r">}}\preformatted{kafka_records_class$next_record_batch(timeout_ms = Inf)}\if{html}{\out{</div>}}
92 | }
93 |
94 | \subsection{Arguments}{
95 | \if{html}{\out{<div class="arguments">}}
96 | \describe{
97 | \item{\code{timeout_ms}}{defaults to `Inf`.
98 | Time for which the poll will wait for data;
99 | passed through to kafka_consumer$poll()}
100 | }
101 | \if{html}{\out{</div>}}
102 | }
103 | }
104 | \if{html}{\out{<hr>}}
105 | \if{html}{\out{<a id="method-clone"></a>}}
106 | \if{latex}{\out{\hypertarget{method-clone}{}}}
107 | \subsection{Method \code{clone()}}{
108 | The objects of this class are cloneable with this method.
109 | \subsection{Usage}{
110 | \if{html}{\out{<div class="r">}}\preformatted{kafka_records_class$clone(deep = FALSE)}\if{html}{\out{</div>}}
111 | }
112 |
113 | \subsection{Arguments}{
114 | \if{html}{\out{<div class="arguments">}}
115 | \describe{
116 | \item{\code{deep}}{Whether to make a deep clone.}
117 | }
118 | \if{html}{\out{</div>}}
119 | }
120 | }
121 | }
122 |
--------------------------------------------------------------------------------
/tests/testthat/test_kafka_consumer.R:
--------------------------------------------------------------------------------
1 | context("Basic Kafka Consumer Interaction")
2 |
3 |
4 |
5 | skip_if_kafka_on_is_missing <- function ()
6 | {
7 | if ( Sys.getenv("KAFKA_IS_ON") != 'true' ) {
8 | skip("Skipped: EnvVar 'KAFKA_IS_ON' is != 'true' ")
9 | }
10 | }
11 |
12 |
13 | test_that(
14 | desc = "Consumer seeking",
15 | code =
16 | {
17 |
18 | skip_if_kafka_on_is_missing()
19 |
20 | cns <- kafka_consumer()
21 |
22 | # single subscription
23 | cns$start()
24 | cns$topics_subscribe("test500000")
25 |
26 |
27 | # seek to beginning and read
28 | cns$topics_seek_to_beginning()
29 |
30 | # check for content retrieved
31 | d <- cns$consume_next()
32 | expect_true(!is.null(d))
33 |
34 |
35 | # consume some and seek to start again
36 | cns$consume_next()
37 | cns$consume_next()
38 |
39 | cns$topics_seek_to_beginning()
40 | expect_true(
41 | cns$topics_offsets()$offset == 0
42 | )
43 |
44 |
45 | # seek to end works
46 | cns$topics_seek_to_end()
47 | expect_true(
48 | cns$topics_offsets()$offset == 500000
49 | )
50 |
51 |
52 | # seek to start yet again
53 | cns$topics_seek_to_beginning()
54 | expect_true(
55 | cns$topics_offsets()$offset == 0
56 | )
57 |
58 |
59 |
60 | }
61 | )
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 | test_that(
70 | desc = "Start/End/Running",
71 | code =
72 | {
73 |
74 |
75 | skip_if_kafka_on_is_missing()
76 |
77 | cns <- kafka_consumer()
78 | cns$start()
79 |
80 | # running after startup?
81 | expect_true(cns$running())
82 |
83 |
84 | # not running after shutdown?
85 | expect_false(cns$end()$running())
86 |
87 | }
88 | )
89 |
90 | test_that(
91 | desc = "Props",
92 | code =
93 | {
94 |
95 |
96 | skip_if_kafka_on_is_missing()
97 |
98 | cns <- kafka_consumer()
99 | cns$start()
100 |
101 |
102 | # props exists?
103 | expect_true("list" %in% class(cns$props()))
104 | expect_true( length(cns$props()) > 0)
105 |
106 | # setting props works?
107 | cns$props(max.poll.records = 200)
108 | expect_true(cns$props()$max.poll.records == "200")
109 |
110 | # setting props via list works
111 | cns$props( .properties = list(max.poll.records = 333, a = 47) )
112 | expect_true(
113 | cns$props()$max.poll.records == "333" &
114 | cns$props()$a == "47"
115 | )
116 | }
117 | )
118 |
119 |
120 | test_that(
121 | desc = "Consumer topic list",
122 | code =
123 | {
124 |
125 | skip_if_kafka_on_is_missing()
126 |
127 | cns <- kafka_consumer()
128 | cns$start()
129 |
130 | # check if test topics are present
131 | expect_true(
132 | length(cns$topics_list()) >= 4
133 | )
134 |
135 | # check defaults to topic list names only
136 | expect_true(
137 | "character" %in% class(cns$topics_list())
138 | )
139 |
140 |
141 | # check that full=TRUE gives more infos
142 | expect_true(
143 | "list" %in% class(cns$topics_list(full = TRUE))
144 | )
145 | expect_true(
146 | "data.frame" %in% class(cns$topics_list(full = TRUE)[[1]])
147 | )
148 |
149 | }
150 | )
151 |
152 |
153 |
154 |
155 | test_that(
156 | desc = "Consumer topic subscription",
157 | code =
158 | {
159 |
160 | skip_if_kafka_on_is_missing()
161 |
162 | cns <- kafka_consumer()
163 | cns$start()
164 |
165 |
166 | # empty subscription on startup
167 | expect_true(
168 | length(cns$topics_subscription()) == 0
169 | )
170 |
171 |
172 | # single subscription
173 | cns$topics_subscribe("test3")
174 | expect_true(
175 | cns$topics_subscription() == "test3"
176 | )
177 |
178 |
179 | # replaced subscription
180 | cns$topics_subscribe(c("test", "test2"))
181 | expect_true(
182 | all(c("test", "test2") %in% cns$topics_subscription())
183 | )
184 |
185 |
186 | # no subscription at all
187 | cns$topics_subscribe(character())
188 | expect_true(
189 | length(cns$topics_subscription()) == 0
190 | )
191 |
192 | }
193 | )
194 |
195 |
196 |
197 |
198 |
199 | test_that(
200 | desc = "Consumer polling for messages",
201 | code =
202 | {
203 |
204 | skip_if_kafka_on_is_missing()
205 |
206 | cns <- kafka_consumer()
207 | cns$start()
208 |
209 |
210 | # consume messages and expect the timeout not to be significantly exceeded
211 | cns$topics_subscribe("test500000")
212 |
213 | for ( i in 1:100 ){
214 | expect_true(as.numeric(system.time(cns$poll(1000))["elapsed"]) < 1.1 )
215 | }
216 |
217 | for ( i in 1:100 ){
218 | expect_true(
219 | as.numeric(system.time(cns$poll(100))["elapsed"]) < 0.2
220 | )
221 | }
222 |
223 |
224 | # use commit and expect no error
225 | for ( i in 1:100 ){
226 |
227 | cns$props()
228 | cns$topics_offsets()
229 | cns$poll(100)
230 | cns$commit()
231 | cns$commit(sync = FALSE)
232 | cns$topics_offsets()
233 |
234 | }
235 | expect_true(TRUE)
236 |
237 | }
238 | )
239 |
240 |
241 |
242 |
--------------------------------------------------------------------------------
/tests/testthat/test_kafka.R:
--------------------------------------------------------------------------------
1 | context("2 Basic Kafka Interaction")
2 |
3 |
4 |
5 | skip_if_kafka_on_is_missing <- function ()
6 | {
7 | if ( Sys.getenv("KAFKA_IS_ON") != 'true' ) {
8 | skip("Skipped: EnvVar 'KAFKA_IS_ON' is != 'true' ")
9 | }
10 | }
11 |
12 |
13 | test_that(
14 | desc = "Consumer seeking",
15 | code =
16 | {
17 |
18 | skip_if_kafka_on_is_missing()
19 |
20 | cns <- kafka_consumer()
21 |
22 | # single subscription
23 | cns$start()
24 | cns$topics_subscribe("test500000")
25 |
26 |
27 | # seek to beginning and read
28 | cns$topics_seek_to_beginning()
29 |
30 | # check for content retrieved
31 | d <- cns$consume_next()
32 | expect_true(!is.null(d))
33 |
34 |
35 | # consume some and seek to start again
36 | cns$consume_next()
37 | cns$consume_next()
38 |
39 | cns$topics_seek_to_beginning()
40 | expect_true(
41 | cns$topics_offsets()$offset == 0
42 | )
43 |
44 |
45 | # seek to end works
46 | cns$topics_seek_to_end()
47 | expect_true(
48 | cns$topics_offsets()$offset == 500000
49 | )
50 |
51 |
52 | # seek to start yet again
53 | cns$topics_seek_to_beginning()
54 | expect_true(
55 | cns$topics_offsets()$offset == 0
56 | )
57 |
58 |
59 |
60 | }
61 | )
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 | test_that(
70 | desc = "Start/End/Running",
71 | code =
72 | {
73 |
74 |
75 | skip_if_kafka_on_is_missing()
76 |
77 | cns <- kafka_consumer()
78 | cns$start()
79 |
80 | # running after startup?
81 | expect_true(cns$running())
82 |
83 |
84 | # not running after shutdown?
85 | expect_false(cns$end()$running())
86 |
87 | }
88 | )
89 |
90 | test_that(
91 | desc = "Props",
92 | code =
93 | {
94 |
95 |
96 | skip_if_kafka_on_is_missing()
97 |
98 | cns <- kafka_consumer()
99 | cns$start()
100 |
101 |
102 | # props exists?
103 | expect_true("list" %in% class(cns$props()))
104 | expect_true( length(cns$props()) > 0)
105 |
106 | # setting props works?
107 | cns$props(max.poll.records = 200)
108 | expect_true(cns$props()$max.poll.records == "200")
109 |
110 | # setting props via list works
111 | cns$props( .properties = list(max.poll.records = 333, a = 47) )
112 | expect_true(
113 | cns$props()$max.poll.records == "333" &
114 | cns$props()$a == "47"
115 | )
116 | }
117 | )
118 |
119 |
120 | test_that(
121 | desc = "Consumer topic list",
122 | code =
123 | {
124 |
125 | skip_if_kafka_on_is_missing()
126 |
127 | cns <- kafka_consumer()
128 | cns$start()
129 |
130 | # check if test topics are present
131 | expect_true(
132 | length(cns$topics_list()) >= 4
133 | )
134 |
135 | # check defaults to topic list names only
136 | expect_true(
137 | "character" %in% class(cns$topics_list())
138 | )
139 |
140 |
141 | # check that full=TRUE gives more infos
142 | expect_true(
143 | "list" %in% class(cns$topics_list(full = TRUE))
144 | )
145 | expect_true(
146 | "data.frame" %in% class(cns$topics_list(full = TRUE)[[1]])
147 | )
148 |
149 | }
150 | )
151 |
152 |
153 |
154 |
155 | test_that(
156 | desc = "Consumer topic subscription",
157 | code =
158 | {
159 |
160 | skip_if_kafka_on_is_missing()
161 |
162 | cns <- kafka_consumer()
163 | cns$start()
164 |
165 |
166 | # empty subscription on startup
167 | expect_true(
168 | length(cns$topics_subscription()) == 0
169 | )
170 |
171 |
172 | # single subscription
173 | cns$topics_subscribe("test3")
174 | expect_true(
175 | cns$topics_subscription() == "test3"
176 | )
177 |
178 |
179 | # replaced subscription
180 | cns$topics_subscribe(c("test", "test2"))
181 | expect_true(
182 | all(c("test", "test2") %in% cns$topics_subscription())
183 | )
184 |
185 |
186 | # no subscription at all
187 | cns$topics_subscribe(character())
188 | expect_true(
189 | length(cns$topics_subscription()) == 0
190 | )
191 |
192 | }
193 | )
194 |
195 |
196 |
197 |
198 |
199 | test_that(
200 | desc = "Consumer polling for messages",
201 | code =
202 | {
203 |
204 | skip_if_kafka_on_is_missing()
205 |
206 | cns <- kafka_consumer()
207 | cns$start()
208 |
209 |
210 | # consume messages and expect the timeout not to be significantly exceeded
211 | cns$topics_subscribe("test500000")
212 |
213 | for ( i in 1:100 ){
214 | expect_true(as.numeric(system.time(cns$poll(1000))["elapsed"]) < 1.1 )
215 | }
216 |
217 | for ( i in 1:100 ){
218 | expect_true(
219 | as.numeric(system.time(cns$poll(100))["elapsed"]) < 0.2
220 | )
221 | }
222 |
223 |
224 | # use commit and expect no error
225 | for ( i in 1:100 ){
226 |
227 | cns$props()
228 | cns$topics_offsets()
229 | cns$poll(100)
230 | cns$commit()
231 | cns$topics_offsets()
232 |
233 | }
234 | expect_true(TRUE)
235 |
236 | }
237 | )
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
252 |
--------------------------------------------------------------------------------
/R/kafka_producer_class.R:
--------------------------------------------------------------------------------
1 |
2 | #' R6 Class for Kafka Producer
3 | #'
4 | #' @import data.table
5 | #' @import jsonlite
6 | #' @import rJava
7 | #'
8 | #' @export
9 | #'
10 | kafka_producer_class <-
11 | R6::R6Class(
12 |
13 | #### options ###############################################################
14 | classname = "kafka_producer_class",
15 |
16 | portable = TRUE,
17 |
18 | parent_env = asNamespace("kafkaesque"),
19 |
20 |
21 |
22 |
23 | #### public ################################################################
24 |
25 | public =
26 | list(
27 |
28 | ## data ################################################################
29 |
30 | #' @field java_producer reference to the underlying Java object
31 | java_producer = list(),
32 |
33 |
34 |
35 | ## methods #############################################################
36 |
37 | #'
38 | #'
39 | #' @description
40 | #' Create a new producer object.
41 | #' Instead of \code{kafka_producer_class$new()} one can use \code{kafka_producer()}
42 | #'
43 | #' @return \code{self} for method chaining
44 | #'
45 | initialize =
46 | function() {
47 | self$java_producer <- rJava::.jnew("kafkaesque/Kafka_producer")
48 | },
49 |
50 |
51 | #'
52 | #' @description
53 | #' Code run when object is removed from session
54 | #'
55 | finalize =
56 | function() {
57 | self$java_producer$end()
58 | },
59 |
60 |
61 | #'
62 | #' @description
63 | #' Spin up producer and connect it to Kafka cluster
64 | #'
65 | #' @return \code{self} for method chaining
66 | #'
67 | start =
68 | function() {
69 | self$java_producer$start()
70 |
71 | # return for method chaining
72 | invisible(self)
73 | },
74 |
75 |
76 | #'
77 | #' @description
78 | #' Disconnect producer from Kafka cluster
79 | #'
80 | #' @return \code{self} for method chaining
81 | #'
82 | end =
83 | function() {
84 | self$java_producer$end()
85 |
86 | # return for method chaining
87 | invisible(self)
88 | },
89 |
90 | #'
91 | #' @description
92 | #' Disconnect and reconnect producer from Kafka cluster
93 | #'
94 | #' @return \code{self} for method chaining
95 | #'
96 | restart =
97 | function() {
98 | self$java_producer$restart()
99 |
100 | # return for method chaining
101 | invisible(self)
102 | },
103 |
104 |
105 | #'
106 | #' @description
107 | #' Whether or not the producer is active (has been started and not yet ended)
108 | #'
109 | #' @return TRUE/FALSE
110 | #'
111 | running =
112 | function() {
113 | self$java_producer$running
114 | },
115 |
116 |
117 |
118 | #'
119 | #' @param ... a series of properties provided as \code{key = "values"}
120 | #' @param .properties a list of properties provided as \code{.properties = list(key = "values", ...)}
121 | #'
122 | #' @description
123 | #' Retrieve the current set of properties.
124 | #' If properties are supplied via the \code{...} or \code{.properties}
125 | #' parameter those properties will be set.
126 | #'
127 | #' @return returns a list of properties
128 | #'
129 | #'
130 | props =
131 | function(..., .properties = NULL) {
132 |
133 | # ? set properties
134 | if ( !is.null(.properties) ){
135 | self$java_producer$props_set(
136 | .jcastToArray(names(.properties)),
137 | .jcastToArray(format(.properties, scientific = FALSE))
138 | )
139 | } else if ( length(list(...)) > 0 ){
140 | .properties <- list(...)
141 | self$java_producer$props_set(
142 | .jcastToArray(names(.properties)),
143 | .jcastToArray(format(.properties, scientific = FALSE))
144 | )
145 | }
146 |
147 | # return properties
148 | jsonlite::fromJSON(
149 | iconv(
150 | x = self$java_producer$props$to_json(),
151 | to = "UTF-8"
152 | )
153 | )
154 | },
155 |
156 | #'
157 | #'
158 | #' @param topic topic to send messages to
159 | #' @param msg message to be sent, or a character vector of messages to be sent
160 | #' @param key TBD
161 | #' @param partition TBD
162 | #'
163 | #' @description
164 | #' Send one or more messages to a topic.
165 | #' @return returns self for method chaining
166 | #'
167 | #'
168 | send =
169 | function(topic, msg, key = NULL, partition = NULL){
170 |
171 | #### TODO
172 | if( !is.null(key) || !is.null(partition)){
173 | message("TBD")
174 | return()
175 | }
176 |
177 | # send message(s)
178 | self$java_producer$send_message(topic, msg)
179 |
180 | # return for method chaining
181 | invisible(self)
182 | }
183 |
184 | ),
185 |
186 | #### private ###############################################################
187 |
188 | private = NULL
189 | )
190 |
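191 | # Usage sketch (not run), assuming a broker reachable via the default
192 | # properties and the kafka_producer() convenience constructor mentioned
193 | # in the docs above:
194 | #
195 | # prd <- kafka_producer()
196 | # prd$start()
197 | # prd$props(client.id = "kafkaesque_producer")
198 | # prd$send(topic = "test", msg = "hello from kafkaesque")
199 | # prd$end()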
--------------------------------------------------------------------------------
/man/kafka_producer_class.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/kafka_producer_class.R
3 | \name{kafka_producer_class}
4 | \alias{kafka_producer_class}
5 | \title{R6 Class for Kafka Producer}
6 | \description{
7 | R6 Class for Kafka Producer
8 |
9 | R6 Class for Kafka Producer
10 | }
11 | \section{Public fields}{
12 | \if{html}{\out{<div class="r6-fields">}}
13 | \describe{
14 | \item{\code{java_producer}}{reference to the underlying Java object}
15 | }
16 | \if{html}{\out{</div>}}
17 | }
19 | \section{Methods}{
20 | \subsection{Public methods}{
21 | \itemize{
22 | \item \href{#method-new}{\code{kafka_producer_class$new()}}
23 | \item \href{#method-finalize}{\code{kafka_producer_class$finalize()}}
24 | \item \href{#method-start}{\code{kafka_producer_class$start()}}
25 | \item \href{#method-end}{\code{kafka_producer_class$end()}}
26 | \item \href{#method-restart}{\code{kafka_producer_class$restart()}}
27 | \item \href{#method-running}{\code{kafka_producer_class$running()}}
28 | \item \href{#method-props}{\code{kafka_producer_class$props()}}
29 | \item \href{#method-send}{\code{kafka_producer_class$send()}}
30 | \item \href{#method-clone}{\code{kafka_producer_class$clone()}}
31 | }
32 | }
33 | \if{html}{\out{<hr>}}
34 | \if{html}{\out{<a id="method-new"></a>}}
35 | \if{latex}{\out{\hypertarget{method-new}{}}}
36 | \subsection{Method \code{new()}}{
37 | Create a new producer object.
38 | Instead of \code{kafka_producer_class$new()} one can use \code{kafka_producer()}
39 | \subsection{Usage}{
40 | \if{html}{\out{<div class="r">}}\preformatted{kafka_producer_class$new()}\if{html}{\out{</div>}}
41 | }
42 |
43 | \subsection{Returns}{
44 | \code{self} for method chaining
45 | }
46 | }
47 | \if{html}{\out{<hr>}}
48 | \if{html}{\out{<a id="method-finalize"></a>}}
49 | \if{latex}{\out{\hypertarget{method-finalize}{}}}
50 | \subsection{Method \code{finalize()}}{
51 | Code run when object is removed from session
52 | \subsection{Usage}{
53 | \if{html}{\out{<div class="r">}}\preformatted{kafka_producer_class$finalize()}\if{html}{\out{</div>}}
54 | }
55 |
56 | }
57 | \if{html}{\out{<hr>}}
58 | \if{html}{\out{<a id="method-start"></a>}}
59 | \if{latex}{\out{\hypertarget{method-start}{}}}
60 | \subsection{Method \code{start()}}{
61 | Spin up producer and connect it to Kafka cluster
62 | \subsection{Usage}{
63 | \if{html}{\out{<div class="r">}}\preformatted{kafka_producer_class$start()}\if{html}{\out{</div>}}
64 | }
65 |
66 | \subsection{Returns}{
67 | \code{self} for method chaining
68 | }
69 | }
70 | \if{html}{\out{<hr>}}
71 | \if{html}{\out{<a id="method-end"></a>}}
72 | \if{latex}{\out{\hypertarget{method-end}{}}}
73 | \subsection{Method \code{end()}}{
74 | Disconnect producer from Kafka cluster
75 | \subsection{Usage}{
76 | \if{html}{\out{<div class="r">}}\preformatted{kafka_producer_class$end()}\if{html}{\out{</div>}}
77 | }
78 |
79 | \subsection{Returns}{
80 | \code{self} for method chaining
81 | }
82 | }
83 | \if{html}{\out{<hr>}}
84 | \if{html}{\out{<a id="method-restart"></a>}}
85 | \if{latex}{\out{\hypertarget{method-restart}{}}}
86 | \subsection{Method \code{restart()}}{
87 | Disconnect and reconnect producer from Kafka cluster
88 | \subsection{Usage}{
89 | \if{html}{\out{<div class="r">}}\preformatted{kafka_producer_class$restart()}\if{html}{\out{</div>}}
90 | }
91 |
92 | \subsection{Returns}{
93 | \code{self} for method chaining
94 | }
95 | }
96 | \if{html}{\out{<hr>}}
97 | \if{html}{\out{<a id="method-running"></a>}}
98 | \if{latex}{\out{\hypertarget{method-running}{}}}
99 | \subsection{Method \code{running()}}{
100 | Whether or not the producer is active (has been started and not yet ended)
101 | \subsection{Usage}{
102 | \if{html}{\out{<div class="r">}}\preformatted{kafka_producer_class$running()}\if{html}{\out{</div>}}
103 | }
104 |
105 | \subsection{Returns}{
106 | TRUE/FALSE
107 | }
108 | }
109 | \if{html}{\out{<hr>}}
110 | \if{html}{\out{<a id="method-props"></a>}}
111 | \if{latex}{\out{\hypertarget{method-props}{}}}
112 | \subsection{Method \code{props()}}{
113 | Retrieve the current set of properties.
114 | If properties are supplied via the \code{...} or \code{.properties}
115 | parameter those properties will be set.
116 | \subsection{Usage}{
117 | \if{html}{\out{<div class="r">}}\preformatted{kafka_producer_class$props(..., .properties = NULL)}\if{html}{\out{</div>}}
118 | }
119 |
120 | \subsection{Arguments}{
121 | \if{html}{\out{<div class="arguments">}}
122 | \describe{
123 | \item{\code{...}}{a series of properties provided as \code{key = "values"}}
124 |
125 | \item{\code{.properties}}{a list of properties provided as \code{.properties = list(key = "values", ...)}}
126 | }
127 | \if{html}{\out{</div>}}
128 | }
129 | \subsection{Returns}{
130 | returns a list of properties
131 | }
132 | }
133 | \if{html}{\out{<hr>}}
134 | \if{html}{\out{<a id="method-send"></a>}}
135 | \if{latex}{\out{\hypertarget{method-send}{}}}
136 | \subsection{Method \code{send()}}{
137 | Send one or more messages to a topic.
138 | \subsection{Usage}{
139 | \if{html}{\out{<div class="r">}}\preformatted{kafka_producer_class$send(topic, msg, key = NULL, partition = NULL)}\if{html}{\out{</div>}}
140 | }
141 |
142 | \subsection{Arguments}{
143 | \if{html}{\out{<div class="arguments">}}
144 | \describe{
145 | \item{\code{topic}}{topic to send messages to}
146 |
147 | \item{\code{msg}}{message to be sent, or a character vector of messages to be sent}
148 |
149 | \item{\code{key}}{TBD}
150 |
151 | \item{\code{partition}}{TBD}
152 | }
153 | \if{html}{\out{</div>}}
154 | }
155 | \subsection{Returns}{
156 | returns self for method chaining
157 | }
158 | }
159 | \if{html}{\out{<hr>}}
160 | \if{html}{\out{<a id="method-clone"></a>}}
161 | \if{latex}{\out{\hypertarget{method-clone}{}}}
162 | \subsection{Method \code{clone()}}{
163 | The objects of this class are cloneable with this method.
164 | \subsection{Usage}{
165 | \if{html}{\out{<div class="r">}}\preformatted{kafka_producer_class$clone(deep = FALSE)}\if{html}{\out{</div>}}
166 | }
167 |
168 | \subsection{Arguments}{
169 | \if{html}{\out{<div class="arguments">}}
170 | \describe{
171 | \item{\code{deep}}{Whether to make a deep clone.}
172 | }
173 | \if{html}{\out{</div>}}
174 | }
175 | }
176 | }
177 |
--------------------------------------------------------------------------------
/R/kafka_admin_class.R:
--------------------------------------------------------------------------------
1 |
2 | #' R6 Class for Kafka Admin
3 | #'
4 | #' @import data.table
5 | #' @import jsonlite
6 | #' @import rJava R6
7 | #'
8 | #' @export
9 | #'
10 | kafka_admin_class <-
11 | R6::R6Class(
12 |
13 | #### options ###############################################################
14 | classname = "kafka_admin_class",
15 |
16 | portable = TRUE,
17 |
18 | parent_env = asNamespace("kafkaesque"),
19 |
20 |
21 |
22 |
23 | #### public ################################################################
24 |
25 | public =
26 | list(
27 |
28 | ## data ################################################################
29 |
30 | #' @field java_admin reference to the underlying Java object
31 | java_admin = list(),
32 |
33 |
34 |
35 | ## methods #############################################################
36 |
37 | #'
38 | #'
39 | #' @description
40 | #' Create a new admin object.
41 | #' Instead of \code{kafka_admin_class$new()} one can use \code{kafka_admin()}
42 | #'
43 | #' @return \code{self} for method chaining
44 | #'
45 | initialize =
46 | function() {
47 | self$java_admin <- rJava::.jnew("kafkaesque/Kafka_admin")
48 | },
49 |
50 |
51 | #'
52 | #' @description
53 | #' Code run when object is removed from session
54 | #'
55 | finalize =
56 | function() {
57 | self$java_admin$end()
58 | },
59 |
60 |
61 | #'
62 | #' @description
63 | #' Spin up admin and connect it to Kafka cluster
64 | #'
65 | #' @return \code{self} for method chaining
66 | #'
67 | start =
68 | function() {
69 | self$java_admin$start()
70 |
71 | # return for method chaining
72 | invisible(self)
73 | },
74 |
75 |
76 | #'
77 | #' @description
78 | #' Disconnect admin from Kafka cluster
79 | #'
80 | #' @return \code{self} for method chaining
81 | #'
82 | end =
83 | function() {
84 | self$java_admin$end()
85 |
86 | # return for method chaining
87 | invisible(self)
88 | },
89 |
90 | #'
91 | #' @description
92 | #' Disconnect and reconnect admin from Kafka cluster
93 | #'
94 | #' @return \code{self} for method chaining
95 | #'
96 | restart =
97 | function() {
98 | self$java_admin$restart()
99 |
100 | # return for method chaining
101 | invisible(self)
102 | },
103 |
104 |
105 | #'
106 | #' @description
107 | #' Whether or not the admin client is active (has been started and not yet ended)
108 | #'
109 | #' @return TRUE/FALSE
110 | #'
111 | running =
112 | function() {
113 | self$java_admin$running
114 | },
115 |
116 |
117 |
118 | #'
119 | #' @param ... a series of properties provided as \code{key = "values"}
120 | #' @param .properties a list of properties provided as \code{.properties = list(key = "values", ...)}
121 | #'
122 | #' @description
123 | #' Retrieve the current set of properties.
124 | #' If properties are supplied via the \code{...} or \code{.properties}
125 | #' parameter those properties will be set.
126 | #'
127 | #' @return returns a list of properties
128 | #'
129 | #'
130 | props =
131 | function(..., .properties = NULL) {
132 |
133 | # ? set properties
134 | if ( !is.null(.properties) ){
135 | self$java_admin$props_set(
136 | .jcastToArray(names(.properties)),
137 | .jcastToArray(format(.properties, scientific = FALSE))
138 | )
139 | } else if ( length(list(...)) > 0 ){
140 | .properties <- list(...)
141 | self$java_admin$props_set(
142 | .jcastToArray(names(.properties)),
143 | .jcastToArray(format(.properties, scientific = FALSE))
144 | )
145 | }
146 |
147 | # return properties
148 | jsonlite::fromJSON(
149 | iconv(
150 | x = self$java_admin$props$to_json(),
151 | to = "UTF-8"
152 | )
153 | )
154 | },
155 |
156 | #'
157 | #' @description
158 | #' Query and return list of topics.
159 | #'
160 | #'
161 | #' @return returns a character vector of topics
162 | #'
163 | #'
164 | topics_list =
165 | function() {
166 | self$java_admin$topics_list()
167 | },
168 |
169 |
170 | #'
171 | #' @param topic the topic names to create
172 | #' @param partition the topic's number of partitions
173 | #' @param replication_factor the topic's replication factor
174 | #'
175 | #' @description
176 | #' Create new topics
177 | #'
178 | #'
179 | #' @return returns a character vector of topics
180 | #'
181 | #'
182 | topics_create =
183 | function(topic, partition = 1, replication_factor = 1) {
184 | self$java_admin$topics_create(
185 | topic = topic,
186 | partitions = as.integer(partition),
187 | replication_factor = as.integer(replication_factor)
188 | )
189 | },
190 |
191 |
192 | #'
193 | #' @param topic topic name(s) to delete
194 | #'
195 | #' @description
196 | #' Delete topics
197 | #'
198 | #'
199 | #' @return returns a character vector of topics
200 | #'
201 | #'
202 | topics_delete =
203 | function(topic) {
204 | self$java_admin$topics_delete(topic)
205 | }
206 |
207 |
208 |
209 | ),
210 |
211 | #### private ###############################################################
212 |
213 | private = NULL
214 | )
215 |
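216 | # Usage sketch (not run), assuming a running local cluster and the
217 | # kafka_admin() convenience constructor mentioned in the docs above:
218 | #
219 | # adm <- kafka_admin()
220 | # adm$start()
221 | # adm$topics_create("example_topic", partition = 1, replication_factor = 1)
222 | # adm$topics_list()
223 | # adm$topics_delete("example_topic")
224 | # adm$end()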
--------------------------------------------------------------------------------
/R/kafka_records.R:
--------------------------------------------------------------------------------
1 | #' R6 Class for Kafka Records
2 | #'
3 | #' This class handles kafka records.
4 | #' It manages polling for new messages, retrieval of messages from the JVM,
5 | #' local storage of message batches, and iteration over and forwarding of
6 | #' messages or message batches for consumption.
7 | #'
8 | #' It abstracts storage, polling and forwarding into an iterable interface where
9 | #' messages can be accessed via \code{next_record()} and \code{next_record_batch()}.
10 | #'
11 | #' The main problem here is that message consumption is not trivial for a couple
12 | #' of reasons: (1) The R interface has to be in sync with the Java side of things
13 | #' - there is a records object at the Java side.
14 | #' (2) Kafka fetches messages in batches. While batches might be as small as
15 | #' 0 or 1 message, the default is to consume messages in batches of 500. This
16 | #' makes consuming single messages a non-trivial process, since the batch size
17 | #' is determined by how the consumer options were set, e.g. timeouts and max
18 | #' fetch sizes, and by the number of available messages on the topic - all things
19 | #' outside the specific call to the poll method which executes data retrieval.
20 | #' (3) Extra processing has to be done to translate records from Java into R.
21 | #'
22 | #' @import data.table
23 | #' @import rJava
24 | #'
25 | #'
26 | kafka_records_class <-
27 | R6::R6Class(
28 |
29 | #### options ###############################################################
30 | classname = "kafka_class_consumer",
31 | portable = TRUE,
32 | parent_env = asNamespace("kafkaesque"),
33 |
34 | #### public ###############################################################
35 |
36 | public = list(
37 |
38 | #'
39 | #'
40 | #' @param parent enclosing consumer object
41 | #'
42 | #' @description
43 | #'
44 | #' Create a new instance of the class.
45 | #'
46 | #'
47 | initialize =
48 | function ( parent ) {
49 | private$parent <- parent
50 | },
51 |
52 |
53 |
54 | #'
55 | #' @param timeout_ms defaults to `Inf`.
56 | #' Time for which the poll will wait for data;
57 | #' passed through to kafka_consumer$poll()
58 | #'
59 | #' @description
60 | #'
61 | #' Returns the next record ready for consumption. If the last poll returned
62 | #' a batch of messages, those will be returned one at a time.
63 | #' If all of them have been returned already, a new poll will be initiated.
64 | #'
65 | #' If the poll does not return any records a new poll will be initiated
66 | #' until data is returned.
67 | #'
68 | next_record =
69 | function(timeout_ms = Inf){
70 |
71 | # need to get new records or not?
72 | while ( nrow(private$records) == 0L || private$records_pointer == nrow(private$records) ){
73 | # get new records and reset records pointer
74 | private$new_records(timeout_ms = timeout_ms)
75 | private$records_pointer <- 0L
76 | }
77 |
78 | # increase records pointer and return next item
79 | private$records_pointer <- private$records_pointer + 1L
80 | private$records[private$records_pointer, ]
81 | },
82 |
83 |
84 | #'
85 | #' @param timeout_ms defaults to `Inf`.
86 | #' Time for which the poll will wait for data;
87 | #' passed through to kafka_consumer$poll()
88 | #'
89 | #' @description
90 | #'
91 | #' Returns all available, unconsumed messages. If no unconsumed messages
92 | #' are available it will poll for a new batch and return it.
93 | #'
94 | #' If the poll does not return any records a new poll will be initiated
95 | #' until data is returned.
96 | #'
97 | next_record_batch =
98 | function(timeout_ms = Inf){
99 |
100 | # need to get new records or not?
101 | while ( nrow(private$records) == 0L || private$records_pointer == nrow(private$records) ){
102 | # get new records and reset records pointer
103 | private$new_records(timeout_ms = timeout_ms)
104 | private$records_pointer <- 0L
105 | }
106 |
107 | # extract next batch
108 | res <- private$records[seq(from = private$records_pointer + 1L, to = nrow(private$records)), ]
109 |
110 | # increase records pointer and return next batch of items
111 | private$records_pointer <- as.integer(nrow(private$records))
112 |
113 | # return
114 | res
115 | }
116 |
117 | ),
118 |
119 |
120 |
121 | #### private ###############################################################
122 |
123 | private =
124 | list(
125 |
126 | # Reference to consumer object that serves as parent.
127 | #
128 | parent = list(),
129 |
130 | # Holds a batch of messages received from kafka consumer as data.frame
131 | # or data.table.
132 | #
133 | records = data.frame(),
134 |
135 | # Records which message from local storage records is to be consumed next.
136 | #
137 | records_pointer = 0L,
138 |
139 | # Use poll method on kafka consumer to get new messages.
140 | #
141 | new_records =
142 | function(timeout_ms){
143 |
144 | # kafka poll for new messages
145 | private$parent$poll(timeout_ms = timeout_ms )
146 |
147 | # transform collection of messages into collection of arrays to make
148 | # transformation from Java to R easier
149 | obj <- private$parent$java_consumer$records_arrays()
150 |
151 |
152 | # transform records from Java to R
153 | private$records <-
154 | data.table::as.data.table(
155 | list(
156 | topic = obj$topic,
157 | key = obj$keys,
158 | partition = obj$partitions,
159 | offset = obj$offsets,
160 | timestamp = obj$timestamp,
161 | value = obj$values,
162 | timestampType = obj$timestampType,
163 | leaderEpoch = obj$leaderEpoch,
164 | serializedKeySize = obj$serializedKeySize,
165 | serializedValueSize = obj$serializedValueSize
166 | )
167 | )
168 | }
169 | )
170 |
171 | )
172 |
173 |
174 | #' Constructor for kafka_records_class
175 | #'
176 | #' @param parent enclosing consumer object
177 | #'
178 | kafka_records <-
179 | function(parent){
180 | kafka_records_class$new(parent = parent)
181 | }
182 |
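183 | # Usage sketch (not run): records objects are normally created by the
184 | # consumer class itself. Assuming `consumer` is a started kafka_consumer
185 | # subscribed to a topic, iteration works record by record or batch-wise:
186 | #
187 | # recs <- kafka_records(parent = consumer)
188 | # single_message <- recs$next_record(timeout_ms = 1000)
189 | # rest_of_batch <- recs$next_record_batch(timeout_ms = 1000)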
--------------------------------------------------------------------------------
/man/kafka_admin_class.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/kafka_admin_class.R
3 | \name{kafka_admin_class}
4 | \alias{kafka_admin_class}
5 | \title{R6 Class for Kafka Admin}
6 | \description{
7 | R6 Class for Kafka Admin
8 |
9 | R6 Class for Kafka Admin
10 | }
11 | \section{Public fields}{
12 | \if{html}{\out{<div class="r6-fields">}}
13 | \describe{
14 | \item{\code{java_admin}}{reference to the underlying Java object}
15 | }
16 | \if{html}{\out{</div>}}
17 | }
19 | \section{Methods}{
20 | \subsection{Public methods}{
21 | \itemize{
22 | \item \href{#method-new}{\code{kafka_admin_class$new()}}
23 | \item \href{#method-finalize}{\code{kafka_admin_class$finalize()}}
24 | \item \href{#method-start}{\code{kafka_admin_class$start()}}
25 | \item \href{#method-end}{\code{kafka_admin_class$end()}}
26 | \item \href{#method-restart}{\code{kafka_admin_class$restart()}}
27 | \item \href{#method-running}{\code{kafka_admin_class$running()}}
28 | \item \href{#method-props}{\code{kafka_admin_class$props()}}
29 | \item \href{#method-topics_list}{\code{kafka_admin_class$topics_list()}}
30 | \item \href{#method-topics_create}{\code{kafka_admin_class$topics_create()}}
31 | \item \href{#method-topics_delete}{\code{kafka_admin_class$topics_delete()}}
32 | \item \href{#method-clone}{\code{kafka_admin_class$clone()}}
33 | }
34 | }
35 | \if{html}{\out{
}}
36 | \if{html}{\out{<hr>}}
37 | \if{html}{\out{<a id="method-new"></a>}}
38 | \if{latex}{\out{\hypertarget{method-new}{}}}
39 | \subsection{Method \code{new()}}{
40 | Create a new admin object.
41 | Instead of \code{kafka_admin_class$new()} one can use \code{kafka_admin()}
42 | \subsection{Usage}{
43 | \if{html}{\out{<div class="r">}}\preformatted{kafka_admin_class$new()}\if{html}{\out{</div>}}
44 | }
45 |
46 | \subsection{Returns}{
47 | \code{self} for method chaining
48 | }
49 | }
50 | \if{html}{\out{<hr>}}
51 | \if{html}{\out{<a id="method-finalize"></a>}}
52 | \if{latex}{\out{\hypertarget{method-finalize}{}}}
53 | \subsection{Method \code{finalize()}}{
54 | Code run when object is removed from session
55 | \subsection{Usage}{
56 | \if{html}{\out{<div class="r">}}\preformatted{kafka_admin_class$finalize()}\if{html}{\out{</div>}}
57 | }
58 |
59 | }
60 | \if{html}{\out{<hr>}}
61 | \if{html}{\out{<a id="method-start"></a>}}
62 | \if{latex}{\out{\hypertarget{method-start}{}}}
63 | \subsection{Method \code{start()}}{
64 | Spin up admin and connect it to Kafka cluster
65 | \subsection{Usage}{
66 | \if{html}{\out{<div class="r">}}\preformatted{kafka_admin_class$start()}\if{html}{\out{</div>}}
67 | }
68 |
69 | \subsection{Returns}{
70 | \code{self} for method chaining
71 | }
72 | }
73 | \if{html}{\out{<hr>}}
74 | \if{html}{\out{<a id="method-end"></a>}}
75 | \if{latex}{\out{\hypertarget{method-end}{}}}
76 | \subsection{Method \code{end()}}{
77 | Disconnect admin from Kafka cluster
78 | \subsection{Usage}{
79 | \if{html}{\out{<div class="r">}}\preformatted{kafka_admin_class$end()}\if{html}{\out{</div>}}
80 | }
81 |
82 | \subsection{Returns}{
83 | \code{self} for method chaining
84 | }
85 | }
86 | \if{html}{\out{<hr>}}
87 | \if{html}{\out{<a id="method-restart"></a>}}
88 | \if{latex}{\out{\hypertarget{method-restart}{}}}
89 | \subsection{Method \code{restart()}}{
90 | Disconnect and reconnect admin from Kafka cluster
91 | \subsection{Usage}{
92 | \if{html}{\out{<div class="r">}}\preformatted{kafka_admin_class$restart()}\if{html}{\out{</div>}}
93 | }
94 |
95 | \subsection{Returns}{
96 | \code{self} for method chaining
97 | }
98 | }
99 | \if{html}{\out{<hr>}}
100 | \if{html}{\out{<a id="method-running"></a>}}
101 | \if{latex}{\out{\hypertarget{method-running}{}}}
102 | \subsection{Method \code{running()}}{
103 | Whether or not the admin client is active (has been started and not yet ended)
104 | \subsection{Usage}{
105 | \if{html}{\out{<div class="r">}}\preformatted{kafka_admin_class$running()}\if{html}{\out{</div>}}
106 | }
107 |
108 | \subsection{Returns}{
109 | TRUE/FALSE
110 | }
111 | }
112 | \if{html}{\out{<hr>}}
113 | \if{html}{\out{<a id="method-props"></a>}}
114 | \if{latex}{\out{\hypertarget{method-props}{}}}
115 | \subsection{Method \code{props()}}{
116 | Retrieve the current set of properties.
117 | If properties are supplied via the \code{...} or \code{.properties}
118 | parameter those properties will be set.
119 | \subsection{Usage}{
120 | \if{html}{\out{<div class="r">}}\preformatted{kafka_admin_class$props(..., .properties = NULL)}\if{html}{\out{</div>}}
121 | }
122 |
123 | \subsection{Arguments}{
124 | \if{html}{\out{<div class="arguments">}}
125 | \describe{
126 | \item{\code{...}}{a series of properties provided as \code{key = "values"}}
127 |
128 | \item{\code{.properties}}{a list of properties provided as \code{.properties = list(key = "values", ...)}}
129 | }
130 | \if{html}{\out{</div>}}
131 | }
132 | \subsection{Returns}{
133 | returns a list of properties
134 | }
135 | }
136 | \if{html}{\out{<hr>}}
137 | \if{html}{\out{<a id="method-topics_list"></a>}}
138 | \if{latex}{\out{\hypertarget{method-topics_list}{}}}
139 | \subsection{Method \code{topics_list()}}{
140 | Query and return list of topics.
141 | \subsection{Usage}{
142 | \if{html}{\out{<div class="r">}}\preformatted{kafka_admin_class$topics_list()}\if{html}{\out{</div>}}
143 | }
144 |
145 | \subsection{Returns}{
146 | returns a character vector of topics
147 | }
148 | }
149 | \if{html}{\out{<hr>}}
150 | \if{html}{\out{<a id="method-topics_create"></a>}}
151 | \if{latex}{\out{\hypertarget{method-topics_create}{}}}
152 | \subsection{Method \code{topics_create()}}{
153 | Create new topics
154 | \subsection{Usage}{
155 | \if{html}{\out{<div class="r">}}\preformatted{kafka_admin_class$topics_create(topic, partition = 1, replication_factor = 1)}\if{html}{\out{</div>}}
156 | }
157 |
158 | \subsection{Arguments}{
159 | \if{html}{\out{<div class="arguments">}}
160 | \describe{
161 | \item{\code{topic}}{the topic names to create}
162 |
163 | \item{\code{partition}}{the topic's number of partitions}
164 |
165 | \item{\code{replication_factor}}{the topic's replication factor}
166 | }
167 | \if{html}{\out{</div>}}
168 | }
169 | \subsection{Returns}{
170 | returns a character vector of topics
171 | }
172 | }
173 | \if{html}{\out{<hr>}}
174 | \if{html}{\out{<a id="method-topics_delete"></a>}}
175 | \if{latex}{\out{\hypertarget{method-topics_delete}{}}}
176 | \subsection{Method \code{topics_delete()}}{
177 | Delete topics
178 | \subsection{Usage}{
179 | \if{html}{\out{<div class="r">}}\preformatted{kafka_admin_class$topics_delete(topic)}\if{html}{\out{</div>}}
180 | }
181 |
182 | \subsection{Arguments}{
183 | \if{html}{\out{<div class="arguments">}}
184 | \describe{
185 | \item{\code{topic}}{topic name(s) to delete}
186 | }
187 | \if{html}{\out{</div>}}
188 | }
189 | \subsection{Returns}{
190 | returns a character vector of topics
191 | }
192 | }
193 | \if{html}{\out{<hr>}}
194 | \if{html}{\out{<a id="method-clone"></a>}}
195 | \if{latex}{\out{\hypertarget{method-clone}{}}}
196 | \subsection{Method \code{clone()}}{
197 | The objects of this class are cloneable with this method.
198 | \subsection{Usage}{
199 | \if{html}{\out{<div class="r">}}\preformatted{kafka_admin_class$clone(deep = FALSE)}\if{html}{\out{</div>}}
200 | }
201 |
202 | \subsection{Arguments}{
203 | \if{html}{\out{<div class="arguments">}}
204 | \describe{
205 | \item{\code{deep}}{Whether to make a deep clone.}
206 | }
207 | \if{html}{\out{</div>}}
208 | }
209 | }
210 | }
211 |
--------------------------------------------------------------------------------
/docker/server.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one or more
2 | # contributor license agreements. See the NOTICE file distributed with
3 | # this work for additional information regarding copyright ownership.
4 | # The ASF licenses this file to You under the Apache License, Version 2.0
5 | # (the "License"); you may not use this file except in compliance with
6 | # the License. You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | # see kafka.server.KafkaConfig for additional details and defaults
17 |
18 | ############################# Server Basics #############################
19 |
20 | # The id of the broker. This must be set to a unique integer for each broker.
21 | broker.id=0
22 |
23 | ############################# Socket Server Settings #############################
24 |
25 | # The address the socket server listens on. It will get the value returned from
26 | # java.net.InetAddress.getCanonicalHostName() if not configured.
27 | # FORMAT:
28 | # listeners = listener_name://host_name:port
29 | # EXAMPLE:
30 | # listeners = PLAINTEXT://your.host.name:9092
31 | listeners = PLAINTEXT://0.0.0.0:9092
32 |
33 | # Hostname and port the broker will advertise to producers and consumers. If not set,
34 | # it uses the value for "listeners" if configured. Otherwise, it will use the value
35 | # returned from java.net.InetAddress.getCanonicalHostName().
36 | advertised.listeners=PLAINTEXT://localhost:9092
37 |
38 | # Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details
39 | #listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL
40 |
41 | # The number of threads that the server uses for receiving requests from the network and sending responses to the network
42 | num.network.threads=3
43 |
44 | # The number of threads that the server uses for processing requests, which may include disk I/O
45 | num.io.threads=8
46 |
47 | # The send buffer (SO_SNDBUF) used by the socket server
48 | socket.send.buffer.bytes=102400
49 |
50 | # The receive buffer (SO_RCVBUF) used by the socket server
51 | socket.receive.buffer.bytes=102400
52 |
53 | # The maximum size of a request that the socket server will accept (protection against OOM)
54 | socket.request.max.bytes=104857600
55 |
56 |
57 | ############################# Log Basics #############################
58 |
59 | # A comma separated list of directories under which to store log files
60 | log.dirs=/tmp/kafka-logs
61 |
62 | # The default number of log partitions per topic. More partitions allow greater
63 | # parallelism for consumption, but this will also result in more files across
64 | # the brokers.
65 | num.partitions=1
66 |
67 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown.
68 | # This value is recommended to be increased for installations with data dirs located in RAID array.
69 | num.recovery.threads.per.data.dir=1
70 |
71 | ############################# Internal Topic Settings #############################
72 | # The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state"
73 | # For anything other than development testing, a value greater than 1 is recommended to ensure availability such as 3.
74 | offsets.topic.replication.factor=1
75 | transaction.state.log.replication.factor=1
76 | transaction.state.log.min.isr=1
77 |
78 | ############################# Log Flush Policy #############################
79 |
80 | # Messages are immediately written to the filesystem but by default we only fsync() to sync
81 | # the OS cache lazily. The following configurations control the flush of data to disk.
82 | # There are a few important trade-offs here:
83 | # 1. Durability: Unflushed data may be lost if you are not using replication.
84 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
85 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks.
86 | # The settings below allow one to configure the flush policy to flush data after a period of time or
87 | # every N messages (or both). This can be done globally and overridden on a per-topic basis.
88 |
89 | # The number of messages to accept before forcing a flush of data to disk
90 | #log.flush.interval.messages=10000
91 |
92 | # The maximum amount of time a message can sit in a log before we force a flush
93 | #log.flush.interval.ms=1000
94 |
95 | ############################# Log Retention Policy #############################
96 |
97 | # The following configurations control the disposal of log segments. The policy can
98 | # be set to delete segments after a period of time, or after a given size has accumulated.
99 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens
100 | # from the end of the log.
101 |
102 | # The minimum age of a log file to be eligible for deletion due to age
103 | log.retention.hours=168
104 |
105 | # A size-based retention policy for logs. Segments are pruned from the log unless the remaining
106 | # segments drop below log.retention.bytes. Functions independently of log.retention.hours.
107 | #log.retention.bytes=1073741824
108 |
109 | # The maximum size of a log segment file. When this size is reached a new log segment will be created.
110 | log.segment.bytes=1073741824
111 |
112 | # The interval at which log segments are checked to see if they can be deleted according
113 | # to the retention policies
114 | log.retention.check.interval.ms=300000
115 |
116 | ############################# Zookeeper #############################
117 |
118 | # Zookeeper connection string (see zookeeper docs for details).
119 | # This is a comma separated host:port pairs, each corresponding to a zk
120 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
121 | # You can also append an optional chroot string to the urls to specify the
122 | # root directory for all kafka znodes.
123 | zookeeper.connect=localhost:2181
124 |
125 | # Timeout in ms for connecting to zookeeper
126 | zookeeper.connection.timeout.ms=18000
127 |
128 |
129 | ############################# Group Coordinator Settings #############################
130 |
131 | # The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance.
132 | # The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms.
133 | # The default value for this is 3 seconds.
134 | # We override this to 0 here as it makes for a better out-of-the-box experience for development and testing.
135 | # However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup.
136 | group.initial.rebalance.delay.ms=0
137 |
--------------------------------------------------------------------------------
/java/kafkaesque/src/main/java/kafkaesque/Kafka_admin.java:
--------------------------------------------------------------------------------
1 | package kafkaesque;
2 |
3 | import java.util.*;
4 | import java.util.concurrent.ExecutionException;
5 |
6 | import org.apache.kafka.clients.admin.*;
7 | import org.apache.kafka.common.*;
8 | import org.apache.kafka.common.acl.*;
9 |
10 |
11 |
12 |
13 | /**
14 | * Shell object to spin up an admin client, change config, manage topics and close it again
15 | */
16 | public class Kafka_admin {
17 |
18 | /**
19 | * Properties / Config
20 | *
21 | * Create config with default admin settings
22 | */
23 | public Kafka_admin_props props = new Kafka_admin_props();
24 |
25 | /**
26 | * Kafka admin client
27 | *
28 | * Define placeholder for the admin client to create and use later on
29 | */
30 | public AdminClient admin;
31 |
32 | /**
33 | * Properties / Config
34 | *
35 | * @param keys array of property keys
36 | * @param values array of property values
37 | *
38 | * @return returns all settings
39 | *
40 | */
41 | public Kafka_admin_props props_set(String[] keys, String[] values) {
42 |
43 | // go through settings and store them
44 | for (int i = 0; i < keys.length; i++) {
45 | this.props.set_prop(keys[i], values[i]);
46 | }
47 |
48 | // return updated state
49 | return this.props;
50 | };
51 |
52 |
53 | /**
54 | * Properties / Config
55 | *
56 | * @param keys array of property keys
57 | * @param values array of property values
58 | *
59 | * @return returns all settings
60 | *
61 | */
62 | public Kafka_admin_props props_set(String keys, String values) {
63 |
64 | // go through settings and store them
65 | this.props.set_prop(keys, values);
66 |
67 | // return updated state
68 | return this.props;
69 | };
70 |
71 |
72 | /**
73 | * Holds information as to whether or not the admin client is running ( == was started and has not
74 | * been ended)
75 | */
76 | public boolean running = false; // TODO: does running apply to admin clients or should this be
77 | // dropped???
78 |
79 |
80 | /**
81 | * Create a kafka admin client with a specific config
82 | *
83 | */
84 | public void start() {
85 | this.end();
86 | this.admin = AdminClient.create(this.props.props());
87 | this.running = true; // TODO: does running apply to admin clients or should this be dropped???
88 | }
89 |
90 |
91 | /**
92 | * Close kafka admin client
93 | */
94 | public void end() {
95 | if (this.admin != null) {
96 | this.admin.close();
97 | }
98 | this.running = false; // TODO: does running apply to admin clients or should this be dropped???
99 | }
100 |
101 |
102 | /**
103 | * Close and start admin
104 | *
105 | */
106 | public void restart() {
107 | this.end();
108 | this.start();
109 | }
110 |
111 |
112 | /**
113 | * List kafka topics
114 | *
115 | *
116 | * @return string array of topic names
117 | *
118 | * @throws InterruptedException
119 | * @throws ExecutionException
120 | *
121 | */
122 | public String[] topics_list() throws InterruptedException, ExecutionException {
123 |
124 | // retrieve info
125 | ListTopicsResult list_topics_result = this.admin.listTopics();
126 |
127 | // convert info to type String[]
128 | KafkaFuture<Set<String>> names_future = list_topics_result.names();
129 | Set<String> names = names_future.get();
130 | String[] topics_list = names.toArray(new String[names.size()]);
131 |
132 | // return
133 | return topics_list;
134 | }
135 |
136 |
137 | /**
138 | * Create Topics
139 | *
140 | * @see {@link Kafka_admin#topics_create(String[], int[], int[])}
141 | *
142 | */
143 | public String[] topics_create(String topic, int partitions, int replication_factor)
144 | throws InterruptedException, ExecutionException {
145 |
146 | // convert input to arrays
147 | String[] tp = {topic};
148 | int[] prt = {partitions};
149 | int[] rpl = {replication_factor};
150 |
151 | // forward input to method with array signature + return topics
152 | return this.topics_create(tp, prt, rpl);
153 | }
154 |
155 |
156 | /**
157 | * Create one or many topics
158 | *
159 | * @param topic String/String array of topic names to create
160 | * @param partitions int/int array of partitions each entry corresponding to the i-th
161 | * topic name
162 | * @param replication_factor int/int array of replication factors - the number of nodes that
163 | * should hold a copy of the data for this topic - each entry
164 | * corresponding to the i-th topic name
165 | *
166 | * @return string array of topic names
167 | *
168 | * @throws InterruptedException
169 | * @throws ExecutionException
170 | *
171 | */
172 | public String[] topics_create(String[] topic, int[] partitions, int[] replication_factor)
173 | throws InterruptedException, ExecutionException {
174 |
175 | // collect topic information
176 | Collection<NewTopic> topics = new HashSet<>();
177 |
178 | for (int i = 0; i < topic.length; i++) {
179 | Integer rep_int = replication_factor[i];
180 | short rep = rep_int.shortValue();
181 | int part = partitions[i];
182 |
183 | NewTopic topic_item = new NewTopic(topic[i], part, rep);
184 | topics.add(topic_item);
185 | }
186 |
187 | // execute topic creation
188 | CreateTopicsResult res = admin.createTopics(topics);
189 | KafkaFuture<Void> all = res.all();
190 | all.get();
191 |
192 | // return topics
193 | return this.topics_list();
194 | }
195 |
196 |
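/**
 * Delete one or many topics
 *
 * @param topics array of names of topics to delete
 *
 * @return string array of topic names remaining after deletion
 *
 * @throws InterruptedException
 * @throws ExecutionException
 */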
197 | public String[] topics_delete(String[] topics) throws InterruptedException, ExecutionException {
198 | // transform input to type collection
199 | List<String> topics_collection = new ArrayList<>(Arrays.asList(topics));
200 |
201 | // execute deletion request
202 | DeleteTopicsResult res = this.admin.deleteTopics(topics_collection);
203 | KafkaFuture<Void> all = res.all();
204 | all.get();
205 |
206 | // return topics
207 | return this.topics_list();
208 | }
209 |
210 |
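/**
 * Delete a single topic
 *
 * @see {@link Kafka_admin#topics_delete(String[])}
 */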
211 | public String[] topics_delete(String topic) throws InterruptedException, ExecutionException {
212 | // transform input to type collection
213 | List<String> topics_collection = new ArrayList<>(Arrays.asList(topic));
214 |
215 | // execute deletion request
216 | DeleteTopicsResult res = this.admin.deleteTopics(topics_collection);
217 | KafkaFuture<Void> all = res.all();
218 | all.get();
219 |
220 | // return topics
221 | return this.topics_list();
222 | }
223 |
224 |
225 |
226 |
227 | public static void main(final String... args) throws Exception {
228 | System.out.println("-----------------------------------------------------");
229 |
230 | final Kafka_admin admin = new Kafka_admin();
231 | admin.start();
232 |
233 |
234 | System.out.println(Json.to_json_pretty(admin.topics_list()));
235 | System.out.println(Json.to_json_pretty(admin.topics_create("dings", 1, 1)));
236 | System.out.println(Json.to_json_pretty(admin.topics_list()));
237 | String[] a = {"dings"};
238 | System.out.println(Json.to_json_pretty(admin.topics_delete(a)));
239 | System.out.println(Json.to_json_pretty(admin.topics_list()));
240 |
241 | admin.end();
242 |
243 | System.out.println("-----------------------------------------------------");
244 | }
245 |
246 | }
247 |
--------------------------------------------------------------------------------
/java/kafkaesque/src/main/java/kafkaesque/Kafka_Consumer.java:
--------------------------------------------------------------------------------
1 | package kafkaesque;
2 |
3 | import java.time.Duration;
4 | import java.util.*;
5 | import java.util.UUID;
6 |
7 | import org.apache.kafka.clients.consumer.ConsumerRecords;
8 | import org.apache.kafka.clients.consumer.KafkaConsumer;
9 | import org.apache.kafka.common.PartitionInfo;
10 |
11 | /**
12 | * Shell object to spin up a consumer, change its config, poll for messages and close it again
13 | */
14 | public class Kafka_consumer {
15 |
16 | /**
17 | * Properties / Config
18 | *
19 | * Create Config with default consumer settings
20 | */
21 | public Kafka_consumer_props props = new Kafka_consumer_props(true);
22 |
23 | /**
24 | * Properties / Config
25 | *
26 | * @param keys array of property keys
27 | * @param values array of property values
28 | *
29 | * @return returns all settings
30 | *
31 | */
32 | public Kafka_consumer_props props_set(String[] keys, String[] values) {
33 |
34 | // go through settings and store them
35 | for (int i = 0; i < keys.length; i++) {
36 | this.props.set_prop(keys[i], values[i]);
37 | }
38 |
39 | // return updated state
40 | return this.props;
41 | };
42 |
43 |
44 | /**
45 | * Properties / Config
46 | *
47 | * @param keys a single property key
48 | * @param values a single property value
49 | *
50 | * @return returns all settings
51 | *
52 | */
53 | public Kafka_consumer_props props_set(String keys, String values) {
54 |
55 | // go through settings and store them
56 | this.props.set_prop(keys, values);
57 |
58 | // return updated state
59 | return this.props;
60 | };
61 |
62 |
63 |
64 | /**
65 | * Kafka Consumer
66 | *
67 | * Define Placeholder for consumer to create and use later on
68 | */
69 | public KafkaConsumer<String, String> cons;
70 |
71 |
72 |
73 | /**
74 | * Holds information on whether the consumer is running ( == was started and has not
75 | * been ended)
76 | */
77 | public boolean running = false;
78 |
79 |
80 |
81 | /**
82 | * Create a kafka consumer object with a specific config
83 | *
84 | */
85 | public void start() {
86 |
87 | // either use group id in properties object or use random value
88 | final String group_id = this.props.get_prop("group.id", UUID.randomUUID().toString());
89 |
90 | // set group id
91 | this.props.set_prop("group.id", group_id);
92 |
93 | // create new consumer
94 | this.cons = new KafkaConsumer<>(this.props.props());
95 | this.running = true;
96 | }
97 |
98 |
99 |
100 | /**
101 | * Close and restart the consumer
102 | *
103 | */
104 | public void restart() {
105 | this.end();
106 | this.start();
107 | }
108 |
109 |
110 | /**
111 | * Close kafka consumer
112 | */
113 | public void end() {
114 | if (this.cons != null) {
115 | this.cons.close();
116 | }
117 | this.running = false;
118 | }
119 |
120 |
121 | /**
122 | * Commit offsets for all subscribed topics synchronously
123 | */
124 | public void commit_sync() {
125 | cons.commitSync();
126 | }
127 |
128 |
129 | /**
130 | * Commit offsets for all subscribed topics asynchronously
131 | */
132 | public void commit_async() {
133 | cons.commitAsync();
134 | }
135 |
136 |
137 | /**
138 | * List topics available to the consumer as JSON string
139 | */
140 | public String topics_list() {
141 | final Map<String, List<PartitionInfo>> topics = cons.listTopics();
142 | return Json.to_json(topics);
143 | }
144 |
145 | public Map<String, List<PartitionInfo>> topics;
146 |
147 | /**
148 | * Subscribe to topics
149 | */
150 | public String[] topics_subscribe(final String topic) {
151 | final List<String> tpcs = Arrays.asList(topic);
152 | this.cons.subscribe(tpcs);
153 | return this.topics_subscription();
154 | }
155 |
156 |
157 | /**
158 | * Subscribe to topics
159 | */
160 | public String[] topics_subscribe(final String[] topics) {
161 | final List<String> tpcs = Arrays.asList(topics);
162 | this.cons.subscribe(tpcs);
163 | return this.topics_subscription();
164 | }
165 |
166 |
167 | /**
168 | * Return topics subscribed to
169 | */
170 | public String[] topics_subscription() {
171 | // get current subscription and cast type set to type String[] of size set
172 | String[] str = new String[cons.subscription().size()];
173 | str = cons.subscription().toArray(str);
174 | return str;
175 | }
176 |
177 |
178 | /**
179 | * Seek to beginning of all topic(-partitions) assigned to
180 | *
181 | */
182 | public void topics_seek_to_beginning() {
183 | // seek for all topics and partitions currently assigned
184 | this.cons.seekToBeginning(this.cons.assignment());
185 | }
186 |
187 |
188 | /**
189 | * Seek to end of all topic(-partitions) assigned to
190 | *
191 | */
192 | public void topics_seek_to_end() {
193 | // seek for all topics and partitions currently assigned
194 | this.cons.seekToEnd(this.cons.assignment());
195 | }
196 |
197 |
198 | /**
199 | * Return current offsets for all topic-partitions currently assigned
200 | *
201 | */
202 | public Kafka_offset_arrays topics_offsets() {
203 | return new Kafka_offset_arrays(this.cons);
204 | }
205 |
206 |
207 |
208 | /**
209 | * Storage for messages returned from polling
210 | */
211 | public ConsumerRecords<String, String> records;
212 |
213 |
214 |
215 | /**
216 | *
217 | * Poll Kafka for new messages
218 | *
219 | */
220 | public int poll() {
221 | // poll for data
222 | this.records = this.cons.poll(Duration.ofMillis(100));
223 |
224 | // return number of messages retrieved
225 | return records.count();
226 | }
227 |
228 |
229 | /**
230 | *
231 | * Poll Kafka for new messages
232 | *
233 | */
234 | public int poll(final int timeout_ms) {
235 | // poll for data
236 | this.records = this.cons.poll(Duration.ofMillis(timeout_ms));
237 |
238 | // return number of messages retrieved
239 | return records.count();
240 | }
241 |
242 |
243 | /**
244 | *
245 | * Return current set of records as a set of arrays
246 | *
247 | * @return Kafka_record_arrays object holding the record data
248 | *
249 | */
250 | public Kafka_record_arrays records_arrays() {
251 | Kafka_record_arrays res = new Kafka_record_arrays(this.records);
252 | return res;
253 | }
254 |
255 |
256 | /**
257 | *
258 | * Return current set of records as JSON string
259 | *
260 | * @return JSON string of record data
261 | *
262 | */
263 | public String records_json() {
264 | return Json.to_json_pretty(this.records);
265 | }
266 |
267 |
268 | /**
269 | *
270 | * Poll Kafka for new messages and print them
271 | *
272 | */
273 | public void poll_print() {
274 | this.records = this.cons.poll(Duration.ofMillis(100));
275 | this.records.forEach(record -> {
276 | System.out.println("1 Got Record: (" + record.key() + ", " + record.value() + ") at offset "
277 | + record.offset());
278 | });
279 | }
280 |
281 |
282 | public static void main(final String... args) throws Exception {
283 |
284 | System.out.println("\n\n-----------------------------------------------------");
285 | final Kafka_consumer cons = new Kafka_consumer();
286 |
287 | cons.props_set("max.poll.records", "1");
288 | cons.start();
289 | cons.topics_subscribe("test3");
290 |
291 | System.out.println(Json.to_json_pretty(cons.topics_subscription()));
292 |
293 | System.out.println("-----------------------------------------------------\n\n");
294 |
295 | cons.poll();
296 | while (cons.records.count() == 0) {
297 | cons.poll();
298 | }
299 |
300 | System.out.println(cons.records_json());
301 |
302 | System.out.println(cons.topics_offsets());
303 |
304 | System.out.println("-----------------------------------------------------\n\n");
305 | }
306 |
307 | }
308 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | ```{r scaffolding options, include=FALSE}
14 | github_user_name <- "petermeissner"
15 | codecov_user_name <- github_user_name
16 | travis_user_name <- github_user_name
17 | appveyor_user_name <- github_user_name
18 | ```
19 |
20 |
21 |
22 |
23 |
24 |
25 | ```{r knitr options, echo = FALSE}
26 | knitr::opts_chunk$set(
27 | fig.path = "man/figures/README-"
28 | )
29 | ```
30 |
31 |
32 | ```{r r options, include=FALSE}
33 | options("width" = 110)
34 | options(scipen = 20)
35 |
36 | # get package info
37 | tmp <- packageDescription( "kafkaesque" )
38 |
39 | package_name <- tmp$Package
40 |
41 | ```
42 |
43 | ```{r readme title, results='asis', echo=FALSE}
44 | cat("#", tmp$Title)
45 | ```
46 |
47 | ```{r, include=FALSE}
48 | md_list <-
49 | function(x, item_string = c("-", "*", "+"), item_ident = c("", " ", " ")){
50 |
51 | for ( i in seq_along(x)){
52 |
53 | cat(item_ident[1], item_string[1], " **", names(x)[[i]], "**\n", sep = "")
54 |
55 | if ( is.list(x[[i]]) & !is.null(names(x[[i]])) ){
56 |
57 | lapply(x[[i]], md_list, item_string = item_string[-1], item_ident = item_ident[-1])
58 |
59 | } else {
60 |
61 | for (k in seq_along(x[[i]])) {
62 | cat(item_ident[-1][1], item_string[-1][1], " ", x[[i]][[k]], "\n", sep = "")
63 | }
64 |
65 | }
66 | }
67 | }
68 |
69 | ```
70 |
71 |
72 |
73 | **Status**
74 |
75 | [](https://codecov.io/gh/petermeissner/kafkaesque)[](https://github.com/petermeissner/kafkaesque/actions/workflows/tests.yml)
76 |
77 |
78 | This package is - so far - feature complete and should be functional.
79 | It is a structured extract and rewrite from client work.
80 | Though core parts of the package are used in production and are 'battle tested',
81 | the package presented here is not - so far.
82 |
83 |
84 | The package has not been published to CRAN.
85 | Attempts have been made.
86 | CRAN has a policy on package size (<= 5MB) and publishing to CRAN would mean
87 | publishing only the R code without any Java dependencies.
88 | One could add an install function that downloads the necessary JAR files
89 | after installing the package from CRAN - see the sketch below.
90 | So far no decision has been made on whether to pursue this route.
91 |
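Such an install function could look roughly like the following sketch.
Everything below is illustrative only: the function name and the download URL
are placeholders, not part of the package.

```r
# Hypothetical post-install helper (sketch): downloads the JAR bundle into
# the installed package's java folder. Function name and URL are placeholders.
install_kafkaesque_jars <- function(
  url = "https://example.com/kafkaesque.jar"  # assumed download location
) {
  # folder for JARs inside the installed package
  java_dir <- file.path(system.file(package = "kafkaesque"), "java")
  dir.create(java_dir, showWarnings = FALSE, recursive = TRUE)

  # download the JAR bundle into that folder
  destfile <- file.path(java_dir, basename(url))
  download.file(url, destfile = destfile, mode = "wb")

  invisible(destfile)
}
```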
92 |
93 |
94 |
95 |
96 | ```{r, include=FALSE}
97 | filelist.R <- list.files("R", recursive = TRUE, pattern="\\.R$", ignore.case = TRUE, full.names = TRUE)
98 | filelist.Java <- list.files("java/kafkaesque/src/main/java/kafkaesque", recursive = TRUE, pattern="\\.java$", ignore.case = TRUE, full.names = TRUE)
99 | filelist.tests <- list.files("tests", recursive = TRUE, pattern="\\.R$", ignore.case = TRUE, full.names = TRUE)
100 | filelist.cpp <- list.files("src", recursive = TRUE, pattern="\\.cpp$", ignore.case = TRUE, full.names = TRUE)
101 | lines.R <- unlist(lapply(filelist.R, readLines))
102 | lines.Java <- unlist(lapply(filelist.Java, readLines, warn=FALSE))
103 | lines.tests <- unlist(lapply(filelist.tests, readLines))
104 | lines.cpp <- unlist(lapply(filelist.cpp, readLines))
105 | length.R <- length(grep("(^\\s*$)|(^\\s*#)|(^\\s*//)", lines.R, value = TRUE, invert = TRUE))
106 | length.Java <- length(grep("(^\\s*$)|(^\\s*/*\\*)|(^\\s*#)|(^\\s*//)", lines.Java, value = TRUE, invert = TRUE))
107 | length.tests <- length(grep("(^\\s*$)|(^\\s*#)|(^\\s*//)", lines.tests, value = TRUE, invert = TRUE))
108 | length.cpp <- length(grep("(^\\s*$)|(^\\s*#)|(^\\s*//)", lines.cpp, value = TRUE, invert = TRUE))
109 | ```
110 |
111 |
112 | *lines of R code:* `r length.R`, *lines of Java code:* `r length.Java`, *lines of test code:* `r length.tests`
113 |
114 |
115 |
116 | **Version**
117 |
118 | ```{r, include=FALSE}
119 | source_files <-
120 | grep(
121 | "/R/|/src/|/tests/",
122 | list.files(recursive = TRUE, full.names = TRUE),
123 | value = TRUE
124 | )
125 | last_change <-
126 | as.character(
127 | format(max(file.info(source_files)$mtime), tz="UTC")
128 | )
129 | ```
130 |
131 |
132 | ```{r, results='asis', echo=FALSE}
133 | cat(tmp$Version, "(",last_change,"UTC )")
134 | ```
135 |
136 | **Description**
137 |
138 |
139 | ```{r, results='asis', echo=FALSE}
140 | cat(tmp$Description)
141 | ```
142 |
143 |
144 | **License**
145 |
146 | ```{r, results='asis', echo=FALSE}
147 | cat(tmp$License, "\n")
148 | cat(tmp$Author)
149 | ```
150 |
151 |
152 |
153 | **Citation**
154 |
155 |
156 | ```{r, results='asis', echo=FALSE}
157 | cat("```r\n")
158 | cat("citation(\"",package_name,"\")", sep = "")
159 | cat("\n```\n")
160 | ```
161 |
162 | ```{r, results='asis', echo=FALSE}
163 | cat("```r\n")
164 | print_text <- capture.output(print(citation(package_name), style = "text"))
165 | cat(gsub("_", "", print_text))
166 | cat("\n```\n")
167 | ```
168 |
169 |
170 | **BibTex for citing**
171 |
172 | ```{r, results='asis', echo=FALSE}
173 | cat("```r\n")
174 | cat("toBibtex(citation(\"",package_name,"\"))", sep = "")
175 | cat("\n```\n")
176 | ```
177 |
178 | ```{r, results='asis', echo=FALSE}
179 | cat("```\n")
180 | cat(as.character(toBibtex(citation(package_name))), sep = "\n")
181 | cat("\n```\n")
182 | ```
183 |
184 |
185 |
186 | **Installation**
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 | Latest development version from Github:
201 |
202 |
203 | ```{r, results='asis', echo=FALSE}
204 | cat("```r\n")
205 | cat("devtools::install_github(\"petermeissner/",package_name,"\")", sep = "")
206 | cat("\n```\n")
207 | ```
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 | # Prerequisites
218 |
219 | For the package to work (more precisely: do any significant work)
220 | it needs a running Kafka cluster that can be reached over the network.
221 |
222 | A simple way to get a test version and the same version used throughout the
223 | README is to run the following docker command ([Link to Dockerfile](https://github.com/petermeissner/kafkaesque/blob/master/docker/Dockerfile)).
224 |
225 | ```
226 | docker run -p 127.0.0.1:2181:2181 -p 127.0.0.1:9092:9092 petermeissner/kafkatest
227 | ```
228 |
229 |
230 |
231 |
232 |
233 |
234 | # Content
235 |
236 | ```{r}
237 | library(kafkaesque)
238 | ls("package:kafkaesque")
239 | ```
240 |
241 | # Alternatives
242 |
243 | There are no viable alternatives at the moment that I know of.
244 |
245 | - There is **[{rkafka}](https://cran.r-project.org/web/packages/rkafka/index.html)** which was an invaluable source of inspiration for taking the first steps in making R talk to Kafka. It uses {rJava} and Kafka's Java API. Unfortunately the code does not work with any recent version of Kafka. So basically the package is dead and no development has happened for years.
246 | - Another attempt has been made with **[{fRanz}](https://github.com/uptake/fRanz)** (referring to the author Franz Kafka) which uses a C++ library under the hood. Unfortunately this started out very promising and fast-paced but then development died just as quickly.
247 |
248 |
249 | # Usage
250 |
251 |
252 | ## Consumer
253 |
254 | ### Start Consumer (... Stop, Status)
255 |
256 | ```{r}
257 |
258 | library("kafkaesque")
259 |
260 | # new consumer
261 | consumer <- kafka_consumer()
262 |
263 | # starting/connecting - + status
264 | consumer$start()
265 |
266 | consumer$running()
267 | consumer$end()$running()
268 | consumer$start()$running()
269 |
270 | ```
271 |
272 |
273 |
274 | ### Properties aka Config
275 |
276 | See here for a list of consumer properties: https://kafka.apache.org/documentation/#consumerconfigs.
277 |
278 |
279 | ```{r}
280 | consumer$props()
281 | ```
282 |
283 |
284 | ```{r}
285 | consumer$props(max.poll.records = 200)
286 | ```
287 |
288 |
289 |
290 | ### Topics and Subscriptions
291 |
292 | ```{r}
293 |
294 | # list topics available to consumer
295 | consumer$topics_list()
296 |
297 | # subscribe to topic
298 | consumer$topics_subscribe("test500000")
299 | consumer$topics_subscription()
300 |
301 |
302 | ```
303 |
304 |
305 |
306 | ### Retrieving a Message
307 |
308 | ```{r}
309 |
310 | # retrieve next message
311 | consumer$consume_next()
312 |
313 | ```
314 |
315 |
316 | ### Looping over Messages and Executing Code
317 |
318 |
319 | ```{r}
320 | # loop over messages and execute code
321 | res <-
322 | consumer$consume_loop(
323 | f = function(loop_env){ print(loop_env$messages)},
324 | check = function(loop_env){loop_env$meta$loop_counter < 4},
325 | batch = TRUE
326 | )
327 |
328 | # having a look at the statistics
329 | res
330 |
331 |
332 | ```
333 |
334 | ### Looping over Batches of Messages and Executing Code
335 |
336 | ```{r}
337 |
338 | # loop over batches of messages and execute code
339 | res <-
340 | consumer$consume_loop(
341 | f = function(loop_env){ print(loop_env$messages); cat("\n")},
342 | check = function(loop_env){loop_env$meta$message_counter < 1000},
343 | batch = TRUE
344 | )
345 |
346 | res
347 |
348 | ```
349 |
350 |
351 | ### Offsets and Seeking
352 |
353 |
354 | ```{r}
355 | # get current offsets from Kafka
356 | consumer$topics_offsets()
357 |
358 | # seek to end of topics
359 | consumer$topics_seek_to_end()
360 | consumer$topics_offsets()
361 |
362 | # seek to beginning of topics
363 | consumer$topics_seek_to_beginning()
364 | consumer$topics_offsets()
365 |
366 | ```
367 |
368 |
369 |
370 | ## Producer
371 |
372 |
373 | ### Start Producer (... Stop, Status)
374 |
375 | ```{r}
376 |
377 | library("kafkaesque")
378 |
379 | # new producer
380 | producer <- kafka_producer()
381 |
382 | # starting/connecting - + status
383 | producer$start()
384 |
385 | producer$running()
386 | producer$end()$running()
387 | producer$start()$running()
388 | producer$restart()$running()
389 |
390 | ```
391 |
392 |
393 | ### Properties aka Config
394 |
395 | See here for a list of producer properties: https://kafka.apache.org/documentation/#producerconfigs.
396 |
397 |
398 | ```{r}
399 | producer$props()
400 | ```
401 |
402 |
403 | ```{r}
404 | producer$props(whatever.you.may.want.to.set = "true")
405 | ```
406 |
407 |
408 |
409 | ### Sending Messages
410 |
411 | ```{r}
412 | producer$send(topic = "test", msg = "Die Kuh macht muh.")
413 | producer$send(topic = "test", msg = "Das Schaf macht mäh.")
414 | producer$send(topic = "test", msg = "Das Pferd macht wihiiiiiiiii-pffffff.")
415 | ```
416 |
417 |
418 |
419 |
420 | ## Setting and Getting Java-LogLevels
421 |
422 |
423 | ```{r}
424 | kafka_get_log_level()
425 |
426 | # one of off, fatal, error, warn, info, debug, trace, all
427 | kafka_set_log_level("info")
428 | producer$start()
429 |
430 |
431 | # set back to normal
432 | kafka_set_log_level("error")
433 | ```
434 |
435 |
436 |
437 |
438 | ## Admin
439 |
440 | ### Properties aka Config
441 |
442 |
443 | ```{r}
444 | admin <- kafka_admin()
445 | admin$start()
446 |
447 | admin$props()
448 | admin$props(whatever.you.may.want.to.set = "true")
449 | ```
450 |
451 |
452 | ### Get List of Topics
453 |
454 | ```{r}
455 | admin$topics_list()
456 | ```
457 |
458 |
459 |
460 | ### Create Topics
461 |
462 | ```{r}
463 | admin$topics_list()
464 |
465 | topics <- c("chuckle", "chit_chat")
466 | admin$topics_create(
467 | topic = topics,
468 | partition = c(1L, 1L),
469 | replication_factor = c(1L, 1L)
470 | )
471 | ```
472 |
473 |
474 |
475 | ### Delete Topics
476 |
477 |
478 |
479 | ```{r}
480 | admin$topics_delete(topics)
481 | ```
482 |
483 |
484 |
485 |
486 |
487 |
488 |
489 | # Development Notes
490 |
491 | For R development RStudio was used. For Java development Visual Studio Code
492 | lent a helping hand, with Maven as build tooling.
493 |
494 | For development one package is needed:
495 |
496 | - {kafkaesque} contains all the R functions, classes and methods and also all
497 | Java code that is specific to this project (and not just a dependency)
498 |
499 | Java sources are in `./java/kafkaesque/` folder - so your Java project should
500 | take this as root folder.
501 | Building Java sources can be
502 | done via Maven: `mvn install` will compile everything and copy the `kafkaesque.jar`
503 | into the package's `./inst/java/` folder with all its java dependencies.
504 |
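For example, from the repository root (paths as described above):

```
cd java/kafkaesque
mvn install
```
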
505 | After Java compilation, the R package has to be (re-)built and (re-)installed
506 | (most likely after re-starting the R session first (Ctrl-Shift-F10 in RStudio)).
507 |
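One possible sequence from the R side - a sketch, assuming {devtools} is
installed:

```r
# run after `mvn install` has copied the JARs into ./inst/java/
devtools::document()   # refresh roxygen2 documentation and NAMESPACE
devtools::install()    # (re-)build and (re-)install the package
```
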
508 | If developing Java in VSCode - as I did here - pressing Ctrl-Shift-B should allow
509 | you to select the two most important tasks: resolving dependencies
510 | and compiling the Java code and distributing it
511 | to the right places as described above.
512 |
513 |
514 |
515 |
516 |
--------------------------------------------------------------------------------
/man/kafka_consumer_class.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/kafka_consumer_class.R
3 | \name{kafka_consumer_class}
4 | \alias{kafka_consumer_class}
5 | \title{R6 Class for Kafka Consumer}
6 | \description{
7 | R6 Class for Kafka Consumer
8 |
9 | R6 Class for Kafka Consumer
10 | }
11 | \section{Public fields}{
12 | \if{html}{\out{}}
13 | \describe{
14 | \item{\code{java_consumer}}{reference to the underlying Java object
15 | Initialize}
16 |
17 | \item{\code{records}}{If poll() did fetch any messages, they are stored here until the
18 | next call to poll().
19 |
20 | Note: Consider using consume methods instead of managing records on your own.}
21 | }
22 | \if{html}{\out{
}}
23 | }
24 | \section{Methods}{
25 | \subsection{Public methods}{
26 | \itemize{
27 | \item \href{#method-new}{\code{kafka_consumer_class$new()}}
28 | \item \href{#method-finalize}{\code{kafka_consumer_class$finalize()}}
29 | \item \href{#method-start}{\code{kafka_consumer_class$start()}}
30 | \item \href{#method-end}{\code{kafka_consumer_class$end()}}
31 | \item \href{#method-running}{\code{kafka_consumer_class$running()}}
32 | \item \href{#method-poll}{\code{kafka_consumer_class$poll()}}
33 | \item \href{#method-commit}{\code{kafka_consumer_class$commit()}}
34 | \item \href{#method-consume_next}{\code{kafka_consumer_class$consume_next()}}
35 | \item \href{#method-consume_loop}{\code{kafka_consumer_class$consume_loop()}}
36 | \item \href{#method-props}{\code{kafka_consumer_class$props()}}
37 | \item \href{#method-topics_subscribe}{\code{kafka_consumer_class$topics_subscribe()}}
38 | \item \href{#method-topics_subscription}{\code{kafka_consumer_class$topics_subscription()}}
39 | \item \href{#method-topics_list}{\code{kafka_consumer_class$topics_list()}}
40 | \item \href{#method-topics_seek_to_beginning}{\code{kafka_consumer_class$topics_seek_to_beginning()}}
41 | \item \href{#method-topics_seek_to_end}{\code{kafka_consumer_class$topics_seek_to_end()}}
42 | \item \href{#method-topics_offsets}{\code{kafka_consumer_class$topics_offsets()}}
43 | \item \href{#method-clone}{\code{kafka_consumer_class$clone()}}
44 | }
45 | }
46 | \if{html}{\out{
}}
47 | \if{html}{\out{}}
48 | \if{latex}{\out{\hypertarget{method-new}{}}}
49 | \subsection{Method \code{new()}}{
50 | Create a new consumer object.
51 | Instead of \code{kafka_class_consumer$new()} one can use \code{kafka_consumer()}
52 | \subsection{Usage}{
53 | \if{html}{\out{}}\preformatted{kafka_consumer_class$new()}\if{html}{\out{
}}
54 | }
55 |
56 | \subsection{Returns}{
57 | \code{self} for method chaining
58 | }
59 | }
60 | \if{html}{\out{
}}
61 | \if{html}{\out{}}
62 | \if{latex}{\out{\hypertarget{method-finalize}{}}}
63 | \subsection{Method \code{finalize()}}{
64 | Code run when object is removed from session
65 | \subsection{Usage}{
66 | \if{html}{\out{}}\preformatted{kafka_consumer_class$finalize()}\if{html}{\out{
}}
67 | }
68 |
69 | }
70 | \if{html}{\out{
}}
71 | \if{html}{\out{}}
72 | \if{latex}{\out{\hypertarget{method-start}{}}}
73 | \subsection{Method \code{start()}}{
74 | Spin up consumer and connect it to Kafka cluster
75 | \subsection{Usage}{
76 | \if{html}{\out{}}\preformatted{kafka_consumer_class$start()}\if{html}{\out{
}}
77 | }
78 |
79 | \subsection{Returns}{
80 | \code{self} for method chaining
81 | }
82 | }
83 | \if{html}{\out{
}}
84 | \if{html}{\out{}}
85 | \if{latex}{\out{\hypertarget{method-end}{}}}
86 | \subsection{Method \code{end()}}{
87 | Disconnect consumer from Kafka cluster
88 | \subsection{Usage}{
89 | \if{html}{\out{}}\preformatted{kafka_consumer_class$end()}\if{html}{\out{
}}
90 | }
91 |
92 | \subsection{Returns}{
93 | \code{self} for method chaining
94 | }
95 | }
96 | \if{html}{\out{
}}
97 | \if{html}{\out{}}
98 | \if{latex}{\out{\hypertarget{method-running}{}}}
99 | \subsection{Method \code{running()}}{
100 | Whether or not consumer is active (has been started and not yet ended)
101 | \subsection{Usage}{
102 | \if{html}{\out{}}\preformatted{kafka_consumer_class$running()}\if{html}{\out{
}}
103 | }
104 |
105 | \subsection{Returns}{
106 | TRUE/FALSE
107 | }
108 | }
109 | \if{html}{\out{
}}
110 | \if{html}{\out{}}
111 | \if{latex}{\out{\hypertarget{method-poll}{}}}
112 | \subsection{Method \code{poll()}}{
113 | Polling for messages
114 | \subsection{Usage}{
115 | \if{html}{\out{}}\preformatted{kafka_consumer_class$poll(timeout_ms = Inf)}\if{html}{\out{
}}
116 | }
117 |
118 | \subsection{Arguments}{
119 | \if{html}{\out{}}
120 | \describe{
121 | \item{\code{timeout_ms}}{number of milliseconds to wait for polling to return
122 | messages, defaults to Inf}
123 | }
124 | \if{html}{\out{
}}
125 | }
126 | \subsection{Returns}{
127 | the number of records retrieved by last poll
128 | }
129 | }
130 | \if{html}{\out{
}}
131 | \if{html}{\out{}}
132 | \if{latex}{\out{\hypertarget{method-commit}{}}}
133 | \subsection{Method \code{commit()}}{
134 | Commit offsets returned on the last poll() for all the subscribed
135 | list of topics and partitions.
136 | \subsection{Usage}{
137 | \if{html}{\out{}}\preformatted{kafka_consumer_class$commit(sync = TRUE)}\if{html}{\out{
}}
138 | }
139 |
140 | \subsection{Arguments}{
141 | \if{html}{\out{}}
142 | \describe{
143 | \item{\code{sync}}{synchronous or asynchronous commit}
144 | }
145 | \if{html}{\out{
}}
146 | }
147 | \subsection{Returns}{
148 | \code{self} for method chaining
149 | }
150 | }
151 | \if{html}{\out{
}}
152 | \if{html}{\out{}}
153 | \if{latex}{\out{\hypertarget{method-consume_next}{}}}
154 | \subsection{Method \code{consume_next()}}{
155 | Consume one message either from the records already fetched from last poll or via initiating a new poll.
156 | \subsection{Usage}{
157 | \if{html}{\out{}}\preformatted{kafka_consumer_class$consume_next(timeout_ms = Inf)}\if{html}{\out{
}}
158 | }
159 |
160 | \subsection{Arguments}{
161 | \if{html}{\out{}}
162 | \describe{
163 | \item{\code{timeout_ms}}{defaults to `Inf`.
164 | Time for which poll will wait for data
165 | Passed through to kafka_consumer$poll()}
166 | }
167 | \if{html}{\out{
}}
168 | }
169 | }
170 | \if{html}{\out{
}}
171 | \if{html}{\out{}}
172 | \if{latex}{\out{\hypertarget{method-consume_loop}{}}}
173 | \subsection{Method \code{consume_loop()}}{
174 | Method that is basically an infinite loop (until the check expression
175 | evaluates to FALSE) that will evaluate the supplied expression for
176 | each loop.
177 |
178 | There are several objects available to the expression supplied:
179 |
180 | - messages: a data.frame/data.table with one or more rows - see batch parameter
181 | - loop_counter: single number equal to the current loop count.
182 | - message_counter: single number equal to the number of messages already processed.
183 | - start_time: the result of a call to Sys.time() when the method first started
184 | \subsection{Usage}{
185 | \if{html}{\out{}}\preformatted{kafka_consumer_class$consume_loop(
186 | f = function(loop_env) { print(loop_env$messages) },
187 | check = function(loop_env) { loop_env$meta$loop_counter < 1 },
188 | loop_env = new.env(),
189 | batch = FALSE,
190 | timeout_ms = Inf
191 | )}\if{html}{\out{
}}
192 | }
193 |
194 | \subsection{Arguments}{
195 | \if{html}{\out{}}
196 | \describe{
197 | \item{\code{f}}{loop execution function accepting one argument, namely loop_env}
198 |
199 | \item{\code{check}}{function that will accept one argument, namely loop_env,
200 | and will evaluate to TRUE or FALSE to either
201 | continue or stop processing}
202 |
203 | \item{\code{loop_env}}{Environment to store meta info in and pass to loop
204 | execution function and check function. Stored information:
205 |
206 | `loop_env$meta$start_time` -
207 | the result of a call to Sys.time()
208 | when consume loop execution started;
209 |
210 | `loop_env$meta$loop_counter` -
211 | counter that counts the current loop iteration;
212 |
213 | `loop_env$meta$message_counter` -
214 | counter that counts the number of messages already processed}
215 |
216 | \item{\code{batch}}{defaults to FALSE. Kafka's default is to poll for as much
217 | data as one can get given the consumer's limits on the number and
218 | size of messages as well as the chosen timeout. No matter how much
219 | data is returned from a poll, the method will - if batch is set to
220 | FALSE - return only a single message at a time. If batch is set to
221 | TRUE however, the messages data.frame/data.table will contain all messages
222 | that were retrieved by the last poll unless consumed already.}
223 |
224 | \item{\code{timeout_ms}}{defaults to `Inf`.
225 | Time for which poll will wait for data
226 | Passed through to kafka_consumer$poll()}
227 | }
228 | \if{html}{\out{
}}
229 | }
230 | }
231 | \if{html}{\out{
}}
232 | \if{html}{\out{}}
233 | \if{latex}{\out{\hypertarget{method-props}{}}}
234 | \subsection{Method \code{props()}}{
235 | Retrieving the current set of properties.
236 | If properties are supplied via the props parameter those properties will
237 | be set.
238 | \subsection{Usage}{
239 | \if{html}{\out{}}\preformatted{kafka_consumer_class$props(..., .properties = NULL)}\if{html}{\out{
}}
240 | }
241 |
242 | \subsection{Arguments}{
243 | \if{html}{\out{}}
244 | \describe{
245 | \item{\code{...}}{a series of properties provided as \code{key = "values"}}
246 |
247 | \item{\code{.properties}}{a list of properties provided as \code{.properties = list(key = "values", ...)}}
248 | }
249 | \if{html}{\out{
}}
250 | }
251 | \subsection{Returns}{
252 | returns a list of properties
253 | }
254 | }
255 | \if{html}{\out{
}}
256 | \if{html}{\out{}}
257 | \if{latex}{\out{\hypertarget{method-topics_subscribe}{}}}
258 | \subsection{Method \code{topics_subscribe()}}{
259 | Subscribe to one or more topics
260 | \subsection{Usage}{
261 | \if{html}{\out{}}\preformatted{kafka_consumer_class$topics_subscribe(topics)}\if{html}{\out{
}}
262 | }
263 |
264 | \subsection{Arguments}{
265 | \if{html}{\out{}}
266 | \describe{
267 | \item{\code{topics}}{character vector defining topics or topic regex to subscribe to}
268 | }
269 | \if{html}{\out{
}}
270 | }
271 | \subsection{Returns}{
272 | \code{self} for method chaining
273 | }
274 | }
275 | \if{html}{\out{
}}
276 | \if{html}{\out{}}
277 | \if{latex}{\out{\hypertarget{method-topics_subscription}{}}}
278 | \subsection{Method \code{topics_subscription()}}{
279 | List current subscription
280 | \subsection{Usage}{
281 | \if{html}{\out{}}\preformatted{kafka_consumer_class$topics_subscription()}\if{html}{\out{
}}
282 | }
283 |
284 | }
285 | \if{html}{\out{
}}
286 | \if{html}{\out{}}
287 | \if{latex}{\out{\hypertarget{method-topics_list}{}}}
288 | \subsection{Method \code{topics_list()}}{
289 | List topics available to consumer
290 | \subsection{Usage}{
291 | \if{html}{\out{}}\preformatted{kafka_consumer_class$topics_list(full = FALSE)}\if{html}{\out{
}}
292 | }
293 |
294 | \subsection{Arguments}{
295 | \if{html}{\out{}}
296 | \describe{
297 | \item{\code{full}}{defaults to FALSE, whether or not to return all data
298 | returned from the Java object (TRUE) or only a simple character vector
299 | listing the names of the data topics available for consumption
300 | (FALSE)}
301 | }
302 | \if{html}{\out{
}}
303 | }
304 | }
305 | \if{html}{\out{
}}
306 | \if{html}{\out{}}
307 | \if{latex}{\out{\hypertarget{method-topics_seek_to_beginning}{}}}
308 | \subsection{Method \code{topics_seek_to_beginning()}}{
309 | Seek to beginning of all topics subscribed and all partitions
310 | \subsection{Usage}{
311 | \if{html}{\out{}}\preformatted{kafka_consumer_class$topics_seek_to_beginning()}\if{html}{\out{
}}
312 | }
313 |
314 | }
315 | \if{html}{\out{
}}
316 | \if{html}{\out{}}
317 | \if{latex}{\out{\hypertarget{method-topics_seek_to_end}{}}}
318 | \subsection{Method \code{topics_seek_to_end()}}{
319 | Seek to end of all topics subscribed and all partitions
320 | \subsection{Usage}{
321 | \if{html}{\out{}}\preformatted{kafka_consumer_class$topics_seek_to_end()}\if{html}{\out{
}}
322 | }
323 |
324 | }
325 | \if{html}{\out{
}}
326 | \if{html}{\out{}}
327 | \if{latex}{\out{\hypertarget{method-topics_offsets}{}}}
328 | \subsection{Method \code{topics_offsets()}}{
329 |
330 | \subsection{Usage}{
331 | \if{html}{\out{}}\preformatted{kafka_consumer_class$topics_offsets()}\if{html}{\out{
}}
332 | }
333 |
334 | }
335 | \if{html}{\out{
}}
336 | \if{html}{\out{}}
337 | \if{latex}{\out{\hypertarget{method-clone}{}}}
338 | \subsection{Method \code{clone()}}{
339 | The objects of this class are cloneable with this method.
340 | \subsection{Usage}{
341 | \if{html}{\out{}}\preformatted{kafka_consumer_class$clone(deep = FALSE)}\if{html}{\out{
}}
342 | }
343 |
344 | \subsection{Arguments}{
345 | \if{html}{\out{}}
346 | \describe{
347 | \item{\code{deep}}{Whether to make a deep clone.}
348 | }
349 | \if{html}{\out{
}}
350 | }
351 | }
352 | }
353 |
--------------------------------------------------------------------------------
/R/kafka_consumer_class.R:
--------------------------------------------------------------------------------
1 |
2 | #' R6 Class for Kafka Consumer
3 | #'
4 | #' @import data.table
5 | #' @import jsonlite
6 | #' @import rJava
7 | #'
8 | #' @export
9 | #'
10 | kafka_consumer_class <-
11 | R6::R6Class(
12 |
13 | #### options ###############################################################
14 | classname = "kafka_class_consumer",
15 |
16 | portable = TRUE,
17 |
18 | parent_env = asNamespace("kafkaesque"),
19 |
20 |
21 |
22 |
23 | #### public ################################################################
24 |
25 | public =
26 | list(
27 |
28 | ## data ################################################################
29 |
30 | #' @field java_consumer reference to the underlying Java object
31 | java_consumer = list(),
32 |
33 |
34 |
35 | ## methods #############################################################
36 |
37 | #' Initialize
38 | #'
39 | #' @description
40 | #' Create a new consumer object.
41 | #' Instead of \code{kafka_class_consumer$new()} one can use \code{kafka_consumer()}
42 | #'
43 | #' @return \code{self} for method chaining
44 | #'
45 | initialize =
46 | function() {
47 | self$java_consumer <- rJava::.jnew("kafkaesque/Kafka_consumer")
48 | self$records <- kafka_records(parent = self)
49 | },
50 |
51 |
52 | #'
53 | #' @description
54 | #' Code run when object is removed from session
55 | #'
56 | finalize =
57 | function() {
58 | self$java_consumer$end()
59 | },
60 |
61 |
62 | #'
63 | #' @description
64 | #' Spin up consumer and connect it to Kafka cluster
65 | #'
66 | #' @return \code{self} for method chaining
67 | #'
68 | start =
69 | function() {
70 | self$java_consumer$start()
71 |
72 | # return for method chaining
73 | invisible(self)
74 | },
75 |
76 |
77 | #'
78 | #' @description
79 | #' Disconnect consumer from Kafka cluster
80 | #'
81 | #' @return \code{self} for method chaining
82 | #'
83 | end =
84 | function() {
85 | self$java_consumer$end()
86 |
87 | # return for method chaining
88 | invisible(self)
89 | },
90 |
91 | #'
92 | #' @description
93 | #' Whether or not consumer is active (has been started and not yet ended)
94 | #'
95 | #' @return TRUE/FALSE
96 | #'
97 | running =
98 | function() {
99 | self$java_consumer$running
100 | },
101 |
102 |
103 |
104 |
105 | #'
106 | #' @param timeout_ms number of milliseconds to wait for polling to return
107 | #' messages, defaults to Inf
108 | #'
109 | #' @description
110 | #' Polling for messages
111 | #'
112 | #' @return the number of records retrieved by last poll
113 | #'
114 | poll =
115 | function(timeout_ms = Inf) {
116 | stopifnot( self$running() == TRUE )
117 |
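# infinite timeout: rely on the Java side's default poll();
# otherwise pass the timeout explicitly via .jcall to select the int-typed poll(timeout_ms) overload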
118 | if ( is.infinite(timeout_ms) ){
119 | self$java_consumer$poll()
120 | } else {
121 | .jcall(
122 | obj = self$java_consumer,
123 | returnSig = "I",
124 | method = "poll",
125 | as.integer(timeout_ms)
126 | )
127 | }
128 |
129 | # return the number of records retrieved by the poll
130 | self$java_consumer$records$count()
131 | },
132 |
133 |
134 | #'
135 | #' @param sync synchronous or asynchronous commit
136 | #'
137 | #' @description
138 | #'
139 | #' Commit offsets returned on the last poll() for all the subscribed
140 | #' list of topics and partitions.
141 | #'
142 | #' @return \code{self} for method chaining
143 | #'
144 | commit =
145 | function(sync = TRUE) {
146 |
147 | # execute commit
148 | if ( sync == TRUE ){
149 | self$java_consumer$commit_sync()
150 | } else {
151 | self$java_consumer$commit_async()
152 | }
153 |
154 | # return for method chaining
155 | invisible(self)
156 | },
157 |
158 |
159 |
160 |
161 | #'
162 | #' @field records
163 | #'
164 | #' If poll() did fetch any messages, they are stored here until the
165 | #' next call to poll().
166 | #'
167 | #' Note: Consider using consume methods instead of managing records on your own.
168 | #'
169 | records = list(),
170 |
171 |
172 |
173 | #' @param timeout_ms defaults to `Inf`.
174 | #' Time for which poll will wait for data
175 | #' Passed through to kafka_consumer$poll()
176 | #'
177 | #' @description
178 | #'
179 | #' Consume one message either from the records already fetched from last poll or via initiating a new poll.
180 | #'
181 | consume_next =
182 | function ( timeout_ms = Inf ) {
183 | stopifnot( self$running() == TRUE )
184 | self$records$next_record( timeout_ms = timeout_ms )
185 | },
186 |
187 |
188 | #' @param check function that will accept one argument, namely loop_env,
189 | #' and will evaluate to TRUE or FALSE to either
190 | #' continue or stop processing
191 | #'
192 | #' @param batch defaults to FALSE. Kafka's default is to poll for as much
193 | #'        data as one can get given the consumer's limits on the number and
194 | #'        size of messages as well as the chosen timeout. No matter how much
195 | #'        data is returned from a poll, the method will - if batch is set to
196 | #'        FALSE - return only a single message at a time. If batch is set to
197 | #'        TRUE however, the messages data.frame/data.table will contain all messages
198 | #'        that were retrieved by the last poll unless consumed already.
199 | #'
200 | #' @param timeout_ms defaults to `Inf`.
201 | #' Time for which poll will wait for data
202 | #' Passed through to kafka_consumer$poll()
203 | #'
204 | #' @param f loop execution function accepting one argument, namely loop_env
205 | #'
206 | #' @param loop_env Environment to store meta info in and pass to loop
207 | #' execution function and check function. Stored information:
208 | #'
209 | #' `loop_env$meta$start_time` -
210 | #' the result of a call to Sys.time()
211 | #' when consume loop execution started;
212 | #'
213 | #' `loop_env$meta$loop_counter` -
214 | #' counter that counts the current loop iteration;
215 | #'
216 | #' `loop_env$meta$message_counter` -
217 | #' counter that counts the number of messages already processed
218 | #'
219 | #' @description
220 | #'
221 | #' Method that is basically an infinite loop (until the check expression
222 | #' evaluates to FALSE) that will evaluate the supplied expression for
223 | #' each loop.
224 | #'
225 | #' There are several objects available to the expression supplied:
226 | #'
227 | #' - messages: a data.frame/data.table with one or more rows - see batch parameter
228 | #' - loop_counter: single number equal to the current loop count.
229 | #' - message_counter: single number equal to the number of messages already processed.
230 | #' - start_time: the result of a call to Sys.time() when the method first started
231 | #'
232 | #'
233 | consume_loop =
234 | function (
235 | f = function(loop_env){print(loop_env$messages)},
236 | check = function(loop_env){loop_env$meta$loop_counter < 1},
237 | loop_env = new.env(),
238 | batch = FALSE,
239 | timeout_ms = Inf
240 | ) {
241 |
242 | # set up environments to pass around/share data in check() and f()
243 | loop_env$meta <- new.env()
244 | loop_env$meta$start_time <- Sys.time()
245 | loop_env$meta$loop_counter <- 0
246 | loop_env$meta$message_counter <- 0
247 |
248 |
249 | # loop while check evaluates to TRUE
250 | while ( check(loop_env = loop_env) ){
251 |
252 | loop_env$meta$loop_counter <-
253 | loop_env$meta$loop_counter + 1
254 |
255 | if ( batch == TRUE ){
256 |
257 | loop_env$messages <-
258 | self$records$next_record_batch(timeout_ms = timeout_ms)
259 |
260 | } else {
261 |
262 | loop_env$messages <-
263 | self$records$next_record(timeout_ms = timeout_ms)
264 |
265 | }
266 |
267 | f(loop_env = loop_env)
268 |
269 | loop_env$meta$message_counter <-
270 | loop_env$meta$message_counter + nrow(loop_env$messages)
271 | }
272 |
273 |
274 | # last steps
275 | loop_env$meta$end_time <- Sys.time()
276 | loop_env$meta <- as.list(loop_env$meta)
277 | loop_env <- as.list(loop_env)
278 |
279 | # return
280 | return(loop_env)
281 | },
282 |
283 |
284 | #'
285 | #' @param ... a series of properties provided as \code{key = "values"}
286 | #' @param .properties a list of properties provided as \code{.properties = list(key = "values", ...)}
287 | #'
288 | #' @description
289 | #' Retrieving the current set of properties.
290 | #' If properties are supplied via the props parameter those properties will
291 | #' be set.
292 | #'
293 | #' @return returns a list of properties
294 | #'
295 | #'
296 | props =
297 | function(..., .properties = NULL) {
298 |
299 | # ? set properties
300 | if ( !is.null(.properties) ){
301 | self$java_consumer$props_set(
302 | .jcastToArray(names(.properties)),
303 | .jcastToArray(format(.properties, scientific = FALSE))
304 | )
305 | } else if ( length(list(...)) > 0 ){
306 | .properties <- list(...)
307 | self$java_consumer$props_set(
308 | .jcastToArray(names(.properties)),
309 | .jcastToArray(format(.properties, scientific = FALSE))
310 | )
311 | }
312 |
313 | # return properties
314 | jsonlite::fromJSON(
315 | iconv(
316 | x = self$java_consumer$props$to_json(),
317 | to = "UTF-8"
318 | )
319 | )
320 | },
321 |
322 |
323 | #'
324 | #' @description
325 | #' Subscribe to one or more topics
326 | #'
327 | #' @param topics character vector defining topics or topic regex to subscribe to
328 | #'
329 | #' @return \code{self} for method chaining
330 | #'
331 | topics_subscribe =
332 | function(topics) {
333 |
334 | stopifnot( self$running() == TRUE )
335 |
336 | topics_string_array <- .jarray(topics)
337 | self$java_consumer$topics_subscribe(topics_string_array)
338 |
339 | # return for method chaining
340 | invisible(self)
341 | },
342 |
343 |
344 | #'
345 | #' @description
346 | #'
347 | #' List current subscription
348 | #'
349 | topics_subscription =
350 | function() {
351 |
352 | stopifnot( self$running() == TRUE )
353 |
354 | self$java_consumer$topics_subscription()
355 | },
356 |
357 | #'
358 | #' @param full defaults to FALSE, whether or not to return all data
359 | #'        returned from the Java object (TRUE) or only a simple character vector
360 | #' listing the names of the data topics available for consumption
361 | #' (FALSE)
362 | #'
363 | #' @description
364 | #'
365 | #' List topics available to consumer
366 | #'
367 | topics_list =
368 | function(full=FALSE) {
369 |
370 | stopifnot( self$running() == TRUE )
371 |
372 | tmp <- jsonlite::fromJSON(self$java_consumer$topics_list())
373 | if ( full == FALSE ) {
374 | topics <- unique(unlist(lapply(tmp, `[[`, "topic")))
375 | return(topics[!grepl("^__", topics)])
376 | } else {
377 | return(tmp)
378 | }
379 | },
380 |
381 |
382 | #' @description
383 | #'
384 | #' Seek to beginning of all topics subscribed and all partitions
385 | #'
386 | topics_seek_to_beginning =
387 | function(){
388 | # execute seeking
389 | self$java_consumer$topics_seek_to_beginning()
390 | },
391 |
392 |
393 | #' @description
394 | #'
395 | #' Seek to end of all topics subscribed and all partitions
396 | #'
397 | topics_seek_to_end =
398 | function(){
399 | # execute seeking
400 | self$java_consumer$topics_seek_to_end()
401 | },
402 |
403 |
404 | #'
405 | #' @description
406 | #'
407 | #'
408 | #'
409 | topics_offsets =
410 | function() {
411 | obj <- self$java_consumer$topics_offsets()
412 |
413 | # return
414 | data.table::data.table(
415 | topic = obj$topics,
416 | partition = obj$partitions,
417 | offset = obj$offsets
418 | )
419 | }
420 |
421 | ),
422 |
423 | #### private ###############################################################
424 |
425 | private = NULL
426 | )
427 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | # Kafka R Bindings via ‘rJava’
17 |
18 | **Status**
19 |
20 | [](https://codecov.io/gh/petermeissner/kafkaesque)[](https://github.com/petermeissner/kafkaesque/actions/workflows/tests.yml)
23 |
24 | This package is - so far - feature complete and should be functional.
25 | It is a structured extract and rewrite from client work. Though core
26 | parts of the package are used in production and are ‘battle tested’, the
27 | package presented here is not - so far.
28 |
29 | The package has not been published to CRAN. Attempts have been made. CRAN
30 | has a policy on package size (\<= 5MB) and publishing to CRAN would mean
31 | publishing only the R code without any Java dependencies. One could add an
32 | install function that downloads the necessary JAR files after installing
33 | the package from CRAN. So far no decision has been made on whether to
34 | pursue this route.
35 |
36 | Presentations:
37 | - useR! 2021:
38 | * video: https://youtu.be/5e7W4ktjASQ?t=2209
39 | - UROS 2020:
40 | * slides: https://r-project.ro/conference2020/presentations/Mei%C3%9Fner_web_scraping_at_scale_15min_talk_uros_2020.pdf
41 | * video: https://youtu.be/THDoBJCM9ZE?t=409
42 |
43 |
44 | *lines of R code:* 593, *lines of Java code:* 577, *lines of test code:*
45 | 580
46 |
47 | **Version**
48 |
49 | 0.1.5 ( 2021-03-21 19:08:23 UTC )
50 |
51 | **Description**
52 |
53 | Provides R bindings for Consumer, Admin and Producer APIs for Kafka via
54 | ‘rJava’: “Apache Kafka is an open-source distributed event streaming
55 | platform used by thousands of companies for high-performance data
56 | pipelines, streaming analytics, data integration, and mission-critical
57 | applications.” Kafka is distributed, highly scalable, provides
58 | persistent event storage and is designed for high throughput and low
59 | latency.
60 |
61 | **License**
62 |
63 | GPL (\>= 3)
Peter Meissner \[aut, cre\], Marius Pirv \[aut\],
64 | virtual7 \[cph\]
65 |
66 | **Citation**
67 |
68 | ``` r
69 | citation("kafkaesque")
70 | ```
71 |
72 | ``` r
73 | Meissner P, Pirv M (2021). kafkaesque: Kafka R Bindings via 'rJava'. R package version 0.1.5.
74 | ```
75 |
76 | **BibTex for citing**
77 |
78 | ``` r
79 | toBibtex(citation("kafkaesque"))
80 | ```
81 |
82 | @Manual{,
83 | title = {kafkaesque: Kafka R Bindings via 'rJava'},
84 | author = {Peter Meissner and Marius Pirv},
85 | year = {2021},
86 | note = {R package version 0.1.5},
87 | }
88 |
89 | **Installation**
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 | Latest development version from Github:
104 |
105 | ``` r
106 | devtools::install_github("petermeissner/kafkaesque")
107 | ```
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 | # Prerequisites
118 |
119 | For the package to work (more precisely: do any significant work) it
120 | needs a running Kafka cluster that can be reached over the network.
121 |
122 | A simple way to get a test version and the same version used throughout
123 | the README is to run the following docker command ([Link to
124 | Dockerfile](https://github.com/petermeissner/kafkaesque/blob/master/docker/Dockerfile)).
125 |
126 | docker run -p 127.0.0.1:2181:2181 -p 127.0.0.1:9092:9092 petermeissner/kafkatest
127 |
128 | # Content
129 |
130 | ``` r
131 | library(kafkaesque)
132 | ```
133 |
134 | ## Loading required package: rJava
135 |
136 | ``` r
137 | ls("package:kafkaesque")
138 | ```
139 |
140 | ## [1] "%>%" "gnrndmsg" "kafka_admin" "kafka_admin_class"
141 | ## [5] "kafka_consumer" "kafka_consumer_class" "kafka_get_log_level" "kafka_producer"
142 | ## [9] "kafka_producer_class" "kafka_set_log_level"
143 |
144 | # Alternatives
145 |
146 | There are no viable alternatives at the moment that I know of.
147 |
148 | - There is
149 |   **[{rkafka}](https://cran.r-project.org/web/packages/rkafka/index.html)**
150 |   which was an invaluable source of inspiration for taking the first
151 |   steps in making R talk to Kafka. It uses {rJava} and Kafka’s Java
152 |   API. Unfortunately the code does not work with any recent
153 |   version of Kafka. So basically the package is dead and no
154 |   development has happened for years.
155 | - Another attempt has been made with
156 |   **[{fRanz}](https://github.com/uptake/fRanz)** (referring to the
157 |   author Franz Kafka) which uses a C++ library under the hood. Unfortunately
158 |   this started out very promising and fast-paced but then development
159 |   died just as quickly.
160 |
161 | # Usage
162 |
163 | ## Consumer
164 |
165 | ### Start Consumer (… Stop, Status)
166 |
167 | ``` r
168 | library("kafkaesque")
169 |
170 | # new consumer
171 | consumer <- kafka_consumer()
172 |
173 | # starting/connecting - + status
174 | consumer$start()
175 |
176 | consumer$running()
177 | ```
178 |
179 | ## [1] TRUE
180 |
181 | ``` r
182 | consumer$end()$running()
183 | ```
184 |
185 | ## [1] FALSE
186 |
187 | ``` r
188 | consumer$start()$running()
189 | ```
190 |
191 | ## [1] TRUE
192 |
193 | ### Properties aka Config
194 |
195 | See here for a list of consumer properties:
196 | <https://kafka.apache.org/documentation/#consumerconfigs>.
197 |
198 | ``` r
199 | consumer$props()
200 | ```
201 |
202 | ## $key.deserializer
203 | ## [1] "org.apache.kafka.common.serialization.StringDeserializer"
204 | ##
205 | ## $auto.offset.reset
206 | ## [1] "earliest"
207 | ##
208 | ## $bootstrap.servers
209 | ## [1] "localhost:9092"
210 | ##
211 | ## $group.id
212 | ## [1] "891ee35c-95a7-4fa8-bb9c-ca10b81a7bd8"
213 | ##
214 | ## $value.deserializer
215 | ## [1] "org.apache.kafka.common.serialization.StringDeserializer"
216 | ##
217 | ## $client.id
218 | ## [1] "kafkaesque_consumer"
219 |
220 | ``` r
221 | consumer$props(max.poll.records = 200)
222 | ```
223 |
224 | ## $key.deserializer
225 | ## [1] "org.apache.kafka.common.serialization.StringDeserializer"
226 | ##
227 | ## $max.poll.records
228 | ## [1] "200"
229 | ##
230 | ## $auto.offset.reset
231 | ## [1] "earliest"
232 | ##
233 | ## $bootstrap.servers
234 | ## [1] "localhost:9092"
235 | ##
236 | ## $group.id
237 | ## [1] "891ee35c-95a7-4fa8-bb9c-ca10b81a7bd8"
238 | ##
239 | ## $value.deserializer
240 | ## [1] "org.apache.kafka.common.serialization.StringDeserializer"
241 | ##
242 | ## $client.id
243 | ## [1] "kafkaesque_consumer"
244 |
245 | ### Topics and Subscriptions
246 |
247 | ``` r
248 | # list topics available to consumer
249 | consumer$topics_list()
250 | ```
251 |
252 | ## [1] "test500000" "test2" "test3" "test"
253 |
254 | ``` r
255 | # subscribe to topic
256 | consumer$topics_subscribe("test500000")
257 | consumer$topics_subscription()
258 | ```
259 |
260 | ## [1] "test500000"
261 |
262 | ### Retrieving a Message
263 |
264 | ``` r
265 | # retrieve next message
266 | consumer$consume_next()
267 | ```
268 |
269 | ## topic key partition offset timestamp value timestampType leaderEpoch serializedKeySize
270 | ## 1: test500000 0 500000 1641799430491 1 CreateTime 0 -1
271 | ## serializedValueSize
272 | ## 1: 1
273 |
274 | ### Looping over Messages and Executing Code
275 |
276 | ``` r
277 | # loop over messages and execute code
278 | res <-
279 | consumer$consume_loop(
280 | f = function(loop_env){ print(loop_env$messages)},
281 | check = function(loop_env){loop_env$meta$loop_counter < 4},
282 | batch = TRUE
283 | )
284 | ```
285 |
286 | ## topic key partition offset timestamp value timestampType leaderEpoch serializedKeySize
287 | ## 1: test500000 0 500001 1641799430491 2 CreateTime 0 -1
288 | ## 2: test500000 0 500002 1641799430491 3 CreateTime 0 -1
289 | ## 3: test500000 0 500003 1641799430491 4 CreateTime 0 -1
290 | ## 4: test500000 0 500004 1641799430491 5 CreateTime 0 -1
291 | ## 5: test500000 0 500005 1641799430491 6 CreateTime 0 -1
292 | ## ---
293 | ## 495: test500000 0 500495 1641799430492 496 CreateTime 0 -1
294 | ## 496: test500000 0 500496 1641799430492 497 CreateTime 0 -1
295 | ## 497: test500000 0 500497 1641799430492 498 CreateTime 0 -1
296 | ## 498: test500000 0 500498 1641799430492 499 CreateTime 0 -1
297 | ## 499: test500000 0 500499 1641799430492 500 CreateTime 0 -1
298 | ## serializedValueSize
299 | ## 1: 1
300 | ## 2: 1
301 | ## 3: 1
302 | ## 4: 1
303 | ## 5: 1
304 | ## ---
305 | ## 495: 3
306 | ## 496: 3
307 | ## 497: 3
308 | ## 498: 3
309 | ## 499: 3
310 | ## topic key partition offset timestamp value timestampType leaderEpoch serializedKeySize
311 | ## 1: test500000 0 500500 1641799430492 501 CreateTime 0 -1
312 | ## 2: test500000 0 500501 1641799430492 502 CreateTime 0 -1
313 | ## 3: test500000 0 500502 1641799430492 503 CreateTime 0 -1
314 | ## 4: test500000 0 500503 1641799430492 504 CreateTime 0 -1
315 | ## 5: test500000 0 500504 1641799430492 505 CreateTime 0 -1
316 | ## ---
317 | ## 496: test500000 0 500995 1641799430492 996 CreateTime 0 -1
318 | ## 497: test500000 0 500996 1641799430492 997 CreateTime 0 -1
319 | ## 498: test500000 0 500997 1641799430492 998 CreateTime 0 -1
320 | ## 499: test500000 0 500998 1641799430492 999 CreateTime 0 -1
321 | ## 500: test500000 0 500999 1641799430492 1000 CreateTime 0 -1
322 | ## serializedValueSize
323 | ## 1: 3
324 | ## 2: 3
325 | ## 3: 3
326 | ## 4: 3
327 | ## 5: 3
328 | ## ---
329 | ## 496: 3
330 | ## 497: 3
331 | ## 498: 3
332 | ## 499: 3
333 | ## 500: 4
334 | ## topic key partition offset timestamp value timestampType leaderEpoch serializedKeySize
335 | ## 1: test500000 0 501000 1641799430492 1001 CreateTime 0 -1
336 | ## 2: test500000 0 501001 1641799430492 1002 CreateTime 0 -1
337 | ## 3: test500000 0 501002 1641799430492 1003 CreateTime 0 -1
338 | ## 4: test500000 0 501003 1641799430492 1004 CreateTime 0 -1
339 | ## 5: test500000 0 501004 1641799430492 1005 CreateTime 0 -1
340 | ## ---
341 | ## 496: test500000 0 501495 1641799430493 1496 CreateTime 0 -1
342 | ## 497: test500000 0 501496 1641799430493 1497 CreateTime 0 -1
343 | ## 498: test500000 0 501497 1641799430493 1498 CreateTime 0 -1
344 | ## 499: test500000 0 501498 1641799430493 1499 CreateTime 0 -1
345 | ## 500: test500000 0 501499 1641799430493 1500 CreateTime 0 -1
346 | ## serializedValueSize
347 | ## 1: 4
348 | ## 2: 4
349 | ## 3: 4
350 | ## 4: 4
351 | ## 5: 4
352 | ## ---
353 | ## 496: 4
354 | ## 497: 4
355 | ## 498: 4
356 | ## 499: 4
357 | ## 500: 4
358 | ## topic key partition offset timestamp value timestampType leaderEpoch serializedKeySize
359 | ## 1: test500000 0 501500 1641799430493 1501 CreateTime 0 -1
360 | ## 2: test500000 0 501501 1641799430493 1502 CreateTime 0 -1
361 | ## 3: test500000 0 501502 1641799430493 1503 CreateTime 0 -1
362 | ## 4: test500000 0 501503 1641799430493 1504 CreateTime 0 -1
363 | ## 5: test500000 0 501504 1641799430493 1505 CreateTime 0 -1
364 | ## ---
365 | ## 496: test500000 0 501995 1641799430493 1996 CreateTime 0 -1
366 | ## 497: test500000 0 501996 1641799430493 1997 CreateTime 0 -1
367 | ## 498: test500000 0 501997 1641799430493 1998 CreateTime 0 -1
368 | ## 499: test500000 0 501998 1641799430493 1999 CreateTime 0 -1
369 | ## 500: test500000 0 501999 1641799430493 2000 CreateTime 0 -1
370 | ## serializedValueSize
371 | ## 1: 4
372 | ## 2: 4
373 | ## 3: 4
374 | ## 4: 4
375 | ## 5: 4
376 | ## ---
377 | ## 496: 4
378 | ## 497: 4
379 | ## 498: 4
380 | ## 499: 4
381 | ## 500: 4
382 |
383 | ``` r
384 | # having a look at the statistics
385 | res
386 | ```
387 |
388 | ## $meta
389 | ## $meta$end_time
390 | ## [1] "2022-01-10 08:25:17 CET"
391 | ##
392 | ## $meta$loop_counter
393 | ## [1] 4
394 | ##
395 | ## $meta$start_time
396 | ## [1] "2022-01-10 08:25:17 CET"
397 | ##
398 | ## $meta$message_counter
399 | ## [1] 1999
400 | ##
401 | ##
402 | ## $messages
403 | ## topic key partition offset timestamp value timestampType leaderEpoch serializedKeySize
404 | ## 1: test500000 0 501500 1641799430493 1501 CreateTime 0 -1
405 | ## 2: test500000 0 501501 1641799430493 1502 CreateTime 0 -1
406 | ## 3: test500000 0 501502 1641799430493 1503 CreateTime 0 -1
407 | ## 4: test500000 0 501503 1641799430493 1504 CreateTime 0 -1
408 | ## 5: test500000 0 501504 1641799430493 1505 CreateTime 0 -1
409 | ## ---
410 | ## 496: test500000 0 501995 1641799430493 1996 CreateTime 0 -1
411 | ## 497: test500000 0 501996 1641799430493 1997 CreateTime 0 -1
412 | ## 498: test500000 0 501997 1641799430493 1998 CreateTime 0 -1
413 | ## 499: test500000 0 501998 1641799430493 1999 CreateTime 0 -1
414 | ## 500: test500000 0 501999 1641799430493 2000 CreateTime 0 -1
415 | ## serializedValueSize
416 | ## 1: 4
417 | ## 2: 4
418 | ## 3: 4
419 | ## 4: 4
420 | ## 5: 4
421 | ## ---
422 | ## 496: 4
423 | ## 497: 4
424 | ## 498: 4
425 | ## 499: 4
426 | ## 500: 4
427 |
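Both `f` and `check` receive the same `loop_env`, so state can also be accumulated in an environment outside the loop. A minimal sketch that collects message values, assuming `batch = FALSE` hands `f` one record at a time (an assumption based on the `next_record()`/`next_record_batch()` distinction of the records class):

``` r
# gather the value column of each record into an external environment
collected <- new.env()
collected$values <- character(0)

res <-
  consumer$consume_loop(
    f     = function(loop_env){
      collected$values <- c(collected$values, loop_env$messages$value)
    },
    check = function(loop_env){ loop_env$meta$message_counter < 100 },
    batch = FALSE
  )

length(collected$values)
```
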
428 | ### Looping over Batches of Messages and Executing Code
429 |
430 | ``` r
431 | # loop over batches of messages and execute code
432 | res <-
433 | consumer$consume_loop(
434 | f = function(loop_env){ print(loop_env$messages); cat("\n")},
435 | check = function(loop_env){loop_env$meta$message_counter < 1000},
436 | batch = TRUE
437 | )
438 | ```
439 |
440 | ## topic key partition offset timestamp value timestampType leaderEpoch serializedKeySize
441 | ## 1: test500000 0 502000 1641799430493 2001 CreateTime 0 -1
442 | ## 2: test500000 0 502001 1641799430493 2002 CreateTime 0 -1
443 | ## 3: test500000 0 502002 1641799430493 2003 CreateTime 0 -1
444 | ## 4: test500000 0 502003 1641799430493 2004 CreateTime 0 -1
445 | ## 5: test500000 0 502004 1641799430493 2005 CreateTime 0 -1
446 | ## ---
447 | ## 496: test500000 0 502495 1641799430493 2496 CreateTime 0 -1
448 | ## 497: test500000 0 502496 1641799430493 2497 CreateTime 0 -1
449 | ## 498: test500000 0 502497 1641799430493 2498 CreateTime 0 -1
450 | ## 499: test500000 0 502498 1641799430493 2499 CreateTime 0 -1
451 | ## 500: test500000 0 502499 1641799430493 2500 CreateTime 0 -1
452 | ## serializedValueSize
453 | ## 1: 4
454 | ## 2: 4
455 | ## 3: 4
456 | ## 4: 4
457 | ## 5: 4
458 | ## ---
459 | ## 496: 4
460 | ## 497: 4
461 | ## 498: 4
462 | ## 499: 4
463 | ## 500: 4
464 | ##
465 | ## topic key partition offset timestamp value timestampType leaderEpoch serializedKeySize
466 | ## 1: test500000 0 502500 1641799430493 2501 CreateTime 0 -1
467 | ## 2: test500000 0 502501 1641799430493 2502 CreateTime 0 -1
468 | ## 3: test500000 0 502502 1641799430493 2503 CreateTime 0 -1
469 | ## 4: test500000 0 502503 1641799430493 2504 CreateTime 0 -1
470 | ## 5: test500000 0 502504 1641799430493 2505 CreateTime 0 -1
471 | ## ---
472 | ## 496: test500000 0 502995 1641799430494 2996 CreateTime 0 -1
473 | ## 497: test500000 0 502996 1641799430494 2997 CreateTime 0 -1
474 | ## 498: test500000 0 502997 1641799430494 2998 CreateTime 0 -1
475 | ## 499: test500000 0 502998 1641799430494 2999 CreateTime 0 -1
476 | ## 500: test500000 0 502999 1641799430494 3000 CreateTime 0 -1
477 | ## serializedValueSize
478 | ## 1: 4
479 | ## 2: 4
480 | ## 3: 4
481 | ## 4: 4
482 | ## 5: 4
483 | ## ---
484 | ## 496: 4
485 | ## 497: 4
486 | ## 498: 4
487 | ## 499: 4
488 | ## 500: 4
489 |
490 | ``` r
491 | res
492 | ```
493 |
494 | ## $meta
495 | ## $meta$end_time
496 | ## [1] "2022-01-10 08:25:17 CET"
497 | ##
498 | ## $meta$loop_counter
499 | ## [1] 2
500 | ##
501 | ## $meta$start_time
502 | ## [1] "2022-01-10 08:25:17 CET"
503 | ##
504 | ## $meta$message_counter
505 | ## [1] 1000
506 | ##
507 | ##
508 | ## $messages
509 | ## topic key partition offset timestamp value timestampType leaderEpoch serializedKeySize
510 | ## 1: test500000 0 502500 1641799430493 2501 CreateTime 0 -1
511 | ## 2: test500000 0 502501 1641799430493 2502 CreateTime 0 -1
512 | ## 3: test500000 0 502502 1641799430493 2503 CreateTime 0 -1
513 | ## 4: test500000 0 502503 1641799430493 2504 CreateTime 0 -1
514 | ## 5: test500000 0 502504 1641799430493 2505 CreateTime 0 -1
515 | ## ---
516 | ## 496: test500000 0 502995 1641799430494 2996 CreateTime 0 -1
517 | ## 497: test500000 0 502996 1641799430494 2997 CreateTime 0 -1
518 | ## 498: test500000 0 502997 1641799430494 2998 CreateTime 0 -1
519 | ## 499: test500000 0 502998 1641799430494 2999 CreateTime 0 -1
520 | ## 500: test500000 0 502999 1641799430494 3000 CreateTime 0 -1
521 | ## serializedValueSize
522 | ## 1: 4
523 | ## 2: 4
524 | ## 3: 4
525 | ## 4: 4
526 | ## 5: 4
527 | ## ---
528 | ## 496: 4
529 | ## 497: 4
530 | ## 498: 4
531 | ## 499: 4
532 | ## 500: 4
533 |
534 | ### Offsets and Seeking
535 |
536 | ``` r
537 | # get current offsets from Kafka
538 | consumer$topics_offsets()
539 | ```
540 |
541 | ## topic partition offset
542 | ## 1: test500000 0 503000
543 |
544 | ``` r
545 | # seek to end of topics
546 | consumer$topics_seek_to_end()
547 | consumer$topics_offsets()
548 | ```
549 |
550 | ## topic partition offset
551 | ## 1: test500000 0 1000000
552 |
553 | ``` r
554 | # seek to beginning of topics
555 | consumer$topics_seek_to_beginning()
556 | consumer$topics_offsets()
557 | ```
558 |
559 | ## topic partition offset
560 | ## 1: test500000 0 500000
561 |
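Seeking combines naturally with the loop interface. A minimal sketch that replays the first 500 records of the subscribed topic, using only methods shown above:

``` r
# rewind, then consume until at least 500 records have been seen
consumer$topics_seek_to_beginning()

res <-
  consumer$consume_loop(
    f     = function(loop_env){ invisible(NULL) },
    check = function(loop_env){ loop_env$meta$message_counter < 500 },
    batch = TRUE
  )

res$meta$message_counter
```
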
562 | ## Producer
563 |
564 | ### Start Producer (… Stop, Status)
565 |
566 | ``` r
567 | library("kafkaesque")
568 |
569 | # new producer
570 | producer <- kafka_producer()
571 |
572 | # starting/connecting - + status
573 | producer$start()
574 |
575 | producer$running()
576 | ```
577 |
578 | ## [1] TRUE
579 |
580 | ``` r
581 | producer$end()$running()
582 | ```
583 |
584 | ## [1] FALSE
585 |
586 | ``` r
587 | producer$start()$running()
588 | ```
589 |
590 | ## [1] TRUE
591 |
592 | ``` r
593 | producer$restart()$running()
594 | ```
595 |
596 | ## [1] TRUE
597 |
598 | ### Properties aka Config
599 |
600 | See the Kafka documentation for the list of producer properties.
602 |
603 | ``` r
604 | producer$props()
605 | ```
606 |
607 | ## $bootstrap.servers
608 | ## [1] "localhost:9092"
609 | ##
610 | ## $value.serializer
611 | ## [1] "org.apache.kafka.common.serialization.StringSerializer"
612 | ##
613 | ## $client.id
614 | ## [1] "kafkaesque_producer"
615 | ##
616 | ## $key.serializer
617 | ## [1] "org.apache.kafka.common.serialization.StringSerializer"
618 |
619 | ``` r
620 | producer$props(whatever.you.may.want.to.set = "true")
621 | ```
622 |
623 | ## $whatever.you.may.want.to.set
624 | ## [1] "true"
625 | ##
626 | ## $bootstrap.servers
627 | ## [1] "localhost:9092"
628 | ##
629 | ## $value.serializer
630 | ## [1] "org.apache.kafka.common.serialization.StringSerializer"
631 | ##
632 | ## $client.id
633 | ## [1] "kafkaesque_producer"
634 | ##
635 | ## $key.serializer
636 | ## [1] "org.apache.kafka.common.serialization.StringSerializer"
637 |
638 | ### Sending Messages
639 |
640 | ``` r
641 | producer$send(topic = "test", msg = "Die Kuh macht muh.")
642 | producer$send(topic = "test", msg = "Das Schaf macht mäh.")
643 | producer$send(topic = "test", msg = "Das Pferd macht wihiiiiiiiii-pffffff.")
644 | ```
645 |
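A minimal sketch of a send/receive round trip, assuming the consumer from above is still running; note that which record `consume_next()` returns depends on the consumer group's current offset for the topic:

``` r
# produce one message, then read from the same topic
producer$send(topic = "test", msg = "ping")

consumer$topics_subscribe("test")
consumer$consume_next()
```
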
646 | ## Setting and Getting Java Log Levels
647 |
648 | ``` r
649 | kafka_get_log_level()
650 | ```
651 |
652 | ## [1] "ERROR"
653 |
654 | ``` r
655 | # one of off, fatal, error, warn, info, debug, trace, all
656 | kafka_set_log_level("info")
657 | ```
658 |
659 | ## [1] "INFO"
660 |
661 | ``` r
662 | producer$start()
663 |
664 |
665 | # set back to normal
666 | kafka_set_log_level("error")
667 | ```
668 |
669 | ## [1] "ERROR"
670 |
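A minimal sketch for temporarily raising verbosity around a single call. Whether the setter accepts the upper-case names returned by `kafka_get_log_level()` is an assumption here, so the stored value is lower-cased before restoring:

``` r
# raise verbosity, run one call, restore the previous level
old <- kafka_get_log_level()
kafka_set_log_level("debug")

consumer$topics_offsets()

kafka_set_log_level(tolower(old))
```
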
671 | ## Admin
672 |
673 | ### Properties aka Config
674 |
675 | ``` r
676 | admin <- kafka_admin()
677 | admin$start()
678 |
679 | admin$props()
680 | ```
681 |
682 | ## $bootstrap.servers
683 | ## [1] "localhost:9092"
684 | ##
685 | ## $client.id
686 | ## [1] "kafkaesque_admin"
687 |
688 | ``` r
689 | admin$props(whatever.you.may.want.to.set = "true")
690 | ```
691 |
692 | ## $whatever.you.may.want.to.set
693 | ## [1] "true"
694 | ##
695 | ## $bootstrap.servers
696 | ## [1] "localhost:9092"
697 | ##
698 | ## $client.id
699 | ## [1] "kafkaesque_admin"
700 |
701 | ### Get List of Topics
702 |
703 | ``` r
704 | admin$topics_list()
705 | ```
706 |
707 | ## [1] "test500000" "test2" "test3" "test"
708 |
709 | ### Create Topics
710 |
711 | ``` r
712 | admin$topics_list()
713 | ```
714 |
715 | ## [1] "test500000" "test2" "test3" "test"
716 |
717 | ``` r
718 | topics <- c("chuckle", "chit_chat")
719 | admin$topics_create(
720 | topic = topics,
721 | partition = c(1L, 1L),
722 | replication_factor = c(1L, 1L)
723 | )
724 | ```
725 |
726 | ## [1] "chuckle" "test500000" "test2" "test3" "test" "chit_chat"
727 |
728 | ### Delete Topics
729 |
730 | ``` r
731 | admin$topics_delete(topics)
732 | ```
733 |
734 | ## [1] "test500000" "test2" "test3" "test"
735 |
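Since `topics_create()` takes vectors, it pairs well with `topics_list()` for idempotent setup. A minimal sketch using only methods shown above:

``` r
# create only those topics that do not exist yet
wanted  <- c("chuckle", "chit_chat")
missing <- setdiff(wanted, admin$topics_list())

if ( length(missing) > 0 ) {
  admin$topics_create(
    topic              = missing,
    partition          = rep(1L, length(missing)),
    replication_factor = rep(1L, length(missing))
  )
}
```
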
736 | # Development Notes
737 |
738 | For R development, RStudio was used. For Java development, Visual Studio
739 | Code lent a helping hand, with Maven as the build tool.
740 |
741 | For development, only one package is needed:
742 |
743 | - {kafkaesque} contains all the R functions, classes, and methods, as
744 |   well as all the Java code that is specific to this project (and not
745 |   just a dependency)
746 |
747 | Java sources are in the `./java/kafkaesque/` folder, so your Java
748 | project should use this as its root folder. Building the Java sources is
749 | done via Maven: `mvn install` compiles everything and copies
750 | `kafkaesque.jar`, together with all its Java dependencies, into the
751 | package's `./inst/java/` folder.
752 |
753 | After Java compilation, the R package has to be (re-)built and
754 | (re-)installed, most likely after restarting the R session first
755 | (Ctrl-Shift-F10 in RStudio).
756 |
757 | If developing Java in VS Code - as I did here - pressing Ctrl-Shift-B
758 | should offer the two most important tasks: resolving dependencies, and
759 | compiling the Java code and distributing it to the right places as
760 | described above.
761 |
--------------------------------------------------------------------------------