├── .gitignore
├── README.md
├── docker-compose.yml
├── flink-processor
├── Dockerfile
├── pom.xml
├── src
│ └── main
│ │ ├── java
│ │ ├── Main.java
│ │ ├── Weather.java
│ │ └── WeatherDeserializationSchema.java
│ │ └── resources
│ │ └── log4j2.properties
└── wait-for-it.sh
├── kafka-producer
├── Dockerfile
├── python-producer.py
├── requirements.txt
└── wait-for-it.sh
└── postgres
├── Dockerfile
└── create_table.sql
/.gitignore:
--------------------------------------------------------------------------------
1 | flink-processor/target
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Building Real-Time Data Streaming using Kafka, Apache Flink and Postgres
2 |
3 | #### Youtube Video :video_camera:
4 |
5 | https://www.youtube.com/watch?v=FoypLT2W91c
6 |
7 | #### Steps Walkthrough
8 |
9 | https://medium.com/@kavitmht/building-a-real-time-data-streaming-pipeline-using-apache-kafka-flink-and-postgres-a22101c97895
10 |
11 |
12 | Connect with me on:
13 |
14 | Twitter 👦🏻:- https://twitter.com/kmmtmm92
15 |
16 | Youtube 📹:- https://www.youtube.com/channel/UCpmw7QtwoHXV05D3NUWZ3oQ
17 |
18 | Github 💭:- https://github.com/Kavit900
19 |
20 | Instagram 📸:- https://www.instagram.com/code_with_kavit/
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.9"
2 |
3 | networks:
4 |   bridge:
5 |     driver: bridge
6 |
7 | services:
8 |   zookeeper:
9 |     image: confluentinc/cp-zookeeper:latest
10 |     environment:
11 |       ZOOKEEPER_CLIENT_PORT: "32181"
12 |       ZOOKEEPER_TICK_TIME: "2000"
13 |     networks:
14 |       bridge:
15 |         aliases:
16 |           - zookeeper
17 |
18 |   kafka:
19 |     image: confluentinc/cp-kafka
20 |     depends_on:
21 |       - zookeeper
22 |     environment:
23 |       KAFKA_BROKER_ID: "1"
24 |       KAFKA_ADVERTISED_HOST_NAME: "0.0.0.0"  # NOTE(review): legacy setting; KAFKA_ADVERTISED_LISTENERS below is what clients use — confirm it can be dropped
25 |       KAFKA_ZOOKEEPER_CONNECT: "zookeeper:32181"
26 |       KAFKA_ADVERTISED_LISTENERS: "PLAINTEXT://kafka:9092"
27 |       KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: "1"  # single-broker setup, so replication factor 1
28 |       JMX_PORT: "9999"
29 |     networks:
30 |       bridge:
31 |         aliases:
32 |           - kafka
33 |
34 |   kafka-producer:
35 |     image: kafka-producer
36 |     depends_on:
37 |       - kafka
38 |     environment:
39 |       KAFKA_SERVER: "kafka:9092"
40 |       ZOOKEEPER_SERVER: "zookeeper:32181"
41 |       PRODUCER_INTERVAL: "100"  # env values quoted so YAML keeps them strings
42 |     networks:
43 |       - bridge
44 |
45 |   flink-processor:
46 |     image: flink-processor
47 |     depends_on:
48 |       - kafka
49 |     environment:
50 |       KAFKA_SERVER: "kafka:9092"
51 |       ZOOKEEPER_SERVER: "zookeeper:32181"
52 |       PRODUCER_INTERVAL: "100"
53 |     networks:
54 |       - bridge
55 |
56 |   postgres:
57 |     build:
58 |       context: ./postgres
59 |     container_name: postgres
60 |     restart: always
61 |     environment:
62 |       - POSTGRES_USER=postgres
63 |       - POSTGRES_PASSWORD=postgres
64 |       - POSTGRES_DB=postgres
65 |     logging:
66 |       options:
67 |         max-size: 10m
68 |         max-file: "3"
69 |     ports:
70 |       - "5438:5432"  # host 5438 -> container 5432; quoted per Compose convention for port mappings
71 |     networks:
72 |       - bridge
73 |
74 | volumes:
75 |   settings:
76 |   data:
--------------------------------------------------------------------------------
/flink-processor/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM openjdk:8u151-jdk-alpine3.7
2 |
3 | # Install Bash
4 | RUN apk add --no-cache bash libc6-compat
5 |
6 | # Copy resources
7 | WORKDIR /
8 | COPY wait-for-it.sh wait-for-it.sh
9 | COPY target/flink-kafka2postgres-1.0-SNAPSHOT-jar-with-dependencies.jar flink-processor.jar
10 |
11 | # Wait for Zookeeper and Kafka to be available and run application
12 | CMD ./wait-for-it.sh -s -t 30 $ZOOKEEPER_SERVER -- ./wait-for-it.sh -s -t 30 $KAFKA_SERVER -- java -Xmx512m -jar flink-processor.jar
--------------------------------------------------------------------------------
/flink-processor/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | org.kavit
8 | flink-kafka2postgres
9 | 1.0-SNAPSHOT
10 |
11 |
12 | 8
13 | 8
14 | UTF-8
15 | 1.16.0
16 | 2.13.4
17 | 5.8.2
18 | 3.2.0
19 | 2.17.2
20 | ${target.java.version}
21 | ${target.java.version}
22 | UTF-8
23 | 8
24 |
25 |
26 |
27 |
28 |
29 | org.apache.flink
30 | flink-streaming-java
31 | ${flink.version}
32 |
33 |
34 | org.apache.flink
35 | flink-clients
36 | ${flink.version}
37 |
38 |
39 |
40 | org.apache.flink
41 | flink-table
42 | ${flink.version}
43 | pom
44 |
45 |
46 |
47 | org.apache.flink
48 | flink-table-api-java-bridge
49 | ${flink.version}
50 |
51 |
52 |
53 | org.apache.flink
54 | flink-table-api-java
55 | ${flink.version}
56 |
57 |
58 |
59 | org.apache.flink
60 | flink-runtime
61 | ${flink.version}
62 |
63 |
64 |
65 | org.apache.flink
66 | flink-table-planner_2.12
67 | ${flink.version}
68 |
69 |
70 |
71 |
72 |
73 | org.apache.flink
74 | flink-connector-files
75 | ${flink.version}
76 |
77 |
78 | org.apache.flink
79 | flink-connector-jdbc
80 | ${flink.version}
81 |
82 |
83 | org.apache.flink
84 | flink-json
85 | ${flink.version}
86 |
87 |
88 |
89 |
90 |
91 | org.postgresql
92 | postgresql
93 | 42.5.0
94 |
95 |
96 |
97 |
98 |
99 |
100 | org.apache.flink
101 | flink-csv
102 | ${flink.version}
103 |
104 |
105 | org.apache.flink
106 | flink-json
107 | ${flink.version}
108 |
109 |
110 |
111 |
112 | com.fasterxml.jackson.core
113 | jackson-databind
114 | ${jackson.version}
115 |
116 |
117 | com.fasterxml.jackson.core
118 | jackson-annotations
119 | ${jackson.version}
120 |
121 |
122 |
123 | com.fasterxml.jackson.datatype
124 | jackson-datatype-jsr310
125 | ${jackson.version}
126 |
127 |
128 | org.apache.flink
129 | flink-connector-kafka
130 | ${flink.version}
131 |
132 |
133 | org.apache.kafka
134 | kafka-clients
135 | ${kafka.version}
136 |
137 |
138 |
139 |
140 |
141 | org.apache.logging.log4j
142 | log4j-slf4j-impl
143 | ${log4j.version}
144 | runtime
145 |
146 |
147 | org.apache.logging.log4j
148 | log4j-api
149 | ${log4j.version}
150 | runtime
151 |
152 |
153 | org.apache.logging.log4j
154 | log4j-core
155 | ${log4j.version}
156 | runtime
157 |
158 |
159 |
160 | com.github.javafaker
161 | javafaker
162 | 1.0.2
163 | test
164 |
165 |
166 | net.mguenther.kafka
167 | kafka-junit
168 | ${kafka.version}
169 | test
170 |
171 |
172 | ch.qos.reload4j
173 | reload4j
174 |
175 |
176 | org.apache.kafka
177 | kafka-log4j-appender
178 |
179 |
180 |
181 |
182 | org.apache.flink
183 | flink-test-utils
184 | ${flink.version}
185 | test
186 |
187 |
188 | log4j
189 | log4j
190 |
191 |
192 |
193 |
194 |
195 | org.apache.flink
196 | flink-runtime-web
197 | ${flink.version}
198 | test
199 |
200 |
201 | org.junit.jupiter
202 | junit-jupiter-engine
203 | ${junit.jupiter.version}
204 | test
205 |
206 |
207 | org.junit.jupiter
208 | junit-jupiter-api
209 | ${junit.jupiter.version}
210 | test
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 | org.apache.maven.plugins
219 | maven-compiler-plugin
220 | 3.8.1
221 |
222 | ${target.java.version}
223 | ${target.java.version}
224 |
225 |
226 |
227 | org.apache.maven.plugins
228 | maven-surefire-plugin
229 | 2.22.2
230 |
231 | --illegal-access=permit
232 |
233 |
234 |
235 | org.apache.maven.plugins
236 | maven-failsafe-plugin
237 | 2.22.2
238 |
239 | --illegal-access=permit
240 |
241 |
242 |
243 |
244 |
245 | org.apache.maven.plugins
246 | maven-shade-plugin
247 | 3.1.1
248 |
249 |
250 |
251 | package
252 |
253 | shade
254 |
255 |
256 |
257 |
258 | org.apache.flink:flink-shaded-force-shading
259 | com.google.code.findbugs:jsr305
260 | org.slf4j:*
261 | org.apache.logging.log4j:*
262 |
263 |
264 | false
265 |
266 |
267 |
268 |
269 | *:*
270 |
271 | META-INF/*.SF
272 | META-INF/*.DSA
273 | META-INF/*.RSA
274 |
275 |
276 |
277 | true
278 | jar-with-dependencies
279 |
280 |
281 | *:*
282 |
283 |
284 |
285 |
286 |
287 | Main
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 | com.diffplug.spotless
297 | spotless-maven-plugin
298 | 2.23.0
299 |
300 |
301 |
302 | check
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 | org.eclipse.m2e
321 | lifecycle-mapping
322 | 1.0.0
323 |
324 |
325 |
326 |
327 |
328 | org.apache.maven.plugins
329 | maven-shade-plugin
330 | [3.1.1,)
331 |
332 | shade
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 | org.apache.maven.plugins
342 | maven-compiler-plugin
343 | [3.1,)
344 |
345 | testCompile
346 | compile
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 |
--------------------------------------------------------------------------------
/flink-processor/src/main/java/Main.java:
--------------------------------------------------------------------------------
1 | import org.apache.flink.api.common.eventtime.WatermarkStrategy;
2 | import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
3 | import org.apache.flink.connector.jdbc.JdbcExecutionOptions;
4 | import org.apache.flink.connector.jdbc.JdbcSink;
5 | import org.apache.flink.connector.kafka.source.KafkaSource;
6 | import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
7 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
9 | import org.apache.kafka.common.TopicPartition;
10 |
11 | import org.apache.flink.api.common.functions.AggregateFunction;
12 | import org.apache.flink.api.java.tuple.Tuple2;
13 | import org.apache.flink.streaming.api.datastream.DataStream;
14 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
15 | import org.apache.flink.streaming.api.windowing.time.Time;
16 | import org.apache.flink.api.common.functions.MapFunction;
17 |
18 |
19 | import java.util.Arrays;
20 | import java.util.HashSet;
21 |
22 | public class Main {
23 |
24 | static final String BROKERS = "kafka:9092";
25 |
26 | public static void main(String[] args) throws Exception {
27 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
28 |
29 | System.out.println("Environment created");
30 | KafkaSource source = KafkaSource.builder()
31 | .setBootstrapServers(BROKERS)
32 | .setProperty("partition.discovery.interval.ms", "1000")
33 | .setTopics("weather")
34 | .setGroupId("groupdId-919292")
35 | .setStartingOffsets(OffsetsInitializer.earliest())
36 | .setValueOnlyDeserializer(new WeatherDeserializationSchema())
37 | .build();
38 |
39 | DataStreamSource kafka = env.fromSource(source, WatermarkStrategy.noWatermarks(), "kafka");
40 |
41 | System.out.println("Kafka source created");
42 |
43 | DataStream> averageTemperatureStream = kafka.keyBy(myEvent -> myEvent.city)
44 | .window(TumblingProcessingTimeWindows.of(Time.seconds(60)))
45 | .aggregate(new AverageAggregator());
46 |
47 | DataStream> cityAndValueStream = averageTemperatureStream
48 | .map(new MapFunction, Tuple2>() {
49 | @Override
50 | public Tuple2 map(Tuple2 input) throws Exception {
51 | return new Tuple2<>(input.f0.city, input.f1);
52 | }
53 | });
54 |
55 | System.out.println("Aggregation created");
56 |
57 |
58 | // cityAndValueStream.print();
59 | cityAndValueStream.addSink(JdbcSink.sink("insert into weather (city, average_temperature) values (?, ?)",
60 | (statement, event) -> {
61 | statement.setString(1, event.f0);
62 | statement.setDouble(2, event.f1);
63 | },
64 | JdbcExecutionOptions.builder()
65 | .withBatchSize(1000)
66 | .withBatchIntervalMs(200)
67 | .withMaxRetries(5)
68 | .build(),
69 | new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
70 | .withUrl("jdbc:postgresql://docker.for.mac.host.internal:5438/postgres")
71 | .withDriverName("org.postgresql.Driver")
72 | .withUsername("postgres")
73 | .withPassword("postgres")
74 | .build()
75 | ));
76 |
77 | env.execute("Kafka-flink-postgres");
78 | }
79 |
80 | /**
81 | * Aggregation function for average.
82 | */
83 | public static class AverageAggregator implements AggregateFunction> {
84 |
85 | @Override
86 | public MyAverage createAccumulator() {
87 | return new MyAverage();
88 | }
89 |
90 | @Override
91 | public MyAverage add(Weather weather, MyAverage myAverage) {
92 | //logger.debug("add({},{})", myAverage.city, myEvent);
93 | myAverage.city = weather.city;
94 | myAverage.count = myAverage.count + 1;
95 | myAverage.sum = myAverage.sum + weather.temperature;
96 | return myAverage;
97 | }
98 |
99 | @Override
100 | public Tuple2 getResult(MyAverage myAverage) {
101 | return new Tuple2<>(myAverage, myAverage.sum / myAverage.count);
102 | }
103 |
104 | @Override
105 | public MyAverage merge(MyAverage myAverage, MyAverage acc1) {
106 | myAverage.sum = myAverage.sum + acc1.sum;
107 | myAverage.count = myAverage.count + acc1.count;
108 | return myAverage;
109 | }
110 | }
111 |
112 | public static class MyAverage {
113 |
114 | public String city;
115 | public Integer count = 0;
116 | public Double sum = 0d;
117 |
118 | @Override
119 | public String toString() {
120 | return "MyAverage{" +
121 | "city='" + city + '\'' +
122 | ", count=" + count +
123 | ", sum=" + sum +
124 | '}';
125 | }
126 | }
127 | }
--------------------------------------------------------------------------------
/flink-processor/src/main/java/Weather.java:
--------------------------------------------------------------------------------
1 | import java.util.Objects;
2 |
3 | public class Weather {
4 |
5 | /*
6 | {
7 | "city": "New York",
8 | "temperature": "10.34"
9 | }
10 | */
11 |
12 | public String city;
13 | public Double temperature;
14 |
15 | public Weather() {}
16 |
17 | public Weather(String city, String temperature) {
18 | this.city = city;
19 | this.temperature = Double.valueOf(temperature);
20 | }
21 |
22 | @Override
23 | public String toString() {
24 | final StringBuilder sb = new StringBuilder("Weather{");
25 | sb.append("city=").append(city).append('\'');
26 | sb.append(", temperature=").append(String.valueOf(temperature)).append('\'');
27 | return sb.toString();
28 | }
29 |
30 | public int hashCode() {
31 | return Objects.hash(super.hashCode(), city, temperature);
32 | }
33 | }
--------------------------------------------------------------------------------
/flink-processor/src/main/java/WeatherDeserializationSchema.java:
--------------------------------------------------------------------------------
1 | import com.fasterxml.jackson.databind.ObjectMapper;
2 | import com.fasterxml.jackson.databind.json.JsonMapper;
3 | import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
4 | import java.io.IOException;
5 | import org.apache.flink.api.common.serialization.AbstractDeserializationSchema;
6 |
7 | // Deserializes Kafka record values (JSON bytes) into Weather objects.
8 | public class WeatherDeserializationSchema extends AbstractDeserializationSchema<Weather> {
9 |     // Fixed typo: was "serialVersionUUID", which Java serialization ignores entirely.
10 |     private static final long serialVersionUID = 1L;
11 |
12 |     // ObjectMapper is not Serializable, so it is built in open() on each task
13 |     // rather than shipped with the serialized job graph.
14 |     private transient ObjectMapper objectMapper;
15 |
16 |     @Override
17 |     public void open(InitializationContext context) {
18 |         objectMapper = JsonMapper.builder().build().registerModule(new JavaTimeModule());
19 |     }
20 |
21 |     @Override
22 |     public Weather deserialize(byte[] message) throws IOException {
23 |         return objectMapper.readValue(message, Weather.class);
24 |     }
25 | }
--------------------------------------------------------------------------------
/flink-processor/src/main/resources/log4j2.properties:
--------------------------------------------------------------------------------
1 | rootLogger.level = INFO
2 | rootLogger.appenderRef.console.ref = ConsoleAppender
3 |
4 | appender.console.name = ConsoleAppender
5 | appender.console.type = CONSOLE
6 | appender.console.layout.type = PatternLayout
7 | appender.console.layout.pattern = %d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
--------------------------------------------------------------------------------
/flink-processor/wait-for-it.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Use this script to test if a given TCP host/port are available
3 |
4 | cmdname=$(basename $0)
5 |
6 | echoerr() { if [[ $QUIET -ne 1 ]]; then echo "$@" 1>&2; fi }
7 |
8 | usage()
9 | {
10 | cat << USAGE >&2
11 | Usage:
12 | $cmdname host:port [-s] [-t timeout] [-- command args]
13 | -h HOST | --host=HOST Host or IP under test
14 | -p PORT | --port=PORT TCP port under test
15 | Alternatively, you specify the host and port as host:port
16 | -s | --strict Only execute subcommand if the test succeeds
17 | -q | --quiet Don't output any status messages
18 | -t TIMEOUT | --timeout=TIMEOUT
19 | Timeout in seconds, zero for no timeout
20 | -- COMMAND ARGS Execute command with args after the test finishes
21 | USAGE
22 | exit 1
23 | }
24 |
25 | wait_for()
26 | {
27 | if [[ $TIMEOUT -gt 0 ]]; then
28 | echoerr "$cmdname: waiting $TIMEOUT seconds for $HOST:$PORT"
29 | else
30 | echoerr "$cmdname: waiting for $HOST:$PORT without a timeout"
31 | fi
32 | start_ts=$(date +%s)
33 | while :
34 | do
35 | if [[ $ISBUSY -eq 1 ]]; then
36 | nc -z $HOST $PORT
37 | result=$?
38 | else
39 | (echo > /dev/tcp/$HOST/$PORT) >/dev/null 2>&1
40 | result=$?
41 | fi
42 | if [[ $result -eq 0 ]]; then
43 | end_ts=$(date +%s)
44 | echoerr "$cmdname: $HOST:$PORT is available after $((end_ts - start_ts)) seconds"
45 | break
46 | fi
47 | sleep 1
48 | done
49 | return $result
50 | }
51 |
52 | wait_for_wrapper()
53 | {
54 | # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
55 | if [[ $QUIET -eq 1 ]]; then
56 | timeout $BUSYTIMEFLAG $TIMEOUT $0 --quiet --child --host=$HOST --port=$PORT --timeout=$TIMEOUT &
57 | else
58 | timeout $BUSYTIMEFLAG $TIMEOUT $0 --child --host=$HOST --port=$PORT --timeout=$TIMEOUT &
59 | fi
60 | PID=$!
61 | trap "kill -INT -$PID" INT
62 | wait $PID
63 | RESULT=$?
64 | if [[ $RESULT -ne 0 ]]; then
65 | echoerr "$cmdname: timeout occurred after waiting $TIMEOUT seconds for $HOST:$PORT"
66 | fi
67 | return $RESULT
68 | }
69 |
70 | # process arguments
71 | while [[ $# -gt 0 ]]
72 | do
73 | case "$1" in
74 | *:* )
75 | hostport=(${1//:/ })
76 | HOST=${hostport[0]}
77 | PORT=${hostport[1]}
78 | shift 1
79 | ;;
80 | --child)
81 | CHILD=1
82 | shift 1
83 | ;;
84 | -q | --quiet)
85 | QUIET=1
86 | shift 1
87 | ;;
88 | -s | --strict)
89 | STRICT=1
90 | shift 1
91 | ;;
92 | -h)
93 | HOST="$2"
94 | if [[ $HOST == "" ]]; then break; fi
95 | shift 2
96 | ;;
97 | --host=*)
98 | HOST="${1#*=}"
99 | shift 1
100 | ;;
101 | -p)
102 | PORT="$2"
103 | if [[ $PORT == "" ]]; then break; fi
104 | shift 2
105 | ;;
106 | --port=*)
107 | PORT="${1#*=}"
108 | shift 1
109 | ;;
110 | -t)
111 | TIMEOUT="$2"
112 | if [[ $TIMEOUT == "" ]]; then break; fi
113 | shift 2
114 | ;;
115 | --timeout=*)
116 | TIMEOUT="${1#*=}"
117 | shift 1
118 | ;;
119 | --)
120 | shift
121 | CLI=("$@")
122 | break
123 | ;;
124 | --help)
125 | usage
126 | ;;
127 | *)
128 | echoerr "Unknown argument: $1"
129 | usage
130 | ;;
131 | esac
132 | done
133 |
134 | if [[ "$HOST" == "" || "$PORT" == "" ]]; then
135 | echoerr "Error: you need to provide a host and port to test."
136 | usage
137 | fi
138 |
139 | TIMEOUT=${TIMEOUT:-15}
140 | STRICT=${STRICT:-0}
141 | CHILD=${CHILD:-0}
142 | QUIET=${QUIET:-0}
143 |
144 | # check to see if timeout is from busybox
145 | # (busybox's timeout takes the seconds via the -t flag)
146 | TIMEOUT_PATH=$(realpath $(which timeout))
147 | if [[ $TIMEOUT_PATH =~ "busybox" ]]; then
148 | ISBUSY=1
149 | BUSYTIMEFLAG="-t"
150 | else
151 | ISBUSY=0
152 | BUSYTIMEFLAG=""
153 | fi
154 |
155 | if [[ $CHILD -gt 0 ]]; then
156 | wait_for
157 | RESULT=$?
158 | exit $RESULT
159 | else
160 | if [[ $TIMEOUT -gt 0 ]]; then
161 | wait_for_wrapper
162 | RESULT=$?
163 | else
164 | wait_for
165 | RESULT=$?
166 | fi
167 | fi
168 |
169 | if [[ $CLI != "" ]]; then
170 | if [[ $RESULT -ne 0 && $STRICT -eq 1 ]]; then
171 | echoerr "$cmdname: strict mode, refusing to execute subprocess"
172 | exit $RESULT
173 | fi
174 | exec "${CLI[@]}"
175 | else
176 | exit $RESULT
177 | fi
--------------------------------------------------------------------------------
/kafka-producer/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.8-slim
2 |
3 | COPY requirements.txt .
4 |
5 | RUN set -ex; \
6 | pip install --no-cache-dir -r requirements.txt
7 |
8 | # Copy resources
9 | WORKDIR /
10 | COPY wait-for-it.sh wait-for-it.sh
11 |
12 | COPY python-producer.py .
13 |
14 | CMD ./wait-for-it.sh -s -t 30 $ZOOKEEPER_SERVER -- ./wait-for-it.sh -s -t 30 $KAFKA_SERVER -- python -u python-producer.py
--------------------------------------------------------------------------------
/kafka-producer/python-producer.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import time
3 | import random
4 | import schedule
5 | from json import dumps
6 |
7 | from faker import Faker
8 | from kafka import KafkaProducer
9 |
10 |
11 |
12 | kafka_nodes = "kafka:9092"
13 | myTopic = "weather"
14 |
15 | def gen_data():
16 | faker = Faker()
17 |
18 | prod = KafkaProducer(bootstrap_servers=kafka_nodes, value_serializer=lambda x:dumps(x).encode('utf-8'))
19 | my_data = {'city': faker.city(), 'temperature': random.uniform(10.0, 110.0)}
20 | print(my_data)
21 | prod.send(topic=myTopic, value=my_data)
22 |
23 | prod.flush()
24 |
25 | if __name__ == "__main__":
26 | gen_data()
27 | schedule.every(10).seconds.do(gen_data)
28 |
29 | while True:
30 | schedule.run_pending()
31 | time.sleep(0.5)
--------------------------------------------------------------------------------
/kafka-producer/requirements.txt:
--------------------------------------------------------------------------------
1 | kafka-python==2.0.2
2 | schedule==1.1.0
3 | aiokafka==0.7.2
4 | Faker==15.1.3
--------------------------------------------------------------------------------
/kafka-producer/wait-for-it.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Use this script to test if a given TCP host/port are available
3 |
4 | cmdname=$(basename $0)
5 |
6 | echoerr() { if [[ $QUIET -ne 1 ]]; then echo "$@" 1>&2; fi }
7 |
8 | usage()
9 | {
10 | cat << USAGE >&2
11 | Usage:
12 | $cmdname host:port [-s] [-t timeout] [-- command args]
13 | -h HOST | --host=HOST Host or IP under test
14 | -p PORT | --port=PORT TCP port under test
15 | Alternatively, you specify the host and port as host:port
16 | -s | --strict Only execute subcommand if the test succeeds
17 | -q | --quiet Don't output any status messages
18 | -t TIMEOUT | --timeout=TIMEOUT
19 | Timeout in seconds, zero for no timeout
20 | -- COMMAND ARGS Execute command with args after the test finishes
21 | USAGE
22 | exit 1
23 | }
24 |
25 | wait_for()
26 | {
27 | if [[ $TIMEOUT -gt 0 ]]; then
28 | echoerr "$cmdname: waiting $TIMEOUT seconds for $HOST:$PORT"
29 | else
30 | echoerr "$cmdname: waiting for $HOST:$PORT without a timeout"
31 | fi
32 | start_ts=$(date +%s)
33 | while :
34 | do
35 | if [[ $ISBUSY -eq 1 ]]; then
36 | nc -z $HOST $PORT
37 | result=$?
38 | else
39 | (echo > /dev/tcp/$HOST/$PORT) >/dev/null 2>&1
40 | result=$?
41 | fi
42 | if [[ $result -eq 0 ]]; then
43 | end_ts=$(date +%s)
44 | echoerr "$cmdname: $HOST:$PORT is available after $((end_ts - start_ts)) seconds"
45 | break
46 | fi
47 | sleep 1
48 | done
49 | return $result
50 | }
51 |
52 | wait_for_wrapper()
53 | {
54 | # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
55 | if [[ $QUIET -eq 1 ]]; then
56 | timeout $BUSYTIMEFLAG $TIMEOUT $0 --quiet --child --host=$HOST --port=$PORT --timeout=$TIMEOUT &
57 | else
58 | timeout $BUSYTIMEFLAG $TIMEOUT $0 --child --host=$HOST --port=$PORT --timeout=$TIMEOUT &
59 | fi
60 | PID=$!
61 | trap "kill -INT -$PID" INT
62 | wait $PID
63 | RESULT=$?
64 | if [[ $RESULT -ne 0 ]]; then
65 | echoerr "$cmdname: timeout occurred after waiting $TIMEOUT seconds for $HOST:$PORT"
66 | fi
67 | return $RESULT
68 | }
69 |
70 | # process arguments
71 | while [[ $# -gt 0 ]]
72 | do
73 | case "$1" in
74 | *:* )
75 | hostport=(${1//:/ })
76 | HOST=${hostport[0]}
77 | PORT=${hostport[1]}
78 | shift 1
79 | ;;
80 | --child)
81 | CHILD=1
82 | shift 1
83 | ;;
84 | -q | --quiet)
85 | QUIET=1
86 | shift 1
87 | ;;
88 | -s | --strict)
89 | STRICT=1
90 | shift 1
91 | ;;
92 | -h)
93 | HOST="$2"
94 | if [[ $HOST == "" ]]; then break; fi
95 | shift 2
96 | ;;
97 | --host=*)
98 | HOST="${1#*=}"
99 | shift 1
100 | ;;
101 | -p)
102 | PORT="$2"
103 | if [[ $PORT == "" ]]; then break; fi
104 | shift 2
105 | ;;
106 | --port=*)
107 | PORT="${1#*=}"
108 | shift 1
109 | ;;
110 | -t)
111 | TIMEOUT="$2"
112 | if [[ $TIMEOUT == "" ]]; then break; fi
113 | shift 2
114 | ;;
115 | --timeout=*)
116 | TIMEOUT="${1#*=}"
117 | shift 1
118 | ;;
119 | --)
120 | shift
121 | CLI=("$@")
122 | break
123 | ;;
124 | --help)
125 | usage
126 | ;;
127 | *)
128 | echoerr "Unknown argument: $1"
129 | usage
130 | ;;
131 | esac
132 | done
133 |
134 | if [[ "$HOST" == "" || "$PORT" == "" ]]; then
135 | echoerr "Error: you need to provide a host and port to test."
136 | usage
137 | fi
138 |
139 | TIMEOUT=${TIMEOUT:-15}
140 | STRICT=${STRICT:-0}
141 | CHILD=${CHILD:-0}
142 | QUIET=${QUIET:-0}
143 |
144 | # check to see if timeout is from busybox
145 | # (busybox's timeout takes the seconds via the -t flag)
146 | TIMEOUT_PATH=$(realpath $(which timeout))
147 | if [[ $TIMEOUT_PATH =~ "busybox" ]]; then
148 | ISBUSY=1
149 | BUSYTIMEFLAG="-t"
150 | else
151 | ISBUSY=0
152 | BUSYTIMEFLAG=""
153 | fi
154 |
155 | if [[ $CHILD -gt 0 ]]; then
156 | wait_for
157 | RESULT=$?
158 | exit $RESULT
159 | else
160 | if [[ $TIMEOUT -gt 0 ]]; then
161 | wait_for_wrapper
162 | RESULT=$?
163 | else
164 | wait_for
165 | RESULT=$?
166 | fi
167 | fi
168 |
169 | if [[ $CLI != "" ]]; then
170 | if [[ $RESULT -ne 0 && $STRICT -eq 1 ]]; then
171 | echoerr "$cmdname: strict mode, refusing to execute subprocess"
172 | exit $RESULT
173 | fi
174 | exec "${CLI[@]}"
175 | else
176 | exit $RESULT
177 | fi
--------------------------------------------------------------------------------
/postgres/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM postgres:latest
2 |
3 | COPY create_table.sql /docker-entrypoint-initdb.d/
--------------------------------------------------------------------------------
/postgres/create_table.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE weather ( -- sink table populated by the Flink JDBC insert
2 | id SERIAL PRIMARY KEY, -- auto-incrementing surrogate key; one row per emitted window result
3 | city VARCHAR (255) NOT NULL,
4 | average_temperature DOUBLE PRECISION -- per-city average over a 60s tumbling window
5 | );
--------------------------------------------------------------------------------