├── .gitignore
├── README.md
├── docker-compose.yml
├── flink-processor
│   ├── Dockerfile
│   ├── pom.xml
│   ├── src
│   │   └── main
│   │       ├── java
│   │       │   ├── Main.java
│   │       │   ├── Weather.java
│   │       │   └── WeatherDeserializationSchema.java
│   │       └── resources
│   │           └── log4j2.properties
│   └── wait-for-it.sh
├── kafka-producer
│   ├── Dockerfile
│   ├── python-producer.py
│   ├── requirements.txt
│   └── wait-for-it.sh
└── postgres
    ├── Dockerfile
    └── create_table.sql

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
flink-processor/target

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Building Real-Time Data Streaming using Kafka, Apache Flink and Postgres

#### Youtube Video :video_camera:

https://www.youtube.com/watch?v=FoypLT2W91c

#### Steps Walkthrough

https://medium.com/@kavitmht/building-a-real-time-data-streaming-pipeline-using-apache-kafka-flink-and-postgres-a22101c97895


Connect with me on:-

Twitter 👦🏻:- https://twitter.com/kmmtmm92

Youtube 📹:- https://www.youtube.com/channel/UCpmw7QtwoHXV05D3NUWZ3oQ

Github 💭:- https://github.com/Kavit900

Instagram 📸:- https://www.instagram.com/code_with_kavit/

--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: "3.9"

networks:
  bridge:
    driver: bridge

services:
  zookeeper:
    image: confluentinc/cp-zookeeper:latest
    environment:
      ZOOKEEPER_CLIENT_PORT: 32181
      ZOOKEEPER_TICK_TIME: 2000
    networks:
      bridge:
        aliases:
          - zookeeper

  kafka:
    image: confluentinc/cp-kafka
    depends_on:
      - zookeeper
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_ADVERTISED_HOST_NAME: 0.0.0.0
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:32181
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      JMX_PORT: 9999
    networks:
      bridge:
        aliases:
          - kafka

  kafka-producer:
    image: kafka-producer
    depends_on:
      - kafka
    environment:
      KAFKA_SERVER: "kafka:9092"
      ZOOKEEPER_SERVER: "zookeeper:32181"
      PRODUCER_INTERVAL: 100
    networks:
      - bridge

  flink-processor:
    image: flink-processor
    depends_on:
      - kafka
    environment:
      KAFKA_SERVER: "kafka:9092"
      ZOOKEEPER_SERVER: "zookeeper:32181"
      PRODUCER_INTERVAL: 100
    networks:
      - bridge

  postgres:
    build:
      context: ./postgres
    container_name: postgres
    restart: always
    environment:
      - POSTGRES_USER=postgres
      - POSTGRES_PASSWORD=postgres
      - POSTGRES_DB=postgres
    logging:
      options:
        max-size: 10m
        max-file: "3"
    ports:
      - 5438:5432
    networks:
      - bridge

volumes:
  settings:
  data:
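Note on bringing the stack up: compose pulls the zookeeper and kafka images from Docker Hub and builds postgres itself, but the kafka-producer and flink-processor services reference image tags that must exist locally before `docker compose up`. A minimal bring-up sketch with a recent Docker CLI (the flink-processor image additionally needs the job jar in target/; see the Maven note after pom.xml below):

    # Build the two locally referenced images (tags match the image: fields above).
    docker build -t kafka-producer ./kafka-producer
    docker build -t flink-processor ./flink-processor

    # Start everything; postgres is built from ./postgres by compose itself.
    docker compose up -d

    # Tail the processor to confirm it reached Kafka.
    docker compose logs -f flink-processor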
--------------------------------------------------------------------------------
/flink-processor/Dockerfile:
--------------------------------------------------------------------------------
FROM openjdk:8u151-jdk-alpine3.7

# Install Bash
RUN apk add --no-cache bash libc6-compat

# Copy resources
WORKDIR /
COPY wait-for-it.sh wait-for-it.sh
COPY target/flink-kafka2postgres-1.0-SNAPSHOT-jar-with-dependencies.jar flink-processor.jar

# Wait for Zookeeper and Kafka to be available and run application
CMD ./wait-for-it.sh -s -t 30 $ZOOKEEPER_SERVER -- ./wait-for-it.sh -s -t 30 $KAFKA_SERVER -- java -Xmx512m -jar flink-processor.jar

--------------------------------------------------------------------------------
/flink-processor/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.kavit</groupId>
    <artifactId>flink-kafka2postgres</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <target.java.version>8</target.java.version>
        <maven.compiler.source>${target.java.version}</maven.compiler.source>
        <maven.compiler.target>${target.java.version}</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <flink.version>1.16.0</flink.version>
        <jackson.version>2.13.4</jackson.version>
        <junit.jupiter.version>5.8.2</junit.jupiter.version>
        <kafka.version>3.2.0</kafka.version>
        <log4j.version>2.17.2</log4j.version>
    </properties>

    <dependencies>
        <!-- Flink core -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Table API -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table</artifactId>
            <version>${flink.version}</version>
            <type>pom</type>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Connectors -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-files</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- JDBC driver -->
        <dependency>
            <groupId>org.postgresql</groupId>
            <artifactId>postgresql</artifactId>
            <version>42.5.0</version>
        </dependency>

        <!-- Formats -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-csv</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Jackson -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-annotations</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.datatype</groupId>
            <artifactId>jackson-datatype-jsr310</artifactId>
            <version>${jackson.version}</version>
        </dependency>

        <!-- Kafka -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.version}</version>
        </dependency>

        <!-- Logging (runtime only) -->
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
            <version>${log4j.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-api</artifactId>
            <version>${log4j.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>${log4j.version}</version>
            <scope>runtime</scope>
        </dependency>

        <!-- Test dependencies -->
        <dependency>
            <groupId>com.github.javafaker</groupId>
            <artifactId>javafaker</artifactId>
            <version>1.0.2</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>net.mguenther.kafka</groupId>
            <artifactId>kafka-junit</artifactId>
            <version>${kafka.version}</version>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>ch.qos.reload4j</groupId>
                    <artifactId>reload4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.kafka</groupId>
                    <artifactId>kafka-log4j-appender</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-test-utils</artifactId>
            <version>${flink.version}</version>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web</artifactId>
            <version>${flink.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-engine</artifactId>
            <version>${junit.jupiter.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-api</artifactId>
            <version>${junit.jupiter.version}</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <source>${target.java.version}</source>
                    <target>${target.java.version}</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.22.2</version>
                <configuration>
                    <argLine>--illegal-access=permit</argLine>
                </configuration>
            </plugin>
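The Dockerfile above copies target/flink-kafka2postgres-1.0-SNAPSHOT-jar-with-dependencies.jar, which is exactly the artifact the shade plugin in this pom attaches (shadedClassifierName jar-with-dependencies) during the package phase, so the Maven build must run before the image build. A minimal sketch:

    cd flink-processor

    # Runs the shade plugin bound to the package phase and produces
    # target/flink-kafka2postgres-1.0-SNAPSHOT-jar-with-dependencies.jar.
    mvn clean package

    # Only now can the COPY instruction in the Dockerfile find the jar.
    docker build -t flink-processor .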
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-failsafe-plugin</artifactId>
                <version>2.22.2</version>
                <configuration>
                    <argLine>--illegal-access=permit</argLine>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.1.1</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>org.apache.flink:flink-shaded-force-shading</exclude>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>org.apache.logging.log4j:*</exclude>
                                </excludes>
                            </artifactSet>
                            <createDependencyReducedPom>false</createDependencyReducedPom>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <shadedArtifactAttached>true</shadedArtifactAttached>
                            <shadedClassifierName>jar-with-dependencies</shadedClassifierName>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>Main</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>com.diffplug.spotless</groupId>
                <artifactId>spotless-maven-plugin</artifactId>
                <version>2.23.0</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>check</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>

        <pluginManagement>
            <plugins>
                <!-- Keeps Eclipse/m2e from flagging the shade and compiler plugin executions. -->
                <plugin>
                    <groupId>org.eclipse.m2e</groupId>
                    <artifactId>lifecycle-mapping</artifactId>
                    <version>1.0.0</version>
                    <configuration>
                        <lifecycleMappingMetadata>
                            <pluginExecutions>
                                <pluginExecution>
                                    <pluginExecutionFilter>
                                        <groupId>org.apache.maven.plugins</groupId>
                                        <artifactId>maven-shade-plugin</artifactId>
                                        <versionRange>[3.1.1,)</versionRange>
                                        <goals>
                                            <goal>shade</goal>
                                        </goals>
                                    </pluginExecutionFilter>
                                    <action>
                                        <ignore/>
                                    </action>
                                </pluginExecution>
                                <pluginExecution>
                                    <pluginExecutionFilter>
                                        <groupId>org.apache.maven.plugins</groupId>
                                        <artifactId>maven-compiler-plugin</artifactId>
                                        <versionRange>[3.1,)</versionRange>
                                        <goals>
                                            <goal>testCompile</goal>
                                            <goal>compile</goal>
                                        </goals>
                                    </pluginExecutionFilter>
                                    <action>
                                        <ignore/>
                                    </action>
                                </pluginExecution>
                            </pluginExecutions>
                        </lifecycleMappingMetadata>
                    </configuration>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>
</project>

--------------------------------------------------------------------------------
/flink-processor/src/main/java/Main.java:
--------------------------------------------------------------------------------
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcExecutionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;

public class Main {

    static final String BROKERS = "kafka:9092";

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        System.out.println("Environment created");

        KafkaSource<Weather> source = KafkaSource.<Weather>builder()
                .setBootstrapServers(BROKERS)
                .setProperty("partition.discovery.interval.ms", "1000")
                .setTopics("weather")
                .setGroupId("groupdId-919292")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new WeatherDeserializationSchema())
                .build();

        DataStreamSource<Weather> kafka = env.fromSource(source, WatermarkStrategy.noWatermarks(), "kafka");

        System.out.println("Kafka source created");

        // Average the temperature per city over 60-second processing-time windows.
        DataStream<Tuple2<MyAverage, Double>> averageTemperatureStream = kafka
                .keyBy(myEvent -> myEvent.city)
                .window(TumblingProcessingTimeWindows.of(Time.seconds(60)))
                .aggregate(new AverageAggregator());

        DataStream<Tuple2<String, Double>> cityAndValueStream = averageTemperatureStream
                .map(new MapFunction<Tuple2<MyAverage, Double>, Tuple2<String, Double>>() {
                    @Override
                    public Tuple2<String, Double> map(Tuple2<MyAverage, Double> input) throws Exception {
                        return new Tuple2<>(input.f0.city, input.f1);
                    }
                });

        System.out.println("Aggregation created");

        // cityAndValueStream.print();
        // NOTE: docker.for.mac.host.internal only resolves on Docker Desktop for Mac;
        // on other hosts, point the URL at the postgres service directly
        // (e.g. jdbc:postgresql://postgres:5432/postgres).
        cityAndValueStream.addSink(JdbcSink.sink(
                "insert into weather (city, average_temperature) values (?, ?)",
                (statement, event) -> {
                    statement.setString(1, event.f0);
                    statement.setDouble(2, event.f1);
                },
                JdbcExecutionOptions.builder()
                        .withBatchSize(1000)
                        .withBatchIntervalMs(200)
                        .withMaxRetries(5)
                        .build(),
                new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                        .withUrl("jdbc:postgresql://docker.for.mac.host.internal:5438/postgres")
                        .withDriverName("org.postgresql.Driver")
                        .withUsername("postgres")
                        .withPassword("postgres")
                        .build()
        ));

        env.execute("Kafka-flink-postgres");
    }

    /**
     * Aggregation function for average.
     */
    public static class AverageAggregator implements AggregateFunction<Weather, MyAverage, Tuple2<MyAverage, Double>> {

        @Override
        public MyAverage createAccumulator() {
            return new MyAverage();
        }

        @Override
        public MyAverage add(Weather weather, MyAverage myAverage) {
            myAverage.city = weather.city;
            myAverage.count = myAverage.count + 1;
            myAverage.sum = myAverage.sum + weather.temperature;
            return myAverage;
        }

        @Override
        public Tuple2<MyAverage, Double> getResult(MyAverage myAverage) {
            return new Tuple2<>(myAverage, myAverage.sum / myAverage.count);
        }

        @Override
        public MyAverage merge(MyAverage myAverage, MyAverage acc1) {
            myAverage.sum = myAverage.sum + acc1.sum;
            myAverage.count = myAverage.count + acc1.count;
            return myAverage;
        }
    }

    public static class MyAverage {

        public String city;
        public Integer count = 0;
        public Double sum = 0d;

        @Override
        public String toString() {
            return "MyAverage{" +
                    "city='" + city + '\'' +
                    ", count=" + count +
                    ", sum=" + sum +
                    '}';
        }
    }
}

--------------------------------------------------------------------------------
/flink-processor/src/main/java/Weather.java:
--------------------------------------------------------------------------------
import java.util.Objects;

public class Weather {

    /*
    {
        "city": "New York",
        "temperature": "10.34"
    }
    */

    public String city;
    public Double temperature;

    public Weather() {}

    public Weather(String city, String temperature) {
        this.city = city;
        this.temperature = Double.valueOf(temperature);
    }

    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder("Weather{");
        sb.append("city='").append(city).append('\'');
        sb.append(", temperature=").append(temperature);
        sb.append('}');
        return sb.toString();
    }

    // equals/hashCode are value-based so equal readings compare and hash consistently.
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (!(o instanceof Weather)) return false;
        Weather other = (Weather) o;
        return Objects.equals(city, other.city) && Objects.equals(temperature, other.temperature);
    }

    @Override
    public int hashCode() {
        return Objects.hash(city, temperature);
    }
}

--------------------------------------------------------------------------------
/flink-processor/src/main/java/WeatherDeserializationSchema.java:
--------------------------------------------------------------------------------
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.json.JsonMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import java.io.IOException;
import org.apache.flink.api.common.serialization.AbstractDeserializationSchema;

public class WeatherDeserializationSchema extends AbstractDeserializationSchema<Weather> {

    private static final long serialVersionUID = 1L;

    // Jackson's ObjectMapper is not serializable, so it is created in open()
    // on each task rather than shipped with the job graph.
    private transient ObjectMapper objectMapper;

    @Override
    public void open(InitializationContext context) {
        objectMapper = JsonMapper.builder().build().registerModule(new JavaTimeModule());
    }

    @Override
    public Weather deserialize(byte[] message) throws IOException {
        return objectMapper.readValue(message, Weather.class);
    }
}
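The deserializer hands Jackson whatever bytes arrive on the topic, so a quick way to exercise it end to end is to publish one hand-written event in the shape documented in Weather.java. A sketch, assuming the console tools shipped in the confluentinc/cp-kafka image:

    # Publish a single event to the weather topic from inside the kafka container.
    echo '{"city": "New York", "temperature": "10.34"}' | \
      docker compose exec -T kafka kafka-console-producer \
        --bootstrap-server kafka:9092 --topic weather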
--------------------------------------------------------------------------------
/flink-processor/src/main/resources/log4j2.properties:
--------------------------------------------------------------------------------
rootLogger.level = INFO
rootLogger.appenderRef.console.ref = ConsoleAppender

appender.console.name = ConsoleAppender
appender.console.type = CONSOLE
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n

--------------------------------------------------------------------------------
/flink-processor/wait-for-it.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Use this script to test if a given TCP host/port are available

cmdname=$(basename $0)

echoerr() { if [[ $QUIET -ne 1 ]]; then echo "$@" 1>&2; fi }

usage()
{
    cat << USAGE >&2
Usage:
    $cmdname host:port [-s] [-t timeout] [-- command args]
    -h HOST | --host=HOST       Host or IP under test
    -p PORT | --port=PORT       TCP port under test
                                Alternatively, you can specify the host and port as host:port
    -s | --strict               Only execute subcommand if the test succeeds
    -q | --quiet                Don't output any status messages
    -t TIMEOUT | --timeout=TIMEOUT
                                Timeout in seconds, zero for no timeout
    -- COMMAND ARGS             Execute command with args after the test finishes
USAGE
    exit 1
}

wait_for()
{
    if [[ $TIMEOUT -gt 0 ]]; then
        echoerr "$cmdname: waiting $TIMEOUT seconds for $HOST:$PORT"
    else
        echoerr "$cmdname: waiting for $HOST:$PORT without a timeout"
    fi
    start_ts=$(date +%s)
    while :
    do
        if [[ $ISBUSY -eq 1 ]]; then
            nc -z $HOST $PORT
            result=$?
        else
            (echo > /dev/tcp/$HOST/$PORT) >/dev/null 2>&1
            result=$?
        fi
        if [[ $result -eq 0 ]]; then
            end_ts=$(date +%s)
            echoerr "$cmdname: $HOST:$PORT is available after $((end_ts - start_ts)) seconds"
            break
        fi
        sleep 1
    done
    return $result
}

wait_for_wrapper()
{
    # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
    if [[ $QUIET -eq 1 ]]; then
        timeout $BUSYTIMEFLAG $TIMEOUT $0 --quiet --child --host=$HOST --port=$PORT --timeout=$TIMEOUT &
    else
        timeout $BUSYTIMEFLAG $TIMEOUT $0 --child --host=$HOST --port=$PORT --timeout=$TIMEOUT &
    fi
    PID=$!
    trap "kill -INT -$PID" INT
    wait $PID
    RESULT=$?
    if [[ $RESULT -ne 0 ]]; then
        echoerr "$cmdname: timeout occurred after waiting $TIMEOUT seconds for $HOST:$PORT"
    fi
    return $RESULT
}

# process arguments
while [[ $# -gt 0 ]]
do
    case "$1" in
        *:* )
        hostport=(${1//:/ })
        HOST=${hostport[0]}
        PORT=${hostport[1]}
        shift 1
        ;;
        --child)
        CHILD=1
        shift 1
        ;;
        -q | --quiet)
        QUIET=1
        shift 1
        ;;
        -s | --strict)
        STRICT=1
        shift 1
        ;;
        -h)
        HOST="$2"
        if [[ $HOST == "" ]]; then break; fi
        shift 2
        ;;
        --host=*)
        HOST="${1#*=}"
        shift 1
        ;;
        -p)
        PORT="$2"
        if [[ $PORT == "" ]]; then break; fi
        shift 2
        ;;
        --port=*)
        PORT="${1#*=}"
        shift 1
        ;;
        -t)
        TIMEOUT="$2"
        if [[ $TIMEOUT == "" ]]; then break; fi
        shift 2
        ;;
        --timeout=*)
        TIMEOUT="${1#*=}"
        shift 1
        ;;
        --)
        shift
        CLI=("$@")
        break
        ;;
        --help)
        usage
        ;;
        *)
        echoerr "Unknown argument: $1"
        usage
        ;;
    esac
done

if [[ "$HOST" == "" || "$PORT" == "" ]]; then
    echoerr "Error: you need to provide a host and port to test."
    usage
fi

TIMEOUT=${TIMEOUT:-15}
STRICT=${STRICT:-0}
CHILD=${CHILD:-0}
QUIET=${QUIET:-0}

# Check to see if timeout is from busybox
TIMEOUT_PATH=$(realpath $(which timeout))
if [[ $TIMEOUT_PATH =~ "busybox" ]]; then
    ISBUSY=1
    BUSYTIMEFLAG="-t"
else
    ISBUSY=0
    BUSYTIMEFLAG=""
fi

if [[ $CHILD -gt 0 ]]; then
    wait_for
    RESULT=$?
    exit $RESULT
else
    if [[ $TIMEOUT -gt 0 ]]; then
        wait_for_wrapper
        RESULT=$?
    else
        wait_for
        RESULT=$?
    fi
fi
if [[ $CLI != "" ]]; then
    if [[ $RESULT -ne 0 && $STRICT -eq 1 ]]; then
        echoerr "$cmdname: strict mode, refusing to execute subprocess"
        exit $RESULT
    fi
    exec "${CLI[@]}"
else
    exit $RESULT
fi

--------------------------------------------------------------------------------
/kafka-producer/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.8-slim

COPY requirements.txt .

RUN set -ex; \
    pip install --no-cache-dir -r requirements.txt

# Copy resources
WORKDIR /
COPY wait-for-it.sh wait-for-it.sh

COPY python-producer.py .

# Wait for Zookeeper and Kafka to be available, then run the producer unbuffered
CMD ./wait-for-it.sh -s -t 30 $ZOOKEEPER_SERVER -- ./wait-for-it.sh -s -t 30 $KAFKA_SERVER -- python -u python-producer.py

--------------------------------------------------------------------------------
/kafka-producer/python-producer.py:
--------------------------------------------------------------------------------
import random
import time
from json import dumps

import schedule
from faker import Faker
from kafka import KafkaProducer

kafka_nodes = "kafka:9092"
myTopic = "weather"


def gen_data():
    faker = Faker()

    prod = KafkaProducer(bootstrap_servers=kafka_nodes,
                         value_serializer=lambda x: dumps(x).encode('utf-8'))
    # One fake reading: a random city name and a temperature between 10 and 110.
    my_data = {'city': faker.city(), 'temperature': random.uniform(10.0, 110.0)}
    print(my_data)
    prod.send(topic=myTopic, value=my_data)
    prod.flush()


if __name__ == "__main__":
    # Note: the PRODUCER_INTERVAL env var set in docker-compose.yml is not read
    # here; the emit interval is hardcoded to 10 seconds.
    gen_data()
    schedule.every(10).seconds.do(gen_data)

    while True:
        schedule.run_pending()
        time.sleep(0.5)
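To confirm events are flowing before debugging the Flink side, the same broker image's console consumer can tail the topic. A sketch, again assuming the console tools in the confluentinc/cp-kafka image:

    # Watch generated readings arrive (Ctrl-C to stop).
    docker compose exec kafka kafka-console-consumer \
      --bootstrap-server kafka:9092 --topic weather --from-beginning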
--------------------------------------------------------------------------------
/kafka-producer/requirements.txt:
--------------------------------------------------------------------------------
kafka-python==2.0.2
schedule==1.1.0
aiokafka==0.7.2
Faker==15.1.3

--------------------------------------------------------------------------------
/kafka-producer/wait-for-it.sh:
--------------------------------------------------------------------------------
(identical to /flink-processor/wait-for-it.sh above; each image bundles its own
copy of the same script, so see that file for the full listing)
--------------------------------------------------------------------------------
/postgres/Dockerfile:
--------------------------------------------------------------------------------
FROM postgres:latest

COPY create_table.sql /docker-entrypoint-initdb.d/

--------------------------------------------------------------------------------
/postgres/create_table.sql:
--------------------------------------------------------------------------------
CREATE TABLE weather (
  id SERIAL PRIMARY KEY,
  city VARCHAR(255) NOT NULL,
  average_temperature DOUBLE PRECISION
);
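After the stack has run for at least one 60-second window, averaged rows should start landing in this table. A quick check via psql inside the postgres container, using the service name and credentials defined in docker-compose.yml:

    docker compose exec postgres psql -U postgres -d postgres \
      -c 'SELECT city, average_temperature FROM weather ORDER BY id DESC LIMIT 10;'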