├── gradle.properties ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── .gitignore ├── settings.gradle ├── src └── main │ ├── java │ └── com │ │ └── ververica │ │ └── lablatency │ │ ├── util │ │ └── TimeUtil.java │ │ ├── source │ │ ├── MeasurementSource.java │ │ └── MeasurementGenerator.java │ │ ├── event │ │ ├── EnrichedMeasurement.java │ │ ├── MeasurementRecord.java │ │ ├── Measurement.java │ │ └── WindowedMeasurement.java │ │ └── job │ │ ├── IngestingJob.java │ │ ├── WindowingJobNoAggregation.java │ │ ├── WindowingJob.java │ │ ├── EnrichingJobSync.java │ │ ├── EnrichingJobAsync.java │ │ ├── SortingJobPerEventTimer.java │ │ └── SortingJobCoalescedTimer.java │ └── resources │ └── log4j2.properties ├── gradlew.bat ├── README.md ├── gradlew └── LICENSE /gradle.properties: -------------------------------------------------------------------------------- 1 | org.gradle.caching = true 2 | org.gradle.parallel = true 3 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ververica/lab-flink-latency/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.1-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | *.class 3 | 4 | # Log file 5 | *.log 6 | 7 | # BlueJ files 8 | *.ctxt 9 | 10 | # Debugger 11 | .attach_* 
12 | 13 | # Eclipse 14 | .project 15 | .settings 16 | .classpath 17 | bin/ 18 | 19 | # Gradle build process files 20 | /.gradle/ 21 | build/ 22 | **/.gradletasknamecache 23 | 24 | # IntelliJ 25 | .idea 26 | 27 | # Mobile Tools for Java (J2ME) 28 | .mtj.tmp/ 29 | 30 | # Package Files # 31 | *.jar 32 | *.war 33 | *.nar 34 | *.ear 35 | *.zip 36 | *.tar.gz 37 | *.rar 38 | 39 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 40 | hs_err_pid* 41 | 42 | # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored) 43 | !gradle-wrapper.jar 44 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id "com.gradle.enterprise" version "3.6.3" 3 | } 4 | 5 | rootProject.name = 'lab-flink-latency' 6 | 7 | // CI=true, TRAVIS=true, CONTINUOUS_INTEGRATION=true set automatically during Travis execution 8 | // see https://docs.travis-ci.com/user/environment-variables#default-environment-variables 9 | def isCIBuild = ['CI', 'TRAVIS', 'CONTINUOUS_INTEGRATION'].every { System.getenv(it) == 'true' } 10 | if (isCIBuild) { 11 | gradleEnterprise { 12 | buildScan { 13 | // Build Scan enabled and TOS accepted for Travis build. This does not apply to builds on 14 | // non-Travis machines. Developers need to separately enable and accept TOS to use build scans. 15 | termsOfServiceUrl = 'https://gradle.com/terms-of-service' 16 | termsOfServiceAgree = 'yes' 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/lablatency/util/TimeUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Ververica GmbH 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.ververica.lablatency.util; 17 | /** Time utilities: sub-millisecond busy-waiting for simulating ingestion rates. */ 18 | public class TimeUtil { 19 | public static void busyWaitMicros(long micros) { 20 | long waitUntil = System.nanoTime() + (micros * 1_000); 21 | while (System.nanoTime() - waitUntil < 0) { // overflow-safe comparison per System.nanoTime() javadoc; spin body intentionally empty 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License.
17 | ################################################################################ 18 | loggers=rootLogger 19 | appender.console.type=Console 20 | appender.console.name=STDOUT 21 | appender.console.layout.type=PatternLayout 22 | appender.console.layout.pattern=%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n 23 | rootLogger.level=INFO 24 | rootLogger.appenderRef.console.ref=STDOUT -------------------------------------------------------------------------------- /src/main/java/com/ververica/lablatency/source/MeasurementSource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Ververica GmbH 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package com.ververica.lablatency.source; 17 | 18 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 19 | 20 | import com.ververica.lablatency.event.Measurement; 21 | import com.ververica.lablatency.util.TimeUtil; 22 | 23 | import java.util.List; 24 | import java.util.Random; 25 | 26 | public class MeasurementSource extends RichParallelSourceFunction { 27 | 28 | private final Random rand; 29 | private final int spikeInterval; 30 | private final int waitMicro; 31 | private final List measurements; 32 | 33 | private transient volatile boolean cancelled; 34 | 35 | public MeasurementSource(int spikeInterval, int waitMicro) { 36 | this.rand = new Random(); 37 | this.spikeInterval = spikeInterval; 38 | this.waitMicro = waitMicro; 39 | this.measurements = MeasurementGenerator.generateMeasurements(); 40 | } 41 | 42 | @Override 43 | public void run(SourceContext sourceContext) throws Exception { 44 | while (!cancelled) { 45 | 46 | // simulate measurement spikes every spikeInterval minute 47 | if (System.currentTimeMillis() / 1000 / 60 % spikeInterval != 0) { 48 | Thread.sleep(1); 49 | } 50 | 51 | TimeUtil.busyWaitMicros(this.waitMicro); 52 | 53 | int index = rand.nextInt(measurements.size()); 54 | 55 | synchronized (sourceContext.getCheckpointLock()) { 56 | sourceContext.collect(measurements.get(index)); 57 | } 58 | } 59 | } 60 | 61 | @Override 62 | public void cancel() { 63 | this.cancelled = true; 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/lablatency/event/EnrichedMeasurement.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Ververica GmbH 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.ververica.lablatency.event; 17 | 18 | import java.util.Objects; 19 | 20 | @SuppressWarnings("unused") 21 | public class EnrichedMeasurement extends Measurement { 22 | 23 | private String locationInfo; 24 | 25 | public EnrichedMeasurement() {} 26 | 27 | public EnrichedMeasurement( 28 | int sensorId, 29 | double value, 30 | String location, 31 | String measurementInformation, 32 | String locationInfo) { 33 | super(sensorId, value, location, measurementInformation); 34 | this.locationInfo = locationInfo; 35 | } 36 | 37 | public EnrichedMeasurement(Measurement measurement, String locationInfo) { 38 | super(measurement); 39 | this.locationInfo = locationInfo; 40 | } 41 | 42 | public String getLocationInfo() { 43 | return locationInfo; 44 | } 45 | 46 | public void setLocationInfo(String locationInfo) { 47 | this.locationInfo = locationInfo; 48 | } 49 | 50 | @Override 51 | public boolean equals(Object o) { 52 | if (this == o) { 53 | return true; 54 | } 55 | if (o == null || getClass() != o.getClass()) { 56 | return false; 57 | } 58 | if (!super.equals(o)) { 59 | return false; 60 | } 61 | EnrichedMeasurement that = (EnrichedMeasurement) o; 62 | return Objects.equals(locationInfo, that.locationInfo); 63 | } 64 | 65 | @Override 66 | public int hashCode() { 67 | return Objects.hash(super.hashCode(), locationInfo); 68 | } 69 | 70 | @Override 71 | public String toString() { 72 | return "EnrichedMeasurement{" 73 | + super.toString() 74 | + ", locationInfo='" 75 | + locationInfo 76 | + '\'' 77 | + "}"; 78 | } 79 | } 
80 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/lablatency/source/MeasurementGenerator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Ververica GmbH 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.ververica.lablatency.source; 17 | 18 | import com.ververica.lablatency.event.Measurement; 19 | import org.apache.commons.lang3.RandomStringUtils; 20 | import org.slf4j.Logger; 21 | import org.slf4j.LoggerFactory; 22 | 23 | import java.io.BufferedReader; 24 | import java.io.IOException; 25 | import java.io.InputStream; 26 | import java.io.InputStreamReader; 27 | import java.util.ArrayList; 28 | import java.util.List; 29 | import java.util.Random; 30 | 31 | public class MeasurementGenerator { 32 | public static final Logger LOG = LoggerFactory.getLogger(MeasurementGenerator.class); 33 | 34 | public static final int NUM_OF_MEASUREMENTS = 100_000; 35 | public static final int NUM_OF_SENSOR_ID = 100; 36 | public static final int LEN_OF_INFO = 64; 37 | public static final int RANDOM_SEED = 1; 38 | 39 | public static List generateMeasurements() { 40 | Random rand = new Random(RANDOM_SEED); 41 | final List locations = readLocationsFromFile(); 42 | List measurements = new ArrayList<>(); 43 | for (int i = 0; i < NUM_OF_MEASUREMENTS; i++) { 44 | Measurement aMeasurement = 45 | new 
Measurement( 46 | rand.nextInt(NUM_OF_SENSOR_ID), 47 | rand.nextDouble() * 100, 48 | locations.get(rand.nextInt(locations.size())), 49 | "More info: " + RandomStringUtils.randomAlphabetic(LEN_OF_INFO)); 50 | measurements.add(aMeasurement); 51 | } 52 | return measurements; 53 | } 54 | 55 | private static List readLocationsFromFile() { 56 | List locations = new ArrayList<>(); 57 | try (InputStream is = MeasurementGenerator.class.getResourceAsStream("/cities.csv"); 58 | BufferedReader br = new BufferedReader(new InputStreamReader(is))) { 59 | String city; 60 | while ((city = br.readLine()) != null) { 61 | locations.add(city); 62 | } 63 | } catch (IOException e) { 64 | LOG.error("Unable to read cities from file.", e); 65 | throw new RuntimeException(e); 66 | } 67 | return locations; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/lablatency/event/MeasurementRecord.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Ververica GmbH 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.ververica.lablatency.event; 17 | 18 | import java.util.Arrays; 19 | import java.util.Objects; 20 | 21 | @SuppressWarnings({"unused", "RedundantSuppression"}) 22 | public class MeasurementRecord { 23 | 24 | private long timestamp; 25 | private byte[] key; 26 | private byte[] value; 27 | private int partition; 28 | 29 | public MeasurementRecord() {} 30 | 31 | public MeasurementRecord( 32 | final long timestamp, final byte[] key, final byte[] value, final int partition) { 33 | this.timestamp = timestamp; 34 | this.key = key; 35 | this.value = value; 36 | this.partition = partition; 37 | } 38 | 39 | public long getTimestamp() { 40 | return timestamp; 41 | } 42 | 43 | public void setTimestamp(final long timestamp) { 44 | this.timestamp = timestamp; 45 | } 46 | 47 | public byte[] getKey() { 48 | return key; 49 | } 50 | 51 | public void setKey(final byte[] key) { 52 | this.key = key; 53 | } 54 | 55 | public byte[] getValue() { 56 | return value; 57 | } 58 | 59 | public void setValue(final byte[] value) { 60 | this.value = value; 61 | } 62 | 63 | public int getPartition() { 64 | return partition; 65 | } 66 | 67 | public void setPartition(final int partition) { 68 | this.partition = partition; 69 | } 70 | 71 | @Override 72 | public boolean equals(final Object o) { 73 | if (this == o) { 74 | return true; 75 | } 76 | if (o == null || getClass() != o.getClass()) { 77 | return false; 78 | } 79 | final MeasurementRecord that = (MeasurementRecord) o; 80 | return timestamp == that.timestamp 81 | && partition == that.partition 82 | && Arrays.equals(key, that.key) 83 | && Arrays.equals(value, that.value); 84 | } 85 | 86 | @Override 87 | public int hashCode() { 88 | int result = Objects.hash(timestamp, partition); 89 | result = 31 * result + Arrays.hashCode(key); 90 | result = 31 * result + Arrays.hashCode(value); 91 | return result; 92 | } 93 | 94 | @Override 95 | public String toString() { 96 | return "MeasurementRecord{" 97 | + "timestamp=" 98 | + 
timestamp 99 | + ", key=" 100 | + Arrays.toString(key) 101 | + ", value=" 102 | + Arrays.toString(value) 103 | + ", partition=" 104 | + partition 105 | + '}'; 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if "%ERRORLEVEL%" == "0" goto init 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto init 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 64 | 65 | goto fail 66 | 67 | :init 68 | @rem Get command-line arguments, handling Windows variants 69 | 70 | if not "%OS%" == "Windows_NT" goto win9xME_args 71 | 72 | :win9xME_args 73 | @rem Slurp the command line arguments. 74 | set CMD_LINE_ARGS= 75 | set _SKIP=2 76 | 77 | :win9xME_args_slurp 78 | if "x%~1" == "x" goto execute 79 | 80 | set CMD_LINE_ARGS=%* 81 | 82 | :execute 83 | @rem Setup the command line 84 | 85 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 86 | 87 | @rem Execute Gradle 88 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 89 | 90 | :end 91 | @rem End local scope for the variables with windows NT shell 92 | if "%ERRORLEVEL%"=="0" goto mainEnd 93 | 94 | :fail 95 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 96 | rem the _cmd.exe /c_ return code! 
97 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 98 | exit /b 1 99 | 100 | :mainEnd 101 | if "%OS%"=="Windows_NT" endlocal 102 | 103 | :omega 104 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/lablatency/event/Measurement.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Ververica GmbH 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.ververica.lablatency.event; 17 | 18 | import java.io.Serializable; 19 | import java.util.Objects; 20 | 21 | @SuppressWarnings({"unused", "RedundantSuppression"}) 22 | public class Measurement implements Serializable { 23 | 24 | private int sensorId; 25 | private double value; 26 | private String location; 27 | private String measurementInformation; 28 | 29 | public Measurement() {} 30 | 31 | public Measurement( 32 | final int sensorId, 33 | final double value, 34 | final String location, 35 | final String measurementInformation) { 36 | this.sensorId = sensorId; 37 | this.value = value; 38 | this.location = location; 39 | this.measurementInformation = measurementInformation; 40 | } 41 | 42 | public Measurement(Measurement measurement) { 43 | this.sensorId = measurement.getSensorId(); 44 | this.value = measurement.getValue(); 45 | this.location = measurement.getLocation(); 46 | this.measurementInformation = measurement.getMeasurementInformation(); 47 | } 48 | 49 | public String getMeasurementInformation() { 50 | return measurementInformation; 51 | } 52 | 53 | public void setMeasurementInformation(final String measurementInformation) { 54 | this.measurementInformation = measurementInformation; 55 | } 56 | 57 | public int getSensorId() { 58 | return sensorId; 59 | } 60 | 61 | public void setSensorId(final int sensorId) { 62 | this.sensorId = sensorId; 63 | } 64 | 65 | public double getValue() { 66 | return value; 67 | } 68 | 69 | public void setValue(final double value) { 70 | this.value = value; 71 | } 72 | 73 | public String getLocation() { 74 | return location; 75 | } 76 | 77 | public void setLocation(final String location) { 78 | this.location = location; 79 | } 80 | 81 | @Override 82 | public boolean equals(final Object o) { 83 | if (this == o) { 84 | return true; 85 | } 86 | if (o == null || getClass() != o.getClass()) { 87 | return false; 88 | } 89 | final Measurement that = (Measurement) o; 90 | return sensorId == that.sensorId 91 | && 
Double.compare(that.value, value) == 0 92 | && Objects.equals(location, that.location) 93 | && Objects.equals(measurementInformation, that.measurementInformation); 94 | } 95 | 96 | @Override 97 | public int hashCode() { 98 | return Objects.hash(sensorId, value, location, measurementInformation); 99 | } 100 | 101 | @Override 102 | public String toString() { 103 | return "Measurement{" 104 | + "sensorId=" 105 | + sensorId 106 | + ", value=" 107 | + value 108 | + ", location='" 109 | + location 110 | + '\'' 111 | + ", measurementInformation='" 112 | + measurementInformation 113 | + '\'' 114 | + '}'; 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | lab-flink-latency 2 | ================= 3 | 4 | Lab to showcase different Flink job latency optimization techniques covered in our Flink Forward 2021 talk 5 | ["Getting into Low-Latency Gears with Apache Flink"](https://www.flink-forward.org/global-2021/conference-program#getting-into-low-latency-gears-with-apache-flink). 6 | 7 | This lab consists of several jobs which are described as follows. 8 | 9 | ## IngestingJob 10 | 11 | This job is used to ingest randomly generated sensor measurements into a Kafka topic. Use `--kafka` to specify the 12 | Kafka bootstrap servers. This defaults to `localhost:9092`. Use `--topic` to specify the name of the Kafka topic to 13 | ingest into. This defaults to `lablatency`. You can also use 14 | `--wait-micro ` to adjust the ingestion rate. 15 | 16 | ## WindowingJob 17 | 18 | This job calculates the number of measurements and the sum of the measurement values per minute (window size), and updates the 19 | result every 10 seconds (slide size). The latency of this job can be optimized by using the following techniques. 20 | 21 | ### Optimization 1 22 | Increase the job parallelism, e.g., from 2 to 3.
Best to have the number of the partitions of your Kafka topic 23 | divisible by 2 and by 3 to avoid data skew. 24 | 25 | ### Optimization 2 26 | Use the hashmap/filesystem state backend by changing the configuration from 27 | 28 | state.backend: rocksdb 29 | # 0.4 is Flink's default 30 | taskmanager.memory.managed.fraction: '0.4' 31 | 32 | to 33 | 34 | # use filesystem if Flink < 1.13 35 | state.backend: hashmap 36 | taskmanager.memory.managed.fraction: '0.0' 37 | 38 | ### Optimization 3 39 | Reduce the watermark interval from the default `200 ms` to `100 ms`: 40 | 41 | pipeline.auto-watermark-interval: 100 ms 42 | 43 | ### Optimization 4 44 | Reduce the network buffer timeout from the default `100 ms` to `10 ms`: 45 | 46 | execution.buffer-timeout: 10 ms 47 | 48 | ## WindowingJobNoAggregation 49 | 50 | Similar to WindowingJob, except that there is no incremental aggregation during windowing in this job. 51 | 52 | ## EnrichingJobSync 53 | 54 | This job enriches measurements with the location information retrieved from a simulated external service which has a 55 | random latency in the range of 1-6 ms. When location information is retrieved, the job caches it for 1 second to serve further 56 | retrieving requests. 57 | 58 | ## EnrichingJobAsync 59 | 60 | Similar to `EnrichingJobSync`, except that this job uses 61 | [Flink's Async I/O](https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/dev/datastream/operators/asyncio/) 62 | to get better performance. 63 | 64 | ## SortingJobPerEventTimer 65 | 66 | This job sorts a stream of measurements keyed by sensor IDs, then calculates an 67 | [exponential moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average ) for each 68 | sensor. When sorting, it creates a timer per event. 
69 | 70 | ## SortingJobCoalescedTimer 71 | 72 | Similar to `SortingJobPerEventTimer`, except that when sorting, it coalesces timers to the next 100ms (configurable 73 | via `--round-timer-to`) or to the next watermark if `--round-timer-to` is set to `0`. 74 | 75 | This job can be run with the following options/configurations to manage the per-event overhead. 76 | 77 | ### User Code 78 | 79 | Create only one ObjectMapper per operator instance (default) 80 | 81 | --use-one-mapper true 82 | 83 | Create one ObjectMapper per event 84 | 85 | --use-one-mapper false 86 | 87 | ### Serialization 88 | 89 | Use the POJO serializer (default) 90 | 91 | --force-kryo false 92 | 93 | Force using the Kryo serializer 94 | 95 | --force-kryo true 96 | 97 | ### Object Reuse 98 | 99 | Disable object reuse with the following configuration (default) 100 | 101 | pipeline.object-reuse: false 102 | 103 | Enable object reuse with the following configuration 104 | 105 | pipeline.object-reuse: true 106 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/lablatency/job/IngestingJob.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Ververica GmbH 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package com.ververica.lablatency.job; 17 | 18 | import org.apache.flink.api.common.serialization.SerializationSchema; 19 | import org.apache.flink.api.java.utils.ParameterTool; 20 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 21 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer; 22 | import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema; 23 | 24 | import com.fasterxml.jackson.core.JsonProcessingException; 25 | import com.fasterxml.jackson.databind.ObjectMapper; 26 | import com.ververica.lablatency.event.Measurement; 27 | import com.ververica.lablatency.source.MeasurementSource; 28 | import org.apache.kafka.clients.producer.ProducerRecord; 29 | import org.slf4j.Logger; 30 | import org.slf4j.LoggerFactory; 31 | 32 | import javax.annotation.Nullable; 33 | 34 | import java.util.Properties; 35 | 36 | /** This is a Flink job. */ 37 | public class IngestingJob { 38 | 39 | static class KafkaSerSchema implements KafkaSerializationSchema { 40 | 41 | private static final Logger LOG = LoggerFactory.getLogger(KafkaSerSchema.class); 42 | private ObjectMapper mapper; 43 | private final String topic; 44 | 45 | public KafkaSerSchema(String topic) { 46 | this.topic = topic; 47 | } 48 | 49 | @Override 50 | public void open(SerializationSchema.InitializationContext context) throws Exception { 51 | this.mapper = new ObjectMapper(); 52 | } 53 | 54 | @Override 55 | public ProducerRecord serialize( 56 | Measurement measurement, @Nullable Long aLong) { 57 | try { 58 | return new ProducerRecord<>( 59 | this.topic, 60 | null, 61 | System.currentTimeMillis(), 62 | null, 63 | this.mapper.writeValueAsBytes(measurement)); 64 | } catch (JsonProcessingException e) { 65 | LOG.error("Failed to serialize measurement: " + e.getMessage()); 66 | return null; 67 | } 68 | } 69 | } 70 | 71 | public static void main(String[] args) throws Exception { 72 | 73 | final ParameterTool params = ParameterTool.fromArgs(args); 
74 | 75 | final String jobName = params.get("job-name", IngestingJob.class.getSimpleName()); 76 | final String kafkaAddress = params.get("kafka", "localhost:9092"); 77 | final String topic = params.get("topic", "lablatency"); 78 | 79 | // when spikeInterval==1, every minute is a spike, it actually means there is no spikes 80 | int spikeInterval = params.getInt("spike-interval", 1); 81 | int waitMicro = params.getInt("wait-micro", 0); 82 | 83 | // Flink environment setup 84 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 85 | 86 | env.disableOperatorChaining(); // to check throughput 87 | 88 | // Properties for Kafka 89 | Properties kafkaProducerProps = new Properties(); 90 | kafkaProducerProps.setProperty("bootstrap.servers", kafkaAddress); 91 | kafkaProducerProps.setProperty("transaction.timeout.ms", "600000"); 92 | 93 | FlinkKafkaProducer producer = 94 | new FlinkKafkaProducer<>( 95 | topic, 96 | new KafkaSerSchema(topic), 97 | kafkaProducerProps, 98 | FlinkKafkaProducer.Semantic.AT_LEAST_ONCE); 99 | 100 | env.addSource(new MeasurementSource(spikeInterval, waitMicro)).addSink(producer); 101 | 102 | env.execute(jobName); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/lablatency/event/WindowedMeasurement.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Ververica GmbH 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
package com.ververica.lablatency.event;

import org.apache.flink.streaming.api.windowing.windows.TimeWindow;

import java.util.Objects;

/**
 * Aggregation result for one location over one time window: the number of measurements seen and
 * the sum of their values, together with the window boundaries.
 *
 * <p>Deliberately a mutable POJO (public no-arg constructor plus getters/setters) so that Flink
 * recognizes it as a POJO type for serialization.
 */
@SuppressWarnings({"unused", "RedundantSuppression"})
public class WindowedMeasurement {

    private long windowStart;
    private long windowEnd;
    private String location;
    private long eventsPerWindow;
    private double sumPerWindow;

    public WindowedMeasurement() {}

    public WindowedMeasurement(
            final long windowStart,
            final long windowEnd,
            final String location,
            final long eventsPerWindow,
            final double sumPerWindow) {
        this.windowStart = windowStart;
        this.windowEnd = windowEnd;
        this.location = location;
        this.eventsPerWindow = eventsPerWindow;
        this.sumPerWindow = sumPerWindow;
    }

    public long getWindowStart() {
        return windowStart;
    }

    public void setWindowStart(final long windowStart) {
        this.windowStart = windowStart;
    }

    public long getWindowEnd() {
        return windowEnd;
    }

    public void setWindowEnd(final long windowEnd) {
        this.windowEnd = windowEnd;
    }

    /** Copies both boundaries from the given Flink window in one call. */
    public void setWindow(TimeWindow window) {
        setWindowStart(window.getStart());
        setWindowEnd(window.getEnd());
    }

    public String getLocation() {
        return location;
    }

    public void setLocation(final String location) {
        this.location = location;
    }

    public long getEventsPerWindow() {
        return eventsPerWindow;
    }

    public void setEventsPerWindow(final long eventsPerWindow) {
        this.eventsPerWindow = eventsPerWindow;
    }

    public double getSumPerWindow() {
        return sumPerWindow;
    }

    public void setSumPerWindow(final double sumPerWindow) {
        this.sumPerWindow = sumPerWindow;
    }

    /** Folds one measurement into this aggregate. */
    public void addMeasurement(Measurement measurement) {
        addMeasurement(measurement.getValue());
    }

    /** Folds one raw value into this aggregate: bumps the event count and the running sum. */
    public void addMeasurement(double value) {
        eventsPerWindow++;
        sumPerWindow += value;
    }

    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        // Exact-class comparison (not instanceof) to keep equals symmetric across subclasses.
        if (o == null || o.getClass() != getClass()) {
            return false;
        }
        final WindowedMeasurement other = (WindowedMeasurement) o;
        return windowStart == other.windowStart
                && windowEnd == other.windowEnd
                && eventsPerWindow == other.eventsPerWindow
                && Double.compare(other.sumPerWindow, sumPerWindow) == 0
                && Objects.equals(location, other.location);
    }

    @Override
    public int hashCode() {
        return Objects.hash(windowStart, windowEnd, location, eventsPerWindow, sumPerWindow);
    }

    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder("WindowedMeasurement{");
        sb.append("windowStart=").append(windowStart);
        sb.append(", windowEnd=").append(windowEnd);
        sb.append(", location='").append(location).append('\'');
        sb.append(", eventsPerWindow=").append(eventsPerWindow);
        sb.append(", sumPerWindow=").append(sumPerWindow);
        sb.append('}');
        return sb.toString();
    }
}
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################
# NOTE(review): generated Gradle wrapper startup script (wrapper distribution
# is Gradle 7.1 per gradle-wrapper.properties). Do not edit by hand; run the
# `wrapper` task to regenerate when upgrading Gradle.

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn () {
    echo "$*"
}

die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

# The wrapper jar bootstraps the real Gradle distribution download.
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar

# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"
15 | */ 16 | package com.ververica.lablatency.job; 17 | 18 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 19 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 20 | import org.apache.flink.api.common.serialization.DeserializationSchema; 21 | import org.apache.flink.api.common.typeinfo.TypeInformation; 22 | import org.apache.flink.api.java.utils.ParameterTool; 23 | import org.apache.flink.configuration.Configuration; 24 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram; 25 | import org.apache.flink.streaming.api.datastream.DataStream; 26 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 27 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 28 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink; 29 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction; 30 | import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows; 31 | import org.apache.flink.streaming.api.windowing.time.Time; 32 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 33 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 34 | import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema; 35 | import org.apache.flink.util.Collector; 36 | 37 | import com.fasterxml.jackson.databind.ObjectMapper; 38 | import com.ververica.lablatency.event.Measurement; 39 | import com.ververica.lablatency.event.MeasurementRecord; 40 | import com.ververica.lablatency.event.WindowedMeasurement; 41 | import org.apache.kafka.clients.consumer.ConsumerRecord; 42 | import org.slf4j.Logger; 43 | import org.slf4j.LoggerFactory; 44 | 45 | import java.io.IOException; 46 | import java.time.Duration; 47 | import java.util.Properties; 48 | import java.util.concurrent.TimeUnit; 49 | 50 | import static org.apache.flink.api.java.typeutils.TypeExtractor.getForClass; 51 | 52 | /** WindowingJob without 
incremental aggregation. */ 53 | public class WindowingJobNoAggregation { 54 | private static final Logger LOG = LoggerFactory.getLogger(WindowingJobNoAggregation.class); 55 | 56 | public static void main(String[] args) throws Exception { 57 | 58 | ParameterTool params = ParameterTool.fromArgs(args); 59 | LOG.info("params: " + params.getProperties()); 60 | 61 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 62 | 63 | final String jobName = 64 | params.get("job-name", WindowingJobNoAggregation.class.getSimpleName()); 65 | final String kafkaAddress = params.get("kafka", "localhost:9092"); 66 | final String topic = params.get("topic", "lablatency"); 67 | final String group = params.get("group", "lablatency"); 68 | 69 | final int slideSize = params.getInt("slide-size", 10); 70 | final int outOfOrderness = params.getInt("out-of-orderness", 250); 71 | 72 | Properties kafkaConsumerProps = new Properties(); 73 | kafkaConsumerProps.setProperty("bootstrap.servers", kafkaAddress); 74 | kafkaConsumerProps.setProperty("group.id", group); 75 | FlinkKafkaConsumer consumer = 76 | new FlinkKafkaConsumer<>(topic, new KafkaDeSerSchema(), kafkaConsumerProps); 77 | // start from the latest message 78 | consumer.setStartFromLatest(); 79 | 80 | DataStream sourceStream = 81 | env.addSource(consumer) 82 | .name("KafkaSource") 83 | .uid("KafkaSource") 84 | .assignTimestampsAndWatermarks( 85 | WatermarkStrategy.forBoundedOutOfOrderness( 86 | Duration.ofMillis(outOfOrderness)) 87 | .withTimestampAssigner( 88 | (element, timestamp) -> element.getTimestamp()) 89 | .withIdleness(Duration.ofSeconds(1))) 90 | .name("Watermarks") 91 | .uid("Watermarks") 92 | .flatMap(new MeasurementDeserializer()) 93 | .name("Deserialization") 94 | .uid("Deserialization"); 95 | 96 | SingleOutputStreamOperator aggregatedPerLocation = 97 | sourceStream 98 | .keyBy(Measurement::getLocation) 99 | .window( 100 | SlidingEventTimeWindows.of( 101 | Time.of(1, TimeUnit.MINUTES), 102 | 
Time.of(slideSize, TimeUnit.SECONDS))) 103 | .process(new MeasurementProcessWindowFunction()) 104 | .name("MainOperator:Window") 105 | .uid("MainOperator:Window"); 106 | 107 | aggregatedPerLocation 108 | .addSink(new DiscardingSink<>()) 109 | .name("NormalOutput") 110 | .uid("NormalOutput") 111 | .disableChaining(); 112 | 113 | env.execute(jobName); 114 | } 115 | 116 | /** Get MeasurementRecord from Kafka ConsumerRecord. */ 117 | static class KafkaDeSerSchema implements KafkaDeserializationSchema { 118 | 119 | @Override 120 | public void open(DeserializationSchema.InitializationContext context) throws Exception {} 121 | 122 | @Override 123 | public boolean isEndOfStream(MeasurementRecord nextElement) { 124 | return false; 125 | } 126 | 127 | @Override 128 | public MeasurementRecord deserialize(ConsumerRecord record) 129 | throws Exception { 130 | return new MeasurementRecord( 131 | record.timestamp(), record.key(), record.value(), record.partition()); 132 | } 133 | 134 | @Override 135 | public TypeInformation getProducedType() { 136 | return getForClass(MeasurementRecord.class); 137 | } 138 | } 139 | 140 | /** Deserializes MeasurementRecord into Measurement. 
*/ 141 | public static class MeasurementDeserializer 142 | extends RichFlatMapFunction { 143 | 144 | private static final long serialVersionUID = 1L; 145 | private static final Logger LOG = LoggerFactory.getLogger(MeasurementDeserializer.class); 146 | 147 | private ObjectMapper objectMapper; 148 | 149 | @Override 150 | public void open(final Configuration parameters) throws Exception { 151 | super.open(parameters); 152 | this.objectMapper = new ObjectMapper(); 153 | } 154 | 155 | @Override 156 | public void flatMap(final MeasurementRecord kafkaRecord, final Collector out) { 157 | final Measurement measurement; 158 | try { 159 | measurement = 160 | this.objectMapper.readValue(kafkaRecord.getValue(), Measurement.class); 161 | } catch (IOException e) { 162 | LOG.error("Failed to deserialize: " + e.getLocalizedMessage()); 163 | return; 164 | } 165 | out.collect(measurement); 166 | } 167 | } 168 | 169 | /** ProcessWindowFunction produces WindowedMeasurement. */ 170 | public static class MeasurementProcessWindowFunction 171 | extends ProcessWindowFunction { 172 | 173 | private static final long serialVersionUID = 1L; 174 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000; 175 | 176 | private transient DescriptiveStatisticsHistogram eventTimeLag; 177 | 178 | @Override 179 | public void open(Configuration parameters) throws Exception { 180 | super.open(parameters); 181 | 182 | eventTimeLag = 183 | getRuntimeContext() 184 | .getMetricGroup() 185 | .histogram( 186 | "eventTimeLag", 187 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE)); 188 | } 189 | 190 | @Override 191 | public void process( 192 | final String location, 193 | final Context context, 194 | final Iterable input, 195 | final Collector out) { 196 | 197 | WindowedMeasurement aggregate = new WindowedMeasurement(); 198 | for (Measurement record : input) { 199 | double result = record.getValue(); 200 | aggregate.addMeasurement(result); 201 | } 202 | final TimeWindow window = 
context.window(); 203 | aggregate.setWindow(window); 204 | aggregate.setLocation(location); 205 | 206 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd()); 207 | out.collect(aggregate); 208 | } 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/lablatency/job/WindowingJob.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Ververica GmbH 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.ververica.lablatency.job; 17 | 18 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 19 | import org.apache.flink.api.common.functions.AggregateFunction; 20 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 21 | import org.apache.flink.api.common.serialization.DeserializationSchema; 22 | import org.apache.flink.api.common.typeinfo.TypeInformation; 23 | import org.apache.flink.api.java.tuple.Tuple2; 24 | import org.apache.flink.api.java.utils.ParameterTool; 25 | import org.apache.flink.configuration.Configuration; 26 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram; 27 | import org.apache.flink.streaming.api.datastream.DataStream; 28 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 29 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 30 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink; 31 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction; 32 | import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows; 33 | import org.apache.flink.streaming.api.windowing.time.Time; 34 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 35 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 36 | import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema; 37 | import org.apache.flink.util.Collector; 38 | 39 | import com.fasterxml.jackson.databind.ObjectMapper; 40 | import com.ververica.lablatency.event.Measurement; 41 | import com.ververica.lablatency.event.MeasurementRecord; 42 | import com.ververica.lablatency.event.WindowedMeasurement; 43 | import org.apache.kafka.clients.consumer.ConsumerRecord; 44 | import org.slf4j.Logger; 45 | import org.slf4j.LoggerFactory; 46 | 47 | import java.io.IOException; 48 | import java.time.Duration; 49 | import java.util.Properties; 50 | import java.util.concurrent.TimeUnit; 51 | 
52 | import static org.apache.flink.api.java.typeutils.TypeExtractor.getForClass; 53 | 54 | /** WindowingJob */ 55 | public class WindowingJob { 56 | private static final Logger LOG = LoggerFactory.getLogger(WindowingJob.class); 57 | 58 | public static void main(String[] args) throws Exception { 59 | 60 | ParameterTool params = ParameterTool.fromArgs(args); 61 | LOG.info("params: " + params.getProperties()); 62 | 63 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 64 | 65 | final String jobName = params.get("job-name", WindowingJob.class.getSimpleName()); 66 | final String kafkaAddress = params.get("kafka", "localhost:9092"); 67 | final String topic = params.get("topic", "lablatency"); 68 | final String group = params.get("group", "lablatency"); 69 | 70 | final int slideSize = params.getInt("slide-size", 10); 71 | final int outOfOrderness = params.getInt("out-of-orderness", 250); 72 | 73 | Properties kafkaConsumerProps = new Properties(); 74 | kafkaConsumerProps.setProperty("bootstrap.servers", kafkaAddress); 75 | kafkaConsumerProps.setProperty("group.id", group); 76 | FlinkKafkaConsumer consumer = 77 | new FlinkKafkaConsumer<>(topic, new KafkaDeSerSchema(), kafkaConsumerProps); 78 | // start from the latest message 79 | consumer.setStartFromLatest(); 80 | 81 | DataStream sourceStream = 82 | env.addSource(consumer) 83 | .name("KafkaSource") 84 | .uid("KafkaSource") 85 | .assignTimestampsAndWatermarks( 86 | WatermarkStrategy.forBoundedOutOfOrderness( 87 | Duration.ofMillis(outOfOrderness)) 88 | .withTimestampAssigner( 89 | (element, timestamp) -> element.getTimestamp()) 90 | .withIdleness(Duration.ofSeconds(1))) 91 | .name("Watermarks") 92 | .uid("Watermarks") 93 | .flatMap(new MeasurementDeserializer()) 94 | .name("Deserialization") 95 | .uid("Deserialization"); 96 | 97 | SingleOutputStreamOperator aggregatedPerLocation = 98 | sourceStream 99 | .keyBy(Measurement::getLocation) 100 | .window( 101 | SlidingEventTimeWindows.of( 
102 | Time.of(1, TimeUnit.MINUTES), 103 | Time.of(slideSize, TimeUnit.SECONDS))) 104 | .aggregate( 105 | new MeasurementAggregateFunction(), 106 | new MeasurementProcessWindowFunction()) 107 | .name("MainOperator:Window") 108 | .uid("MainOperator:Window"); 109 | 110 | aggregatedPerLocation 111 | .addSink(new DiscardingSink<>()) 112 | .name("NormalOutput") 113 | .uid("NormalOutput") 114 | .disableChaining(); 115 | 116 | env.execute(jobName); 117 | } 118 | 119 | /** Get MeasurementRecord from Kafka ConsumerRecord. */ 120 | static class KafkaDeSerSchema implements KafkaDeserializationSchema { 121 | 122 | @Override 123 | public void open(DeserializationSchema.InitializationContext context) throws Exception {} 124 | 125 | @Override 126 | public boolean isEndOfStream(MeasurementRecord nextElement) { 127 | return false; 128 | } 129 | 130 | @Override 131 | public MeasurementRecord deserialize(ConsumerRecord record) 132 | throws Exception { 133 | return new MeasurementRecord( 134 | record.timestamp(), record.key(), record.value(), record.partition()); 135 | } 136 | 137 | @Override 138 | public TypeInformation getProducedType() { 139 | return getForClass(MeasurementRecord.class); 140 | } 141 | } 142 | 143 | /** Deserializes MeasurementRecord into Measurement. 
*/ 144 | public static class MeasurementDeserializer 145 | extends RichFlatMapFunction { 146 | 147 | private static final long serialVersionUID = 1L; 148 | private static final Logger LOG = LoggerFactory.getLogger(MeasurementDeserializer.class); 149 | 150 | private ObjectMapper objectMapper; 151 | 152 | @Override 153 | public void open(final Configuration parameters) throws Exception { 154 | super.open(parameters); 155 | this.objectMapper = new ObjectMapper(); 156 | } 157 | 158 | @Override 159 | public void flatMap(final MeasurementRecord kafkaRecord, final Collector out) { 160 | final Measurement measurement; 161 | try { 162 | measurement = 163 | this.objectMapper.readValue(kafkaRecord.getValue(), Measurement.class); 164 | } catch (IOException e) { 165 | LOG.error("Failed to deserialize: " + e.getLocalizedMessage()); 166 | return; 167 | } 168 | out.collect(measurement); 169 | } 170 | } 171 | 172 | /** Incrementally aggregate measurements. */ 173 | public static class MeasurementAggregateFunction 174 | implements AggregateFunction, Tuple2> { 175 | 176 | @Override 177 | public Tuple2 createAccumulator() { 178 | return new Tuple2<>(0L, 0.0); 179 | } 180 | 181 | @Override 182 | public Tuple2 add(Measurement measurement, Tuple2 accumulator) { 183 | return new Tuple2<>(accumulator.f0 + 1, accumulator.f1 + measurement.getValue()); 184 | } 185 | 186 | @Override 187 | public Tuple2 getResult(Tuple2 accumulator) { 188 | return new Tuple2<>(accumulator.f0, accumulator.f1); 189 | } 190 | 191 | @Override 192 | public Tuple2 merge(Tuple2 a, Tuple2 b) { 193 | return new Tuple2<>(a.f0 + b.f0, a.f1 + b.f1); 194 | } 195 | } 196 | 197 | /** ProcessWindowFunction produces WindowedMeasurement. 
*/ 198 | public static class MeasurementProcessWindowFunction 199 | extends ProcessWindowFunction< 200 | Tuple2, WindowedMeasurement, String, TimeWindow> { 201 | 202 | private static final long serialVersionUID = 1L; 203 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000; 204 | 205 | private transient DescriptiveStatisticsHistogram eventTimeLag; 206 | 207 | @Override 208 | public void open(Configuration parameters) throws Exception { 209 | super.open(parameters); 210 | 211 | eventTimeLag = 212 | getRuntimeContext() 213 | .getMetricGroup() 214 | .histogram( 215 | "eventTimeLag", 216 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE)); 217 | } 218 | 219 | @Override 220 | public void process( 221 | final String location, 222 | final Context context, 223 | final Iterable> input, 224 | final Collector out) { 225 | 226 | WindowedMeasurement windowedMeasurement = new WindowedMeasurement(); 227 | Tuple2 aggregated = input.iterator().next(); 228 | 229 | windowedMeasurement.setEventsPerWindow(aggregated.f0); 230 | windowedMeasurement.setSumPerWindow(aggregated.f1); 231 | 232 | final TimeWindow window = context.window(); 233 | windowedMeasurement.setWindow(window); 234 | windowedMeasurement.setLocation(location); 235 | 236 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd()); 237 | out.collect(windowedMeasurement); 238 | } 239 | } 240 | } 241 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/lablatency/job/EnrichingJobSync.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Ververica GmbH 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.ververica.lablatency.job; 17 | 18 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 19 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 20 | import org.apache.flink.api.common.serialization.DeserializationSchema; 21 | import org.apache.flink.api.common.typeinfo.TypeInformation; 22 | import org.apache.flink.api.java.tuple.Tuple2; 23 | import org.apache.flink.api.java.utils.ParameterTool; 24 | import org.apache.flink.configuration.Configuration; 25 | import org.apache.flink.metrics.Counter; 26 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram; 27 | import org.apache.flink.streaming.api.datastream.DataStream; 28 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 29 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction; 30 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink; 31 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 32 | import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema; 33 | import org.apache.flink.util.Collector; 34 | 35 | import com.fasterxml.jackson.databind.ObjectMapper; 36 | import com.ververica.lablatency.event.EnrichedMeasurement; 37 | import com.ververica.lablatency.event.Measurement; 38 | import com.ververica.lablatency.event.MeasurementRecord; 39 | import org.apache.commons.lang3.RandomStringUtils; 40 | import org.apache.commons.lang3.RandomUtils; 41 | import org.apache.kafka.clients.consumer.ConsumerRecord; 42 | import org.slf4j.Logger; 43 | import org.slf4j.LoggerFactory; 44 | 45 | import java.io.IOException; 46 | import java.time.Duration; 47 | import java.util.HashMap; 48 | import java.util.Map; 49 | import java.util.Properties; 50 | import java.util.concurrent.CompletableFuture; 51 | import java.util.concurrent.ExecutorService; 52 | import java.util.concurrent.Executors; 53 | import java.util.concurrent.ThreadFactory; 54 | import 
java.util.function.Consumer; 55 | import java.util.function.Supplier; 56 | 57 | import static org.apache.flink.api.java.typeutils.TypeExtractor.getForClass; 58 | 59 | /** EnrichingJob: enrich measurements with location synchronously. */ 60 | public class EnrichingJobSync { 61 | private static final Logger LOG = LoggerFactory.getLogger(EnrichingJobSync.class); 62 | 63 | public static void main(String[] args) throws Exception { 64 | 65 | ParameterTool params = ParameterTool.fromArgs(args); 66 | LOG.info("params: " + params.getProperties()); 67 | 68 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 69 | 70 | final String jobName = params.get("job-name", EnrichingJobSync.class.getSimpleName()); 71 | final String kafkaAddress = params.get("kafka", "localhost:9092"); 72 | final String topic = params.get("topic", "lablatency"); 73 | final String group = params.get("group", "lablatency"); 74 | 75 | final int outOfOrderness = params.getInt("out-of-orderness", 250); 76 | final int responseTimeMin = params.getInt("response-time-min", 1); 77 | final int responseTimeMax = params.getInt("response-time-max", 6); 78 | final int cacheExpiryMs = params.getInt("cache-expiry-ms", 1000); 79 | 80 | Properties kafkaConsumerProps = new Properties(); 81 | kafkaConsumerProps.setProperty("bootstrap.servers", kafkaAddress); 82 | kafkaConsumerProps.setProperty("group.id", group); 83 | FlinkKafkaConsumer consumer = 84 | new FlinkKafkaConsumer<>(topic, new KafkaDeSerSchema(), kafkaConsumerProps); 85 | // start from the latest message 86 | consumer.setStartFromLatest(); 87 | 88 | DataStream> sourceStream = 89 | env.addSource(consumer) 90 | .name("KafkaSource") 91 | .uid("KafkaSource") 92 | .assignTimestampsAndWatermarks( 93 | WatermarkStrategy.forBoundedOutOfOrderness( 94 | Duration.ofMillis(outOfOrderness)) 95 | .withTimestampAssigner( 96 | (element, timestamp) -> element.getTimestamp()) 97 | .withIdleness(Duration.ofSeconds(1))) 98 | .name("Watermarks") 99 
| .uid("Watermarks") 100 | .flatMap(new MeasurementDeserializer()) 101 | .name("Deserialization") 102 | .uid("Deserialization"); 103 | 104 | DataStream enrichedStream = 105 | sourceStream 106 | .keyBy(x -> x.f0.getLocation()) 107 | .process( 108 | new EnrichMeasurementWithLocationInfo( 109 | cacheExpiryMs, responseTimeMin, responseTimeMax)) 110 | .name("MainOperator:Enrich"); 111 | enrichedStream 112 | .addSink(new DiscardingSink<>()) 113 | .name("NormalOutput") 114 | .uid("NormalOutput") 115 | .disableChaining(); 116 | 117 | env.execute(jobName); 118 | } 119 | 120 | /** Get MeasurementRecord from Kafka ConsumerRecord. */ 121 | static class KafkaDeSerSchema implements KafkaDeserializationSchema { 122 | 123 | @Override 124 | public void open(DeserializationSchema.InitializationContext context) throws Exception {} 125 | 126 | @Override 127 | public boolean isEndOfStream(MeasurementRecord nextElement) { 128 | return false; 129 | } 130 | 131 | @Override 132 | public MeasurementRecord deserialize(ConsumerRecord record) 133 | throws Exception { 134 | return new MeasurementRecord( 135 | record.timestamp(), record.key(), record.value(), record.partition()); 136 | } 137 | 138 | @Override 139 | public TypeInformation getProducedType() { 140 | return getForClass(MeasurementRecord.class); 141 | } 142 | } 143 | 144 | /** Deserializes MeasurementRecord into Measurement. 
*/ 145 | public static class MeasurementDeserializer 146 | extends RichFlatMapFunction> { 147 | 148 | private static final long serialVersionUID = 1L; 149 | private static final Logger LOG = LoggerFactory.getLogger(MeasurementDeserializer.class); 150 | 151 | private ObjectMapper objectMapper; 152 | 153 | @Override 154 | public void open(final Configuration parameters) throws Exception { 155 | super.open(parameters); 156 | this.objectMapper = new ObjectMapper(); 157 | } 158 | 159 | @Override 160 | public void flatMap( 161 | final MeasurementRecord kafkaRecord, 162 | final Collector> out) { 163 | final Measurement measurement; 164 | try { 165 | measurement = 166 | this.objectMapper.readValue(kafkaRecord.getValue(), Measurement.class); 167 | } catch (IOException e) { 168 | LOG.error("Failed to deserialize: " + e.getLocalizedMessage()); 169 | return; 170 | } 171 | out.collect(new Tuple2<>(measurement, kafkaRecord.getTimestamp())); 172 | } 173 | } 174 | 175 | public static class EnrichMeasurementWithLocationInfo 176 | extends KeyedProcessFunction, EnrichedMeasurement> { 177 | private static final long serialVersionUID = 1L; 178 | 179 | private transient LocationInfoServiceClient locationInfoServiceClient; 180 | private transient Map> cache; 181 | 182 | private static final int PROCESSING_TIME_DELAY_WINDOW_SIZE = 10_000; 183 | private transient DescriptiveStatisticsHistogram processingTimeDelay; 184 | 185 | private final int cacheExpiryMs; 186 | private Counter cacheSizeMetric; 187 | private Counter servedFromCacheMetric; 188 | private final int responseTimeMin; 189 | private final int responseTimeMax; 190 | 191 | /** 192 | * Creates a new enrichment function with a (local) cache that expires after the given 193 | * number of milliseconds. 
194 | */ 195 | public EnrichMeasurementWithLocationInfo( 196 | int cacheExpiryMs, int responseTimeMin, int responseTimeMax) { 197 | this.cacheExpiryMs = cacheExpiryMs; 198 | this.responseTimeMin = responseTimeMin; 199 | this.responseTimeMax = responseTimeMax; 200 | } 201 | 202 | @Override 203 | public void open(final Configuration parameters) { 204 | locationInfoServiceClient = 205 | new LocationInfoServiceClient(this.responseTimeMin, this.responseTimeMax); 206 | processingTimeDelay = 207 | getRuntimeContext() 208 | .getMetricGroup() 209 | .histogram( 210 | "processingTimeDelay", 211 | new DescriptiveStatisticsHistogram( 212 | PROCESSING_TIME_DELAY_WINDOW_SIZE)); 213 | cache = new HashMap<>(); 214 | servedFromCacheMetric = getRuntimeContext().getMetricGroup().counter("servedFromCache"); 215 | cacheSizeMetric = getRuntimeContext().getMetricGroup().counter("cacheSize"); 216 | } 217 | 218 | @Override 219 | public void processElement( 220 | Tuple2 measurement, 221 | Context ctx, 222 | Collector out) 223 | throws Exception { 224 | 225 | String location = measurement.f0.getLocation(); 226 | final String locationInfo; 227 | 228 | Tuple2 cachedLocationInfo = cache.get(location); 229 | if (cachedLocationInfo != null 230 | && System.currentTimeMillis() - cachedLocationInfo.f0 <= cacheExpiryMs) { 231 | locationInfo = cachedLocationInfo.f1; 232 | servedFromCacheMetric.inc(); 233 | } else { 234 | locationInfo = locationInfoServiceClient.getLocationInfo(location); 235 | if (cache.put(location, new Tuple2<>(System.currentTimeMillis(), locationInfo)) 236 | == null) { 237 | cacheSizeMetric.inc(); 238 | } 239 | } 240 | processingTimeDelay.update(System.currentTimeMillis() - ctx.timestamp()); 241 | 242 | out.collect(new EnrichedMeasurement(measurement.f0, locationInfo)); 243 | } 244 | } 245 | 246 | /** Location service client. 
*/ 247 | public static class LocationInfoServiceClient { 248 | private static final int LEN_OF_INFO = 100; 249 | private static final ExecutorService pool = 250 | Executors.newFixedThreadPool( 251 | 30, 252 | new ThreadFactory() { 253 | private final ThreadFactory threadFactory = 254 | Executors.defaultThreadFactory(); 255 | 256 | @Override 257 | public Thread newThread(Runnable r) { 258 | Thread thread = threadFactory.newThread(r); 259 | thread.setName("location-service-client-" + thread.getName()); 260 | return thread; 261 | } 262 | }); 263 | private final int responseTimeMin; 264 | private final int responseTimeMax; 265 | 266 | /** 267 | * Creates a new enrichment function with a (local) cache that expires after the given 268 | * number of milliseconds. 269 | */ 270 | public LocationInfoServiceClient(int responseTimeMin, int responseTimeMax) { 271 | this.responseTimeMin = responseTimeMin; 272 | this.responseTimeMax = responseTimeMax; 273 | } 274 | /** Gets the info for the given location. */ 275 | public String getLocationInfo(String location) { 276 | return new LocationInfoSupplier().get(); 277 | } 278 | 279 | /** Asynchronous getter for the info for the given location. 
*/ 280 | public void asyncGetLocationInfo(String location, Consumer callback) { 281 | CompletableFuture.supplyAsync(new LocationInfoSupplier(), pool) 282 | .thenAcceptAsync( 283 | callback, 284 | org.apache.flink.runtime.concurrent.Executors.directExecutor()); 285 | } 286 | 287 | private class LocationInfoSupplier implements Supplier { 288 | @Override 289 | public String get() { 290 | try { 291 | Thread.sleep(RandomUtils.nextInt(responseTimeMin, responseTimeMax)); 292 | } catch (InterruptedException e) { 293 | // Swallowing interruption here 294 | } 295 | return RandomStringUtils.randomAlphabetic(LEN_OF_INFO); 296 | } 297 | } 298 | } 299 | } 300 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/lablatency/job/EnrichingJobAsync.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Ververica GmbH 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.ververica.lablatency.job; 17 | 18 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 19 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 20 | import org.apache.flink.api.common.serialization.DeserializationSchema; 21 | import org.apache.flink.api.common.typeinfo.TypeInformation; 22 | import org.apache.flink.api.java.tuple.Tuple2; 23 | import org.apache.flink.api.java.utils.ParameterTool; 24 | import org.apache.flink.configuration.Configuration; 25 | import org.apache.flink.metrics.Counter; 26 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram; 27 | import org.apache.flink.streaming.api.datastream.AsyncDataStream; 28 | import org.apache.flink.streaming.api.datastream.DataStream; 29 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 30 | import org.apache.flink.streaming.api.functions.async.ResultFuture; 31 | import org.apache.flink.streaming.api.functions.async.RichAsyncFunction; 32 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink; 33 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 34 | import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema; 35 | import org.apache.flink.util.Collector; 36 | 37 | import com.fasterxml.jackson.databind.ObjectMapper; 38 | import com.ververica.lablatency.event.EnrichedMeasurement; 39 | import com.ververica.lablatency.event.Measurement; 40 | import com.ververica.lablatency.event.MeasurementRecord; 41 | import org.apache.commons.lang3.RandomStringUtils; 42 | import org.apache.commons.lang3.RandomUtils; 43 | import org.apache.kafka.clients.consumer.ConsumerRecord; 44 | import org.slf4j.Logger; 45 | import org.slf4j.LoggerFactory; 46 | 47 | import java.io.IOException; 48 | import java.time.Duration; 49 | import java.util.Collections; 50 | import java.util.HashMap; 51 | import java.util.Map; 52 | import java.util.Properties; 53 | import 
java.util.concurrent.CompletableFuture; 54 | import java.util.concurrent.ExecutorService; 55 | import java.util.concurrent.Executors; 56 | import java.util.concurrent.ThreadFactory; 57 | import java.util.concurrent.TimeUnit; 58 | import java.util.function.Consumer; 59 | import java.util.function.Supplier; 60 | 61 | import static org.apache.flink.api.java.typeutils.TypeExtractor.getForClass; 62 | 63 | /** EnrichingJob: enrich measurements with location asynchronously. */ 64 | public class EnrichingJobAsync { 65 | private static final Logger LOG = LoggerFactory.getLogger(EnrichingJobAsync.class); 66 | 67 | public static void main(String[] args) throws Exception { 68 | 69 | ParameterTool params = ParameterTool.fromArgs(args); 70 | LOG.info("params: " + params.getProperties()); 71 | 72 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 73 | 74 | final String jobName = params.get("job-name", EnrichingJobAsync.class.getSimpleName()); 75 | final String kafkaAddress = params.get("kafka", "localhost:9092"); 76 | final String topic = params.get("topic", "lablatency"); 77 | final String group = params.get("group", "lablatency"); 78 | 79 | final int outOfOrderness = params.getInt("out-of-orderness", 250); 80 | final int responseTimeMin = params.getInt("response-time-min", 1); 81 | final int responseTimeMax = params.getInt("response-time-max", 6); 82 | final int cacheExpiryMs = params.getInt("cache-expiry-ms", 1000); 83 | 84 | Properties kafkaConsumerProps = new Properties(); 85 | kafkaConsumerProps.setProperty("bootstrap.servers", kafkaAddress); 86 | kafkaConsumerProps.setProperty("group.id", group); 87 | FlinkKafkaConsumer consumer = 88 | new FlinkKafkaConsumer<>(topic, new KafkaDeSerSchema(), kafkaConsumerProps); 89 | // start from the latest message 90 | consumer.setStartFromLatest(); 91 | 92 | DataStream> sourceStream = 93 | env.addSource(consumer) 94 | .name("KafkaSource") 95 | .uid("KafkaSource") 96 | .assignTimestampsAndWatermarks( 
97 | WatermarkStrategy.forBoundedOutOfOrderness( 98 | Duration.ofMillis(outOfOrderness)) 99 | .withTimestampAssigner( 100 | (element, timestamp) -> element.getTimestamp()) 101 | .withIdleness(Duration.ofSeconds(1))) 102 | .name("Watermarks") 103 | .uid("Watermarks") 104 | .flatMap(new MeasurementDeserializer()) 105 | .name("Deserialization") 106 | .uid("Deserialization"); 107 | 108 | DataStream enrichedStream = 109 | AsyncDataStream.unorderedWait( 110 | sourceStream.keyBy(x -> x.f0.getLocation()), 111 | new EnrichMeasurementWithLocationInfoAsync( 112 | cacheExpiryMs, responseTimeMin, responseTimeMax), 113 | 0, 114 | TimeUnit.MILLISECONDS, 115 | 30) 116 | .name("MainOperator:Enrich"); 117 | enrichedStream 118 | .addSink(new DiscardingSink<>()) 119 | .name("NormalOutput") 120 | .uid("NormalOutput") 121 | .disableChaining(); 122 | 123 | env.execute(jobName); 124 | } 125 | 126 | /** Get MeasurementRecord from Kafka ConsumerRecord. */ 127 | static class KafkaDeSerSchema implements KafkaDeserializationSchema { 128 | 129 | @Override 130 | public void open(DeserializationSchema.InitializationContext context) throws Exception {} 131 | 132 | @Override 133 | public boolean isEndOfStream(MeasurementRecord nextElement) { 134 | return false; 135 | } 136 | 137 | @Override 138 | public MeasurementRecord deserialize(ConsumerRecord record) 139 | throws Exception { 140 | return new MeasurementRecord( 141 | record.timestamp(), record.key(), record.value(), record.partition()); 142 | } 143 | 144 | @Override 145 | public TypeInformation getProducedType() { 146 | return getForClass(MeasurementRecord.class); 147 | } 148 | } 149 | 150 | /** Deserializes MeasurementRecord into Measurement. 
*/ 151 | public static class MeasurementDeserializer 152 | extends RichFlatMapFunction> { 153 | 154 | private static final long serialVersionUID = 1L; 155 | private static final Logger LOG = LoggerFactory.getLogger(MeasurementDeserializer.class); 156 | 157 | private ObjectMapper objectMapper; 158 | 159 | @Override 160 | public void open(final Configuration parameters) throws Exception { 161 | super.open(parameters); 162 | this.objectMapper = new ObjectMapper(); 163 | } 164 | 165 | @Override 166 | public void flatMap( 167 | final MeasurementRecord kafkaRecord, 168 | final Collector> out) { 169 | final Measurement measurement; 170 | try { 171 | measurement = 172 | this.objectMapper.readValue(kafkaRecord.getValue(), Measurement.class); 173 | } catch (IOException e) { 174 | LOG.error("Failed to deserialize: " + e.getLocalizedMessage()); 175 | return; 176 | } 177 | out.collect(new Tuple2<>(measurement, kafkaRecord.getTimestamp())); 178 | } 179 | } 180 | 181 | /** Enrich measurement with location asynchronously. */ 182 | public static class EnrichMeasurementWithLocationInfoAsync 183 | extends RichAsyncFunction, EnrichedMeasurement> { 184 | private static final long serialVersionUID = 2L; 185 | 186 | private transient LocationInfoServiceClient locationInfoServiceClient; 187 | private transient Map> cache; 188 | 189 | private static final int PROCESSING_TIME_DELAY_WINDOW_SIZE = 10_000; 190 | private transient DescriptiveStatisticsHistogram processingTimeDelay; 191 | 192 | private final int cacheExpiryMs; 193 | private Counter cacheSizeMetric; 194 | private Counter servedFromCacheMetric; 195 | private final int responseTimeMin; 196 | private final int responseTimeMax; 197 | 198 | /** 199 | * Creates a new enrichment function with a (local) cache that expires after the given 200 | * number of milliseconds. 
201 | */ 202 | public EnrichMeasurementWithLocationInfoAsync( 203 | int cacheExpiryMs, int responseTimeMin, int responseTimeMax) { 204 | this.cacheExpiryMs = cacheExpiryMs; 205 | this.responseTimeMin = responseTimeMin; 206 | this.responseTimeMax = responseTimeMax; 207 | } 208 | 209 | @Override 210 | public void open(final Configuration parameters) { 211 | locationInfoServiceClient = 212 | new LocationInfoServiceClient(this.responseTimeMin, this.responseTimeMax); 213 | processingTimeDelay = 214 | getRuntimeContext() 215 | .getMetricGroup() 216 | .histogram( 217 | "processingTimeDelay", 218 | new DescriptiveStatisticsHistogram( 219 | PROCESSING_TIME_DELAY_WINDOW_SIZE)); 220 | cache = new HashMap<>(); 221 | servedFromCacheMetric = getRuntimeContext().getMetricGroup().counter("servedFromCache"); 222 | cacheSizeMetric = getRuntimeContext().getMetricGroup().counter("cacheSize"); 223 | } 224 | 225 | @Override 226 | public void asyncInvoke( 227 | Tuple2 measurement, 228 | ResultFuture resultFuture) { 229 | String location = measurement.f0.getLocation(); 230 | final String locationInfo; 231 | 232 | Tuple2 cachedLocationInfo = cache.get(location); 233 | if (cachedLocationInfo != null 234 | && System.currentTimeMillis() - cachedLocationInfo.f0 <= cacheExpiryMs) { 235 | locationInfo = cachedLocationInfo.f1; 236 | EnrichedMeasurement enrichedMeasurement = 237 | new EnrichedMeasurement(measurement.f0, locationInfo); 238 | resultFuture.complete(Collections.singleton(enrichedMeasurement)); 239 | servedFromCacheMetric.inc(); 240 | } else { 241 | locationInfoServiceClient.asyncGetLocationInfo( 242 | measurement.f0.getLocation(), 243 | new LocationServiceCallBack(resultFuture, measurement, location)); 244 | } 245 | } 246 | 247 | private class LocationServiceCallBack implements Consumer { 248 | private final ResultFuture resultFuture; 249 | private final Tuple2 measurement; 250 | private final String location; 251 | 252 | public LocationServiceCallBack( 253 | final ResultFuture 
resultFuture, 254 | final Tuple2 measurement, 255 | final String location) { 256 | this.resultFuture = resultFuture; 257 | this.measurement = measurement; 258 | this.location = location; 259 | } 260 | 261 | @Override 262 | public void accept(final String locationInfo) { 263 | EnrichedMeasurement enrichedMeasurement = 264 | new EnrichedMeasurement(measurement.f0, locationInfo); 265 | resultFuture.complete(Collections.singleton(enrichedMeasurement)); 266 | 267 | processingTimeDelay.update(System.currentTimeMillis() - measurement.f1); 268 | 269 | if (cache.put(location, new Tuple2<>(System.currentTimeMillis(), locationInfo)) 270 | == null) { 271 | cacheSizeMetric.inc(); 272 | } 273 | } 274 | } 275 | } 276 | 277 | /** Location service client. */ 278 | public static class LocationInfoServiceClient { 279 | private static final int LEN_OF_INFO = 100; 280 | private static final ExecutorService pool = 281 | Executors.newFixedThreadPool( 282 | 30, 283 | new ThreadFactory() { 284 | private final ThreadFactory threadFactory = 285 | Executors.defaultThreadFactory(); 286 | 287 | @Override 288 | public Thread newThread(Runnable r) { 289 | Thread thread = threadFactory.newThread(r); 290 | thread.setName("location-service-client-" + thread.getName()); 291 | return thread; 292 | } 293 | }); 294 | private final int responseTimeMin; 295 | private final int responseTimeMax; 296 | 297 | /** 298 | * Creates a new enrichment function with a (local) cache that expires after the given 299 | * number of milliseconds. 300 | */ 301 | public LocationInfoServiceClient(int responseTimeMin, int responseTimeMax) { 302 | this.responseTimeMin = responseTimeMin; 303 | this.responseTimeMax = responseTimeMax; 304 | } 305 | /** Gets the info for the given location. */ 306 | public String getLocationInfo(String location) { 307 | return new LocationInfoSupplier().get(); 308 | } 309 | 310 | /** Asynchronous getter for the info for the given location. 
*/ 311 | public void asyncGetLocationInfo(String location, Consumer callback) { 312 | CompletableFuture.supplyAsync(new LocationInfoSupplier(), pool) 313 | .thenAcceptAsync( 314 | callback, 315 | org.apache.flink.runtime.concurrent.Executors.directExecutor()); 316 | } 317 | 318 | private class LocationInfoSupplier implements Supplier { 319 | @Override 320 | public String get() { 321 | try { 322 | Thread.sleep(RandomUtils.nextInt(responseTimeMin, responseTimeMax)); 323 | } catch (InterruptedException e) { 324 | // Swallowing interruption here 325 | } 326 | return RandomStringUtils.randomAlphabetic(LEN_OF_INFO); 327 | } 328 | } 329 | } 330 | } 331 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/lablatency/job/SortingJobPerEventTimer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Ververica GmbH 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.ververica.lablatency.job; 17 | 18 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 19 | import org.apache.flink.api.common.functions.MapFunction; 20 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 21 | import org.apache.flink.api.common.serialization.DeserializationSchema; 22 | import org.apache.flink.api.common.state.ListState; 23 | import org.apache.flink.api.common.state.ListStateDescriptor; 24 | import org.apache.flink.api.common.state.ValueState; 25 | import org.apache.flink.api.common.state.ValueStateDescriptor; 26 | import org.apache.flink.api.common.typeinfo.TypeHint; 27 | import org.apache.flink.api.common.typeinfo.TypeInformation; 28 | import org.apache.flink.api.common.typeinfo.Types; 29 | import org.apache.flink.api.java.tuple.Tuple2; 30 | import org.apache.flink.api.java.tuple.Tuple3; 31 | import org.apache.flink.api.java.utils.ParameterTool; 32 | import org.apache.flink.configuration.Configuration; 33 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram; 34 | import org.apache.flink.streaming.api.TimerService; 35 | import org.apache.flink.streaming.api.datastream.DataStream; 36 | import org.apache.flink.streaming.api.datastream.DataStreamUtils; 37 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 38 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction; 39 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink; 40 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 41 | import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema; 42 | import org.apache.flink.util.Collector; 43 | 44 | import com.fasterxml.jackson.databind.ObjectMapper; 45 | import com.ververica.lablatency.event.Measurement; 46 | import com.ververica.lablatency.event.MeasurementRecord; 47 | import org.apache.kafka.clients.consumer.ConsumerRecord; 48 | import org.slf4j.Logger; 49 | import 
org.slf4j.LoggerFactory; 50 | 51 | import java.io.IOException; 52 | import java.time.Duration; 53 | import java.util.ArrayList; 54 | import java.util.HashMap; 55 | import java.util.Map; 56 | import java.util.Properties; 57 | 58 | import static org.apache.flink.api.java.typeutils.TypeExtractor.getForClass; 59 | 60 | /** SortingJob with per event timers. */ 61 | public class SortingJobPerEventTimer { 62 | private static final Logger LOG = LoggerFactory.getLogger(SortingJobPerEventTimer.class); 63 | 64 | public static void main(String[] args) throws Exception { 65 | 66 | ParameterTool params = ParameterTool.fromArgs(args); 67 | LOG.info("params: " + params.getProperties()); 68 | 69 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 70 | 71 | final String jobName = 72 | params.get("job-name", SortingJobPerEventTimer.class.getSimpleName()); 73 | final String kafkaAddress = params.get("kafka", "localhost:9092"); 74 | final String topic = params.get("topic", "lablatency"); 75 | final String group = params.get("group", "lablatency"); 76 | 77 | final int outOfOrderness = params.getInt("out-of-orderness", 250); 78 | 79 | final boolean useOneMapper = params.getBoolean("use-one-mapper", true); 80 | final boolean forceKryo = params.getBoolean("force-kryo", false); 81 | 82 | if (forceKryo) { 83 | env.getConfig().enableForceKryo(); 84 | } 85 | 86 | Properties kafkaConsumerProps = new Properties(); 87 | kafkaConsumerProps.setProperty("bootstrap.servers", kafkaAddress); 88 | kafkaConsumerProps.setProperty("group.id", group); 89 | FlinkKafkaConsumer consumer = 90 | new FlinkKafkaConsumer<>(topic, new KafkaDeSerSchema(), kafkaConsumerProps); 91 | // start from the latest message 92 | consumer.setStartFromLatest(); 93 | 94 | DataStream> sourceStream = 95 | env.addSource(consumer) 96 | .name("KafkaSource") 97 | .uid("KafkaSource") 98 | .assignTimestampsAndWatermarks( 99 | WatermarkStrategy.forBoundedOutOfOrderness( 100 | 
Duration.ofMillis(outOfOrderness)) 101 | .withTimestampAssigner( 102 | (element, timestamp) -> element.getTimestamp()) 103 | .withIdleness(Duration.ofSeconds(1))) 104 | .name("Watermarks") 105 | .uid("Watermarks") 106 | .flatMap( 107 | useOneMapper 108 | ? new MeasurementDeserializerOneGlobalMapper() 109 | : new MeasurementDeserializerOneMapperPerEvent()) 110 | .name("Deserialization") 111 | .uid("Deserialization"); 112 | 113 | DataStream> sortedStream = 114 | sourceStream 115 | .keyBy(x -> x.f0.getSensorId()) 116 | .process(new SortFunction()) 117 | .name("MainOperator:Sort") 118 | .uid("MainOperator:Sort"); 119 | 120 | DataStreamUtils.reinterpretAsKeyedStream( 121 | sortedStream 122 | .map(new FixSensorsFunction(Tuple2.of("Berlin", 1.0))) 123 | .name("Fix defective sensors") 124 | .uid("Fix defective sensors"), 125 | x -> x.f0.getSensorId()) 126 | .process(new MovingAverageSensors()) 127 | .addSink(new DiscardingSink<>()) 128 | .name("NormalOutput") 129 | .uid("NormalOutput") 130 | .disableChaining(); 131 | 132 | env.execute(jobName); 133 | } 134 | 135 | /** Get MeasurementRecord from Kafka ConsumerRecord. 
*/ 136 | static class KafkaDeSerSchema implements KafkaDeserializationSchema { 137 | 138 | @Override 139 | public void open(DeserializationSchema.InitializationContext context) throws Exception {} 140 | 141 | @Override 142 | public boolean isEndOfStream(MeasurementRecord nextElement) { 143 | return false; 144 | } 145 | 146 | @Override 147 | public MeasurementRecord deserialize(ConsumerRecord record) 148 | throws Exception { 149 | return new MeasurementRecord( 150 | record.timestamp(), record.key(), record.value(), record.partition()); 151 | } 152 | 153 | @Override 154 | public TypeInformation getProducedType() { 155 | return getForClass(MeasurementRecord.class); 156 | } 157 | } 158 | 159 | /** Deserializes MeasurementRecord into Measurement: create one ObjectMapper per event */ 160 | public static class MeasurementDeserializerOneMapperPerEvent 161 | extends RichFlatMapFunction> { 162 | 163 | private static final long serialVersionUID = 1L; 164 | private static final Logger LOG = 165 | LoggerFactory.getLogger(MeasurementDeserializerOneMapperPerEvent.class); 166 | 167 | @Override 168 | public void open(final Configuration parameters) throws Exception { 169 | super.open(parameters); 170 | } 171 | 172 | @Override 173 | public void flatMap( 174 | final MeasurementRecord kafkaRecord, 175 | final Collector> out) { 176 | final Measurement measurement; 177 | try { 178 | measurement = 179 | new ObjectMapper().readValue(kafkaRecord.getValue(), Measurement.class); 180 | } catch (IOException e) { 181 | LOG.error("Failed to deserialize: " + e.getLocalizedMessage()); 182 | return; 183 | } 184 | out.collect(Tuple2.of(measurement, kafkaRecord.getTimestamp())); 185 | } 186 | } 187 | 188 | /** 189 | * Deserializes MeasurementRecord into Measurement: create one global ObjectMapper per operator 190 | * instance 191 | */ 192 | public static class MeasurementDeserializerOneGlobalMapper 193 | extends RichFlatMapFunction> { 194 | 195 | private static final long serialVersionUID = 1L; 196 | 
private static final Logger LOG = 197 | LoggerFactory.getLogger(MeasurementDeserializerOneGlobalMapper.class); 198 | 199 | private ObjectMapper objectMapper; 200 | 201 | @Override 202 | public void open(final Configuration parameters) throws Exception { 203 | super.open(parameters); 204 | this.objectMapper = new ObjectMapper(); 205 | } 206 | 207 | @Override 208 | public void flatMap( 209 | final MeasurementRecord kafkaRecord, 210 | final Collector> out) { 211 | final Measurement measurement; 212 | try { 213 | measurement = 214 | this.objectMapper.readValue(kafkaRecord.getValue(), Measurement.class); 215 | } catch (IOException e) { 216 | LOG.error("Failed to deserialize: " + e.getLocalizedMessage()); 217 | return; 218 | } 219 | out.collect(Tuple2.of(measurement, kafkaRecord.getTimestamp())); 220 | } 221 | } 222 | 223 | /** SortFunction without timer coealescing. */ 224 | public static class SortFunction 225 | extends KeyedProcessFunction< 226 | Integer, Tuple2, Tuple2> { 227 | 228 | private ListState> listState; 229 | 230 | private transient DescriptiveStatisticsHistogram eventTimeLag; 231 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000; 232 | 233 | @Override 234 | public void open(Configuration parameters) throws Exception { 235 | super.open(parameters); 236 | 237 | ListStateDescriptor> desc = 238 | new ListStateDescriptor<>( 239 | "events", 240 | TypeInformation.of(new TypeHint>() {})); 241 | listState = getRuntimeContext().getListState(desc); 242 | 243 | eventTimeLag = 244 | getRuntimeContext() 245 | .getMetricGroup() 246 | .histogram( 247 | "eventTimeLag", 248 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE)); 249 | } 250 | 251 | @Override 252 | public void processElement( 253 | Tuple2 value, 254 | Context ctx, 255 | Collector> out) 256 | throws Exception { 257 | 258 | TimerService timerService = ctx.timerService(); 259 | long currentTimestamp = ctx.timestamp(); 260 | 261 | if (currentTimestamp > timerService.currentWatermark()) { 
262 | listState.add(value); 263 | timerService.registerEventTimeTimer(currentTimestamp); 264 | } 265 | } 266 | 267 | @Override 268 | public void onTimer( 269 | long timestamp, OnTimerContext ctx, Collector> out) 270 | throws Exception { 271 | 272 | ArrayList> list = new ArrayList<>(); 273 | listState 274 | .get() 275 | .forEach( 276 | event -> { 277 | // we do not emit all events earlier than watermark because 278 | // otherwise 279 | // those emitted events will all have the same timestamp as this 280 | // timer 281 | if (event.f1 == timestamp) { 282 | eventTimeLag.update(System.currentTimeMillis() - timestamp); 283 | out.collect(event); 284 | } else { 285 | list.add(event); 286 | } 287 | }); 288 | listState.update(list); 289 | } 290 | } 291 | 292 | /** 293 | * Implements an exponentially moving average with a coefficient of 0.5, i.e. 294 | * 295 | *
    296 | *
  • avg[0] = value[0] (not forwarded to the next stream) 297 | *
  • avg[i] = avg[i-1] * 0.5 + value[i] * 0.5 (for i > 0) 298 | *
299 | * 300 | *

See 301 | * https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average 302 | */ 303 | private static class MovingAverageSensors 304 | extends KeyedProcessFunction< 305 | Integer, Tuple2, Tuple3> { 306 | private static final long serialVersionUID = 1L; 307 | 308 | private transient ValueState movingAverage; 309 | 310 | @Override 311 | public void open(Configuration parameters) throws Exception { 312 | super.open(parameters); 313 | movingAverage = 314 | getRuntimeContext() 315 | .getState(new ValueStateDescriptor<>("movingAverage", Types.DOUBLE)); 316 | } 317 | 318 | @Override 319 | public void processElement( 320 | Tuple2 value, 321 | KeyedProcessFunction< 322 | Integer, 323 | Tuple2, 324 | Tuple3> 325 | .Context 326 | ctx, 327 | Collector> out) 328 | throws Exception { 329 | 330 | Double last = movingAverage.value(); 331 | if (last != null) { 332 | last = (last + value.f0.getValue()) / 2.0; 333 | movingAverage.update(last); 334 | 335 | // do not forward the first value (it only stands alone) 336 | out.collect(Tuple3.of(ctx.getCurrentKey(), last, ctx.timestamp())); 337 | } else { 338 | movingAverage.update(value.f0.getValue()); 339 | } 340 | } 341 | } 342 | 343 | private static class FixSensorsFunction 344 | implements MapFunction, Tuple2> { 345 | 346 | private final Map locationCorrections = new HashMap<>(); 347 | 348 | @SafeVarargs 349 | public FixSensorsFunction(Tuple2... 
locationCorrections) { 350 | for (Tuple2 locationCorrection : locationCorrections) { 351 | this.locationCorrections.put(locationCorrection.f0, locationCorrection.f1); 352 | } 353 | } 354 | 355 | @Override 356 | public Tuple2 map(Tuple2 value) throws Exception { 357 | if (locationCorrections.containsKey(value.f0.getLocation())) { 358 | return Tuple2.of( 359 | new Measurement( 360 | value.f0.getSensorId(), 361 | value.f0.getValue() 362 | + locationCorrections.get(value.f0.getLocation()), 363 | value.f0.getLocation(), 364 | value.f0.getMeasurementInformation()), 365 | value.f1); 366 | } else { 367 | return value; 368 | } 369 | } 370 | } 371 | } 372 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/lablatency/job/SortingJobCoalescedTimer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Ververica GmbH 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.ververica.lablatency.job; 17 | 18 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 19 | import org.apache.flink.api.common.functions.MapFunction; 20 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 21 | import org.apache.flink.api.common.functions.RichMapFunction; 22 | import org.apache.flink.api.common.serialization.DeserializationSchema; 23 | import org.apache.flink.api.common.state.ListState; 24 | import org.apache.flink.api.common.state.ListStateDescriptor; 25 | import org.apache.flink.api.common.state.ValueState; 26 | import org.apache.flink.api.common.state.ValueStateDescriptor; 27 | import org.apache.flink.api.common.typeinfo.TypeHint; 28 | import org.apache.flink.api.common.typeinfo.TypeInformation; 29 | import org.apache.flink.api.common.typeinfo.Types; 30 | import org.apache.flink.api.java.tuple.Tuple2; 31 | import org.apache.flink.api.java.tuple.Tuple3; 32 | import org.apache.flink.api.java.utils.ParameterTool; 33 | import org.apache.flink.configuration.Configuration; 34 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram; 35 | import org.apache.flink.streaming.api.TimerService; 36 | import org.apache.flink.streaming.api.datastream.DataStream; 37 | import org.apache.flink.streaming.api.datastream.DataStreamUtils; 38 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 39 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction; 40 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink; 41 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 42 | import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema; 43 | import org.apache.flink.util.Collector; 44 | 45 | import com.fasterxml.jackson.databind.ObjectMapper; 46 | import com.ververica.lablatency.event.Measurement; 47 | import com.ververica.lablatency.event.MeasurementRecord; 48 | import 
org.apache.kafka.clients.consumer.ConsumerRecord; 49 | import org.slf4j.Logger; 50 | import org.slf4j.LoggerFactory; 51 | 52 | import java.io.IOException; 53 | import java.time.Duration; 54 | import java.util.ArrayList; 55 | import java.util.Comparator; 56 | import java.util.HashMap; 57 | import java.util.Map; 58 | import java.util.Properties; 59 | 60 | import static org.apache.flink.api.java.typeutils.TypeExtractor.getForClass; 61 | 62 | /** SortingJob with coalesced timers. */ 63 | public class SortingJobCoalescedTimer { 64 | private static final Logger LOG = LoggerFactory.getLogger(SortingJobCoalescedTimer.class); 65 | 66 | public static void main(String[] args) throws Exception { 67 | 68 | ParameterTool params = ParameterTool.fromArgs(args); 69 | LOG.info("params: " + params.getProperties()); 70 | 71 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 72 | 73 | final String jobName = 74 | params.get("job-name", SortingJobCoalescedTimer.class.getSimpleName()); 75 | final String kafkaAddress = params.get("kafka", "localhost:9092"); 76 | final String topic = params.get("topic", "lablatency"); 77 | final String group = params.get("group", "lablatency"); 78 | 79 | final int outOfOrderness = params.getInt("out-of-orderness", 250); 80 | 81 | final int roundTimerTo = params.getInt("round-timer-to", 100); 82 | 83 | final boolean useOneMapper = params.getBoolean("use-one-mapper", true); 84 | final boolean forceKryo = params.getBoolean("force-kryo", false); 85 | 86 | if (forceKryo) { 87 | env.getConfig().enableForceKryo(); 88 | } 89 | 90 | Properties kafkaConsumerProps = new Properties(); 91 | kafkaConsumerProps.setProperty("bootstrap.servers", kafkaAddress); 92 | kafkaConsumerProps.setProperty("group.id", group); 93 | FlinkKafkaConsumer consumer = 94 | new FlinkKafkaConsumer<>(topic, new KafkaDeSerSchema(), kafkaConsumerProps); 95 | // start from the latest message 96 | consumer.setStartFromLatest(); 97 | 98 | DataStream> sourceStream 
= 99 | env.addSource(consumer) 100 | .name("KafkaSource") 101 | .uid("KafkaSource") 102 | .assignTimestampsAndWatermarks( 103 | WatermarkStrategy.forBoundedOutOfOrderness( 104 | Duration.ofMillis(outOfOrderness)) 105 | .withTimestampAssigner( 106 | (element, timestamp) -> element.getTimestamp()) 107 | .withIdleness(Duration.ofSeconds(1))) 108 | .name("Watermarks") 109 | .uid("Watermarks") 110 | .flatMap( 111 | useOneMapper 112 | ? new MeasurementDeserializerOneGlobalMapper() 113 | : new MeasurementDeserializerOneMapperPerEvent()) 114 | .name("Deserialization") 115 | .uid("Deserialization"); 116 | 117 | DataStream> sortedStream = 118 | sourceStream 119 | .keyBy(x -> x.f0.getSensorId()) 120 | .process(new SortFunction(roundTimerTo)) 121 | .name("Sort") 122 | .uid("Sort") 123 | .assignTimestampsAndWatermarks( 124 | WatermarkStrategy 125 | .>forMonotonousTimestamps() 126 | .withTimestampAssigner((element, timestamp) -> element.f1) 127 | .withIdleness(Duration.ofSeconds(1))) 128 | .name("Watermarks2") 129 | .uid("Watermarks2") 130 | .map(new MapMeasurement()) 131 | .name("MainOperator:After2ndWatermark") 132 | .uid("MainOperator:After2ndWatermark"); 133 | 134 | DataStreamUtils.reinterpretAsKeyedStream( 135 | sortedStream 136 | .map(new FixSensorsFunction(Tuple2.of("Berlin", 1.0))) 137 | .name("Fix defective sensors") 138 | .uid("Fix defective sensors"), 139 | x -> x.f0.getSensorId()) 140 | .process(new MovingAverageSensors()) 141 | .addSink(new DiscardingSink<>()) 142 | .name("NormalOutput") 143 | .uid("NormalOutput") 144 | .disableChaining(); 145 | 146 | env.execute(jobName); 147 | } 148 | 149 | /** Get MeasurementRecord from Kafka ConsumerRecord. 
*/ 150 | static class KafkaDeSerSchema implements KafkaDeserializationSchema { 151 | 152 | @Override 153 | public void open(DeserializationSchema.InitializationContext context) throws Exception {} 154 | 155 | @Override 156 | public boolean isEndOfStream(MeasurementRecord nextElement) { 157 | return false; 158 | } 159 | 160 | @Override 161 | public MeasurementRecord deserialize(ConsumerRecord record) 162 | throws Exception { 163 | return new MeasurementRecord( 164 | record.timestamp(), record.key(), record.value(), record.partition()); 165 | } 166 | 167 | @Override 168 | public TypeInformation getProducedType() { 169 | return getForClass(MeasurementRecord.class); 170 | } 171 | } 172 | 173 | /** Deserializes MeasurementRecord into Measurement: create one ObjectMapper per event */ 174 | public static class MeasurementDeserializerOneMapperPerEvent 175 | extends RichFlatMapFunction> { 176 | 177 | private static final long serialVersionUID = 1L; 178 | private static final Logger LOG = 179 | LoggerFactory.getLogger(MeasurementDeserializerOneMapperPerEvent.class); 180 | 181 | @Override 182 | public void open(final Configuration parameters) throws Exception { 183 | super.open(parameters); 184 | } 185 | 186 | @Override 187 | public void flatMap( 188 | final MeasurementRecord kafkaRecord, 189 | final Collector> out) { 190 | final Measurement measurement; 191 | try { 192 | measurement = 193 | new ObjectMapper().readValue(kafkaRecord.getValue(), Measurement.class); 194 | } catch (IOException e) { 195 | LOG.error("Failed to deserialize: " + e.getLocalizedMessage()); 196 | return; 197 | } 198 | out.collect(Tuple2.of(measurement, kafkaRecord.getTimestamp())); 199 | } 200 | } 201 | 202 | /** 203 | * Deserializes MeasurementRecord into Measurement: create one global ObjectMapper per operator 204 | * instance 205 | */ 206 | public static class MeasurementDeserializerOneGlobalMapper 207 | extends RichFlatMapFunction> { 208 | 209 | private static final long serialVersionUID = 1L; 210 | 
private static final Logger LOG = 211 | LoggerFactory.getLogger(MeasurementDeserializerOneGlobalMapper.class); 212 | 213 | private ObjectMapper objectMapper; 214 | 215 | @Override 216 | public void open(final Configuration parameters) throws Exception { 217 | super.open(parameters); 218 | this.objectMapper = new ObjectMapper(); 219 | } 220 | 221 | @Override 222 | public void flatMap( 223 | final MeasurementRecord kafkaRecord, 224 | final Collector> out) { 225 | final Measurement measurement; 226 | try { 227 | measurement = 228 | this.objectMapper.readValue(kafkaRecord.getValue(), Measurement.class); 229 | } catch (IOException e) { 230 | LOG.error("Failed to deserialize: " + e.getLocalizedMessage()); 231 | return; 232 | } 233 | out.collect(Tuple2.of(measurement, kafkaRecord.getTimestamp())); 234 | } 235 | } 236 | 237 | private static class MeasurementByTimeComparator 238 | implements Comparator> { 239 | @Override 240 | public int compare(Tuple2 o1, Tuple2 o2) { 241 | return Long.compare(o1.f1, o2.f1); 242 | } 243 | } 244 | 245 | /** SortFunction with timer coalescing: round timer to {@code roundTo} */ 246 | public static class SortFunction 247 | extends KeyedProcessFunction< 248 | Integer, Tuple2, Tuple2> { 249 | 250 | private static final Logger LOG = LoggerFactory.getLogger(SortFunction.class); 251 | private ListState> listState; 252 | private final int roundTo; 253 | 254 | public SortFunction(int roundTo) { 255 | this.roundTo = roundTo; 256 | } 257 | 258 | @Override 259 | public void open(Configuration parameters) throws Exception { 260 | super.open(parameters); 261 | 262 | ListStateDescriptor> desc = 263 | new ListStateDescriptor<>( 264 | "events", 265 | TypeInformation.of(new TypeHint>() {})); 266 | listState = getRuntimeContext().getListState(desc); 267 | } 268 | 269 | @Override 270 | public void processElement( 271 | Tuple2 value, 272 | Context ctx, 273 | Collector> out) 274 | throws Exception { 275 | 276 | TimerService timerService = ctx.timerService(); 277 
| long currentTimestamp = ctx.timestamp(); 278 | long currentWatermark = timerService.currentWatermark(); 279 | 280 | if (currentTimestamp > currentWatermark) { 281 | listState.add(value); 282 | if (this.roundTo == 0) { 283 | timerService.registerEventTimeTimer(currentWatermark + 1); 284 | } else { 285 | timerService.registerEventTimeTimer( 286 | currentTimestamp / this.roundTo * this.roundTo + this.roundTo); 287 | } 288 | } 289 | } 290 | 291 | @Override 292 | public void onTimer( 293 | long timestamp, OnTimerContext ctx, Collector> out) 294 | throws Exception { 295 | 296 | long currentWatermark = ctx.timerService().currentWatermark(); 297 | 298 | ArrayList> list = new ArrayList<>(); 299 | listState.get().forEach(list::add); 300 | LOG.info("Sorting list with size: " + list.size()); 301 | list.sort(new MeasurementByTimeComparator()); 302 | 303 | int index = 0; 304 | for (Tuple2 event : list) { 305 | // this requires re-assign timestamps and watermarks. Otherwise, the emitted events 306 | // here are all having the same timestamp as this timer. 
307 | if (event != null && event.f1 <= currentWatermark) { 308 | out.collect(event); 309 | index++; 310 | } else { 311 | break; 312 | } 313 | } 314 | list.subList(0, index).clear(); 315 | listState.update(list); 316 | } 317 | } 318 | 319 | /** This is class is used to calculate eventTimeLag after the second watermark are added */ 320 | public static class MapMeasurement 321 | extends RichMapFunction, Tuple2> { 322 | 323 | private transient DescriptiveStatisticsHistogram eventTimeLag; 324 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000; 325 | 326 | @Override 327 | public void open(final Configuration parameters) throws Exception { 328 | eventTimeLag = 329 | getRuntimeContext() 330 | .getMetricGroup() 331 | .histogram( 332 | "eventTimeLag", 333 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE)); 334 | } 335 | 336 | @Override 337 | public Tuple2 map(Tuple2 value) throws Exception { 338 | eventTimeLag.update(System.currentTimeMillis() - value.f1); 339 | return value; 340 | } 341 | } 342 | 343 | /** 344 | * Implements an exponentially moving average with a coefficient of 0.5, i.e. 345 | * 346 | *

    347 | *
  • avg[0] = value[0] (not forwarded to the next stream) 348 | *
  • avg[i] = avg[i-1] * 0.5 + value[i] * 0.5 (for i > 0) 349 | *
350 | * 351 | *

See 352 | * https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average 353 | */ 354 | private static class MovingAverageSensors 355 | extends KeyedProcessFunction< 356 | Integer, Tuple2, Tuple3> { 357 | private static final long serialVersionUID = 1L; 358 | 359 | private transient ValueState movingAverage; 360 | 361 | @Override 362 | public void open(Configuration parameters) throws Exception { 363 | super.open(parameters); 364 | movingAverage = 365 | getRuntimeContext() 366 | .getState(new ValueStateDescriptor<>("movingAverage", Types.DOUBLE)); 367 | } 368 | 369 | @Override 370 | public void processElement( 371 | Tuple2 value, 372 | Context ctx, 373 | Collector> out) 374 | throws Exception { 375 | 376 | Double last = movingAverage.value(); 377 | if (last != null) { 378 | last = (last + value.f0.getValue()) / 2.0; 379 | movingAverage.update(last); 380 | 381 | // do not forward the first value (it only stands alone) 382 | out.collect(Tuple3.of(ctx.getCurrentKey(), last, ctx.timestamp())); 383 | } else { 384 | movingAverage.update(value.f0.getValue()); 385 | } 386 | } 387 | } 388 | 389 | private static class FixSensorsFunction 390 | implements MapFunction, Tuple2> { 391 | 392 | private final Map locationCorrections = new HashMap<>(); 393 | 394 | @SafeVarargs 395 | public FixSensorsFunction(Tuple2... 
locationCorrections) { 396 | for (Tuple2 locationCorrection : locationCorrections) { 397 | this.locationCorrections.put(locationCorrection.f0, locationCorrection.f1); 398 | } 399 | } 400 | 401 | @Override 402 | public Tuple2 map(Tuple2 value) throws Exception { 403 | if (locationCorrections.containsKey(value.f0.getLocation())) { 404 | return Tuple2.of( 405 | new Measurement( 406 | value.f0.getSensorId(), 407 | value.f0.getValue() 408 | + locationCorrections.get(value.f0.getLocation()), 409 | value.f0.getLocation(), 410 | value.f0.getMeasurementInformation()), 411 | value.f1); 412 | } else { 413 | return value; 414 | } 415 | } 416 | } 417 | } 418 | --------------------------------------------------------------------------------