├── .gitignore
├── README.md
├── pom.xml
└── src
├── main
├── java
│ └── com
│ │ └── ververica
│ │ └── flinktraining
│ │ ├── exercises
│ │ └── troubleshoot
│ │ │ ├── ObjectReuseJob.java
│ │ │ ├── ObjectReuseJobUtils.java
│ │ │ ├── TroubledStreamingJob.java
│ │ │ └── TroubledStreamingJobUtils.java
│ │ ├── provided
│ │ ├── DoNotChangeThis.java
│ │ └── troubleshoot
│ │ │ ├── ExtendedMeasurement.java
│ │ │ ├── FakeKafkaRecord.java
│ │ │ ├── FakeKafkaSource.java
│ │ │ ├── GeoUtils.java
│ │ │ ├── MeanGauge.java
│ │ │ ├── Measurement.java
│ │ │ ├── ObjectReuseExtendedMeasurementSource.java
│ │ │ ├── SourceUtils.java
│ │ │ ├── WeatherUtils.java
│ │ │ └── WindowedMeasurements.java
│ │ └── solutions
│ │ └── troubleshoot
│ │ ├── ObjectReuseJobSolution1.java
│ │ ├── ObjectReuseJobSolution2.java
│ │ ├── SimpleMeasurement.java
│ │ ├── TroubledStreamingJobSolution1.java
│ │ ├── TroubledStreamingJobSolution2.java
│ │ ├── TroubledStreamingJobSolution31.java
│ │ ├── TroubledStreamingJobSolution32.java
│ │ ├── TroubledStreamingJobSolution33.java
│ │ ├── TroubledStreamingJobSolution41.java
│ │ ├── TroubledStreamingJobSolution42.java
│ │ ├── TroubledStreamingJobSolution43.java
│ │ └── immutable
│ │ ├── ExtendedMeasurement.java
│ │ ├── ExtendedMeasurementSerializer.java
│ │ ├── ExtendedMeasurementTypeInfo.java
│ │ ├── Location.java
│ │ ├── LocationSerializer.java
│ │ ├── LocationTypeInfo.java
│ │ ├── MeasurementValue.java
│ │ ├── MeasurementValueSerializer.java
│ │ ├── MeasurementValueTypeInfo.java
│ │ ├── ObjectReuseExtendedMeasurementSource.java
│ │ ├── Sensor.java
│ │ ├── SensorSerializer.java
│ │ └── SensorTypeInfo.java
└── resources
│ ├── cities.csv
│ └── log4j2.properties
└── test
└── java
└── com
└── ververica
└── training
├── ObjectReuseJobRunner.java
└── TroubledStreamingJobRunner.java

/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | dependency-reduced-pom.xml
3 | .idea
4 | *.iml
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | **⚠️ This repository was archived and its content was moved to https://github.com/ververica/flink-training/ ⚠️**
3 |
4 | ---
5 |
6 | # Apache Flink® Troubleshooting Training
7 |
8 | ## Introduction
9 |
10 | This repository provides the basis of the hands-on part of the "Apache Flink Troubleshooting" training session at Flink Forward Europe 2019.
11 |
12 | ### Requirements
13 |
14 | To make use of this repository, participants will need:
15 |
16 | * git
17 | * JDK 8
18 | * Maven
19 | * a Java IDE (IntelliJ IDEA/Eclipse)
20 |
21 | ### Training Preparations
22 |
23 | In order to avoid potential issues with the WiFi at the training venue, please check out and build the project prior to the training:
24 |
25 | ```bash
26 | git clone git@github.com:ververica/flink-training-troubleshooting.git
27 | cd flink-training-troubleshooting
28 | mvn clean package
29 | ```
30 |
31 | ### Infrastructure
32 |
33 | During the training, participants will be asked to run the Flink job `TroubledStreamingJob` locally as well as on Ververica Platform.
34 |
35 | ### Running Locally
36 |
37 | Just run the test in `TroubledStreamingJobRunner`, which calls the main method of `TroubledStreamingJob` with a local configuration and automatically pulls in the dependencies with "provided" scope.
38 |
39 | Once running, you can access Flink's Web UI via http://localhost:8081.
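For reference, the runner boils down to something like the following sketch (illustrative only; the actual test class may differ slightly). `ParameterTool` parses `--local true`, which makes the job create a local environment with the web UI:

```java
// Hypothetical sketch of what TroubledStreamingJobRunner does: it simply
// invokes the job's main() with the --local flag set.
import com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJob;

public class RunLocally {
    public static void main(String[] args) throws Exception {
        TroubledStreamingJob.main(new String[] {"--local", "true"});
    }
}
```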
40 |
41 | ### The Flink Job
42 |
43 | This simple Flink job reads measurement data from a Kafka topic with eight partitions. For the purpose of this training, the `KafkaConsumer` is replaced by `FakeKafkaSource`. The result of a calculation based on the measurement value is averaged over 1 second. The overall flow is depicted below:
44 |
45 | ```
46 | +-------------------+     +-----------------------+     +-----------------+     +----------------------+     +--------------------+
47 | |                   |     |                       |     |                 |     |                      |     |                    |
48 | | Fake Kafka Source | --> | Watermarks/Timestamps | --> | Deserialization | --> | Windowed Aggregation | --> | Sink: NormalOutput |
49 | |                   |     |                       |     |                 |     |                      |     |                    |
50 | +-------------------+     +-----------------------+     +-----------------+     +----------------------+     +--------------------+
51 |                                                                                   \
52 |                                                                                    \           +--------------------+
53 |                                                                                     \          |                    |
54 |                                                                                      +-------> | Sink: LateDataSink |
55 |                                                                                                |                    |
56 |                                                                                                +--------------------+
57 | ```
58 |
59 | In local mode, the sinks print their values on `stdout` (NormalOutput) and `stderr` (LateDataSink) for simplified debugging, whereas outside of local mode a `DiscardingSink` is used for each sink.
60 |
61 | ----
62 |
63 | *Apache Flink, Flink®, Apache®, the squirrel logo, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation.*
64 |
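For orientation, the flow diagram in the README above corresponds roughly to the following wiring in `TroubledStreamingJob` (a simplified sketch; the full source with operator names and uids is included further below):

```java
// Per-location keying, a 1-second event-time window, and a side output
// that collects records arriving after the watermark has passed.
OutputTag<JsonNode> lateDataTag = new OutputTag<JsonNode>("late-data") {};

SingleOutputStreamOperator<WindowedMeasurements> aggregatedPerLocation = env
        .addSource(SourceUtils.createFakeKafkaSource())              // Fake Kafka Source
        .assignTimestampsAndWatermarks(new MeasurementTSExtractor()) // Watermarks/Timestamps
        .map(new MeasurementDeserializer())                          // Deserialization
        .keyBy(jsonNode -> jsonNode.get("location").asText())
        .timeWindow(Time.of(1, TimeUnit.SECONDS))                    // Windowed Aggregation
        .sideOutputLateData(lateDataTag)
        .process(new MeasurementWindowAggregatingFunction());

aggregatedPerLocation.print();                                 // Sink: NormalOutput (local mode)
aggregatedPerLocation.getSideOutput(lateDataTag).printToErr(); // Sink: LateDataSink (local mode)
```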
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.ververica</groupId>
    <artifactId>flink-training-troubleshooting</artifactId>
    <version>0.1</version>
    <packaging>jar</packaging>

    <name>Flink Training Troubleshooting</name>
    <url>http://www.ververica.com</url>

    <properties>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <flink.version>1.9.0</flink.version>
        <jackson.version>2.9.10</jackson.version>
        <scala.binary.version>2.11</scala.binary.version>
        <log4j.version>2.10.0</log4j.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-annotations</artifactId>
            <version>${jackson.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.25</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
            <version>${log4j.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-api</artifactId>
            <version>${log4j.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>${log4j.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-jcl</artifactId>
            <version>${log4j.version}</version>
            <scope>runtime</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-test-utils_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter-api</artifactId>
            <version>5.1.0</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.0.0</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>org.apache.flink:force-shading</exclude>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>org.apache.logging.log4j:*</exclude>
                                    <exclude>commons-logging:*</exclude>
                                    <exclude>log4j:*</exclude>
                                    <exclude>com.data-artisans:frocksdbjni</exclude>
                                </excludes>
                            </artifactSet>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJob</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>

        <pluginManagement>
            <plugins>
                <plugin>
                    <groupId>org.eclipse.m2e</groupId>
                    <artifactId>lifecycle-mapping</artifactId>
                    <version>1.0.0</version>
                    <configuration>
                        <lifecycleMappingMetadata>
                            <pluginExecutions>
                                <pluginExecution>
                                    <pluginExecutionFilter>
                                        <groupId>org.apache.maven.plugins</groupId>
                                        <artifactId>maven-shade-plugin</artifactId>
                                        <versionRange>[3.0.0,)</versionRange>
                                        <goals>
                                            <goal>shade</goal>
                                        </goals>
                                    </pluginExecutionFilter>
                                    <action>
                                        <ignore/>
                                    </action>
                                </pluginExecution>
                                <pluginExecution>
                                    <pluginExecutionFilter>
                                        <groupId>org.apache.maven.plugins</groupId>
                                        <artifactId>maven-compiler-plugin</artifactId>
                                        <versionRange>[3.1,)</versionRange>
                                        <goals>
                                            <goal>testCompile</goal>
                                            <goal>compile</goal>
                                        </goals>
                                    </pluginExecutionFilter>
                                    <action>
                                        <ignore/>
                                    </action>
                                </pluginExecution>
                            </pluginExecutions>
                        </lifecycleMappingMetadata>
                    </configuration>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>

    <profiles>
        <profile>
            <id>add-dependencies-for-IDEA</id>
            <activation>
                <property>
                    <name>idea.version</name>
                </property>
            </activation>
            <dependencies>
                <dependency>
                    <groupId>org.apache.flink</groupId>
                    <artifactId>flink-java</artifactId>
                    <version>${flink.version}</version>
                    <scope>compile</scope>
                </dependency>
                <dependency>
                    <groupId>org.apache.flink</groupId>
                    <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
                    <version>${flink.version}</version>
                    <scope>compile</scope>
                </dependency>
                <dependency>
                    <groupId>org.apache.flink</groupId>
                    <artifactId>flink-runtime-web_${scala.binary.version}</artifactId>
                    <version>${flink.version}</version>
                    <scope>compile</scope>
                </dependency>
            </dependencies>
        </profile>
    </profiles>
</project>
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/exercises/troubleshoot/ObjectReuseJob.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.exercises.troubleshoot;
2 |
3 | import org.apache.flink.api.common.functions.AggregateFunction;
4 | import org.apache.flink.api.common.functions.RichFlatMapFunction;
5 | import org.apache.flink.api.common.functions.RichMapFunction;
6 | import org.apache.flink.api.java.tuple.Tuple2;
7 | import org.apache.flink.api.java.utils.ParameterTool;
8 | import org.apache.flink.configuration.Configuration;
9 | import org.apache.flink.streaming.api.datastream.DataStreamUtils;
10 | import org.apache.flink.streaming.api.datastream.KeyedStream;
11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
13 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
14 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink;
15 | import org.apache.flink.streaming.api.windowing.time.Time;
16 | import org.apache.flink.util.Collector;
17 | import org.apache.flink.util.OutputTag;
18 |
19 | import com.ververica.flinktraining.provided.troubleshoot.ExtendedMeasurement;
20 | import com.ververica.flinktraining.provided.troubleshoot.ObjectReuseExtendedMeasurementSource;
21 | import com.ververica.flinktraining.provided.troubleshoot.GeoUtils;
22 | import com.ververica.flinktraining.provided.troubleshoot.MeanGauge;
23 | import com.ververica.flinktraining.provided.troubleshoot.WeatherUtils;
24 |
25 | import java.util.EnumMap;
26 | import java.util.HashMap;
27 | import java.util.Map;
28 | import java.util.concurrent.TimeUnit;
29 |
30 | import static com.ververica.flinktraining.exercises.troubleshoot.ObjectReuseJobUtils.createConfiguredEnvironment;
31 |
32 | public class ObjectReuseJob {
33 |
34 |     public static void main(String[] args) throws Exception {
35 |         ParameterTool parameters = ParameterTool.fromArgs(args);
36 |
37 |         final boolean local = parameters.getBoolean("local", false);
38 |
39 |         StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local);
40 |
41 |         final boolean objectReuse = parameters.getBoolean("objectReuse", false);
42 |         if (objectReuse) {
43 |             env.getConfig().enableObjectReuse();
44 |         }
45 |
46 |         //Checkpointing Configuration
47 |         env.enableCheckpointing(5000);
48 |         env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000);
49 |
50 |         final OutputTag<ExtendedMeasurement> temperatureTag =
51 |                 new OutputTag<ExtendedMeasurement>("temperature") {
52 |                     private static final long serialVersionUID = -3127503822430851744L;
53 |                 };
54 |         final OutputTag<ExtendedMeasurement> windTag =
55 |                 new OutputTag<ExtendedMeasurement>("wind") {
56 |                     private static final long serialVersionUID = 4249595595891069268L;
57 |                 };
58 |
59 |         SingleOutputStreamOperator<ExtendedMeasurement> splitStream = env
60 |                 .addSource(new ObjectReuseExtendedMeasurementSource())
61 |                 .name("FakeMeasurementSource")
62 |                 .uid("FakeMeasurementSource")
63 |                 .keyBy(ExtendedMeasurement::getSensor)
64 |                 .process(new SplitSensors(temperatureTag, windTag))
65 |                 .name("SplitSensors")
66 |                 .uid("SplitSensors");
67 |
68 |         // (1) stream with the temperature converted into local temperature units (°F in the US)
69 |         splitStream.getSideOutput(temperatureTag)
70 |                 .map(new ConvertToLocalTemperature())
71 |                 .name("ConvertToLocalTemperature")
72 |                 .uid("ConvertToLocalTemperature")
73 |                 .addSink(new DiscardingSink<>())
74 |                 .name("LocalizedTemperatureSink")
75 |                 .uid("LocalizedTemperatureSink")
76 |                 .disableChaining();
77 |
78 |         // no need to do keyBy again; we did not change the key!
79 |         KeyedStream<ExtendedMeasurement, ExtendedMeasurement.Sensor> keyedTemperatureStream =
80 |                 DataStreamUtils.reinterpretAsKeyedStream(
81 |                         splitStream.getSideOutput(temperatureTag),
82 |                         ExtendedMeasurement::getSensor);
83 |
84 |         // (2) stream with an (exponential) moving average of the temperature (smooths sensor
85 |         // measurements, variant A); then converted into local temperature units (°F in the US)
86 |         keyedTemperatureStream
87 |                 .flatMap(new MovingAverageSensors())
88 |                 .name("MovingAverageTemperature")
89 |                 .uid("MovingAverageTemperature")
90 |                 .map(new ConvertToLocalTemperature())
91 |                 .name("ConvertToLocalAverageTemperature")
92 |                 .uid("ConvertToLocalAverageTemperature")
93 |                 .addSink(new DiscardingSink<>())
94 |                 .name("LocalizedAverageTemperatureSink")
95 |                 .uid("LocalizedAverageTemperatureSink")
96 |                 .disableChaining();
97 |
98 |         // (3) stream with a windowed average of the temperature (smooths sensor
99 |         // measurements, variant B); then converted into local temperature units (°F in the US)
100 |         keyedTemperatureStream
101 |                 .timeWindow(Time.of(10, TimeUnit.SECONDS), Time.of(1, TimeUnit.SECONDS))
102 |                 .aggregate(new WindowAverageSensor())
103 |                 .name("WindowAverageTemperature")
104 |                 .uid("WindowAverageTemperature")
105 |                 .map(new ConvertToLocalTemperature())
106 |                 .name("ConvertToLocalWindowedTemperature")
107 |                 .uid("ConvertToLocalWindowedTemperature")
108 |                 .addSink(new DiscardingSink<>())
109 |                 .name("LocalizedWindowedTemperatureSink")
110 |                 .uid("LocalizedWindowedTemperatureSink")
111 |                 .disableChaining();
112 |
113 |         // (4) stream with the wind speed converted into local speed units (mph in the US)
114 |         splitStream.getSideOutput(windTag)
115 |                 .map(new ConvertToLocalWindSpeed())
116 |                 .name("NormalizeWindSpeed")
117 |                 .uid("NormalizeWindSpeed")
118 |                 .addSink(new DiscardingSink<>())
119 |                 .name("WindSink")
120 |                 .uid("WindSink")
121 |                 .disableChaining();
122 |
123 |         env.execute(ObjectReuseJob.class.getSimpleName());
124 |     }
125 |
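// A note on DataStreamUtils.reinterpretAsKeyedStream as used above: it tells
// Flink that the side-output stream is already partitioned exactly as
// keyBy(ExtendedMeasurement::getSensor) would partition it, so the shuffle can
// be skipped. That holds here because the side output is emitted by the keyed
// SplitSensors operator without changing the key. If the partitioning did not
// match, keyed state and windows downstream would silently be wrong; the safe
// (but more expensive) alternative is a regular keyBy:
//
//     KeyedStream<ExtendedMeasurement, ExtendedMeasurement.Sensor> keyed =
//             splitStream.getSideOutput(temperatureTag)
//                     .keyBy(ExtendedMeasurement::getSensor);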
126 |     /**
127 |      * Splits a stream into multiple side outputs, one for each sensor type.
128 |      */
129 |     private static class SplitSensors extends
130 |             KeyedProcessFunction<ExtendedMeasurement.Sensor, ExtendedMeasurement, ExtendedMeasurement> {
131 |         private static final long serialVersionUID = 1L;
132 |
133 |         private EnumMap<ExtendedMeasurement.SensorType, OutputTag<ExtendedMeasurement>> outputTagBySensor =
134 |                 new EnumMap<>(ExtendedMeasurement.SensorType.class);
135 |
136 |         SplitSensors(
137 |                 OutputTag<ExtendedMeasurement> temperatureTag,
138 |                 OutputTag<ExtendedMeasurement> windTag) {
139 |             outputTagBySensor.put(ExtendedMeasurement.SensorType.Temperature, temperatureTag);
140 |             outputTagBySensor.put(ExtendedMeasurement.SensorType.Wind, windTag);
141 |         }
142 |
143 |         @Override
144 |         public void processElement(ExtendedMeasurement value, Context ctx, Collector<ExtendedMeasurement> out) {
145 |             ExtendedMeasurement.SensorType sensorType = value.getSensor().getSensorType();
146 |             OutputTag<ExtendedMeasurement> output = outputTagBySensor.get(sensorType);
147 |             ctx.output(output, value);
148 |         }
149 |     }
150 |
151 |     /**
152 |      * Implements an exponential moving average with a coefficient of 0.5, i.e.
153 |      * <ul>
154 |      * <li>avg[0] = value[0] (not forwarded to the next stream)</li>
155 |      * <li>avg[i] = avg[i-1] * 0.5 + value[i] * 0.5 (for i > 0)</li>
156 |      * </ul>
157 |      *
158 |      * See
159 |      * https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average
160 |      */
161 |     private static class MovingAverageSensors extends
162 |             RichFlatMapFunction<ExtendedMeasurement, ExtendedMeasurement> {
163 |         private static final long serialVersionUID = 1L;
164 |
165 |         private Map<ExtendedMeasurement.Sensor, Tuple2<Float, Double>> lastAverage = new HashMap<>();
166 |
167 |         @Override
168 |         public void flatMap(ExtendedMeasurement value, Collector<ExtendedMeasurement> out) {
169 |             ExtendedMeasurement.Sensor sensor = value.getSensor();
170 |
171 |             Tuple2<Float, Double> last = lastAverage.get(sensor);
172 |             if (last != null) {
173 |                 float newAccuracy = (last.f0 + value.getMeasurement().getAccuracy()) / 2.0f;
174 |                 double newValue = (last.f1 + value.getMeasurement().getValue()) / 2.0;
175 |                 value.getMeasurement().setAccuracy(newAccuracy);
176 |                 value.getMeasurement().setValue(newValue);
177 |                 // do not forward the first value (it only stands alone)
178 |                 out.collect(value);
179 |             }
180 |             lastAverage.put(
181 |                     sensor,
182 |                     Tuple2.of(
183 |                             value.getMeasurement().getAccuracy(),
184 |                             value.getMeasurement().getValue()
185 |                     ));
186 |         }
187 |     }
188 |
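// To illustrate the recurrence above with concrete numbers: for a single
// sensor emitting the measurement values 10, 20, 30, the operator
//   sees 10 -> emits nothing (first value), stores 10.0
//   sees 20 -> emits (10.0 + 20) / 2.0 = 15.0, stores 15.0
//   sees 30 -> emits (15.0 + 30) / 2.0 = 22.5, stores 22.5
// The accuracy field is smoothed in exactly the same way (as a float).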
189 |     @SuppressWarnings("WeakerAccess")
190 |     public static class WindowedAggregate {
191 |         public double sumValue = 0.0;
192 |         public double sumAccuracy = 0.0;
193 |         public long count = 0;
194 |         public ExtendedMeasurement lastValue = null;
195 |
196 |         public WindowedAggregate() {
197 |         }
198 |     }
199 |
200 |     /**
201 |      * Aggregate function determining average sensor values and accuracies per sensor instance.
202 |      */
203 |     private static class WindowAverageSensor implements
204 |             AggregateFunction<ExtendedMeasurement, WindowedAggregate, ExtendedMeasurement> {
205 |         @Override
206 |         public WindowedAggregate createAccumulator() {
207 |             return new WindowedAggregate();
208 |         }
209 |
210 |         @Override
211 |         public WindowedAggregate add(ExtendedMeasurement value, WindowedAggregate accumulator) {
212 |             accumulator.sumAccuracy += value.getMeasurement().getAccuracy();
213 |             accumulator.sumValue += value.getMeasurement().getValue();
214 |             accumulator.count++;
215 |             accumulator.lastValue = value;
216 |             return accumulator;
217 |         }
218 |
219 |         @Override
220 |         public ExtendedMeasurement getResult(WindowedAggregate accumulator) {
221 |             ExtendedMeasurement result = accumulator.lastValue;
222 |             result.getMeasurement().setValue(accumulator.sumValue / accumulator.count);
223 |             result.getMeasurement()
224 |                     .setAccuracy((float) (accumulator.sumAccuracy / accumulator.count));
225 |             return result;
226 |         }
227 |
228 |         @Override
229 |         public WindowedAggregate merge(WindowedAggregate a, WindowedAggregate b) {
230 |             a.count += b.count;
231 |             a.sumValue += b.sumValue;
232 |             a.sumAccuracy += b.sumAccuracy;
233 |             if (b.lastValue.getMeasurement().getTimestamp() > a.lastValue.getMeasurement().getTimestamp()) {
234 |                 a.lastValue = b.lastValue;
235 |             }
236 |             return a;
237 |         }
238 |     }
239 |
240 |     /**
241 |      * Converts SI units into locale-dependent units, i.e. °C to °F for the US. Adds a custom metric
242 |      * to report temperatures in the US.
243 |      */
244 |     private static class ConvertToLocalTemperature extends
245 |             RichMapFunction<ExtendedMeasurement, ExtendedMeasurement> {
246 |         private static final long serialVersionUID = 1L;
247 |
248 |         private transient MeanGauge normalizedTemperatureUS;
249 |
250 |         @Override
251 |         public void open(final Configuration parameters) {
252 |             normalizedTemperatureUS = getRuntimeContext().getMetricGroup()
253 |                     .gauge("normalizedTemperatureUSmean", new MeanGauge());
254 |             getRuntimeContext().getMetricGroup().gauge(
255 |                     "normalizedTemperatureUSmin", new MeanGauge.MinGauge(normalizedTemperatureUS));
256 |             getRuntimeContext().getMetricGroup().gauge(
257 |                     "normalizedTemperatureUSmax", new MeanGauge.MaxGauge(normalizedTemperatureUS));
258 |         }
259 |
260 |         @Override
261 |         public ExtendedMeasurement map(ExtendedMeasurement value) {
262 |             ExtendedMeasurement.Location location = value.getLocation();
263 |             if (GeoUtils.isInUS(location.getLongitude(), location.getLatitude())) {
264 |                 ExtendedMeasurement.MeasurementValue measurement = value.getMeasurement();
265 |                 double normalized = WeatherUtils.celciusToFahrenheit(measurement.getValue());
266 |                 measurement.setValue(normalized);
267 |                 normalizedTemperatureUS.addValue(normalized);
268 |             }
269 |             return value;
270 |         }
271 |     }
272 |
273 |     /**
274 |      * Converts SI units into locale-dependent units, i.e. km/h to mph for the US.
275 |      */
276 |     private static class ConvertToLocalWindSpeed extends
277 |             RichMapFunction<ExtendedMeasurement, ExtendedMeasurement> {
278 |         private static final long serialVersionUID = 1L;
279 |
280 |         @Override
281 |         public ExtendedMeasurement map(ExtendedMeasurement value) {
282 |             ExtendedMeasurement.Location location = value.getLocation();
283 |             if (GeoUtils.isInUS(location.getLongitude(), location.getLatitude())) {
284 |                 ExtendedMeasurement.MeasurementValue measurement = value.getMeasurement();
285 |                 measurement.setValue(WeatherUtils.kphToMph(measurement.getValue()));
286 |             }
287 |             return value;
288 |         }
289 |     }
290 | }
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/exercises/troubleshoot/ObjectReuseJobUtils.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.exercises.troubleshoot;
2 |
3 | import org.apache.flink.api.common.restartstrategy.RestartStrategies;
4 | import org.apache.flink.api.common.time.Time;
5 | import org.apache.flink.api.java.utils.ParameterTool;
6 | import org.apache.flink.configuration.Configuration;
7 | import org.apache.flink.core.fs.Path;
8 | import org.apache.flink.runtime.state.StateBackend;
9 | import org.apache.flink.runtime.state.filesystem.FsStateBackend;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 | import org.apache.flink.util.FileUtils;
12 |
13 | import java.io.File;
14 | import java.io.IOException;
15 | import java.net.URI;
16 | import java.net.URISyntaxException;
17 | import java.nio.file.Files;
18 | import java.util.concurrent.TimeUnit;
19 |
20 | public class ObjectReuseJobUtils {
21 |     public static StreamExecutionEnvironment createConfiguredEnvironment(
22 |             final ParameterTool parameters, final boolean local) throws
23 |             IOException, URISyntaxException {
24 |         StreamExecutionEnvironment env;
25 |         if (local) {
26 |             env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
27 |
28 |             String statePath = parameters.get("fsStatePath");
29 |             Path checkpointPath;
30 |             if (statePath != null) {
31 |                 FileUtils.deleteDirectory(new File(new URI(statePath)));
32 |                 checkpointPath = Path.fromLocalFile(new File(new URI(statePath)));
33 |             } else {
34 |                 checkpointPath = Path.fromLocalFile(Files.createTempDirectory("checkpoints").toFile());
35 |             }
36 |
37 |             StateBackend stateBackend = new FsStateBackend(checkpointPath);
38 |             env.setStateBackend(stateBackend);
39 |         } else {
40 |             env = StreamExecutionEnvironment.getExecutionEnvironment();
41 |         }
42 |
43 |         env.getConfig().setGlobalJobParameters(parameters);
44 |         env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
45 |                 Integer.MAX_VALUE,
46 |                 Time.of(15, TimeUnit.SECONDS) // delay
47 |         ));
48 |         return env;
49 |     }
50 | }
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/exercises/troubleshoot/TroubledStreamingJob.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.exercises.troubleshoot;
2 |
3 | import org.apache.flink.api.common.functions.RichMapFunction;
4 | import org.apache.flink.api.java.utils.ParameterTool;
5 | import org.apache.flink.configuration.Configuration;
6 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram;
7 | import org.apache.flink.streaming.api.TimeCharacteristic;
8 | import org.apache.flink.streaming.api.datastream.DataStream;
9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink;
12 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
13 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
14 | import org.apache.flink.streaming.api.windowing.time.Time;
15 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
16 | import org.apache.flink.util.Collector;
17 | import org.apache.flink.util.OutputTag;
18 |
19 | import com.fasterxml.jackson.databind.DeserializationFeature;
20 | import com.fasterxml.jackson.databind.JsonNode;
21 | import com.fasterxml.jackson.databind.ObjectMapper;
22 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord;
23 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements;
24 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils;
25 |
26 | import java.io.IOException;
27 | import java.util.concurrent.TimeUnit;
28 |
29 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment;
30 |
31 | public class TroubledStreamingJob {
32 |
33 |     public static void main(String[] args) throws Exception {
34 |         ParameterTool parameters = ParameterTool.fromArgs(args);
35 |
36 |         final boolean local = parameters.getBoolean("local", false);
37 |
38 |         StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local);
39 |
40 |         //Time Characteristics
41 |         env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
42 |         env.getConfig().setAutoWatermarkInterval(2000);
43 |
44 |         //Checkpointing Configuration
45 |         env.enableCheckpointing(5000);
46 |         env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000);
47 |
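// With these settings, a checkpoint is triggered every 5 s, and after one
// checkpoint completes the next may start at the earliest 4 s later, so
// slow checkpoints cannot pile up back to back.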
48 |         DataStream<JsonNode> sourceStream = env
49 |                 .addSource(SourceUtils.createFakeKafkaSource())
50 |                 .name("FakeKafkaSource")
51 |                 .uid("FakeKafkaSource")
52 |                 .assignTimestampsAndWatermarks(new MeasurementTSExtractor())
53 |                 .name("Watermarks")
54 |                 .uid("Watermarks")
55 |                 .map(new MeasurementDeserializer())
56 |                 .name("Deserialization")
57 |                 .uid("Deserialization");
58 |
59 |         OutputTag<JsonNode> lateDataTag = new OutputTag<JsonNode>("late-data") {
60 |             private static final long serialVersionUID = 33513631677208956L;
61 |         };
62 |
63 |         SingleOutputStreamOperator<WindowedMeasurements> aggregatedPerLocation = sourceStream
64 |                 .keyBy(jsonNode -> jsonNode.get("location").asText())
65 |                 .timeWindow(Time.of(1, TimeUnit.SECONDS))
66 |                 .sideOutputLateData(lateDataTag)
67 |                 .process(new MeasurementWindowAggregatingFunction())
68 |                 .name("WindowedAggregationPerLocation")
69 |                 .uid("WindowedAggregationPerLocation");
70 |
71 |         if (local) {
72 |             aggregatedPerLocation.print()
73 |                     .name("NormalOutput")
74 |                     .uid("NormalOutput")
75 |                     .disableChaining();
76 |             aggregatedPerLocation.getSideOutput(lateDataTag).printToErr()
77 |                     .name("LateDataSink")
78 |                     .uid("LateDataSink")
79 |                     .disableChaining();
80 |         } else {
81 |             aggregatedPerLocation.addSink(new DiscardingSink<>())
82 |                     .name("NormalOutput")
83 |                     .uid("NormalOutput")
84 |                     .disableChaining();
85 |             aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>())
86 |                     .name("LateDataSink")
87 |                     .uid("LateDataSink")
88 |                     .disableChaining();
89 |         }
90 |
91 |         env.execute(TroubledStreamingJob.class.getSimpleName());
92 |     }
93 |
94 |     /**
95 |      * Deserializes the JSON Kafka message.
96 |      */
97 |     public static class MeasurementDeserializer extends RichMapFunction<FakeKafkaRecord, JsonNode> {
98 |         private static final long serialVersionUID = 1L;
99 |
100 |         @Override
101 |         public JsonNode map(final FakeKafkaRecord kafkaRecord) throws Exception {
102 |             return deserialize(kafkaRecord.getValue());
103 |         }
104 |
105 |         private JsonNode deserialize(final byte[] bytes) throws IOException {
106 |             return ObjectMapperSingleton.getInstance().readValue(bytes, JsonNode.class);
107 |         }
108 |     }
109 |
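// For reference, the bytes deserialized above hold a JSON-serialized
// Measurement POJO, roughly of this shape (values are illustrative):
//   {"sensorId":42,"value":73.2,"location":"Berlin","measurementInformation":"..."}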
110 |     public static class MeasurementTSExtractor
111 |             extends BoundedOutOfOrdernessTimestampExtractor<FakeKafkaRecord> {
112 |         private static final long serialVersionUID = 1L;
113 |
114 |         MeasurementTSExtractor() {
115 |             super(Time.of(250, TimeUnit.MILLISECONDS));
116 |         }
117 |
118 |         @Override
119 |         public long extractTimestamp(final FakeKafkaRecord record) {
120 |             return record.getTimestamp();
121 |         }
122 |     }
123 |
124 |     public static class MeasurementWindowAggregatingFunction
125 |             extends ProcessWindowFunction<JsonNode, WindowedMeasurements, String, TimeWindow> {
126 |         private static final long serialVersionUID = 1L;
127 |
128 |         private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000;
129 |
130 |         private transient DescriptiveStatisticsHistogram eventTimeLag;
131 |
132 |         MeasurementWindowAggregatingFunction() {
133 |         }
134 |
135 |         @Override
136 |         public void process(
137 |                 final String location,
138 |                 final Context context,
139 |                 final Iterable<JsonNode> input,
140 |                 final Collector<WindowedMeasurements> out) {
141 |
142 |             WindowedMeasurements aggregate = new WindowedMeasurements();
143 |             for (JsonNode record : input) {
144 |                 double result = Double.parseDouble(record.get("value").asText());
145 |                 aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result);
146 |                 aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1);
147 |             }
148 |
149 |             final TimeWindow window = context.window();
150 |             aggregate.setWindowStart(window.getStart());
151 |             aggregate.setWindowEnd(window.getEnd());
152 |             aggregate.setLocation(location);
153 |
154 |             eventTimeLag.update(System.currentTimeMillis() - window.getEnd());
155 |             out.collect(aggregate);
156 |         }
157 |
158 |         @Override
159 |         public void open(Configuration parameters) throws Exception {
160 |             super.open(parameters);
161 |
162 |             eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag",
163 |                     new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE));
164 |         }
165 |     }
166 |
167 |     private static class ObjectMapperSingleton {
168 |         static ObjectMapper getInstance() {
169 |             ObjectMapper objectMapper = new ObjectMapper();
170 |             objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
171 |             return objectMapper;
172 |         }
173 |     }
174 | }
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/exercises/troubleshoot/TroubledStreamingJobUtils.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.exercises.troubleshoot;
2 |
3 | import org.apache.flink.api.common.restartstrategy.RestartStrategies;
4 | import org.apache.flink.api.common.time.Time;
5 | import org.apache.flink.api.java.utils.ParameterTool;
6 | import org.apache.flink.configuration.Configuration;
7 | import org.apache.flink.core.fs.Path;
8 | import org.apache.flink.runtime.state.StateBackend;
9 | import org.apache.flink.runtime.state.filesystem.FsStateBackend;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 | import org.apache.flink.util.FileUtils;
12 |
13 | import java.io.File;
14 | import java.io.IOException;
15 | import java.net.URI;
16 | import java.net.URISyntaxException;
17 | import java.nio.file.Files;
18 | import java.util.concurrent.TimeUnit;
19 |
20 | public class TroubledStreamingJobUtils {
21 |     public static StreamExecutionEnvironment createConfiguredEnvironment(
22 |             final ParameterTool parameters, final boolean local) throws
23 |             IOException, URISyntaxException {
24 |         StreamExecutionEnvironment env;
25 |         if (local) {
26 |             env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
27 |
28 |             String statePath = parameters.get("fsStatePath");
29 |             Path checkpointPath;
30 |             if (statePath != null) {
31 |                 FileUtils.deleteDirectory(new File(new URI(statePath)));
32 |                 checkpointPath = Path.fromLocalFile(new File(new URI(statePath)));
33 |             } else {
34 |                 checkpointPath = Path.fromLocalFile(Files.createTempDirectory("checkpoints").toFile());
35 |             }
36 |
37 |             StateBackend stateBackend = new FsStateBackend(checkpointPath);
38 |             env.setStateBackend(stateBackend);
39 |         } else {
40 |             env = StreamExecutionEnvironment.getExecutionEnvironment();
41 |         }
42 |
43 |         env.getConfig().setGlobalJobParameters(parameters);
44 |         env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
45 |                 Integer.MAX_VALUE,
46 |                 Time.of(15, TimeUnit.SECONDS) // delay
47 |         ));
48 |         return env;
49 |     }
50 | }
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/DoNotChangeThis.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided;
2 |
3 | import java.lang.annotation.ElementType;
4 | import java.lang.annotation.Retention;
5 | import java.lang.annotation.RetentionPolicy;
6 | import java.lang.annotation.Target;
7 |
8 | /**
9 |  * Classes, methods or fields annotated with {@link DoNotChangeThis} should not be changed by training participants. They are either part of the required business logic or would usually be outside the scope of the Flink code in a real-life scenario.
10 | */ 11 | @Retention(RetentionPolicy.SOURCE) 12 | @Target({ElementType.CONSTRUCTOR, ElementType.METHOD, ElementType.TYPE}) 13 | public @interface DoNotChangeThis { 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/provided/troubleshoot/ExtendedMeasurement.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.provided.troubleshoot; 2 | 3 | import com.ververica.flinktraining.provided.DoNotChangeThis; 4 | 5 | import java.util.Objects; 6 | 7 | @SuppressWarnings({"WeakerAccess", "unused"}) 8 | @DoNotChangeThis 9 | public class ExtendedMeasurement { 10 | 11 | private Sensor sensor; 12 | private Location location; 13 | private MeasurementValue measurement; 14 | 15 | public ExtendedMeasurement() { 16 | } 17 | 18 | public ExtendedMeasurement( 19 | Sensor sensor, 20 | Location location, 21 | MeasurementValue measurement) { 22 | this.sensor = sensor; 23 | this.location = location; 24 | this.measurement = measurement; 25 | } 26 | 27 | public Sensor getSensor() { 28 | return sensor; 29 | } 30 | 31 | public void setSensor(Sensor sensor) { 32 | this.sensor = sensor; 33 | } 34 | 35 | public Location getLocation() { 36 | return location; 37 | } 38 | 39 | public void setLocation(Location location) { 40 | this.location = location; 41 | } 42 | 43 | public MeasurementValue getMeasurement() { 44 | return measurement; 45 | } 46 | 47 | public void setMeasurement(MeasurementValue measurement) { 48 | this.measurement = measurement; 49 | } 50 | 51 | public enum SensorType { 52 | Temperature, 53 | Wind 54 | } 55 | 56 | public static class Sensor { 57 | private long sensorId; 58 | private long vendorId; 59 | private SensorType sensorType; 60 | 61 | public Sensor() { 62 | } 63 | 64 | public Sensor( 65 | long sensorId, 66 | long vendorId, 67 | SensorType sensorType) { 68 | this.sensorId = sensorId; 69 | this.vendorId = vendorId; 70 | this.sensorType = sensorType; 71 | } 72 | 73 | public long getSensorId() { 74 | return sensorId; 75 | } 76 | 77 | public void setSensorId(long sensorId) { 78 | this.sensorId = sensorId; 79 | } 80 | 81 | public long getVendorId() { 82 | return vendorId; 83 | } 84 | 85 | public void setVendorId(long vendorId) { 86 | this.vendorId = vendorId; 87 | } 88 | 89 | public SensorType getSensorType() { 90 | return sensorType; 91 | } 92 | 93 | public void setSensorType(SensorType sensorType) { 94 | this.sensorType = sensorType; 95 | } 96 | 97 | @Override 98 | public boolean equals(Object o) { 99 | if (this == o) { 100 | return true; 101 | } 102 | if (o == null || getClass() != o.getClass()) { 103 | return false; 104 | } 105 | Sensor sensor = (Sensor) o; 106 | return sensorId == sensor.sensorId && 107 | vendorId == sensor.vendorId && 108 | sensorType == sensor.sensorType; 109 | } 110 | 111 | @Override 112 | public int hashCode() { 113 | // NOTE: do not use the enum directly here. Why? 114 | // -> try with Sensor as a key in a distributed setting and see for yourself! 
115 | return Objects.hash(sensorId, vendorId, sensorType.ordinal()); 116 | } 117 | } 118 | 119 | public static class Location { 120 | private double longitude; 121 | private double latitude; 122 | private double height; 123 | 124 | public Location() { 125 | } 126 | 127 | public Location(double longitude, double latitude, double height) { 128 | this.longitude = longitude; 129 | this.latitude = latitude; 130 | this.height = height; 131 | } 132 | 133 | public double getLongitude() { 134 | return longitude; 135 | } 136 | 137 | public void setLongitude(double longitude) { 138 | this.longitude = longitude; 139 | } 140 | 141 | public double getLatitude() { 142 | return latitude; 143 | } 144 | 145 | public void setLatitude(double latitude) { 146 | this.latitude = latitude; 147 | } 148 | 149 | public double getHeight() { 150 | return height; 151 | } 152 | 153 | public void setHeight(double height) { 154 | this.height = height; 155 | } 156 | 157 | @Override 158 | public boolean equals(Object o) { 159 | if (this == o) { 160 | return true; 161 | } 162 | if (o == null || getClass() != o.getClass()) { 163 | return false; 164 | } 165 | Location location = (Location) o; 166 | return Double.compare(location.longitude, longitude) == 0 && 167 | Double.compare(location.latitude, latitude) == 0 && 168 | Double.compare(location.height, height) == 0; 169 | } 170 | 171 | @Override 172 | public int hashCode() { 173 | return Objects.hash(longitude, latitude, height); 174 | } 175 | } 176 | 177 | public static class MeasurementValue { 178 | private double value; 179 | private float accuracy; 180 | private long timestamp; 181 | 182 | public MeasurementValue() { 183 | } 184 | 185 | public MeasurementValue(double value, float accuracy, long timestamp) { 186 | this.value = value; 187 | this.accuracy = accuracy; 188 | this.timestamp = timestamp; 189 | } 190 | 191 | public double getValue() { 192 | return value; 193 | } 194 | 195 | public void setValue(double value) { 196 | this.value = value; 197 | } 198 | 199 | public float getAccuracy() { 200 | return accuracy; 201 | } 202 | 203 | public void setAccuracy(float accuracy) { 204 | this.accuracy = accuracy; 205 | } 206 | 207 | public long getTimestamp() { 208 | return timestamp; 209 | } 210 | 211 | public void setTimestamp(long timestamp) { 212 | this.timestamp = timestamp; 213 | } 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/provided/troubleshoot/FakeKafkaRecord.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.provided.troubleshoot; 2 | 3 | import com.ververica.flinktraining.provided.DoNotChangeThis; 4 | 5 | import java.util.Arrays; 6 | import java.util.Objects; 7 | 8 | @DoNotChangeThis 9 | public class FakeKafkaRecord { 10 | 11 | private long timestamp; 12 | private byte[] key; 13 | private byte[] value; 14 | private int partition; 15 | 16 | public FakeKafkaRecord() { 17 | } 18 | 19 | public FakeKafkaRecord(final long timestamp, final byte[] key, final byte[] value, final int partition) { 20 | this.timestamp = timestamp; 21 | this.key = key; 22 | this.value = value; 23 | this.partition = partition; 24 | } 25 | 26 | public long getTimestamp() { 27 | return timestamp; 28 | } 29 | 30 | public void setTimestamp(final long timestamp) { 31 | this.timestamp = timestamp; 32 | } 33 | 34 | public byte[] getKey() { 35 | return key; 36 | } 37 | 38 | public void setKey(final byte[] key) { 39 | this.key = key; 
40 |     }
41 |
42 |     public byte[] getValue() {
43 |         return value;
44 |     }
45 |
46 |     public void setValue(final byte[] value) {
47 |         this.value = value;
48 |     }
49 |
50 |     public int getPartition() {
51 |         return partition;
52 |     }
53 |
54 |     public void setPartition(final int partition) {
55 |         this.partition = partition;
56 |     }
57 |
58 |     @Override
59 |     public boolean equals(final Object o) {
60 |         if (this == o) {
61 |             return true;
62 |         }
63 |         if (o == null || getClass() != o.getClass()) {
64 |             return false;
65 |         }
66 |         final FakeKafkaRecord that = (FakeKafkaRecord) o;
67 |         return timestamp == that.timestamp &&
68 |                 partition == that.partition &&
69 |                 Arrays.equals(key, that.key) &&
70 |                 Arrays.equals(value, that.value);
71 |     }
72 |
73 |     @Override
74 |     public int hashCode() {
75 |         int result = Objects.hash(timestamp, partition);
76 |         result = 31 * result + Arrays.hashCode(key);
77 |         result = 31 * result + Arrays.hashCode(value);
78 |         return result;
79 |     }
80 |
81 |     @Override
82 |     public String toString() {
83 |         final StringBuilder sb = new StringBuilder("FakeKafkaRecord{");
84 |         sb.append("timestamp=").append(timestamp);
85 |         sb.append(", key=").append(Arrays.toString(key));
86 |         sb.append(", value=").append(Arrays.toString(value));
87 |         sb.append(", partition=").append(partition);
88 |         sb.append('}');
89 |         return sb.toString();
90 |     }
91 | }
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/FakeKafkaSource.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 |
3 | import org.apache.flink.configuration.Configuration;
4 | import org.apache.flink.runtime.state.FunctionInitializationContext;
5 | import org.apache.flink.runtime.state.FunctionSnapshotContext;
6 | import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
7 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
8 |
9 | import com.ververica.flinktraining.provided.DoNotChangeThis;
10 | import org.slf4j.Logger;
11 | import org.slf4j.LoggerFactory;
12 |
13 | import java.util.Arrays;
14 | import java.util.BitSet;
15 | import java.util.List;
16 | import java.util.Random;
17 | import java.util.stream.Collectors;
18 | import java.util.stream.IntStream;
19 |
20 | /**
21 |  * The {@link FakeKafkaSource} reads from {@code NO_OF_PARTITIONS} Kafka partitions.
22 |  * <p>
23 |  * The timestamps roughly start at the epoch and are ascending per partition. The partitions themselves can be out of sync.
24 |  *
25 |  */
26 | @DoNotChangeThis
27 | public class FakeKafkaSource extends RichParallelSourceFunction<FakeKafkaRecord> implements CheckpointedFunction {
28 |     private static final long serialVersionUID = 4658785571367840693L;
29 |
30 |     private static final int NO_OF_PARTITIONS = 8;
31 |     public static final Logger log = LoggerFactory.getLogger(FakeKafkaSource.class);
32 |
33 |     private final Random rand;
34 |
35 |     private transient volatile boolean cancelled;
36 |     private transient int indexOfThisSubtask;
37 |     private transient int numberOfParallelSubtasks;
38 |     private transient List<Integer> assignedPartitions;
39 |
40 |     private final List<byte[]> serializedMeasurements;
41 |     private final double poisonPillRate;
42 |     private final BitSet idlePartitions;
43 |
44 |     FakeKafkaSource(final int seed, final float poisonPillRate, List<Integer> idlePartitions, List<byte[]> serializedMeasurements) {
45 |         this.poisonPillRate = poisonPillRate;
46 |         this.idlePartitions = new BitSet(NO_OF_PARTITIONS);
47 |         for (int i : idlePartitions) {
48 |             this.idlePartitions.set(i);
49 |         }
50 |         this.serializedMeasurements = serializedMeasurements;
51 |
52 |         this.rand = new Random(seed);
53 |     }
54 |
55 |     @Override
56 |     public void open(final Configuration parameters) {
57 |         indexOfThisSubtask = getRuntimeContext().getIndexOfThisSubtask();
58 |         numberOfParallelSubtasks = getRuntimeContext().getNumberOfParallelSubtasks();
59 |
60 |         assignedPartitions = IntStream.range(0, NO_OF_PARTITIONS)
61 |                 .filter(i -> i % numberOfParallelSubtasks == indexOfThisSubtask)
62 |                 .boxed()
63 |                 .collect(Collectors.toList());
64 |
65 |         log.info("Now reading from partitions: {}", assignedPartitions);
66 |     }
67 |
68 |
69 |     @Override
70 |     public void run(final SourceContext<FakeKafkaRecord> sourceContext) throws Exception {
71 |
72 |         int numberOfPartitions = assignedPartitions.size();
73 |
74 |         if (!assignedPartitions.isEmpty()) {
75 |             while (!cancelled) {
76 |                 int nextPartition = assignedPartitions.get(rand.nextInt(numberOfPartitions));
77 |
78 |                 if (idlePartitions.get(nextPartition)) {
79 |                     Thread.sleep(1000); // avoid spinning wait
80 |                     continue;
81 |                 }
82 |
83 |                 long nextTimestamp = getTimestampForPartition(nextPartition);
84 |
85 |                 byte[] serializedMeasurement =
86 |                         serializedMeasurements.get(rand.nextInt(serializedMeasurements.size()));
87 |
88 |                 if (rand.nextFloat() > 1 - poisonPillRate) {
89 |                     serializedMeasurement = Arrays.copyOf(serializedMeasurement, 10);
90 |                 }
91 |
92 |                 synchronized (sourceContext.getCheckpointLock()) {
93 |                     sourceContext.collect(
94 |                             new FakeKafkaRecord(
95 |                                     nextTimestamp, null, serializedMeasurement, nextPartition));
96 |                 }
97 |             }
98 |         } else {
99 |             // this source doesn't have any partitions and thus never emits any records
100 |             // (and therefore also no watermarks), so we mark this subtask as idle to
101 |             // not block watermark forwarding
102 |             sourceContext.markAsTemporarilyIdle();
103 |
104 |             // wait until this is canceled
105 |             final Object waitLock = new Object();
106 |             while (!cancelled) {
107 |                 try {
108 |                     //noinspection SynchronizationOnLocalVariableOrMethodParameter
109 |                     synchronized (waitLock) {
110 |                         waitLock.wait();
111 |                     }
112 |                 } catch (InterruptedException e) {
113 |                     if (cancelled) {
114 |                         // restore the interrupted state, and fall through the loop
115 |                         Thread.currentThread().interrupt();
116 |                     }
117 |                 }
118 |             }
119 |         }
120 |     }
121 |
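// Example of the partition assignment in open() above: with
// NO_OF_PARTITIONS = 8 and a source parallelism of 2, subtask 0 reads
// partitions {0, 2, 4, 6} and subtask 1 reads {1, 3, 5, 7}
// (i % numberOfParallelSubtasks == indexOfThisSubtask). With a parallelism
// above 8, some subtasks get no partition at all and mark themselves idle.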
122 |     private long getTimestampForPartition(int partition) {
123 |         return System.currentTimeMillis() - (partition * 50L);
124 |     }
125 |
126 |     @Override
127 |     public void cancel() {
128 |         cancelled = true;
129 |
130 |         // there will be an interrupt() call to the main thread anyways
131 |     }
132 |
133 |     @Override
134 |     public void snapshotState(final FunctionSnapshotContext context) {
135 |     }
136 |
137 |     @Override
138 |     public void initializeState(final FunctionInitializationContext context) {
139 |     }
140 | }
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/GeoUtils.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 |
3 | import com.ververica.flinktraining.provided.DoNotChangeThis;
4 |
5 | @SuppressWarnings("WeakerAccess")
6 | @DoNotChangeThis
7 | public class GeoUtils {
8 |
9 |     // bounding box of the area of the USA
10 |     public final static double US_LON_EAST = -66.9326;
11 |     public final static double US_LON_WEST = -125.0011;
12 |     public final static double US_LAT_NORTH = 49.5904;
13 |     public final static double US_LAT_SOUTH = 24.9493;
14 |
15 |     /**
16 |      * Checks if a location specified by longitude and latitude values is
17 |      * within the geo boundaries of the USA.
18 |      *
19 |      * @param lon longitude of the location to check
20 |      * @param lat latitude of the location to check
21 |      *
22 |      * @return true if the location is within US boundaries, otherwise false.
23 |      */
24 |     public static boolean isInUS(double lon, double lat) {
25 |         return !(lon > US_LON_EAST || lon < US_LON_WEST) &&
26 |                 !(lat > US_LAT_NORTH || lat < US_LAT_SOUTH);
27 |     }
28 |
29 |     // bounding box of the area of Germany
30 |     public final static double DE_LON_EAST = 15.0419319;
31 |     public final static double DE_LON_WEST = 5.8663153;
32 |     public final static double DE_LAT_NORTH = 55.099161;
33 |     public final static double DE_LAT_SOUTH = 47.2701114;
34 |
35 |     /**
36 |      * Checks if a location specified by longitude and latitude values is
37 |      * within the geo boundaries of Germany.
38 |      *
39 |      * @param lon longitude of the location to check
40 |      * @param lat latitude of the location to check
41 |      *
42 |      * @return true if the location is within German boundaries, otherwise false.
43 |      */
44 |     public static boolean isInDE(double lon, double lat) {
45 |         return !(lon > DE_LON_EAST || lon < DE_LON_WEST) &&
46 |                 !(lat > DE_LAT_NORTH || lat < DE_LAT_SOUTH);
47 |     }
48 | }
--------------------------------------------------------------------------------
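A quick illustration of the bounding-box checks above (longitude first, then latitude; coordinates are illustrative and the demo class is hypothetical, assumed to live in the same package):

```java
// Sanity check for GeoUtils: a point in the central US, and Berlin.
public class GeoUtilsDemo {
    public static void main(String[] args) {
        System.out.println(GeoUtils.isInUS(-100.0, 40.0)); // true
        System.out.println(GeoUtils.isInDE(13.4, 52.5));   // true (Berlin)
        System.out.println(GeoUtils.isInUS(13.4, 52.5));   // false
    }
}
```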
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/MeanGauge.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 |
3 | import org.apache.flink.metrics.Gauge;
4 | import org.apache.flink.metrics.View;
5 |
6 | import com.ververica.flinktraining.provided.DoNotChangeThis;
7 | import org.apache.commons.math3.stat.descriptive.moment.SecondMoment;
8 |
9 | /**
10 |  * Gauge view for determining the mean per time span. Also allows access to min and max metrics via
11 |  * the {@link MinGauge} and {@link MaxGauge} wrappers.
12 |  */
13 | @DoNotChangeThis
14 | public class MeanGauge implements Gauge<Double>, View {
15 |
16 |     private SimpleStats stats = new SimpleStats();
17 |     private SimpleStats currentStats = new SimpleStats();
18 |
19 |     @Override
20 |     public void update() {
21 |         currentStats = stats.copy();
22 |         stats.clear();
23 |     }
24 |
25 |     public void addValue(double d) {
26 |         stats.increment(d);
27 |     }
28 |
29 |     @Override
30 |     public Double getValue() {
31 |         return currentStats.getMean();
32 |     }
33 |
34 |     /**
35 |      * Wraps around the {@link MeanGauge} view to get the min of all reported values.
36 |      */
37 |     public static class MinGauge implements Gauge<Double> {
38 |         private final MeanGauge base;
39 |
40 |         public MinGauge(MeanGauge base) {
41 |             this.base = base;
42 |         }
43 |
44 |         @Override
45 |         public Double getValue() {
46 |             return base.currentStats.getMin();
47 |         }
48 |     }
49 |
50 |     /**
51 |      * Wraps around the {@link MeanGauge} view to get the max of all reported values.
52 |      */
53 |     public static class MaxGauge implements Gauge<Double> {
54 |         private final MeanGauge base;
55 |
56 |         public MaxGauge(MeanGauge base) {
57 |             this.base = base;
58 |         }
59 |
60 |         @Override
61 |         public Double getValue() {
62 |             return base.currentStats.getMax();
63 |         }
64 |     }
65 |
66 |     /**
67 |      * Calculates min, max, mean (first moment), as well as the second moment.
68 |      */
69 |     private static class SimpleStats extends SecondMoment {
70 |         private static final long serialVersionUID = 1L;
71 |
72 |         private double min = Double.NaN;
73 |         private double max = Double.NaN;
74 |
75 |         @Override
76 |         public void increment(double d) {
77 |             if (d < min || Double.isNaN(min)) {
78 |                 min = d;
79 |             }
80 |             if (d > max || Double.isNaN(max)) {
81 |                 max = d;
82 |             }
83 |             super.increment(d);
84 |         }
85 |
86 |         @Override
87 |         public SimpleStats copy() {
88 |             SimpleStats result = new SimpleStats();
89 |             SecondMoment.copy(this, result);
90 |             result.min = min;
91 |             result.max = max;
92 |             return result;
93 |         }
94 |
95 |         double getMin() {
96 |             return min;
97 |         }
98 |
99 |         double getMax() {
100 |             return max;
101 |         }
102 |
103 |         double getMean() {
104 |             return m1;
105 |         }
106 |     }
107 | }
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/Measurement.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 |
3 | import com.ververica.flinktraining.provided.DoNotChangeThis;
4 |
5 | import java.util.Objects;
6 |
7 | @DoNotChangeThis
8 | public class Measurement {
9 |
10 |     private int sensorId;
11 |     private double value;
12 |     private String location;
13 |     private String measurementInformation;
14 |
15 |     public Measurement() {
16 |     }
17 |
18 |     public Measurement(final int sensorId, final double value, final String location, final String measurementInformation) {
19 |         this.sensorId = sensorId;
20 |         this.value = value;
21 |         this.location = location;
22 |         this.measurementInformation = measurementInformation;
23 |     }
24 |
25 |     public String getMeasurementInformation() {
26 |         return measurementInformation;
27 |     }
28 |
29 |     public void setMeasurementInformation(final String measurementInformation) {
30 |         this.measurementInformation = measurementInformation;
31 |     }
32 |
33 |     public int getSensorId() {
34 |         return sensorId;
35 |     }
36 |
37 |     public void setSensorId(final int sensorId) {
38 |         this.sensorId = sensorId;
39 |     }
40 |
41 |     public double getValue() {
42 |         return value;
43 |     }
44 |
45 |     public void
setValue(final double value) { 46 | this.value = value; 47 | } 48 | 49 | public String getLocation() { 50 | return location; 51 | } 52 | 53 | public void setLocation(final String location) { 54 | this.location = location; 55 | } 56 | 57 | @Override 58 | public boolean equals(final Object o) { 59 | if (this == o) { 60 | return true; 61 | } 62 | if (o == null || getClass() != o.getClass()) { 63 | return false; 64 | } 65 | final Measurement that = (Measurement) o; 66 | return sensorId == that.sensorId && 67 | Double.compare(that.value, value) == 0 && 68 | Objects.equals(location, that.location) && 69 | Objects.equals(measurementInformation, that.measurementInformation); 70 | } 71 | 72 | @Override 73 | public int hashCode() { 74 | return Objects.hash(sensorId, value, location, measurementInformation); 75 | } 76 | 77 | @Override 78 | public String toString() { 79 | final StringBuilder sb = new StringBuilder("Measurement{"); 80 | sb.append("sensorId=").append(sensorId); 81 | sb.append(", value=").append(value); 82 | sb.append(", location='").append(location).append('\''); 83 | sb.append(", measurementInformation='").append(measurementInformation).append('\''); 84 | sb.append('}'); 85 | return sb.toString(); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/provided/troubleshoot/ObjectReuseExtendedMeasurementSource.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.provided.troubleshoot; 2 | 3 | import org.apache.flink.configuration.Configuration; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | import com.ververica.flinktraining.provided.DoNotChangeThis; 7 | 8 | import java.util.SplittableRandom; 9 | 10 | /** 11 | * Artificial source for sensor measurements (temperature and wind speed) of a pre-defined set of 12 | * sensors (per parallel instance) creating measurements for two locations (inside the bounding 13 | * boxes of Germany (DE) and the USA (US)) in SI units (°C and km/h). 
14 |  */
15 | @SuppressWarnings("WeakerAccess")
16 | @DoNotChangeThis
17 | public class ObjectReuseExtendedMeasurementSource extends RichParallelSourceFunction<ExtendedMeasurement> {
18 |
19 |     private static final long serialVersionUID = 1L;
20 |
21 |     private static final int NUM_SENSORS = 10_000;
22 |
23 |     public static final int LOWER_TEMPERATURE_CELCIUS = -10;
24 |     public static final int UPPER_TEMPERATURE_CELCIUS = 35;
25 |     public static final int LOWER_WIND_SPEED_KMH = 0;
26 |     public static final int UPPER_WIND_SPEED_KMH = 335;
27 |
28 |     private volatile boolean running = true;
29 |
30 |     private transient ExtendedMeasurement.Sensor[] sensors;
31 |     private transient ExtendedMeasurement.Location[] locations;
32 |     private transient double[] lastValue;
33 |     private transient MeanGauge sourceTemperatureUS;
34 |
35 |     @Override
36 |     public void open(final Configuration parameters) {
37 |         initSensors();
38 |
39 |         sourceTemperatureUS = getRuntimeContext().getMetricGroup()
40 |                 .gauge("sourceTemperatureUSmean", new MeanGauge());
41 |         getRuntimeContext().getMetricGroup().gauge(
42 |                 "sourceTemperatureUSmin", new MeanGauge.MinGauge(sourceTemperatureUS));
43 |         getRuntimeContext().getMetricGroup().gauge(
44 |                 "sourceTemperatureUSmax", new MeanGauge.MaxGauge(sourceTemperatureUS));
45 |     }
46 |
47 |     @Override
48 |     public void run(SourceContext<ExtendedMeasurement> ctx) {
49 |         final SplittableRandom rnd = new SplittableRandom();
50 |         final Object lock = ctx.getCheckpointLock();
51 |
52 |         while (running) {
53 |             ExtendedMeasurement event = randomEvent(rnd);
54 |
55 |             //noinspection SynchronizationOnLocalVariableOrMethodParameter
56 |             synchronized (lock) {
57 |                 ctx.collect(event);
58 |             }
59 |         }
60 |     }
61 |
62 |     @Override
63 |     public void cancel() {
64 |         running = false;
65 |     }
66 |
67 |     /**
68 |      * Creates sensor metadata that this source instance will work with.
69 |      */
70 |     private void initSensors() {
71 |         final SplittableRandom rnd = new SplittableRandom();
72 |         final ExtendedMeasurement.SensorType[] sensorTypes =
73 |                 ExtendedMeasurement.SensorType.values();
74 |
75 |         final int start = getRuntimeContext().getIndexOfThisSubtask() * NUM_SENSORS;
76 |         this.sensors = new ExtendedMeasurement.Sensor[NUM_SENSORS];
77 |         this.lastValue = new double[NUM_SENSORS];
78 |         this.locations = new ExtendedMeasurement.Location[NUM_SENSORS];
79 |         for (int i = 0; i < NUM_SENSORS; ++i) {
80 |             long sensorId = start + i;
81 |             long vendorId = sensorId % 100;
82 |             final ExtendedMeasurement.SensorType sensorType =
83 |                     sensorTypes[(i / 2) % sensorTypes.length];
84 |             sensors[i] = new ExtendedMeasurement.Sensor(sensorId, vendorId, sensorType);
85 |
86 |             lastValue[i] = randomInitialMeasurementValue(rnd, sensorType);
87 |
88 |             // assume that a sensor has a fixed position
89 |             locations[i] = randomInitialLocation(rnd, i);
90 |         }
91 |     }
92 |
93 |     /**
94 |      * Creates a random measurement value that a sensor will start with.
95 | */ 96 | private double randomInitialMeasurementValue( 97 | SplittableRandom rnd, 98 | ExtendedMeasurement.SensorType sensorType) { 99 | switch (sensorType) { 100 | case Temperature: 101 | // -10°C - 35°C 102 | return rnd.nextInt( 103 | (UPPER_TEMPERATURE_CELCIUS - LOWER_TEMPERATURE_CELCIUS) * 10) / 10.0 + 104 | LOWER_TEMPERATURE_CELCIUS; 105 | case Wind: 106 | // 0km/h - 335km/h 107 | return rnd.nextInt((UPPER_WIND_SPEED_KMH - LOWER_WIND_SPEED_KMH) * 10) / 10.0 + 108 | LOWER_WIND_SPEED_KMH; 109 | default: 110 | throw new IllegalStateException("Unknown sensor type: " + sensorType); 111 | } 112 | } 113 | 114 | /** 115 | * Creates a random location for a sensor, distinguishing two bounding boxes: US and DE. 116 | */ 117 | private static ExtendedMeasurement.Location randomInitialLocation(SplittableRandom rnd, int i) { 118 | final double longitude; 119 | final double latitude; 120 | // let's assume that no selected region wraps around LON -180/+180 121 | if (i < NUM_SENSORS / 2) { 122 | // in US 123 | longitude = rnd.nextDouble() * (GeoUtils.US_LON_EAST - GeoUtils.US_LON_WEST) + GeoUtils.US_LON_WEST; 124 | latitude = rnd.nextDouble() * (GeoUtils.US_LAT_NORTH - GeoUtils.US_LAT_SOUTH) + GeoUtils.US_LAT_SOUTH; 125 | } else { 126 | // in DE 127 | longitude = rnd.nextDouble() * (GeoUtils.DE_LON_EAST - GeoUtils.DE_LON_WEST) + GeoUtils.DE_LON_WEST; 128 | latitude = rnd.nextDouble() * (GeoUtils.DE_LAT_NORTH - GeoUtils.DE_LAT_SOUTH) + GeoUtils.DE_LAT_SOUTH; 129 | } 130 | double height = rnd.nextDouble() * 3000; 131 | return new ExtendedMeasurement.Location(longitude, latitude, height); 132 | } 133 | 134 | /** 135 | * Creates a randomized sensor value during runtime of the source. Each new value differs 136 | * slightly from the previous value that this sensor had. 137 | */ 138 | private ExtendedMeasurement randomEvent(SplittableRandom rnd) { 139 | int randomIdx = rnd.nextInt(sensors.length); 140 | ExtendedMeasurement.Sensor sensor = sensors[randomIdx]; 141 | ExtendedMeasurement.Location location = locations[randomIdx]; 142 | 143 | long timestamp = System.currentTimeMillis(); 144 | 145 | final double value = randomChangeMeasurementValue( 146 | rnd, 147 | sensor.getSensorType(), 148 | location, 149 | lastValue[randomIdx]); 150 | 151 | lastValue[randomIdx] = value; 152 | 153 | final ExtendedMeasurement.MeasurementValue measurement = 154 | new ExtendedMeasurement.MeasurementValue( 155 | value, 156 | (float) (rnd.nextInt(100) - 50) / 10.0f, // +- 5 157 | timestamp); 158 | 159 | return new ExtendedMeasurement( 160 | new ExtendedMeasurement.Sensor( 161 | sensor.getSensorId(), sensor.getVendorId(), sensor.getSensorType()), 162 | new ExtendedMeasurement.Location( 163 | location.getLongitude(), location.getLatitude(), location.getHeight()), 164 | measurement); 165 | } 166 | 167 | /** 168 | * Generates a new sensor value that is +-3 of the old value and reports a custom metric for 169 | * sensor values in the US. 
170 |      */
171 |     private double randomChangeMeasurementValue(
172 |             SplittableRandom rnd,
173 |             ExtendedMeasurement.SensorType sensorType,
174 |             ExtendedMeasurement.Location location,
175 |             double lastValue) {
176 |         double change = rnd.nextDouble(6) - 3.0; // +- 3
177 |         final double value;
178 |         switch (sensorType) {
179 |             case Temperature:
180 |                 value = newValueWithinBounds(
181 |                         lastValue, change, LOWER_TEMPERATURE_CELCIUS, UPPER_TEMPERATURE_CELCIUS);
182 |                 if (GeoUtils.isInUS(location.getLongitude(), location.getLatitude())) {
183 |                     sourceTemperatureUS.addValue(value);
184 |                 }
185 |                 break;
186 |             case Wind:
187 |                 value = newValueWithinBounds(
188 |                         lastValue, change, LOWER_WIND_SPEED_KMH, UPPER_WIND_SPEED_KMH);
189 |                 break;
190 |             default:
191 |                 throw new InternalError("Unknown sensor type: " + sensorType);
192 |         }
193 |         return value;
194 |     }
195 | 
196 |     /**
197 |      * Returns either lastValue + change (if within the given bounds) or
198 |      * lastValue - change (otherwise).
199 |      */
200 |     private static double newValueWithinBounds(
201 |             double lastValue,
202 |             double change,
203 |             double lowerLimit,
204 |             double upperLimit) {
205 |         double value;
206 |         if (lastValue + change >= lowerLimit && lastValue + change <= upperLimit) {
207 |             value = lastValue + change;
208 |         } else {
209 |             value = lastValue - change;
210 |         }
211 |         return value;
212 |     }
213 | }
214 | 
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/SourceUtils.java:
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 | 
3 | import com.fasterxml.jackson.core.JsonProcessingException;
4 | import com.fasterxml.jackson.databind.ObjectMapper;
5 | import com.ververica.flinktraining.provided.DoNotChangeThis;
6 | import org.apache.commons.lang3.RandomStringUtils;
7 | import org.slf4j.Logger;
8 | import org.slf4j.LoggerFactory;
9 | 
10 | import java.io.BufferedReader;
11 | import java.io.IOException;
12 | import java.io.InputStream;
13 | import java.io.InputStreamReader;
14 | import java.util.ArrayList;
15 | import java.util.Arrays;
16 | import java.util.List;
17 | import java.util.Random;
18 | 
19 | @DoNotChangeThis
20 | public class SourceUtils {
21 | 
22 |     public static final Logger log = LoggerFactory.getLogger(SourceUtils.class);
23 | 
24 |     public static final int NUM_OF_MEASUREMENTS = 100_000;
25 |     public static final int RANDOM_SEED = 1;
26 |     public static final float FAILURE_RATE = 0.0001f;
27 |     public static final List<Integer> IDLE_PARTITIONS = Arrays.asList(0, 4);
28 | 
29 |     public static FakeKafkaSource createFakeKafkaSource() {
30 |         List<byte[]> serializedMeasurements = createSerializedMeasurements();
31 |         return new FakeKafkaSource(RANDOM_SEED, FAILURE_RATE, IDLE_PARTITIONS, serializedMeasurements);
32 |     }
33 | 
34 |     private static List<byte[]> createSerializedMeasurements() {
35 |         Random rand = new Random(RANDOM_SEED);
36 |         ObjectMapper mapper = new ObjectMapper();
37 | 
38 |         final List<String> locations = readLocationsFromFile();
39 | 
40 |         List<byte[]> measurements = new ArrayList<>();
41 |         for (int i = 0; i < NUM_OF_MEASUREMENTS; i++) {
42 |             Measurement nextMeasurement = new Measurement(rand.nextInt(100),
43 |                     rand.nextDouble() * 100, locations.get(rand.nextInt(locations.size())), RandomStringUtils.randomAlphabetic(30));
44 |             try {
45 |                 measurements.add(mapper.writeValueAsBytes(nextMeasurement));
46 |             } catch (JsonProcessingException e) {
47 |                 log.error("Unable to serialize measurement.", e);
48 |                 throw new RuntimeException(e);
49 |             }
50 |         }
51 |         return measurements;
52 |     }
53 | 
54 |     private static List<String> readLocationsFromFile() {
55 |         List<String> locations = new ArrayList<>();
56 |         try (InputStream is = SourceUtils.class.getResourceAsStream("/cities.csv");
57 |                 BufferedReader br = new BufferedReader(new InputStreamReader(is))) {
58 |             String city;
59 |             while ((city = br.readLine()) != null) {
60 |                 locations.add(city);
61 |             }
62 |         } catch (IOException e) {
63 |             log.error("Unable to read cities from file.", e);
64 |             throw new RuntimeException(e);
65 |         }
66 |         return locations;
67 |     }
68 | }
69 | 
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/WeatherUtils.java:
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 | 
3 | import com.ververica.flinktraining.provided.DoNotChangeThis;
4 | 
5 | /**
6 |  * Various tools to convert units used in weather sensors.
7 |  */
8 | @SuppressWarnings("unused")
9 | @DoNotChangeThis
10 | public class WeatherUtils {
11 | 
12 |     /**
13 |      * Converts the given temperature from Fahrenheit to Celsius.
14 |      */
15 |     public static double fahrenheitToCelcius(double temperatureInFahrenheit) {
16 |         return ((temperatureInFahrenheit - 32) * 5.0) / 9.0;
17 |     }
18 | 
19 |     /**
20 |      * Converts the given temperature from Celsius to Fahrenheit.
21 |      */
22 |     public static double celciusToFahrenheit(double celcius) {
23 |         return (celcius * 9.0) / 5.0 + 32;
24 |     }
25 | 
26 |     /**
27 |      * Miles per hour -> kilometres per hour.
28 |      */
29 |     public static double mphToKph(double mph) {
30 |         return mph * 1.60934;
31 |     }
32 | 
33 |     /**
34 |      * Kilometres per hour -> miles per hour.
35 |      */
36 |     public static double kphToMph(double kph) {
37 |         return kph / 1.60934;
38 |     }
39 | }
40 | 
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/WindowedMeasurements.java:
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 | 
3 | import com.ververica.flinktraining.provided.DoNotChangeThis;
4 | 
5 | import java.util.Objects;
6 | 
7 | @DoNotChangeThis
8 | public class WindowedMeasurements {
9 | 
10 |     private long windowStart;
11 |     private long windowEnd;
12 |     private String location;
13 |     private long eventsPerWindow;
14 |     private double sumPerWindow;
15 | 
16 |     public WindowedMeasurements() {
17 |     }
18 | 
19 |     public WindowedMeasurements(final long windowStart, final long windowEnd, final String location, final long eventsPerWindow, final double sumPerWindow) {
20 |         this.windowStart = windowStart;
21 |         this.windowEnd = windowEnd;
22 |         this.location = location;
23 |         this.eventsPerWindow = eventsPerWindow;
24 |         this.sumPerWindow = sumPerWindow;
25 |     }
26 | 
27 |     public long getWindowStart() {
28 |         return windowStart;
29 |     }
30 | 
31 |     public void setWindowStart(final long windowStart) {
32 |         this.windowStart = windowStart;
33 |     }
34 | 
35 |     public long getWindowEnd() {
36 |         return windowEnd;
37 |     }
38 | 
39 |     public void setWindowEnd(final long windowEnd) {
40 |         this.windowEnd = windowEnd;
41 |     }
42 | 
43 |     public String getLocation() {
44 |         return location;
45 |     }
46 | 
47 |     public void setLocation(final String location) {
48 |         this.location = location;
49 |     }
50 | 
51 |     public long getEventsPerWindow() {
52 |         return eventsPerWindow;
53 |     }
54 | 
55 
| public void setEventsPerWindow(final long eventsPerWindow) { 56 | this.eventsPerWindow = eventsPerWindow; 57 | } 58 | 59 | public double getSumPerWindow() { 60 | return sumPerWindow; 61 | } 62 | 63 | public void setSumPerWindow(final double sumPerWindow) { 64 | this.sumPerWindow = sumPerWindow; 65 | } 66 | 67 | @Override 68 | public boolean equals(final Object o) { 69 | if (this == o) { 70 | return true; 71 | } 72 | if (o == null || getClass() != o.getClass()) { 73 | return false; 74 | } 75 | final WindowedMeasurements that = (WindowedMeasurements) o; 76 | return windowStart == that.windowStart && 77 | windowEnd == that.windowEnd && 78 | eventsPerWindow == that.eventsPerWindow && 79 | Double.compare(that.sumPerWindow, sumPerWindow) == 0 && 80 | Objects.equals(location, that.location); 81 | } 82 | 83 | @Override 84 | public int hashCode() { 85 | return Objects.hash(windowStart, windowEnd, location, eventsPerWindow, sumPerWindow); 86 | } 87 | 88 | @Override 89 | public String toString() { 90 | final StringBuilder sb = new StringBuilder("WindowedMeasurements{"); 91 | sb.append("windowStart=").append(windowStart); 92 | sb.append(", windowEnd=").append(windowEnd); 93 | sb.append(", location='").append(location).append('\''); 94 | sb.append(", eventsPerWindow=").append(eventsPerWindow); 95 | sb.append(", sumPerWindow=").append(sumPerWindow); 96 | sb.append('}'); 97 | return sb.toString(); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/SimpleMeasurement.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot; 2 | 3 | import java.util.Objects; 4 | 5 | public class SimpleMeasurement { 6 | 7 | private int sensorId; 8 | private double value; 9 | private String location; 10 | 11 | public SimpleMeasurement() { 12 | } 13 | 14 | public SimpleMeasurement(final int sensorId, final double value, final String location, final String measurementInformation) { 15 | this.sensorId = sensorId; 16 | this.value = value; 17 | this.location = location; 18 | } 19 | 20 | public int getSensorId() { 21 | return sensorId; 22 | } 23 | 24 | public void setSensorId(final int sensorId) { 25 | this.sensorId = sensorId; 26 | } 27 | 28 | public double getValue() { 29 | return value; 30 | } 31 | 32 | public void setValue(final double value) { 33 | this.value = value; 34 | } 35 | 36 | public String getLocation() { 37 | return location; 38 | } 39 | 40 | public void setLocation(final String location) { 41 | this.location = location; 42 | } 43 | 44 | @Override 45 | public boolean equals(final Object o) { 46 | if (this == o) { 47 | return true; 48 | } 49 | if (o == null || getClass() != o.getClass()) { 50 | return false; 51 | } 52 | final SimpleMeasurement 53 | that = (SimpleMeasurement) o; 54 | return sensorId == that.sensorId && 55 | Double.compare(that.value, value) == 0 && 56 | Objects.equals(location, that.location); 57 | } 58 | 59 | @Override 60 | public int hashCode() { 61 | return Objects.hash(sensorId, value, location); 62 | } 63 | 64 | @Override 65 | public String toString() { 66 | final StringBuilder sb = new StringBuilder("Measurement{"); 67 | sb.append("sensorId=").append(sensorId); 68 | sb.append(", value=").append(value); 69 | sb.append(", location='").append(location).append('\''); 70 | sb.append('}'); 71 | return sb.toString(); 72 | } 73 | } 74 | 
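*Editor's note:* `SimpleMeasurement` mirrors the provided `Measurement` type but drops the 30-character `measurementInformation` payload that `SourceUtils` attaches to every record (its four-argument constructor accepts that argument and deliberately ignores it). The sketch below is illustrative only and not part of the repository; it assumes `SimpleMeasurement` from the file above is on the classpath and uses hand-written JSON matching its bean properties:

```java
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.nio.charset.StandardCharsets;

// Hypothetical demo class, not part of this repository.
public class SimpleMeasurementDemo {
    public static void main(String[] args) throws Exception {
        // Same setting as the jobs' ObjectMapperSingleton: unknown JSON fields
        // (here: measurementInformation) are silently ignored.
        ObjectMapper mapper = new ObjectMapper()
                .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

        byte[] json = ("{\"sensorId\":42,\"value\":13.37,\"location\":\"Berlin\","
                + "\"measurementInformation\":\"abcdefghijklmnopqrstuvwxyzabcd\"}")
                .getBytes(StandardCharsets.UTF_8);

        // Only the three small fields survive the round trip.
        SimpleMeasurement m = mapper.readValue(json, SimpleMeasurement.class);
        System.out.println(m); // Measurement{sensorId=42, value=13.37, location='Berlin'}
    }
}
```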
-------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution1.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot; 2 | 3 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 4 | import org.apache.flink.api.java.utils.ParameterTool; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.apache.flink.metrics.Counter; 7 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram; 8 | import org.apache.flink.streaming.api.TimeCharacteristic; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink; 13 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 14 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction; 15 | import org.apache.flink.streaming.api.windowing.time.Time; 16 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 17 | import org.apache.flink.util.Collector; 18 | import org.apache.flink.util.OutputTag; 19 | 20 | import com.fasterxml.jackson.databind.DeserializationFeature; 21 | import com.fasterxml.jackson.databind.JsonNode; 22 | import com.fasterxml.jackson.databind.ObjectMapper; 23 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord; 24 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements; 25 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils; 26 | 27 | import java.io.IOException; 28 | import java.util.concurrent.TimeUnit; 29 | 30 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment; 31 | 32 | public class TroubledStreamingJobSolution1 { 33 | 34 | public static void main(String[] args) throws Exception { 35 | ParameterTool parameters = ParameterTool.fromArgs(args); 36 | 37 | final boolean local = parameters.getBoolean("local", false); 38 | 39 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local); 40 | 41 | //Time Characteristics 42 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 43 | env.getConfig().setAutoWatermarkInterval(2000); 44 | 45 | //Checkpointing Configuration 46 | env.enableCheckpointing(5000); 47 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000); 48 | 49 | DataStream sourceStream = env 50 | .addSource(SourceUtils.createFakeKafkaSource()) 51 | .name("FakeKafkaSource") 52 | .uid("FakeKafkaSource") 53 | .assignTimestampsAndWatermarks(new MeasurementTSExtractor()) 54 | .name("Watermarks") 55 | .uid("Watermarks") 56 | .flatMap(new MeasurementDeserializer()) 57 | .name("Deserialization") 58 | .uid("Deserialization"); 59 | 60 | OutputTag lateDataTag = new OutputTag("late-data") { 61 | private static final long serialVersionUID = 33513631677208956L; 62 | }; 63 | 64 | SingleOutputStreamOperator aggregatedPerLocation = sourceStream 65 | .keyBy(jsonNode -> jsonNode.get("location").asText()) 66 | .timeWindow(Time.of(1, TimeUnit.SECONDS)) 67 | .sideOutputLateData(lateDataTag) 68 | .process(new MeasurementWindowAggregatingFunction()) 69 | .name("WindowedAggregationPerLocation") 70 | 
.uid("WindowedAggregationPerLocation"); 71 | 72 | if (local) { 73 | aggregatedPerLocation.print() 74 | .name("NormalOutput") 75 | .uid("NormalOutput") 76 | .disableChaining(); 77 | aggregatedPerLocation.getSideOutput(lateDataTag).printToErr() 78 | .name("LateDataSink") 79 | .uid("LateDataSink") 80 | .disableChaining(); 81 | } else { 82 | aggregatedPerLocation.addSink(new DiscardingSink<>()) 83 | .name("NormalOutput") 84 | .uid("NormalOutput") 85 | .disableChaining(); 86 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>()) 87 | .name("LateDataSink") 88 | .uid("LateDataSink") 89 | .disableChaining(); 90 | } 91 | 92 | env.execute(TroubledStreamingJobSolution1.class.getSimpleName()); 93 | } 94 | 95 | /** 96 | * Deserializes the JSON Kafka message. 97 | */ 98 | public static class MeasurementDeserializer extends RichFlatMapFunction { 99 | private static final long serialVersionUID = 2L; 100 | 101 | private Counter numInvalidRecords; 102 | 103 | @Override 104 | public void open(final Configuration parameters) throws Exception { 105 | super.open(parameters); 106 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords"); 107 | } 108 | 109 | @Override 110 | public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector out) { 111 | final JsonNode node; 112 | try { 113 | node = deserialize(kafkaRecord.getValue()); 114 | } catch (IOException e) { 115 | numInvalidRecords.inc(); 116 | return; 117 | } 118 | out.collect(node); 119 | } 120 | 121 | private JsonNode deserialize(final byte[] bytes) throws IOException { 122 | return ObjectMapperSingleton.getInstance().readValue(bytes, JsonNode.class); 123 | } 124 | } 125 | 126 | public static class MeasurementTSExtractor 127 | extends BoundedOutOfOrdernessTimestampExtractor { 128 | private static final long serialVersionUID = 1L; 129 | 130 | MeasurementTSExtractor() { 131 | super(Time.of(250, TimeUnit.MILLISECONDS)); 132 | } 133 | 134 | @Override 135 | public long extractTimestamp(final FakeKafkaRecord record) { 136 | return record.getTimestamp(); 137 | } 138 | } 139 | 140 | public static class MeasurementWindowAggregatingFunction 141 | extends ProcessWindowFunction { 142 | private static final long serialVersionUID = 1L; 143 | 144 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000; 145 | 146 | private transient DescriptiveStatisticsHistogram eventTimeLag; 147 | 148 | MeasurementWindowAggregatingFunction() { 149 | } 150 | 151 | @Override 152 | public void process( 153 | final String location, 154 | final Context context, 155 | final Iterable input, 156 | final Collector out) { 157 | 158 | WindowedMeasurements aggregate = new WindowedMeasurements(); 159 | for (JsonNode record : input) { 160 | double result = Double.parseDouble(record.get("value").asText()); 161 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result); 162 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1); 163 | } 164 | 165 | final TimeWindow window = context.window(); 166 | aggregate.setWindowStart(window.getStart()); 167 | aggregate.setWindowEnd(window.getEnd()); 168 | aggregate.setLocation(location); 169 | 170 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd()); 171 | out.collect(aggregate); 172 | } 173 | 174 | @Override 175 | public void open(Configuration parameters) throws Exception { 176 | super.open(parameters); 177 | 178 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag", 179 | new 
DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE)); 180 | } 181 | } 182 | 183 | private static class ObjectMapperSingleton { 184 | static ObjectMapper getInstance() { 185 | ObjectMapper objectMapper = new ObjectMapper(); 186 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); 187 | return objectMapper; 188 | } 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution2.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot; 2 | 3 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 4 | import org.apache.flink.api.java.utils.ParameterTool; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.apache.flink.metrics.Counter; 7 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram; 8 | import org.apache.flink.streaming.api.TimeCharacteristic; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 13 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink; 14 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction; 15 | import org.apache.flink.streaming.api.watermark.Watermark; 16 | import org.apache.flink.streaming.api.windowing.time.Time; 17 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 18 | import org.apache.flink.util.Collector; 19 | import org.apache.flink.util.OutputTag; 20 | 21 | import com.fasterxml.jackson.databind.DeserializationFeature; 22 | import com.fasterxml.jackson.databind.JsonNode; 23 | import com.fasterxml.jackson.databind.ObjectMapper; 24 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord; 25 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements; 26 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils; 27 | 28 | import java.io.IOException; 29 | import java.util.concurrent.TimeUnit; 30 | 31 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment; 32 | 33 | public class TroubledStreamingJobSolution2 { 34 | 35 | public static void main(String[] args) throws Exception { 36 | ParameterTool parameters = ParameterTool.fromArgs(args); 37 | 38 | final boolean local = parameters.getBoolean("local", false); 39 | 40 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local); 41 | 42 | //Time Characteristics 43 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 44 | env.getConfig().setAutoWatermarkInterval(2000); 45 | 46 | //Checkpointing Configuration 47 | env.enableCheckpointing(5000); 48 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000); 49 | 50 | DataStream sourceStream = env 51 | .addSource(SourceUtils.createFakeKafkaSource()) 52 | .name("FakeKafkaSource") 53 | .uid("FakeKafkaSource") 54 | .assignTimestampsAndWatermarks( 55 | new MeasurementTSExtractor(Time.of(250, TimeUnit.MILLISECONDS), 56 | Time.of(1, TimeUnit.SECONDS))) 57 | .name("Watermarks") 58 | .uid("Watermarks") 59 | .flatMap(new MeasurementDeserializer()) 60 | .name("Deserialization") 61 | 
.uid("Deserialization"); 62 | 63 | OutputTag lateDataTag = new OutputTag("late-data") { 64 | private static final long serialVersionUID = 33513631677208956L; 65 | }; 66 | 67 | SingleOutputStreamOperator aggregatedPerLocation = sourceStream 68 | .keyBy(jsonNode -> jsonNode.get("location").asText()) 69 | .timeWindow(Time.of(1, TimeUnit.SECONDS)) 70 | .sideOutputLateData(lateDataTag) 71 | .process(new MeasurementWindowAggregatingFunction()) 72 | .name("WindowedAggregationPerLocation") 73 | .uid("WindowedAggregationPerLocation"); 74 | 75 | if (local) { 76 | aggregatedPerLocation.print() 77 | .name("NormalOutput") 78 | .uid("NormalOutput") 79 | .disableChaining(); 80 | aggregatedPerLocation.getSideOutput(lateDataTag).printToErr() 81 | .name("LateDataSink") 82 | .uid("LateDataSink") 83 | .disableChaining(); 84 | } else { 85 | aggregatedPerLocation.addSink(new DiscardingSink<>()) 86 | .name("NormalOutput") 87 | .uid("NormalOutput") 88 | .disableChaining(); 89 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>()) 90 | .name("LateDataSink") 91 | .uid("LateDataSink") 92 | .disableChaining(); 93 | } 94 | 95 | env.execute(TroubledStreamingJobSolution2.class.getSimpleName()); 96 | } 97 | 98 | /** 99 | * Deserializes the JSON Kafka message. 100 | */ 101 | public static class MeasurementDeserializer extends RichFlatMapFunction { 102 | private static final long serialVersionUID = 2L; 103 | 104 | private Counter numInvalidRecords; 105 | 106 | @Override 107 | public void open(final Configuration parameters) throws Exception { 108 | super.open(parameters); 109 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords"); 110 | } 111 | 112 | @Override 113 | public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector out) { 114 | final JsonNode node; 115 | try { 116 | node = deserialize(kafkaRecord.getValue()); 117 | } catch (IOException e) { 118 | numInvalidRecords.inc(); 119 | return; 120 | } 121 | out.collect(node); 122 | } 123 | 124 | private JsonNode deserialize(final byte[] bytes) throws IOException { 125 | return ObjectMapperSingleton.getInstance().readValue(bytes, JsonNode.class); 126 | } 127 | } 128 | 129 | public static class MeasurementTSExtractor implements AssignerWithPeriodicWatermarks { 130 | private static final long serialVersionUID = 2L; 131 | 132 | private long currentMaxTimestamp; 133 | private long lastEmittedWatermark = Long.MIN_VALUE; 134 | private long lastRecordProcessingTime; 135 | 136 | private final long maxOutOfOrderness; 137 | private final long idleTimeout; 138 | 139 | MeasurementTSExtractor(Time maxOutOfOrderness, Time idleTimeout) { 140 | if (maxOutOfOrderness.toMilliseconds() < 0) { 141 | throw new RuntimeException("Tried to set the maximum allowed " + 142 | "lateness to " + maxOutOfOrderness + 143 | ". This parameter cannot be negative."); 144 | } 145 | 146 | if (idleTimeout.toMilliseconds() < 0) { 147 | throw new RuntimeException("Tried to set the idle Timeout" + idleTimeout + 148 | ". 
This parameter cannot be negative."); 149 | } 150 | 151 | 152 | this.maxOutOfOrderness = maxOutOfOrderness.toMilliseconds(); 153 | this.idleTimeout = idleTimeout.toMilliseconds(); 154 | this.currentMaxTimestamp = Long.MIN_VALUE; 155 | } 156 | 157 | public long getMaxOutOfOrdernessInMillis() { 158 | return maxOutOfOrderness; 159 | } 160 | 161 | @Override 162 | public final Watermark getCurrentWatermark() { 163 | 164 | // if last record was processed more than the idleTimeout in the past, consider this 165 | // source idle and set timestamp to current processing time 166 | long currentProcessingTime = System.currentTimeMillis(); 167 | if (lastRecordProcessingTime < currentProcessingTime - idleTimeout) { 168 | this.currentMaxTimestamp = currentProcessingTime; 169 | } 170 | 171 | long potentialWM = this.currentMaxTimestamp - maxOutOfOrderness; 172 | if (potentialWM >= lastEmittedWatermark) { 173 | lastEmittedWatermark = potentialWM; 174 | } 175 | return new Watermark(lastEmittedWatermark); 176 | } 177 | 178 | @Override 179 | public final long extractTimestamp(FakeKafkaRecord element, long previousElementTimestamp) { 180 | lastRecordProcessingTime = System.currentTimeMillis(); 181 | long timestamp = element.getTimestamp(); 182 | if (timestamp > currentMaxTimestamp) { 183 | currentMaxTimestamp = timestamp; 184 | } 185 | return timestamp; 186 | } 187 | } 188 | 189 | public static class MeasurementWindowAggregatingFunction 190 | extends ProcessWindowFunction { 191 | private static final long serialVersionUID = 1L; 192 | 193 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000; 194 | 195 | private transient DescriptiveStatisticsHistogram eventTimeLag; 196 | 197 | MeasurementWindowAggregatingFunction() { 198 | } 199 | 200 | @Override 201 | public void process( 202 | final String location, 203 | final Context context, 204 | final Iterable input, 205 | final Collector out) { 206 | 207 | WindowedMeasurements aggregate = new WindowedMeasurements(); 208 | for (JsonNode record : input) { 209 | double result = Double.parseDouble(record.get("value").asText()); 210 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result); 211 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1); 212 | } 213 | 214 | final TimeWindow window = context.window(); 215 | aggregate.setWindowStart(window.getStart()); 216 | aggregate.setWindowEnd(window.getEnd()); 217 | aggregate.setLocation(location); 218 | 219 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd()); 220 | out.collect(aggregate); 221 | } 222 | 223 | @Override 224 | public void open(Configuration parameters) throws Exception { 225 | super.open(parameters); 226 | 227 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag", 228 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE)); 229 | } 230 | } 231 | 232 | private static class ObjectMapperSingleton { 233 | static ObjectMapper getInstance() { 234 | ObjectMapper objectMapper = new ObjectMapper(); 235 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); 236 | return objectMapper; 237 | } 238 | } 239 | } 240 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution31.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot; 2 | 3 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 4 | 
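// Editor's note (comment added for this write-up): Solution 3.1 is identical to Solution 2
// except that the auto-watermark interval below is lowered from 2000 ms to 100 ms, so
// watermarks, and with them the window results, are emitted far more promptly.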
import org.apache.flink.api.java.utils.ParameterTool; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.apache.flink.metrics.Counter; 7 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram; 8 | import org.apache.flink.streaming.api.TimeCharacteristic; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 13 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink; 14 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction; 15 | import org.apache.flink.streaming.api.watermark.Watermark; 16 | import org.apache.flink.streaming.api.windowing.time.Time; 17 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 18 | import org.apache.flink.util.Collector; 19 | import org.apache.flink.util.OutputTag; 20 | 21 | import com.fasterxml.jackson.databind.DeserializationFeature; 22 | import com.fasterxml.jackson.databind.JsonNode; 23 | import com.fasterxml.jackson.databind.ObjectMapper; 24 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord; 25 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements; 26 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils; 27 | 28 | import java.io.IOException; 29 | import java.util.concurrent.TimeUnit; 30 | 31 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment; 32 | 33 | public class TroubledStreamingJobSolution31 { 34 | 35 | public static void main(String[] args) throws Exception { 36 | ParameterTool parameters = ParameterTool.fromArgs(args); 37 | 38 | final boolean local = parameters.getBoolean("local", false); 39 | 40 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local); 41 | 42 | //Time Characteristics 43 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 44 | env.getConfig().setAutoWatermarkInterval(100); 45 | 46 | //Checkpointing Configuration 47 | env.enableCheckpointing(5000); 48 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000); 49 | 50 | DataStream sourceStream = env 51 | .addSource(SourceUtils.createFakeKafkaSource()) 52 | .name("FakeKafkaSource") 53 | .uid("FakeKafkaSource") 54 | .assignTimestampsAndWatermarks( 55 | new MeasurementTSExtractor(Time.of(250, TimeUnit.MILLISECONDS), 56 | Time.of(1, TimeUnit.SECONDS))) 57 | .name("Watermarks") 58 | .uid("Watermarks") 59 | .flatMap(new MeasurementDeserializer()) 60 | .name("Deserialization") 61 | .uid("Deserialization"); 62 | 63 | OutputTag lateDataTag = new OutputTag("late-data") { 64 | private static final long serialVersionUID = 33513631677208956L; 65 | }; 66 | 67 | SingleOutputStreamOperator aggregatedPerLocation = sourceStream 68 | .keyBy(jsonNode -> jsonNode.get("location").asText()) 69 | .timeWindow(Time.of(1, TimeUnit.SECONDS)) 70 | .sideOutputLateData(lateDataTag) 71 | .process(new MeasurementWindowAggregatingFunction()) 72 | .name("WindowedAggregationPerLocation") 73 | .uid("WindowedAggregationPerLocation"); 74 | 75 | if (local) { 76 | aggregatedPerLocation.print() 77 | .name("NormalOutput") 78 | .uid("NormalOutput") 79 | .disableChaining(); 80 | aggregatedPerLocation.getSideOutput(lateDataTag).printToErr() 81 | .name("LateDataSink") 82 | .uid("LateDataSink") 83 | 
.disableChaining(); 84 | } else { 85 | aggregatedPerLocation.addSink(new DiscardingSink<>()) 86 | .name("NormalOutput") 87 | .uid("NormalOutput") 88 | .disableChaining(); 89 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>()) 90 | .name("LateDataSink") 91 | .uid("LateDataSink") 92 | .disableChaining(); 93 | } 94 | 95 | env.execute(TroubledStreamingJobSolution31.class.getSimpleName()); 96 | } 97 | 98 | /** 99 | * Deserializes the JSON Kafka message. 100 | */ 101 | public static class MeasurementDeserializer extends RichFlatMapFunction { 102 | private static final long serialVersionUID = 2L; 103 | 104 | private Counter numInvalidRecords; 105 | 106 | @Override 107 | public void open(final Configuration parameters) throws Exception { 108 | super.open(parameters); 109 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords"); 110 | } 111 | 112 | @Override 113 | public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector out) { 114 | final JsonNode node; 115 | try { 116 | node = deserialize(kafkaRecord.getValue()); 117 | } catch (IOException e) { 118 | numInvalidRecords.inc(); 119 | return; 120 | } 121 | out.collect(node); 122 | } 123 | 124 | private JsonNode deserialize(final byte[] bytes) throws IOException { 125 | return ObjectMapperSingleton.getInstance().readValue(bytes, JsonNode.class); 126 | } 127 | } 128 | 129 | public static class MeasurementTSExtractor implements AssignerWithPeriodicWatermarks { 130 | private static final long serialVersionUID = 2L; 131 | 132 | private long currentMaxTimestamp; 133 | private long lastEmittedWatermark = Long.MIN_VALUE; 134 | private long lastRecordProcessingTime; 135 | 136 | private final long maxOutOfOrderness; 137 | private final long idleTimeout; 138 | 139 | MeasurementTSExtractor(Time maxOutOfOrderness, Time idleTimeout) { 140 | if (maxOutOfOrderness.toMilliseconds() < 0) { 141 | throw new RuntimeException("Tried to set the maximum allowed " + 142 | "lateness to " + maxOutOfOrderness + 143 | ". This parameter cannot be negative."); 144 | } 145 | 146 | if (idleTimeout.toMilliseconds() < 0) { 147 | throw new RuntimeException("Tried to set the idle Timeout" + idleTimeout + 148 | ". 
This parameter cannot be negative."); 149 | } 150 | 151 | 152 | this.maxOutOfOrderness = maxOutOfOrderness.toMilliseconds(); 153 | this.idleTimeout = idleTimeout.toMilliseconds(); 154 | this.currentMaxTimestamp = Long.MIN_VALUE; 155 | } 156 | 157 | public long getMaxOutOfOrdernessInMillis() { 158 | return maxOutOfOrderness; 159 | } 160 | 161 | @Override 162 | public final Watermark getCurrentWatermark() { 163 | 164 | // if last record was processed more than the idleTimeout in the past, consider this 165 | // source idle and set timestamp to current processing time 166 | long currentProcessingTime = System.currentTimeMillis(); 167 | if (lastRecordProcessingTime < currentProcessingTime - idleTimeout) { 168 | this.currentMaxTimestamp = currentProcessingTime; 169 | } 170 | 171 | long potentialWM = this.currentMaxTimestamp - maxOutOfOrderness; 172 | if (potentialWM >= lastEmittedWatermark) { 173 | lastEmittedWatermark = potentialWM; 174 | } 175 | return new Watermark(lastEmittedWatermark); 176 | } 177 | 178 | @Override 179 | public final long extractTimestamp(FakeKafkaRecord element, long previousElementTimestamp) { 180 | lastRecordProcessingTime = System.currentTimeMillis(); 181 | long timestamp = element.getTimestamp(); 182 | if (timestamp > currentMaxTimestamp) { 183 | currentMaxTimestamp = timestamp; 184 | } 185 | return timestamp; 186 | } 187 | } 188 | 189 | public static class MeasurementWindowAggregatingFunction 190 | extends ProcessWindowFunction { 191 | private static final long serialVersionUID = 1L; 192 | 193 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000; 194 | 195 | private transient DescriptiveStatisticsHistogram eventTimeLag; 196 | 197 | MeasurementWindowAggregatingFunction() { 198 | } 199 | 200 | @Override 201 | public void process( 202 | final String location, 203 | final Context context, 204 | final Iterable input, 205 | final Collector out) { 206 | 207 | WindowedMeasurements aggregate = new WindowedMeasurements(); 208 | for (JsonNode record : input) { 209 | double result = Double.parseDouble(record.get("value").asText()); 210 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result); 211 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1); 212 | } 213 | 214 | final TimeWindow window = context.window(); 215 | aggregate.setWindowStart(window.getStart()); 216 | aggregate.setWindowEnd(window.getEnd()); 217 | aggregate.setLocation(location); 218 | 219 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd()); 220 | out.collect(aggregate); 221 | } 222 | 223 | @Override 224 | public void open(Configuration parameters) throws Exception { 225 | super.open(parameters); 226 | 227 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag", 228 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE)); 229 | } 230 | } 231 | 232 | private static class ObjectMapperSingleton { 233 | static ObjectMapper getInstance() { 234 | ObjectMapper objectMapper = new ObjectMapper(); 235 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); 236 | return objectMapper; 237 | } 238 | } 239 | } 240 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution32.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot; 2 | 3 | import org.apache.flink.api.common.functions.AggregateFunction; 4 | 
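// Editor's note (comment added for this write-up): Solution 3.2 replaces the record-buffering
// ProcessWindowFunction with an incremental AggregateFunction plus a slim ProcessWindowFunction,
// so each window keeps a single running WindowedMeasurements accumulator per key instead of
// holding on to every deserialized JsonNode until the window fires.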
import org.apache.flink.api.common.functions.RichFlatMapFunction; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.configuration.Configuration; 7 | import org.apache.flink.metrics.Counter; 8 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram; 9 | import org.apache.flink.streaming.api.TimeCharacteristic; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 14 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink; 15 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction; 16 | import org.apache.flink.streaming.api.watermark.Watermark; 17 | import org.apache.flink.streaming.api.windowing.time.Time; 18 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 19 | import org.apache.flink.util.Collector; 20 | import org.apache.flink.util.OutputTag; 21 | 22 | import com.fasterxml.jackson.databind.DeserializationFeature; 23 | import com.fasterxml.jackson.databind.JsonNode; 24 | import com.fasterxml.jackson.databind.ObjectMapper; 25 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord; 26 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements; 27 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils; 28 | 29 | import java.io.IOException; 30 | import java.util.concurrent.TimeUnit; 31 | 32 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment; 33 | 34 | public class TroubledStreamingJobSolution32 { 35 | 36 | public static void main(String[] args) throws Exception { 37 | ParameterTool parameters = ParameterTool.fromArgs(args); 38 | 39 | final boolean local = parameters.getBoolean("local", false); 40 | 41 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local); 42 | 43 | //Time Characteristics 44 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 45 | env.getConfig().setAutoWatermarkInterval(100); 46 | 47 | //Checkpointing Configuration 48 | env.enableCheckpointing(5000); 49 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000); 50 | 51 | DataStream sourceStream = env 52 | .addSource(SourceUtils.createFakeKafkaSource()) 53 | .name("FakeKafkaSource") 54 | .uid("FakeKafkaSource") 55 | .assignTimestampsAndWatermarks( 56 | new MeasurementTSExtractor(Time.of(250, TimeUnit.MILLISECONDS), 57 | Time.of(1, TimeUnit.SECONDS))) 58 | .name("Watermarks") 59 | .uid("Watermarks") 60 | .flatMap(new MeasurementDeserializer()) 61 | .name("Deserialization") 62 | .uid("Deserialization"); 63 | 64 | OutputTag lateDataTag = new OutputTag("late-data") { 65 | private static final long serialVersionUID = 33513631677208956L; 66 | }; 67 | 68 | SingleOutputStreamOperator aggregatedPerLocation = sourceStream 69 | .keyBy(jsonNode -> jsonNode.get("location").asText()) 70 | .timeWindow(Time.of(1, TimeUnit.SECONDS)) 71 | .sideOutputLateData(lateDataTag) 72 | .aggregate(new MeasurementWindowAggregatingFunction(), 73 | new MeasurementWindowProcessFunction()) 74 | .name("WindowedAggregationPerLocation") 75 | .uid("WindowedAggregationPerLocation"); 76 | 77 | if (local) { 78 | aggregatedPerLocation.print() 79 | .name("NormalOutput") 80 | .uid("NormalOutput") 81 | .disableChaining(); 82 | 
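// Editor's note (comment added for this write-up): disableChaining() keeps each sink in its
// own task, so its records and any backpressure show up separately in Flink's web UI.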
aggregatedPerLocation.getSideOutput(lateDataTag).printToErr() 83 | .name("LateDataSink") 84 | .uid("LateDataSink") 85 | .disableChaining(); 86 | } else { 87 | aggregatedPerLocation.addSink(new DiscardingSink<>()) 88 | .name("NormalOutput") 89 | .uid("NormalOutput") 90 | .disableChaining(); 91 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>()) 92 | .name("LateDataSink") 93 | .uid("LateDataSink") 94 | .disableChaining(); 95 | } 96 | 97 | env.execute(TroubledStreamingJobSolution32.class.getSimpleName()); 98 | } 99 | 100 | /** 101 | * Deserializes the JSON Kafka message. 102 | */ 103 | public static class MeasurementDeserializer extends RichFlatMapFunction { 104 | private static final long serialVersionUID = 2L; 105 | 106 | private Counter numInvalidRecords; 107 | 108 | @Override 109 | public void open(final Configuration parameters) throws Exception { 110 | super.open(parameters); 111 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords"); 112 | } 113 | 114 | @Override 115 | public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector out) { 116 | final JsonNode node; 117 | try { 118 | node = deserialize(kafkaRecord.getValue()); 119 | } catch (IOException e) { 120 | numInvalidRecords.inc(); 121 | return; 122 | } 123 | out.collect(node); 124 | } 125 | 126 | private JsonNode deserialize(final byte[] bytes) throws IOException { 127 | return ObjectMapperSingleton.getInstance().readValue(bytes, JsonNode.class); 128 | } 129 | } 130 | 131 | public static class MeasurementTSExtractor implements AssignerWithPeriodicWatermarks { 132 | private static final long serialVersionUID = 2L; 133 | 134 | private long currentMaxTimestamp; 135 | private long lastEmittedWatermark = Long.MIN_VALUE; 136 | private long lastRecordProcessingTime; 137 | 138 | private final long maxOutOfOrderness; 139 | private final long idleTimeout; 140 | 141 | MeasurementTSExtractor(Time maxOutOfOrderness, Time idleTimeout) { 142 | if (maxOutOfOrderness.toMilliseconds() < 0) { 143 | throw new RuntimeException("Tried to set the maximum allowed " + 144 | "lateness to " + maxOutOfOrderness + 145 | ". This parameter cannot be negative."); 146 | } 147 | 148 | if (idleTimeout.toMilliseconds() < 0) { 149 | throw new RuntimeException("Tried to set the idle Timeout" + idleTimeout + 150 | ". 
This parameter cannot be negative."); 151 | } 152 | 153 | 154 | this.maxOutOfOrderness = maxOutOfOrderness.toMilliseconds(); 155 | this.idleTimeout = idleTimeout.toMilliseconds(); 156 | this.currentMaxTimestamp = Long.MIN_VALUE; 157 | } 158 | 159 | public long getMaxOutOfOrdernessInMillis() { 160 | return maxOutOfOrderness; 161 | } 162 | 163 | @Override 164 | public final Watermark getCurrentWatermark() { 165 | 166 | // if last record was processed more than the idleTimeout in the past, consider this 167 | // source idle and set timestamp to current processing time 168 | long currentProcessingTime = System.currentTimeMillis(); 169 | if (lastRecordProcessingTime < currentProcessingTime - idleTimeout) { 170 | this.currentMaxTimestamp = currentProcessingTime; 171 | } 172 | 173 | long potentialWM = this.currentMaxTimestamp - maxOutOfOrderness; 174 | if (potentialWM >= lastEmittedWatermark) { 175 | lastEmittedWatermark = potentialWM; 176 | } 177 | return new Watermark(lastEmittedWatermark); 178 | } 179 | 180 | @Override 181 | public final long extractTimestamp(FakeKafkaRecord element, long previousElementTimestamp) { 182 | lastRecordProcessingTime = System.currentTimeMillis(); 183 | long timestamp = element.getTimestamp(); 184 | if (timestamp > currentMaxTimestamp) { 185 | currentMaxTimestamp = timestamp; 186 | } 187 | return timestamp; 188 | } 189 | } 190 | 191 | public static class MeasurementWindowAggregatingFunction 192 | implements AggregateFunction { 193 | private static final long serialVersionUID = 2L; 194 | 195 | MeasurementWindowAggregatingFunction() {} 196 | 197 | @Override 198 | public WindowedMeasurements createAccumulator() { 199 | return new WindowedMeasurements(); 200 | } 201 | 202 | @Override 203 | public WindowedMeasurements add(final JsonNode record, final WindowedMeasurements aggregate) { 204 | double result = Double.parseDouble(record.get("value").asText()); 205 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result); 206 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1); 207 | return aggregate; 208 | } 209 | 210 | @Override 211 | public WindowedMeasurements getResult(final WindowedMeasurements windowedMeasurements) { 212 | return windowedMeasurements; 213 | } 214 | 215 | @Override 216 | public WindowedMeasurements merge(final WindowedMeasurements agg1, final WindowedMeasurements agg2) { 217 | agg2.setEventsPerWindow(agg1.getEventsPerWindow() + agg2.getEventsPerWindow()); 218 | agg2.setSumPerWindow(agg1.getSumPerWindow() + agg2.getSumPerWindow()); 219 | return agg2; 220 | } 221 | } 222 | 223 | public static class MeasurementWindowProcessFunction 224 | extends ProcessWindowFunction { 225 | private static final long serialVersionUID = 1L; 226 | 227 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000; 228 | 229 | private transient DescriptiveStatisticsHistogram eventTimeLag; 230 | 231 | MeasurementWindowProcessFunction() { 232 | } 233 | 234 | @Override 235 | public void process( 236 | final String location, 237 | final Context context, 238 | final Iterable input, 239 | final Collector out) { 240 | 241 | // Windows with pre-aggregation only forward the final to the WindowFunction 242 | WindowedMeasurements aggregate = input.iterator().next(); 243 | 244 | final TimeWindow window = context.window(); 245 | aggregate.setWindowStart(window.getStart()); 246 | aggregate.setWindowEnd(window.getEnd()); 247 | aggregate.setLocation(location); 248 | 249 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd()); 250 | out.collect(aggregate); 
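// Editor's note (comment added for this write-up): the eventTimeLag histogram above records
// how far wall-clock time has advanced past the window's event-time end when a result is emitted.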
251 | } 252 | 253 | @Override 254 | public void open(Configuration parameters) throws Exception { 255 | super.open(parameters); 256 | 257 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag", 258 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE)); 259 | } 260 | } 261 | 262 | private static class ObjectMapperSingleton { 263 | static ObjectMapper getInstance() { 264 | ObjectMapper objectMapper = new ObjectMapper(); 265 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); 266 | return objectMapper; 267 | } 268 | } 269 | } 270 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution33.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot; 2 | 3 | import org.apache.flink.api.common.functions.AggregateFunction; 4 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.configuration.Configuration; 7 | import org.apache.flink.metrics.Counter; 8 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram; 9 | import org.apache.flink.streaming.api.TimeCharacteristic; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 14 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink; 15 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction; 16 | import org.apache.flink.streaming.api.watermark.Watermark; 17 | import org.apache.flink.streaming.api.windowing.time.Time; 18 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 19 | import org.apache.flink.util.Collector; 20 | import org.apache.flink.util.OutputTag; 21 | 22 | import com.fasterxml.jackson.databind.DeserializationFeature; 23 | import com.fasterxml.jackson.databind.JsonNode; 24 | import com.fasterxml.jackson.databind.ObjectMapper; 25 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord; 26 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements; 27 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils; 28 | 29 | import java.io.IOException; 30 | import java.util.concurrent.TimeUnit; 31 | 32 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment; 33 | 34 | public class TroubledStreamingJobSolution33 { 35 | 36 | public static void main(String[] args) throws Exception { 37 | ParameterTool parameters = ParameterTool.fromArgs(args); 38 | 39 | final boolean local = parameters.getBoolean("local", false); 40 | 41 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local); 42 | 43 | //Time Characteristics 44 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 45 | env.getConfig().setAutoWatermarkInterval(100); 46 | env.setBufferTimeout(10); 47 | 48 | //Checkpointing Configuration 49 | env.enableCheckpointing(5000); 50 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000); 51 | 52 | DataStream sourceStream = env 53 | .addSource(SourceUtils.createFakeKafkaSource()) 54 | 
.name("FakeKafkaSource") 55 | .uid("FakeKafkaSource") 56 | .assignTimestampsAndWatermarks( 57 | new MeasurementTSExtractor(Time.of(250, TimeUnit.MILLISECONDS), 58 | Time.of(1, TimeUnit.SECONDS))) 59 | .name("Watermarks") 60 | .uid("Watermarks") 61 | .flatMap(new MeasurementDeserializer()) 62 | .name("Deserialization") 63 | .uid("Deserialization"); 64 | 65 | OutputTag lateDataTag = new OutputTag("late-data") { 66 | private static final long serialVersionUID = 33513631677208956L; 67 | }; 68 | 69 | SingleOutputStreamOperator aggregatedPerLocation = sourceStream 70 | .keyBy(jsonNode -> jsonNode.get("location").asText()) 71 | .timeWindow(Time.of(1, TimeUnit.SECONDS)) 72 | .sideOutputLateData(lateDataTag) 73 | .aggregate(new MeasurementWindowAggregatingFunction(), 74 | new MeasurementWindowProcessFunction()) 75 | .name("WindowedAggregationPerLocation") 76 | .uid("WindowedAggregationPerLocation"); 77 | 78 | if (local) { 79 | aggregatedPerLocation.print() 80 | .name("NormalOutput") 81 | .uid("NormalOutput") 82 | .disableChaining(); 83 | aggregatedPerLocation.getSideOutput(lateDataTag).printToErr() 84 | .name("LateDataSink") 85 | .uid("LateDataSink") 86 | .disableChaining(); 87 | } else { 88 | aggregatedPerLocation.addSink(new DiscardingSink<>()) 89 | .name("NormalOutput") 90 | .uid("NormalOutput") 91 | .disableChaining(); 92 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>()) 93 | .name("LateDataSink") 94 | .uid("LateDataSink") 95 | .disableChaining(); 96 | } 97 | 98 | env.execute(TroubledStreamingJobSolution33.class.getSimpleName()); 99 | } 100 | 101 | /** 102 | * Deserializes the JSON Kafka message. 103 | */ 104 | public static class MeasurementDeserializer extends RichFlatMapFunction { 105 | private static final long serialVersionUID = 2L; 106 | 107 | private Counter numInvalidRecords; 108 | 109 | @Override 110 | public void open(final Configuration parameters) throws Exception { 111 | super.open(parameters); 112 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords"); 113 | } 114 | 115 | @Override 116 | public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector out) { 117 | final JsonNode node; 118 | try { 119 | node = deserialize(kafkaRecord.getValue()); 120 | } catch (IOException e) { 121 | numInvalidRecords.inc(); 122 | return; 123 | } 124 | out.collect(node); 125 | } 126 | 127 | private JsonNode deserialize(final byte[] bytes) throws IOException { 128 | return ObjectMapperSingleton.getInstance().readValue(bytes, JsonNode.class); 129 | } 130 | } 131 | 132 | public static class MeasurementTSExtractor implements AssignerWithPeriodicWatermarks { 133 | private static final long serialVersionUID = 2L; 134 | 135 | private long currentMaxTimestamp; 136 | private long lastEmittedWatermark = Long.MIN_VALUE; 137 | private long lastRecordProcessingTime; 138 | 139 | private final long maxOutOfOrderness; 140 | private final long idleTimeout; 141 | 142 | MeasurementTSExtractor(Time maxOutOfOrderness, Time idleTimeout) { 143 | if (maxOutOfOrderness.toMilliseconds() < 0) { 144 | throw new RuntimeException("Tried to set the maximum allowed " + 145 | "lateness to " + maxOutOfOrderness + 146 | ". This parameter cannot be negative."); 147 | } 148 | 149 | if (idleTimeout.toMilliseconds() < 0) { 150 | throw new RuntimeException("Tried to set the idle Timeout" + idleTimeout + 151 | ". 
This parameter cannot be negative."); 152 | } 153 | 154 | 155 | this.maxOutOfOrderness = maxOutOfOrderness.toMilliseconds(); 156 | this.idleTimeout = idleTimeout.toMilliseconds(); 157 | this.currentMaxTimestamp = Long.MIN_VALUE; 158 | } 159 | 160 | public long getMaxOutOfOrdernessInMillis() { 161 | return maxOutOfOrderness; 162 | } 163 | 164 | @Override 165 | public final Watermark getCurrentWatermark() { 166 | 167 | // if last record was processed more than the idleTimeout in the past, consider this 168 | // source idle and set timestamp to current processing time 169 | long currentProcessingTime = System.currentTimeMillis(); 170 | if (lastRecordProcessingTime < currentProcessingTime - idleTimeout) { 171 | this.currentMaxTimestamp = currentProcessingTime; 172 | } 173 | 174 | long potentialWM = this.currentMaxTimestamp - maxOutOfOrderness; 175 | if (potentialWM >= lastEmittedWatermark) { 176 | lastEmittedWatermark = potentialWM; 177 | } 178 | return new Watermark(lastEmittedWatermark); 179 | } 180 | 181 | @Override 182 | public final long extractTimestamp(FakeKafkaRecord element, long previousElementTimestamp) { 183 | lastRecordProcessingTime = System.currentTimeMillis(); 184 | long timestamp = element.getTimestamp(); 185 | if (timestamp > currentMaxTimestamp) { 186 | currentMaxTimestamp = timestamp; 187 | } 188 | return timestamp; 189 | } 190 | } 191 | 192 | public static class MeasurementWindowAggregatingFunction 193 | implements AggregateFunction { 194 | private static final long serialVersionUID = 2L; 195 | 196 | public MeasurementWindowAggregatingFunction() {} 197 | 198 | @Override 199 | public WindowedMeasurements createAccumulator() { 200 | return new WindowedMeasurements(); 201 | } 202 | 203 | @Override 204 | public WindowedMeasurements add(final JsonNode record, final WindowedMeasurements aggregate) { 205 | double result = Double.parseDouble(record.get("value").asText()); 206 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result); 207 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1); 208 | return aggregate; 209 | } 210 | 211 | @Override 212 | public WindowedMeasurements getResult(final WindowedMeasurements windowedMeasurements) { 213 | return windowedMeasurements; 214 | } 215 | 216 | @Override 217 | public WindowedMeasurements merge(final WindowedMeasurements agg1, final WindowedMeasurements agg2) { 218 | agg2.setEventsPerWindow(agg1.getEventsPerWindow() + agg2.getEventsPerWindow()); 219 | agg2.setSumPerWindow(agg1.getSumPerWindow() + agg2.getSumPerWindow()); 220 | return agg2; 221 | } 222 | } 223 | 224 | public static class MeasurementWindowProcessFunction 225 | extends ProcessWindowFunction { 226 | private static final long serialVersionUID = 1L; 227 | 228 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000; 229 | 230 | private transient DescriptiveStatisticsHistogram eventTimeLag; 231 | 232 | MeasurementWindowProcessFunction() { 233 | } 234 | 235 | @Override 236 | public void process( 237 | final String location, 238 | final Context context, 239 | final Iterable input, 240 | final Collector out) { 241 | 242 | // Windows with pre-aggregation only forward the final to the WindowFunction 243 | WindowedMeasurements aggregate = input.iterator().next(); 244 | 245 | final TimeWindow window = context.window(); 246 | aggregate.setWindowStart(window.getStart()); 247 | aggregate.setWindowEnd(window.getEnd()); 248 | aggregate.setLocation(location); 249 | 250 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd()); 251 | 
out.collect(aggregate); 252 | } 253 | 254 | @Override 255 | public void open(Configuration parameters) throws Exception { 256 | super.open(parameters); 257 | 258 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag", 259 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE)); 260 | } 261 | } 262 | 263 | private static class ObjectMapperSingleton { 264 | static ObjectMapper getInstance() { 265 | ObjectMapper objectMapper = new ObjectMapper(); 266 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); 267 | return objectMapper; 268 | } 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution41.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot; 2 | 3 | import org.apache.flink.api.common.functions.AggregateFunction; 4 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.configuration.Configuration; 7 | import org.apache.flink.metrics.Counter; 8 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram; 9 | import org.apache.flink.streaming.api.TimeCharacteristic; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 14 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink; 15 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction; 16 | import org.apache.flink.streaming.api.watermark.Watermark; 17 | import org.apache.flink.streaming.api.windowing.time.Time; 18 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 19 | import org.apache.flink.util.Collector; 20 | import org.apache.flink.util.OutputTag; 21 | 22 | import com.fasterxml.jackson.databind.DeserializationFeature; 23 | import com.fasterxml.jackson.databind.ObjectMapper; 24 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord; 25 | import com.ververica.flinktraining.provided.troubleshoot.Measurement; 26 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements; 27 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils; 28 | 29 | import java.io.IOException; 30 | import java.util.concurrent.TimeUnit; 31 | 32 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment; 33 | 34 | public class TroubledStreamingJobSolution41 { 35 | 36 | public static void main(String[] args) throws Exception { 37 | ParameterTool parameters = ParameterTool.fromArgs(args); 38 | 39 | final boolean local = parameters.getBoolean("local", false); 40 | 41 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local); 42 | 43 | //Time Characteristics 44 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 45 | env.getConfig().setAutoWatermarkInterval(100); 46 | env.setBufferTimeout(10); 47 | 48 | //Checkpointing Configuration 49 | env.enableCheckpointing(5000); 50 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000); 51 | 52 | DataStream sourceStream = env 53 | 
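// this variant deserializes straight into the Measurement POJO (see
// MeasurementDeserializer below), so downstream operators use typed getters
// instead of JsonNode field lookups and string parsing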
.addSource(SourceUtils.createFakeKafkaSource()) 54 | .name("FakeKafkaSource") 55 | .uid("FakeKafkaSource") 56 | .assignTimestampsAndWatermarks( 57 | new MeasurementTSExtractor(Time.of(250, TimeUnit.MILLISECONDS), 58 | Time.of(1, TimeUnit.SECONDS))) 59 | .name("Watermarks") 60 | .uid("Watermarks") 61 | .flatMap(new MeasurementDeserializer()) 62 | .name("Deserialization") 63 | .uid("Deserialization"); 64 | 65 | OutputTag lateDataTag = new OutputTag("late-data") { 66 | private static final long serialVersionUID = 33513631677208956L; 67 | }; 68 | 69 | SingleOutputStreamOperator aggregatedPerLocation = sourceStream 70 | .keyBy(Measurement::getLocation) 71 | .timeWindow(Time.of(1, TimeUnit.SECONDS)) 72 | .sideOutputLateData(lateDataTag) 73 | .aggregate(new MeasurementWindowAggregatingFunction(), 74 | new MeasurementWindowProcessFunction()) 75 | .name("WindowedAggregationPerLocation") 76 | .uid("WindowedAggregationPerLocation"); 77 | 78 | if (local) { 79 | aggregatedPerLocation.print() 80 | .name("NormalOutput") 81 | .uid("NormalOutput") 82 | .disableChaining(); 83 | aggregatedPerLocation.getSideOutput(lateDataTag).printToErr() 84 | .name("LateDataSink") 85 | .uid("LateDataSink") 86 | .disableChaining(); 87 | } else { 88 | aggregatedPerLocation.addSink(new DiscardingSink<>()) 89 | .name("NormalOutput") 90 | .uid("NormalOutput") 91 | .disableChaining(); 92 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>()) 93 | .name("LateDataSink") 94 | .uid("LateDataSink") 95 | .disableChaining(); 96 | } 97 | 98 | env.execute(TroubledStreamingJobSolution41.class.getSimpleName()); 99 | } 100 | 101 | /** 102 | * Deserializes the JSON Kafka message. 103 | */ 104 | public static class MeasurementDeserializer extends RichFlatMapFunction { 105 | private static final long serialVersionUID = 3L; 106 | 107 | private Counter numInvalidRecords; 108 | 109 | @Override 110 | public void open(final Configuration parameters) throws Exception { 111 | super.open(parameters); 112 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords"); 113 | } 114 | 115 | @Override 116 | public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector out) { 117 | final Measurement node; 118 | try { 119 | node = deserialize(kafkaRecord.getValue()); 120 | } catch (IOException e) { 121 | numInvalidRecords.inc(); 122 | return; 123 | } 124 | out.collect(node); 125 | } 126 | 127 | private Measurement deserialize(final byte[] bytes) throws IOException { 128 | return ObjectMapperSingleton.getInstance().readValue(bytes, Measurement.class); 129 | } 130 | } 131 | 132 | public static class MeasurementTSExtractor implements AssignerWithPeriodicWatermarks { 133 | private static final long serialVersionUID = 2L; 134 | 135 | private long currentMaxTimestamp; 136 | private long lastEmittedWatermark = Long.MIN_VALUE; 137 | private long lastRecordProcessingTime; 138 | 139 | private final long maxOutOfOrderness; 140 | private final long idleTimeout; 141 | 142 | MeasurementTSExtractor(Time maxOutOfOrderness, Time idleTimeout) { 143 | if (maxOutOfOrderness.toMilliseconds() < 0) { 144 | throw new RuntimeException("Tried to set the maximum allowed " + 145 | "lateness to " + maxOutOfOrderness + 146 | ". This parameter cannot be negative."); 147 | } 148 | 149 | if (idleTimeout.toMilliseconds() < 0) { 150 | throw new RuntimeException("Tried to set the idle Timeout" + idleTimeout + 151 | ". 
This parameter cannot be negative."); 152 | } 153 | 154 | 155 | this.maxOutOfOrderness = maxOutOfOrderness.toMilliseconds(); 156 | this.idleTimeout = idleTimeout.toMilliseconds(); 157 | this.currentMaxTimestamp = Long.MIN_VALUE; 158 | } 159 | 160 | public long getMaxOutOfOrdernessInMillis() { 161 | return maxOutOfOrderness; 162 | } 163 | 164 | @Override 165 | public final Watermark getCurrentWatermark() { 166 | 167 | // if last record was processed more than the idleTimeout in the past, consider this 168 | // source idle and set timestamp to current processing time 169 | long currentProcessingTime = System.currentTimeMillis(); 170 | if (lastRecordProcessingTime < currentProcessingTime - idleTimeout) { 171 | this.currentMaxTimestamp = currentProcessingTime; 172 | } 173 | 174 | long potentialWM = this.currentMaxTimestamp - maxOutOfOrderness; 175 | if (potentialWM >= lastEmittedWatermark) { 176 | lastEmittedWatermark = potentialWM; 177 | } 178 | return new Watermark(lastEmittedWatermark); 179 | } 180 | 181 | @Override 182 | public final long extractTimestamp(FakeKafkaRecord element, long previousElementTimestamp) { 183 | lastRecordProcessingTime = System.currentTimeMillis(); 184 | long timestamp = element.getTimestamp(); 185 | if (timestamp > currentMaxTimestamp) { 186 | currentMaxTimestamp = timestamp; 187 | } 188 | return timestamp; 189 | } 190 | } 191 | 192 | public static class MeasurementWindowAggregatingFunction 193 | implements AggregateFunction { 194 | private static final long serialVersionUID = -1083906142198231377L; 195 | 196 | public MeasurementWindowAggregatingFunction() {} 197 | 198 | @Override 199 | public WindowedMeasurements createAccumulator() { 200 | return new WindowedMeasurements(); 201 | } 202 | 203 | @Override 204 | public WindowedMeasurements add(final Measurement record, final WindowedMeasurements aggregate) { 205 | double result = record.getValue(); 206 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result); 207 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1); 208 | return aggregate; 209 | } 210 | 211 | @Override 212 | public WindowedMeasurements getResult(final WindowedMeasurements windowedMeasurements) { 213 | return windowedMeasurements; 214 | } 215 | 216 | @Override 217 | public WindowedMeasurements merge(final WindowedMeasurements agg1, final WindowedMeasurements agg2) { 218 | agg2.setEventsPerWindow(agg1.getEventsPerWindow() + agg2.getEventsPerWindow()); 219 | agg2.setSumPerWindow(agg1.getSumPerWindow() + agg2.getSumPerWindow()); 220 | return agg2; 221 | } 222 | } 223 | 224 | public static class MeasurementWindowProcessFunction 225 | extends ProcessWindowFunction { 226 | private static final long serialVersionUID = 1L; 227 | 228 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000; 229 | 230 | private transient DescriptiveStatisticsHistogram eventTimeLag; 231 | 232 | MeasurementWindowProcessFunction() { 233 | } 234 | 235 | @Override 236 | public void process( 237 | final String location, 238 | final Context context, 239 | final Iterable input, 240 | final Collector out) { 241 | 242 | // Windows with pre-aggregation only forward the final to the WindowFunction 243 | WindowedMeasurements aggregate = input.iterator().next(); 244 | 245 | final TimeWindow window = context.window(); 246 | aggregate.setWindowStart(window.getStart()); 247 | aggregate.setWindowEnd(window.getEnd()); 248 | aggregate.setLocation(location); 249 | 250 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd()); 251 | out.collect(aggregate); 252 
| } 253 | 254 | @Override 255 | public void open(Configuration parameters) throws Exception { 256 | super.open(parameters); 257 | 258 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag", 259 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE)); 260 | } 261 | } 262 | 263 | private static class ObjectMapperSingleton { 264 | static ObjectMapper getInstance() { 265 | ObjectMapper objectMapper = new ObjectMapper(); 266 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); 267 | return objectMapper; 268 | } 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution42.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot; 2 | 3 | import org.apache.flink.api.common.functions.AggregateFunction; 4 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.configuration.Configuration; 7 | import org.apache.flink.metrics.Counter; 8 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram; 9 | import org.apache.flink.streaming.api.TimeCharacteristic; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 14 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink; 15 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction; 16 | import org.apache.flink.streaming.api.watermark.Watermark; 17 | import org.apache.flink.streaming.api.windowing.time.Time; 18 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 19 | import org.apache.flink.util.Collector; 20 | import org.apache.flink.util.OutputTag; 21 | 22 | import com.fasterxml.jackson.databind.DeserializationFeature; 23 | import com.fasterxml.jackson.databind.ObjectMapper; 24 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord; 25 | import com.ververica.flinktraining.provided.troubleshoot.Measurement; 26 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements; 27 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils; 28 | 29 | import java.io.IOException; 30 | import java.util.concurrent.TimeUnit; 31 | 32 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment; 33 | 34 | public class TroubledStreamingJobSolution42 { 35 | 36 | public static void main(String[] args) throws Exception { 37 | ParameterTool parameters = ParameterTool.fromArgs(args); 38 | 39 | final boolean local = parameters.getBoolean("local", false); 40 | 41 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local); 42 | 43 | //Time Characteristics 44 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 45 | env.getConfig().setAutoWatermarkInterval(100); 46 | env.setBufferTimeout(10); 47 | 48 | //Checkpointing Configuration 49 | env.enableCheckpointing(5000); 50 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000); 51 | 52 | DataStream sourceStream = env 53 | 
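// unlike the previous variant, the deserializer below reuses a single
// ObjectMapper per task (created once in open()) instead of building a new
// one for every record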
.addSource(SourceUtils.createFakeKafkaSource()) 54 | .name("FakeKafkaSource") 55 | .uid("FakeKafkaSource") 56 | .assignTimestampsAndWatermarks( 57 | new MeasurementTSExtractor(Time.of(250, TimeUnit.MILLISECONDS), 58 | Time.of(1, TimeUnit.SECONDS))) 59 | .name("Watermarks") 60 | .uid("Watermarks") 61 | .flatMap(new MeasurementDeserializer()) 62 | .name("Deserialization") 63 | .uid("Deserialization"); 64 | 65 | OutputTag lateDataTag = new OutputTag("late-data") { 66 | private static final long serialVersionUID = 33513631677208956L; 67 | }; 68 | 69 | SingleOutputStreamOperator aggregatedPerLocation = sourceStream 70 | .keyBy(Measurement::getLocation) 71 | .timeWindow(Time.of(1, TimeUnit.SECONDS)) 72 | .sideOutputLateData(lateDataTag) 73 | .aggregate(new MeasurementWindowAggregatingFunction(), 74 | new MeasurementWindowProcessFunction()) 75 | .name("WindowedAggregationPerLocation") 76 | .uid("WindowedAggregationPerLocation"); 77 | 78 | if (local) { 79 | aggregatedPerLocation.print() 80 | .name("NormalOutput") 81 | .uid("NormalOutput") 82 | .disableChaining(); 83 | aggregatedPerLocation.getSideOutput(lateDataTag).printToErr() 84 | .name("LateDataSink") 85 | .uid("LateDataSink") 86 | .disableChaining(); 87 | } else { 88 | aggregatedPerLocation.addSink(new DiscardingSink<>()) 89 | .name("NormalOutput") 90 | .uid("NormalOutput") 91 | .disableChaining(); 92 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>()) 93 | .name("LateDataSink") 94 | .uid("LateDataSink") 95 | .disableChaining(); 96 | } 97 | 98 | env.execute(TroubledStreamingJobSolution42.class.getSimpleName()); 99 | } 100 | 101 | /** 102 | * Deserializes the JSON Kafka message. 103 | */ 104 | public static class MeasurementDeserializer extends RichFlatMapFunction { 105 | private static final long serialVersionUID = 3L; 106 | 107 | private Counter numInvalidRecords; 108 | private transient ObjectMapper instance; 109 | 110 | @Override 111 | public void open(final Configuration parameters) throws Exception { 112 | super.open(parameters); 113 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords"); 114 | instance = createObjectMapper(); 115 | } 116 | 117 | @Override 118 | public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector out) { 119 | final Measurement node; 120 | try { 121 | node = deserialize(kafkaRecord.getValue()); 122 | } catch (IOException e) { 123 | numInvalidRecords.inc(); 124 | return; 125 | } 126 | out.collect(node); 127 | } 128 | 129 | private Measurement deserialize(final byte[] bytes) throws IOException { 130 | return instance.readValue(bytes, Measurement.class); 131 | } 132 | } 133 | 134 | public static class MeasurementTSExtractor implements AssignerWithPeriodicWatermarks { 135 | private static final long serialVersionUID = 2L; 136 | 137 | private long currentMaxTimestamp; 138 | private long lastEmittedWatermark = Long.MIN_VALUE; 139 | private long lastRecordProcessingTime; 140 | 141 | private final long maxOutOfOrderness; 142 | private final long idleTimeout; 143 | 144 | MeasurementTSExtractor(Time maxOutOfOrderness, Time idleTimeout) { 145 | if (maxOutOfOrderness.toMilliseconds() < 0) { 146 | throw new RuntimeException("Tried to set the maximum allowed " + 147 | "lateness to " + maxOutOfOrderness + 148 | ". This parameter cannot be negative."); 149 | } 150 | 151 | if (idleTimeout.toMilliseconds() < 0) { 152 | throw new RuntimeException("Tried to set the idle Timeout" + idleTimeout + 153 | ". 
This parameter cannot be negative."); 154 | } 155 | 156 | 157 | this.maxOutOfOrderness = maxOutOfOrderness.toMilliseconds(); 158 | this.idleTimeout = idleTimeout.toMilliseconds(); 159 | this.currentMaxTimestamp = Long.MIN_VALUE; 160 | } 161 | 162 | public long getMaxOutOfOrdernessInMillis() { 163 | return maxOutOfOrderness; 164 | } 165 | 166 | @Override 167 | public final Watermark getCurrentWatermark() { 168 | 169 | // if last record was processed more than the idleTimeout in the past, consider this 170 | // source idle and set timestamp to current processing time 171 | long currentProcessingTime = System.currentTimeMillis(); 172 | if (lastRecordProcessingTime < currentProcessingTime - idleTimeout) { 173 | this.currentMaxTimestamp = currentProcessingTime; 174 | } 175 | 176 | long potentialWM = this.currentMaxTimestamp - maxOutOfOrderness; 177 | if (potentialWM >= lastEmittedWatermark) { 178 | lastEmittedWatermark = potentialWM; 179 | } 180 | return new Watermark(lastEmittedWatermark); 181 | } 182 | 183 | @Override 184 | public final long extractTimestamp(FakeKafkaRecord element, long previousElementTimestamp) { 185 | lastRecordProcessingTime = System.currentTimeMillis(); 186 | long timestamp = element.getTimestamp(); 187 | if (timestamp > currentMaxTimestamp) { 188 | currentMaxTimestamp = timestamp; 189 | } 190 | return timestamp; 191 | } 192 | } 193 | 194 | public static class MeasurementWindowAggregatingFunction 195 | implements AggregateFunction { 196 | private static final long serialVersionUID = -1083906142198231377L; 197 | 198 | public MeasurementWindowAggregatingFunction() {} 199 | 200 | @Override 201 | public WindowedMeasurements createAccumulator() { 202 | return new WindowedMeasurements(); 203 | } 204 | 205 | @Override 206 | public WindowedMeasurements add(final Measurement record, final WindowedMeasurements aggregate) { 207 | double result = record.getValue(); 208 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result); 209 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1); 210 | return aggregate; 211 | } 212 | 213 | @Override 214 | public WindowedMeasurements getResult(final WindowedMeasurements windowedMeasurements) { 215 | return windowedMeasurements; 216 | } 217 | 218 | @Override 219 | public WindowedMeasurements merge(final WindowedMeasurements agg1, final WindowedMeasurements agg2) { 220 | agg2.setEventsPerWindow(agg1.getEventsPerWindow() + agg2.getEventsPerWindow()); 221 | agg2.setSumPerWindow(agg1.getSumPerWindow() + agg2.getSumPerWindow()); 222 | return agg2; 223 | } 224 | } 225 | 226 | public static class MeasurementWindowProcessFunction 227 | extends ProcessWindowFunction { 228 | private static final long serialVersionUID = 1L; 229 | 230 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000; 231 | 232 | private transient DescriptiveStatisticsHistogram eventTimeLag; 233 | 234 | MeasurementWindowProcessFunction() { 235 | } 236 | 237 | @Override 238 | public void process( 239 | final String location, 240 | final Context context, 241 | final Iterable input, 242 | final Collector out) { 243 | 244 | // Windows with pre-aggregation only forward the final to the WindowFunction 245 | WindowedMeasurements aggregate = input.iterator().next(); 246 | 247 | final TimeWindow window = context.window(); 248 | aggregate.setWindowStart(window.getStart()); 249 | aggregate.setWindowEnd(window.getEnd()); 250 | aggregate.setLocation(location); 251 | 252 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd()); 253 | out.collect(aggregate); 254 
| } 255 | 256 | @Override 257 | public void open(Configuration parameters) throws Exception { 258 | super.open(parameters); 259 | 260 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag", 261 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE)); 262 | } 263 | } 264 | 265 | private static ObjectMapper createObjectMapper() { 266 | ObjectMapper objectMapper = new ObjectMapper(); 267 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); 268 | return objectMapper; 269 | } 270 | } 271 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution43.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot; 2 | 3 | import org.apache.flink.api.common.functions.AggregateFunction; 4 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.configuration.Configuration; 7 | import org.apache.flink.metrics.Counter; 8 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram; 9 | import org.apache.flink.streaming.api.TimeCharacteristic; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 14 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink; 15 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction; 16 | import org.apache.flink.streaming.api.watermark.Watermark; 17 | import org.apache.flink.streaming.api.windowing.time.Time; 18 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 19 | import org.apache.flink.util.Collector; 20 | import org.apache.flink.util.OutputTag; 21 | 22 | import com.fasterxml.jackson.databind.DeserializationFeature; 23 | import com.fasterxml.jackson.databind.ObjectMapper; 24 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord; 25 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements; 26 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils; 27 | 28 | import java.io.IOException; 29 | import java.util.concurrent.TimeUnit; 30 | 31 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment; 32 | 33 | public class TroubledStreamingJobSolution43 { 34 | 35 | public static void main(String[] args) throws Exception { 36 | ParameterTool parameters = ParameterTool.fromArgs(args); 37 | 38 | final boolean local = parameters.getBoolean("local", false); 39 | 40 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local); 41 | 42 | //Time Characteristics 43 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 44 | env.getConfig().setAutoWatermarkInterval(100); 45 | env.setBufferTimeout(10); 46 | 47 | //Checkpointing Configuration 48 | env.enableCheckpointing(5000); 49 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000); 50 | 51 | DataStream sourceStream = env 52 | .addSource(SourceUtils.createFakeKafkaSource()) 53 | .name("FakeKafkaSource") 54 | .uid("FakeKafkaSource") 55 | .assignTimestampsAndWatermarks( 56 | new 
MeasurementTSExtractor(Time.of(250, TimeUnit.MILLISECONDS), 57 | Time.of(1, TimeUnit.SECONDS))) 58 | .name("Watermarks") 59 | .uid("Watermarks") 60 | .flatMap(new MeasurementDeserializer()) 61 | .name("Deserialization") 62 | .uid("Deserialization"); 63 | 64 | OutputTag lateDataTag = new OutputTag("late-data") { 65 | private static final long serialVersionUID = 33513631677208956L; 66 | }; 67 | 68 | SingleOutputStreamOperator aggregatedPerLocation = sourceStream 69 | .keyBy(SimpleMeasurement::getLocation) 70 | .timeWindow(Time.of(1, TimeUnit.SECONDS)) 71 | .sideOutputLateData(lateDataTag) 72 | .aggregate(new MeasurementWindowAggregatingFunction(), 73 | new MeasurementWindowProcessFunction()) 74 | .name("WindowedAggregationPerLocation") 75 | .uid("WindowedAggregationPerLocation"); 76 | 77 | if (local) { 78 | aggregatedPerLocation.print() 79 | .name("NormalOutput") 80 | .uid("NormalOutput") 81 | .disableChaining(); 82 | aggregatedPerLocation.getSideOutput(lateDataTag).printToErr() 83 | .name("LateDataSink") 84 | .uid("LateDataSink") 85 | .disableChaining(); 86 | } else { 87 | aggregatedPerLocation.addSink(new DiscardingSink<>()) 88 | .name("NormalOutput") 89 | .uid("NormalOutput") 90 | .disableChaining(); 91 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>()) 92 | .name("LateDataSink") 93 | .uid("LateDataSink") 94 | .disableChaining(); 95 | } 96 | 97 | env.execute(TroubledStreamingJobSolution43.class.getSimpleName()); 98 | } 99 | 100 | /** 101 | * Deserializes the JSON Kafka message. 102 | */ 103 | public static class MeasurementDeserializer extends RichFlatMapFunction { 104 | private static final long serialVersionUID = 4L; 105 | 106 | private Counter numInvalidRecords; 107 | private transient ObjectMapper instance; 108 | 109 | @Override 110 | public void open(final Configuration parameters) throws Exception { 111 | super.open(parameters); 112 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords"); 113 | instance = createObjectMapper(); 114 | } 115 | 116 | @Override 117 | public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector out) { 118 | final SimpleMeasurement node; 119 | try { 120 | node = deserialize(kafkaRecord.getValue()); 121 | } catch (IOException e) { 122 | numInvalidRecords.inc(); 123 | return; 124 | } 125 | out.collect(node); 126 | } 127 | 128 | private SimpleMeasurement deserialize(final byte[] bytes) throws IOException { 129 | return instance.readValue(bytes, SimpleMeasurement.class); 130 | } 131 | } 132 | 133 | public static class MeasurementTSExtractor implements AssignerWithPeriodicWatermarks { 134 | private static final long serialVersionUID = 2L; 135 | 136 | private long currentMaxTimestamp; 137 | private long lastEmittedWatermark = Long.MIN_VALUE; 138 | private long lastRecordProcessingTime; 139 | 140 | private final long maxOutOfOrderness; 141 | private final long idleTimeout; 142 | 143 | MeasurementTSExtractor(Time maxOutOfOrderness, Time idleTimeout) { 144 | if (maxOutOfOrderness.toMilliseconds() < 0) { 145 | throw new RuntimeException("Tried to set the maximum allowed " + 146 | "lateness to " + maxOutOfOrderness + 147 | ". This parameter cannot be negative."); 148 | } 149 | 150 | if (idleTimeout.toMilliseconds() < 0) { 151 | throw new RuntimeException("Tried to set the idle Timeout" + idleTimeout + 152 | ". 
This parameter cannot be negative."); 153 | } 154 | 155 | 156 | this.maxOutOfOrderness = maxOutOfOrderness.toMilliseconds(); 157 | this.idleTimeout = idleTimeout.toMilliseconds(); 158 | this.currentMaxTimestamp = Long.MIN_VALUE; 159 | } 160 | 161 | public long getMaxOutOfOrdernessInMillis() { 162 | return maxOutOfOrderness; 163 | } 164 | 165 | @Override 166 | public final Watermark getCurrentWatermark() { 167 | 168 | // if last record was processed more than the idleTimeout in the past, consider this 169 | // source idle and set timestamp to current processing time 170 | long currentProcessingTime = System.currentTimeMillis(); 171 | if (lastRecordProcessingTime < currentProcessingTime - idleTimeout) { 172 | this.currentMaxTimestamp = currentProcessingTime; 173 | } 174 | 175 | long potentialWM = this.currentMaxTimestamp - maxOutOfOrderness; 176 | if (potentialWM >= lastEmittedWatermark) { 177 | lastEmittedWatermark = potentialWM; 178 | } 179 | return new Watermark(lastEmittedWatermark); 180 | } 181 | 182 | @Override 183 | public final long extractTimestamp(FakeKafkaRecord element, long previousElementTimestamp) { 184 | lastRecordProcessingTime = System.currentTimeMillis(); 185 | long timestamp = element.getTimestamp(); 186 | if (timestamp > currentMaxTimestamp) { 187 | currentMaxTimestamp = timestamp; 188 | } 189 | return timestamp; 190 | } 191 | } 192 | 193 | public static class MeasurementWindowAggregatingFunction 194 | implements AggregateFunction { 195 | private static final long serialVersionUID = -1083906142198231377L; 196 | 197 | public MeasurementWindowAggregatingFunction() {} 198 | 199 | @Override 200 | public WindowedMeasurements createAccumulator() { 201 | return new WindowedMeasurements(); 202 | } 203 | 204 | @Override 205 | public WindowedMeasurements add(final SimpleMeasurement record, final WindowedMeasurements aggregate) { 206 | double result = record.getValue(); 207 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result); 208 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1); 209 | return aggregate; 210 | } 211 | 212 | @Override 213 | public WindowedMeasurements getResult(final WindowedMeasurements windowedMeasurements) { 214 | return windowedMeasurements; 215 | } 216 | 217 | @Override 218 | public WindowedMeasurements merge(final WindowedMeasurements agg1, final WindowedMeasurements agg2) { 219 | agg2.setEventsPerWindow(agg1.getEventsPerWindow() + agg2.getEventsPerWindow()); 220 | agg2.setSumPerWindow(agg1.getSumPerWindow() + agg2.getSumPerWindow()); 221 | return agg2; 222 | } 223 | } 224 | 225 | public static class MeasurementWindowProcessFunction 226 | extends ProcessWindowFunction { 227 | private static final long serialVersionUID = 1L; 228 | 229 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000; 230 | 231 | private transient DescriptiveStatisticsHistogram eventTimeLag; 232 | 233 | MeasurementWindowProcessFunction() { 234 | } 235 | 236 | @Override 237 | public void process( 238 | final String location, 239 | final Context context, 240 | final Iterable input, 241 | final Collector out) { 242 | 243 | // Windows with pre-aggregation only forward the final to the WindowFunction 244 | WindowedMeasurements aggregate = input.iterator().next(); 245 | 246 | final TimeWindow window = context.window(); 247 | aggregate.setWindowStart(window.getStart()); 248 | aggregate.setWindowEnd(window.getEnd()); 249 | aggregate.setLocation(location); 250 | 251 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd()); 252 | 
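// the single pre-aggregated element is enriched with the window metadata
// set above before being emitted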
out.collect(aggregate); 253 | } 254 | 255 | @Override 256 | public void open(Configuration parameters) throws Exception { 257 | super.open(parameters); 258 | 259 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag", 260 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE)); 261 | } 262 | } 263 | 264 | private static ObjectMapper createObjectMapper() { 265 | ObjectMapper objectMapper = new ObjectMapper(); 266 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); 267 | return objectMapper; 268 | } 269 | } 270 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/ExtendedMeasurement.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInfo; 4 | import org.apache.flink.api.common.typeinfo.TypeInfoFactory; 5 | import org.apache.flink.api.common.typeinfo.TypeInformation; 6 | 7 | import java.lang.reflect.Type; 8 | import java.util.Map; 9 | 10 | @TypeInfo(ExtendedMeasurement.ExtendedMeasurementTypeInfoFactory.class) 11 | public class ExtendedMeasurement { 12 | 13 | private Sensor sensor; 14 | private Location location; 15 | private MeasurementValue measurement; 16 | 17 | public ExtendedMeasurement( 18 | Sensor sensor, 19 | Location location, 20 | MeasurementValue measurement) { 21 | this.sensor = sensor; 22 | this.location = location; 23 | this.measurement = measurement; 24 | } 25 | 26 | public Sensor getSensor() { 27 | return sensor; 28 | } 29 | 30 | public Location getLocation() { 31 | return location; 32 | } 33 | 34 | public MeasurementValue getMeasurement() { 35 | return measurement; 36 | } 37 | 38 | public static class ExtendedMeasurementTypeInfoFactory extends TypeInfoFactory { 39 | @Override 40 | public TypeInformation createTypeInfo( 41 | Type t, 42 | Map> genericParameters) { 43 | return ExtendedMeasurementTypeInfo.INSTANCE; 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/ExtendedMeasurementSerializer.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable; 2 | 3 | import org.apache.flink.api.common.typeutils.SimpleTypeSerializerSnapshot; 4 | import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot; 5 | import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton; 6 | import org.apache.flink.core.memory.DataInputView; 7 | import org.apache.flink.core.memory.DataOutputView; 8 | 9 | import java.io.IOException; 10 | 11 | public class ExtendedMeasurementSerializer extends TypeSerializerSingleton { 12 | 13 | private ExtendedMeasurementSerializer() { 14 | } 15 | 16 | static final ExtendedMeasurementSerializer INSTANCE = new ExtendedMeasurementSerializer(); 17 | 18 | @Override 19 | public boolean isImmutableType() { 20 | return true; 21 | } 22 | 23 | @Override 24 | public ExtendedMeasurement createInstance() { 25 | return null; 26 | } 27 | 28 | @Override 29 | public ExtendedMeasurement copy(ExtendedMeasurement from) { 30 | return new ExtendedMeasurement( 31 | SensorSerializer.INSTANCE.copy(from.getSensor()), 32 | LocationSerializer.INSTANCE.copy(from.getLocation()), 33 | 
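// each component is deep-copied via its dedicated singleton serializer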
MeasurementValueSerializer.INSTANCE.copy(from.getMeasurement())); 34 | } 35 | 36 | @Override 37 | public ExtendedMeasurement copy(ExtendedMeasurement from, ExtendedMeasurement reuse) { 38 | return copy(from); 39 | } 40 | 41 | @Override 42 | public int getLength() { 43 | return SensorSerializer.INSTANCE.getLength() + 44 | LocationSerializer.INSTANCE.getLength() + 45 | MeasurementValueSerializer.INSTANCE.getLength(); 46 | } 47 | 48 | @Override 49 | public void serialize(ExtendedMeasurement record, DataOutputView target) throws IOException { 50 | SensorSerializer.INSTANCE.serialize(record.getSensor(), target); 51 | LocationSerializer.INSTANCE.serialize(record.getLocation(), target); 52 | MeasurementValueSerializer.INSTANCE.serialize(record.getMeasurement(), target); 53 | } 54 | 55 | @Override 56 | public ExtendedMeasurement deserialize(DataInputView source) throws IOException { 57 | Sensor sensor = SensorSerializer.INSTANCE.deserialize(source); 58 | Location location = LocationSerializer.INSTANCE.deserialize(source); 59 | MeasurementValue measurement = MeasurementValueSerializer.INSTANCE.deserialize(source); 60 | return new ExtendedMeasurement(sensor, location, measurement); 61 | } 62 | 63 | @Override 64 | public ExtendedMeasurement deserialize(ExtendedMeasurement reuse, DataInputView source) throws IOException { 65 | return deserialize(source); 66 | } 67 | 68 | @Override 69 | public void copy(DataInputView source, DataOutputView target) throws IOException { 70 | SensorSerializer.INSTANCE.copy(source, target); 71 | LocationSerializer.INSTANCE.copy(source, target); 72 | MeasurementValueSerializer.INSTANCE.copy(source, target); 73 | } 74 | 75 | // ----------------------------------------------------------------------------------- 76 | 77 | @Override 78 | public TypeSerializerSnapshot snapshotConfiguration() { 79 | return new ExtendedMeasurementSerializerSnapshot(); 80 | } 81 | 82 | @SuppressWarnings("WeakerAccess") 83 | public static final class ExtendedMeasurementSerializerSnapshot extends 84 | SimpleTypeSerializerSnapshot { 85 | 86 | public ExtendedMeasurementSerializerSnapshot() { 87 | super(() -> INSTANCE); 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/ExtendedMeasurementTypeInfo.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable; 2 | 3 | import org.apache.flink.api.common.ExecutionConfig; 4 | import org.apache.flink.api.common.typeinfo.TypeInformation; 5 | import org.apache.flink.api.common.typeutils.TypeSerializer; 6 | 7 | class ExtendedMeasurementTypeInfo extends TypeInformation { 8 | 9 | private ExtendedMeasurementTypeInfo() { 10 | } 11 | 12 | static final ExtendedMeasurementTypeInfo INSTANCE = new ExtendedMeasurementTypeInfo(); 13 | 14 | @Override 15 | public boolean isBasicType() { 16 | return false; 17 | } 18 | 19 | @Override 20 | public boolean isTupleType() { 21 | return false; 22 | } 23 | 24 | @Override 25 | public int getArity() { 26 | return 3; 27 | } 28 | 29 | @Override 30 | public int getTotalFields() { 31 | return SensorTypeInfo.INSTANCE.getArity() + 32 | LocationTypeInfo.INSTANCE.getArity() + 33 | MeasurementValueTypeInfo.INSTANCE.getArity(); 34 | } 35 | 36 | @Override 37 | public Class getTypeClass() { 38 | return ExtendedMeasurement.class; 39 | } 40 | 41 | @Override 42 | public boolean isKeyType() { 43 | return 
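// a composite type can serve as a key only if all of its components can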
SensorTypeInfo.INSTANCE.isKeyType() && 44 | LocationTypeInfo.INSTANCE.isKeyType() && 45 | MeasurementValueTypeInfo.INSTANCE.isKeyType(); 46 | } 47 | 48 | @Override 49 | public TypeSerializer createSerializer(ExecutionConfig config) { 50 | return ExtendedMeasurementSerializer.INSTANCE; 51 | } 52 | 53 | @Override 54 | public String toString() { 55 | return getClass().getSimpleName(); 56 | } 57 | 58 | @SuppressWarnings("EqualsWhichDoesntCheckParameterClass") 59 | @Override 60 | public boolean equals(Object obj) { 61 | return this.canEqual(obj); 62 | } 63 | 64 | @Override 65 | public int hashCode() { 66 | return ExtendedMeasurement.class.hashCode(); 67 | } 68 | 69 | @Override 70 | public boolean canEqual(Object obj) { 71 | return obj instanceof ExtendedMeasurementTypeInfo; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/Location.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInfo; 4 | import org.apache.flink.api.common.typeinfo.TypeInfoFactory; 5 | import org.apache.flink.api.common.typeinfo.TypeInformation; 6 | 7 | import java.lang.reflect.Type; 8 | import java.util.Map; 9 | import java.util.Objects; 10 | 11 | @SuppressWarnings("WeakerAccess") 12 | @TypeInfo(Location.LocationTypeInfoFactory.class) 13 | public class Location { 14 | private double longitude; 15 | private double latitude; 16 | private double height; 17 | 18 | public Location(double longitude, double latitude, double height) { 19 | this.longitude = longitude; 20 | this.latitude = latitude; 21 | this.height = height; 22 | } 23 | 24 | public double getLongitude() { 25 | return longitude; 26 | } 27 | 28 | public double getLatitude() { 29 | return latitude; 30 | } 31 | 32 | public double getHeight() { 33 | return height; 34 | } 35 | 36 | @Override 37 | public boolean equals(Object o) { 38 | if (this == o) { 39 | return true; 40 | } 41 | if (o == null || getClass() != o.getClass()) { 42 | return false; 43 | } 44 | Location location = (Location) o; 45 | return Double.compare(location.longitude, longitude) == 0 && 46 | Double.compare(location.latitude, latitude) == 0 && 47 | Double.compare(location.height, height) == 0; 48 | } 49 | 50 | @Override 51 | public int hashCode() { 52 | return Objects.hash(longitude, latitude, height); 53 | } 54 | 55 | public static class LocationTypeInfoFactory extends TypeInfoFactory { 56 | @Override 57 | public TypeInformation createTypeInfo( 58 | Type t, 59 | Map> genericParameters) { 60 | return LocationTypeInfo.INSTANCE; 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/LocationSerializer.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable; 2 | 3 | import org.apache.flink.api.common.typeutils.SimpleTypeSerializerSnapshot; 4 | import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot; 5 | import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton; 6 | import org.apache.flink.core.memory.DataInputView; 7 | import org.apache.flink.core.memory.DataOutputView; 8 | 9 | import java.io.IOException; 10 | 11 | public class LocationSerializer extends 
TypeSerializerSingleton { 12 | 13 | private LocationSerializer() { 14 | } 15 | 16 | static final LocationSerializer INSTANCE = new LocationSerializer(); 17 | 18 | @Override 19 | public boolean isImmutableType() { 20 | return true; 21 | } 22 | 23 | @Override 24 | public Location createInstance() { 25 | return null; 26 | } 27 | 28 | @Override 29 | public Location copy(Location from) { 30 | return new Location(from.getLongitude(), from.getLatitude(), from.getHeight()); 31 | } 32 | 33 | @Override 34 | public Location copy(Location from, Location reuse) { 35 | return copy(from); 36 | } 37 | 38 | @Override 39 | public int getLength() { 40 | return Double.BYTES + Double.BYTES + Double.BYTES; 41 | } 42 | 43 | @Override 44 | public void serialize(Location record, DataOutputView target) throws IOException { 45 | target.writeDouble(record.getLongitude()); 46 | target.writeDouble(record.getLatitude()); 47 | target.writeDouble(record.getHeight()); 48 | } 49 | 50 | @Override 51 | public Location deserialize(DataInputView source) throws IOException { 52 | double longitude = source.readDouble(); 53 | double latitude = source.readDouble(); 54 | double height = source.readDouble(); 55 | return new Location(longitude, latitude, height); 56 | } 57 | 58 | @Override 59 | public Location deserialize(Location reuse, DataInputView source) throws IOException { 60 | return deserialize(source); 61 | } 62 | 63 | @Override 64 | public void copy(DataInputView source, DataOutputView target) throws IOException { 65 | target.writeDouble(source.readDouble()); 66 | target.writeDouble(source.readDouble()); 67 | target.writeDouble(source.readDouble()); 68 | } 69 | 70 | // ----------------------------------------------------------------------------------- 71 | 72 | @Override 73 | public TypeSerializerSnapshot snapshotConfiguration() { 74 | return new LocationSerializerSnapshot(); 75 | } 76 | 77 | @SuppressWarnings("WeakerAccess") 78 | public static final class LocationSerializerSnapshot extends 79 | SimpleTypeSerializerSnapshot { 80 | 81 | public LocationSerializerSnapshot() { 82 | super(() -> INSTANCE); 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/LocationTypeInfo.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable; 2 | 3 | import org.apache.flink.api.common.ExecutionConfig; 4 | import org.apache.flink.api.common.typeinfo.TypeInformation; 5 | import org.apache.flink.api.common.typeutils.TypeSerializer; 6 | 7 | class LocationTypeInfo extends TypeInformation { 8 | 9 | private LocationTypeInfo() { 10 | } 11 | 12 | static final LocationTypeInfo INSTANCE = new LocationTypeInfo(); 13 | 14 | @Override 15 | public boolean isBasicType() { 16 | return false; 17 | } 18 | 19 | @Override 20 | public boolean isTupleType() { 21 | return false; 22 | } 23 | 24 | @Override 25 | public int getArity() { 26 | return 3; 27 | } 28 | 29 | @Override 30 | public int getTotalFields() { 31 | return 3; 32 | } 33 | 34 | @Override 35 | public Class getTypeClass() { 36 | return Location.class; 37 | } 38 | 39 | @Override 40 | public boolean isKeyType() { 41 | return true; 42 | } 43 | 44 | @Override 45 | public TypeSerializer createSerializer(ExecutionConfig config) { 46 | return LocationSerializer.INSTANCE; 47 | } 48 | 49 | @Override 50 | public String toString() { 51 | return getClass().getSimpleName(); 52 | } 53 | 
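// this type info is a stateless singleton, so equality only requires the
// other object to be of the same class (see canEqual below)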
54 | @SuppressWarnings("EqualsWhichDoesntCheckParameterClass") 55 | @Override 56 | public boolean equals(Object obj) { 57 | return this.canEqual(obj); 58 | } 59 | 60 | @Override 61 | public int hashCode() { 62 | return Location.class.hashCode(); 63 | } 64 | 65 | @Override 66 | public boolean canEqual(Object obj) { 67 | return obj instanceof LocationTypeInfo; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/MeasurementValue.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInfo; 4 | import org.apache.flink.api.common.typeinfo.TypeInfoFactory; 5 | import org.apache.flink.api.common.typeinfo.TypeInformation; 6 | 7 | import java.lang.reflect.Type; 8 | import java.util.Map; 9 | 10 | @TypeInfo(MeasurementValue.MeasurementValueTypeInfoFactory.class) 11 | public class MeasurementValue { 12 | private double value; 13 | private float accuracy; 14 | private long timestamp; 15 | 16 | public MeasurementValue(double value, float accuracy, long timestamp) { 17 | this.value = value; 18 | this.accuracy = accuracy; 19 | this.timestamp = timestamp; 20 | } 21 | 22 | public double getValue() { 23 | return value; 24 | } 25 | 26 | public float getAccuracy() { 27 | return accuracy; 28 | } 29 | 30 | public long getTimestamp() { 31 | return timestamp; 32 | } 33 | 34 | public static class MeasurementValueTypeInfoFactory extends TypeInfoFactory { 35 | @Override 36 | public TypeInformation createTypeInfo( 37 | Type t, 38 | Map> genericParameters) { 39 | return MeasurementValueTypeInfo.INSTANCE; 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/MeasurementValueSerializer.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable; 2 | 3 | import org.apache.flink.api.common.typeutils.SimpleTypeSerializerSnapshot; 4 | import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot; 5 | import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton; 6 | import org.apache.flink.core.memory.DataInputView; 7 | import org.apache.flink.core.memory.DataOutputView; 8 | 9 | import java.io.IOException; 10 | 11 | public class MeasurementValueSerializer extends TypeSerializerSingleton { 12 | 13 | private MeasurementValueSerializer() { 14 | } 15 | 16 | static final MeasurementValueSerializer INSTANCE = new MeasurementValueSerializer(); 17 | 18 | @Override 19 | public boolean isImmutableType() { 20 | return true; 21 | } 22 | 23 | @Override 24 | public MeasurementValue createInstance() { 25 | return null; 26 | } 27 | 28 | @Override 29 | public MeasurementValue copy(MeasurementValue from) { 30 | return new MeasurementValue(from.getValue(), from.getAccuracy(), from.getTimestamp()); 31 | } 32 | 33 | @Override 34 | public MeasurementValue copy(MeasurementValue from, MeasurementValue reuse) { 35 | return copy(from); 36 | } 37 | 38 | @Override 39 | public int getLength() { 40 | return Double.BYTES + Float.BYTES + Long.BYTES; 41 | } 42 | 43 | @Override 44 | public void serialize(MeasurementValue record, DataOutputView target) throws IOException { 45 | target.writeDouble(record.getValue()); 46 | 
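// fixed-length layout: 8-byte value + 4-byte accuracy + 8-byte timestamp,
// matching getLength() above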
target.writeFloat(record.getAccuracy()); 47 | target.writeLong(record.getTimestamp()); 48 | } 49 | 50 | @Override 51 | public MeasurementValue deserialize(DataInputView source) throws IOException { 52 | double value = source.readDouble(); 53 | float accuracy = source.readFloat(); 54 | long timestamp = source.readLong(); 55 | return new MeasurementValue(value, accuracy, timestamp); 56 | } 57 | 58 | @Override 59 | public MeasurementValue deserialize(MeasurementValue reuse, DataInputView source) throws IOException { 60 | return deserialize(source); 61 | } 62 | 63 | @Override 64 | public void copy(DataInputView source, DataOutputView target) throws IOException { 65 | target.writeDouble(source.readDouble()); 66 | target.writeFloat(source.readFloat()); 67 | target.writeLong(source.readLong()); 68 | } 69 | 70 | // ----------------------------------------------------------------------------------- 71 | 72 | @Override 73 | public TypeSerializerSnapshot snapshotConfiguration() { 74 | return new MeasurementValueSerializerSnapshot(); 75 | } 76 | 77 | @SuppressWarnings("WeakerAccess") 78 | public static final class MeasurementValueSerializerSnapshot extends 79 | SimpleTypeSerializerSnapshot { 80 | 81 | public MeasurementValueSerializerSnapshot() { 82 | super(() -> INSTANCE); 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/MeasurementValueTypeInfo.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable; 2 | 3 | import org.apache.flink.api.common.ExecutionConfig; 4 | import org.apache.flink.api.common.typeinfo.TypeInformation; 5 | import org.apache.flink.api.common.typeutils.TypeSerializer; 6 | 7 | class MeasurementValueTypeInfo extends TypeInformation { 8 | 9 | private MeasurementValueTypeInfo() { 10 | } 11 | 12 | static final MeasurementValueTypeInfo INSTANCE = new MeasurementValueTypeInfo(); 13 | 14 | @Override 15 | public boolean isBasicType() { 16 | return false; 17 | } 18 | 19 | @Override 20 | public boolean isTupleType() { 21 | return false; 22 | } 23 | 24 | @Override 25 | public int getArity() { 26 | return 3; 27 | } 28 | 29 | @Override 30 | public int getTotalFields() { 31 | return 3; 32 | } 33 | 34 | @Override 35 | public Class getTypeClass() { 36 | return MeasurementValue.class; 37 | } 38 | 39 | @Override 40 | public boolean isKeyType() { 41 | return true; 42 | } 43 | 44 | @Override 45 | public TypeSerializer createSerializer(ExecutionConfig config) { 46 | return MeasurementValueSerializer.INSTANCE; 47 | } 48 | 49 | @Override 50 | public String toString() { 51 | return getClass().getSimpleName(); 52 | } 53 | 54 | @SuppressWarnings("EqualsWhichDoesntCheckParameterClass") 55 | @Override 56 | public boolean equals(Object obj) { 57 | return this.canEqual(obj); 58 | } 59 | 60 | @Override 61 | public int hashCode() { 62 | return MeasurementValue.class.hashCode(); 63 | } 64 | 65 | @Override 66 | public boolean canEqual(Object obj) { 67 | return obj instanceof MeasurementValueTypeInfo; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/ObjectReuseExtendedMeasurementSource.java: -------------------------------------------------------------------------------- 1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable; 2 | 3 | import 
org.apache.flink.configuration.Configuration; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | import com.ververica.flinktraining.provided.troubleshoot.GeoUtils; 7 | import com.ververica.flinktraining.provided.troubleshoot.MeanGauge; 8 | 9 | import java.util.SplittableRandom; 10 | 11 | /** 12 | * Artificial source for sensor measurements (temperature and wind speed) of a pre-defined set of 13 | * sensors (per parallel instance) creating measurements for two locations (inside the bounding 14 | * boxes of Germany (DE) and the USA (US)) in SI units (°C and km/h). 15 | */ 16 | @SuppressWarnings("WeakerAccess") 17 | public class ObjectReuseExtendedMeasurementSource extends RichParallelSourceFunction { 18 | 19 | private static final long serialVersionUID = 1L; 20 | 21 | private static final int NUM_SENSORS = 10_000; 22 | 23 | public static final int LOWER_TEMPERATURE_CELCIUS = -10; 24 | public static final int UPPER_TEMPERATURE_CELCIUS = 35; 25 | public static final int LOWER_WIND_SPEED_KMH = 0; 26 | public static final int UPPER_WIND_SPEED_KMH = 335; 27 | 28 | private volatile boolean running = true; 29 | 30 | private transient Sensor[] sensors; 31 | private transient Location[] locations; 32 | private transient double[] lastValue; 33 | private transient MeanGauge sourceTemperatureUS; 34 | 35 | @Override 36 | public void open(final Configuration parameters) { 37 | initSensors(); 38 | 39 | sourceTemperatureUS = getRuntimeContext().getMetricGroup() 40 | .gauge("sourceTemperatureUSmean", new MeanGauge()); 41 | getRuntimeContext().getMetricGroup().gauge( 42 | "sourceTemperatureUSmin", new MeanGauge.MinGauge(sourceTemperatureUS)); 43 | getRuntimeContext().getMetricGroup().gauge( 44 | "sourceTemperatureUSmax", new MeanGauge.MaxGauge(sourceTemperatureUS)); 45 | } 46 | 47 | @Override 48 | public void run(SourceContext ctx) { 49 | final SplittableRandom rnd = new SplittableRandom(); 50 | final Object lock = ctx.getCheckpointLock(); 51 | 52 | while (running) { 53 | ExtendedMeasurement event = randomEvent(rnd); 54 | 55 | //noinspection SynchronizationOnLocalVariableOrMethodParameter 56 | synchronized (lock) { 57 | ctx.collect(event); 58 | } 59 | } 60 | } 61 | 62 | @Override 63 | public void cancel() { 64 | running = false; 65 | } 66 | 67 | /** 68 | * Creates sensor metadata that this source instance will work with. 69 | */ 70 | private void initSensors() { 71 | final SplittableRandom rnd = new SplittableRandom(); 72 | final Sensor.SensorType[] sensorTypes = 73 | Sensor.SensorType.values(); 74 | 75 | final int start = getRuntimeContext().getIndexOfThisSubtask() * NUM_SENSORS; 76 | this.sensors = new Sensor[NUM_SENSORS]; 77 | this.lastValue = new double[NUM_SENSORS]; 78 | this.locations = new Location[NUM_SENSORS]; 79 | for (int i = 0; i < NUM_SENSORS; ++i) { 80 | long sensorId = start + i; 81 | long vendorId = sensorId % 100; 82 | final Sensor.SensorType sensorType = 83 | sensorTypes[(i / 2) % sensorTypes.length]; 84 | sensors[i] = new Sensor(sensorId, vendorId, sensorType); 85 | 86 | lastValue[i] = randomInitialMeasurementValue(rnd, sensorType); 87 | 88 | // assume that a sensor has a fixed position 89 | locations[i] = randomInitialLocation(rnd, i); 90 | } 91 | } 92 | 93 | /** 94 | * Creates a random measurement value that a sensor will start with. 
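* The value is drawn uniformly from the sensor type's value range in steps of 0.1.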
95 | */ 96 | private double randomInitialMeasurementValue( 97 | SplittableRandom rnd, 98 | Sensor.SensorType sensorType) { 99 | switch (sensorType) { 100 | case Temperature: 101 | // -10°C - 35°C 102 | return rnd.nextInt( 103 | (UPPER_TEMPERATURE_CELCIUS - LOWER_TEMPERATURE_CELCIUS) * 10) / 10.0 + 104 | LOWER_TEMPERATURE_CELCIUS; 105 | case Wind: 106 | // 0km/h - 335km/h 107 | return rnd.nextInt((UPPER_WIND_SPEED_KMH - LOWER_WIND_SPEED_KMH) * 10) / 10.0 + 108 | LOWER_WIND_SPEED_KMH; 109 | default: 110 | throw new IllegalStateException("Unknown sensor type: " + sensorType); 111 | } 112 | } 113 | 114 | /** 115 | * Creates a random location for a sensor, distinguishing two bounding boxes: US and DE. 116 | */ 117 | private static Location randomInitialLocation(SplittableRandom rnd, int i) { 118 | final double longitude; 119 | final double latitude; 120 | // let's assume that no selected region wraps around LON -180/+180 121 | if (i < NUM_SENSORS / 2) { 122 | // in US 123 | longitude = rnd.nextDouble() * (GeoUtils.US_LON_EAST - GeoUtils.US_LON_WEST) + GeoUtils.US_LON_WEST; 124 | latitude = rnd.nextDouble() * (GeoUtils.US_LAT_NORTH - GeoUtils.US_LAT_SOUTH) + GeoUtils.US_LAT_SOUTH; 125 | } else { 126 | // in DE 127 | longitude = rnd.nextDouble() * (GeoUtils.DE_LON_EAST - GeoUtils.DE_LON_WEST) + GeoUtils.DE_LON_WEST; 128 | latitude = rnd.nextDouble() * (GeoUtils.DE_LAT_NORTH - GeoUtils.DE_LAT_SOUTH) + GeoUtils.DE_LAT_SOUTH; 129 | } 130 | double height = rnd.nextDouble() * 3000; 131 | return new Location(longitude, latitude, height); 132 | } 133 | 134 | /** 135 | * Creates a randomized sensor value during runtime of the source. Each new value differs 136 | * slightly from the previous value that this sensor had. 137 | */ 138 | private ExtendedMeasurement randomEvent(SplittableRandom rnd) { 139 | int randomIdx = rnd.nextInt(sensors.length); 140 | Sensor sensor = sensors[randomIdx]; 141 | Location location = locations[randomIdx]; 142 | 143 | long timestamp = System.currentTimeMillis(); 144 | 145 | final double value = randomChangeMeasurementValue( 146 | rnd, 147 | sensor.getSensorType(), 148 | location, 149 | lastValue[randomIdx]); 150 | 151 | lastValue[randomIdx] = value; 152 | 153 | final MeasurementValue measurement = 154 | new MeasurementValue( 155 | value, 156 | (float) (rnd.nextInt(100) - 50) / 10.0f, // +- 5 157 | timestamp); 158 | 159 | return new ExtendedMeasurement( 160 | new Sensor( 161 | sensor.getSensorId(), sensor.getVendorId(), sensor.getSensorType()), 162 | new Location( 163 | location.getLongitude(), location.getLatitude(), location.getHeight()), 164 | measurement); 165 | } 166 | 167 | /** 168 | * Generates a new sensor value that is +-3 of the old value and reports a custom metric for 169 | * sensor values in the US. 
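* If applying the change would leave the sensor type's value range, the change
* is applied in the opposite direction instead (see newValueWithinBounds).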
170 |      */
171 |     private double randomChangeMeasurementValue(
172 |             SplittableRandom rnd,
173 |             Sensor.SensorType sensorType,
174 |             Location location,
175 |             double lastValue) {
176 |         double change = rnd.nextDouble(6) - 3.0; // +- 3
177 |         final double value;
178 |         switch (sensorType) {
179 |             case Temperature:
180 |                 value = newValueWithinBounds(
181 |                         lastValue, change, LOWER_TEMPERATURE_CELCIUS, UPPER_TEMPERATURE_CELCIUS);
182 |                 if (GeoUtils.isInUS(location.getLongitude(), location.getLatitude())) {
183 |                     sourceTemperatureUS.addValue(value);
184 |                 }
185 |                 break;
186 |             case Wind:
187 |                 value = newValueWithinBounds(
188 |                         lastValue, change, LOWER_WIND_SPEED_KMH, UPPER_WIND_SPEED_KMH);
189 |                 break;
190 |             default:
191 |                 throw new IllegalStateException("Unknown sensor type: " + sensorType);
192 |         }
193 |         return value;
194 |     }
195 | 
196 |     /**
197 |      * Returns either lastValue + change (if within the given bounds) or
198 |      * lastValue - change (otherwise).
199 |      */
200 |     private static double newValueWithinBounds(
201 |             double lastValue,
202 |             double change,
203 |             double lowerLimit,
204 |             double upperLimit) {
205 |         double value;
206 |         if (lastValue + change >= lowerLimit && lastValue + change <= upperLimit) {
207 |             value = lastValue + change;
208 |         } else {
209 |             value = lastValue - change;
210 |         }
211 |         return value;
212 |     }
213 | }
214 | 
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/Sensor.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 | 
3 | import org.apache.flink.api.common.typeinfo.TypeInfo;
4 | import org.apache.flink.api.common.typeinfo.TypeInfoFactory;
5 | import org.apache.flink.api.common.typeinfo.TypeInformation;
6 | 
7 | import java.lang.reflect.Type;
8 | import java.util.Map;
9 | import java.util.Objects;
10 | 
11 | @SuppressWarnings("WeakerAccess")
12 | @TypeInfo(Sensor.SensorTypeInfoFactory.class)
13 | public class Sensor {
14 |     public enum SensorType {
15 |         Temperature,
16 |         Wind
17 |     }
18 | 
19 |     private final long sensorId;
20 |     private final long vendorId;
21 |     private final SensorType sensorType;
22 | 
23 |     public Sensor(
24 |             long sensorId,
25 |             long vendorId,
26 |             SensorType sensorType) {
27 |         this.sensorId = sensorId;
28 |         this.vendorId = vendorId;
29 |         this.sensorType = sensorType;
30 |     }
31 | 
32 |     public long getSensorId() {
33 |         return sensorId;
34 |     }
35 | 
36 |     public long getVendorId() {
37 |         return vendorId;
38 |     }
39 | 
40 |     public SensorType getSensorType() {
41 |         return sensorType;
42 |     }
43 | 
44 |     @Override
45 |     public boolean equals(Object o) {
46 |         if (this == o) {
47 |             return true;
48 |         }
49 |         if (o == null || getClass() != o.getClass()) {
50 |             return false;
51 |         }
52 |         Sensor sensor = (Sensor) o;
53 |         return sensorId == sensor.sensorId &&
54 |                 vendorId == sensor.vendorId &&
55 |                 sensorType == sensor.sensorType;
56 |     }
57 | 
58 |     @Override
59 |     public int hashCode() {
60 |         // NOTE: do not use the enum directly here. Why?
61 |         // -> try with Sensor as a key in a distributed setting and see for yourself!
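        // (Enum.hashCode() falls back to identity-based Object.hashCode(), so its value can
        // differ between JVM instances; a key must hash identically on every TaskManager,
        // which the stable ordinal() guarantees.)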
62 |         return Objects.hash(sensorId, vendorId, sensorType.ordinal());
63 |     }
64 | 
65 |     public static class SensorTypeInfoFactory extends TypeInfoFactory<Sensor> {
66 |         @Override
67 |         public TypeInformation<Sensor> createTypeInfo(
68 |                 Type t,
69 |                 Map<String, TypeInformation<?>> genericParameters) {
70 |             return SensorTypeInfo.INSTANCE;
71 |         }
72 |     }
73 | }
74 | 
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/SensorSerializer.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 | 
3 | import org.apache.flink.api.common.typeutils.SimpleTypeSerializerSnapshot;
4 | import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot;
5 | import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton;
6 | import org.apache.flink.core.memory.DataInputView;
7 | import org.apache.flink.core.memory.DataOutputView;
8 | 
9 | import java.io.IOException;
10 | 
11 | public class SensorSerializer extends TypeSerializerSingleton<Sensor> {
12 |     private static final Sensor.SensorType[] SENSOR_TYPES =
13 |             Sensor.SensorType.values();
14 | 
15 |     private SensorSerializer() {
16 |     }
17 | 
18 |     static final SensorSerializer INSTANCE = new SensorSerializer();
19 | 
20 |     @Override
21 |     public boolean isImmutableType() {
22 |         return true;
23 |     }
24 | 
25 |     @Override
26 |     public Sensor createInstance() {
27 |         return null;
28 |     }
29 | 
30 |     @Override
31 |     public Sensor copy(Sensor from) {
32 |         return new Sensor(from.getSensorId(), from.getVendorId(), from.getSensorType());
33 |     }
34 | 
35 |     @Override
36 |     public Sensor copy(Sensor from, Sensor reuse) {
37 |         return copy(from);
38 |     }
39 | 
40 |     @Override
41 |     public int getLength() {
42 |         return Long.BYTES + Long.BYTES + Integer.BYTES;
43 |     }
44 | 
45 |     @Override
46 |     public void serialize(Sensor record, DataOutputView target) throws IOException {
47 |         target.writeLong(record.getSensorId());
48 |         target.writeLong(record.getVendorId());
49 |         target.writeInt(record.getSensorType().ordinal());
50 |     }
51 | 
52 |     @Override
53 |     public Sensor deserialize(DataInputView source) throws IOException {
54 |         long sensorId = source.readLong();
55 |         long vendorId = source.readLong();
56 |         Sensor.SensorType sensorType = SENSOR_TYPES[source.readInt()];
57 |         return new Sensor(sensorId, vendorId, sensorType);
58 |     }
59 | 
60 |     @Override
61 |     public Sensor deserialize(Sensor reuse, DataInputView source) throws IOException {
62 |         return deserialize(source);
63 |     }
64 | 
65 |     @Override
66 |     public void copy(DataInputView source, DataOutputView target) throws IOException {
67 |         target.writeLong(source.readLong());
68 |         target.writeLong(source.readLong());
69 |         target.writeInt(source.readInt());
70 |     }
71 | 
72 |     // -----------------------------------------------------------------------------------
73 | 
74 |     @Override
75 |     public TypeSerializerSnapshot<Sensor> snapshotConfiguration() {
76 |         return new SensorSerializerSnapshot();
77 |     }
78 | 
79 |     @SuppressWarnings("WeakerAccess")
80 |     public static final class SensorSerializerSnapshot extends
81 |             SimpleTypeSerializerSnapshot<Sensor> {
82 | 
83 |         public SensorSerializerSnapshot() {
84 |             super(() -> INSTANCE);
85 |         }
86 |     }
87 | }
88 | 
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/SensorTypeInfo.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 | 
3 | import org.apache.flink.api.common.ExecutionConfig;
4 | import org.apache.flink.api.common.typeinfo.TypeInformation;
5 | import org.apache.flink.api.common.typeutils.TypeSerializer;
6 | 
7 | class SensorTypeInfo extends TypeInformation<Sensor> {
8 | 
9 |     private SensorTypeInfo() {
10 |     }
11 | 
12 |     static final SensorTypeInfo INSTANCE = new SensorTypeInfo();
13 | 
14 |     @Override
15 |     public boolean isBasicType() {
16 |         return false;
17 |     }
18 | 
19 |     @Override
20 |     public boolean isTupleType() {
21 |         return false;
22 |     }
23 | 
24 |     @Override
25 |     public int getArity() {
26 |         return 3;
27 |     }
28 | 
29 |     @Override
30 |     public int getTotalFields() {
31 |         return 3;
32 |     }
33 | 
34 |     @Override
35 |     public Class<Sensor> getTypeClass() {
36 |         return Sensor.class;
37 |     }
38 | 
39 |     @Override
40 |     public boolean isKeyType() {
41 |         return true;
42 |     }
43 | 
44 |     @Override
45 |     public TypeSerializer<Sensor> createSerializer(ExecutionConfig config) {
46 |         return SensorSerializer.INSTANCE;
47 |     }
48 | 
49 |     @Override
50 |     public String toString() {
51 |         return getClass().getSimpleName();
52 |     }
53 | 
54 |     @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
55 |     @Override
56 |     public boolean equals(Object obj) {
57 |         return this.canEqual(obj);
58 |     }
59 | 
60 |     @Override
61 |     public int hashCode() {
62 |         return Sensor.class.hashCode();
63 |     }
64 | 
65 |     @Override
66 |     public boolean canEqual(Object obj) {
67 |         return obj instanceof SensorTypeInfo;
68 |     }
69 | }
70 | 
--------------------------------------------------------------------------------
/src/main/resources/log4j2.properties:
--------------------------------------------------------------------------------
1 | appender.console.type=Console
2 | appender.console.name=STDOUT
3 | appender.console.layout.type=PatternLayout
4 | appender.console.layout.pattern=%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n
5 | rootLogger.level=INFO
6 | rootLogger.appenderRef.console.ref=STDOUT
--------------------------------------------------------------------------------
/src/test/java/com/ververica/training/ObjectReuseJobRunner.java:
--------------------------------------------------------------------------------
1 | package com.ververica.training;
2 | 
3 | import com.ververica.flinktraining.exercises.troubleshoot.ObjectReuseJob;
4 | import org.junit.Test;
5 | 
6 | public class ObjectReuseJobRunner {
7 | 
8 |     @Test
9 |     public void run() throws Exception {
10 |         String[] args = {"--local", "true", "--objectReuse", "true"};
11 |         // String[] args = {"--local", "true"};
12 |         ObjectReuseJob.main(args);
13 |     }
14 | }
15 | 
--------------------------------------------------------------------------------
/src/test/java/com/ververica/training/TroubledStreamingJobRunner.java:
--------------------------------------------------------------------------------
1 | package com.ververica.training;
2 | 
3 | import com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJob;
4 | import org.junit.Test;
5 | 
6 | public class TroubledStreamingJobRunner {
7 | 
8 |     @Test
9 |     public void run() throws Exception {
10 |         String[] args = {"--local", "true"};
11 |         TroubledStreamingJob.main(args);
12 |     }
13 | }
14 | 
--------------------------------------------------------------------------------