out) {
141 |
142 | WindowedMeasurements aggregate = new WindowedMeasurements();
143 | for (JsonNode record : input) {
144 | double result = Double.parseDouble(record.get("value").asText());
145 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result);
146 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1);
147 | }
148 |
149 | final TimeWindow window = context.window();
150 | aggregate.setWindowStart(window.getStart());
151 | aggregate.setWindowEnd(window.getEnd());
152 | aggregate.setLocation(location);
153 |
154 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd());
155 | out.collect(aggregate);
156 | }
157 |
158 | @Override
159 | public void open(Configuration parameters) throws Exception {
160 | super.open(parameters);
161 |
162 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag",
163 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE));
164 | }
165 | }
166 |
167 | private static class ObjectMapperSingleton {
168 | static ObjectMapper getInstance() {
169 | ObjectMapper objectMapper = new ObjectMapper();
170 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
171 | return objectMapper;
172 | }
173 | }
174 | }
175 |
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/exercises/troubleshoot/TroubledStreamingJobUtils.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.exercises.troubleshoot;
2 |
3 | import org.apache.flink.api.common.restartstrategy.RestartStrategies;
4 | import org.apache.flink.api.common.time.Time;
5 | import org.apache.flink.api.java.utils.ParameterTool;
6 | import org.apache.flink.configuration.Configuration;
7 | import org.apache.flink.core.fs.Path;
8 | import org.apache.flink.runtime.state.StateBackend;
9 | import org.apache.flink.runtime.state.filesystem.FsStateBackend;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 | import org.apache.flink.util.FileUtils;
12 |
13 | import java.io.File;
14 | import java.io.IOException;
15 | import java.net.URI;
16 | import java.net.URISyntaxException;
17 | import java.nio.file.Files;
18 | import java.util.concurrent.TimeUnit;
19 |
20 | public class TroubledStreamingJobUtils {
21 | public static StreamExecutionEnvironment createConfiguredEnvironment(
22 | final ParameterTool parameters, final boolean local) throws
23 | IOException, URISyntaxException {
24 | StreamExecutionEnvironment env;
25 | if (local) {
26 | env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
27 |
28 | String statePath = parameters.get("fsStatePath");
29 | Path checkpointPath;
30 | if (statePath != null) {
31 | FileUtils.deleteDirectory(new File(new URI(statePath)));
32 | checkpointPath = Path.fromLocalFile(new File(new URI(statePath)));
33 | } else {
34 | checkpointPath = Path.fromLocalFile(Files.createTempDirectory("checkpoints").toFile());
35 | }
36 |
37 | StateBackend stateBackend = new FsStateBackend(checkpointPath);
38 | env.setStateBackend(stateBackend);
39 | } else {
40 | env = StreamExecutionEnvironment.getExecutionEnvironment();
41 | }
42 |
43 | env.getConfig().setGlobalJobParameters(parameters);
44 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
45 | Integer.MAX_VALUE,
46 | Time.of(15, TimeUnit.SECONDS) // delay
47 | ));
48 | return env;
49 | }
50 | }
51 |
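Usage sketch: a minimal, hypothetical entry point showing how createConfiguredEnvironment is meant to be called. The --local and --fsStatePath flags are the parameters read above; the class name is illustrative (the solution jobs below use the same static import):

import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment;

public class EnvironmentUsageSketch {
    public static void main(String[] args) throws Exception {
        // e.g. --local true --fsStatePath file:///tmp/flink-checkpoints
        ParameterTool parameters = ParameterTool.fromArgs(args);
        StreamExecutionEnvironment env =
                createConfiguredEnvironment(parameters, parameters.getBoolean("local", false));
        // env now carries an FsStateBackend (in local mode) and an infinite
        // fixed-delay restart strategy with 15 seconds between restart attempts.
    }
}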
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/DoNotChangeThis.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided;
2 |
3 | import java.lang.annotation.ElementType;
4 | import java.lang.annotation.Retention;
5 | import java.lang.annotation.RetentionPolicy;
6 | import java.lang.annotation.Target;
7 |
8 | /**
9 |  * Classes, methods, or fields annotated with {@link DoNotChangeThis} should not be changed by training participants. They are either part of the required business logic or would usually be outside the user's control in a real-life Flink scenario.
10 | */
11 | @Retention(RetentionPolicy.SOURCE)
12 | @Target({ElementType.CONSTRUCTOR, ElementType.METHOD, ElementType.TYPE})
13 | public @interface DoNotChangeThis {
14 | }
15 |
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/ExtendedMeasurement.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 |
3 | import com.ververica.flinktraining.provided.DoNotChangeThis;
4 |
5 | import java.util.Objects;
6 |
7 | @SuppressWarnings({"WeakerAccess", "unused"})
8 | @DoNotChangeThis
9 | public class ExtendedMeasurement {
10 |
11 | private Sensor sensor;
12 | private Location location;
13 | private MeasurementValue measurement;
14 |
15 | public ExtendedMeasurement() {
16 | }
17 |
18 | public ExtendedMeasurement(
19 | Sensor sensor,
20 | Location location,
21 | MeasurementValue measurement) {
22 | this.sensor = sensor;
23 | this.location = location;
24 | this.measurement = measurement;
25 | }
26 |
27 | public Sensor getSensor() {
28 | return sensor;
29 | }
30 |
31 | public void setSensor(Sensor sensor) {
32 | this.sensor = sensor;
33 | }
34 |
35 | public Location getLocation() {
36 | return location;
37 | }
38 |
39 | public void setLocation(Location location) {
40 | this.location = location;
41 | }
42 |
43 | public MeasurementValue getMeasurement() {
44 | return measurement;
45 | }
46 |
47 | public void setMeasurement(MeasurementValue measurement) {
48 | this.measurement = measurement;
49 | }
50 |
51 | public enum SensorType {
52 | Temperature,
53 | Wind
54 | }
55 |
56 | public static class Sensor {
57 | private long sensorId;
58 | private long vendorId;
59 | private SensorType sensorType;
60 |
61 | public Sensor() {
62 | }
63 |
64 | public Sensor(
65 | long sensorId,
66 | long vendorId,
67 | SensorType sensorType) {
68 | this.sensorId = sensorId;
69 | this.vendorId = vendorId;
70 | this.sensorType = sensorType;
71 | }
72 |
73 | public long getSensorId() {
74 | return sensorId;
75 | }
76 |
77 | public void setSensorId(long sensorId) {
78 | this.sensorId = sensorId;
79 | }
80 |
81 | public long getVendorId() {
82 | return vendorId;
83 | }
84 |
85 | public void setVendorId(long vendorId) {
86 | this.vendorId = vendorId;
87 | }
88 |
89 | public SensorType getSensorType() {
90 | return sensorType;
91 | }
92 |
93 | public void setSensorType(SensorType sensorType) {
94 | this.sensorType = sensorType;
95 | }
96 |
97 | @Override
98 | public boolean equals(Object o) {
99 | if (this == o) {
100 | return true;
101 | }
102 | if (o == null || getClass() != o.getClass()) {
103 | return false;
104 | }
105 | Sensor sensor = (Sensor) o;
106 | return sensorId == sensor.sensorId &&
107 | vendorId == sensor.vendorId &&
108 | sensorType == sensor.sensorType;
109 | }
110 |
111 | @Override
112 | public int hashCode() {
113 | // NOTE: do not use the enum directly here. Why?
114 | // -> try with Sensor as a key in a distributed setting and see for yourself!
115 | return Objects.hash(sensorId, vendorId, sensorType.ordinal());
116 | }
117 | }
118 |
119 | public static class Location {
120 | private double longitude;
121 | private double latitude;
122 | private double height;
123 |
124 | public Location() {
125 | }
126 |
127 | public Location(double longitude, double latitude, double height) {
128 | this.longitude = longitude;
129 | this.latitude = latitude;
130 | this.height = height;
131 | }
132 |
133 | public double getLongitude() {
134 | return longitude;
135 | }
136 |
137 | public void setLongitude(double longitude) {
138 | this.longitude = longitude;
139 | }
140 |
141 | public double getLatitude() {
142 | return latitude;
143 | }
144 |
145 | public void setLatitude(double latitude) {
146 | this.latitude = latitude;
147 | }
148 |
149 | public double getHeight() {
150 | return height;
151 | }
152 |
153 | public void setHeight(double height) {
154 | this.height = height;
155 | }
156 |
157 | @Override
158 | public boolean equals(Object o) {
159 | if (this == o) {
160 | return true;
161 | }
162 | if (o == null || getClass() != o.getClass()) {
163 | return false;
164 | }
165 | Location location = (Location) o;
166 | return Double.compare(location.longitude, longitude) == 0 &&
167 | Double.compare(location.latitude, latitude) == 0 &&
168 | Double.compare(location.height, height) == 0;
169 | }
170 |
171 | @Override
172 | public int hashCode() {
173 | return Objects.hash(longitude, latitude, height);
174 | }
175 | }
176 |
177 | public static class MeasurementValue {
178 | private double value;
179 | private float accuracy;
180 | private long timestamp;
181 |
182 | public MeasurementValue() {
183 | }
184 |
185 | public MeasurementValue(double value, float accuracy, long timestamp) {
186 | this.value = value;
187 | this.accuracy = accuracy;
188 | this.timestamp = timestamp;
189 | }
190 |
191 | public double getValue() {
192 | return value;
193 | }
194 |
195 | public void setValue(double value) {
196 | this.value = value;
197 | }
198 |
199 | public float getAccuracy() {
200 | return accuracy;
201 | }
202 |
203 | public void setAccuracy(float accuracy) {
204 | this.accuracy = accuracy;
205 | }
206 |
207 | public long getTimestamp() {
208 | return timestamp;
209 | }
210 |
211 | public void setTimestamp(long timestamp) {
212 | this.timestamp = timestamp;
213 | }
214 | }
215 | }
216 |
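A note on the hint in Sensor#hashCode() above: Enum.hashCode() is inherited from Object and is therefore identity-based, so the same constant can hash differently in different JVM processes. Flink's keyBy() requires key hashes that are stable across the whole cluster, which is why the stable ordinal() is hashed instead. A small demonstration (hypothetical class, not part of the repository):

import java.util.Objects;

public class EnumHashDemo {
    enum SensorType { Temperature, Wind }

    public static void main(String[] args) {
        // Identity-based: typically differs between JVM processes/restarts.
        System.out.println(SensorType.Wind.hashCode());
        // Stable across JVMs: the ordinal is a fixed int for each constant.
        System.out.println(Objects.hash(SensorType.Wind.ordinal()));
    }
}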
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/FakeKafkaRecord.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 |
3 | import com.ververica.flinktraining.provided.DoNotChangeThis;
4 |
5 | import java.util.Arrays;
6 | import java.util.Objects;
7 |
8 | @DoNotChangeThis
9 | public class FakeKafkaRecord {
10 |
11 | private long timestamp;
12 | private byte[] key;
13 | private byte[] value;
14 | private int partition;
15 |
16 | public FakeKafkaRecord() {
17 | }
18 |
19 | public FakeKafkaRecord(final long timestamp, final byte[] key, final byte[] value, final int partition) {
20 | this.timestamp = timestamp;
21 | this.key = key;
22 | this.value = value;
23 | this.partition = partition;
24 | }
25 |
26 | public long getTimestamp() {
27 | return timestamp;
28 | }
29 |
30 | public void setTimestamp(final long timestamp) {
31 | this.timestamp = timestamp;
32 | }
33 |
34 | public byte[] getKey() {
35 | return key;
36 | }
37 |
38 | public void setKey(final byte[] key) {
39 | this.key = key;
40 | }
41 |
42 | public byte[] getValue() {
43 | return value;
44 | }
45 |
46 | public void setValue(final byte[] value) {
47 | this.value = value;
48 | }
49 |
50 | public int getPartition() {
51 | return partition;
52 | }
53 |
54 | public void setPartition(final int partition) {
55 | this.partition = partition;
56 | }
57 |
58 | @Override
59 | public boolean equals(final Object o) {
60 | if (this == o) {
61 | return true;
62 | }
63 | if (o == null || getClass() != o.getClass()) {
64 | return false;
65 | }
66 | final FakeKafkaRecord that = (FakeKafkaRecord) o;
67 | return timestamp == that.timestamp &&
68 | partition == that.partition &&
69 | Arrays.equals(key, that.key) &&
70 | Arrays.equals(value, that.value);
71 | }
72 |
73 | @Override
74 | public int hashCode() {
75 | int result = Objects.hash(timestamp, partition);
76 | result = 31 * result + Arrays.hashCode(key);
77 | result = 31 * result + Arrays.hashCode(value);
78 | return result;
79 | }
80 |
81 | @Override
82 | public String toString() {
83 | final StringBuilder sb = new StringBuilder("FakeKafkaRecord{");
84 | sb.append("timestamp=").append(timestamp);
85 | sb.append(", key=").append(Arrays.toString(key));
86 | sb.append(", value=").append(Arrays.toString(value));
87 | sb.append(", partition=").append(partition);
88 | sb.append('}');
89 | return sb.toString();
90 | }
91 | }
92 |
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/FakeKafkaSource.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 |
3 | import org.apache.flink.configuration.Configuration;
4 | import org.apache.flink.runtime.state.FunctionInitializationContext;
5 | import org.apache.flink.runtime.state.FunctionSnapshotContext;
6 | import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
7 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
8 |
9 | import com.ververica.flinktraining.provided.DoNotChangeThis;
10 | import org.slf4j.Logger;
11 | import org.slf4j.LoggerFactory;
12 |
13 | import java.util.Arrays;
14 | import java.util.BitSet;
15 | import java.util.List;
16 | import java.util.Random;
17 | import java.util.stream.Collectors;
18 | import java.util.stream.IntStream;
19 |
20 | /**
21 | * The {@link FakeKafkaSource} reads from {@code NO_OF_PARTIONS} Kafka partitions.
22 | *
23 |  * The timestamps roughly follow the current processing time and are ascending per
24 |  * partition. The partitions themselves can be out of sync.
25 | */
26 | @DoNotChangeThis
27 | public class FakeKafkaSource extends RichParallelSourceFunction<FakeKafkaRecord> implements CheckpointedFunction {
28 | private static final long serialVersionUID = 4658785571367840693L;
29 |
30 | private static final int NO_OF_PARTIONS = 8;
31 | public static final Logger log = LoggerFactory.getLogger(FakeKafkaSource.class);
32 |
33 | private final Random rand;
34 |
35 | private transient volatile boolean cancelled;
36 | private transient int indexOfThisSubtask;
37 | private transient int numberOfParallelSubtasks;
38 | private transient List<Integer> assignedPartitions;
39 |
40 | private final List<byte[]> serializedMeasurements;
41 | private final double poisonPillRate;
42 | private final BitSet idlePartitions;
43 |
44 | FakeKafkaSource(final int seed, final float poisonPillRate, List<Integer> idlePartitions, List<byte[]> serializedMeasurements) {
45 | this.poisonPillRate = poisonPillRate;
46 | this.idlePartitions = new BitSet(NO_OF_PARTIONS);
47 | for (int i : idlePartitions) {
48 | this.idlePartitions.set(i);
49 | }
50 | this.serializedMeasurements = serializedMeasurements;
51 |
52 | this.rand = new Random(seed);
53 | }
54 |
55 | @Override
56 | public void open(final Configuration parameters) {
57 | indexOfThisSubtask = getRuntimeContext().getIndexOfThisSubtask();
58 | numberOfParallelSubtasks = getRuntimeContext().getNumberOfParallelSubtasks();
59 |
60 | assignedPartitions = IntStream.range(0, NO_OF_PARTIONS)
61 | .filter(i -> i % numberOfParallelSubtasks == indexOfThisSubtask)
62 | .boxed()
63 | .collect(Collectors.toList());
64 |
65 | log.info("Now reading from partitions: {}", assignedPartitions);
66 | }
67 |
68 |
69 | @Override
70 | public void run(final SourceContext<FakeKafkaRecord> sourceContext) throws Exception {
71 |
72 | int numberOfPartitions = assignedPartitions.size();
73 |
74 | if (!assignedPartitions.isEmpty()) {
75 | while (!cancelled) {
76 | int nextPartition = assignedPartitions.get(rand.nextInt(numberOfPartitions));
77 |
78 | if (idlePartitions.get(nextPartition)) {
79 | Thread.sleep(1000); // avoid spinning wait
80 | continue;
81 | }
82 |
83 | long nextTimestamp = getTimestampForPartition(nextPartition);
84 |
85 | byte[] serializedMeasurement =
86 | serializedMeasurements.get(rand.nextInt(serializedMeasurements.size()));
87 |
88 | if (rand.nextFloat() > 1 - poisonPillRate) {
89 | serializedMeasurement = Arrays.copyOf(serializedMeasurement, 10);
90 | }
91 |
92 | synchronized (sourceContext.getCheckpointLock()) {
93 | sourceContext.collect(
94 | new FakeKafkaRecord(
95 | nextTimestamp, null, serializedMeasurement, nextPartition));
96 | }
97 | }
98 | } else {
99 | // this source doesn't have any partitions and thus never emits any records
100 | // (and therefore also no watermarks), so we mark this subtask as idle to
101 | // not block watermark forwarding
102 | sourceContext.markAsTemporarilyIdle();
103 |
104 | // wait until this is canceled
105 | final Object waitLock = new Object();
106 | while (!cancelled) {
107 | try {
108 | //noinspection SynchronizationOnLocalVariableOrMethodParameter
109 | synchronized (waitLock) {
110 | waitLock.wait();
111 | }
112 | } catch (InterruptedException e) {
113 | if (cancelled) {
114 | // restore the interrupted state, and fall through the loop
115 | Thread.currentThread().interrupt();
116 | }
117 | }
118 | }
119 | }
120 | }
121 |
122 | private long getTimestampForPartition(int partition) {
123 | return System.currentTimeMillis() - (partition * 50L);
124 | }
125 |
126 | @Override
127 | public void cancel() {
128 | cancelled = true;
129 |
130 | // there will be an interrupt() call to the main thread anyway
131 | }
132 |
133 | @Override
134 | public void snapshotState(final FunctionSnapshotContext context) {
135 | }
136 |
137 | @Override
138 | public void initializeState(final FunctionInitializationContext context) {
139 | }
140 | }
141 |
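Usage note: the "poison pill" branch above truncates a serialized measurement to its first 10 bytes (Arrays.copyOf(serializedMeasurement, 10)), which makes the JSON payload unparseable downstream. A small sketch of the effect, assuming Jackson on the classpath; the payload shape and class name are illustrative, not part of the repository:

import com.fasterxml.jackson.databind.ObjectMapper;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class PoisonPillDemo {
    public static void main(String[] args) {
        ObjectMapper mapper = new ObjectMapper();
        byte[] valid = "{\"sensorId\":42,\"value\":13.37}".getBytes(StandardCharsets.UTF_8);
        byte[] poisoned = Arrays.copyOf(valid, 10); // same truncation as FakeKafkaSource
        try {
            mapper.readTree(poisoned); // fails: the JSON document is cut off
        } catch (IOException e) {
            System.out.println("Poison pill rejected: " + e.getMessage());
        }
    }
}

Downstream, this is exactly what the deserializers' numInvalidRecords counters measure.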
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/GeoUtils.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 |
3 | import com.ververica.flinktraining.provided.DoNotChangeThis;
4 |
5 | @SuppressWarnings("WeakerAccess")
6 | @DoNotChangeThis
7 | public class GeoUtils {
8 |
9 | // bounding box of the area of the USA
10 | public final static double US_LON_EAST = -66.9326;
11 | public final static double US_LON_WEST = -125.0011;
12 | public final static double US_LAT_NORTH = 49.5904;
13 | public final static double US_LAT_SOUTH = 24.9493;
14 |
15 | /**
16 | * Checks if a location specified by longitude and latitude values is
17 | * within the geo boundaries of the USA.
18 | *
19 | * @param lon longitude of the location to check
20 | * @param lat latitude of the location to check
21 | *
22 | * @return true if the location is within US boundaries, otherwise false.
23 | */
24 | public static boolean isInUS(double lon, double lat) {
25 | return !(lon > US_LON_EAST || lon < US_LON_WEST) &&
26 | !(lat > US_LAT_NORTH || lat < US_LAT_SOUTH);
27 | }
28 |
29 | // bounding box of the area of Germany
30 | public final static double DE_LON_EAST = 15.0419319;
31 | public final static double DE_LON_WEST = 5.8663153;
32 | public final static double DE_LAT_NORTH = 55.099161;
33 | public final static double DE_LAT_SOUTH = 47.2701114;
34 |
35 | /**
36 | * Checks if a location specified by longitude and latitude values is
37 | * within the geo boundaries of Germany.
38 | *
39 | * @param lon longitude of the location to check
40 | * @param lat latitude of the location to check
41 | *
42 | * @return true if the location is within German boundaries, otherwise false.
43 | */
44 | public static boolean isInDE(double lon, double lat) {
45 | return !(lon > DE_LON_EAST || lon < DE_LON_WEST) &&
46 | !(lat > DE_LAT_NORTH || lat < DE_LAT_SOUTH);
47 | }
48 | }
49 |
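A quick sanity check of the two bounding boxes with well-known coordinates (hypothetical demo class, not part of the repository):

public class GeoUtilsDemo {
    public static void main(String[] args) {
        System.out.println(GeoUtils.isInUS(-122.4194, 37.7749)); // San Francisco -> true
        System.out.println(GeoUtils.isInDE(13.4050, 52.5200));   // Berlin -> true
        System.out.println(GeoUtils.isInUS(13.4050, 52.5200));   // Berlin in the US box -> false
    }
}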
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/MeanGauge.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 |
3 | import org.apache.flink.metrics.Gauge;
4 | import org.apache.flink.metrics.View;
5 |
6 | import com.ververica.flinktraining.provided.DoNotChangeThis;
7 | import org.apache.commons.math3.stat.descriptive.moment.SecondMoment;
8 |
9 | /**
10 | * Gauge view for determining the mean per time span. Also allows access to min and max metrics via
11 | * the {@link MinGauge} and {@link MaxGauge} wrappers.
12 | */
13 | @DoNotChangeThis
14 | public class MeanGauge implements Gauge<Double>, View {
15 |
16 | private SimpleStats stats = new SimpleStats();
17 | private SimpleStats currentStats = new SimpleStats();
18 |
19 | @Override
20 | public void update() {
21 | currentStats = stats.copy();
22 | stats.clear();
23 | }
24 |
25 | public void addValue(double d) {
26 | stats.increment(d);
27 | }
28 |
29 | @Override
30 | public Double getValue() {
31 | return currentStats.getMean();
32 | }
33 |
34 | /**
35 |  * Wraps around the {@link MeanGauge} view to get the min of all reported values.
36 | */
37 | public static class MinGauge implements Gauge<Double> {
38 | private final MeanGauge base;
39 |
40 | public MinGauge(MeanGauge base) {
41 | this.base = base;
42 | }
43 |
44 | @Override
45 | public Double getValue() {
46 | return base.currentStats.getMin();
47 | }
48 | }
49 |
50 | /**
51 |  * Wraps around the {@link MeanGauge} view to get the max of all reported values.
52 | */
53 | public static class MaxGauge implements Gauge<Double> {
54 | private final MeanGauge base;
55 |
56 | public MaxGauge(MeanGauge base) {
57 | this.base = base;
58 | }
59 |
60 | @Override
61 | public Double getValue() {
62 | return base.currentStats.getMax();
63 | }
64 | }
65 |
66 | /**
67 | * Calculates min, max, mean (first moment), as well as the second moment.
68 | */
69 | private static class SimpleStats extends SecondMoment {
70 | private static final long serialVersionUID = 1L;
71 |
72 | private double min = Double.NaN;
73 | private double max = Double.NaN;
74 |
75 | @Override
76 | public void increment(double d) {
77 | if (d < min || Double.isNaN(min)) {
78 | min = d;
79 | }
80 | if (d > max || Double.isNaN(max)) {
81 | max = d;
82 | }
83 | super.increment(d);
84 | }
85 |
86 | @Override
87 | public SimpleStats copy() {
88 | SimpleStats result = new SimpleStats();
89 | SecondMoment.copy(this, result);
90 | result.min = min;
91 | result.max = max;
92 | return result;
93 | }
94 |
95 | double getMin() {
96 | return min;
97 | }
98 |
99 | double getMax() {
100 | return max;
101 | }
102 |
103 | double getMean() {
104 | return m1;
105 | }
106 | }
107 | }
108 |
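Registration sketch: how MeanGauge and its wrappers are wired into Flink's metric system from any rich function, mirroring the pattern ObjectReuseExtendedMeasurementSource uses further below. The surrounding map function and metric names are illustrative assumptions:

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;

public class TemperatureTracker extends RichMapFunction<Double, Double> {
    private static final long serialVersionUID = 1L;

    private transient MeanGauge meanGauge;

    @Override
    public void open(Configuration parameters) {
        // gauge(name, gauge) registers and returns the gauge instance
        meanGauge = getRuntimeContext().getMetricGroup().gauge("tempMean", new MeanGauge());
        getRuntimeContext().getMetricGroup().gauge("tempMin", new MeanGauge.MinGauge(meanGauge));
        getRuntimeContext().getMetricGroup().gauge("tempMax", new MeanGauge.MaxGauge(meanGauge));
    }

    @Override
    public Double map(Double value) {
        meanGauge.addValue(value); // accumulates; Flink calls update() periodically
        return value;
    }
}

Because MeanGauge is a View, the metric system snapshots and resets its stats on each update() call, so getValue() reports the mean per reporting interval rather than over the whole job lifetime.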
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/Measurement.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 |
3 | import com.ververica.flinktraining.provided.DoNotChangeThis;
4 |
5 | import java.util.Objects;
6 |
7 | @DoNotChangeThis
8 | public class Measurement {
9 |
10 | private int sensorId;
11 | private double value;
12 | private String location;
13 | private String measurementInformation;
14 |
15 | public Measurement() {
16 | }
17 |
18 | public Measurement(final int sensorId, final double value, final String location, final String measurementInformation) {
19 | this.sensorId = sensorId;
20 | this.value = value;
21 | this.location = location;
22 | this.measurementInformation = measurementInformation;
23 | }
24 |
25 | public String getMeasurementInformation() {
26 | return measurementInformation;
27 | }
28 |
29 | public void setMeasurementInformation(final String measurementInformation) {
30 | this.measurementInformation = measurementInformation;
31 | }
32 |
33 | public int getSensorId() {
34 | return sensorId;
35 | }
36 |
37 | public void setSensorId(final int sensorId) {
38 | this.sensorId = sensorId;
39 | }
40 |
41 | public double getValue() {
42 | return value;
43 | }
44 |
45 | public void setValue(final double value) {
46 | this.value = value;
47 | }
48 |
49 | public String getLocation() {
50 | return location;
51 | }
52 |
53 | public void setLocation(final String location) {
54 | this.location = location;
55 | }
56 |
57 | @Override
58 | public boolean equals(final Object o) {
59 | if (this == o) {
60 | return true;
61 | }
62 | if (o == null || getClass() != o.getClass()) {
63 | return false;
64 | }
65 | final Measurement that = (Measurement) o;
66 | return sensorId == that.sensorId &&
67 | Double.compare(that.value, value) == 0 &&
68 | Objects.equals(location, that.location) &&
69 | Objects.equals(measurementInformation, that.measurementInformation);
70 | }
71 |
72 | @Override
73 | public int hashCode() {
74 | return Objects.hash(sensorId, value, location, measurementInformation);
75 | }
76 |
77 | @Override
78 | public String toString() {
79 | final StringBuilder sb = new StringBuilder("Measurement{");
80 | sb.append("sensorId=").append(sensorId);
81 | sb.append(", value=").append(value);
82 | sb.append(", location='").append(location).append('\'');
83 | sb.append(", measurementInformation='").append(measurementInformation).append('\'');
84 | sb.append('}');
85 | return sb.toString();
86 | }
87 | }
88 |
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/ObjectReuseExtendedMeasurementSource.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 |
3 | import org.apache.flink.configuration.Configuration;
4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
5 |
6 | import com.ververica.flinktraining.provided.DoNotChangeThis;
7 |
8 | import java.util.SplittableRandom;
9 |
10 | /**
11 | * Artificial source for sensor measurements (temperature and wind speed) of a pre-defined set of
12 | * sensors (per parallel instance) creating measurements for two locations (inside the bounding
13 | * boxes of Germany (DE) and the USA (US)) in SI units (°C and km/h).
14 | */
15 | @SuppressWarnings("WeakerAccess")
16 | @DoNotChangeThis
17 | public class ObjectReuseExtendedMeasurementSource extends RichParallelSourceFunction<ExtendedMeasurement> {
18 |
19 | private static final long serialVersionUID = 1L;
20 |
21 | private static final int NUM_SENSORS = 10_000;
22 |
23 | public static final int LOWER_TEMPERATURE_CELCIUS = -10;
24 | public static final int UPPER_TEMPERATURE_CELCIUS = 35;
25 | public static final int LOWER_WIND_SPEED_KMH = 0;
26 | public static final int UPPER_WIND_SPEED_KMH = 335;
27 |
28 | private volatile boolean running = true;
29 |
30 | private transient ExtendedMeasurement.Sensor[] sensors;
31 | private transient ExtendedMeasurement.Location[] locations;
32 | private transient double[] lastValue;
33 | private transient MeanGauge sourceTemperatureUS;
34 |
35 | @Override
36 | public void open(final Configuration parameters) {
37 | initSensors();
38 |
39 | sourceTemperatureUS = getRuntimeContext().getMetricGroup()
40 | .gauge("sourceTemperatureUSmean", new MeanGauge());
41 | getRuntimeContext().getMetricGroup().gauge(
42 | "sourceTemperatureUSmin", new MeanGauge.MinGauge(sourceTemperatureUS));
43 | getRuntimeContext().getMetricGroup().gauge(
44 | "sourceTemperatureUSmax", new MeanGauge.MaxGauge(sourceTemperatureUS));
45 | }
46 |
47 | @Override
48 | public void run(SourceContext<ExtendedMeasurement> ctx) {
49 | final SplittableRandom rnd = new SplittableRandom();
50 | final Object lock = ctx.getCheckpointLock();
51 |
52 | while (running) {
53 | ExtendedMeasurement event = randomEvent(rnd);
54 |
55 | //noinspection SynchronizationOnLocalVariableOrMethodParameter
56 | synchronized (lock) {
57 | ctx.collect(event);
58 | }
59 | }
60 | }
61 |
62 | @Override
63 | public void cancel() {
64 | running = false;
65 | }
66 |
67 | /**
68 | * Creates sensor metadata that this source instance will work with.
69 | */
70 | private void initSensors() {
71 | final SplittableRandom rnd = new SplittableRandom();
72 | final ExtendedMeasurement.SensorType[] sensorTypes =
73 | ExtendedMeasurement.SensorType.values();
74 |
75 | final int start = getRuntimeContext().getIndexOfThisSubtask() * NUM_SENSORS;
76 | this.sensors = new ExtendedMeasurement.Sensor[NUM_SENSORS];
77 | this.lastValue = new double[NUM_SENSORS];
78 | this.locations = new ExtendedMeasurement.Location[NUM_SENSORS];
79 | for (int i = 0; i < NUM_SENSORS; ++i) {
80 | long sensorId = start + i;
81 | long vendorId = sensorId % 100;
82 | final ExtendedMeasurement.SensorType sensorType =
83 | sensorTypes[(i / 2) % sensorTypes.length];
84 | sensors[i] = new ExtendedMeasurement.Sensor(sensorId, vendorId, sensorType);
85 |
86 | lastValue[i] = randomInitialMeasurementValue(rnd, sensorType);
87 |
88 | // assume that a sensor has a fixed position
89 | locations[i] = randomInitialLocation(rnd, i);
90 | }
91 | }
92 |
93 | /**
94 | * Creates a random measurement value that a sensor will start with.
95 | */
96 | private double randomInitialMeasurementValue(
97 | SplittableRandom rnd,
98 | ExtendedMeasurement.SensorType sensorType) {
99 | switch (sensorType) {
100 | case Temperature:
101 | // -10°C - 35°C
102 | return rnd.nextInt(
103 | (UPPER_TEMPERATURE_CELCIUS - LOWER_TEMPERATURE_CELCIUS) * 10) / 10.0 +
104 | LOWER_TEMPERATURE_CELCIUS;
105 | case Wind:
106 | // 0km/h - 335km/h
107 | return rnd.nextInt((UPPER_WIND_SPEED_KMH - LOWER_WIND_SPEED_KMH) * 10) / 10.0 +
108 | LOWER_WIND_SPEED_KMH;
109 | default:
110 | throw new IllegalStateException("Unknown sensor type: " + sensorType);
111 | }
112 | }
113 |
114 | /**
115 | * Creates a random location for a sensor, distinguishing two bounding boxes: US and DE.
116 | */
117 | private static ExtendedMeasurement.Location randomInitialLocation(SplittableRandom rnd, int i) {
118 | final double longitude;
119 | final double latitude;
120 | // let's assume that no selected region wraps around LON -180/+180
121 | if (i < NUM_SENSORS / 2) {
122 | // in US
123 | longitude = rnd.nextDouble() * (GeoUtils.US_LON_EAST - GeoUtils.US_LON_WEST) + GeoUtils.US_LON_WEST;
124 | latitude = rnd.nextDouble() * (GeoUtils.US_LAT_NORTH - GeoUtils.US_LAT_SOUTH) + GeoUtils.US_LAT_SOUTH;
125 | } else {
126 | // in DE
127 | longitude = rnd.nextDouble() * (GeoUtils.DE_LON_EAST - GeoUtils.DE_LON_WEST) + GeoUtils.DE_LON_WEST;
128 | latitude = rnd.nextDouble() * (GeoUtils.DE_LAT_NORTH - GeoUtils.DE_LAT_SOUTH) + GeoUtils.DE_LAT_SOUTH;
129 | }
130 | double height = rnd.nextDouble() * 3000;
131 | return new ExtendedMeasurement.Location(longitude, latitude, height);
132 | }
133 |
134 | /**
135 | * Creates a randomized sensor value during runtime of the source. Each new value differs
136 | * slightly from the previous value that this sensor had.
137 | */
138 | private ExtendedMeasurement randomEvent(SplittableRandom rnd) {
139 | int randomIdx = rnd.nextInt(sensors.length);
140 | ExtendedMeasurement.Sensor sensor = sensors[randomIdx];
141 | ExtendedMeasurement.Location location = locations[randomIdx];
142 |
143 | long timestamp = System.currentTimeMillis();
144 |
145 | final double value = randomChangeMeasurementValue(
146 | rnd,
147 | sensor.getSensorType(),
148 | location,
149 | lastValue[randomIdx]);
150 |
151 | lastValue[randomIdx] = value;
152 |
153 | final ExtendedMeasurement.MeasurementValue measurement =
154 | new ExtendedMeasurement.MeasurementValue(
155 | value,
156 | (float) (rnd.nextInt(100) - 50) / 10.0f, // +- 5
157 | timestamp);
158 |
159 | return new ExtendedMeasurement(
160 | new ExtendedMeasurement.Sensor(
161 | sensor.getSensorId(), sensor.getVendorId(), sensor.getSensorType()),
162 | new ExtendedMeasurement.Location(
163 | location.getLongitude(), location.getLatitude(), location.getHeight()),
164 | measurement);
165 | }
166 |
167 | /**
168 | * Generates a new sensor value that is +-3 of the old value and reports a custom metric for
169 | * sensor values in the US.
170 | */
171 | private double randomChangeMeasurementValue(
172 | SplittableRandom rnd,
173 | ExtendedMeasurement.SensorType sensorType,
174 | ExtendedMeasurement.Location location,
175 | double lastValue) {
176 | double change = rnd.nextDouble(6) - 3.0; // +- 3
177 | final double value;
178 | switch (sensorType) {
179 | case Temperature:
180 | value = newValueWithinBounds(
181 | lastValue, change, LOWER_TEMPERATURE_CELCIUS, UPPER_TEMPERATURE_CELCIUS);
182 | if (GeoUtils.isInUS(location.getLongitude(), location.getLatitude())) {
183 | sourceTemperatureUS.addValue(value);
184 | }
185 | break;
186 | case Wind:
187 | value = newValueWithinBounds(
188 | lastValue, change, LOWER_WIND_SPEED_KMH, UPPER_WIND_SPEED_KMH);
189 | break;
190 | default:
191 | throw new InternalError("Unknown sensor type: " + sensorType);
192 | }
193 | return value;
194 | }
195 |
196 | /**
197 |  * Returns either {@code lastValue + change} (if within the given bounds) or
198 |  * {@code lastValue - change} (otherwise).
199 | */
200 | private static double newValueWithinBounds(
201 | double lastValue,
202 | double change,
203 | double lowerLimit,
204 | double upperLimit) {
205 | double value;
206 | if (lastValue + change >= lowerLimit && lastValue + change <= upperLimit) {
207 | value = lastValue + change;
208 | } else {
209 | value = lastValue - change;
210 | }
211 | return value;
212 | }
213 | }
214 |
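A worked example of the bounce-back logic in newValueWithinBounds (hypothetical demo class): with temperature bounds [-10, 35], a last value of 34 and a change of +3 would overshoot, so the change is applied in the opposite direction instead.

public class BounceBackDemo {
    public static void main(String[] args) {
        double lastValue = 34.0, change = 3.0, lower = -10.0, upper = 35.0;
        // 34 + 3 = 37 exceeds the upper bound of 35, so move the other way: 34 - 3 = 31
        double next = (lastValue + change >= lower && lastValue + change <= upper)
                ? lastValue + change
                : lastValue - change;
        System.out.println(next); // 31.0
    }
}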
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/SourceUtils.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 |
3 | import com.fasterxml.jackson.core.JsonProcessingException;
4 | import com.fasterxml.jackson.databind.ObjectMapper;
5 | import com.ververica.flinktraining.provided.DoNotChangeThis;
6 | import org.apache.commons.lang3.RandomStringUtils;
7 | import org.slf4j.Logger;
8 | import org.slf4j.LoggerFactory;
9 |
10 | import java.io.BufferedReader;
11 | import java.io.IOException;
12 | import java.io.InputStream;
13 | import java.io.InputStreamReader;
14 | import java.util.ArrayList;
15 | import java.util.Arrays;
16 | import java.util.List;
17 | import java.util.Random;
18 |
19 | @DoNotChangeThis
20 | public class SourceUtils {
21 |
22 | public static final Logger log = LoggerFactory.getLogger(SourceUtils.class);
23 |
24 | public static final int NUM_OF_MEASUREMENTS = 100_000;
25 | public static final int RANDOM_SEED = 1;
26 | public static final float FAILURE_RATE = 0.0001f;
27 | public static final List<Integer> IDLE_PARTITIONS = Arrays.asList(0, 4);
28 |
29 | public static FakeKafkaSource createFakeKafkaSource() {
30 | List<byte[]> serializedMeasurements = createSerializedMeasurements();
31 | return new FakeKafkaSource(RANDOM_SEED, FAILURE_RATE, IDLE_PARTITIONS, serializedMeasurements);
32 | }
33 |
34 | private static List<byte[]> createSerializedMeasurements() {
35 | Random rand = new Random(RANDOM_SEED);
36 | ObjectMapper mapper = new ObjectMapper();
37 |
38 | final List<String> locations = readLocationsFromFile();
39 |
40 | List<byte[]> measurements = new ArrayList<>();
41 | for (int i = 0; i < NUM_OF_MEASUREMENTS; i++) {
42 | Measurement nextMeasurement = new Measurement(rand.nextInt(100),
43 | rand.nextDouble() * 100, locations.get(rand.nextInt(locations.size())), RandomStringUtils.randomAlphabetic(30));
44 | try {
45 | measurements.add(mapper.writeValueAsBytes(nextMeasurement));
46 | } catch (JsonProcessingException e) {
47 | log.error("Unable to serialize measurement.", e);
48 | throw new RuntimeException(e);
49 | }
50 | }
51 | return measurements;
52 | }
53 |
54 | private static List<String> readLocationsFromFile() {
55 | List<String> locations = new ArrayList<>();
56 | try (InputStream is = SourceUtils.class.getResourceAsStream("/cities.csv");
57 | BufferedReader br = new BufferedReader(new InputStreamReader(is))) {
58 | String city;
59 | while ((city = br.readLine()) != null) {
60 | locations.add(city);
61 | }
62 | } catch (IOException e) {
63 | log.error("Unable to read cities from file.", e);
64 | throw new RuntimeException(e);
65 | }
66 | return locations;
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/WeatherUtils.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 |
3 | import com.ververica.flinktraining.provided.DoNotChangeThis;
4 |
5 | /**
6 | * Various tools to convert units used in weather sensors.
7 | */
8 | @SuppressWarnings("unused")
9 | @DoNotChangeThis
10 | public class WeatherUtils {
11 |
12 | /**
13 |  * Converts the given temperature from Fahrenheit to Celsius.
14 | */
15 | public static double fahrenheitToCelcius(double temperatureInFahrenheit) {
16 | return ((temperatureInFahrenheit - 32) * 5.0) / 9.0;
17 | }
18 |
19 | /**
20 |  * Converts the given temperature from Celsius to Fahrenheit.
21 | */
22 | public static double celciusToFahrenheit(double celcius) {
23 | return (celcius * 9.0) / 5.0 + 32;
24 | }
25 |
26 | /**
27 | * Miles per hour -> kilometres per hour.
28 | */
29 | public static double mphToKph(double mph) {
30 | return mph * 1.60934;
31 | }
32 |
33 | /**
34 | * Kilometres per hour -> miles per hour
35 | */
36 | public static double kphToMph(double kph) {
37 | return kph / 1.60934;
38 | }
39 | }
40 |
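Spot checks for the conversions above (hypothetical demo class; the expected values follow directly from the formulas):

public class WeatherUtilsDemo {
    public static void main(String[] args) {
        System.out.println(WeatherUtils.fahrenheitToCelcius(100.0)); // (100-32)*5/9 ~ 37.78
        System.out.println(WeatherUtils.celciusToFahrenheit(0.0));   // 0*9/5+32 = 32.0
        System.out.println(WeatherUtils.mphToKph(60.0));             // 60*1.60934 = 96.5604
        System.out.println(WeatherUtils.kphToMph(96.5604));          // back to 60.0
    }
}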
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/provided/troubleshoot/WindowedMeasurements.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.provided.troubleshoot;
2 |
3 | import com.ververica.flinktraining.provided.DoNotChangeThis;
4 |
5 | import java.util.Objects;
6 |
7 | @DoNotChangeThis
8 | public class WindowedMeasurements {
9 |
10 | private long windowStart;
11 | private long windowEnd;
12 | private String location;
13 | private long eventsPerWindow;
14 | private double sumPerWindow;
15 |
16 | public WindowedMeasurements() {
17 | }
18 |
19 | public WindowedMeasurements(final long windowStart, final long windowEnd, final String location, final long eventsPerWindow, final double sumPerWindow) {
20 | this.windowStart = windowStart;
21 | this.windowEnd = windowEnd;
22 | this.location = location;
23 | this.eventsPerWindow = eventsPerWindow;
24 | this.sumPerWindow = sumPerWindow;
25 | }
26 |
27 | public long getWindowStart() {
28 | return windowStart;
29 | }
30 |
31 | public void setWindowStart(final long windowStart) {
32 | this.windowStart = windowStart;
33 | }
34 |
35 | public long getWindowEnd() {
36 | return windowEnd;
37 | }
38 |
39 | public void setWindowEnd(final long windowEnd) {
40 | this.windowEnd = windowEnd;
41 | }
42 |
43 | public String getLocation() {
44 | return location;
45 | }
46 |
47 | public void setLocation(final String location) {
48 | this.location = location;
49 | }
50 |
51 | public long getEventsPerWindow() {
52 | return eventsPerWindow;
53 | }
54 |
55 | public void setEventsPerWindow(final long eventsPerWindow) {
56 | this.eventsPerWindow = eventsPerWindow;
57 | }
58 |
59 | public double getSumPerWindow() {
60 | return sumPerWindow;
61 | }
62 |
63 | public void setSumPerWindow(final double sumPerWindow) {
64 | this.sumPerWindow = sumPerWindow;
65 | }
66 |
67 | @Override
68 | public boolean equals(final Object o) {
69 | if (this == o) {
70 | return true;
71 | }
72 | if (o == null || getClass() != o.getClass()) {
73 | return false;
74 | }
75 | final WindowedMeasurements that = (WindowedMeasurements) o;
76 | return windowStart == that.windowStart &&
77 | windowEnd == that.windowEnd &&
78 | eventsPerWindow == that.eventsPerWindow &&
79 | Double.compare(that.sumPerWindow, sumPerWindow) == 0 &&
80 | Objects.equals(location, that.location);
81 | }
82 |
83 | @Override
84 | public int hashCode() {
85 | return Objects.hash(windowStart, windowEnd, location, eventsPerWindow, sumPerWindow);
86 | }
87 |
88 | @Override
89 | public String toString() {
90 | final StringBuilder sb = new StringBuilder("WindowedMeasurements{");
91 | sb.append("windowStart=").append(windowStart);
92 | sb.append(", windowEnd=").append(windowEnd);
93 | sb.append(", location='").append(location).append('\'');
94 | sb.append(", eventsPerWindow=").append(eventsPerWindow);
95 | sb.append(", sumPerWindow=").append(sumPerWindow);
96 | sb.append('}');
97 | return sb.toString();
98 | }
99 | }
100 |
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/SimpleMeasurement.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot;
2 |
3 | import java.util.Objects;
4 |
5 | public class SimpleMeasurement {
6 |
7 | private int sensorId;
8 | private double value;
9 | private String location;
10 |
11 | public SimpleMeasurement() {
12 | }
13 |
14 | public SimpleMeasurement(final int sensorId, final double value, final String location) {
15 | this.sensorId = sensorId;
16 | this.value = value;
17 | this.location = location;
18 | }
19 |
20 | public int getSensorId() {
21 | return sensorId;
22 | }
23 |
24 | public void setSensorId(final int sensorId) {
25 | this.sensorId = sensorId;
26 | }
27 |
28 | public double getValue() {
29 | return value;
30 | }
31 |
32 | public void setValue(final double value) {
33 | this.value = value;
34 | }
35 |
36 | public String getLocation() {
37 | return location;
38 | }
39 |
40 | public void setLocation(final String location) {
41 | this.location = location;
42 | }
43 |
44 | @Override
45 | public boolean equals(final Object o) {
46 | if (this == o) {
47 | return true;
48 | }
49 | if (o == null || getClass() != o.getClass()) {
50 | return false;
51 | }
52 | final SimpleMeasurement
53 | that = (SimpleMeasurement) o;
54 | return sensorId == that.sensorId &&
55 | Double.compare(that.value, value) == 0 &&
56 | Objects.equals(location, that.location);
57 | }
58 |
59 | @Override
60 | public int hashCode() {
61 | return Objects.hash(sensorId, value, location);
62 | }
63 |
64 | @Override
65 | public String toString() {
66 | final StringBuilder sb = new StringBuilder("SimpleMeasurement{");
67 | sb.append("sensorId=").append(sensorId);
68 | sb.append(", value=").append(value);
69 | sb.append(", location='").append(location).append('\'');
70 | sb.append('}');
71 | return sb.toString();
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution1.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot;
2 |
3 | import org.apache.flink.api.common.functions.RichFlatMapFunction;
4 | import org.apache.flink.api.java.utils.ParameterTool;
5 | import org.apache.flink.configuration.Configuration;
6 | import org.apache.flink.metrics.Counter;
7 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram;
8 | import org.apache.flink.streaming.api.TimeCharacteristic;
9 | import org.apache.flink.streaming.api.datastream.DataStream;
10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink;
13 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
14 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
15 | import org.apache.flink.streaming.api.windowing.time.Time;
16 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
17 | import org.apache.flink.util.Collector;
18 | import org.apache.flink.util.OutputTag;
19 |
20 | import com.fasterxml.jackson.databind.DeserializationFeature;
21 | import com.fasterxml.jackson.databind.JsonNode;
22 | import com.fasterxml.jackson.databind.ObjectMapper;
23 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord;
24 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements;
25 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils;
26 |
27 | import java.io.IOException;
28 | import java.util.concurrent.TimeUnit;
29 |
30 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment;
31 |
32 | public class TroubledStreamingJobSolution1 {
33 |
34 | public static void main(String[] args) throws Exception {
35 | ParameterTool parameters = ParameterTool.fromArgs(args);
36 |
37 | final boolean local = parameters.getBoolean("local", false);
38 |
39 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local);
40 |
41 | //Time Characteristics
42 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
43 | env.getConfig().setAutoWatermarkInterval(2000);
44 |
45 | //Checkpointing Configuration
46 | env.enableCheckpointing(5000);
47 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000);
48 |
49 | DataStream<JsonNode> sourceStream = env
50 | .addSource(SourceUtils.createFakeKafkaSource())
51 | .name("FakeKafkaSource")
52 | .uid("FakeKafkaSource")
53 | .assignTimestampsAndWatermarks(new MeasurementTSExtractor())
54 | .name("Watermarks")
55 | .uid("Watermarks")
56 | .flatMap(new MeasurementDeserializer())
57 | .name("Deserialization")
58 | .uid("Deserialization");
59 |
60 | OutputTag<JsonNode> lateDataTag = new OutputTag<JsonNode>("late-data") {
61 | private static final long serialVersionUID = 33513631677208956L;
62 | };
63 |
64 | SingleOutputStreamOperator<WindowedMeasurements> aggregatedPerLocation = sourceStream
65 | .keyBy(jsonNode -> jsonNode.get("location").asText())
66 | .timeWindow(Time.of(1, TimeUnit.SECONDS))
67 | .sideOutputLateData(lateDataTag)
68 | .process(new MeasurementWindowAggregatingFunction())
69 | .name("WindowedAggregationPerLocation")
70 | .uid("WindowedAggregationPerLocation");
71 |
72 | if (local) {
73 | aggregatedPerLocation.print()
74 | .name("NormalOutput")
75 | .uid("NormalOutput")
76 | .disableChaining();
77 | aggregatedPerLocation.getSideOutput(lateDataTag).printToErr()
78 | .name("LateDataSink")
79 | .uid("LateDataSink")
80 | .disableChaining();
81 | } else {
82 | aggregatedPerLocation.addSink(new DiscardingSink<>())
83 | .name("NormalOutput")
84 | .uid("NormalOutput")
85 | .disableChaining();
86 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>())
87 | .name("LateDataSink")
88 | .uid("LateDataSink")
89 | .disableChaining();
90 | }
91 |
92 | env.execute(TroubledStreamingJobSolution1.class.getSimpleName());
93 | }
94 |
95 | /**
96 | * Deserializes the JSON Kafka message.
97 | */
98 | public static class MeasurementDeserializer extends RichFlatMapFunction<FakeKafkaRecord, JsonNode> {
99 | private static final long serialVersionUID = 2L;
100 |
101 | private Counter numInvalidRecords;
102 |
103 | @Override
104 | public void open(final Configuration parameters) throws Exception {
105 | super.open(parameters);
106 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords");
107 | }
108 |
109 | @Override
110 | public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector<JsonNode> out) {
111 | final JsonNode node;
112 | try {
113 | node = deserialize(kafkaRecord.getValue());
114 | } catch (IOException e) {
115 | numInvalidRecords.inc();
116 | return;
117 | }
118 | out.collect(node);
119 | }
120 |
121 | private JsonNode deserialize(final byte[] bytes) throws IOException {
122 | return ObjectMapperSingleton.getInstance().readValue(bytes, JsonNode.class);
123 | }
124 | }
125 |
126 | public static class MeasurementTSExtractor
127 | extends BoundedOutOfOrdernessTimestampExtractor<FakeKafkaRecord> {
128 | private static final long serialVersionUID = 1L;
129 |
130 | MeasurementTSExtractor() {
131 | super(Time.of(250, TimeUnit.MILLISECONDS));
132 | }
133 |
134 | @Override
135 | public long extractTimestamp(final FakeKafkaRecord record) {
136 | return record.getTimestamp();
137 | }
138 | }
139 |
140 | public static class MeasurementWindowAggregatingFunction
141 | extends ProcessWindowFunction<JsonNode, WindowedMeasurements, String, TimeWindow> {
142 | private static final long serialVersionUID = 1L;
143 |
144 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000;
145 |
146 | private transient DescriptiveStatisticsHistogram eventTimeLag;
147 |
148 | MeasurementWindowAggregatingFunction() {
149 | }
150 |
151 | @Override
152 | public void process(
153 | final String location,
154 | final Context context,
155 | final Iterable<JsonNode> input,
156 | final Collector<WindowedMeasurements> out) {
157 |
158 | WindowedMeasurements aggregate = new WindowedMeasurements();
159 | for (JsonNode record : input) {
160 | double result = Double.parseDouble(record.get("value").asText());
161 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result);
162 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1);
163 | }
164 |
165 | final TimeWindow window = context.window();
166 | aggregate.setWindowStart(window.getStart());
167 | aggregate.setWindowEnd(window.getEnd());
168 | aggregate.setLocation(location);
169 |
170 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd());
171 | out.collect(aggregate);
172 | }
173 |
174 | @Override
175 | public void open(Configuration parameters) throws Exception {
176 | super.open(parameters);
177 |
178 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag",
179 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE));
180 | }
181 | }
182 |
183 | private static class ObjectMapperSingleton {
184 | static ObjectMapper getInstance() {
185 | ObjectMapper objectMapper = new ObjectMapper();
186 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
187 | return objectMapper;
188 | }
189 | }
190 | }
191 |
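What one element looks like as it moves through the pipeline above: a Measurement is serialized to JSON by SourceUtils, deserialized back to a JsonNode, keyed by its "location" field, and its "value" is summed per one-second window. A self-contained sketch of that record shape (the concrete field values are made up):

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.nio.charset.StandardCharsets;

public class PipelineElementDemo {
    public static void main(String[] args) throws Exception {
        byte[] payload =
                "{\"sensorId\":7,\"value\":21.5,\"location\":\"Berlin\",\"measurementInformation\":\"abc\"}"
                        .getBytes(StandardCharsets.UTF_8);
        JsonNode node = new ObjectMapper().readValue(payload, JsonNode.class);
        System.out.println(node.get("location").asText());                // key used in keyBy(...)
        System.out.println(Double.parseDouble(node.get("value").asText())); // summed per window
    }
}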
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution2.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot;
2 |
3 | import org.apache.flink.api.common.functions.RichFlatMapFunction;
4 | import org.apache.flink.api.java.utils.ParameterTool;
5 | import org.apache.flink.configuration.Configuration;
6 | import org.apache.flink.metrics.Counter;
7 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram;
8 | import org.apache.flink.streaming.api.TimeCharacteristic;
9 | import org.apache.flink.streaming.api.datastream.DataStream;
10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
13 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink;
14 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
15 | import org.apache.flink.streaming.api.watermark.Watermark;
16 | import org.apache.flink.streaming.api.windowing.time.Time;
17 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
18 | import org.apache.flink.util.Collector;
19 | import org.apache.flink.util.OutputTag;
20 |
21 | import com.fasterxml.jackson.databind.DeserializationFeature;
22 | import com.fasterxml.jackson.databind.JsonNode;
23 | import com.fasterxml.jackson.databind.ObjectMapper;
24 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord;
25 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements;
26 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils;
27 |
28 | import java.io.IOException;
29 | import java.util.concurrent.TimeUnit;
30 |
31 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment;
32 |
33 | public class TroubledStreamingJobSolution2 {
34 |
35 | public static void main(String[] args) throws Exception {
36 | ParameterTool parameters = ParameterTool.fromArgs(args);
37 |
38 | final boolean local = parameters.getBoolean("local", false);
39 |
40 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local);
41 |
42 | //Time Characteristics
43 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
44 | env.getConfig().setAutoWatermarkInterval(2000);
45 |
46 | //Checkpointing Configuration
47 | env.enableCheckpointing(5000);
48 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000);
49 |
50 | DataStream<JsonNode> sourceStream = env
51 | .addSource(SourceUtils.createFakeKafkaSource())
52 | .name("FakeKafkaSource")
53 | .uid("FakeKafkaSource")
54 | .assignTimestampsAndWatermarks(
55 | new MeasurementTSExtractor(Time.of(250, TimeUnit.MILLISECONDS),
56 | Time.of(1, TimeUnit.SECONDS)))
57 | .name("Watermarks")
58 | .uid("Watermarks")
59 | .flatMap(new MeasurementDeserializer())
60 | .name("Deserialization")
61 | .uid("Deserialization");
62 |
63 | OutputTag<JsonNode> lateDataTag = new OutputTag<JsonNode>("late-data") {
64 | private static final long serialVersionUID = 33513631677208956L;
65 | };
66 |
67 | SingleOutputStreamOperator<WindowedMeasurements> aggregatedPerLocation = sourceStream
68 | .keyBy(jsonNode -> jsonNode.get("location").asText())
69 | .timeWindow(Time.of(1, TimeUnit.SECONDS))
70 | .sideOutputLateData(lateDataTag)
71 | .process(new MeasurementWindowAggregatingFunction())
72 | .name("WindowedAggregationPerLocation")
73 | .uid("WindowedAggregationPerLocation");
74 |
75 | if (local) {
76 | aggregatedPerLocation.print()
77 | .name("NormalOutput")
78 | .uid("NormalOutput")
79 | .disableChaining();
80 | aggregatedPerLocation.getSideOutput(lateDataTag).printToErr()
81 | .name("LateDataSink")
82 | .uid("LateDataSink")
83 | .disableChaining();
84 | } else {
85 | aggregatedPerLocation.addSink(new DiscardingSink<>())
86 | .name("NormalOutput")
87 | .uid("NormalOutput")
88 | .disableChaining();
89 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>())
90 | .name("LateDataSink")
91 | .uid("LateDataSink")
92 | .disableChaining();
93 | }
94 |
95 | env.execute(TroubledStreamingJobSolution2.class.getSimpleName());
96 | }
97 |
98 | /**
99 | * Deserializes the JSON Kafka message.
100 | */
101 | public static class MeasurementDeserializer extends RichFlatMapFunction<FakeKafkaRecord, JsonNode> {
102 | private static final long serialVersionUID = 2L;
103 |
104 | private Counter numInvalidRecords;
105 |
106 | @Override
107 | public void open(final Configuration parameters) throws Exception {
108 | super.open(parameters);
109 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords");
110 | }
111 |
112 | @Override
113 | public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector<JsonNode> out) {
114 | final JsonNode node;
115 | try {
116 | node = deserialize(kafkaRecord.getValue());
117 | } catch (IOException e) {
118 | numInvalidRecords.inc();
119 | return;
120 | }
121 | out.collect(node);
122 | }
123 |
124 | private JsonNode deserialize(final byte[] bytes) throws IOException {
125 | return ObjectMapperSingleton.getInstance().readValue(bytes, JsonNode.class);
126 | }
127 | }
128 |
129 | public static class MeasurementTSExtractor implements AssignerWithPeriodicWatermarks<FakeKafkaRecord> {
130 | private static final long serialVersionUID = 2L;
131 |
132 | private long currentMaxTimestamp;
133 | private long lastEmittedWatermark = Long.MIN_VALUE;
134 | private long lastRecordProcessingTime;
135 |
136 | private final long maxOutOfOrderness;
137 | private final long idleTimeout;
138 |
139 | MeasurementTSExtractor(Time maxOutOfOrderness, Time idleTimeout) {
140 | if (maxOutOfOrderness.toMilliseconds() < 0) {
141 | throw new RuntimeException("Tried to set the maximum allowed " +
142 | "lateness to " + maxOutOfOrderness +
143 | ". This parameter cannot be negative.");
144 | }
145 |
146 | if (idleTimeout.toMilliseconds() < 0) {
147 | throw new RuntimeException("Tried to set the idle Timeout" + idleTimeout +
148 | ". This parameter cannot be negative.");
149 | }
150 |
151 |
152 | this.maxOutOfOrderness = maxOutOfOrderness.toMilliseconds();
153 | this.idleTimeout = idleTimeout.toMilliseconds();
154 | this.currentMaxTimestamp = Long.MIN_VALUE;
155 | }
156 |
157 | public long getMaxOutOfOrdernessInMillis() {
158 | return maxOutOfOrderness;
159 | }
160 |
161 | @Override
162 | public final Watermark getCurrentWatermark() {
163 |
164 | // if last record was processed more than the idleTimeout in the past, consider this
165 | // source idle and set timestamp to current processing time
166 | long currentProcessingTime = System.currentTimeMillis();
167 | if (lastRecordProcessingTime < currentProcessingTime - idleTimeout) {
168 | this.currentMaxTimestamp = currentProcessingTime;
169 | }
170 |
171 | long potentialWM = this.currentMaxTimestamp - maxOutOfOrderness;
172 | if (potentialWM >= lastEmittedWatermark) {
173 | lastEmittedWatermark = potentialWM;
174 | }
175 | return new Watermark(lastEmittedWatermark);
176 | }
177 |
178 | @Override
179 | public final long extractTimestamp(FakeKafkaRecord element, long previousElementTimestamp) {
180 | lastRecordProcessingTime = System.currentTimeMillis();
181 | long timestamp = element.getTimestamp();
182 | if (timestamp > currentMaxTimestamp) {
183 | currentMaxTimestamp = timestamp;
184 | }
185 | return timestamp;
186 | }
187 | }
188 |
189 | public static class MeasurementWindowAggregatingFunction
190 |             extends ProcessWindowFunction<JsonNode, WindowedMeasurements, String, TimeWindow> {
191 | private static final long serialVersionUID = 1L;
192 |
193 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000;
194 |
195 | private transient DescriptiveStatisticsHistogram eventTimeLag;
196 |
197 | MeasurementWindowAggregatingFunction() {
198 | }
199 |
200 | @Override
201 | public void process(
202 | final String location,
203 | final Context context,
204 |                 final Iterable<JsonNode> input,
205 |                 final Collector<WindowedMeasurements> out) {
206 |
207 | WindowedMeasurements aggregate = new WindowedMeasurements();
208 | for (JsonNode record : input) {
209 | double result = Double.parseDouble(record.get("value").asText());
210 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result);
211 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1);
212 | }
213 |
214 | final TimeWindow window = context.window();
215 | aggregate.setWindowStart(window.getStart());
216 | aggregate.setWindowEnd(window.getEnd());
217 | aggregate.setLocation(location);
218 |
219 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd());
220 | out.collect(aggregate);
221 | }
222 |
223 | @Override
224 | public void open(Configuration parameters) throws Exception {
225 | super.open(parameters);
226 |
227 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag",
228 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE));
229 | }
230 | }
231 |
232 | private static class ObjectMapperSingleton {
233 | static ObjectMapper getInstance() {
234 | ObjectMapper objectMapper = new ObjectMapper();
235 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
236 | return objectMapper;
237 | }
238 | }
239 | }
240 |
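Every job in this listing shares the MeasurementTSExtractor shown above: the watermark trails the highest event timestamp seen so far by maxOutOfOrderness, and once no record has arrived for longer than idleTimeout the max timestamp is pinned to processing time, so event-time windows keep closing while the source is quiet. A minimal, self-contained sketch of that arithmetic (class name and sample values are hypothetical, not part of the training code):

    public class WatermarkMathSketch {
        public static void main(String[] args) {
            long maxOutOfOrderness = 250;  // ms, matching the jobs in this listing
            long idleTimeout = 1_000;      // ms
            long maxTimestamp = 4_800;     // highest event timestamp seen so far
            long lastRecordAt = 2_000;     // processing time of the last record
            long now = 5_000;              // current processing time

            // Source idle for more than 1s: advance on the wall clock instead.
            if (lastRecordAt < now - idleTimeout) {
                maxTimestamp = now;        // becomes 5_000
            }
            // The watermark always trails the max timestamp by the allowed lateness.
            System.out.println("watermark = " + (maxTimestamp - maxOutOfOrderness)); // 4_750
        }
    }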
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution31.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot;
2 |
3 | import org.apache.flink.api.common.functions.RichFlatMapFunction;
4 | import org.apache.flink.api.java.utils.ParameterTool;
5 | import org.apache.flink.configuration.Configuration;
6 | import org.apache.flink.metrics.Counter;
7 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram;
8 | import org.apache.flink.streaming.api.TimeCharacteristic;
9 | import org.apache.flink.streaming.api.datastream.DataStream;
10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
13 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink;
14 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
15 | import org.apache.flink.streaming.api.watermark.Watermark;
16 | import org.apache.flink.streaming.api.windowing.time.Time;
17 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
18 | import org.apache.flink.util.Collector;
19 | import org.apache.flink.util.OutputTag;
20 |
21 | import com.fasterxml.jackson.databind.DeserializationFeature;
22 | import com.fasterxml.jackson.databind.JsonNode;
23 | import com.fasterxml.jackson.databind.ObjectMapper;
24 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord;
25 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements;
26 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils;
27 |
28 | import java.io.IOException;
29 | import java.util.concurrent.TimeUnit;
30 |
31 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment;
32 |
33 | public class TroubledStreamingJobSolution31 {
34 |
35 | public static void main(String[] args) throws Exception {
36 | ParameterTool parameters = ParameterTool.fromArgs(args);
37 |
38 | final boolean local = parameters.getBoolean("local", false);
39 |
40 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local);
41 |
42 | //Time Characteristics
43 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
44 | env.getConfig().setAutoWatermarkInterval(100);
45 |
46 | //Checkpointing Configuration
47 | env.enableCheckpointing(5000);
48 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000);
49 |
50 |         DataStream<JsonNode> sourceStream = env
51 | .addSource(SourceUtils.createFakeKafkaSource())
52 | .name("FakeKafkaSource")
53 | .uid("FakeKafkaSource")
54 | .assignTimestampsAndWatermarks(
55 | new MeasurementTSExtractor(Time.of(250, TimeUnit.MILLISECONDS),
56 | Time.of(1, TimeUnit.SECONDS)))
57 | .name("Watermarks")
58 | .uid("Watermarks")
59 | .flatMap(new MeasurementDeserializer())
60 | .name("Deserialization")
61 | .uid("Deserialization");
62 |
63 |         OutputTag<JsonNode> lateDataTag = new OutputTag<JsonNode>("late-data") {
64 | private static final long serialVersionUID = 33513631677208956L;
65 | };
66 |
67 |         SingleOutputStreamOperator<WindowedMeasurements> aggregatedPerLocation = sourceStream
68 | .keyBy(jsonNode -> jsonNode.get("location").asText())
69 | .timeWindow(Time.of(1, TimeUnit.SECONDS))
70 | .sideOutputLateData(lateDataTag)
71 | .process(new MeasurementWindowAggregatingFunction())
72 | .name("WindowedAggregationPerLocation")
73 | .uid("WindowedAggregationPerLocation");
74 |
75 | if (local) {
76 | aggregatedPerLocation.print()
77 | .name("NormalOutput")
78 | .uid("NormalOutput")
79 | .disableChaining();
80 | aggregatedPerLocation.getSideOutput(lateDataTag).printToErr()
81 | .name("LateDataSink")
82 | .uid("LateDataSink")
83 | .disableChaining();
84 | } else {
85 | aggregatedPerLocation.addSink(new DiscardingSink<>())
86 | .name("NormalOutput")
87 | .uid("NormalOutput")
88 | .disableChaining();
89 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>())
90 | .name("LateDataSink")
91 | .uid("LateDataSink")
92 | .disableChaining();
93 | }
94 |
95 | env.execute(TroubledStreamingJobSolution31.class.getSimpleName());
96 | }
97 |
98 | /**
99 | * Deserializes the JSON Kafka message.
100 | */
101 |     public static class MeasurementDeserializer extends RichFlatMapFunction<FakeKafkaRecord, JsonNode> {
102 | private static final long serialVersionUID = 2L;
103 |
104 | private Counter numInvalidRecords;
105 |
106 | @Override
107 | public void open(final Configuration parameters) throws Exception {
108 | super.open(parameters);
109 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords");
110 | }
111 |
112 | @Override
113 |         public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector<JsonNode> out) {
114 | final JsonNode node;
115 | try {
116 | node = deserialize(kafkaRecord.getValue());
117 | } catch (IOException e) {
118 | numInvalidRecords.inc();
119 | return;
120 | }
121 | out.collect(node);
122 | }
123 |
124 | private JsonNode deserialize(final byte[] bytes) throws IOException {
125 | return ObjectMapperSingleton.getInstance().readValue(bytes, JsonNode.class);
126 | }
127 | }
128 |
129 |     public static class MeasurementTSExtractor implements AssignerWithPeriodicWatermarks<FakeKafkaRecord> {
130 | private static final long serialVersionUID = 2L;
131 |
132 | private long currentMaxTimestamp;
133 | private long lastEmittedWatermark = Long.MIN_VALUE;
134 | private long lastRecordProcessingTime;
135 |
136 | private final long maxOutOfOrderness;
137 | private final long idleTimeout;
138 |
139 | MeasurementTSExtractor(Time maxOutOfOrderness, Time idleTimeout) {
140 | if (maxOutOfOrderness.toMilliseconds() < 0) {
141 | throw new RuntimeException("Tried to set the maximum allowed " +
142 | "lateness to " + maxOutOfOrderness +
143 | ". This parameter cannot be negative.");
144 | }
145 |
146 | if (idleTimeout.toMilliseconds() < 0) {
147 |                 throw new RuntimeException("Tried to set the idle timeout to " + idleTimeout +
148 | ". This parameter cannot be negative.");
149 | }
150 |
151 |
152 | this.maxOutOfOrderness = maxOutOfOrderness.toMilliseconds();
153 | this.idleTimeout = idleTimeout.toMilliseconds();
154 | this.currentMaxTimestamp = Long.MIN_VALUE;
155 | }
156 |
157 | public long getMaxOutOfOrdernessInMillis() {
158 | return maxOutOfOrderness;
159 | }
160 |
161 | @Override
162 | public final Watermark getCurrentWatermark() {
163 |
164 | // if last record was processed more than the idleTimeout in the past, consider this
165 | // source idle and set timestamp to current processing time
166 | long currentProcessingTime = System.currentTimeMillis();
167 | if (lastRecordProcessingTime < currentProcessingTime - idleTimeout) {
168 | this.currentMaxTimestamp = currentProcessingTime;
169 | }
170 |
171 | long potentialWM = this.currentMaxTimestamp - maxOutOfOrderness;
172 | if (potentialWM >= lastEmittedWatermark) {
173 | lastEmittedWatermark = potentialWM;
174 | }
175 | return new Watermark(lastEmittedWatermark);
176 | }
177 |
178 | @Override
179 | public final long extractTimestamp(FakeKafkaRecord element, long previousElementTimestamp) {
180 | lastRecordProcessingTime = System.currentTimeMillis();
181 | long timestamp = element.getTimestamp();
182 | if (timestamp > currentMaxTimestamp) {
183 | currentMaxTimestamp = timestamp;
184 | }
185 | return timestamp;
186 | }
187 | }
188 |
189 | public static class MeasurementWindowAggregatingFunction
190 |             extends ProcessWindowFunction<JsonNode, WindowedMeasurements, String, TimeWindow> {
191 | private static final long serialVersionUID = 1L;
192 |
193 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000;
194 |
195 | private transient DescriptiveStatisticsHistogram eventTimeLag;
196 |
197 | MeasurementWindowAggregatingFunction() {
198 | }
199 |
200 | @Override
201 | public void process(
202 | final String location,
203 | final Context context,
204 |                 final Iterable<JsonNode> input,
205 |                 final Collector<WindowedMeasurements> out) {
206 |
207 | WindowedMeasurements aggregate = new WindowedMeasurements();
208 | for (JsonNode record : input) {
209 | double result = Double.parseDouble(record.get("value").asText());
210 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result);
211 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1);
212 | }
213 |
214 | final TimeWindow window = context.window();
215 | aggregate.setWindowStart(window.getStart());
216 | aggregate.setWindowEnd(window.getEnd());
217 | aggregate.setLocation(location);
218 |
219 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd());
220 | out.collect(aggregate);
221 | }
222 |
223 | @Override
224 | public void open(Configuration parameters) throws Exception {
225 | super.open(parameters);
226 |
227 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag",
228 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE));
229 | }
230 | }
231 |
232 | private static class ObjectMapperSingleton {
233 | static ObjectMapper getInstance() {
234 | ObjectMapper objectMapper = new ObjectMapper();
235 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
236 | return objectMapper;
237 | }
238 | }
239 | }
240 |
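Solution 3.1 keys the JsonNode stream by location and aggregates it in 1-second tumbling event-time windows, routing records that arrive behind the watermark to the late-data side output instead of silently dropping them. Tumbling windows are aligned to the epoch; the sketch below illustrates the assignment using Flink's own helper (the wrapping class is hypothetical):

    import org.apache.flink.streaming.api.windowing.windows.TimeWindow;

    public class WindowAlignmentSketch {
        public static void main(String[] args) {
            long eventTimestamp = 10_250;  // ms since the epoch
            // With 1s tumbling windows and no offset, this record belongs
            // to the window [10_000, 11_000).
            long start = TimeWindow.getWindowStartWithOffset(eventTimestamp, 0, 1_000);
            System.out.println("[" + start + ", " + (start + 1_000) + ")");
        }
    }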
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution32.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot;
2 |
3 | import org.apache.flink.api.common.functions.AggregateFunction;
4 | import org.apache.flink.api.common.functions.RichFlatMapFunction;
5 | import org.apache.flink.api.java.utils.ParameterTool;
6 | import org.apache.flink.configuration.Configuration;
7 | import org.apache.flink.metrics.Counter;
8 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram;
9 | import org.apache.flink.streaming.api.TimeCharacteristic;
10 | import org.apache.flink.streaming.api.datastream.DataStream;
11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
13 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
14 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink;
15 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
16 | import org.apache.flink.streaming.api.watermark.Watermark;
17 | import org.apache.flink.streaming.api.windowing.time.Time;
18 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
19 | import org.apache.flink.util.Collector;
20 | import org.apache.flink.util.OutputTag;
21 |
22 | import com.fasterxml.jackson.databind.DeserializationFeature;
23 | import com.fasterxml.jackson.databind.JsonNode;
24 | import com.fasterxml.jackson.databind.ObjectMapper;
25 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord;
26 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements;
27 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils;
28 |
29 | import java.io.IOException;
30 | import java.util.concurrent.TimeUnit;
31 |
32 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment;
33 |
34 | public class TroubledStreamingJobSolution32 {
35 |
36 | public static void main(String[] args) throws Exception {
37 | ParameterTool parameters = ParameterTool.fromArgs(args);
38 |
39 | final boolean local = parameters.getBoolean("local", false);
40 |
41 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local);
42 |
43 | //Time Characteristics
44 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
45 | env.getConfig().setAutoWatermarkInterval(100);
46 |
47 | //Checkpointing Configuration
48 | env.enableCheckpointing(5000);
49 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000);
50 |
51 |         DataStream<JsonNode> sourceStream = env
52 | .addSource(SourceUtils.createFakeKafkaSource())
53 | .name("FakeKafkaSource")
54 | .uid("FakeKafkaSource")
55 | .assignTimestampsAndWatermarks(
56 | new MeasurementTSExtractor(Time.of(250, TimeUnit.MILLISECONDS),
57 | Time.of(1, TimeUnit.SECONDS)))
58 | .name("Watermarks")
59 | .uid("Watermarks")
60 | .flatMap(new MeasurementDeserializer())
61 | .name("Deserialization")
62 | .uid("Deserialization");
63 |
64 |         OutputTag<JsonNode> lateDataTag = new OutputTag<JsonNode>("late-data") {
65 | private static final long serialVersionUID = 33513631677208956L;
66 | };
67 |
68 |         SingleOutputStreamOperator<WindowedMeasurements> aggregatedPerLocation = sourceStream
69 | .keyBy(jsonNode -> jsonNode.get("location").asText())
70 | .timeWindow(Time.of(1, TimeUnit.SECONDS))
71 | .sideOutputLateData(lateDataTag)
72 | .aggregate(new MeasurementWindowAggregatingFunction(),
73 | new MeasurementWindowProcessFunction())
74 | .name("WindowedAggregationPerLocation")
75 | .uid("WindowedAggregationPerLocation");
76 |
77 | if (local) {
78 | aggregatedPerLocation.print()
79 | .name("NormalOutput")
80 | .uid("NormalOutput")
81 | .disableChaining();
82 | aggregatedPerLocation.getSideOutput(lateDataTag).printToErr()
83 | .name("LateDataSink")
84 | .uid("LateDataSink")
85 | .disableChaining();
86 | } else {
87 | aggregatedPerLocation.addSink(new DiscardingSink<>())
88 | .name("NormalOutput")
89 | .uid("NormalOutput")
90 | .disableChaining();
91 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>())
92 | .name("LateDataSink")
93 | .uid("LateDataSink")
94 | .disableChaining();
95 | }
96 |
97 | env.execute(TroubledStreamingJobSolution32.class.getSimpleName());
98 | }
99 |
100 | /**
101 | * Deserializes the JSON Kafka message.
102 | */
103 |     public static class MeasurementDeserializer extends RichFlatMapFunction<FakeKafkaRecord, JsonNode> {
104 | private static final long serialVersionUID = 2L;
105 |
106 | private Counter numInvalidRecords;
107 |
108 | @Override
109 | public void open(final Configuration parameters) throws Exception {
110 | super.open(parameters);
111 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords");
112 | }
113 |
114 | @Override
115 |         public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector<JsonNode> out) {
116 | final JsonNode node;
117 | try {
118 | node = deserialize(kafkaRecord.getValue());
119 | } catch (IOException e) {
120 | numInvalidRecords.inc();
121 | return;
122 | }
123 | out.collect(node);
124 | }
125 |
126 | private JsonNode deserialize(final byte[] bytes) throws IOException {
127 | return ObjectMapperSingleton.getInstance().readValue(bytes, JsonNode.class);
128 | }
129 | }
130 |
131 |     public static class MeasurementTSExtractor implements AssignerWithPeriodicWatermarks<FakeKafkaRecord> {
132 | private static final long serialVersionUID = 2L;
133 |
134 | private long currentMaxTimestamp;
135 | private long lastEmittedWatermark = Long.MIN_VALUE;
136 | private long lastRecordProcessingTime;
137 |
138 | private final long maxOutOfOrderness;
139 | private final long idleTimeout;
140 |
141 | MeasurementTSExtractor(Time maxOutOfOrderness, Time idleTimeout) {
142 | if (maxOutOfOrderness.toMilliseconds() < 0) {
143 | throw new RuntimeException("Tried to set the maximum allowed " +
144 | "lateness to " + maxOutOfOrderness +
145 | ". This parameter cannot be negative.");
146 | }
147 |
148 | if (idleTimeout.toMilliseconds() < 0) {
149 |                 throw new RuntimeException("Tried to set the idle timeout to " + idleTimeout +
150 | ". This parameter cannot be negative.");
151 | }
152 |
153 |
154 | this.maxOutOfOrderness = maxOutOfOrderness.toMilliseconds();
155 | this.idleTimeout = idleTimeout.toMilliseconds();
156 | this.currentMaxTimestamp = Long.MIN_VALUE;
157 | }
158 |
159 | public long getMaxOutOfOrdernessInMillis() {
160 | return maxOutOfOrderness;
161 | }
162 |
163 | @Override
164 | public final Watermark getCurrentWatermark() {
165 |
166 | // if last record was processed more than the idleTimeout in the past, consider this
167 | // source idle and set timestamp to current processing time
168 | long currentProcessingTime = System.currentTimeMillis();
169 | if (lastRecordProcessingTime < currentProcessingTime - idleTimeout) {
170 | this.currentMaxTimestamp = currentProcessingTime;
171 | }
172 |
173 | long potentialWM = this.currentMaxTimestamp - maxOutOfOrderness;
174 | if (potentialWM >= lastEmittedWatermark) {
175 | lastEmittedWatermark = potentialWM;
176 | }
177 | return new Watermark(lastEmittedWatermark);
178 | }
179 |
180 | @Override
181 | public final long extractTimestamp(FakeKafkaRecord element, long previousElementTimestamp) {
182 | lastRecordProcessingTime = System.currentTimeMillis();
183 | long timestamp = element.getTimestamp();
184 | if (timestamp > currentMaxTimestamp) {
185 | currentMaxTimestamp = timestamp;
186 | }
187 | return timestamp;
188 | }
189 | }
190 |
191 | public static class MeasurementWindowAggregatingFunction
192 |             implements AggregateFunction<JsonNode, WindowedMeasurements, WindowedMeasurements> {
193 | private static final long serialVersionUID = 2L;
194 |
195 | MeasurementWindowAggregatingFunction() {}
196 |
197 | @Override
198 | public WindowedMeasurements createAccumulator() {
199 | return new WindowedMeasurements();
200 | }
201 |
202 | @Override
203 | public WindowedMeasurements add(final JsonNode record, final WindowedMeasurements aggregate) {
204 | double result = Double.parseDouble(record.get("value").asText());
205 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result);
206 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1);
207 | return aggregate;
208 | }
209 |
210 | @Override
211 | public WindowedMeasurements getResult(final WindowedMeasurements windowedMeasurements) {
212 | return windowedMeasurements;
213 | }
214 |
215 | @Override
216 | public WindowedMeasurements merge(final WindowedMeasurements agg1, final WindowedMeasurements agg2) {
217 | agg2.setEventsPerWindow(agg1.getEventsPerWindow() + agg2.getEventsPerWindow());
218 | agg2.setSumPerWindow(agg1.getSumPerWindow() + agg2.getSumPerWindow());
219 | return agg2;
220 | }
221 | }
222 |
223 | public static class MeasurementWindowProcessFunction
224 |             extends ProcessWindowFunction<WindowedMeasurements, WindowedMeasurements, String, TimeWindow> {
225 | private static final long serialVersionUID = 1L;
226 |
227 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000;
228 |
229 | private transient DescriptiveStatisticsHistogram eventTimeLag;
230 |
231 | MeasurementWindowProcessFunction() {
232 | }
233 |
234 | @Override
235 | public void process(
236 | final String location,
237 | final Context context,
238 |                 final Iterable<WindowedMeasurements> input,
239 |                 final Collector<WindowedMeasurements> out) {
240 |
241 |             // Windows with pre-aggregation only forward the final aggregate to the WindowFunction
242 | WindowedMeasurements aggregate = input.iterator().next();
243 |
244 | final TimeWindow window = context.window();
245 | aggregate.setWindowStart(window.getStart());
246 | aggregate.setWindowEnd(window.getEnd());
247 | aggregate.setLocation(location);
248 |
249 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd());
250 | out.collect(aggregate);
251 | }
252 |
253 | @Override
254 | public void open(Configuration parameters) throws Exception {
255 | super.open(parameters);
256 |
257 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag",
258 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE));
259 | }
260 | }
261 |
262 | private static class ObjectMapperSingleton {
263 | static ObjectMapper getInstance() {
264 | ObjectMapper objectMapper = new ObjectMapper();
265 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
266 | return objectMapper;
267 | }
268 | }
269 | }
270 |
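Compared to Solution 3.1, Solution 3.2 splits the window work: the AggregateFunction folds every record into a single WindowedMeasurements accumulator as it arrives, and the ProcessWindowFunction merely stamps the window metadata onto that final result, so per-window state shrinks from a buffered record list to one accumulator. A hedged, standalone driver for that contract, reusing the classes above (it assumes it lives in the same package as the solution class; the JSON payloads are invented):

    import com.fasterxml.jackson.databind.JsonNode;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements;

    public class AggregateContractSketch {
        public static void main(String[] args) throws Exception {
            TroubledStreamingJobSolution32.MeasurementWindowAggregatingFunction agg =
                    new TroubledStreamingJobSolution32.MeasurementWindowAggregatingFunction();
            ObjectMapper mapper = new ObjectMapper();
            WindowedMeasurements acc = agg.createAccumulator();
            for (String json : new String[]{
                    "{\"location\":\"Berlin\",\"value\":\"1.5\"}",
                    "{\"location\":\"Berlin\",\"value\":\"2.5\"}"}) {
                JsonNode record = mapper.readTree(json);
                acc = agg.add(record, acc);  // incremental fold, nothing buffered
            }
            System.out.println(agg.getResult(acc).getSumPerWindow());  // 4.0
        }
    }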
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution33.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot;
2 |
3 | import org.apache.flink.api.common.functions.AggregateFunction;
4 | import org.apache.flink.api.common.functions.RichFlatMapFunction;
5 | import org.apache.flink.api.java.utils.ParameterTool;
6 | import org.apache.flink.configuration.Configuration;
7 | import org.apache.flink.metrics.Counter;
8 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram;
9 | import org.apache.flink.streaming.api.TimeCharacteristic;
10 | import org.apache.flink.streaming.api.datastream.DataStream;
11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
13 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
14 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink;
15 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
16 | import org.apache.flink.streaming.api.watermark.Watermark;
17 | import org.apache.flink.streaming.api.windowing.time.Time;
18 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
19 | import org.apache.flink.util.Collector;
20 | import org.apache.flink.util.OutputTag;
21 |
22 | import com.fasterxml.jackson.databind.DeserializationFeature;
23 | import com.fasterxml.jackson.databind.JsonNode;
24 | import com.fasterxml.jackson.databind.ObjectMapper;
25 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord;
26 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements;
27 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils;
28 |
29 | import java.io.IOException;
30 | import java.util.concurrent.TimeUnit;
31 |
32 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment;
33 |
34 | public class TroubledStreamingJobSolution33 {
35 |
36 | public static void main(String[] args) throws Exception {
37 | ParameterTool parameters = ParameterTool.fromArgs(args);
38 |
39 | final boolean local = parameters.getBoolean("local", false);
40 |
41 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local);
42 |
43 | //Time Characteristics
44 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
45 | env.getConfig().setAutoWatermarkInterval(100);
46 | env.setBufferTimeout(10);
47 |
48 | //Checkpointing Configuration
49 | env.enableCheckpointing(5000);
50 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000);
51 |
52 |         DataStream<JsonNode> sourceStream = env
53 | .addSource(SourceUtils.createFakeKafkaSource())
54 | .name("FakeKafkaSource")
55 | .uid("FakeKafkaSource")
56 | .assignTimestampsAndWatermarks(
57 | new MeasurementTSExtractor(Time.of(250, TimeUnit.MILLISECONDS),
58 | Time.of(1, TimeUnit.SECONDS)))
59 | .name("Watermarks")
60 | .uid("Watermarks")
61 | .flatMap(new MeasurementDeserializer())
62 | .name("Deserialization")
63 | .uid("Deserialization");
64 |
65 |         OutputTag<JsonNode> lateDataTag = new OutputTag<JsonNode>("late-data") {
66 | private static final long serialVersionUID = 33513631677208956L;
67 | };
68 |
69 |         SingleOutputStreamOperator<WindowedMeasurements> aggregatedPerLocation = sourceStream
70 | .keyBy(jsonNode -> jsonNode.get("location").asText())
71 | .timeWindow(Time.of(1, TimeUnit.SECONDS))
72 | .sideOutputLateData(lateDataTag)
73 | .aggregate(new MeasurementWindowAggregatingFunction(),
74 | new MeasurementWindowProcessFunction())
75 | .name("WindowedAggregationPerLocation")
76 | .uid("WindowedAggregationPerLocation");
77 |
78 | if (local) {
79 | aggregatedPerLocation.print()
80 | .name("NormalOutput")
81 | .uid("NormalOutput")
82 | .disableChaining();
83 | aggregatedPerLocation.getSideOutput(lateDataTag).printToErr()
84 | .name("LateDataSink")
85 | .uid("LateDataSink")
86 | .disableChaining();
87 | } else {
88 | aggregatedPerLocation.addSink(new DiscardingSink<>())
89 | .name("NormalOutput")
90 | .uid("NormalOutput")
91 | .disableChaining();
92 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>())
93 | .name("LateDataSink")
94 | .uid("LateDataSink")
95 | .disableChaining();
96 | }
97 |
98 | env.execute(TroubledStreamingJobSolution33.class.getSimpleName());
99 | }
100 |
101 | /**
102 | * Deserializes the JSON Kafka message.
103 | */
104 |     public static class MeasurementDeserializer extends RichFlatMapFunction<FakeKafkaRecord, JsonNode> {
105 | private static final long serialVersionUID = 2L;
106 |
107 | private Counter numInvalidRecords;
108 |
109 | @Override
110 | public void open(final Configuration parameters) throws Exception {
111 | super.open(parameters);
112 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords");
113 | }
114 |
115 | @Override
116 |         public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector<JsonNode> out) {
117 | final JsonNode node;
118 | try {
119 | node = deserialize(kafkaRecord.getValue());
120 | } catch (IOException e) {
121 | numInvalidRecords.inc();
122 | return;
123 | }
124 | out.collect(node);
125 | }
126 |
127 | private JsonNode deserialize(final byte[] bytes) throws IOException {
128 | return ObjectMapperSingleton.getInstance().readValue(bytes, JsonNode.class);
129 | }
130 | }
131 |
132 |     public static class MeasurementTSExtractor implements AssignerWithPeriodicWatermarks<FakeKafkaRecord> {
133 | private static final long serialVersionUID = 2L;
134 |
135 | private long currentMaxTimestamp;
136 | private long lastEmittedWatermark = Long.MIN_VALUE;
137 | private long lastRecordProcessingTime;
138 |
139 | private final long maxOutOfOrderness;
140 | private final long idleTimeout;
141 |
142 | MeasurementTSExtractor(Time maxOutOfOrderness, Time idleTimeout) {
143 | if (maxOutOfOrderness.toMilliseconds() < 0) {
144 | throw new RuntimeException("Tried to set the maximum allowed " +
145 | "lateness to " + maxOutOfOrderness +
146 | ". This parameter cannot be negative.");
147 | }
148 |
149 | if (idleTimeout.toMilliseconds() < 0) {
150 |                 throw new RuntimeException("Tried to set the idle timeout to " + idleTimeout +
151 | ". This parameter cannot be negative.");
152 | }
153 |
154 |
155 | this.maxOutOfOrderness = maxOutOfOrderness.toMilliseconds();
156 | this.idleTimeout = idleTimeout.toMilliseconds();
157 | this.currentMaxTimestamp = Long.MIN_VALUE;
158 | }
159 |
160 | public long getMaxOutOfOrdernessInMillis() {
161 | return maxOutOfOrderness;
162 | }
163 |
164 | @Override
165 | public final Watermark getCurrentWatermark() {
166 |
167 | // if last record was processed more than the idleTimeout in the past, consider this
168 | // source idle and set timestamp to current processing time
169 | long currentProcessingTime = System.currentTimeMillis();
170 | if (lastRecordProcessingTime < currentProcessingTime - idleTimeout) {
171 | this.currentMaxTimestamp = currentProcessingTime;
172 | }
173 |
174 | long potentialWM = this.currentMaxTimestamp - maxOutOfOrderness;
175 | if (potentialWM >= lastEmittedWatermark) {
176 | lastEmittedWatermark = potentialWM;
177 | }
178 | return new Watermark(lastEmittedWatermark);
179 | }
180 |
181 | @Override
182 | public final long extractTimestamp(FakeKafkaRecord element, long previousElementTimestamp) {
183 | lastRecordProcessingTime = System.currentTimeMillis();
184 | long timestamp = element.getTimestamp();
185 | if (timestamp > currentMaxTimestamp) {
186 | currentMaxTimestamp = timestamp;
187 | }
188 | return timestamp;
189 | }
190 | }
191 |
192 | public static class MeasurementWindowAggregatingFunction
193 |             implements AggregateFunction<JsonNode, WindowedMeasurements, WindowedMeasurements> {
194 | private static final long serialVersionUID = 2L;
195 |
196 | public MeasurementWindowAggregatingFunction() {}
197 |
198 | @Override
199 | public WindowedMeasurements createAccumulator() {
200 | return new WindowedMeasurements();
201 | }
202 |
203 | @Override
204 | public WindowedMeasurements add(final JsonNode record, final WindowedMeasurements aggregate) {
205 | double result = Double.parseDouble(record.get("value").asText());
206 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result);
207 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1);
208 | return aggregate;
209 | }
210 |
211 | @Override
212 | public WindowedMeasurements getResult(final WindowedMeasurements windowedMeasurements) {
213 | return windowedMeasurements;
214 | }
215 |
216 | @Override
217 | public WindowedMeasurements merge(final WindowedMeasurements agg1, final WindowedMeasurements agg2) {
218 | agg2.setEventsPerWindow(agg1.getEventsPerWindow() + agg2.getEventsPerWindow());
219 | agg2.setSumPerWindow(agg1.getSumPerWindow() + agg2.getSumPerWindow());
220 | return agg2;
221 | }
222 | }
223 |
224 | public static class MeasurementWindowProcessFunction
225 |             extends ProcessWindowFunction<WindowedMeasurements, WindowedMeasurements, String, TimeWindow> {
226 | private static final long serialVersionUID = 1L;
227 |
228 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000;
229 |
230 | private transient DescriptiveStatisticsHistogram eventTimeLag;
231 |
232 | MeasurementWindowProcessFunction() {
233 | }
234 |
235 | @Override
236 | public void process(
237 | final String location,
238 | final Context context,
239 |                 final Iterable<WindowedMeasurements> input,
240 |                 final Collector<WindowedMeasurements> out) {
241 |
242 |             // Windows with pre-aggregation only forward the final aggregate to the WindowFunction
243 | WindowedMeasurements aggregate = input.iterator().next();
244 |
245 | final TimeWindow window = context.window();
246 | aggregate.setWindowStart(window.getStart());
247 | aggregate.setWindowEnd(window.getEnd());
248 | aggregate.setLocation(location);
249 |
250 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd());
251 | out.collect(aggregate);
252 | }
253 |
254 | @Override
255 | public void open(Configuration parameters) throws Exception {
256 | super.open(parameters);
257 |
258 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag",
259 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE));
260 | }
261 | }
262 |
263 | private static class ObjectMapperSingleton {
264 | static ObjectMapper getInstance() {
265 | ObjectMapper objectMapper = new ObjectMapper();
266 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
267 | return objectMapper;
268 | }
269 | }
270 | }
271 |
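The only change in Solution 3.3 is env.setBufferTimeout(10): network buffers are now flushed when they fill up or after at most 10 ms, whichever comes first, trading a little throughput for bounded latency between operators. A minimal sketch of the knob (values shown for illustration):

    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

    public class BufferTimeoutSketch {
        public static void main(String[] args) {
            StreamExecutionEnvironment env =
                    StreamExecutionEnvironment.getExecutionEnvironment();
            // -1: flush only when a buffer is full (throughput-optimized)
            //  0: flush after every record (lowest latency, highest overhead)
            // 10: cap the buffering delay at 10 ms, as in Solution 3.3
            env.setBufferTimeout(10);
        }
    }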
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution41.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot;
2 |
3 | import org.apache.flink.api.common.functions.AggregateFunction;
4 | import org.apache.flink.api.common.functions.RichFlatMapFunction;
5 | import org.apache.flink.api.java.utils.ParameterTool;
6 | import org.apache.flink.configuration.Configuration;
7 | import org.apache.flink.metrics.Counter;
8 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram;
9 | import org.apache.flink.streaming.api.TimeCharacteristic;
10 | import org.apache.flink.streaming.api.datastream.DataStream;
11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
13 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
14 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink;
15 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
16 | import org.apache.flink.streaming.api.watermark.Watermark;
17 | import org.apache.flink.streaming.api.windowing.time.Time;
18 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
19 | import org.apache.flink.util.Collector;
20 | import org.apache.flink.util.OutputTag;
21 |
22 | import com.fasterxml.jackson.databind.DeserializationFeature;
23 | import com.fasterxml.jackson.databind.ObjectMapper;
24 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord;
25 | import com.ververica.flinktraining.provided.troubleshoot.Measurement;
26 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements;
27 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils;
28 |
29 | import java.io.IOException;
30 | import java.util.concurrent.TimeUnit;
31 |
32 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment;
33 |
34 | public class TroubledStreamingJobSolution41 {
35 |
36 | public static void main(String[] args) throws Exception {
37 | ParameterTool parameters = ParameterTool.fromArgs(args);
38 |
39 | final boolean local = parameters.getBoolean("local", false);
40 |
41 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local);
42 |
43 | //Time Characteristics
44 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
45 | env.getConfig().setAutoWatermarkInterval(100);
46 | env.setBufferTimeout(10);
47 |
48 | //Checkpointing Configuration
49 | env.enableCheckpointing(5000);
50 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000);
51 |
52 |         DataStream<Measurement> sourceStream = env
53 | .addSource(SourceUtils.createFakeKafkaSource())
54 | .name("FakeKafkaSource")
55 | .uid("FakeKafkaSource")
56 | .assignTimestampsAndWatermarks(
57 | new MeasurementTSExtractor(Time.of(250, TimeUnit.MILLISECONDS),
58 | Time.of(1, TimeUnit.SECONDS)))
59 | .name("Watermarks")
60 | .uid("Watermarks")
61 | .flatMap(new MeasurementDeserializer())
62 | .name("Deserialization")
63 | .uid("Deserialization");
64 |
65 |         OutputTag<Measurement> lateDataTag = new OutputTag<Measurement>("late-data") {
66 | private static final long serialVersionUID = 33513631677208956L;
67 | };
68 |
69 |         SingleOutputStreamOperator<WindowedMeasurements> aggregatedPerLocation = sourceStream
70 | .keyBy(Measurement::getLocation)
71 | .timeWindow(Time.of(1, TimeUnit.SECONDS))
72 | .sideOutputLateData(lateDataTag)
73 | .aggregate(new MeasurementWindowAggregatingFunction(),
74 | new MeasurementWindowProcessFunction())
75 | .name("WindowedAggregationPerLocation")
76 | .uid("WindowedAggregationPerLocation");
77 |
78 | if (local) {
79 | aggregatedPerLocation.print()
80 | .name("NormalOutput")
81 | .uid("NormalOutput")
82 | .disableChaining();
83 | aggregatedPerLocation.getSideOutput(lateDataTag).printToErr()
84 | .name("LateDataSink")
85 | .uid("LateDataSink")
86 | .disableChaining();
87 | } else {
88 | aggregatedPerLocation.addSink(new DiscardingSink<>())
89 | .name("NormalOutput")
90 | .uid("NormalOutput")
91 | .disableChaining();
92 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>())
93 | .name("LateDataSink")
94 | .uid("LateDataSink")
95 | .disableChaining();
96 | }
97 |
98 | env.execute(TroubledStreamingJobSolution41.class.getSimpleName());
99 | }
100 |
101 | /**
102 | * Deserializes the JSON Kafka message.
103 | */
104 |     public static class MeasurementDeserializer extends RichFlatMapFunction<FakeKafkaRecord, Measurement> {
105 | private static final long serialVersionUID = 3L;
106 |
107 | private Counter numInvalidRecords;
108 |
109 | @Override
110 | public void open(final Configuration parameters) throws Exception {
111 | super.open(parameters);
112 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords");
113 | }
114 |
115 | @Override
116 |         public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector<Measurement> out) {
117 | final Measurement node;
118 | try {
119 | node = deserialize(kafkaRecord.getValue());
120 | } catch (IOException e) {
121 | numInvalidRecords.inc();
122 | return;
123 | }
124 | out.collect(node);
125 | }
126 |
127 | private Measurement deserialize(final byte[] bytes) throws IOException {
128 | return ObjectMapperSingleton.getInstance().readValue(bytes, Measurement.class);
129 | }
130 | }
131 |
132 |     public static class MeasurementTSExtractor implements AssignerWithPeriodicWatermarks<FakeKafkaRecord> {
133 | private static final long serialVersionUID = 2L;
134 |
135 | private long currentMaxTimestamp;
136 | private long lastEmittedWatermark = Long.MIN_VALUE;
137 | private long lastRecordProcessingTime;
138 |
139 | private final long maxOutOfOrderness;
140 | private final long idleTimeout;
141 |
142 | MeasurementTSExtractor(Time maxOutOfOrderness, Time idleTimeout) {
143 | if (maxOutOfOrderness.toMilliseconds() < 0) {
144 | throw new RuntimeException("Tried to set the maximum allowed " +
145 | "lateness to " + maxOutOfOrderness +
146 | ". This parameter cannot be negative.");
147 | }
148 |
149 | if (idleTimeout.toMilliseconds() < 0) {
150 |                 throw new RuntimeException("Tried to set the idle timeout to " + idleTimeout +
151 | ". This parameter cannot be negative.");
152 | }
153 |
154 |
155 | this.maxOutOfOrderness = maxOutOfOrderness.toMilliseconds();
156 | this.idleTimeout = idleTimeout.toMilliseconds();
157 | this.currentMaxTimestamp = Long.MIN_VALUE;
158 | }
159 |
160 | public long getMaxOutOfOrdernessInMillis() {
161 | return maxOutOfOrderness;
162 | }
163 |
164 | @Override
165 | public final Watermark getCurrentWatermark() {
166 |
167 | // if last record was processed more than the idleTimeout in the past, consider this
168 | // source idle and set timestamp to current processing time
169 | long currentProcessingTime = System.currentTimeMillis();
170 | if (lastRecordProcessingTime < currentProcessingTime - idleTimeout) {
171 | this.currentMaxTimestamp = currentProcessingTime;
172 | }
173 |
174 | long potentialWM = this.currentMaxTimestamp - maxOutOfOrderness;
175 | if (potentialWM >= lastEmittedWatermark) {
176 | lastEmittedWatermark = potentialWM;
177 | }
178 | return new Watermark(lastEmittedWatermark);
179 | }
180 |
181 | @Override
182 | public final long extractTimestamp(FakeKafkaRecord element, long previousElementTimestamp) {
183 | lastRecordProcessingTime = System.currentTimeMillis();
184 | long timestamp = element.getTimestamp();
185 | if (timestamp > currentMaxTimestamp) {
186 | currentMaxTimestamp = timestamp;
187 | }
188 | return timestamp;
189 | }
190 | }
191 |
192 | public static class MeasurementWindowAggregatingFunction
193 |             implements AggregateFunction<Measurement, WindowedMeasurements, WindowedMeasurements> {
194 | private static final long serialVersionUID = -1083906142198231377L;
195 |
196 | public MeasurementWindowAggregatingFunction() {}
197 |
198 | @Override
199 | public WindowedMeasurements createAccumulator() {
200 | return new WindowedMeasurements();
201 | }
202 |
203 | @Override
204 | public WindowedMeasurements add(final Measurement record, final WindowedMeasurements aggregate) {
205 | double result = record.getValue();
206 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result);
207 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1);
208 | return aggregate;
209 | }
210 |
211 | @Override
212 | public WindowedMeasurements getResult(final WindowedMeasurements windowedMeasurements) {
213 | return windowedMeasurements;
214 | }
215 |
216 | @Override
217 | public WindowedMeasurements merge(final WindowedMeasurements agg1, final WindowedMeasurements agg2) {
218 | agg2.setEventsPerWindow(agg1.getEventsPerWindow() + agg2.getEventsPerWindow());
219 | agg2.setSumPerWindow(agg1.getSumPerWindow() + agg2.getSumPerWindow());
220 | return agg2;
221 | }
222 | }
223 |
224 | public static class MeasurementWindowProcessFunction
225 |             extends ProcessWindowFunction<WindowedMeasurements, WindowedMeasurements, String, TimeWindow> {
226 | private static final long serialVersionUID = 1L;
227 |
228 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000;
229 |
230 | private transient DescriptiveStatisticsHistogram eventTimeLag;
231 |
232 | MeasurementWindowProcessFunction() {
233 | }
234 |
235 | @Override
236 | public void process(
237 | final String location,
238 | final Context context,
239 |                 final Iterable<WindowedMeasurements> input,
240 |                 final Collector<WindowedMeasurements> out) {
241 |
242 |             // Windows with pre-aggregation only forward the final aggregate to the WindowFunction
243 | WindowedMeasurements aggregate = input.iterator().next();
244 |
245 | final TimeWindow window = context.window();
246 | aggregate.setWindowStart(window.getStart());
247 | aggregate.setWindowEnd(window.getEnd());
248 | aggregate.setLocation(location);
249 |
250 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd());
251 | out.collect(aggregate);
252 | }
253 |
254 | @Override
255 | public void open(Configuration parameters) throws Exception {
256 | super.open(parameters);
257 |
258 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag",
259 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE));
260 | }
261 | }
262 |
263 | private static class ObjectMapperSingleton {
264 | static ObjectMapper getInstance() {
265 | ObjectMapper objectMapper = new ObjectMapper();
266 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
267 | return objectMapper;
268 | }
269 | }
270 | }
271 |
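Solution 4.1 deserializes straight into the provided Measurement POJO, so the stream is strongly typed and can be keyed with the method reference Measurement::getLocation instead of a per-record JSON lookup. A hedged sketch of the Jackson binding (the payload, including its unknown field, is invented; only location and value are known from the code above):

    import java.nio.charset.StandardCharsets;

    import com.fasterxml.jackson.databind.DeserializationFeature;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.ververica.flinktraining.provided.troubleshoot.Measurement;

    public class PojoBindingSketch {
        public static void main(String[] args) throws Exception {
            ObjectMapper mapper = new ObjectMapper();
            mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
            byte[] payload = "{\"location\":\"Berlin\",\"value\":1.5,\"unknownField\":true}"
                    .getBytes(StandardCharsets.UTF_8);
            // unknownField is skipped rather than raising an error, thanks to the flag
            Measurement m = mapper.readValue(payload, Measurement.class);
            System.out.println(m.getLocation() + " -> " + m.getValue());
        }
    }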
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution42.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot;
2 |
3 | import org.apache.flink.api.common.functions.AggregateFunction;
4 | import org.apache.flink.api.common.functions.RichFlatMapFunction;
5 | import org.apache.flink.api.java.utils.ParameterTool;
6 | import org.apache.flink.configuration.Configuration;
7 | import org.apache.flink.metrics.Counter;
8 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram;
9 | import org.apache.flink.streaming.api.TimeCharacteristic;
10 | import org.apache.flink.streaming.api.datastream.DataStream;
11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
13 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
14 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink;
15 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
16 | import org.apache.flink.streaming.api.watermark.Watermark;
17 | import org.apache.flink.streaming.api.windowing.time.Time;
18 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
19 | import org.apache.flink.util.Collector;
20 | import org.apache.flink.util.OutputTag;
21 |
22 | import com.fasterxml.jackson.databind.DeserializationFeature;
23 | import com.fasterxml.jackson.databind.ObjectMapper;
24 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord;
25 | import com.ververica.flinktraining.provided.troubleshoot.Measurement;
26 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements;
27 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils;
28 |
29 | import java.io.IOException;
30 | import java.util.concurrent.TimeUnit;
31 |
32 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment;
33 |
34 | public class TroubledStreamingJobSolution42 {
35 |
36 | public static void main(String[] args) throws Exception {
37 | ParameterTool parameters = ParameterTool.fromArgs(args);
38 |
39 | final boolean local = parameters.getBoolean("local", false);
40 |
41 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local);
42 |
43 | //Time Characteristics
44 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
45 | env.getConfig().setAutoWatermarkInterval(100);
46 | env.setBufferTimeout(10);
47 |
48 | //Checkpointing Configuration
49 | env.enableCheckpointing(5000);
50 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000);
51 |
52 |         DataStream<Measurement> sourceStream = env
53 | .addSource(SourceUtils.createFakeKafkaSource())
54 | .name("FakeKafkaSource")
55 | .uid("FakeKafkaSource")
56 | .assignTimestampsAndWatermarks(
57 | new MeasurementTSExtractor(Time.of(250, TimeUnit.MILLISECONDS),
58 | Time.of(1, TimeUnit.SECONDS)))
59 | .name("Watermarks")
60 | .uid("Watermarks")
61 | .flatMap(new MeasurementDeserializer())
62 | .name("Deserialization")
63 | .uid("Deserialization");
64 |
65 |         OutputTag<Measurement> lateDataTag = new OutputTag<Measurement>("late-data") {
66 | private static final long serialVersionUID = 33513631677208956L;
67 | };
68 |
69 |         SingleOutputStreamOperator<WindowedMeasurements> aggregatedPerLocation = sourceStream
70 | .keyBy(Measurement::getLocation)
71 | .timeWindow(Time.of(1, TimeUnit.SECONDS))
72 | .sideOutputLateData(lateDataTag)
73 | .aggregate(new MeasurementWindowAggregatingFunction(),
74 | new MeasurementWindowProcessFunction())
75 | .name("WindowedAggregationPerLocation")
76 | .uid("WindowedAggregationPerLocation");
77 |
78 | if (local) {
79 | aggregatedPerLocation.print()
80 | .name("NormalOutput")
81 | .uid("NormalOutput")
82 | .disableChaining();
83 | aggregatedPerLocation.getSideOutput(lateDataTag).printToErr()
84 | .name("LateDataSink")
85 | .uid("LateDataSink")
86 | .disableChaining();
87 | } else {
88 | aggregatedPerLocation.addSink(new DiscardingSink<>())
89 | .name("NormalOutput")
90 | .uid("NormalOutput")
91 | .disableChaining();
92 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>())
93 | .name("LateDataSink")
94 | .uid("LateDataSink")
95 | .disableChaining();
96 | }
97 |
98 | env.execute(TroubledStreamingJobSolution42.class.getSimpleName());
99 | }
100 |
101 | /**
102 | * Deserializes the JSON Kafka message.
103 | */
104 |     public static class MeasurementDeserializer extends RichFlatMapFunction<FakeKafkaRecord, Measurement> {
105 | private static final long serialVersionUID = 3L;
106 |
107 | private Counter numInvalidRecords;
108 | private transient ObjectMapper instance;
109 |
110 | @Override
111 | public void open(final Configuration parameters) throws Exception {
112 | super.open(parameters);
113 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords");
114 | instance = createObjectMapper();
115 | }
116 |
117 | @Override
118 |         public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector<Measurement> out) {
119 | final Measurement node;
120 | try {
121 | node = deserialize(kafkaRecord.getValue());
122 | } catch (IOException e) {
123 | numInvalidRecords.inc();
124 | return;
125 | }
126 | out.collect(node);
127 | }
128 |
129 | private Measurement deserialize(final byte[] bytes) throws IOException {
130 | return instance.readValue(bytes, Measurement.class);
131 | }
132 | }
133 |
134 |     public static class MeasurementTSExtractor implements AssignerWithPeriodicWatermarks<FakeKafkaRecord> {
135 | private static final long serialVersionUID = 2L;
136 |
137 | private long currentMaxTimestamp;
138 | private long lastEmittedWatermark = Long.MIN_VALUE;
139 | private long lastRecordProcessingTime;
140 |
141 | private final long maxOutOfOrderness;
142 | private final long idleTimeout;
143 |
144 | MeasurementTSExtractor(Time maxOutOfOrderness, Time idleTimeout) {
145 | if (maxOutOfOrderness.toMilliseconds() < 0) {
146 | throw new RuntimeException("Tried to set the maximum allowed " +
147 | "lateness to " + maxOutOfOrderness +
148 | ". This parameter cannot be negative.");
149 | }
150 |
151 | if (idleTimeout.toMilliseconds() < 0) {
152 |                 throw new RuntimeException("Tried to set the idle timeout to " + idleTimeout +
153 | ". This parameter cannot be negative.");
154 | }
155 |
156 |
157 | this.maxOutOfOrderness = maxOutOfOrderness.toMilliseconds();
158 | this.idleTimeout = idleTimeout.toMilliseconds();
159 | this.currentMaxTimestamp = Long.MIN_VALUE;
160 | }
161 |
162 | public long getMaxOutOfOrdernessInMillis() {
163 | return maxOutOfOrderness;
164 | }
165 |
166 | @Override
167 | public final Watermark getCurrentWatermark() {
168 |
169 | // if last record was processed more than the idleTimeout in the past, consider this
170 | // source idle and set timestamp to current processing time
171 | long currentProcessingTime = System.currentTimeMillis();
172 | if (lastRecordProcessingTime < currentProcessingTime - idleTimeout) {
173 | this.currentMaxTimestamp = currentProcessingTime;
174 | }
175 |
176 | long potentialWM = this.currentMaxTimestamp - maxOutOfOrderness;
177 | if (potentialWM >= lastEmittedWatermark) {
178 | lastEmittedWatermark = potentialWM;
179 | }
180 | return new Watermark(lastEmittedWatermark);
181 | }
182 |
183 | @Override
184 | public final long extractTimestamp(FakeKafkaRecord element, long previousElementTimestamp) {
185 | lastRecordProcessingTime = System.currentTimeMillis();
186 | long timestamp = element.getTimestamp();
187 | if (timestamp > currentMaxTimestamp) {
188 | currentMaxTimestamp = timestamp;
189 | }
190 | return timestamp;
191 | }
192 | }
193 |
194 | public static class MeasurementWindowAggregatingFunction
195 |             implements AggregateFunction<Measurement, WindowedMeasurements, WindowedMeasurements> {
196 | private static final long serialVersionUID = -1083906142198231377L;
197 |
198 | public MeasurementWindowAggregatingFunction() {}
199 |
200 | @Override
201 | public WindowedMeasurements createAccumulator() {
202 | return new WindowedMeasurements();
203 | }
204 |
205 | @Override
206 | public WindowedMeasurements add(final Measurement record, final WindowedMeasurements aggregate) {
207 | double result = record.getValue();
208 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result);
209 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1);
210 | return aggregate;
211 | }
212 |
213 | @Override
214 | public WindowedMeasurements getResult(final WindowedMeasurements windowedMeasurements) {
215 | return windowedMeasurements;
216 | }
217 |
218 | @Override
219 | public WindowedMeasurements merge(final WindowedMeasurements agg1, final WindowedMeasurements agg2) {
220 | agg2.setEventsPerWindow(agg1.getEventsPerWindow() + agg2.getEventsPerWindow());
221 | agg2.setSumPerWindow(agg1.getSumPerWindow() + agg2.getSumPerWindow());
222 | return agg2;
223 | }
224 | }
225 |
226 | public static class MeasurementWindowProcessFunction
227 |             extends ProcessWindowFunction<WindowedMeasurements, WindowedMeasurements, String, TimeWindow> {
228 | private static final long serialVersionUID = 1L;
229 |
230 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000;
231 |
232 | private transient DescriptiveStatisticsHistogram eventTimeLag;
233 |
234 | MeasurementWindowProcessFunction() {
235 | }
236 |
237 | @Override
238 | public void process(
239 | final String location,
240 | final Context context,
241 |                 final Iterable<WindowedMeasurements> input,
242 |                 final Collector<WindowedMeasurements> out) {
243 |
244 |             // Windows with pre-aggregation only forward the final aggregate to the WindowFunction
245 | WindowedMeasurements aggregate = input.iterator().next();
246 |
247 | final TimeWindow window = context.window();
248 | aggregate.setWindowStart(window.getStart());
249 | aggregate.setWindowEnd(window.getEnd());
250 | aggregate.setLocation(location);
251 |
252 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd());
253 | out.collect(aggregate);
254 | }
255 |
256 | @Override
257 | public void open(Configuration parameters) throws Exception {
258 | super.open(parameters);
259 |
260 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag",
261 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE));
262 | }
263 | }
264 |
265 | private static ObjectMapper createObjectMapper() {
266 | ObjectMapper objectMapper = new ObjectMapper();
267 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
268 | return objectMapper;
269 | }
270 | }
271 |
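Solution 4.2 drops the JVM-wide ObjectMapperSingleton: every parallel deserializer instance builds its own transient ObjectMapper in open(), so nothing non-serializable travels with the serialized function and parallel instances never share a mapper. The same pattern in isolation (class and types hypothetical):

    import org.apache.flink.api.common.functions.RichMapFunction;
    import org.apache.flink.configuration.Configuration;

    import com.fasterxml.jackson.databind.ObjectMapper;

    public class PerInstanceMapperFunction extends RichMapFunction<byte[], String> {
        private transient ObjectMapper mapper;  // excluded from serialization

        @Override
        public void open(Configuration parameters) {
            mapper = new ObjectMapper();        // built once per task instance
        }

        @Override
        public String map(byte[] value) throws Exception {
            return mapper.readTree(value).toString();
        }
    }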
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/TroubledStreamingJobSolution43.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot;
2 |
3 | import org.apache.flink.api.common.functions.AggregateFunction;
4 | import org.apache.flink.api.common.functions.RichFlatMapFunction;
5 | import org.apache.flink.api.java.utils.ParameterTool;
6 | import org.apache.flink.configuration.Configuration;
7 | import org.apache.flink.metrics.Counter;
8 | import org.apache.flink.runtime.metrics.DescriptiveStatisticsHistogram;
9 | import org.apache.flink.streaming.api.TimeCharacteristic;
10 | import org.apache.flink.streaming.api.datastream.DataStream;
11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
13 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
14 | import org.apache.flink.streaming.api.functions.sink.DiscardingSink;
15 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
16 | import org.apache.flink.streaming.api.watermark.Watermark;
17 | import org.apache.flink.streaming.api.windowing.time.Time;
18 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
19 | import org.apache.flink.util.Collector;
20 | import org.apache.flink.util.OutputTag;
21 |
22 | import com.fasterxml.jackson.databind.DeserializationFeature;
23 | import com.fasterxml.jackson.databind.ObjectMapper;
24 | import com.ververica.flinktraining.provided.troubleshoot.FakeKafkaRecord;
25 | import com.ververica.flinktraining.provided.troubleshoot.WindowedMeasurements;
26 | import com.ververica.flinktraining.provided.troubleshoot.SourceUtils;
27 |
28 | import java.io.IOException;
29 | import java.util.concurrent.TimeUnit;
30 |
31 | import static com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJobUtils.createConfiguredEnvironment;
32 |
33 | public class TroubledStreamingJobSolution43 {
34 |
35 | public static void main(String[] args) throws Exception {
36 | ParameterTool parameters = ParameterTool.fromArgs(args);
37 |
38 | final boolean local = parameters.getBoolean("local", false);
39 |
40 | StreamExecutionEnvironment env = createConfiguredEnvironment(parameters, local);
41 |
42 | //Time Characteristics
43 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
44 | env.getConfig().setAutoWatermarkInterval(100);
45 | env.setBufferTimeout(10);
46 |
47 | //Checkpointing Configuration
48 | env.enableCheckpointing(5000);
49 | env.getCheckpointConfig().setMinPauseBetweenCheckpoints(4000);
50 |
51 |         DataStream<SimpleMeasurement> sourceStream = env
52 | .addSource(SourceUtils.createFakeKafkaSource())
53 | .name("FakeKafkaSource")
54 | .uid("FakeKafkaSource")
55 | .assignTimestampsAndWatermarks(
56 | new MeasurementTSExtractor(Time.of(250, TimeUnit.MILLISECONDS),
57 | Time.of(1, TimeUnit.SECONDS)))
58 | .name("Watermarks")
59 | .uid("Watermarks")
60 | .flatMap(new MeasurementDeserializer())
61 | .name("Deserialization")
62 | .uid("Deserialization");
63 |
64 |         OutputTag<SimpleMeasurement> lateDataTag = new OutputTag<SimpleMeasurement>("late-data") {
65 | private static final long serialVersionUID = 33513631677208956L;
66 | };
67 |
68 |         SingleOutputStreamOperator<WindowedMeasurements> aggregatedPerLocation = sourceStream
69 | .keyBy(SimpleMeasurement::getLocation)
70 | .timeWindow(Time.of(1, TimeUnit.SECONDS))
71 | .sideOutputLateData(lateDataTag)
72 | .aggregate(new MeasurementWindowAggregatingFunction(),
73 | new MeasurementWindowProcessFunction())
74 | .name("WindowedAggregationPerLocation")
75 | .uid("WindowedAggregationPerLocation");
76 |
77 | if (local) {
78 | aggregatedPerLocation.print()
79 | .name("NormalOutput")
80 | .uid("NormalOutput")
81 | .disableChaining();
82 | aggregatedPerLocation.getSideOutput(lateDataTag).printToErr()
83 | .name("LateDataSink")
84 | .uid("LateDataSink")
85 | .disableChaining();
86 | } else {
87 | aggregatedPerLocation.addSink(new DiscardingSink<>())
88 | .name("NormalOutput")
89 | .uid("NormalOutput")
90 | .disableChaining();
91 | aggregatedPerLocation.getSideOutput(lateDataTag).addSink(new DiscardingSink<>())
92 | .name("LateDataSink")
93 | .uid("LateDataSink")
94 | .disableChaining();
95 | }
96 |
97 | env.execute(TroubledStreamingJobSolution43.class.getSimpleName());
98 | }
99 |
100 | /**
101 | * Deserializes the JSON Kafka message.
102 | */
103 |     public static class MeasurementDeserializer extends RichFlatMapFunction<FakeKafkaRecord, SimpleMeasurement> {
104 | private static final long serialVersionUID = 4L;
105 |
106 | private Counter numInvalidRecords;
107 | private transient ObjectMapper instance;
108 |
109 | @Override
110 | public void open(final Configuration parameters) throws Exception {
111 | super.open(parameters);
112 | numInvalidRecords = getRuntimeContext().getMetricGroup().counter("numInvalidRecords");
113 | instance = createObjectMapper();
114 | }
115 |
116 | @Override
117 |         public void flatMap(final FakeKafkaRecord kafkaRecord, final Collector<SimpleMeasurement> out) {
118 | final SimpleMeasurement node;
119 | try {
120 | node = deserialize(kafkaRecord.getValue());
121 | } catch (IOException e) {
122 | numInvalidRecords.inc();
123 | return;
124 | }
125 | out.collect(node);
126 | }
127 |
128 | private SimpleMeasurement deserialize(final byte[] bytes) throws IOException {
129 | return instance.readValue(bytes, SimpleMeasurement.class);
130 | }
131 | }
132 |
133 |     public static class MeasurementTSExtractor implements AssignerWithPeriodicWatermarks<FakeKafkaRecord> {
134 | private static final long serialVersionUID = 2L;
135 |
136 | private long currentMaxTimestamp;
137 | private long lastEmittedWatermark = Long.MIN_VALUE;
138 | private long lastRecordProcessingTime;
139 |
140 | private final long maxOutOfOrderness;
141 | private final long idleTimeout;
142 |
143 | MeasurementTSExtractor(Time maxOutOfOrderness, Time idleTimeout) {
144 | if (maxOutOfOrderness.toMilliseconds() < 0) {
145 | throw new RuntimeException("Tried to set the maximum allowed " +
146 | "lateness to " + maxOutOfOrderness +
147 | ". This parameter cannot be negative.");
148 | }
149 |
150 | if (idleTimeout.toMilliseconds() < 0) {
151 |                 throw new RuntimeException("Tried to set the idle timeout to " + idleTimeout +
152 | ". This parameter cannot be negative.");
153 | }
154 |
155 |
156 | this.maxOutOfOrderness = maxOutOfOrderness.toMilliseconds();
157 | this.idleTimeout = idleTimeout.toMilliseconds();
158 | this.currentMaxTimestamp = Long.MIN_VALUE;
159 | }
160 |
161 | public long getMaxOutOfOrdernessInMillis() {
162 | return maxOutOfOrderness;
163 | }
164 |
165 | @Override
166 | public final Watermark getCurrentWatermark() {
167 |
168 |             // if the last record was processed more than idleTimeout ago, consider this
169 |             // source idle and advance the current max timestamp to processing time
170 | long currentProcessingTime = System.currentTimeMillis();
171 | if (lastRecordProcessingTime < currentProcessingTime - idleTimeout) {
172 | this.currentMaxTimestamp = currentProcessingTime;
173 | }
174 |
175 | long potentialWM = this.currentMaxTimestamp - maxOutOfOrderness;
176 | if (potentialWM >= lastEmittedWatermark) {
177 | lastEmittedWatermark = potentialWM;
178 | }
179 | return new Watermark(lastEmittedWatermark);
180 | }
181 |
182 | @Override
183 | public final long extractTimestamp(FakeKafkaRecord element, long previousElementTimestamp) {
184 | lastRecordProcessingTime = System.currentTimeMillis();
185 | long timestamp = element.getTimestamp();
186 | if (timestamp > currentMaxTimestamp) {
187 | currentMaxTimestamp = timestamp;
188 | }
189 | return timestamp;
190 | }
191 | }
192 |
193 | public static class MeasurementWindowAggregatingFunction
194 |             implements AggregateFunction<SimpleMeasurement, WindowedMeasurements, WindowedMeasurements> {
195 | private static final long serialVersionUID = -1083906142198231377L;
196 |
197 | public MeasurementWindowAggregatingFunction() {}
198 |
199 | @Override
200 | public WindowedMeasurements createAccumulator() {
201 | return new WindowedMeasurements();
202 | }
203 |
204 | @Override
205 | public WindowedMeasurements add(final SimpleMeasurement record, final WindowedMeasurements aggregate) {
206 | double result = record.getValue();
207 | aggregate.setSumPerWindow(aggregate.getSumPerWindow() + result);
208 | aggregate.setEventsPerWindow(aggregate.getEventsPerWindow() + 1);
209 | return aggregate;
210 | }
211 |
212 | @Override
213 | public WindowedMeasurements getResult(final WindowedMeasurements windowedMeasurements) {
214 | return windowedMeasurements;
215 | }
216 |
217 | @Override
218 | public WindowedMeasurements merge(final WindowedMeasurements agg1, final WindowedMeasurements agg2) {
219 | agg2.setEventsPerWindow(agg1.getEventsPerWindow() + agg2.getEventsPerWindow());
220 | agg2.setSumPerWindow(agg1.getSumPerWindow() + agg2.getSumPerWindow());
221 | return agg2;
222 | }
223 | }
224 |
225 | public static class MeasurementWindowProcessFunction
226 |             extends ProcessWindowFunction<WindowedMeasurements, WindowedMeasurements, String, TimeWindow> {
227 | private static final long serialVersionUID = 1L;
228 |
229 | private static final int EVENT_TIME_LAG_WINDOW_SIZE = 10_000;
230 |
231 | private transient DescriptiveStatisticsHistogram eventTimeLag;
232 |
233 | MeasurementWindowProcessFunction() {
234 | }
235 |
236 | @Override
237 | public void process(
238 | final String location,
239 | final Context context,
240 |                 final Iterable<WindowedMeasurements> input,
241 |                 final Collector<WindowedMeasurements> out) {
242 |
243 |             // windows with pre-aggregation only forward the final aggregate to the WindowFunction
244 | WindowedMeasurements aggregate = input.iterator().next();
245 |
246 | final TimeWindow window = context.window();
247 | aggregate.setWindowStart(window.getStart());
248 | aggregate.setWindowEnd(window.getEnd());
249 | aggregate.setLocation(location);
250 |
251 | eventTimeLag.update(System.currentTimeMillis() - window.getEnd());
252 | out.collect(aggregate);
253 | }
254 |
255 | @Override
256 | public void open(Configuration parameters) throws Exception {
257 | super.open(parameters);
258 |
259 | eventTimeLag = getRuntimeContext().getMetricGroup().histogram("eventTimeLag",
260 | new DescriptiveStatisticsHistogram(EVENT_TIME_LAG_WINDOW_SIZE));
261 | }
262 | }
263 |
264 | private static ObjectMapper createObjectMapper() {
265 | ObjectMapper objectMapper = new ObjectMapper();
266 | objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
267 | return objectMapper;
268 | }
269 | }
270 |
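A note on the two techniques this solution combines: MeasurementTSExtractor keeps watermarks advancing on an idle source by falling back to processing time once no record has arrived for idleTimeout, and the .aggregate(AggregateFunction, ProcessWindowFunction) call pre-aggregates records on arrival so the ProcessWindowFunction receives exactly one element per window (hence input.iterator().next() in process()). In Flink 1.11+ idle-source handling is built into the WatermarkStrategy API; a minimal sketch of a roughly equivalent configuration (not what this training code uses, which targets the older AssignerWithPeriodicWatermarks interface, and with slightly different semantics: withIdleness marks the stream idle rather than synthesizing processing-time watermarks):

    // hedged sketch, assuming Flink >= 1.11, mirroring MeasurementTSExtractor(250 ms, 1 s)
    WatermarkStrategy<FakeKafkaRecord> strategy = WatermarkStrategy
            .<FakeKafkaRecord>forBoundedOutOfOrderness(Duration.ofMillis(250))
            .withTimestampAssigner((record, ts) -> record.getTimestamp())
            .withIdleness(Duration.ofSeconds(1));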
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/ExtendedMeasurement.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 |
3 | import org.apache.flink.api.common.typeinfo.TypeInfo;
4 | import org.apache.flink.api.common.typeinfo.TypeInfoFactory;
5 | import org.apache.flink.api.common.typeinfo.TypeInformation;
6 |
7 | import java.lang.reflect.Type;
8 | import java.util.Map;
9 |
10 | @TypeInfo(ExtendedMeasurement.ExtendedMeasurementTypeInfoFactory.class)
11 | public class ExtendedMeasurement {
12 |
13 | private Sensor sensor;
14 | private Location location;
15 | private MeasurementValue measurement;
16 |
17 | public ExtendedMeasurement(
18 | Sensor sensor,
19 | Location location,
20 | MeasurementValue measurement) {
21 | this.sensor = sensor;
22 | this.location = location;
23 | this.measurement = measurement;
24 | }
25 |
26 | public Sensor getSensor() {
27 | return sensor;
28 | }
29 |
30 | public Location getLocation() {
31 | return location;
32 | }
33 |
34 | public MeasurementValue getMeasurement() {
35 | return measurement;
36 | }
37 |
38 |     public static class ExtendedMeasurementTypeInfoFactory extends TypeInfoFactory<ExtendedMeasurement> {
39 |         @Override
40 |         public TypeInformation<ExtendedMeasurement> createTypeInfo(
41 |                 Type t,
42 |                 Map<String, TypeInformation<?>> genericParameters) {
43 | return ExtendedMeasurementTypeInfo.INSTANCE;
44 | }
45 | }
46 | }
47 |
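The @TypeInfo annotation above is the hook that makes the rest of this package work: Flink follows the annotation to the TypeInfoFactory, the factory returns the TypeInformation singleton, and the TypeInformation hands out the TypeSerializer, so the type never falls back to Kryo. A minimal sketch of the effect (ObjectReuseExtendedMeasurementSource is defined later in this package):

    // hedged sketch: no registration call needed, the @TypeInfo annotation is
    // picked up automatically when the type is used in a job
    DataStream<ExtendedMeasurement> measurements =
            env.addSource(new ObjectReuseExtendedMeasurementSource());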
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/ExtendedMeasurementSerializer.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 |
3 | import org.apache.flink.api.common.typeutils.SimpleTypeSerializerSnapshot;
4 | import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot;
5 | import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton;
6 | import org.apache.flink.core.memory.DataInputView;
7 | import org.apache.flink.core.memory.DataOutputView;
8 |
9 | import java.io.IOException;
10 |
11 | public class ExtendedMeasurementSerializer extends TypeSerializerSingleton<ExtendedMeasurement> {
12 |
13 | private ExtendedMeasurementSerializer() {
14 | }
15 |
16 | static final ExtendedMeasurementSerializer INSTANCE = new ExtendedMeasurementSerializer();
17 |
18 | @Override
19 | public boolean isImmutableType() {
20 | return true;
21 | }
22 |
23 | @Override
24 | public ExtendedMeasurement createInstance() {
25 | return null;
26 | }
27 |
28 | @Override
29 | public ExtendedMeasurement copy(ExtendedMeasurement from) {
30 | return new ExtendedMeasurement(
31 | SensorSerializer.INSTANCE.copy(from.getSensor()),
32 | LocationSerializer.INSTANCE.copy(from.getLocation()),
33 | MeasurementValueSerializer.INSTANCE.copy(from.getMeasurement()));
34 | }
35 |
36 | @Override
37 | public ExtendedMeasurement copy(ExtendedMeasurement from, ExtendedMeasurement reuse) {
38 | return copy(from);
39 | }
40 |
41 | @Override
42 | public int getLength() {
43 | return SensorSerializer.INSTANCE.getLength() +
44 | LocationSerializer.INSTANCE.getLength() +
45 | MeasurementValueSerializer.INSTANCE.getLength();
46 | }
47 |
48 | @Override
49 | public void serialize(ExtendedMeasurement record, DataOutputView target) throws IOException {
50 | SensorSerializer.INSTANCE.serialize(record.getSensor(), target);
51 | LocationSerializer.INSTANCE.serialize(record.getLocation(), target);
52 | MeasurementValueSerializer.INSTANCE.serialize(record.getMeasurement(), target);
53 | }
54 |
55 | @Override
56 | public ExtendedMeasurement deserialize(DataInputView source) throws IOException {
57 | Sensor sensor = SensorSerializer.INSTANCE.deserialize(source);
58 | Location location = LocationSerializer.INSTANCE.deserialize(source);
59 | MeasurementValue measurement = MeasurementValueSerializer.INSTANCE.deserialize(source);
60 | return new ExtendedMeasurement(sensor, location, measurement);
61 | }
62 |
63 | @Override
64 | public ExtendedMeasurement deserialize(ExtendedMeasurement reuse, DataInputView source) throws IOException {
65 | return deserialize(source);
66 | }
67 |
68 | @Override
69 | public void copy(DataInputView source, DataOutputView target) throws IOException {
70 | SensorSerializer.INSTANCE.copy(source, target);
71 | LocationSerializer.INSTANCE.copy(source, target);
72 | MeasurementValueSerializer.INSTANCE.copy(source, target);
73 | }
74 |
75 | // -----------------------------------------------------------------------------------
76 |
77 | @Override
78 |     public TypeSerializerSnapshot<ExtendedMeasurement> snapshotConfiguration() {
79 | return new ExtendedMeasurementSerializerSnapshot();
80 | }
81 |
82 | @SuppressWarnings("WeakerAccess")
83 | public static final class ExtendedMeasurementSerializerSnapshot extends
84 |             SimpleTypeSerializerSnapshot<ExtendedMeasurement> {
85 |
86 | public ExtendedMeasurementSerializerSnapshot() {
87 | super(() -> INSTANCE);
88 | }
89 | }
90 | }
91 |
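Returning a non-negative value from getLength() tells Flink this is a fixed-size type (variable-length serializers return -1). The size composes from the component serializers defined below: Sensor is two longs plus an int ordinal (8 + 8 + 4 = 20 bytes), Location is three doubles (24 bytes), and MeasurementValue is a double, a float and a long (8 + 4 + 8 = 20 bytes), so every ExtendedMeasurement serializes to exactly 64 bytes.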
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/ExtendedMeasurementTypeInfo.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 |
3 | import org.apache.flink.api.common.ExecutionConfig;
4 | import org.apache.flink.api.common.typeinfo.TypeInformation;
5 | import org.apache.flink.api.common.typeutils.TypeSerializer;
6 |
7 | class ExtendedMeasurementTypeInfo extends TypeInformation<ExtendedMeasurement> {
8 |
9 | private ExtendedMeasurementTypeInfo() {
10 | }
11 |
12 | static final ExtendedMeasurementTypeInfo INSTANCE = new ExtendedMeasurementTypeInfo();
13 |
14 | @Override
15 | public boolean isBasicType() {
16 | return false;
17 | }
18 |
19 | @Override
20 | public boolean isTupleType() {
21 | return false;
22 | }
23 |
24 | @Override
25 | public int getArity() {
26 | return 3;
27 | }
28 |
29 | @Override
30 | public int getTotalFields() {
31 | return SensorTypeInfo.INSTANCE.getArity() +
32 | LocationTypeInfo.INSTANCE.getArity() +
33 | MeasurementValueTypeInfo.INSTANCE.getArity();
34 | }
35 |
36 | @Override
37 |     public Class<ExtendedMeasurement> getTypeClass() {
38 | return ExtendedMeasurement.class;
39 | }
40 |
41 | @Override
42 | public boolean isKeyType() {
43 | return SensorTypeInfo.INSTANCE.isKeyType() &&
44 | LocationTypeInfo.INSTANCE.isKeyType() &&
45 | MeasurementValueTypeInfo.INSTANCE.isKeyType();
46 | }
47 |
48 | @Override
49 |     public TypeSerializer<ExtendedMeasurement> createSerializer(ExecutionConfig config) {
50 | return ExtendedMeasurementSerializer.INSTANCE;
51 | }
52 |
53 | @Override
54 | public String toString() {
55 | return getClass().getSimpleName();
56 | }
57 |
58 | @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
59 | @Override
60 | public boolean equals(Object obj) {
61 | return this.canEqual(obj);
62 | }
63 |
64 | @Override
65 | public int hashCode() {
66 | return ExtendedMeasurement.class.hashCode();
67 | }
68 |
69 | @Override
70 | public boolean canEqual(Object obj) {
71 | return obj instanceof ExtendedMeasurementTypeInfo;
72 | }
73 | }
74 |
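The equals()/hashCode()/canEqual() triple at the bottom is not boilerplate: Flink compares TypeInformation instances when composing and validating job graphs, so every implementation must define equality. For a stateless singleton like this one, class identity is sufficient, which is why equals() can simply delegate to canEqual(). The same pattern repeats in the other *TypeInfo classes in this package.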
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/Location.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 |
3 | import org.apache.flink.api.common.typeinfo.TypeInfo;
4 | import org.apache.flink.api.common.typeinfo.TypeInfoFactory;
5 | import org.apache.flink.api.common.typeinfo.TypeInformation;
6 |
7 | import java.lang.reflect.Type;
8 | import java.util.Map;
9 | import java.util.Objects;
10 |
11 | @SuppressWarnings("WeakerAccess")
12 | @TypeInfo(Location.LocationTypeInfoFactory.class)
13 | public class Location {
14 | private double longitude;
15 | private double latitude;
16 | private double height;
17 |
18 | public Location(double longitude, double latitude, double height) {
19 | this.longitude = longitude;
20 | this.latitude = latitude;
21 | this.height = height;
22 | }
23 |
24 | public double getLongitude() {
25 | return longitude;
26 | }
27 |
28 | public double getLatitude() {
29 | return latitude;
30 | }
31 |
32 | public double getHeight() {
33 | return height;
34 | }
35 |
36 | @Override
37 | public boolean equals(Object o) {
38 | if (this == o) {
39 | return true;
40 | }
41 | if (o == null || getClass() != o.getClass()) {
42 | return false;
43 | }
44 | Location location = (Location) o;
45 | return Double.compare(location.longitude, longitude) == 0 &&
46 | Double.compare(location.latitude, latitude) == 0 &&
47 | Double.compare(location.height, height) == 0;
48 | }
49 |
50 | @Override
51 | public int hashCode() {
52 | return Objects.hash(longitude, latitude, height);
53 | }
54 |
55 |     public static class LocationTypeInfoFactory extends TypeInfoFactory<Location> {
56 |         @Override
57 |         public TypeInformation<Location> createTypeInfo(
58 |                 Type t,
59 |                 Map<String, TypeInformation<?>> genericParameters) {
60 | return LocationTypeInfo.INSTANCE;
61 | }
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/LocationSerializer.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 |
3 | import org.apache.flink.api.common.typeutils.SimpleTypeSerializerSnapshot;
4 | import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot;
5 | import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton;
6 | import org.apache.flink.core.memory.DataInputView;
7 | import org.apache.flink.core.memory.DataOutputView;
8 |
9 | import java.io.IOException;
10 |
11 | public class LocationSerializer extends TypeSerializerSingleton<Location> {
12 |
13 | private LocationSerializer() {
14 | }
15 |
16 | static final LocationSerializer INSTANCE = new LocationSerializer();
17 |
18 | @Override
19 | public boolean isImmutableType() {
20 | return true;
21 | }
22 |
23 | @Override
24 | public Location createInstance() {
25 | return null;
26 | }
27 |
28 | @Override
29 | public Location copy(Location from) {
30 | return new Location(from.getLongitude(), from.getLatitude(), from.getHeight());
31 | }
32 |
33 | @Override
34 | public Location copy(Location from, Location reuse) {
35 | return copy(from);
36 | }
37 |
38 | @Override
39 | public int getLength() {
40 | return Double.BYTES + Double.BYTES + Double.BYTES;
41 | }
42 |
43 | @Override
44 | public void serialize(Location record, DataOutputView target) throws IOException {
45 | target.writeDouble(record.getLongitude());
46 | target.writeDouble(record.getLatitude());
47 | target.writeDouble(record.getHeight());
48 | }
49 |
50 | @Override
51 | public Location deserialize(DataInputView source) throws IOException {
52 | double longitude = source.readDouble();
53 | double latitude = source.readDouble();
54 | double height = source.readDouble();
55 | return new Location(longitude, latitude, height);
56 | }
57 |
58 | @Override
59 | public Location deserialize(Location reuse, DataInputView source) throws IOException {
60 | return deserialize(source);
61 | }
62 |
63 | @Override
64 | public void copy(DataInputView source, DataOutputView target) throws IOException {
65 | target.writeDouble(source.readDouble());
66 | target.writeDouble(source.readDouble());
67 | target.writeDouble(source.readDouble());
68 | }
69 |
70 | // -----------------------------------------------------------------------------------
71 |
72 | @Override
73 |     public TypeSerializerSnapshot<Location> snapshotConfiguration() {
74 | return new LocationSerializerSnapshot();
75 | }
76 |
77 | @SuppressWarnings("WeakerAccess")
78 | public static final class LocationSerializerSnapshot extends
79 |             SimpleTypeSerializerSnapshot<Location> {
80 |
81 | public LocationSerializerSnapshot() {
82 | super(() -> INSTANCE);
83 | }
84 | }
85 | }
86 |
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/LocationTypeInfo.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 |
3 | import org.apache.flink.api.common.ExecutionConfig;
4 | import org.apache.flink.api.common.typeinfo.TypeInformation;
5 | import org.apache.flink.api.common.typeutils.TypeSerializer;
6 |
7 | class LocationTypeInfo extends TypeInformation<Location> {
8 |
9 | private LocationTypeInfo() {
10 | }
11 |
12 | static final LocationTypeInfo INSTANCE = new LocationTypeInfo();
13 |
14 | @Override
15 | public boolean isBasicType() {
16 | return false;
17 | }
18 |
19 | @Override
20 | public boolean isTupleType() {
21 | return false;
22 | }
23 |
24 | @Override
25 | public int getArity() {
26 | return 3;
27 | }
28 |
29 | @Override
30 | public int getTotalFields() {
31 | return 3;
32 | }
33 |
34 | @Override
35 |     public Class<Location> getTypeClass() {
36 | return Location.class;
37 | }
38 |
39 | @Override
40 | public boolean isKeyType() {
41 | return true;
42 | }
43 |
44 | @Override
45 |     public TypeSerializer<Location> createSerializer(ExecutionConfig config) {
46 | return LocationSerializer.INSTANCE;
47 | }
48 |
49 | @Override
50 | public String toString() {
51 | return getClass().getSimpleName();
52 | }
53 |
54 | @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
55 | @Override
56 | public boolean equals(Object obj) {
57 | return this.canEqual(obj);
58 | }
59 |
60 | @Override
61 | public int hashCode() {
62 | return Location.class.hashCode();
63 | }
64 |
65 | @Override
66 | public boolean canEqual(Object obj) {
67 | return obj instanceof LocationTypeInfo;
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/MeasurementValue.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 |
3 | import org.apache.flink.api.common.typeinfo.TypeInfo;
4 | import org.apache.flink.api.common.typeinfo.TypeInfoFactory;
5 | import org.apache.flink.api.common.typeinfo.TypeInformation;
6 |
7 | import java.lang.reflect.Type;
8 | import java.util.Map;
9 |
10 | @TypeInfo(MeasurementValue.MeasurementValueTypeInfoFactory.class)
11 | public class MeasurementValue {
12 | private double value;
13 | private float accuracy;
14 | private long timestamp;
15 |
16 | public MeasurementValue(double value, float accuracy, long timestamp) {
17 | this.value = value;
18 | this.accuracy = accuracy;
19 | this.timestamp = timestamp;
20 | }
21 |
22 | public double getValue() {
23 | return value;
24 | }
25 |
26 | public float getAccuracy() {
27 | return accuracy;
28 | }
29 |
30 | public long getTimestamp() {
31 | return timestamp;
32 | }
33 |
34 |     public static class MeasurementValueTypeInfoFactory extends TypeInfoFactory<MeasurementValue> {
35 |         @Override
36 |         public TypeInformation<MeasurementValue> createTypeInfo(
37 |                 Type t,
38 |                 Map<String, TypeInformation<?>> genericParameters) {
39 | return MeasurementValueTypeInfo.INSTANCE;
40 | }
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/MeasurementValueSerializer.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 |
3 | import org.apache.flink.api.common.typeutils.SimpleTypeSerializerSnapshot;
4 | import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot;
5 | import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton;
6 | import org.apache.flink.core.memory.DataInputView;
7 | import org.apache.flink.core.memory.DataOutputView;
8 |
9 | import java.io.IOException;
10 |
11 | public class MeasurementValueSerializer extends TypeSerializerSingleton<MeasurementValue> {
12 |
13 | private MeasurementValueSerializer() {
14 | }
15 |
16 | static final MeasurementValueSerializer INSTANCE = new MeasurementValueSerializer();
17 |
18 | @Override
19 | public boolean isImmutableType() {
20 | return true;
21 | }
22 |
23 | @Override
24 | public MeasurementValue createInstance() {
25 | return null;
26 | }
27 |
28 | @Override
29 | public MeasurementValue copy(MeasurementValue from) {
30 | return new MeasurementValue(from.getValue(), from.getAccuracy(), from.getTimestamp());
31 | }
32 |
33 | @Override
34 | public MeasurementValue copy(MeasurementValue from, MeasurementValue reuse) {
35 | return copy(from);
36 | }
37 |
38 | @Override
39 | public int getLength() {
40 | return Double.BYTES + Float.BYTES + Long.BYTES;
41 | }
42 |
43 | @Override
44 | public void serialize(MeasurementValue record, DataOutputView target) throws IOException {
45 | target.writeDouble(record.getValue());
46 | target.writeFloat(record.getAccuracy());
47 | target.writeLong(record.getTimestamp());
48 | }
49 |
50 | @Override
51 | public MeasurementValue deserialize(DataInputView source) throws IOException {
52 | double value = source.readDouble();
53 | float accuracy = source.readFloat();
54 | long timestamp = source.readLong();
55 | return new MeasurementValue(value, accuracy, timestamp);
56 | }
57 |
58 | @Override
59 | public MeasurementValue deserialize(MeasurementValue reuse, DataInputView source) throws IOException {
60 | return deserialize(source);
61 | }
62 |
63 | @Override
64 | public void copy(DataInputView source, DataOutputView target) throws IOException {
65 | target.writeDouble(source.readDouble());
66 | target.writeFloat(source.readFloat());
67 | target.writeLong(source.readLong());
68 | }
69 |
70 | // -----------------------------------------------------------------------------------
71 |
72 | @Override
73 |     public TypeSerializerSnapshot<MeasurementValue> snapshotConfiguration() {
74 | return new MeasurementValueSerializerSnapshot();
75 | }
76 |
77 | @SuppressWarnings("WeakerAccess")
78 | public static final class MeasurementValueSerializerSnapshot extends
79 |             SimpleTypeSerializerSnapshot<MeasurementValue> {
80 |
81 | public MeasurementValueSerializerSnapshot() {
82 | super(() -> INSTANCE);
83 | }
84 | }
85 | }
86 |
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/MeasurementValueTypeInfo.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 |
3 | import org.apache.flink.api.common.ExecutionConfig;
4 | import org.apache.flink.api.common.typeinfo.TypeInformation;
5 | import org.apache.flink.api.common.typeutils.TypeSerializer;
6 |
7 | class MeasurementValueTypeInfo extends TypeInformation<MeasurementValue> {
8 |
9 | private MeasurementValueTypeInfo() {
10 | }
11 |
12 | static final MeasurementValueTypeInfo INSTANCE = new MeasurementValueTypeInfo();
13 |
14 | @Override
15 | public boolean isBasicType() {
16 | return false;
17 | }
18 |
19 | @Override
20 | public boolean isTupleType() {
21 | return false;
22 | }
23 |
24 | @Override
25 | public int getArity() {
26 | return 3;
27 | }
28 |
29 | @Override
30 | public int getTotalFields() {
31 | return 3;
32 | }
33 |
34 | @Override
35 |     public Class<MeasurementValue> getTypeClass() {
36 | return MeasurementValue.class;
37 | }
38 |
39 | @Override
40 | public boolean isKeyType() {
41 | return true;
42 | }
43 |
44 | @Override
45 |     public TypeSerializer<MeasurementValue> createSerializer(ExecutionConfig config) {
46 | return MeasurementValueSerializer.INSTANCE;
47 | }
48 |
49 | @Override
50 | public String toString() {
51 | return getClass().getSimpleName();
52 | }
53 |
54 | @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
55 | @Override
56 | public boolean equals(Object obj) {
57 | return this.canEqual(obj);
58 | }
59 |
60 | @Override
61 | public int hashCode() {
62 | return MeasurementValue.class.hashCode();
63 | }
64 |
65 | @Override
66 | public boolean canEqual(Object obj) {
67 | return obj instanceof MeasurementValueTypeInfo;
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/ObjectReuseExtendedMeasurementSource.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 |
3 | import org.apache.flink.configuration.Configuration;
4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
5 |
6 | import com.ververica.flinktraining.provided.troubleshoot.GeoUtils;
7 | import com.ververica.flinktraining.provided.troubleshoot.MeanGauge;
8 |
9 | import java.util.SplittableRandom;
10 |
11 | /**
12 | * Artificial source for sensor measurements (temperature and wind speed) of a pre-defined set of
13 | * sensors (per parallel instance) creating measurements for two locations (inside the bounding
14 |  * boxes of Germany (DE) and the USA (US)) in metric units (°C and km/h).
15 | */
16 | @SuppressWarnings("WeakerAccess")
17 | public class ObjectReuseExtendedMeasurementSource extends RichParallelSourceFunction<ExtendedMeasurement> {
18 |
19 | private static final long serialVersionUID = 1L;
20 |
21 | private static final int NUM_SENSORS = 10_000;
22 |
23 | public static final int LOWER_TEMPERATURE_CELCIUS = -10;
24 | public static final int UPPER_TEMPERATURE_CELCIUS = 35;
25 | public static final int LOWER_WIND_SPEED_KMH = 0;
26 | public static final int UPPER_WIND_SPEED_KMH = 335;
27 |
28 | private volatile boolean running = true;
29 |
30 | private transient Sensor[] sensors;
31 | private transient Location[] locations;
32 | private transient double[] lastValue;
33 | private transient MeanGauge sourceTemperatureUS;
34 |
35 | @Override
36 | public void open(final Configuration parameters) {
37 | initSensors();
38 |
39 | sourceTemperatureUS = getRuntimeContext().getMetricGroup()
40 | .gauge("sourceTemperatureUSmean", new MeanGauge());
41 | getRuntimeContext().getMetricGroup().gauge(
42 | "sourceTemperatureUSmin", new MeanGauge.MinGauge(sourceTemperatureUS));
43 | getRuntimeContext().getMetricGroup().gauge(
44 | "sourceTemperatureUSmax", new MeanGauge.MaxGauge(sourceTemperatureUS));
45 | }
46 |
47 | @Override
48 |     public void run(SourceContext<ExtendedMeasurement> ctx) {
49 | final SplittableRandom rnd = new SplittableRandom();
50 | final Object lock = ctx.getCheckpointLock();
51 |
52 | while (running) {
53 | ExtendedMeasurement event = randomEvent(rnd);
54 |
55 | //noinspection SynchronizationOnLocalVariableOrMethodParameter
56 | synchronized (lock) {
57 | ctx.collect(event);
58 | }
59 | }
60 | }
61 |
62 | @Override
63 | public void cancel() {
64 | running = false;
65 | }
66 |
67 | /**
68 | * Creates sensor metadata that this source instance will work with.
69 | */
70 | private void initSensors() {
71 | final SplittableRandom rnd = new SplittableRandom();
72 | final Sensor.SensorType[] sensorTypes =
73 | Sensor.SensorType.values();
74 |
75 | final int start = getRuntimeContext().getIndexOfThisSubtask() * NUM_SENSORS;
76 | this.sensors = new Sensor[NUM_SENSORS];
77 | this.lastValue = new double[NUM_SENSORS];
78 | this.locations = new Location[NUM_SENSORS];
79 | for (int i = 0; i < NUM_SENSORS; ++i) {
80 | long sensorId = start + i;
81 | long vendorId = sensorId % 100;
82 | final Sensor.SensorType sensorType =
83 | sensorTypes[(i / 2) % sensorTypes.length];
84 | sensors[i] = new Sensor(sensorId, vendorId, sensorType);
85 |
86 | lastValue[i] = randomInitialMeasurementValue(rnd, sensorType);
87 |
88 | // assume that a sensor has a fixed position
89 | locations[i] = randomInitialLocation(rnd, i);
90 | }
91 | }
92 |
93 | /**
94 | * Creates a random measurement value that a sensor will start with.
95 | */
96 | private double randomInitialMeasurementValue(
97 | SplittableRandom rnd,
98 | Sensor.SensorType sensorType) {
99 | switch (sensorType) {
100 | case Temperature:
101 | // -10°C - 35°C
102 | return rnd.nextInt(
103 | (UPPER_TEMPERATURE_CELCIUS - LOWER_TEMPERATURE_CELCIUS) * 10) / 10.0 +
104 | LOWER_TEMPERATURE_CELCIUS;
105 | case Wind:
106 | // 0km/h - 335km/h
107 | return rnd.nextInt((UPPER_WIND_SPEED_KMH - LOWER_WIND_SPEED_KMH) * 10) / 10.0 +
108 | LOWER_WIND_SPEED_KMH;
109 | default:
110 | throw new IllegalStateException("Unknown sensor type: " + sensorType);
111 | }
112 | }
113 |
114 | /**
115 | * Creates a random location for a sensor, distinguishing two bounding boxes: US and DE.
116 | */
117 | private static Location randomInitialLocation(SplittableRandom rnd, int i) {
118 | final double longitude;
119 | final double latitude;
120 | // let's assume that no selected region wraps around LON -180/+180
121 | if (i < NUM_SENSORS / 2) {
122 | // in US
123 | longitude = rnd.nextDouble() * (GeoUtils.US_LON_EAST - GeoUtils.US_LON_WEST) + GeoUtils.US_LON_WEST;
124 | latitude = rnd.nextDouble() * (GeoUtils.US_LAT_NORTH - GeoUtils.US_LAT_SOUTH) + GeoUtils.US_LAT_SOUTH;
125 | } else {
126 | // in DE
127 | longitude = rnd.nextDouble() * (GeoUtils.DE_LON_EAST - GeoUtils.DE_LON_WEST) + GeoUtils.DE_LON_WEST;
128 | latitude = rnd.nextDouble() * (GeoUtils.DE_LAT_NORTH - GeoUtils.DE_LAT_SOUTH) + GeoUtils.DE_LAT_SOUTH;
129 | }
130 | double height = rnd.nextDouble() * 3000;
131 | return new Location(longitude, latitude, height);
132 | }
133 |
134 | /**
135 | * Creates a randomized sensor value during runtime of the source. Each new value differs
136 | * slightly from the previous value that this sensor had.
137 | */
138 | private ExtendedMeasurement randomEvent(SplittableRandom rnd) {
139 | int randomIdx = rnd.nextInt(sensors.length);
140 | Sensor sensor = sensors[randomIdx];
141 | Location location = locations[randomIdx];
142 |
143 | long timestamp = System.currentTimeMillis();
144 |
145 | final double value = randomChangeMeasurementValue(
146 | rnd,
147 | sensor.getSensorType(),
148 | location,
149 | lastValue[randomIdx]);
150 |
151 | lastValue[randomIdx] = value;
152 |
153 | final MeasurementValue measurement =
154 | new MeasurementValue(
155 | value,
156 | (float) (rnd.nextInt(100) - 50) / 10.0f, // +- 5
157 | timestamp);
158 |
159 | return new ExtendedMeasurement(
160 | new Sensor(
161 | sensor.getSensorId(), sensor.getVendorId(), sensor.getSensorType()),
162 | new Location(
163 | location.getLongitude(), location.getLatitude(), location.getHeight()),
164 | measurement);
165 | }
166 |
167 | /**
168 | * Generates a new sensor value that is +-3 of the old value and reports a custom metric for
169 | * sensor values in the US.
170 | */
171 | private double randomChangeMeasurementValue(
172 | SplittableRandom rnd,
173 | Sensor.SensorType sensorType,
174 | Location location,
175 | double lastValue) {
176 | double change = rnd.nextDouble(6) - 3.0; // +- 3
177 | final double value;
178 | switch (sensorType) {
179 | case Temperature:
180 | value = newValueWithinBounds(
181 | lastValue, change, LOWER_TEMPERATURE_CELCIUS, UPPER_TEMPERATURE_CELCIUS);
182 | if (GeoUtils.isInUS(location.getLongitude(), location.getLatitude())) {
183 | sourceTemperatureUS.addValue(value);
184 | }
185 | break;
186 | case Wind:
187 | value = newValueWithinBounds(
188 | lastValue, change, LOWER_WIND_SPEED_KMH, UPPER_WIND_SPEED_KMH);
189 | break;
190 | default:
191 | throw new InternalError("Unknown sensor type: " + sensorType);
192 | }
193 | return value;
194 | }
195 |
196 | /**
197 | * Returns either lastValue + change
(if within the given bounds) or
198 | * lastValue - change
(otherwise).
199 | */
200 | private static double newValueWithinBounds(
201 | double lastValue,
202 | double change,
203 | double lowerLimit,
204 | double upperLimit) {
205 | double value;
206 | if (lastValue + change >= lowerLimit && lastValue + change <= upperLimit) {
207 | value = lastValue + change;
208 | } else {
209 | value = lastValue - change;
210 | }
211 | return value;
212 | }
213 | }
214 |
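The class name is the point of the exercise: randomEvent() emits fresh copies of the cached Sensor and Location instances, so records handed to ctx.collect() never share mutable state with the source's internal arrays. That is what makes the job safe to run with object reuse enabled, which the --objectReuse flag in the test runner toggles; a minimal sketch, assuming the standard ExecutionConfig API:

    // hedged sketch: object reuse lets Flink pass record instances between
    // chained operators without defensive copies - safe only if no function
    // mutates or holds on to records after emitting them
    env.getConfig().enableObjectReuse();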
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/Sensor.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 |
3 | import org.apache.flink.api.common.typeinfo.TypeInfo;
4 | import org.apache.flink.api.common.typeinfo.TypeInfoFactory;
5 | import org.apache.flink.api.common.typeinfo.TypeInformation;
6 |
7 | import java.lang.reflect.Type;
8 | import java.util.Map;
9 | import java.util.Objects;
10 |
11 | @SuppressWarnings("WeakerAccess")
12 | @TypeInfo(Sensor.SensorTypeInfoFactory.class)
13 | public class Sensor {
14 | public enum SensorType {
15 | Temperature,
16 | Wind
17 | }
18 |
19 | private long sensorId;
20 | private long vendorId;
21 | private SensorType sensorType;
22 |
23 | public Sensor(
24 | long sensorId,
25 | long vendorId,
26 | SensorType sensorType) {
27 | this.sensorId = sensorId;
28 | this.vendorId = vendorId;
29 | this.sensorType = sensorType;
30 | }
31 |
32 | public long getSensorId() {
33 | return sensorId;
34 | }
35 |
36 | public long getVendorId() {
37 | return vendorId;
38 | }
39 |
40 | public SensorType getSensorType() {
41 | return sensorType;
42 | }
43 |
44 | @Override
45 | public boolean equals(Object o) {
46 | if (this == o) {
47 | return true;
48 | }
49 | if (o == null || getClass() != o.getClass()) {
50 | return false;
51 | }
52 | Sensor sensor = (Sensor) o;
53 | return sensorId == sensor.sensorId &&
54 | vendorId == sensor.vendorId &&
55 | sensorType == sensor.sensorType;
56 | }
57 |
58 | @Override
59 | public int hashCode() {
60 | // NOTE: do not use the enum directly here. Why?
61 | // -> try with Sensor as a key in a distributed setting and see for yourself!
62 | return Objects.hash(sensorId, vendorId, sensorType.ordinal());
63 | }
64 |
65 |     public static class SensorTypeInfoFactory extends TypeInfoFactory<Sensor> {
66 |         @Override
67 |         public TypeInformation<Sensor> createTypeInfo(
68 |                 Type t,
69 |                 Map<String, TypeInformation<?>> genericParameters) {
70 | return SensorTypeInfo.INSTANCE;
71 | }
72 | }
73 | }
74 |
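The hashCode() note above resolves as follows: Enum.hashCode() is Object.hashCode(), an identity hash that differs from JVM to JVM, so two TaskManagers hashing the same Sensor key could route it to different partitions. ordinal() is stable across JVMs, which is why it is hashed instead. A minimal illustration:

    // hedged sketch: ordinal() is identical in every JVM, the identity hash is not
    int stable = Sensor.SensorType.Wind.ordinal();    // always 1
    int unstable = Sensor.SensorType.Wind.hashCode(); // varies per JVM run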
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/SensorSerializer.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 |
3 | import org.apache.flink.api.common.typeutils.SimpleTypeSerializerSnapshot;
4 | import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot;
5 | import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton;
6 | import org.apache.flink.core.memory.DataInputView;
7 | import org.apache.flink.core.memory.DataOutputView;
8 |
9 | import java.io.IOException;
10 |
11 | public class SensorSerializer extends TypeSerializerSingleton<Sensor> {
12 | private static final Sensor.SensorType[] SENSOR_TYPES =
13 | Sensor.SensorType.values();
14 |
15 | private SensorSerializer() {
16 | }
17 |
18 | static final SensorSerializer INSTANCE = new SensorSerializer();
19 |
20 | @Override
21 | public boolean isImmutableType() {
22 | return true;
23 | }
24 |
25 | @Override
26 | public Sensor createInstance() {
27 | return null;
28 | }
29 |
30 | @Override
31 | public Sensor copy(Sensor from) {
32 | return new Sensor(from.getSensorId(), from.getVendorId(), from.getSensorType());
33 | }
34 |
35 | @Override
36 | public Sensor copy(Sensor from, Sensor reuse) {
37 | return copy(from);
38 | }
39 |
40 | @Override
41 | public int getLength() {
42 | return Long.BYTES + Long.BYTES + Integer.BYTES;
43 | }
44 |
45 | @Override
46 | public void serialize(Sensor record, DataOutputView target) throws IOException {
47 | target.writeLong(record.getSensorId());
48 | target.writeLong(record.getVendorId());
49 | target.writeInt(record.getSensorType().ordinal());
50 | }
51 |
52 | @Override
53 | public Sensor deserialize(DataInputView source) throws IOException {
54 | long sensorId = source.readLong();
55 | long vendorId = source.readLong();
56 | Sensor.SensorType sensorType = SENSOR_TYPES[source.readInt()];
57 | return new Sensor(sensorId, vendorId, sensorType);
58 | }
59 |
60 | @Override
61 | public Sensor deserialize(Sensor reuse, DataInputView source) throws IOException {
62 | return deserialize(source);
63 | }
64 |
65 | @Override
66 | public void copy(DataInputView source, DataOutputView target) throws IOException {
67 | target.writeLong(source.readLong());
68 | target.writeLong(source.readLong());
69 | target.writeInt(source.readInt());
70 | }
71 |
72 | // -----------------------------------------------------------------------------------
73 |
74 | @Override
75 |     public TypeSerializerSnapshot<Sensor> snapshotConfiguration() {
76 | return new SensorSerializerSnapshot();
77 | }
78 |
79 | @SuppressWarnings("WeakerAccess")
80 | public static final class SensorSerializerSnapshot extends
81 |             SimpleTypeSerializerSnapshot<Sensor> {
82 |
83 | public SensorSerializerSnapshot() {
84 | super(() -> INSTANCE);
85 | }
86 | }
87 | }
88 |
--------------------------------------------------------------------------------
/src/main/java/com/ververica/flinktraining/solutions/troubleshoot/immutable/SensorTypeInfo.java:
--------------------------------------------------------------------------------
1 | package com.ververica.flinktraining.solutions.troubleshoot.immutable;
2 |
3 | import org.apache.flink.api.common.ExecutionConfig;
4 | import org.apache.flink.api.common.typeinfo.TypeInformation;
5 | import org.apache.flink.api.common.typeutils.TypeSerializer;
6 |
7 | class SensorTypeInfo extends TypeInformation<Sensor> {
8 |
9 | private SensorTypeInfo() {
10 | }
11 |
12 | static final SensorTypeInfo INSTANCE = new SensorTypeInfo();
13 |
14 | @Override
15 | public boolean isBasicType() {
16 | return false;
17 | }
18 |
19 | @Override
20 | public boolean isTupleType() {
21 | return false;
22 | }
23 |
24 | @Override
25 | public int getArity() {
26 | return 3;
27 | }
28 |
29 | @Override
30 | public int getTotalFields() {
31 | return 3;
32 | }
33 |
34 | @Override
35 |     public Class<Sensor> getTypeClass() {
36 | return Sensor.class;
37 | }
38 |
39 | @Override
40 | public boolean isKeyType() {
41 | return true;
42 | }
43 |
44 | @Override
45 |     public TypeSerializer<Sensor> createSerializer(ExecutionConfig config) {
46 | return SensorSerializer.INSTANCE;
47 | }
48 |
49 | @Override
50 | public String toString() {
51 | return getClass().getSimpleName();
52 | }
53 |
54 | @SuppressWarnings("EqualsWhichDoesntCheckParameterClass")
55 | @Override
56 | public boolean equals(Object obj) {
57 | return this.canEqual(obj);
58 | }
59 |
60 | @Override
61 | public int hashCode() {
62 | return Sensor.class.hashCode();
63 | }
64 |
65 | @Override
66 | public boolean canEqual(Object obj) {
67 | return obj instanceof SensorTypeInfo;
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/src/main/resources/log4j2.properties:
--------------------------------------------------------------------------------
1 | loggers=rootLogger
2 | appender.console.type=Console
3 | appender.console.name=STDOUT
4 | appender.console.layout.type=PatternLayout
5 | appender.console.layout.pattern=%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n
6 | rootLogger.level=INFO
7 | rootLogger.appenderRef.console.ref=STDOUT
--------------------------------------------------------------------------------
/src/test/java/com/ververica/training/ObjectReuseJobRunner.java:
--------------------------------------------------------------------------------
1 | package com.ververica.training;
2 |
3 | import com.ververica.flinktraining.exercises.troubleshoot.ObjectReuseJob;
4 | import org.junit.Test;
5 |
6 | public class ObjectReuseJobRunner {
7 |
8 | @Test
9 | public void run() throws Exception {
10 | String[] args = {"--local", "true", "--objectReuse", "true"};
11 | // String[] args = {"--local", "true"};
12 | ObjectReuseJob.main(args);
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/src/test/java/com/ververica/training/TroubledStreamingJobRunner.java:
--------------------------------------------------------------------------------
1 | package com.ververica.training;
2 |
3 | import com.ververica.flinktraining.exercises.troubleshoot.TroubledStreamingJob;
4 | import org.junit.Test;
5 |
6 | public class TroubledStreamingJobRunner {
7 |
8 | @Test
9 | public void run() throws Exception {
10 | String[] args = {"--local", "true"};
11 | TroubledStreamingJob.main(args);
12 | }
13 | }
14 |
--------------------------------------------------------------------------------