├── .gitignore
├── accumulator
├── .gitignore
├── Readme.md
├── pom.xml
└── src
│ └── main
│ ├── java
│ └── org
│ │ └── example
│ │ ├── CSVSink.java
│ │ ├── Main.java
│ │ └── PriceQuery.java
│ └── resources
│ └── logback.xml
├── gatherer_hafas
├── fetchBestPrices.mjs
└── package.json
├── gatherer_role
├── Readme.md
├── defaults
│ └── main.yaml
├── tasks
│ └── main.yaml
└── templates
│ ├── fahrpreis_gatherer.service.j2
│ ├── fahrpreis_gatherer.timer.j2
│ └── run.sh.j2
├── images
└── img.png
├── license.md
├── plotter
├── config.json
├── dashboard.py
├── plotter.py
├── requirements.txt
└── wsgi.py
├── plotter_role
├── defaults
│ └── main.yaml
├── tasks
│ └── main.yaml
└── templates
│ ├── fahrpreis-plotter-config.json.j2
│ └── fahrpreis-plotter.service.j2
└── readme.md
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | Thumbs.db
3 |
4 | .nvm-version
5 | node_modules
6 | npm-debug.log
7 | /package-lock.json
8 |
--------------------------------------------------------------------------------
/accumulator/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | .idea
3 |
--------------------------------------------------------------------------------
/accumulator/Readme.md:
--------------------------------------------------------------------------------
1 |
2 | # Fahrpreis accumulator
3 |
4 | transforms the brotli files that the service produces into an accumulated summary
5 |
6 | This is an Apache Beam pipeline that reads the brotli files, decompresses them and transforms them into the single data points
7 |
8 | ## todo:
9 | - sort the datapoints in the order they are going to be plotted
10 | - make faster
11 | -
12 |
13 |
14 | ## db schema
15 | a bit slow
16 |
17 | ```sql
18 | create table preis_query
19 | (
20 | id int auto_increment
21 | primary key,
22 | `from` int null,
23 | `to` int null,
24 | queried_at varchar(24) null,
25 | price float null,
26 | start_date varchar(24) null
27 | );
28 |
29 | create index preis_query_from_to_index
30 | on preis_query (`from`, `to`);
31 | ```
32 |
33 |
34 | ## timings
35 | java pipelines+jackson+bz2: 345380ms
36 | java pipelines+jackson+gz: 270938ms
37 | ram+brotli4j: 438696ms
38 |
--------------------------------------------------------------------------------
/accumulator/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | org.example
8 | accumulator
9 | 1.0-SNAPSHOT
10 |
11 |
12 | 17
13 | 17
14 | UTF-8
15 |
16 |
17 |
18 |
19 |
20 | org.codehaus.mojo
21 | exec-maven-plugin
22 | 3.1.0
23 |
24 |
25 |
26 | java
27 |
28 |
29 |
30 |
31 | org.example.Main
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 | org.apache.beam
40 | beam-sdks-java-core
41 | 2.43.0
42 |
43 |
44 |
45 | org.apache.beam
46 | beam-runners-direct-java
47 | 2.43.0
48 | runtime
49 |
50 |
51 | com.jayway.jsonpath
52 | json-path
53 | 2.7.0
54 |
55 |
56 | ch.qos.logback
57 | logback-classic
58 | 1.2.11
59 |
60 |
61 | org.projectlombok
62 | lombok
63 | 1.18.24
64 |
65 |
66 | org.apache.beam
67 | beam-sdks-java-io-jdbc
68 | 2.43.0
69 |
70 |
71 | com.nixxcode.jvmbrotli
72 | jvmbrotli
73 | 0.2.0
74 |
75 |
76 | org.apache.beam
77 | beam-runners-flink-1.14
78 | 2.43.0
79 |
80 |
81 | org.mariadb.jdbc
82 | mariadb-java-client
83 | 3.0.6
84 |
85 |
86 | com.fasterxml.jackson.core
87 | jackson-databind
88 | 2.14.0
89 |
90 |
91 |
92 |
93 |
--------------------------------------------------------------------------------
/accumulator/src/main/java/org/example/CSVSink.java:
--------------------------------------------------------------------------------
1 | package org.example;
2 |
3 | import java.io.IOException;
4 | import java.io.PrintWriter;
5 | import java.nio.channels.Channels;
6 | import java.nio.channels.WritableByteChannel;
7 | import java.util.Collections;
8 | import java.util.List;
9 |
10 | import org.apache.beam.sdk.io.FileIO;
11 |
12 | class CSVSink implements FileIO.Sink> {
13 | private String header;
14 | private PrintWriter writer;
15 |
16 | public CSVSink(List colNames) {
17 | this.header = String.join(",",colNames);
18 | }
19 |
20 | @Override
21 | public void open(WritableByteChannel channel) throws IOException {
22 | writer = new PrintWriter(Channels.newOutputStream(channel));
23 | writer.println(header);
24 | }
25 |
26 | @Override
27 | public void write(List element) throws IOException {
28 | writer.println(String.join(",",element));
29 | }
30 |
31 | @Override
32 | public void flush() throws IOException {
33 | writer.flush();
34 | }
35 | }
--------------------------------------------------------------------------------
/accumulator/src/main/java/org/example/Main.java:
--------------------------------------------------------------------------------
1 | package org.example;
2 |
3 | import java.io.ByteArrayInputStream;
4 | import java.time.Instant;
5 | import java.time.ZoneId;
6 | import java.time.format.DateTimeFormatter;
7 | import java.util.ArrayList;
8 | import java.util.Arrays;
9 | import java.util.EnumSet;
10 | import java.util.List;
11 | import java.util.Set;
12 |
13 | import com.jayway.jsonpath.Configuration;
14 | import com.jayway.jsonpath.JsonPath;
15 | import com.jayway.jsonpath.Option;
16 | import com.jayway.jsonpath.spi.json.JacksonJsonProvider;
17 | import com.jayway.jsonpath.spi.json.JsonProvider;
18 | import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
19 | import com.jayway.jsonpath.spi.mapper.MappingProvider;
20 | import com.nixxcode.jvmbrotli.common.BrotliLoader;
21 | import com.nixxcode.jvmbrotli.dec.BrotliInputStream;
22 | import lombok.extern.slf4j.Slf4j;
23 | import org.apache.beam.runners.flink.FlinkPipelineOptions;
24 | import org.apache.beam.runners.flink.FlinkRunner;
25 | import org.apache.beam.sdk.Pipeline;
26 | import org.apache.beam.sdk.coders.StringUtf8Coder;
27 | import org.apache.beam.sdk.io.Compression;
28 | import org.apache.beam.sdk.io.FileIO;
29 | import org.apache.beam.sdk.io.jdbc.JdbcIO;
30 | import org.apache.beam.sdk.options.PipelineOptionsFactory;
31 | import org.apache.beam.sdk.transforms.Contextful;
32 | import org.apache.beam.sdk.transforms.FlatMapElements;
33 | import org.apache.beam.sdk.values.PCollection;
34 | import org.apache.beam.sdk.values.TypeDescriptor;
35 |
36 | import static org.apache.beam.sdk.io.FileIO.Write.defaultNaming;
37 |
38 | @Slf4j
39 | public class Main {
40 | static final DateTimeFormatter dateTimeFormatter =
41 | DateTimeFormatter.ISO_INSTANT.withZone(ZoneId.systemDefault());
42 | public static final JsonPath PRICE_PATH = JsonPath.compile("$.data.*.*.price.amount");
43 | public static final JsonPath START_TIME_PATH = JsonPath.compile("$.data.*.*.legs[0].departure");
44 | public static final JsonPath END_TIME_PATH = JsonPath.compile("$.data.*.*.legs[-1].departure");
45 |
46 | public static void main(String[] args) {
47 | setupTools();
48 | FlinkPipelineOptions options = PipelineOptionsFactory.create().as(FlinkPipelineOptions.class);
49 | options.setRunner(FlinkRunner.class);
50 | options.setFasterCopy(true);
51 | Pipeline p = Pipeline.create(options);
52 | //todo map datetime
53 |
54 | final PCollection extractedDataPoints = p.apply(FileIO.match().filepattern("/tmp/fahrpreise/*" +
55 | ".brotli"))
56 | .apply(FileIO.readMatches().withCompression(Compression.UNCOMPRESSED))
57 | .apply(FlatMapElements
58 | // uses imports from TypeDescriptors
59 | .into(
60 | new TypeDescriptor() {
61 | }
62 | )
63 | .via(Main::getPriceQueries));
64 | //writeToDB(extractedDataPoints);
65 | writeToCSV(extractedDataPoints);
66 |
67 | p.run().waitUntilFinish();
68 | }
69 |
70 | private static void writeToCSV(final PCollection extractedDataPoints) {
71 | extractedDataPoints.apply(FileIO.writeDynamic()
72 | .via(Contextful.fn((PriceQuery pq) -> Arrays.asList(pq.startDate() + "",
73 | pq.queriedAt() + "",
74 | pq.price() + "")),
75 | Contextful.fn(
76 | (String connection) -> new CSVSink(Arrays.asList("startDate", "queriedAt", "price"))))
77 | .by(input -> input.startStation() + "-" + input.targetStation())
78 | .to("/tmp/fahrpreisakku/")
79 | .withDestinationCoder(StringUtf8Coder.of())
80 | .withNaming(type -> defaultNaming(type, ".csv"))
81 | .withCompression(Compression.GZIP)
82 | );
83 | }
84 |
85 | private static void writeToDB(final PCollection extractedDataPoints) {
86 | extractedDataPoints.apply(JdbcIO.write()
87 | .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(
88 | "org.mariadb.jdbc.Driver", "jdbc:mariadb://localhost:3306/fahrpreise")
89 | )
90 | .withStatement("INSERT INTO fahrpreise.preis_query (`from`,`to`,queried_at,price,start_date)" +
91 | "VALUES (?,?,?,?,?)")
92 | .withPreparedStatementSetter((JdbcIO.PreparedStatementSetter) (element, query) -> {
93 | query.setLong(1, element.startStation());
94 | query.setLong(2, element.targetStation());
95 | query.setLong(3, element.queriedAt());
96 | query.setDouble(4, element.price());
97 | query.setLong(5, element.startDate());
98 | })
99 | );
100 | }
101 |
102 | private static void setupTools() {
103 | BrotliLoader.isBrotliAvailable();
104 | Configuration.setDefaults(new Configuration.Defaults() {
105 |
106 | private final JsonProvider jsonProvider = new JacksonJsonProvider();
107 | private final MappingProvider mappingProvider = new JacksonMappingProvider();
108 |
109 | @Override
110 | public JsonProvider jsonProvider() {
111 | return jsonProvider;
112 | }
113 |
114 | @Override
115 | public MappingProvider mappingProvider() {
116 | return mappingProvider;
117 | }
118 |
119 | @Override
120 | public Set