├── .gitignore ├── accumulator ├── .gitignore ├── Readme.md ├── pom.xml └── src │ └── main │ ├── java │ └── org │ │ └── example │ │ ├── CSVSink.java │ │ ├── Main.java │ │ └── PriceQuery.java │ └── resources │ └── logback.xml ├── gatherer_hafas ├── fetchBestPrices.mjs └── package.json ├── gatherer_role ├── Readme.md ├── defaults │ └── main.yaml ├── tasks │ └── main.yaml └── templates │ ├── fahrpreis_gatherer.service.j2 │ ├── fahrpreis_gatherer.timer.j2 │ └── run.sh.j2 ├── images └── img.png ├── license.md ├── plotter ├── config.json ├── dashboard.py ├── plotter.py ├── requirements.txt └── wsgi.py ├── plotter_role ├── defaults │ └── main.yaml ├── tasks │ └── main.yaml └── templates │ ├── fahrpreis-plotter-config.json.j2 │ └── fahrpreis-plotter.service.j2 └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | Thumbs.db 3 | 4 | .nvm-version 5 | node_modules 6 | npm-debug.log 7 | /package-lock.json 8 | -------------------------------------------------------------------------------- /accumulator/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .idea 3 | -------------------------------------------------------------------------------- /accumulator/Readme.md: -------------------------------------------------------------------------------- 1 | 2 | # Fahrpreis accumulator 3 | 4 | transforms the brotli files that the service produces into an accumulated summary 5 | 6 | This is an Apache Beam pipeline that reads the brotli files, decompresses them, and transforms them into individual data points 7 | 8 | ## todo: 9 | - sort the datapoints in the order they are going to be plotted 10 | - make faster 11 | - 12 | 13 | 14 | ## db schema 15 | a bit slow 16 | 17 | ```sql 18 | create table preis_query 19 | ( 20 | id int auto_increment 21 | primary key, 22 | `from` int null, 23 | `to` int null, 24 | queried_at varchar(24) null, 25 | price float null, 26 | 
start_date varchar(24) null 27 | ); 28 | 29 | create index preis_query_from_to_index 30 | on preis_query (`from`, `to`); 31 | ``` 32 | 33 | 34 | ## timings 35 | java pipelines+jackson+bz2: 345380ms 36 | java pipelines+jackson+gz: 270938ms 37 | ram+brotli4j: 438696ms 38 | -------------------------------------------------------------------------------- /accumulator/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.example 8 | accumulator 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 17 13 | 17 14 | UTF-8 15 | 16 | 17 | 18 | 19 | 20 | org.codehaus.mojo 21 | exec-maven-plugin 22 | 3.1.0 23 | 24 | 25 | 26 | java 27 | 28 | 29 | 30 | 31 | org.example.Main 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | org.apache.beam 40 | beam-sdks-java-core 41 | 2.43.0 42 | 43 | 44 | 45 | org.apache.beam 46 | beam-runners-direct-java 47 | 2.43.0 48 | runtime 49 | 50 | 51 | com.jayway.jsonpath 52 | json-path 53 | 2.7.0 54 | 55 | 56 | ch.qos.logback 57 | logback-classic 58 | 1.2.11 59 | 60 | 61 | org.projectlombok 62 | lombok 63 | 1.18.24 64 | 65 | 66 | org.apache.beam 67 | beam-sdks-java-io-jdbc 68 | 2.43.0 69 | 70 | 71 | com.nixxcode.jvmbrotli 72 | jvmbrotli 73 | 0.2.0 74 | 75 | 76 | org.apache.beam 77 | beam-runners-flink-1.14 78 | 2.43.0 79 | 80 | 81 | org.mariadb.jdbc 82 | mariadb-java-client 83 | 3.0.6 84 | 85 | 86 | com.fasterxml.jackson.core 87 | jackson-databind 88 | 2.14.0 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /accumulator/src/main/java/org/example/CSVSink.java: -------------------------------------------------------------------------------- 1 | package org.example; 2 | 3 | import java.io.IOException; 4 | import java.io.PrintWriter; 5 | import java.nio.channels.Channels; 6 | import java.nio.channels.WritableByteChannel; 7 | import java.util.Collections; 8 | import java.util.List; 9 | 10 | import org.apache.beam.sdk.io.FileIO; 11 | 12 | class CSVSink implements 
FileIO.Sink> { 13 | private String header; 14 | private PrintWriter writer; 15 | 16 | public CSVSink(List colNames) { 17 | this.header = String.join(",",colNames); 18 | } 19 | 20 | @Override 21 | public void open(WritableByteChannel channel) throws IOException { 22 | writer = new PrintWriter(Channels.newOutputStream(channel)); 23 | writer.println(header); 24 | } 25 | 26 | @Override 27 | public void write(List element) throws IOException { 28 | writer.println(String.join(",",element)); 29 | } 30 | 31 | @Override 32 | public void flush() throws IOException { 33 | writer.flush(); 34 | } 35 | } -------------------------------------------------------------------------------- /accumulator/src/main/java/org/example/Main.java: -------------------------------------------------------------------------------- 1 | package org.example; 2 | 3 | import java.io.ByteArrayInputStream; 4 | import java.time.Instant; 5 | import java.time.ZoneId; 6 | import java.time.format.DateTimeFormatter; 7 | import java.util.ArrayList; 8 | import java.util.Arrays; 9 | import java.util.EnumSet; 10 | import java.util.List; 11 | import java.util.Set; 12 | 13 | import com.jayway.jsonpath.Configuration; 14 | import com.jayway.jsonpath.JsonPath; 15 | import com.jayway.jsonpath.Option; 16 | import com.jayway.jsonpath.spi.json.JacksonJsonProvider; 17 | import com.jayway.jsonpath.spi.json.JsonProvider; 18 | import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider; 19 | import com.jayway.jsonpath.spi.mapper.MappingProvider; 20 | import com.nixxcode.jvmbrotli.common.BrotliLoader; 21 | import com.nixxcode.jvmbrotli.dec.BrotliInputStream; 22 | import lombok.extern.slf4j.Slf4j; 23 | import org.apache.beam.runners.flink.FlinkPipelineOptions; 24 | import org.apache.beam.runners.flink.FlinkRunner; 25 | import org.apache.beam.sdk.Pipeline; 26 | import org.apache.beam.sdk.coders.StringUtf8Coder; 27 | import org.apache.beam.sdk.io.Compression; 28 | import org.apache.beam.sdk.io.FileIO; 29 | import 
org.apache.beam.sdk.io.jdbc.JdbcIO; 30 | import org.apache.beam.sdk.options.PipelineOptionsFactory; 31 | import org.apache.beam.sdk.transforms.Contextful; 32 | import org.apache.beam.sdk.transforms.FlatMapElements; 33 | import org.apache.beam.sdk.values.PCollection; 34 | import org.apache.beam.sdk.values.TypeDescriptor; 35 | 36 | import static org.apache.beam.sdk.io.FileIO.Write.defaultNaming; 37 | 38 | @Slf4j 39 | public class Main { 40 | static final DateTimeFormatter dateTimeFormatter = 41 | DateTimeFormatter.ISO_INSTANT.withZone(ZoneId.systemDefault()); 42 | public static final JsonPath PRICE_PATH = JsonPath.compile("$.data.*.*.price.amount"); 43 | public static final JsonPath START_TIME_PATH = JsonPath.compile("$.data.*.*.legs[0].departure"); 44 | public static final JsonPath END_TIME_PATH = JsonPath.compile("$.data.*.*.legs[-1].departure"); 45 | 46 | public static void main(String[] args) { 47 | setupTools(); 48 | FlinkPipelineOptions options = PipelineOptionsFactory.create().as(FlinkPipelineOptions.class); 49 | options.setRunner(FlinkRunner.class); 50 | options.setFasterCopy(true); 51 | Pipeline p = Pipeline.create(options); 52 | //todo map datetime 53 | 54 | final PCollection extractedDataPoints = p.apply(FileIO.match().filepattern("/tmp/fahrpreise/*" + 55 | ".brotli")) 56 | .apply(FileIO.readMatches().withCompression(Compression.UNCOMPRESSED)) 57 | .apply(FlatMapElements 58 | // uses imports from TypeDescriptors 59 | .into( 60 | new TypeDescriptor() { 61 | } 62 | ) 63 | .via(Main::getPriceQueries)); 64 | //writeToDB(extractedDataPoints); 65 | writeToCSV(extractedDataPoints); 66 | 67 | p.run().waitUntilFinish(); 68 | } 69 | 70 | private static void writeToCSV(final PCollection extractedDataPoints) { 71 | extractedDataPoints.apply(FileIO.writeDynamic() 72 | .via(Contextful.fn((PriceQuery pq) -> Arrays.asList(pq.startDate() + "", 73 | pq.queriedAt() + "", 74 | pq.price() + "")), 75 | Contextful.fn( 76 | (String connection) -> new 
CSVSink(Arrays.asList("startDate", "queriedAt", "price")))) 77 | .by(input -> input.startStation() + "-" + input.targetStation()) 78 | .to("/tmp/fahrpreisakku/") 79 | .withDestinationCoder(StringUtf8Coder.of()) 80 | .withNaming(type -> defaultNaming(type, ".csv")) 81 | .withCompression(Compression.GZIP) 82 | ); 83 | } 84 | 85 | private static void writeToDB(final PCollection extractedDataPoints) { 86 | extractedDataPoints.apply(JdbcIO.write() 87 | .withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create( 88 | "org.mariadb.jdbc.Driver", "jdbc:mariadb://localhost:3306/fahrpreise") 89 | ) 90 | .withStatement("INSERT INTO fahrpreise.preis_query (`from`,`to`,queried_at,price,start_date)" + 91 | "VALUES (?,?,?,?,?)") 92 | .withPreparedStatementSetter((JdbcIO.PreparedStatementSetter) (element, query) -> { 93 | query.setLong(1, element.startStation()); 94 | query.setLong(2, element.targetStation()); 95 | query.setLong(3, element.queriedAt()); 96 | query.setDouble(4, element.price()); 97 | query.setLong(5, element.startDate()); 98 | }) 99 | ); 100 | } 101 | 102 | private static void setupTools() { 103 | BrotliLoader.isBrotliAvailable(); 104 | Configuration.setDefaults(new Configuration.Defaults() { 105 | 106 | private final JsonProvider jsonProvider = new JacksonJsonProvider(); 107 | private final MappingProvider mappingProvider = new JacksonMappingProvider(); 108 | 109 | @Override 110 | public JsonProvider jsonProvider() { 111 | return jsonProvider; 112 | } 113 | 114 | @Override 115 | public MappingProvider mappingProvider() { 116 | return mappingProvider; 117 | } 118 | 119 | @Override 120 | public Set