├── LICENSE
├── PostgresTimeseriesAnalysis
├── .gitignore
├── pom.xml
├── scripts
│ └── station_csv_to_sql.ps1
├── sql
│ ├── .gitignore
│ ├── create_database.bat
│ ├── create_database.sh
│ └── sql
│ │ ├── 10_create_database.sql
│ │ └── 20_sample_data.sql
└── src
│ └── main
│ ├── java
│ ├── app
│ │ └── WeatherDataStreamingExample.java
│ ├── csv
│ │ ├── converter
│ │ │ └── LocalWeatherDataConverter.java
│ │ ├── mapping
│ │ │ ├── LocalWeatherDataMapper.java
│ │ │ └── StationMapper.java
│ │ ├── model
│ │ │ ├── LocalWeatherData.java
│ │ │ └── Station.java
│ │ └── parser
│ │ │ └── Parsers.java
│ ├── model
│ │ ├── GeoLocation.java
│ │ ├── LocalWeatherData.java
│ │ └── Station.java
│ ├── pgsql
│ │ ├── converter
│ │ │ └── LocalWeatherDataConverter.java
│ │ ├── mapping
│ │ │ └── LocalWeatherDataMapping.java
│ │ └── model
│ │ │ └── LocalWeatherData.java
│ └── utils
│ │ ├── DateUtilities.java
│ │ └── StringUtils.java
│ └── resources
│ ├── log4j.properties
│ └── logback.xml
└── README.md
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) Philipp Wagner
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | target
3 | elasticutils-*-javadoc.jar
4 | elasticutils-*-sources.jar
5 | *.asc
6 | stderr.log
7 | stdout.log
8 | *.iml
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | de.bytefish
8 | pgsamle
9 | 1.0
10 |
11 |
12 |
13 |
14 | org.apache.maven.plugins
15 | maven-compiler-plugin
16 |
17 | 1.8
18 | 1.8
19 |
20 |
21 |
22 |
23 |
24 |
25 | UTF-8
26 |
27 |
28 |
29 |
30 |
31 | org.postgresql
32 | postgresql
33 | 42.2.5
34 |
35 |
36 |
37 | de.bytefish
38 | jtinycsvparser
39 | 1.2
40 |
41 |
42 |
43 | de.bytefish
44 | pgbulkinsert
45 | 3.3
46 |
47 |
48 |
49 | io.reactivex.rxjava2
50 | rxjava
51 | 2.2.8
52 |
53 |
54 |
55 |
56 |
57 |
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/scripts/station_csv_to_sql.ps1:
--------------------------------------------------------------------------------
1 | param (
2 | [string]$in_filename,
3 | [string]$out_filename
4 | )
5 |
6 | function GetSqlCompatibleValue([string] $val)
7 | {
8 | if([string]::IsNullOrWhiteSpace($val)) {
9 | return "NULL"
10 | }
11 | return $val.Replace("'", "''");
12 | }
13 |
14 |
15 |
16 | Import-Csv $in_filename -Delimiter "|" | Foreach-Object{
17 |
18 | $line =
19 | @"
20 | `nINSERT INTO sample.station(wban, name, state, location, latitude, longitude, ground_height, station_height, timeZone)
21 | SELECT '{0}', '{1}', '{2}', '{3}', {4}, {5}, {6}, {7}, {8}
22 | WHERE NOT EXISTS (SELECT 1 FROM sample.station WHERE wban='{0}');
23 | "@ -f (GetSqlCompatibleValue $_.WBAN), (GetSqlCompatibleValue $_.Name), (GetSqlCompatibleValue $_.State), (GetSqlCompatibleValue $_.Location), (GetSqlCompatibleValue $_.Latitude), (GetSqlCompatibleValue $_.Longitude), (GetSqlCompatibleValue $_.GroundHeight), (GetSqlCompatibleValue $_.StationHeight), (GetSqlCompatibleValue $_.TimeZone)
24 |
25 | $line|Out-File $out_filename -Append
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/sql/.gitignore:
--------------------------------------------------------------------------------
1 | *.log
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/sql/create_database.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 |
3 | :: Copyright (c) Philipp Wagner. All rights reserved.
4 | :: Licensed under the MIT license. See LICENSE file in the project root for full license information.
5 |
6 | set PGSQL_EXECUTABLE="psql.exe"
7 | set CREATEDB_EXECUTABLE="createdb.exe"
8 | set STDOUT=stdout.log
9 | set STDERR=stderr.log
10 | set LOGFILE=query_output.log
11 |
12 | set HostName=localhost
13 | set PortNumber=5432
14 | set DatabaseName=sampledb
15 | set UserName=philipp
16 | set Password=
17 |
18 | call :AskQuestionWithYdefault "Use Host (%HostName%) Port (%PortNumber%) [Y,n]?" reply_
19 | if /i [%reply_%] NEQ [y] (
20 | set /p HostName="Enter HostName: "
21 | set /p PortNumber="Enter Port: "
22 | )
23 |
24 | call :AskQuestionWithYdefault "Use Database (%DatabaseName%) [Y,n]?" reply_
25 | if /i [%reply_%] NEQ [y] (
26 | set /p ServerName="Enter Database: "
27 | )
28 |
29 | call :AskQuestionWithYdefault "Use User (%UserName%) [Y,n]?" reply_
30 | if /i [%reply_%] NEQ [y] (
31 | set /p UserName="Enter User: "
32 | )
33 |
34 | set /p PGPASSWORD="Password: "
35 |
36 | 1>%STDOUT% 2>%STDERR% (
37 | %CREATEDB_EXECUTABLE% -h %HostName% -p %PortNumber% -U %UserName% -O %UserName% %DatabaseName%
38 | %PGSQL_EXECUTABLE% -h %HostName% -p %PortNumber% -d %DatabaseName% -U %UserName% < sql/10_create_database.sql -L %LOGFILE%
39 | %PGSQL_EXECUTABLE% -h %HostName% -p %PortNumber% -d %DatabaseName% -U %UserName% < sql/20_sample_data.sql -L %LOGFILE%
40 | )
41 |
42 | goto :end
43 |
44 | :: The question as a subroutine
45 | :AskQuestionWithYdefault
46 | setlocal enableextensions
47 | :_asktheyquestionagain
48 | set return_=
49 | set ask_=
50 | set /p ask_="%~1"
51 | if "%ask_%"=="" set return_=y
52 | if /i "%ask_%"=="Y" set return_=y
53 | if /i "%ask_%"=="n" set return_=n
54 | if not defined return_ goto _asktheyquestionagain
55 | endlocal & set "%2=%return_%" & goto :EOF
56 |
57 | :end
58 | pause
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/sql/create_database.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | # Copyright (c) Philipp Wagner. All rights reserved.
4 | # Licensed under the MIT license. See LICENSE file in the project root for full license information.
5 |
6 |
7 | function ask_yes_or_no() {
8 | read -p "$1 ([y]es or [N]o): "
9 | case $(echo $REPLY | tr '[A-Z]' '[a-z]') in
10 | y|yes) echo "yes" ;;
11 | *) echo "no" ;;
12 | esac
13 | }
14 |
15 | STDOUT=stdout.log
16 | STDERR=stderr.log
17 | LOGFILE=query_output.log
18 |
19 | HostName=localhost
20 | PortNumber=5432
21 | DatabaseName=sampledb
22 | UserName=philipp
23 |
24 | if [[ "no" == $(ask_yes_or_no "Use Host ($HostName) Port ($PortNumber)") ]]
25 | then
26 | read -p "Enter HostName: " HostName
27 | read -p "Enter Port: " PortNumber
28 | fi
29 |
30 | if [[ "no" == $(ask_yes_or_no "Use Database ($DatabaseName)") ]]
31 | then
32 | read -p "Enter Database: " ServerName
33 | fi
34 |
35 | if [[ "no" == $(ask_yes_or_no "Use User ($UserName)") ]]
36 | then
37 | read -p "Enter User: " UserName
38 | fi
39 |
40 | read -p "Password: " PGPASSWORD
41 |
42 | # Database
43 | psql -h $HostName -p $PortNumber -d $DatabaseName -U $UserName < sql/10_create_database.sql -L $LOGFILE 1>$STDOUT 2>$STDERR
44 |
45 | # Sample Data
46 | psql -h $HostName -p $PortNumber -d $DatabaseName -U $UserName < sql/20_sample_data.sql -L $LOGFILE 1>>$STDOUT 2>>$STDERR
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/sql/sql/10_create_database.sql:
--------------------------------------------------------------------------------
1 | -- Copyright (c) Philipp Wagner. All rights reserved.
2 | -- Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | DO $$
5 | BEGIN
6 | -----------------------------
7 | -- Schema
8 | -----------------------------
9 | IF NOT EXISTS (SELECT 1 FROM information_schema.schemata WHERE schema_name = 'sample') THEN
10 | CREATE SCHEMA sample;
11 | END IF;
12 |
13 | -----------------------------
14 | -- Tables
15 | -----------------------------
16 | IF NOT EXISTS (
17 | SELECT 1
18 | FROM information_schema.tables
19 | WHERE table_schema = 'sample'
20 | AND table_name = 'station'
21 | ) THEN
22 |
23 | CREATE TABLE sample.station
24 | (
25 | station_id SERIAL PRIMARY KEY,
26 | wban TEXT NOT NULL,
27 | name TEXT NOT NULL,
28 | state TEXT,
29 | location TEXT,
30 | latitude REAL NOT NULL,
31 | longitude REAL NOT NULL,
32 | ground_height SMALLINT,
33 | station_height SMALLINT,
34 | TimeZone SMALLINT
35 | );
36 |
37 | END IF;
38 |
39 | IF NOT EXISTS (
40 | SELECT 1
41 | FROM information_schema.tables
42 | WHERE table_schema = 'sample'
43 | AND table_name = 'weather_data'
44 | ) THEN
45 |
46 | CREATE TABLE sample.weather_data
47 | (
48 | wban TEXT,
49 | dateTime TIMESTAMP,
50 | temperature REAL,
51 | windSpeed REAL,
52 | stationPressure REAL,
53 | skyCondition TEXT
54 | );
55 |
56 | END IF;
57 |
58 | -----------------------------
59 | -- Indexes
60 | -----------------------------
61 | IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'uk_station_wban') THEN
62 | ALTER TABLE sample.station
63 | ADD CONSTRAINT uk_station_wban
64 | UNIQUE (wban);
65 | END IF;
66 |
67 | -----------------------------
68 | -- Security
69 | -----------------------------
70 | REVOKE ALL ON sample.station FROM public;
71 | REVOKE ALL ON sample.weather_data FROM public;
72 |
73 | END;
74 | $$;
75 |
76 | -----------------------------
77 | -- Functions
78 | -----------------------------
79 | CREATE OR REPLACE FUNCTION sample.first_agg ( anyelement, anyelement )
80 | RETURNS anyelement LANGUAGE SQL IMMUTABLE STRICT AS $$
81 | SELECT $1;
82 | $$;
83 |
84 | CREATE AGGREGATE sample.FIRST (
85 | sfunc = sample.first_agg,
86 | basetype = anyelement,
87 | stype = anyelement
88 | );
89 |
90 | CREATE OR REPLACE FUNCTION sample.last_agg ( anyelement, anyelement )
91 | RETURNS anyelement LANGUAGE SQL IMMUTABLE STRICT AS $$
92 | SELECT $2;
93 | $$;
94 |
95 | CREATE AGGREGATE sample.LAST (
96 | sfunc = sample.last_agg,
97 | basetype = anyelement,
98 | stype = anyelement
99 | );
100 |
101 | CREATE OR REPLACE FUNCTION sample.datediff_seconds(start_t TIMESTAMP, end_t TIMESTAMP)
102 | RETURNS DOUBLE PRECISION AS $$
103 | SELECT EXTRACT(epoch FROM $2 - $1)
104 | $$ LANGUAGE SQL;
105 |
106 | CREATE OR REPLACE FUNCTION sample.timestamp_to_seconds(timestamp_t TIMESTAMP)
107 | RETURNS DOUBLE PRECISION AS $$
108 | SELECT EXTRACT(epoch from timestamp_t)
109 | $$ LANGUAGE SQL;
110 |
111 | CREATE OR REPLACE FUNCTION sample.linear_interpolate(x_i DOUBLE PRECISION,
112 | x_0 DOUBLE PRECISION,
113 | y_0 DOUBLE PRECISION,
114 | x_1 DOUBLE PRECISION,
115 | y_1 DOUBLE PRECISION)
116 | RETURNS DOUBLE PRECISION AS $$
117 | SELECT (($5 - $3) / ($4 - $2)) * ($1 - $2) + $3;
118 | $$ LANGUAGE SQL;
119 |
120 | CREATE OR REPLACE FUNCTION sample.linear_interpolate(x_i TIMESTAMP, x_0 TIMESTAMP, y_0 DOUBLE PRECISION, x_1 TIMESTAMP, y_1 DOUBLE PRECISION)
121 | RETURNS DOUBLE PRECISION AS $$
122 | SELECT sample.linear_interpolate(sample.timestamp_to_seconds($1),
123 | sample.timestamp_to_seconds($2),
124 | $3,
125 | sample.timestamp_to_seconds($4),
126 | $5);
127 | $$ LANGUAGE SQL;
128 |
129 |
130 | CREATE OR REPLACE FUNCTION sample.interpolate_temperature(wban_p TEXT, start_t TIMESTAMP, end_t TIMESTAMP, slice_t INTERVAL)
131 | RETURNS TABLE(
132 | r_wban TEXT,
133 | r_slice TIMESTAMP,
134 | min_temp DOUBLE PRECISION,
135 | max_temp DOUBLE PRECISION,
136 | avg_temp DOUBLE PRECISION
137 | ) AS $$
138 | -- bounded_series assigns all values into a time slice with a given interval length in slice_t:
139 | WITH bounded_series AS (
140 | SELECT wban,
141 | datetime,
142 | 'epoch'::timestamp + $4 * (extract(epoch from datetime)::int4 / EXTRACT(epoch FROM $4)::int4) AS slice,
143 | temperature
144 | FROM sample.weather_data w
145 | WHERE w.wban = $1
146 | ORDER BY wban, slice, datetime ASC
147 | ),
148 | -- dense_series uses generate_series to generate the intervals we expect in the data:
149 | dense_series AS (
150 | SELECT $1 as wban, slice
151 | FROM generate_series($2, $3, $4) s(slice)
152 | ORDER BY wban, slice
153 | ),
154 | -- filled_series now uses a WINDOW function for find the first / last not null
155 | -- value in a WINDOW and uses sample.linear_interpolate to interpolate the slices
156 | -- between both values.
157 | --
158 | -- Finally we have to GROUP BY the slice and wban and take the AVG, MIN and MAX
159 | -- value in the slice. You can also add more Operators there, it is just an
160 | -- example:
161 | filled_series AS (
162 | SELECT wban,
163 | slice,
164 | temperature,
165 | COALESCE(temperature, sample.linear_interpolate(slice,
166 | sample.last(datetime) over (lookback),
167 | sample.last(temperature) over (lookback),
168 | sample.last(datetime) over (lookforward),
169 | sample.last(temperature) over (lookforward))) interpolated
170 | FROM bounded_series
171 | RIGHT JOIN dense_series USING (wban, slice)
172 | WINDOW
173 | lookback AS (ORDER BY slice, datetime),
174 | lookforward AS (ORDER BY slice DESC, datetime DESC)
175 | ORDER BY slice, datetime)
176 | SELECT wban AS r_wban,
177 | slice AS r_slice,
178 | MIN(interpolated) as min_temp,
179 | MAX(interpolated) as max_temp,
180 | AVG(interpolated) as avg_temp
181 | FROM filled_series
182 | GROUP BY slice, wban
183 | ORDER BY wban, slice;
184 |
185 | $$ LANGUAGE SQL;
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/java/app/WeatherDataStreamingExample.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) Philipp Wagner. All rights reserved.
2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | package app;
5 |
6 | import csv.model.LocalWeatherData;
7 | import csv.model.Station;
8 | import csv.parser.Parsers;
9 | import de.bytefish.jtinycsvparser.mapping.CsvMappingResult;
10 | import de.bytefish.pgbulkinsert.PgBulkInsert;
11 | import io.reactivex.Observable;
12 | import io.reactivex.disposables.Disposable;
13 | import org.postgresql.PGConnection;
14 | import pgsql.mapping.LocalWeatherDataMapping;
15 |
16 | import java.nio.charset.StandardCharsets;
17 | import java.nio.file.FileSystems;
18 | import java.nio.file.Path;
19 | import java.sql.Connection;
20 | import java.sql.DriverManager;
21 | import java.util.Map;
22 | import java.util.concurrent.TimeUnit;
23 | import java.util.stream.Collectors;
24 | import java.util.stream.Stream;
25 |
26 | public class WeatherDataStreamingExample {
27 |
28 | private static final String databaseUri = "jdbc:postgresql://127.0.0.1:5432/sampledb?user=philipp&password=test_pwd";
29 |
30 | public static void main(String[] args) {
31 |
32 | // The PostgreSQL Bulk Writer:
33 | final PgBulkInsert writer = new PgBulkInsert<>(new LocalWeatherDataMapping("sample", "weather_data"));
34 |
35 | // Path to QCLCD CSV Files:
36 | final Path csvStationDataFilePath = FileSystems.getDefault().getPath("D:\\datasets\\201503station.txt");
37 | final Path csvLocalWeatherDataFilePath = FileSystems.getDefault().getPath("D:\\datasets\\201503hourly.txt");
38 |
39 | // A map between the WBAN and Station for faster Lookups:
40 | final Map stationMap = getStationMap(csvStationDataFilePath);
41 |
42 | try (Stream> csvStream = getLocalWeatherData(csvLocalWeatherDataFilePath)) {
43 | // Now turn the CSV Stream into the Postgres Stream:
44 | Stream localWeatherDataStream = csvStream
45 | // Filter only valid entries:
46 | .filter(x -> x.isValid())
47 | // Now we can work on the Results:
48 | .map(x -> x.getResult())
49 | // Take only measurements available in the list of stations:
50 | .filter(x -> stationMap.containsKey(x.getWban()))
51 | // Map into the general Analytics Model:
52 | .map(x -> {
53 | // Get the matching station now:
54 | csv.model.Station station = stationMap.get(x.getWban());
55 | // And build the Model:
56 | return csv.converter.LocalWeatherDataConverter.convert(x, station);
57 | })
58 | // Now build the PostgresSQL Model:
59 | .map(x -> pgsql.converter.LocalWeatherDataConverter.convert(x));
60 |
61 | // Turn it into an Observable for simplified Buffering:
62 | Disposable disposable = Observable.fromIterable(localWeatherDataStream::iterator)
63 | // Wait two Seconds or Buffer up to 80000 entities:
64 | .buffer(2, TimeUnit.SECONDS,80000)
65 | // Subscribe to the Batches:
66 | .subscribe(x -> {
67 | // Connect to your Postgres Instance:
68 | try (Connection connection = DriverManager.getConnection(databaseUri)) {
69 | // Get the underlying PGConnection:
70 | PGConnection pgConnection = connection.unwrap(PGConnection.class);
71 |
72 | // And Bulk Write the Results:
73 | writer.saveAll(pgConnection, x);
74 | }
75 | }, x -> System.err.println(x));
76 |
77 | // Probably not neccessary, but dispose anyway:
78 | if(disposable.isDisposed()) {
79 | disposable.dispose();
80 | }
81 | }
82 | }
83 |
84 | private static Stream> getLocalWeatherData(Path path) {
85 | return Parsers.LocalWeatherDataParser().readFromFile(path, StandardCharsets.US_ASCII);
86 | }
87 |
88 | private static Stream getStations(Path path) {
89 | return Parsers.StationParser().readFromFile(path, StandardCharsets.US_ASCII)
90 | .filter(x -> x.isValid())
91 | .map(x -> x.getResult());
92 | }
93 |
94 | private static Map getStationMap(Path path) {
95 | try (Stream stationStream = getStations(path)) {
96 | return stationStream
97 | .collect(Collectors.groupingBy(x -> x.getWban()))
98 | .entrySet().stream()
99 | .map(x -> x.getValue().get(0))
100 | .collect(Collectors.toMap(csv.model.Station::getWban, x -> x));
101 | }
102 | }
103 | }
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/java/csv/converter/LocalWeatherDataConverter.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) Philipp Wagner. All rights reserved.
2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | package csv.converter;
5 |
6 |
7 | import java.time.LocalDate;
8 | import java.time.LocalTime;
9 |
10 | public class LocalWeatherDataConverter {
11 |
12 | public static model.LocalWeatherData convert(csv.model.LocalWeatherData csvLocalWeatherData, csv.model.Station csvStation) {
13 |
14 | LocalDate date = csvLocalWeatherData.getDate();
15 | LocalTime time = csvLocalWeatherData.getTime();
16 | String skyCondition = csvLocalWeatherData.getSkyCondition();
17 | Float stationPressure = csvLocalWeatherData.getStationPressure();
18 | Float temperature = csvLocalWeatherData.getDryBulbCelsius();
19 | Float windSpeed = csvLocalWeatherData.getWindSpeed();
20 |
21 | // Convert the Station data:
22 | model.Station station = convert(csvStation);
23 |
24 | return new model.LocalWeatherData(station, date, time, temperature, windSpeed, stationPressure, skyCondition);
25 | }
26 |
27 | public static model.Station convert(csv.model.Station csvStation) {
28 | String wban = csvStation.getWban();
29 | String name = csvStation.getName();
30 | String state = csvStation.getState();
31 | String location = csvStation.getLocation();
32 | Integer timeZone = csvStation.getTimeZone();
33 | model.GeoLocation geoLocation = new model.GeoLocation(csvStation.getLatitude(), csvStation.getLongitude());
34 |
35 | return new model.Station(wban, name, state, location, timeZone, geoLocation);
36 | }
37 |
38 | }
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/java/csv/mapping/LocalWeatherDataMapper.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) Philipp Wagner. All rights reserved.
2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | package csv.mapping;
5 |
6 | import csv.model.LocalWeatherData;
7 | import de.bytefish.jtinycsvparser.builder.IObjectCreator;
8 | import de.bytefish.jtinycsvparser.mapping.CsvMapping;
9 | import de.bytefish.jtinycsvparser.typeconverter.FloatConverter;
10 | import de.bytefish.jtinycsvparser.typeconverter.IgnoreMissingValuesConverter;
11 | import de.bytefish.jtinycsvparser.typeconverter.LocalDateConverter;
12 | import de.bytefish.jtinycsvparser.typeconverter.LocalTimeConverter;
13 |
14 | import java.time.LocalDate;
15 | import java.time.LocalTime;
16 | import java.time.format.DateTimeFormatter;
17 |
18 | public class LocalWeatherDataMapper extends CsvMapping
19 | {
20 | public LocalWeatherDataMapper(IObjectCreator creator)
21 | {
22 | super(creator);
23 |
24 | mapProperty(0, String.class, LocalWeatherData::setWban);
25 | mapProperty(1, LocalDate.class, LocalWeatherData::setDate, new LocalDateConverter(DateTimeFormatter.ofPattern("yyyyMMdd")));
26 | mapProperty(2, LocalTime.class, LocalWeatherData::setTime, new LocalTimeConverter(DateTimeFormatter.ofPattern("HHmm")));
27 | mapProperty(4, String.class, LocalWeatherData::setSkyCondition);
28 | mapProperty(12, Float.class, LocalWeatherData::setDryBulbCelsius, new IgnoreMissingValuesConverter<>(new FloatConverter(), "M"));
29 | mapProperty(24, Float.class, LocalWeatherData::setWindSpeed, new IgnoreMissingValuesConverter<>(new FloatConverter(), "M"));
30 | mapProperty(30, Float.class, LocalWeatherData::setStationPressure, new IgnoreMissingValuesConverter<>(new FloatConverter(), "M"));
31 | }
32 | }
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/java/csv/mapping/StationMapper.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) Philipp Wagner. All rights reserved.
2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | package csv.mapping;
5 |
6 | import csv.model.Station;
7 | import de.bytefish.jtinycsvparser.builder.IObjectCreator;
8 | import de.bytefish.jtinycsvparser.mapping.CsvMapping;
9 | import de.bytefish.jtinycsvparser.typeconverter.IgnoreMissingValuesConverter;
10 | import de.bytefish.jtinycsvparser.typeconverter.IntegerConverter;
11 |
12 | public class StationMapper extends CsvMapping
13 | {
14 | public StationMapper(IObjectCreator creator)
15 | {
16 | super(creator);
17 |
18 | mapProperty(0, String.class, Station::setWban);
19 | mapProperty(1, String.class, Station::setWmo);
20 | mapProperty(2, String.class, Station::setCallSign);
21 | mapProperty(3, String.class, Station::setClimateDivisionCode);
22 | mapProperty(4, String.class, Station::setClimateDivisionStateCode);
23 | mapProperty(5, String.class, Station::setClimateDivisionStationCode);
24 | mapProperty(6, String.class, Station::setName);
25 | mapProperty(7, String.class, Station::setState);
26 | mapProperty(8, String.class, Station::setLocation);
27 | mapProperty(9, Float.class, Station::setLatitude);
28 | mapProperty(10, Float.class, Station::setLongitude);
29 | mapProperty(11, Integer.class, Station::setGroundHeight, new IgnoreMissingValuesConverter<>(new IntegerConverter()));
30 | mapProperty(12, Integer.class, Station::setStationHeight, new IgnoreMissingValuesConverter<>(new IntegerConverter()));
31 | mapProperty(13, Integer.class, Station::setBarometer, new IgnoreMissingValuesConverter<>(new IntegerConverter()));
32 | mapProperty(14, Integer.class, Station::setTimeZone);
33 | }
34 | }
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/java/csv/model/LocalWeatherData.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) Philipp Wagner. All rights reserved.
2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | package csv.model;
5 |
6 | import java.time.LocalDate;
7 | import java.time.LocalTime;
8 |
9 | public class LocalWeatherData {
10 |
11 | private String wban;
12 |
13 | private LocalDate date;
14 |
15 | private LocalTime time;
16 |
17 | private String skyCondition;
18 |
19 | private Float dryBulbCelsius;
20 |
21 | private Float windSpeed;
22 |
23 | private Float stationPressure;
24 |
25 | public LocalWeatherData() {
26 |
27 | }
28 |
29 | public String getWban() {
30 | return wban;
31 | }
32 |
33 | public void setWban(String wban) {
34 | this.wban = wban;
35 | }
36 |
37 | public LocalDate getDate() {
38 | return date;
39 | }
40 |
41 | public void setDate(LocalDate date) {
42 | this.date = date;
43 | }
44 |
45 | public LocalTime getTime() {
46 | return time;
47 | }
48 |
49 | public void setTime(LocalTime time) {
50 | this.time = time;
51 | }
52 |
53 | public String getSkyCondition() {
54 | return skyCondition;
55 | }
56 |
57 | public void setSkyCondition(String skyCondition) {
58 | this.skyCondition = skyCondition;
59 | }
60 |
61 | public Float getDryBulbCelsius() {
62 | return dryBulbCelsius;
63 | }
64 |
65 | public void setDryBulbCelsius(Float dryBulbCelsius) {
66 | this.dryBulbCelsius = dryBulbCelsius;
67 | }
68 |
69 | public Float getWindSpeed() {
70 | return windSpeed;
71 | }
72 |
73 | public void setWindSpeed(Float windSpeed) {
74 | this.windSpeed = windSpeed;
75 | }
76 |
77 | public Float getStationPressure() {
78 | return stationPressure;
79 | }
80 |
81 | public void setStationPressure(Float stationPressure) {
82 | this.stationPressure = stationPressure;
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/java/csv/model/Station.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) Philipp Wagner. All rights reserved.
2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | package csv.model;
5 |
6 | public class Station {
7 |
8 | private String wban;
9 |
10 | private String wmo;
11 |
12 | private String callSign;
13 |
14 | private String climateDivisionCode;
15 |
16 | private String climateDivisionStateCode;
17 |
18 | private String climateDivisionStationCode;
19 |
20 | private String name;
21 |
22 | private String state;
23 |
24 | private String location;
25 |
26 | private Float latitude;
27 |
28 | private Float longitude;
29 |
30 | private Integer groundHeight;
31 |
32 | private Integer stationHeight;
33 |
34 | private Integer barometer;
35 |
36 | private Integer timeZone;
37 |
38 | public Station() {
39 | }
40 |
41 | public String getWban() {
42 | return wban;
43 | }
44 |
45 | public void setWban(String wban) {
46 | this.wban = wban;
47 | }
48 |
49 | public String getWmo() {
50 | return wmo;
51 | }
52 |
53 | public void setWmo(String wmo) {
54 | this.wmo = wmo;
55 | }
56 |
57 | public String getCallSign() {
58 | return callSign;
59 | }
60 |
61 | public void setCallSign(String callSign) {
62 | this.callSign = callSign;
63 | }
64 |
65 | public String getClimateDivisionCode() {
66 | return climateDivisionCode;
67 | }
68 |
69 | public void setClimateDivisionCode(String climateDivisionCode) {
70 | this.climateDivisionCode = climateDivisionCode;
71 | }
72 |
73 | public String getClimateDivisionStateCode() {
74 | return climateDivisionStateCode;
75 | }
76 |
77 | public void setClimateDivisionStateCode(String climateDivisionStateCode) {
78 | this.climateDivisionStateCode = climateDivisionStateCode;
79 | }
80 |
81 | public String getClimateDivisionStationCode() {
82 | return climateDivisionStationCode;
83 | }
84 |
85 | public void setClimateDivisionStationCode(String climateDivisionStationCode) {
86 | this.climateDivisionStationCode = climateDivisionStationCode;
87 | }
88 |
89 | public String getName() {
90 | return name;
91 | }
92 |
93 | public void setName(String name) {
94 | this.name = name;
95 | }
96 |
97 | public String getState() {
98 | return state;
99 | }
100 |
101 | public void setState(String state) {
102 | this.state = state;
103 | }
104 |
105 | public String getLocation() {
106 | return location;
107 | }
108 |
109 | public void setLocation(String location) {
110 | this.location = location;
111 | }
112 |
113 | public float getLatitude() {
114 | return latitude;
115 | }
116 |
117 | public void setLatitude(Float latitude) {
118 | this.latitude = latitude;
119 | }
120 |
121 | public float getLongitude() {
122 | return longitude;
123 | }
124 |
125 | public void setLongitude(Float longitude) {
126 | this.longitude = longitude;
127 | }
128 |
129 | public Integer getGroundHeight() {
130 | return groundHeight;
131 | }
132 |
133 | public void setGroundHeight(Integer groundHeight) {
134 | this.groundHeight = groundHeight;
135 | }
136 |
137 | public Integer getStationHeight() {
138 | return stationHeight;
139 | }
140 |
141 | public void setStationHeight(Integer stationHeight) {
142 | this.stationHeight = stationHeight;
143 | }
144 |
145 | public Integer getBarometer() {
146 | return barometer;
147 | }
148 |
149 | public void setBarometer(Integer barometer) {
150 | this.barometer = barometer;
151 | }
152 |
153 | public Integer getTimeZone() {
154 | return timeZone;
155 | }
156 |
157 | public void setTimeZone(Integer timeZone) {
158 | this.timeZone = timeZone;
159 | }
160 | }
161 |
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/java/csv/parser/Parsers.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) Philipp Wagner. All rights reserved.
2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | package csv.parser;
5 |
6 | import csv.mapping.LocalWeatherDataMapper;
7 | import csv.mapping.StationMapper;
8 | import csv.model.LocalWeatherData;
9 | import csv.model.Station;
10 | import de.bytefish.jtinycsvparser.CsvParser;
11 | import de.bytefish.jtinycsvparser.CsvParserOptions;
12 | import de.bytefish.jtinycsvparser.tokenizer.StringSplitTokenizer;
13 |
14 | public class Parsers {
15 |
16 | public static CsvParser StationParser() {
17 |
18 | return new CsvParser<>(new CsvParserOptions(true, new StringSplitTokenizer("\\|", true)), new StationMapper(() -> new Station()));
19 | }
20 |
21 | public static CsvParser LocalWeatherDataParser()
22 | {
23 | return new CsvParser<>(new CsvParserOptions(true, new StringSplitTokenizer(",", true)), new LocalWeatherDataMapper(() -> new LocalWeatherData()));
24 | }
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/java/model/GeoLocation.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) Philipp Wagner. All rights reserved.
2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | package model;
5 |
6 | public class GeoLocation {
7 |
8 | private double lat;
9 |
10 | private double lon;
11 |
12 | public GeoLocation() {
13 |
14 | }
15 |
16 | public GeoLocation(double lat, double lon) {
17 | this.lat = lat;
18 | this.lon = lon;
19 | }
20 |
21 | public void setLat(double lat) {
22 | this.lat = lat;
23 | }
24 |
25 | public void setLon(double lon) {
26 | this.lon = lon;
27 | }
28 |
29 | public double getLat() {
30 | return lat;
31 | }
32 |
33 | public double getLon() {
34 | return lon;
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/java/model/LocalWeatherData.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) Philipp Wagner. All rights reserved.
2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | package model;
5 |
6 | import java.time.LocalDate;
7 | import java.time.LocalTime;
8 |
9 | public class LocalWeatherData {
10 |
11 | private Station station;
12 |
13 | private LocalDate date;
14 |
15 | private LocalTime time;
16 |
17 | private Float temperature;
18 |
19 | private Float windSpeed;
20 |
21 | private Float stationPressure;
22 |
23 | private String skyCondition;
24 |
25 | public LocalWeatherData() {
26 |
27 | }
28 |
29 | public LocalWeatherData(Station station, LocalDate date, LocalTime time, Float temperature, Float windSpeed, Float stationPressure, String skyCondition) {
30 | this.station = station;
31 | this.date = date;
32 | this.time = time;
33 | this.temperature = temperature;
34 | this.windSpeed = windSpeed;
35 | this.stationPressure = stationPressure;
36 | this.skyCondition = skyCondition;
37 | }
38 |
39 | public void setStation(Station station) {
40 | this.station = station;
41 | }
42 |
43 | public void setDate(LocalDate date) {
44 | this.date = date;
45 | }
46 |
47 | public void setTime(LocalTime time) {
48 | this.time = time;
49 | }
50 |
51 | public void setTemperature(Float temperature) {
52 | this.temperature = temperature;
53 | }
54 |
55 | public void setWindSpeed(Float windSpeed) {
56 | this.windSpeed = windSpeed;
57 | }
58 |
59 | public void setStationPressure(Float stationPressure) {
60 | this.stationPressure = stationPressure;
61 | }
62 |
63 | public void setSkyCondition(String skyCondition) {
64 | this.skyCondition = skyCondition;
65 | }
66 |
67 | public Station getStation() {
68 | return station;
69 | }
70 |
71 | public LocalDate getDate() {
72 | return date;
73 | }
74 |
75 | public LocalTime getTime() {
76 | return time;
77 | }
78 |
79 | public Float getTemperature() {
80 | return temperature;
81 | }
82 |
83 | public Float getWindSpeed() {
84 | return windSpeed;
85 | }
86 |
87 | public Float getStationPressure() {
88 | return stationPressure;
89 | }
90 |
91 | public String getSkyCondition() {
92 | return skyCondition;
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/java/model/Station.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) Philipp Wagner. All rights reserved.
2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | package model;
5 |
6 | public class Station {
7 |
8 | private String wban;
9 |
10 | private String name;
11 |
12 | private String state;
13 |
14 | private String location;
15 |
16 | private Integer timeZone;
17 |
18 | private GeoLocation geoLocation;
19 |
20 | public Station() {
21 |
22 | }
23 |
24 | public Station(String wban, String name, String state, String location, Integer timeZone, GeoLocation geoLocation) {
25 | this.wban = wban;
26 | this.name = name;
27 | this.state = state;
28 | this.location = location;
29 | this.timeZone = timeZone;
30 | this.geoLocation = geoLocation;
31 | }
32 |
33 | public void setWban(String wban) {
34 | this.wban = wban;
35 | }
36 |
37 | public void setName(String name) {
38 | this.name = name;
39 | }
40 |
41 | public void setState(String state) {
42 | this.state = state;
43 | }
44 |
45 | public void setLocation(String location) {
46 | this.location = location;
47 | }
48 |
49 | public void setTimeZone(Integer timeZone) {
50 | this.timeZone = timeZone;
51 | }
52 |
53 | public void setGeoLocation(GeoLocation geoLocation) {
54 | this.geoLocation = geoLocation;
55 | }
56 |
57 | public String getWban() {
58 | return wban;
59 | }
60 |
61 | public String getName() {
62 | return name;
63 | }
64 |
65 | public String getState() {
66 | return state;
67 | }
68 |
69 | public String getLocation() {
70 | return location;
71 | }
72 |
73 | public Integer getTimeZone() {
74 | return timeZone;
75 | }
76 |
77 | public GeoLocation getGeoLocation() {
78 | return geoLocation;
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/java/pgsql/converter/LocalWeatherDataConverter.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) Philipp Wagner. All rights reserved.
2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | package pgsql.converter;
5 |
6 | import java.time.LocalDateTime;
7 |
8 | public class LocalWeatherDataConverter {
9 |
10 | public static pgsql.model.LocalWeatherData convert(model.LocalWeatherData modelLocalWeatherData) {
11 |
12 | String wban = modelLocalWeatherData.getStation().getWban();
13 | LocalDateTime dateTime = modelLocalWeatherData.getDate().atTime(modelLocalWeatherData.getTime());
14 | Float temperature = modelLocalWeatherData.getTemperature();
15 | Float windSpeed = modelLocalWeatherData.getWindSpeed();
16 | Float stationPressure = modelLocalWeatherData.getStationPressure();
17 | String skyCondition = modelLocalWeatherData.getSkyCondition();
18 |
19 | return new pgsql.model.LocalWeatherData(wban, dateTime, temperature, windSpeed, stationPressure, skyCondition);
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/java/pgsql/mapping/LocalWeatherDataMapping.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) Philipp Wagner. All rights reserved.
2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | package pgsql.mapping;
5 |
6 | import de.bytefish.pgbulkinsert.mapping.AbstractMapping;
7 | import pgsql.model.LocalWeatherData;
8 |
9 | public class LocalWeatherDataMapping extends AbstractMapping {
10 |
11 | public LocalWeatherDataMapping(String schemaName, String tableName) {
12 |
13 | super(schemaName, tableName);
14 |
15 | mapText("wban", pgsql.model.LocalWeatherData::getWban);
16 | mapTimeStamp("dateTime", pgsql.model.LocalWeatherData::getDateTime);
17 | mapFloat("temperature", pgsql.model.LocalWeatherData::getTemperature);
18 | mapFloat("windSpeed", pgsql.model.LocalWeatherData::getWindSpeed);
19 | mapFloat("stationPressure", pgsql.model.LocalWeatherData::getStationPressure);
20 | mapText("skyCondition", pgsql.model.LocalWeatherData::getSkyCondition);
21 | }
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/java/pgsql/model/LocalWeatherData.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) Philipp Wagner. All rights reserved.
2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | package pgsql.model;
5 |
6 | import java.time.LocalDateTime;
7 |
8 | public class LocalWeatherData {
9 |
10 | private String wban;
11 |
12 | private LocalDateTime dateTime;
13 |
14 | private Float temperature;
15 |
16 | private Float windSpeed;
17 |
18 | private Float stationPressure;
19 |
20 | private String skyCondition;
21 |
22 | public LocalWeatherData(String wban, LocalDateTime dateTime, Float temperature, Float windSpeed, Float stationPressure, String skyCondition) {
23 | this.wban = wban;
24 | this.dateTime = dateTime;
25 | this.temperature = temperature;
26 | this.windSpeed = windSpeed;
27 | this.stationPressure = stationPressure;
28 | this.skyCondition = skyCondition;
29 | }
30 |
31 | public String getWban() {
32 | return wban;
33 | }
34 |
35 | public LocalDateTime getDateTime() {
36 | return dateTime;
37 | }
38 |
39 | public Float getTemperature() {
40 | return temperature;
41 | }
42 |
43 | public Float getWindSpeed() {
44 | return windSpeed;
45 | }
46 |
47 | public Float getStationPressure() {
48 | return stationPressure;
49 | }
50 |
51 | public String getSkyCondition() {
52 | return skyCondition;
53 | }
54 | }
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/java/utils/DateUtilities.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) Philipp Wagner. All rights reserved.
2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | package utils;
5 |
6 | import java.time.LocalDate;
7 | import java.time.LocalDateTime;
8 | import java.time.LocalTime;
9 | import java.time.ZoneOffset;
10 | import java.time.OffsetDateTime;
11 | import java.util.Date;
12 |
13 | public class DateUtilities {
14 |
15 | public static Date from(LocalDate localDate, LocalTime localTime, ZoneOffset zoneOffset) {
16 | LocalDateTime localDateTime = localDate.atTime(localTime);
17 |
18 | return from(localDateTime, zoneOffset);
19 | }
20 |
21 | public static Date from(LocalDateTime localDateTime, ZoneOffset zoneOffset) {
22 | OffsetDateTime offsetDateTime = localDateTime.atOffset(zoneOffset);
23 |
24 | return Date.from(offsetDateTime.toInstant());
25 | }
26 |
27 | }
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/java/utils/StringUtils.java:
--------------------------------------------------------------------------------
1 | // Copyright (c) Philipp Wagner. All rights reserved.
2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 |
4 | package utils;
5 |
6 | public class StringUtils {
7 |
8 | private StringUtils() {}
9 |
10 | public static String StringEmpty = "";
11 |
12 | public static boolean isNullOrWhiteSpace(String input) {
13 | return input == null || input.equals(StringEmpty) || input.trim().length() == 0;
14 | }
15 |
16 | }
17 |
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | ################################################################################
18 |
19 | log4j.rootLogger=WARN, console
20 |
21 | log4j.appender.console=org.apache.log4j.ConsoleAppender
22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
--------------------------------------------------------------------------------
/PostgresTimeseriesAnalysis/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 |
18 |
19 |
20 |
21 |
22 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{60} %X{sourceThread} - %msg%n
23 |
24 |
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PostgresTimeseriesAnalysis #
2 |
3 | There was an interesting article by the [jOOQ] team on how to fill gaps in data using SQL:
4 |
5 | * [Using IGNORE NULLS With SQL Window Functions to Fill Gaps]
6 |
7 | I have been experimenting with using SQL for Linear Interpolation two years ago. It would be a waste
8 | to silo all this in a private repository, so I decided to share it.
9 |
10 | The code is heavily based on a great article by [Caleb Welton](https://github.com/cwelton):
11 |
12 | * [Time Series Analysis Part 3: Resampling and Interpolation]
13 |
14 | This article can also be found on my personal website:
15 |
16 | * [https://bytefish.de/blog/postgresql_interpolation/](https://bytefish.de/blog/postgresql_interpolation/)
17 |
18 | To reproduce the example, please see the section [How to Reproduce this Experiment](#how-to-reproduce-this-experiment).
19 |
20 | ## Dataset ##
21 |
22 | The dataset is the [Quality Controlled Local Climatological Data (QCLCD)] for 2014 and 2015. It contains hourly weather
23 | measurements for more than 1,600 US Weather Stations. It is a great dataset to learn about data processing and data
24 | visualization:
25 |
26 | > The Quality Controlled Local Climatological Data (QCLCD) consist of hourly, daily, and monthly summaries for approximately
27 | > 1,600 U.S. locations. Daily Summary forms are not available for all stations. Data are available beginning January 1, 2005
28 | > and continue to the present. Please note, there may be a 48-hour lag in the availability of the most recent data.
29 |
30 | The data is available as CSV files at:
31 |
32 | * [http://www.ncdc.noaa.gov/orders/qclcd/](http://www.ncdc.noaa.gov/orders/qclcd/)
33 |
34 | Download the file ``QCLCD201503.zip`` from:
35 |
36 | * [http://www.ncdc.noaa.gov/orders/qclcd/](http://www.ncdc.noaa.gov/orders/qclcd/)
37 |
38 | [Quality Controlled Local Climatological Data (QCLCD)]:
39 |
40 | ## Are there missing values? ##
41 |
42 | Devices might break. Networks can be down. Disks still run full in 2019. The sky is the limit, when it comes to invalid or missing measurements in data.
43 |
44 | The [Quality Controlled Local Climatological Data (QCLCD)] has hourly measurements of weather stations. So to find missing data we will look for gaps in data greater than 1 hour.
45 |
46 | How can we do this? [Window Functions]!
47 |
48 | [Window Functions] can be hard to grasp and I can't go into all details here. The best introduction to [Window Functions] was written by [Dimitri Fontaine] and I highly recommend reading it:
49 |
50 | * [Understanding Window Functions] ([Dimitri Fontaine])
51 |
52 | To identify gaps we first need a way calculate the interval between two timestamps, so I will
53 | define a function ``datediff_seconds`` to calculate the length between two timestamp values:
54 |
55 | ```sql
56 | CREATE OR REPLACE FUNCTION sample.datediff_seconds(start_t TIMESTAMP, end_t TIMESTAMP)
57 | RETURNS DOUBLE PRECISION AS $$
58 | SELECT EXTRACT(epoch FROM $2 - $1)
59 | $$ LANGUAGE SQL;
60 | ```
61 |
62 | Now we can use the [LAG] operator to identity gaps larger 3600 seconds, which is an hour:
63 |
64 | ```sql
65 | SELECT *
66 | FROM (SELECT
67 | weather_data.wban as wban,
68 | weather_data.datetime as current_datetime,
69 | LAG(weather_data.datetime, 1, NULL) OVER (PARTITION BY weather_data.wban ORDER BY weather_data.datetime) AS previous_datetime
70 | FROM sample.weather_data) lag_select
71 | WHERE sample.datediff_seconds (previous_datetime, current_datetime) > 3600;
72 | ```
73 |
74 | And we can see there are 17,043 affected rows, which is the number of gaps in the data:
75 |
76 | ```
77 | Successfully run. Total query runtime: 33 secs 590 msec.
78 | 17043 rows affected.
79 | ```
80 |
81 | ## Linear Interpolation with SQL ##
82 |
83 | First of all we write a function to do a [Linear Interpolation] between two points:
84 |
85 | ```sql
86 | CREATE OR REPLACE FUNCTION sample.linear_interpolate(x_i DOUBLE PRECISION,
87 | x_0 DOUBLE PRECISION,
88 | y_0 DOUBLE PRECISION,
89 | x_1 DOUBLE PRECISION,
90 | y_1 DOUBLE PRECISION)
91 | RETURNS DOUBLE PRECISION AS $$
92 | SELECT (($5 - $3) / ($4 - $2)) * ($1 - $2) + $3;
93 | $$ LANGUAGE SQL;
94 | ```
95 |
96 | We are working with the ``TIMESTAMP`` datatype, so in order to put it into the ``linear_interpolate`` function,
97 | we need to transform the ``TIMESTAMP`` into its representation of seconds since epoch:
98 |
99 | ```sql
100 | CREATE OR REPLACE FUNCTION sample.timestamp_to_seconds(timestamp_t TIMESTAMP)
101 | RETURNS DOUBLE PRECISION AS $$
102 | SELECT EXTRACT(epoch from timestamp_t)
103 | $$ LANGUAGE SQL;
104 | ```
105 |
106 | This makes it possible to write an overload, that takes the timestamps and returns the interpolated value of a given timestamp ``x_i``:
107 |
108 | ```sql
109 | CREATE OR REPLACE FUNCTION sample.linear_interpolate(x_i TIMESTAMP, x_0 TIMESTAMP, y_0 DOUBLE PRECISION, x_1 TIMESTAMP, y_1 DOUBLE PRECISION)
110 | RETURNS DOUBLE PRECISION AS $$
111 | SELECT sample.linear_interpolate(sample.timestamp_to_seconds($1),
112 | sample.timestamp_to_seconds($2),
113 | $3,
114 | sample.timestamp_to_seconds($4),
115 | $5);
116 | $$ LANGUAGE SQL;
117 | ```
118 |
119 | And that's it?
120 |
121 | ## Linear Interpolation of the QCLCD Weather Data ##
122 |
123 | As a final example I want to show how to use the functions to interpolate the sample weather data, which had 17,043 missing measurements.
124 |
125 | The idea is quite simple: First of all we will put all measurements into a time slice of a given interval length. So we know, that we have
126 | a value for the expected point in time. We will then build a dense series using the [generate_series] method with the given ``slice_t`` interval, which has all the slices we expect.
127 |
128 | The ``bounded_series`` and ``dense_series`` will then be joined, which means: The joined series will have ``NULL`` for the measurements, which indicates the slice has to be interpolated. A custom function will be used to identify the first and last non-null value of a window, so we get the two points for the ``linear_interpolate`` function.
129 |
130 | To make this work we need to ignore ``NULL`` values, just like in the [jOOQ] article. The PostgreSQL wiki has a great article on it, which shows how to implement such a function with a PostgreSQL ``AGGREGATE``:
131 |
132 | * [https://wiki.postgresql.org/wiki/First/last_(aggregate)](https://wiki.postgresql.org/wiki/First/last_(aggregate))
133 |
134 | I simply copy and paste it:
135 |
136 | ```sql
137 | CREATE OR REPLACE FUNCTION sample.last_agg ( anyelement, anyelement )
138 | RETURNS anyelement LANGUAGE SQL IMMUTABLE STRICT AS $$
139 | SELECT $2;
140 | $$;
141 |
142 | CREATE AGGREGATE sample.LAST (
143 | sfunc = sample.last_agg,
144 | basetype = anyelement,
145 | stype = anyelement
146 | );
147 | ```
148 |
149 | And finally we can write the function to interpolate the measurements:
150 |
151 | ```sql
152 | CREATE OR REPLACE FUNCTION sample.interpolate_temperature(wban_p TEXT, start_t TIMESTAMP, end_t TIMESTAMP, slice_t INTERVAL)
153 | RETURNS TABLE(
154 | r_wban TEXT,
155 | r_slice TIMESTAMP,
156 | min_temp DOUBLE PRECISION,
157 | max_temp DOUBLE PRECISION,
158 | avg_temp DOUBLE PRECISION
159 | ) AS $$
160 | -- bounded_series assigns all values into a time slice with a given interval length in slice_t:
161 | WITH bounded_series AS (
162 | SELECT wban,
163 | datetime,
164 | 'epoch'::timestamp + $4 * (extract(epoch from datetime)::int4 / EXTRACT(epoch FROM $4)::int4) AS slice,
165 | temperature
166 | FROM sample.weather_data w
167 | WHERE w.wban = $1
168 | ORDER BY wban, slice, datetime ASC
169 | ),
170 | -- dense_series uses generate_series to generate the intervals we expect in the data:
171 | dense_series AS (
172 | SELECT $1 as wban, slice
173 | FROM generate_series($2, $3, $4) s(slice)
174 | ORDER BY wban, slice
175 | ),
176 | -- filled_series now uses a WINDOW function for find the first / last not null
177 | -- value in a WINDOW and uses sample.linear_interpolate to interpolate the slices
178 | -- between both values.
179 | --
180 | -- Finally we have to GROUP BY the slice and wban and take the AVG, MIN and MAX
181 | -- value in the slice. You can also add more Operators there, it is just an
182 | -- example:
183 | filled_series AS (
184 | SELECT wban,
185 | slice,
186 | temperature,
187 | COALESCE(temperature, sample.linear_interpolate(slice,
188 | sample.last(datetime) over (lookback),
189 | sample.last(temperature) over (lookback),
190 | sample.last(datetime) over (lookforward),
191 | sample.last(temperature) over (lookforward))) interpolated
192 | FROM bounded_series
193 | RIGHT JOIN dense_series USING (wban, slice)
194 | WINDOW
195 | lookback AS (ORDER BY slice, datetime),
196 | lookforward AS (ORDER BY slice DESC, datetime DESC)
197 | ORDER BY slice, datetime)
198 | SELECT wban AS r_wban,
199 | slice AS r_slice,
200 | MIN(interpolated) as min_temp,
201 | MAX(interpolated) as max_temp,
202 | AVG(interpolated) as avg_temp
203 | FROM filled_series
204 | GROUP BY slice, wban
205 | ORDER BY wban, slice;
206 |
207 | $$ LANGUAGE SQL;
208 | ```
209 |
210 | With the function we can now interpolate the temperature for a given station with any interval:
211 |
212 | ```sql
213 | SELECT * FROM sample.interpolate_temperature('00102', '2015-03-23', '2015-03-30', '1 hour'::interval)
214 | ```
215 |
216 | And that's it!
217 |
218 | ## How to Reproduce this Experiment ##
219 |
220 | The was a highly interesting article on the [Machine Learning Reproducibility crisis] lately, which discussed the
221 | problem of reproducing the results of Machine Learning papers. It's something I also felt long time ago, that's
222 | why you will always be able to reproduce the examples I share in this blog.
223 |
224 | It's probably best to add a section on how to reproduce this article and use the example.
225 |
226 | ### Database ###
227 |
228 | #### Creating a User ####
229 |
230 | Create the user ``philipp`` for connecting to the databases:
231 |
232 | ```
233 | postgres=# CREATE USER philipp WITH PASSWORD 'test_pwd';
234 | CREATE ROLE
235 | ```
236 |
237 | Then we can create the test database ``sampledb`` and set the owner to ``philipp``:
238 |
239 | ```
240 | postgres=# CREATE DATABASE sampledb WITH OWNER philipp;
241 | ```
242 |
243 | #### Creating the Database ####
244 |
245 | There are two scripts to create the database in the following folder of the project:
246 |
247 | * [PostgresTimeseriesAnalysis/sql]
248 |
249 | To create the database execute the ``create_database.bat`` (Windows) or ``create_database.sh`` (Linux).
250 |
251 | Alternatively you can simply copy and paste [10_create_database.sql] and [20_sample_data.sql] into an editor of your choice and execute it.
252 |
253 | [PostgresTimeseriesAnalysis/sql]: https://github.com/bytefish/PostgresTimeseriesAnalysis/tree/master/PostgresTimeseriesAnalysis/sql
254 | [10_create_database.sql]: https://github.com/bytefish/PostgresTimeseriesAnalysis/blob/master/PostgresTimeseriesAnalysis/sql/sql/10_create_database.sql
255 | [20_sample_data.sql]: https://github.com/bytefish/PostgresTimeseriesAnalysis/blob/master/PostgresTimeseriesAnalysis/sql/sql/20_sample_data.sql
256 |
257 | #### Enable PostgreSQL Statistics ####
258 |
259 | Find out which ``postgresql.config`` is currently loaded:
260 |
261 | ```sql
262 | -- Show the currently used config file:
263 | SHOW config_file;
264 | ```
265 |
266 | The ``pg_stat_statements`` module must be configured in the ``postgresq.conf``:
267 |
268 | ```
269 | shared_preload_libraries='pg_stat_statements'
270 |
271 | pg_stat_statements.max = 10000
272 | pg_stat_statements.track = all
273 | ```
274 |
275 | Now we can load the ``pg_stat_statements`` and query the most recent queries:
276 |
277 | ```sql
278 | -- Load the pg_stat_statements:
279 | create extension pg_stat_statements;
280 |
281 | -- Show recent Query statistics:
282 | select *
283 | from pg_stat_statements
284 | order by queryid desc;
285 | ```
286 |
287 | #### Enable Parallel Queries ####
288 |
289 | Find out, which ``postgresql.config`` is currently loaded:
290 |
291 | ```sql
292 | -- Show the currently used config file:
293 | SHOW config_file;
294 | ```
295 |
296 | Then set the parameters ``max_worker_processes``and ``max_parallel_workers_per_gather``:
297 |
298 | ```
299 | max_worker_processes = 8 # (change requires restart)
300 | max_parallel_workers_per_gather = 4 # taken from max_worker_processes
301 | ```
302 |
303 | ### Dataset ###
304 |
305 | The dataset is the [Quality Controlled Local Climatological Data (QCLCD)] for 2014 and 2015. It contains hourly weather
306 | measurements for more than 1,600 US Weather Stations. It is a great dataset to learn about data processing and data
307 | visualization:
308 |
309 | > The Quality Controlled Local Climatological Data (QCLCD) consist of hourly, daily, and monthly summaries for approximately
310 | > 1,600 U.S. locations. Daily Summary forms are not available for all stations. Data are available beginning January 1, 2005
311 | > and continue to the present. Please note, there may be a 48-hour lag in the availability of the most recent data.
312 |
313 | The data is available as CSV files at:
314 |
315 | * [http://www.ncdc.noaa.gov/orders/qclcd/](http://www.ncdc.noaa.gov/orders/qclcd/)
316 |
317 | Download the file ``QCLCD201503.zip`` from:
318 |
319 | * [http://www.ncdc.noaa.gov/orders/qclcd/](http://www.ncdc.noaa.gov/orders/qclcd/)
320 |
321 | ### Application ###
322 |
323 | The application is a Java application, which can be started with an IDE of your choice:
324 |
325 | * [WeatherDataStreamingExample.java]
326 |
327 | You probably need to adjust the connection string to the database:
328 |
329 | ```java
330 | private static final String databaseUri = "jdbc:postgresql://127.0.0.1:5432/sampledb?user=philipp&password=test_pwd";
331 | ```
332 |
333 | And change the path to the CSV files, if the path differs:
334 |
335 | ```java
336 | final Path csvStationDataFilePath = FileSystems.getDefault().getPath("D:\\datasets\\201503station.txt");
337 | final Path csvLocalWeatherDataFilePath = FileSystems.getDefault().getPath("D:\\datasets\\201503hourly.txt");
338 | ```
339 |
340 | Once executed the application parses the CSV files and writes the data into the specified database.
341 |
342 | [WeatherDataStreamingExample.java]: https://github.com/bytefish/PostgresTimeseriesAnalysis/blob/master/PostgresTimeseriesAnalysis/src/main/java/app/WeatherDataStreamingExample.java
343 | [jOOQ]: https://www.jooq.org/
344 | [Using IGNORE NULLS With SQL Window Functions to Fill Gaps]: https://blog.jooq.org/2019/04/24/using-ignore-nulls-with-sql-window-functions-to-fill-gaps/
345 | [Time Series Analysis Part 3: Resampling and Interpolation]: https://content.pivotal.io/blog/time-series-analysis-part-3-resampling-and-interpolation
346 | [Machine Learning Reproducibility crisis]: https://towardsdatascience.com/why-git-and-git-lfs-is-not-enough-to-solve-the-machine-learning-reproducibility-crisis-f733b49e96e8
347 | [generate_series]: https://www.postgresql.org/docs/current/functions-srf.html
348 | [Linear Interpolation]: https://en.wikipedia.org/wiki/Linear_interpolation
349 | [Window Functions]: https://www.postgresql.org/docs/current/functions-window.html
350 | [Understanding Window Functions]: https://tapoueh.org/blog/2013/08/understanding-window-functions/
351 | [Dimitri Fontaine]: https://tapoueh.org
352 | [LAG]: https://docs.microsoft.com/en-us/sql/t-sql/functions/lag-transact-sql
353 | [Quality Controlled Local Climatological Data (QCLCD)]: https://www.ncdc.noaa.gov/data-access/land-based-station-data/land-based-datasets/quality-controlled-local-climatological-data-qclcd
354 |
355 | ## Additional Resources ##
356 |
357 | * http://tapoueh.org/blog/2013/08/20-Window-Functions
358 | * https://www.compose.com/articles/metrics-maven-window-functions-in-postgresql/
359 | * https://wiki.postgresql.org/images/a/a2/PostgreSQL_Window_Functions.pdf
360 | * http://blog.cleverelephant.ca/2016/03/parallel-postgis.html
361 |
362 | ### Resampling and Interpolation ###
363 |
364 | * https://content.pivotal.io/blog/time-series-analysis-part-3-resampling-and-interpolation
365 |
366 |
367 | [PostgreSQL]: https://www.postgresql.org
368 | [Quality Controlled Local Climatological Data (QCLCD)]: https://www.ncdc.noaa.gov/data-access/land-based-station-data/land-based-datasets/quality-controlled-local-climatological-data-qclcd
369 |
--------------------------------------------------------------------------------