├── LICENSE ├── PostgresTimeseriesAnalysis ├── .gitignore ├── pom.xml ├── scripts │ └── station_csv_to_sql.ps1 ├── sql │ ├── .gitignore │ ├── create_database.bat │ ├── create_database.sh │ └── sql │ │ ├── 10_create_database.sql │ │ └── 20_sample_data.sql └── src │ └── main │ ├── java │ ├── app │ │ └── WeatherDataStreamingExample.java │ ├── csv │ │ ├── converter │ │ │ └── LocalWeatherDataConverter.java │ │ ├── mapping │ │ │ ├── LocalWeatherDataMapper.java │ │ │ └── StationMapper.java │ │ ├── model │ │ │ ├── LocalWeatherData.java │ │ │ └── Station.java │ │ └── parser │ │ │ └── Parsers.java │ ├── model │ │ ├── GeoLocation.java │ │ ├── LocalWeatherData.java │ │ └── Station.java │ ├── pgsql │ │ ├── converter │ │ │ └── LocalWeatherDataConverter.java │ │ ├── mapping │ │ │ └── LocalWeatherDataMapping.java │ │ └── model │ │ │ └── LocalWeatherData.java │ └── utils │ │ ├── DateUtilities.java │ │ └── StringUtils.java │ └── resources │ ├── log4j.properties │ └── logback.xml └── README.md /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Philipp Wagner 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | target 3 | elasticutils-*-javadoc.jar 4 | elasticutils-*-sources.jar 5 | *.asc 6 | stderr.log 7 | stdout.log 8 | *.iml -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | de.bytefish 8 | pgsamle 9 | 1.0 10 | 11 | 12 | 13 | 14 | org.apache.maven.plugins 15 | maven-compiler-plugin 16 | 17 | 1.8 18 | 1.8 19 | 20 | 21 | 22 | 23 | 24 | 25 | UTF-8 26 | 27 | 28 | 29 | 30 | 31 | org.postgresql 32 | postgresql 33 | 42.2.5 34 | 35 | 36 | 37 | de.bytefish 38 | jtinycsvparser 39 | 1.2 40 | 41 | 42 | 43 | de.bytefish 44 | pgbulkinsert 45 | 3.3 46 | 47 | 48 | 49 | io.reactivex.rxjava2 50 | rxjava 51 | 2.2.8 52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/scripts/station_csv_to_sql.ps1: -------------------------------------------------------------------------------- 1 | param ( 2 | [string]$in_filename, 3 | [string]$out_filename 4 | ) 5 | 6 | function GetSqlCompatibleValue([string] $val) 7 | { 8 | if([string]::IsNullOrWhiteSpace($val)) { 9 | return "NULL" 10 | } 11 | return $val.Replace("'", "''"); 12 | } 13 | 14 | 15 | 16 | Import-Csv $in_filename -Delimiter "|" | Foreach-Object{ 17 | 18 | $line = 19 | @" 20 | `nINSERT INTO sample.station(wban, name, state, location, latitude, longitude, ground_height, station_height, timeZone) 21 | SELECT '{0}', '{1}', '{2}', '{3}', {4}, {5}, {6}, {7}, {8} 22 | WHERE NOT EXISTS (SELECT 1 FROM sample.station WHERE wban='{0}'); 23 | "@ -f (GetSqlCompatibleValue $_.WBAN), (GetSqlCompatibleValue $_.Name), (GetSqlCompatibleValue $_.State), (GetSqlCompatibleValue $_.Location), (GetSqlCompatibleValue $_.Latitude), (GetSqlCompatibleValue $_.Longitude), (GetSqlCompatibleValue $_.GroundHeight), (GetSqlCompatibleValue $_.StationHeight), (GetSqlCompatibleValue $_.TimeZone) 24 | 25 | $line|Out-File $out_filename -Append 26 | 27 | } 28 | -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/sql/.gitignore: -------------------------------------------------------------------------------- 1 | *.log -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/sql/create_database.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | :: Copyright (c) Philipp Wagner. All rights reserved. 4 | :: Licensed under the MIT license. See LICENSE file in the project root for full license information. 5 | 6 | set PGSQL_EXECUTABLE="psql.exe" 7 | set CREATEDB_EXECUTABLE="createdb.exe" 8 | set STDOUT=stdout.log 9 | set STDERR=stderr.log 10 | set LOGFILE=query_output.log 11 | 12 | set HostName=localhost 13 | set PortNumber=5432 14 | set DatabaseName=sampledb 15 | set UserName=philipp 16 | set Password= 17 | 18 | call :AskQuestionWithYdefault "Use Host (%HostName%) Port (%PortNumber%) [Y,n]?" reply_ 19 | if /i [%reply_%] NEQ [y] ( 20 | set /p HostName="Enter HostName: " 21 | set /p PortNumber="Enter Port: " 22 | ) 23 | 24 | call :AskQuestionWithYdefault "Use Database (%DatabaseName%) [Y,n]?" reply_ 25 | if /i [%reply_%] NEQ [y] ( 26 | set /p ServerName="Enter Database: " 27 | ) 28 | 29 | call :AskQuestionWithYdefault "Use User (%UserName%) [Y,n]?" reply_ 30 | if /i [%reply_%] NEQ [y] ( 31 | set /p UserName="Enter User: " 32 | ) 33 | 34 | set /p PGPASSWORD="Password: " 35 | 36 | 1>%STDOUT% 2>%STDERR% ( 37 | %CREATEDB_EXECUTABLE% -h %HostName% -p %PortNumber% -U %UserName% -O %UserName% %DatabaseName% 38 | %PGSQL_EXECUTABLE% -h %HostName% -p %PortNumber% -d %DatabaseName% -U %UserName% < sql/10_create_database.sql -L %LOGFILE% 39 | %PGSQL_EXECUTABLE% -h %HostName% -p %PortNumber% -d %DatabaseName% -U %UserName% < sql/20_sample_data.sql -L %LOGFILE% 40 | ) 41 | 42 | goto :end 43 | 44 | :: The question as a subroutine 45 | :AskQuestionWithYdefault 46 | setlocal enableextensions 47 | :_asktheyquestionagain 48 | set return_= 49 | set ask_= 50 | set /p ask_="%~1" 51 | if "%ask_%"=="" set return_=y 52 | if /i "%ask_%"=="Y" set return_=y 53 | if /i "%ask_%"=="n" set return_=n 54 | if not defined return_ goto _asktheyquestionagain 55 | endlocal & set "%2=%return_%" & goto :EOF 56 | 57 | :end 58 | pause -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/sql/create_database.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Copyright (c) Philipp Wagner. All rights reserved. 4 | # Licensed under the MIT license. See LICENSE file in the project root for full license information. 5 | 6 | 7 | function ask_yes_or_no() { 8 | read -p "$1 ([y]es or [N]o): " 9 | case $(echo $REPLY | tr '[A-Z]' '[a-z]') in 10 | y|yes) echo "yes" ;; 11 | *) echo "no" ;; 12 | esac 13 | } 14 | 15 | STDOUT=stdout.log 16 | STDERR=stderr.log 17 | LOGFILE=query_output.log 18 | 19 | HostName=localhost 20 | PortNumber=5432 21 | DatabaseName=sampledb 22 | UserName=philipp 23 | 24 | if [[ "no" == $(ask_yes_or_no "Use Host ($HostName) Port ($PortNumber)") ]] 25 | then 26 | read -p "Enter HostName: " HostName 27 | read -p "Enter Port: " PortNumber 28 | fi 29 | 30 | if [[ "no" == $(ask_yes_or_no "Use Database ($DatabaseName)") ]] 31 | then 32 | read -p "Enter Database: " ServerName 33 | fi 34 | 35 | if [[ "no" == $(ask_yes_or_no "Use User ($UserName)") ]] 36 | then 37 | read -p "Enter User: " UserName 38 | fi 39 | 40 | read -p "Password: " PGPASSWORD 41 | 42 | # Database 43 | psql -h $HostName -p $PortNumber -d $DatabaseName -U $UserName < sql/10_create_database.sql -L $LOGFILE 1>$STDOUT 2>$STDERR 44 | 45 | # Sample Data 46 | psql -h $HostName -p $PortNumber -d $DatabaseName -U $UserName < sql/20_sample_data.sql -L $LOGFILE 1>>$STDOUT 2>>$STDERR -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/sql/sql/10_create_database.sql: -------------------------------------------------------------------------------- 1 | -- Copyright (c) Philipp Wagner. All rights reserved. 2 | -- Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | DO $$ 5 | BEGIN 6 | ----------------------------- 7 | -- Schema 8 | ----------------------------- 9 | IF NOT EXISTS (SELECT 1 FROM information_schema.schemata WHERE schema_name = 'sample') THEN 10 | CREATE SCHEMA sample; 11 | END IF; 12 | 13 | ----------------------------- 14 | -- Tables 15 | ----------------------------- 16 | IF NOT EXISTS ( 17 | SELECT 1 18 | FROM information_schema.tables 19 | WHERE table_schema = 'sample' 20 | AND table_name = 'station' 21 | ) THEN 22 | 23 | CREATE TABLE sample.station 24 | ( 25 | station_id SERIAL PRIMARY KEY, 26 | wban TEXT NOT NULL, 27 | name TEXT NOT NULL, 28 | state TEXT, 29 | location TEXT, 30 | latitude REAL NOT NULL, 31 | longitude REAL NOT NULL, 32 | ground_height SMALLINT, 33 | station_height SMALLINT, 34 | TimeZone SMALLINT 35 | ); 36 | 37 | END IF; 38 | 39 | IF NOT EXISTS ( 40 | SELECT 1 41 | FROM information_schema.tables 42 | WHERE table_schema = 'sample' 43 | AND table_name = 'weather_data' 44 | ) THEN 45 | 46 | CREATE TABLE sample.weather_data 47 | ( 48 | wban TEXT, 49 | dateTime TIMESTAMP, 50 | temperature REAL, 51 | windSpeed REAL, 52 | stationPressure REAL, 53 | skyCondition TEXT 54 | ); 55 | 56 | END IF; 57 | 58 | ----------------------------- 59 | -- Indexes 60 | ----------------------------- 61 | IF NOT EXISTS (SELECT 1 FROM pg_constraint WHERE conname = 'uk_station_wban') THEN 62 | ALTER TABLE sample.station 63 | ADD CONSTRAINT uk_station_wban 64 | UNIQUE (wban); 65 | END IF; 66 | 67 | ----------------------------- 68 | -- Security 69 | ----------------------------- 70 | REVOKE ALL ON sample.station FROM public; 71 | REVOKE ALL ON sample.weather_data FROM public; 72 | 73 | END; 74 | $$; 75 | 76 | ----------------------------- 77 | -- Functions 78 | ----------------------------- 79 | CREATE OR REPLACE FUNCTION sample.first_agg ( anyelement, anyelement ) 80 | RETURNS anyelement LANGUAGE SQL IMMUTABLE STRICT AS $$ 81 | SELECT $1; 82 | $$; 83 | 84 | CREATE AGGREGATE sample.FIRST ( 85 | sfunc = sample.first_agg, 86 | basetype = anyelement, 87 | stype = anyelement 88 | ); 89 | 90 | CREATE OR REPLACE FUNCTION sample.last_agg ( anyelement, anyelement ) 91 | RETURNS anyelement LANGUAGE SQL IMMUTABLE STRICT AS $$ 92 | SELECT $2; 93 | $$; 94 | 95 | CREATE AGGREGATE sample.LAST ( 96 | sfunc = sample.last_agg, 97 | basetype = anyelement, 98 | stype = anyelement 99 | ); 100 | 101 | CREATE OR REPLACE FUNCTION sample.datediff_seconds(start_t TIMESTAMP, end_t TIMESTAMP) 102 | RETURNS DOUBLE PRECISION AS $$ 103 | SELECT EXTRACT(epoch FROM $2 - $1) 104 | $$ LANGUAGE SQL; 105 | 106 | CREATE OR REPLACE FUNCTION sample.timestamp_to_seconds(timestamp_t TIMESTAMP) 107 | RETURNS DOUBLE PRECISION AS $$ 108 | SELECT EXTRACT(epoch from timestamp_t) 109 | $$ LANGUAGE SQL; 110 | 111 | CREATE OR REPLACE FUNCTION sample.linear_interpolate(x_i DOUBLE PRECISION, 112 | x_0 DOUBLE PRECISION, 113 | y_0 DOUBLE PRECISION, 114 | x_1 DOUBLE PRECISION, 115 | y_1 DOUBLE PRECISION) 116 | RETURNS DOUBLE PRECISION AS $$ 117 | SELECT (($5 - $3) / ($4 - $2)) * ($1 - $2) + $3; 118 | $$ LANGUAGE SQL; 119 | 120 | CREATE OR REPLACE FUNCTION sample.linear_interpolate(x_i TIMESTAMP, x_0 TIMESTAMP, y_0 DOUBLE PRECISION, x_1 TIMESTAMP, y_1 DOUBLE PRECISION) 121 | RETURNS DOUBLE PRECISION AS $$ 122 | SELECT sample.linear_interpolate(sample.timestamp_to_seconds($1), 123 | sample.timestamp_to_seconds($2), 124 | $3, 125 | sample.timestamp_to_seconds($4), 126 | $5); 127 | $$ LANGUAGE SQL; 128 | 129 | 130 | CREATE OR REPLACE FUNCTION sample.interpolate_temperature(wban_p TEXT, start_t TIMESTAMP, end_t TIMESTAMP, slice_t INTERVAL) 131 | RETURNS TABLE( 132 | r_wban TEXT, 133 | r_slice TIMESTAMP, 134 | min_temp DOUBLE PRECISION, 135 | max_temp DOUBLE PRECISION, 136 | avg_temp DOUBLE PRECISION 137 | ) AS $$ 138 | -- bounded_series assigns all values into a time slice with a given interval length in slice_t: 139 | WITH bounded_series AS ( 140 | SELECT wban, 141 | datetime, 142 | 'epoch'::timestamp + $4 * (extract(epoch from datetime)::int4 / EXTRACT(epoch FROM $4)::int4) AS slice, 143 | temperature 144 | FROM sample.weather_data w 145 | WHERE w.wban = $1 146 | ORDER BY wban, slice, datetime ASC 147 | ), 148 | -- dense_series uses generate_series to generate the intervals we expect in the data: 149 | dense_series AS ( 150 | SELECT $1 as wban, slice 151 | FROM generate_series($2, $3, $4) s(slice) 152 | ORDER BY wban, slice 153 | ), 154 | -- filled_series now uses a WINDOW function for find the first / last not null 155 | -- value in a WINDOW and uses sample.linear_interpolate to interpolate the slices 156 | -- between both values. 157 | -- 158 | -- Finally we have to GROUP BY the slice and wban and take the AVG, MIN and MAX 159 | -- value in the slice. You can also add more Operators there, it is just an 160 | -- example: 161 | filled_series AS ( 162 | SELECT wban, 163 | slice, 164 | temperature, 165 | COALESCE(temperature, sample.linear_interpolate(slice, 166 | sample.last(datetime) over (lookback), 167 | sample.last(temperature) over (lookback), 168 | sample.last(datetime) over (lookforward), 169 | sample.last(temperature) over (lookforward))) interpolated 170 | FROM bounded_series 171 | RIGHT JOIN dense_series USING (wban, slice) 172 | WINDOW 173 | lookback AS (ORDER BY slice, datetime), 174 | lookforward AS (ORDER BY slice DESC, datetime DESC) 175 | ORDER BY slice, datetime) 176 | SELECT wban AS r_wban, 177 | slice AS r_slice, 178 | MIN(interpolated) as min_temp, 179 | MAX(interpolated) as max_temp, 180 | AVG(interpolated) as avg_temp 181 | FROM filled_series 182 | GROUP BY slice, wban 183 | ORDER BY wban, slice; 184 | 185 | $$ LANGUAGE SQL; -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/java/app/WeatherDataStreamingExample.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Philipp Wagner. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | package app; 5 | 6 | import csv.model.LocalWeatherData; 7 | import csv.model.Station; 8 | import csv.parser.Parsers; 9 | import de.bytefish.jtinycsvparser.mapping.CsvMappingResult; 10 | import de.bytefish.pgbulkinsert.PgBulkInsert; 11 | import io.reactivex.Observable; 12 | import io.reactivex.disposables.Disposable; 13 | import org.postgresql.PGConnection; 14 | import pgsql.mapping.LocalWeatherDataMapping; 15 | 16 | import java.nio.charset.StandardCharsets; 17 | import java.nio.file.FileSystems; 18 | import java.nio.file.Path; 19 | import java.sql.Connection; 20 | import java.sql.DriverManager; 21 | import java.util.Map; 22 | import java.util.concurrent.TimeUnit; 23 | import java.util.stream.Collectors; 24 | import java.util.stream.Stream; 25 | 26 | public class WeatherDataStreamingExample { 27 | 28 | private static final String databaseUri = "jdbc:postgresql://127.0.0.1:5432/sampledb?user=philipp&password=test_pwd"; 29 | 30 | public static void main(String[] args) { 31 | 32 | // The PostgreSQL Bulk Writer: 33 | final PgBulkInsert writer = new PgBulkInsert<>(new LocalWeatherDataMapping("sample", "weather_data")); 34 | 35 | // Path to QCLCD CSV Files: 36 | final Path csvStationDataFilePath = FileSystems.getDefault().getPath("D:\\datasets\\201503station.txt"); 37 | final Path csvLocalWeatherDataFilePath = FileSystems.getDefault().getPath("D:\\datasets\\201503hourly.txt"); 38 | 39 | // A map between the WBAN and Station for faster Lookups: 40 | final Map stationMap = getStationMap(csvStationDataFilePath); 41 | 42 | try (Stream> csvStream = getLocalWeatherData(csvLocalWeatherDataFilePath)) { 43 | // Now turn the CSV Stream into the Postgres Stream: 44 | Stream localWeatherDataStream = csvStream 45 | // Filter only valid entries: 46 | .filter(x -> x.isValid()) 47 | // Now we can work on the Results: 48 | .map(x -> x.getResult()) 49 | // Take only measurements available in the list of stations: 50 | .filter(x -> stationMap.containsKey(x.getWban())) 51 | // Map into the general Analytics Model: 52 | .map(x -> { 53 | // Get the matching station now: 54 | csv.model.Station station = stationMap.get(x.getWban()); 55 | // And build the Model: 56 | return csv.converter.LocalWeatherDataConverter.convert(x, station); 57 | }) 58 | // Now build the PostgresSQL Model: 59 | .map(x -> pgsql.converter.LocalWeatherDataConverter.convert(x)); 60 | 61 | // Turn it into an Observable for simplified Buffering: 62 | Disposable disposable = Observable.fromIterable(localWeatherDataStream::iterator) 63 | // Wait two Seconds or Buffer up to 80000 entities: 64 | .buffer(2, TimeUnit.SECONDS,80000) 65 | // Subscribe to the Batches: 66 | .subscribe(x -> { 67 | // Connect to your Postgres Instance: 68 | try (Connection connection = DriverManager.getConnection(databaseUri)) { 69 | // Get the underlying PGConnection: 70 | PGConnection pgConnection = connection.unwrap(PGConnection.class); 71 | 72 | // And Bulk Write the Results: 73 | writer.saveAll(pgConnection, x); 74 | } 75 | }, x -> System.err.println(x)); 76 | 77 | // Probably not neccessary, but dispose anyway: 78 | if(disposable.isDisposed()) { 79 | disposable.dispose(); 80 | } 81 | } 82 | } 83 | 84 | private static Stream> getLocalWeatherData(Path path) { 85 | return Parsers.LocalWeatherDataParser().readFromFile(path, StandardCharsets.US_ASCII); 86 | } 87 | 88 | private static Stream getStations(Path path) { 89 | return Parsers.StationParser().readFromFile(path, StandardCharsets.US_ASCII) 90 | .filter(x -> x.isValid()) 91 | .map(x -> x.getResult()); 92 | } 93 | 94 | private static Map getStationMap(Path path) { 95 | try (Stream stationStream = getStations(path)) { 96 | return stationStream 97 | .collect(Collectors.groupingBy(x -> x.getWban())) 98 | .entrySet().stream() 99 | .map(x -> x.getValue().get(0)) 100 | .collect(Collectors.toMap(csv.model.Station::getWban, x -> x)); 101 | } 102 | } 103 | } -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/java/csv/converter/LocalWeatherDataConverter.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Philipp Wagner. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | package csv.converter; 5 | 6 | 7 | import java.time.LocalDate; 8 | import java.time.LocalTime; 9 | 10 | public class LocalWeatherDataConverter { 11 | 12 | public static model.LocalWeatherData convert(csv.model.LocalWeatherData csvLocalWeatherData, csv.model.Station csvStation) { 13 | 14 | LocalDate date = csvLocalWeatherData.getDate(); 15 | LocalTime time = csvLocalWeatherData.getTime(); 16 | String skyCondition = csvLocalWeatherData.getSkyCondition(); 17 | Float stationPressure = csvLocalWeatherData.getStationPressure(); 18 | Float temperature = csvLocalWeatherData.getDryBulbCelsius(); 19 | Float windSpeed = csvLocalWeatherData.getWindSpeed(); 20 | 21 | // Convert the Station data: 22 | model.Station station = convert(csvStation); 23 | 24 | return new model.LocalWeatherData(station, date, time, temperature, windSpeed, stationPressure, skyCondition); 25 | } 26 | 27 | public static model.Station convert(csv.model.Station csvStation) { 28 | String wban = csvStation.getWban(); 29 | String name = csvStation.getName(); 30 | String state = csvStation.getState(); 31 | String location = csvStation.getLocation(); 32 | Integer timeZone = csvStation.getTimeZone(); 33 | model.GeoLocation geoLocation = new model.GeoLocation(csvStation.getLatitude(), csvStation.getLongitude()); 34 | 35 | return new model.Station(wban, name, state, location, timeZone, geoLocation); 36 | } 37 | 38 | } -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/java/csv/mapping/LocalWeatherDataMapper.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Philipp Wagner. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | package csv.mapping; 5 | 6 | import csv.model.LocalWeatherData; 7 | import de.bytefish.jtinycsvparser.builder.IObjectCreator; 8 | import de.bytefish.jtinycsvparser.mapping.CsvMapping; 9 | import de.bytefish.jtinycsvparser.typeconverter.FloatConverter; 10 | import de.bytefish.jtinycsvparser.typeconverter.IgnoreMissingValuesConverter; 11 | import de.bytefish.jtinycsvparser.typeconverter.LocalDateConverter; 12 | import de.bytefish.jtinycsvparser.typeconverter.LocalTimeConverter; 13 | 14 | import java.time.LocalDate; 15 | import java.time.LocalTime; 16 | import java.time.format.DateTimeFormatter; 17 | 18 | public class LocalWeatherDataMapper extends CsvMapping 19 | { 20 | public LocalWeatherDataMapper(IObjectCreator creator) 21 | { 22 | super(creator); 23 | 24 | mapProperty(0, String.class, LocalWeatherData::setWban); 25 | mapProperty(1, LocalDate.class, LocalWeatherData::setDate, new LocalDateConverter(DateTimeFormatter.ofPattern("yyyyMMdd"))); 26 | mapProperty(2, LocalTime.class, LocalWeatherData::setTime, new LocalTimeConverter(DateTimeFormatter.ofPattern("HHmm"))); 27 | mapProperty(4, String.class, LocalWeatherData::setSkyCondition); 28 | mapProperty(12, Float.class, LocalWeatherData::setDryBulbCelsius, new IgnoreMissingValuesConverter<>(new FloatConverter(), "M")); 29 | mapProperty(24, Float.class, LocalWeatherData::setWindSpeed, new IgnoreMissingValuesConverter<>(new FloatConverter(), "M")); 30 | mapProperty(30, Float.class, LocalWeatherData::setStationPressure, new IgnoreMissingValuesConverter<>(new FloatConverter(), "M")); 31 | } 32 | } -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/java/csv/mapping/StationMapper.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Philipp Wagner. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | package csv.mapping; 5 | 6 | import csv.model.Station; 7 | import de.bytefish.jtinycsvparser.builder.IObjectCreator; 8 | import de.bytefish.jtinycsvparser.mapping.CsvMapping; 9 | import de.bytefish.jtinycsvparser.typeconverter.IgnoreMissingValuesConverter; 10 | import de.bytefish.jtinycsvparser.typeconverter.IntegerConverter; 11 | 12 | public class StationMapper extends CsvMapping 13 | { 14 | public StationMapper(IObjectCreator creator) 15 | { 16 | super(creator); 17 | 18 | mapProperty(0, String.class, Station::setWban); 19 | mapProperty(1, String.class, Station::setWmo); 20 | mapProperty(2, String.class, Station::setCallSign); 21 | mapProperty(3, String.class, Station::setClimateDivisionCode); 22 | mapProperty(4, String.class, Station::setClimateDivisionStateCode); 23 | mapProperty(5, String.class, Station::setClimateDivisionStationCode); 24 | mapProperty(6, String.class, Station::setName); 25 | mapProperty(7, String.class, Station::setState); 26 | mapProperty(8, String.class, Station::setLocation); 27 | mapProperty(9, Float.class, Station::setLatitude); 28 | mapProperty(10, Float.class, Station::setLongitude); 29 | mapProperty(11, Integer.class, Station::setGroundHeight, new IgnoreMissingValuesConverter<>(new IntegerConverter())); 30 | mapProperty(12, Integer.class, Station::setStationHeight, new IgnoreMissingValuesConverter<>(new IntegerConverter())); 31 | mapProperty(13, Integer.class, Station::setBarometer, new IgnoreMissingValuesConverter<>(new IntegerConverter())); 32 | mapProperty(14, Integer.class, Station::setTimeZone); 33 | } 34 | } -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/java/csv/model/LocalWeatherData.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Philipp Wagner. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | package csv.model; 5 | 6 | import java.time.LocalDate; 7 | import java.time.LocalTime; 8 | 9 | public class LocalWeatherData { 10 | 11 | private String wban; 12 | 13 | private LocalDate date; 14 | 15 | private LocalTime time; 16 | 17 | private String skyCondition; 18 | 19 | private Float dryBulbCelsius; 20 | 21 | private Float windSpeed; 22 | 23 | private Float stationPressure; 24 | 25 | public LocalWeatherData() { 26 | 27 | } 28 | 29 | public String getWban() { 30 | return wban; 31 | } 32 | 33 | public void setWban(String wban) { 34 | this.wban = wban; 35 | } 36 | 37 | public LocalDate getDate() { 38 | return date; 39 | } 40 | 41 | public void setDate(LocalDate date) { 42 | this.date = date; 43 | } 44 | 45 | public LocalTime getTime() { 46 | return time; 47 | } 48 | 49 | public void setTime(LocalTime time) { 50 | this.time = time; 51 | } 52 | 53 | public String getSkyCondition() { 54 | return skyCondition; 55 | } 56 | 57 | public void setSkyCondition(String skyCondition) { 58 | this.skyCondition = skyCondition; 59 | } 60 | 61 | public Float getDryBulbCelsius() { 62 | return dryBulbCelsius; 63 | } 64 | 65 | public void setDryBulbCelsius(Float dryBulbCelsius) { 66 | this.dryBulbCelsius = dryBulbCelsius; 67 | } 68 | 69 | public Float getWindSpeed() { 70 | return windSpeed; 71 | } 72 | 73 | public void setWindSpeed(Float windSpeed) { 74 | this.windSpeed = windSpeed; 75 | } 76 | 77 | public Float getStationPressure() { 78 | return stationPressure; 79 | } 80 | 81 | public void setStationPressure(Float stationPressure) { 82 | this.stationPressure = stationPressure; 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/java/csv/model/Station.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Philipp Wagner. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | package csv.model; 5 | 6 | public class Station { 7 | 8 | private String wban; 9 | 10 | private String wmo; 11 | 12 | private String callSign; 13 | 14 | private String climateDivisionCode; 15 | 16 | private String climateDivisionStateCode; 17 | 18 | private String climateDivisionStationCode; 19 | 20 | private String name; 21 | 22 | private String state; 23 | 24 | private String location; 25 | 26 | private Float latitude; 27 | 28 | private Float longitude; 29 | 30 | private Integer groundHeight; 31 | 32 | private Integer stationHeight; 33 | 34 | private Integer barometer; 35 | 36 | private Integer timeZone; 37 | 38 | public Station() { 39 | } 40 | 41 | public String getWban() { 42 | return wban; 43 | } 44 | 45 | public void setWban(String wban) { 46 | this.wban = wban; 47 | } 48 | 49 | public String getWmo() { 50 | return wmo; 51 | } 52 | 53 | public void setWmo(String wmo) { 54 | this.wmo = wmo; 55 | } 56 | 57 | public String getCallSign() { 58 | return callSign; 59 | } 60 | 61 | public void setCallSign(String callSign) { 62 | this.callSign = callSign; 63 | } 64 | 65 | public String getClimateDivisionCode() { 66 | return climateDivisionCode; 67 | } 68 | 69 | public void setClimateDivisionCode(String climateDivisionCode) { 70 | this.climateDivisionCode = climateDivisionCode; 71 | } 72 | 73 | public String getClimateDivisionStateCode() { 74 | return climateDivisionStateCode; 75 | } 76 | 77 | public void setClimateDivisionStateCode(String climateDivisionStateCode) { 78 | this.climateDivisionStateCode = climateDivisionStateCode; 79 | } 80 | 81 | public String getClimateDivisionStationCode() { 82 | return climateDivisionStationCode; 83 | } 84 | 85 | public void setClimateDivisionStationCode(String climateDivisionStationCode) { 86 | this.climateDivisionStationCode = climateDivisionStationCode; 87 | } 88 | 89 | public String getName() { 90 | return name; 91 | } 92 | 93 | public void setName(String name) { 94 | this.name = name; 95 | } 96 | 97 | public String getState() { 98 | return state; 99 | } 100 | 101 | public void setState(String state) { 102 | this.state = state; 103 | } 104 | 105 | public String getLocation() { 106 | return location; 107 | } 108 | 109 | public void setLocation(String location) { 110 | this.location = location; 111 | } 112 | 113 | public float getLatitude() { 114 | return latitude; 115 | } 116 | 117 | public void setLatitude(Float latitude) { 118 | this.latitude = latitude; 119 | } 120 | 121 | public float getLongitude() { 122 | return longitude; 123 | } 124 | 125 | public void setLongitude(Float longitude) { 126 | this.longitude = longitude; 127 | } 128 | 129 | public Integer getGroundHeight() { 130 | return groundHeight; 131 | } 132 | 133 | public void setGroundHeight(Integer groundHeight) { 134 | this.groundHeight = groundHeight; 135 | } 136 | 137 | public Integer getStationHeight() { 138 | return stationHeight; 139 | } 140 | 141 | public void setStationHeight(Integer stationHeight) { 142 | this.stationHeight = stationHeight; 143 | } 144 | 145 | public Integer getBarometer() { 146 | return barometer; 147 | } 148 | 149 | public void setBarometer(Integer barometer) { 150 | this.barometer = barometer; 151 | } 152 | 153 | public Integer getTimeZone() { 154 | return timeZone; 155 | } 156 | 157 | public void setTimeZone(Integer timeZone) { 158 | this.timeZone = timeZone; 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/java/csv/parser/Parsers.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Philipp Wagner. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | package csv.parser; 5 | 6 | import csv.mapping.LocalWeatherDataMapper; 7 | import csv.mapping.StationMapper; 8 | import csv.model.LocalWeatherData; 9 | import csv.model.Station; 10 | import de.bytefish.jtinycsvparser.CsvParser; 11 | import de.bytefish.jtinycsvparser.CsvParserOptions; 12 | import de.bytefish.jtinycsvparser.tokenizer.StringSplitTokenizer; 13 | 14 | public class Parsers { 15 | 16 | public static CsvParser StationParser() { 17 | 18 | return new CsvParser<>(new CsvParserOptions(true, new StringSplitTokenizer("\\|", true)), new StationMapper(() -> new Station())); 19 | } 20 | 21 | public static CsvParser LocalWeatherDataParser() 22 | { 23 | return new CsvParser<>(new CsvParserOptions(true, new StringSplitTokenizer(",", true)), new LocalWeatherDataMapper(() -> new LocalWeatherData())); 24 | } 25 | 26 | } 27 | -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/java/model/GeoLocation.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Philipp Wagner. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | package model; 5 | 6 | public class GeoLocation { 7 | 8 | private double lat; 9 | 10 | private double lon; 11 | 12 | public GeoLocation() { 13 | 14 | } 15 | 16 | public GeoLocation(double lat, double lon) { 17 | this.lat = lat; 18 | this.lon = lon; 19 | } 20 | 21 | public void setLat(double lat) { 22 | this.lat = lat; 23 | } 24 | 25 | public void setLon(double lon) { 26 | this.lon = lon; 27 | } 28 | 29 | public double getLat() { 30 | return lat; 31 | } 32 | 33 | public double getLon() { 34 | return lon; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/java/model/LocalWeatherData.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Philipp Wagner. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | package model; 5 | 6 | import java.time.LocalDate; 7 | import java.time.LocalTime; 8 | 9 | public class LocalWeatherData { 10 | 11 | private Station station; 12 | 13 | private LocalDate date; 14 | 15 | private LocalTime time; 16 | 17 | private Float temperature; 18 | 19 | private Float windSpeed; 20 | 21 | private Float stationPressure; 22 | 23 | private String skyCondition; 24 | 25 | public LocalWeatherData() { 26 | 27 | } 28 | 29 | public LocalWeatherData(Station station, LocalDate date, LocalTime time, Float temperature, Float windSpeed, Float stationPressure, String skyCondition) { 30 | this.station = station; 31 | this.date = date; 32 | this.time = time; 33 | this.temperature = temperature; 34 | this.windSpeed = windSpeed; 35 | this.stationPressure = stationPressure; 36 | this.skyCondition = skyCondition; 37 | } 38 | 39 | public void setStation(Station station) { 40 | this.station = station; 41 | } 42 | 43 | public void setDate(LocalDate date) { 44 | this.date = date; 45 | } 46 | 47 | public void setTime(LocalTime time) { 48 | this.time = time; 49 | } 50 | 51 | public void setTemperature(Float temperature) { 52 | this.temperature = temperature; 53 | } 54 | 55 | public void setWindSpeed(Float windSpeed) { 56 | this.windSpeed = windSpeed; 57 | } 58 | 59 | public void setStationPressure(Float stationPressure) { 60 | this.stationPressure = stationPressure; 61 | } 62 | 63 | public void setSkyCondition(String skyCondition) { 64 | this.skyCondition = skyCondition; 65 | } 66 | 67 | public Station getStation() { 68 | return station; 69 | } 70 | 71 | public LocalDate getDate() { 72 | return date; 73 | } 74 | 75 | public LocalTime getTime() { 76 | return time; 77 | } 78 | 79 | public Float getTemperature() { 80 | return temperature; 81 | } 82 | 83 | public Float getWindSpeed() { 84 | return windSpeed; 85 | } 86 | 87 | public Float getStationPressure() { 88 | return stationPressure; 89 | } 90 | 91 | public String getSkyCondition() { 92 | return skyCondition; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/java/model/Station.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Philipp Wagner. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | package model; 5 | 6 | public class Station { 7 | 8 | private String wban; 9 | 10 | private String name; 11 | 12 | private String state; 13 | 14 | private String location; 15 | 16 | private Integer timeZone; 17 | 18 | private GeoLocation geoLocation; 19 | 20 | public Station() { 21 | 22 | } 23 | 24 | public Station(String wban, String name, String state, String location, Integer timeZone, GeoLocation geoLocation) { 25 | this.wban = wban; 26 | this.name = name; 27 | this.state = state; 28 | this.location = location; 29 | this.timeZone = timeZone; 30 | this.geoLocation = geoLocation; 31 | } 32 | 33 | public void setWban(String wban) { 34 | this.wban = wban; 35 | } 36 | 37 | public void setName(String name) { 38 | this.name = name; 39 | } 40 | 41 | public void setState(String state) { 42 | this.state = state; 43 | } 44 | 45 | public void setLocation(String location) { 46 | this.location = location; 47 | } 48 | 49 | public void setTimeZone(Integer timeZone) { 50 | this.timeZone = timeZone; 51 | } 52 | 53 | public void setGeoLocation(GeoLocation geoLocation) { 54 | this.geoLocation = geoLocation; 55 | } 56 | 57 | public String getWban() { 58 | return wban; 59 | } 60 | 61 | public String getName() { 62 | return name; 63 | } 64 | 65 | public String getState() { 66 | return state; 67 | } 68 | 69 | public String getLocation() { 70 | return location; 71 | } 72 | 73 | public Integer getTimeZone() { 74 | return timeZone; 75 | } 76 | 77 | public GeoLocation getGeoLocation() { 78 | return geoLocation; 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/java/pgsql/converter/LocalWeatherDataConverter.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Philipp Wagner. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | package pgsql.converter; 5 | 6 | import java.time.LocalDateTime; 7 | 8 | public class LocalWeatherDataConverter { 9 | 10 | public static pgsql.model.LocalWeatherData convert(model.LocalWeatherData modelLocalWeatherData) { 11 | 12 | String wban = modelLocalWeatherData.getStation().getWban(); 13 | LocalDateTime dateTime = modelLocalWeatherData.getDate().atTime(modelLocalWeatherData.getTime()); 14 | Float temperature = modelLocalWeatherData.getTemperature(); 15 | Float windSpeed = modelLocalWeatherData.getWindSpeed(); 16 | Float stationPressure = modelLocalWeatherData.getStationPressure(); 17 | String skyCondition = modelLocalWeatherData.getSkyCondition(); 18 | 19 | return new pgsql.model.LocalWeatherData(wban, dateTime, temperature, windSpeed, stationPressure, skyCondition); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/java/pgsql/mapping/LocalWeatherDataMapping.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Philipp Wagner. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | package pgsql.mapping; 5 | 6 | import de.bytefish.pgbulkinsert.mapping.AbstractMapping; 7 | import pgsql.model.LocalWeatherData; 8 | 9 | public class LocalWeatherDataMapping extends AbstractMapping { 10 | 11 | public LocalWeatherDataMapping(String schemaName, String tableName) { 12 | 13 | super(schemaName, tableName); 14 | 15 | mapText("wban", pgsql.model.LocalWeatherData::getWban); 16 | mapTimeStamp("dateTime", pgsql.model.LocalWeatherData::getDateTime); 17 | mapFloat("temperature", pgsql.model.LocalWeatherData::getTemperature); 18 | mapFloat("windSpeed", pgsql.model.LocalWeatherData::getWindSpeed); 19 | mapFloat("stationPressure", pgsql.model.LocalWeatherData::getStationPressure); 20 | mapText("skyCondition", pgsql.model.LocalWeatherData::getSkyCondition); 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/java/pgsql/model/LocalWeatherData.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Philipp Wagner. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | package pgsql.model; 5 | 6 | import java.time.LocalDateTime; 7 | 8 | public class LocalWeatherData { 9 | 10 | private String wban; 11 | 12 | private LocalDateTime dateTime; 13 | 14 | private Float temperature; 15 | 16 | private Float windSpeed; 17 | 18 | private Float stationPressure; 19 | 20 | private String skyCondition; 21 | 22 | public LocalWeatherData(String wban, LocalDateTime dateTime, Float temperature, Float windSpeed, Float stationPressure, String skyCondition) { 23 | this.wban = wban; 24 | this.dateTime = dateTime; 25 | this.temperature = temperature; 26 | this.windSpeed = windSpeed; 27 | this.stationPressure = stationPressure; 28 | this.skyCondition = skyCondition; 29 | } 30 | 31 | public String getWban() { 32 | return wban; 33 | } 34 | 35 | public LocalDateTime getDateTime() { 36 | return dateTime; 37 | } 38 | 39 | public Float getTemperature() { 40 | return temperature; 41 | } 42 | 43 | public Float getWindSpeed() { 44 | return windSpeed; 45 | } 46 | 47 | public Float getStationPressure() { 48 | return stationPressure; 49 | } 50 | 51 | public String getSkyCondition() { 52 | return skyCondition; 53 | } 54 | } -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/java/utils/DateUtilities.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Philipp Wagner. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | package utils; 5 | 6 | import java.time.LocalDate; 7 | import java.time.LocalDateTime; 8 | import java.time.LocalTime; 9 | import java.time.ZoneOffset; 10 | import java.time.OffsetDateTime; 11 | import java.util.Date; 12 | 13 | public class DateUtilities { 14 | 15 | public static Date from(LocalDate localDate, LocalTime localTime, ZoneOffset zoneOffset) { 16 | LocalDateTime localDateTime = localDate.atTime(localTime); 17 | 18 | return from(localDateTime, zoneOffset); 19 | } 20 | 21 | public static Date from(LocalDateTime localDateTime, ZoneOffset zoneOffset) { 22 | OffsetDateTime offsetDateTime = localDateTime.atOffset(zoneOffset); 23 | 24 | return Date.from(offsetDateTime.toInstant()); 25 | } 26 | 27 | } -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/java/utils/StringUtils.java: -------------------------------------------------------------------------------- 1 | // Copyright (c) Philipp Wagner. All rights reserved. 2 | // Licensed under the MIT license. See LICENSE file in the project root for full license information. 3 | 4 | package utils; 5 | 6 | public class StringUtils { 7 | 8 | private StringUtils() {} 9 | 10 | public static String StringEmpty = ""; 11 | 12 | public static boolean isNullOrWhiteSpace(String input) { 13 | return input == null || input.equals(StringEmpty) || input.trim().length() == 0; 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | log4j.rootLogger=WARN, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n -------------------------------------------------------------------------------- /PostgresTimeseriesAnalysis/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 18 | 19 | 20 | 21 | 22 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{60} %X{sourceThread} - %msg%n 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PostgresTimeseriesAnalysis # 2 | 3 | There was an interesting article by the [jOOQ] team on how to fill gaps in data using SQL: 4 | 5 | * [Using IGNORE NULLS With SQL Window Functions to Fill Gaps] 6 | 7 | I have been experimenting with using SQL for Linear Interpolation two years ago. It would be a waste 8 | to silo all this in a private repository, so I decided to share it. 9 | 10 | The code is heavily based on a great article by [Caleb Welton](https://github.com/cwelton): 11 | 12 | * [Time Series Analysis Part 3: Resampling and Interpolation] 13 | 14 | This article can also be found on my personal website: 15 | 16 | * [https://bytefish.de/blog/postgresql_interpolation/](https://bytefish.de/blog/postgresql_interpolation/) 17 | 18 | To reproduce the example, please see the section [How to Reproduce this Experiment](#how-to-reproduce-this-experiment). 19 | 20 | ## Dataset ## 21 | 22 | The dataset is the [Quality Controlled Local Climatological Data (QCLCD)] for 2014 and 2015. It contains hourly weather 23 | measurements for more than 1,600 US Weather Stations. It is a great dataset to learn about data processing and data 24 | visualization: 25 | 26 | > The Quality Controlled Local Climatological Data (QCLCD) consist of hourly, daily, and monthly summaries for approximately 27 | > 1,600 U.S. locations. Daily Summary forms are not available for all stations. Data are available beginning January 1, 2005 28 | > and continue to the present. Please note, there may be a 48-hour lag in the availability of the most recent data. 29 | 30 | The data is available as CSV files at: 31 | 32 | * [http://www.ncdc.noaa.gov/orders/qclcd/](http://www.ncdc.noaa.gov/orders/qclcd/) 33 | 34 | Download the file ``QCLCD201503.zip`` from: 35 | 36 | * [http://www.ncdc.noaa.gov/orders/qclcd/](http://www.ncdc.noaa.gov/orders/qclcd/) 37 | 38 | [Quality Controlled Local Climatological Data (QCLCD)]: 39 | 40 | ## Are there missing values? ## 41 | 42 | Devices might break. Networks can be down. Disks still run full in 2019. The sky is the limit, when it comes to invalid or missing measurements in data. 43 | 44 | The [Quality Controlled Local Climatological Data (QCLCD)] has hourly measurements of weather stations. So to find missing data we will look for gaps in data greater than 1 hour. 45 | 46 | How can we do this? [Window Functions]! 47 | 48 | [Window Functions] can be hard to grasp and I can't go into all details here. The best introduction to [Window Functions] was written by [Dimitri Fontaine] and I highly recommend reading it: 49 | 50 | * [Understanding Window Functions] ([Dimitri Fontaine]) 51 | 52 | To identify gaps we first need a way calculate the interval between two timestamps, so I will 53 | define a function ``datediff_seconds`` to calculate the length between two timestamp values: 54 | 55 | ```sql 56 | CREATE OR REPLACE FUNCTION sample.datediff_seconds(start_t TIMESTAMP, end_t TIMESTAMP) 57 | RETURNS DOUBLE PRECISION AS $$ 58 | SELECT EXTRACT(epoch FROM $2 - $1) 59 | $$ LANGUAGE SQL; 60 | ``` 61 | 62 | Now we can use the [LAG] operator to identity gaps larger 3600 seconds, which is an hour: 63 | 64 | ```sql 65 | SELECT * 66 | FROM (SELECT 67 | weather_data.wban as wban, 68 | weather_data.datetime as current_datetime, 69 | LAG(weather_data.datetime, 1, NULL) OVER (PARTITION BY weather_data.wban ORDER BY weather_data.datetime) AS previous_datetime 70 | FROM sample.weather_data) lag_select 71 | WHERE sample.datediff_seconds (previous_datetime, current_datetime) > 3600; 72 | ``` 73 | 74 | And we can see there are 17,043 affected rows, which is the number of gaps in the data: 75 | 76 | ``` 77 | Successfully run. Total query runtime: 33 secs 590 msec. 78 | 17043 rows affected. 79 | ``` 80 | 81 | ## Linear Interpolation with SQL ## 82 | 83 | First of all we write a function to do a [Linear Interpolation] between two points: 84 | 85 | ```sql 86 | CREATE OR REPLACE FUNCTION sample.linear_interpolate(x_i DOUBLE PRECISION, 87 | x_0 DOUBLE PRECISION, 88 | y_0 DOUBLE PRECISION, 89 | x_1 DOUBLE PRECISION, 90 | y_1 DOUBLE PRECISION) 91 | RETURNS DOUBLE PRECISION AS $$ 92 | SELECT (($5 - $3) / ($4 - $2)) * ($1 - $2) + $3; 93 | $$ LANGUAGE SQL; 94 | ``` 95 | 96 | We are working with the ``TIMESTAMP`` datatype, so in order to put it into the ``linear_interpolate`` function, 97 | we need to transform the ``TIMESTAMP`` into its representation of seconds since epoch: 98 | 99 | ```sql 100 | CREATE OR REPLACE FUNCTION sample.timestamp_to_seconds(timestamp_t TIMESTAMP) 101 | RETURNS DOUBLE PRECISION AS $$ 102 | SELECT EXTRACT(epoch from timestamp_t) 103 | $$ LANGUAGE SQL; 104 | ``` 105 | 106 | This makes it possible to write an overload, that takes the timestamps and returns the interpolated value of a given timestamp ``x_i``: 107 | 108 | ```sql 109 | CREATE OR REPLACE FUNCTION sample.linear_interpolate(x_i TIMESTAMP, x_0 TIMESTAMP, y_0 DOUBLE PRECISION, x_1 TIMESTAMP, y_1 DOUBLE PRECISION) 110 | RETURNS DOUBLE PRECISION AS $$ 111 | SELECT sample.linear_interpolate(sample.timestamp_to_seconds($1), 112 | sample.timestamp_to_seconds($2), 113 | $3, 114 | sample.timestamp_to_seconds($4), 115 | $5); 116 | $$ LANGUAGE SQL; 117 | ``` 118 | 119 | And that's it? 120 | 121 | ## Linear Interpolation of the QCLCD Weather Data ## 122 | 123 | As a final example I want to show how to use the functions to interpolate the sample weather data, which had 17,043 missing measurements. 124 | 125 | The idea is quite simple: First of all we will put all measurements into a time slice of a given interval length. So we know, that we have 126 | a value for the expected point in time. We will then build a dense series using the [generate_series] method with the given ``slice_t`` interval, which has all the slices we expect. 127 | 128 | The ``bounded_series`` and ``dense_series`` will then be joined, which means: The joined series will have ``NULL`` for the measurements, which indicates the slice has to be interpolated. A custom function will be used to identify the first and last non-null value of a window, so we get the two points for the ``linear_interpolate`` function. 129 | 130 | To make this work we need to ignore ``NULL`` values, just like in the [jOOQ] article. The PostgreSQL wiki has a great article on it, which shows how to implement such a function with a PostgreSQL ``AGGREGATE``: 131 | 132 | * [https://wiki.postgresql.org/wiki/First/last_(aggregate)](https://wiki.postgresql.org/wiki/First/last_(aggregate)) 133 | 134 | I simply copy and paste it: 135 | 136 | ```sql 137 | CREATE OR REPLACE FUNCTION sample.last_agg ( anyelement, anyelement ) 138 | RETURNS anyelement LANGUAGE SQL IMMUTABLE STRICT AS $$ 139 | SELECT $2; 140 | $$; 141 | 142 | CREATE AGGREGATE sample.LAST ( 143 | sfunc = sample.last_agg, 144 | basetype = anyelement, 145 | stype = anyelement 146 | ); 147 | ``` 148 | 149 | And finally we can write the function to interpolate the measurements: 150 | 151 | ```sql 152 | CREATE OR REPLACE FUNCTION sample.interpolate_temperature(wban_p TEXT, start_t TIMESTAMP, end_t TIMESTAMP, slice_t INTERVAL) 153 | RETURNS TABLE( 154 | r_wban TEXT, 155 | r_slice TIMESTAMP, 156 | min_temp DOUBLE PRECISION, 157 | max_temp DOUBLE PRECISION, 158 | avg_temp DOUBLE PRECISION 159 | ) AS $$ 160 | -- bounded_series assigns all values into a time slice with a given interval length in slice_t: 161 | WITH bounded_series AS ( 162 | SELECT wban, 163 | datetime, 164 | 'epoch'::timestamp + $4 * (extract(epoch from datetime)::int4 / EXTRACT(epoch FROM $4)::int4) AS slice, 165 | temperature 166 | FROM sample.weather_data w 167 | WHERE w.wban = $1 168 | ORDER BY wban, slice, datetime ASC 169 | ), 170 | -- dense_series uses generate_series to generate the intervals we expect in the data: 171 | dense_series AS ( 172 | SELECT $1 as wban, slice 173 | FROM generate_series($2, $3, $4) s(slice) 174 | ORDER BY wban, slice 175 | ), 176 | -- filled_series now uses a WINDOW function for find the first / last not null 177 | -- value in a WINDOW and uses sample.linear_interpolate to interpolate the slices 178 | -- between both values. 179 | -- 180 | -- Finally we have to GROUP BY the slice and wban and take the AVG, MIN and MAX 181 | -- value in the slice. You can also add more Operators there, it is just an 182 | -- example: 183 | filled_series AS ( 184 | SELECT wban, 185 | slice, 186 | temperature, 187 | COALESCE(temperature, sample.linear_interpolate(slice, 188 | sample.last(datetime) over (lookback), 189 | sample.last(temperature) over (lookback), 190 | sample.last(datetime) over (lookforward), 191 | sample.last(temperature) over (lookforward))) interpolated 192 | FROM bounded_series 193 | RIGHT JOIN dense_series USING (wban, slice) 194 | WINDOW 195 | lookback AS (ORDER BY slice, datetime), 196 | lookforward AS (ORDER BY slice DESC, datetime DESC) 197 | ORDER BY slice, datetime) 198 | SELECT wban AS r_wban, 199 | slice AS r_slice, 200 | MIN(interpolated) as min_temp, 201 | MAX(interpolated) as max_temp, 202 | AVG(interpolated) as avg_temp 203 | FROM filled_series 204 | GROUP BY slice, wban 205 | ORDER BY wban, slice; 206 | 207 | $$ LANGUAGE SQL; 208 | ``` 209 | 210 | With the function we can now interpolate the temperature for a given station with any interval: 211 | 212 | ```sql 213 | SELECT * FROM sample.interpolate_temperature('00102', '2015-03-23', '2015-03-30', '1 hour'::interval) 214 | ``` 215 | 216 | And that's it! 217 | 218 | ## How to Reproduce this Experiment ## 219 | 220 | The was a highly interesting article on the [Machine Learning Reproducibility crisis] lately, which discussed the 221 | problem of reproducing the results of Machine Learning papers. It's something I also felt long time ago, that's 222 | why you will always be able to reproduce the examples I share in this blog. 223 | 224 | It's probably best to add a section on how to reproduce this article and use the example. 225 | 226 | ### Database ### 227 | 228 | #### Creating a User #### 229 | 230 | Create the user ``philipp`` for connecting to the databases: 231 | 232 | ``` 233 | postgres=# CREATE USER philipp WITH PASSWORD 'test_pwd'; 234 | CREATE ROLE 235 | ``` 236 | 237 | Then we can create the test database ``sampledb`` and set the owner to ``philipp``: 238 | 239 | ``` 240 | postgres=# CREATE DATABASE sampledb WITH OWNER philipp; 241 | ``` 242 | 243 | #### Creating the Database #### 244 | 245 | There are two scripts to create the database in the following folder of the project: 246 | 247 | * [PostgresTimeseriesAnalysis/sql] 248 | 249 | To create the database execute the ``create_database.bat`` (Windows) or ``create_database.sh`` (Linux). 250 | 251 | Alternatively you can simply copy and paste [10_create_database.sql] and [20_sample_data.sql] into an editor of your choice and execute it. 252 | 253 | [PostgresTimeseriesAnalysis/sql]: https://github.com/bytefish/PostgresTimeseriesAnalysis/tree/master/PostgresTimeseriesAnalysis/sql 254 | [10_create_database.sql]: https://github.com/bytefish/PostgresTimeseriesAnalysis/blob/master/PostgresTimeseriesAnalysis/sql/sql/10_create_database.sql 255 | [20_sample_data.sql]: https://github.com/bytefish/PostgresTimeseriesAnalysis/blob/master/PostgresTimeseriesAnalysis/sql/sql/20_sample_data.sql 256 | 257 | #### Enable PostgreSQL Statistics #### 258 | 259 | Find out which ``postgresql.config`` is currently loaded: 260 | 261 | ```sql 262 | -- Show the currently used config file: 263 | SHOW config_file; 264 | ``` 265 | 266 | The ``pg_stat_statements`` module must be configured in the ``postgresq.conf``: 267 | 268 | ``` 269 | shared_preload_libraries='pg_stat_statements' 270 | 271 | pg_stat_statements.max = 10000 272 | pg_stat_statements.track = all 273 | ``` 274 | 275 | Now we can load the ``pg_stat_statements`` and query the most recent queries: 276 | 277 | ```sql 278 | -- Load the pg_stat_statements: 279 | create extension pg_stat_statements; 280 | 281 | -- Show recent Query statistics: 282 | select * 283 | from pg_stat_statements 284 | order by queryid desc; 285 | ``` 286 | 287 | #### Enable Parallel Queries #### 288 | 289 | Find out, which ``postgresql.config`` is currently loaded: 290 | 291 | ```sql 292 | -- Show the currently used config file: 293 | SHOW config_file; 294 | ``` 295 | 296 | Then set the parameters ``max_worker_processes``and ``max_parallel_workers_per_gather``: 297 | 298 | ``` 299 | max_worker_processes = 8 # (change requires restart) 300 | max_parallel_workers_per_gather = 4 # taken from max_worker_processes 301 | ``` 302 | 303 | ### Dataset ### 304 | 305 | The dataset is the [Quality Controlled Local Climatological Data (QCLCD)] for 2014 and 2015. It contains hourly weather 306 | measurements for more than 1,600 US Weather Stations. It is a great dataset to learn about data processing and data 307 | visualization: 308 | 309 | > The Quality Controlled Local Climatological Data (QCLCD) consist of hourly, daily, and monthly summaries for approximately 310 | > 1,600 U.S. locations. Daily Summary forms are not available for all stations. Data are available beginning January 1, 2005 311 | > and continue to the present. Please note, there may be a 48-hour lag in the availability of the most recent data. 312 | 313 | The data is available as CSV files at: 314 | 315 | * [http://www.ncdc.noaa.gov/orders/qclcd/](http://www.ncdc.noaa.gov/orders/qclcd/) 316 | 317 | Download the file ``QCLCD201503.zip`` from: 318 | 319 | * [http://www.ncdc.noaa.gov/orders/qclcd/](http://www.ncdc.noaa.gov/orders/qclcd/) 320 | 321 | ### Application ### 322 | 323 | The application is a Java application, which can be started with an IDE of your choice: 324 | 325 | * [WeatherDataStreamingExample.java] 326 | 327 | You probably need to adjust the connection string to the database: 328 | 329 | ```java 330 | private static final String databaseUri = "jdbc:postgresql://127.0.0.1:5432/sampledb?user=philipp&password=test_pwd"; 331 | ``` 332 | 333 | And change the path to the CSV files, if the path differs: 334 | 335 | ```java 336 | final Path csvStationDataFilePath = FileSystems.getDefault().getPath("D:\\datasets\\201503station.txt"); 337 | final Path csvLocalWeatherDataFilePath = FileSystems.getDefault().getPath("D:\\datasets\\201503hourly.txt"); 338 | ``` 339 | 340 | Once executed the application parses the CSV files and writes the data into the specified database. 341 | 342 | [WeatherDataStreamingExample.java]: https://github.com/bytefish/PostgresTimeseriesAnalysis/blob/master/PostgresTimeseriesAnalysis/src/main/java/app/WeatherDataStreamingExample.java 343 | [jOOQ]: https://www.jooq.org/ 344 | [Using IGNORE NULLS With SQL Window Functions to Fill Gaps]: https://blog.jooq.org/2019/04/24/using-ignore-nulls-with-sql-window-functions-to-fill-gaps/ 345 | [Time Series Analysis Part 3: Resampling and Interpolation]: https://content.pivotal.io/blog/time-series-analysis-part-3-resampling-and-interpolation 346 | [Machine Learning Reproducibility crisis]: https://towardsdatascience.com/why-git-and-git-lfs-is-not-enough-to-solve-the-machine-learning-reproducibility-crisis-f733b49e96e8 347 | [generate_series]: https://www.postgresql.org/docs/current/functions-srf.html 348 | [Linear Interpolation]: https://en.wikipedia.org/wiki/Linear_interpolation 349 | [Window Functions]: https://www.postgresql.org/docs/current/functions-window.html 350 | [Understanding Window Functions]: https://tapoueh.org/blog/2013/08/understanding-window-functions/ 351 | [Dimitri Fontaine]: https://tapoueh.org 352 | [LAG]: https://docs.microsoft.com/en-us/sql/t-sql/functions/lag-transact-sql 353 | [Quality Controlled Local Climatological Data (QCLCD)]: https://www.ncdc.noaa.gov/data-access/land-based-station-data/land-based-datasets/quality-controlled-local-climatological-data-qclcd 354 | 355 | ## Additional Resources ## 356 | 357 | * http://tapoueh.org/blog/2013/08/20-Window-Functions 358 | * https://www.compose.com/articles/metrics-maven-window-functions-in-postgresql/ 359 | * https://wiki.postgresql.org/images/a/a2/PostgreSQL_Window_Functions.pdf 360 | * http://blog.cleverelephant.ca/2016/03/parallel-postgis.html 361 | 362 | ### Resampling and Interpolation ### 363 | 364 | * https://content.pivotal.io/blog/time-series-analysis-part-3-resampling-and-interpolation 365 | 366 | 367 | [PostgreSQL]: https://www.postgresql.org 368 | [Quality Controlled Local Climatological Data (QCLCD)]: https://www.ncdc.noaa.gov/data-access/land-based-station-data/land-based-datasets/quality-controlled-local-climatological-data-qclcd 369 | --------------------------------------------------------------------------------