├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── LICENSE.txt ├── README.md ├── bench ├── config_java.yml ├── config_jruby.yml ├── config_jruby_formatter.yml ├── config_jruby_parser.yml ├── config_nano.yml └── gen_dummy.rb ├── build.gradle ├── config └── checkstyle │ └── checkstyle.xml ├── example ├── bracket_notation.txt ├── bracket_notation.yml ├── empty.yml ├── example.yml ├── from_double.csv ├── from_double.txt ├── from_double.yml ├── from_long.csv ├── from_long.txt ├── from_long.yml ├── from_string.csv ├── from_string.txt ├── from_string.yml ├── from_string_auto_java.txt ├── from_string_auto_java.yml ├── from_string_java.txt ├── from_string_java.yml ├── from_timestamp.csv ├── from_timestamp.txt ├── from_timestamp.yml ├── nested.jsonl ├── nested.txt ├── nested.yml ├── timezone.csv ├── timezone.txt └── timezone.yml ├── gradle ├── dependency-locks │ └── embulkPluginRuntime.lockfile └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── settings.gradle └── src ├── main └── java │ └── org │ └── embulk │ └── filter │ └── timestamp_format │ ├── ColumnCaster.java │ ├── ColumnVisitorImpl.java │ ├── JsonCaster.java │ ├── JsonPathUtil.java │ ├── JsonVisitor.java │ ├── TimestampFormatConverter.java │ ├── TimestampFormatFilterPlugin.java │ ├── TimestampFormatter.java │ ├── TimestampParser.java │ ├── TimestampUnit.java │ ├── TimestampUnitDeserializer.java │ └── cast │ ├── DoubleCast.java │ ├── LongCast.java │ ├── StringCast.java │ └── TimestampCast.java └── test └── java └── org └── embulk └── filter └── timestamp_format ├── TestTimestampFormatConverter.java ├── TestTimestampParser.java └── TestTimestampUnit.java /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | /pkg/ 3 | /tmp/ 4 | *.gemspec 5 | .gradle/ 6 | /classpath/ 7 | build/ 8 | .idea 9 | .tags 10 | .ruby-version 11 | *.iml 12 | dummy.csv 13 | 
-------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | jdk: 3 | - oraclejdk8 4 | script: 5 | - ./gradlew test 6 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.4.0 (2021-06-10) 2 | 3 | Enhancements: 4 | 5 | * Build it with the "org.embulk.embulk-plugins" Gradle plugin 6 | [ 7 | # 0.3.3 (2019-02-19) 8 | 9 | Fixes: 10 | 11 | * Avoid symlinks to support installing on Windows (thanks to hiroyuki-sato) 12 | 13 | # 0.3.2 (2019-01-11) 14 | 15 | Enhancements: 16 | 17 | * Support Embulk 0.9.x (thanks to @sakama) 18 | 19 | # 0.3.1 (2017-08-26) 20 | 21 | Enhancements: 22 | 23 | * Use old, but non-deprecated TimestampParser API to support embulk < 0.8.29 24 | 25 | # 0.3.0 (2017-08-23) 26 | 27 | Changes: 28 | 29 | * Support new TimestampFormatter and TimestampParser API of embulk >= 0.8.29 30 | * Note that this plugin now requires embulk >= 0.8.29 31 | 32 | # 0.2.5 (2017-07-11) 33 | 34 | Enhancements: 35 | 36 | * Leverage new faster jruby timestamp parser introduced in embulk 0.8.27. 
37 | 38 | # 0.2.4 (2016-11-06) 39 | 40 | Enhancements: 41 | 42 | * Support jsonpath bracket notation 43 | 44 | # 0.2.3 (2016-10-25) 45 | 46 | Fixes: 47 | 48 | * Fix the case of top-level array such as `$.array_timestamp[*]` 49 | 50 | # 0.2.2 (2016-10-07) 51 | 52 | yanked 53 | 54 | # 0.2.1 (2016-05-19) 55 | 56 | Enhancements: 57 | 58 | * Support JSONPath array wildcard 59 | 60 | # 0.2.0 (2016-05-13) 61 | 62 | Enhancements: 63 | 64 | * Add `timestamp_format: auto_java` option (experimental) 65 | 66 | # 0.1.9 (2016-05-10) 67 | 68 | Enhancements: 69 | 70 | * Support nano second resolution for Java formatter 71 | 72 | # 0.1.8 (2016-05-10) 73 | 74 | Enhancements: 75 | 76 | * Support nano second resolution for Java parser 77 | 78 | # 0.1.7 (2016-05-09) 79 | 80 | Enhancements: 81 | 82 | * Use Joda-Time DateTimeFormat instead of SimpleDateFormat for Java timestamp parser/formatter 83 | * to be thread-safe 84 | * to fix ss.SSS resolves 1.1 as 1.001 seconds wrongly 85 | 86 | # 0.1.6 (2016-05-01) 87 | 88 | Enhancements: 89 | 90 | * Support unixtimestamp unit such as milli sec, micro sec, nano sec 91 | * Support Java timestamp parser/formatter (SimpleDateFormat) 92 | 93 | # 0.1.5 (2016-04-29) 94 | 95 | Enhancements: 96 | 97 | * Support to cast from/into timestamp 98 | * Support to cast into long/double (unixtimestamp) 99 | 100 | # 0.1.4 (2016-04-26) 101 | 102 | Enhancements: 103 | 104 | * Performance Improvement by avoiding unnecessary visiting 105 | 106 | # 0.1.3 (2016-04-26) 107 | 108 | Fixes: 109 | 110 | * Fix to see all from_format 111 | 112 | # 0.1.2 (2016-04-26) 113 | 114 | Changes: 115 | 116 | * Relax ConfigException 117 | 118 | # 0.1.1 (2016-04-26) 119 | 120 | Enhancements: 121 | 122 | * Check whether specified columns exist 123 | 124 | # 0.1.0 (2016-04-26) 125 | 126 | initial version 127 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 
| MIT License 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining 5 | a copy of this software and associated documentation files (the 6 | "Software"), to deal in the Software without restriction, including 7 | without limitation the rights to use, copy, modify, merge, publish, 8 | distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so, subject to 10 | the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Timestamp format filter plugin for Embulk 2 | 3 | [![Build Status](https://secure.travis-ci.org/sonots/embulk-filter-timestamp_format.png?branch=master)](http://travis-ci.org/sonots/embulk-filter-timestamp_format) 4 | 5 | A filter plugin for Embulk to change timestamp format 6 | 7 | ## Configuration 8 | 9 | - **columns**: columns to retain (array of hash) 10 | - **name**: name of column (required) 11 | - **type**: type to cast, choose one of `string`, `timestamp`, `long` (unixtimestamp), `double` (unixtimestamp) (string, default is `string`) 12 | - **from_format**: specify the format of the input string (array of strings, default is default_from_timestamp_format) 13 | - **from_timezone**: specify the timezone of the input string (string, default is default_from_timezone) 14 | - **to_format**: specify the format of the output string (string, default is default_to_timestamp_format) 15 | - **to_timezone**: specify the timezone of the output string (string, default is default_to_timezone) 16 | - **from_unit**: specify the time unit of the input unixtimestamp (string, default is default_from_timestamp_unit) 17 | - **to_unit**: specify the time unit of the output unixtimestamp (string, default is default_to_timestamp_unit) 18 | - **default_from_timestamp_format**: default timestamp format for the input string (array of strings, default is `["%Y-%m-%d %H:%M:%S.%N %z"]`) 19 | - **default_from_timezone**: default timezone for the input string (string, default is `UTC`) 20 | - **default_to_timestamp_format**: default timestamp format for the output string (string, default is `%Y-%m-%d %H:%M:%S.%N %z`) 21 | - **default_to_timezone**: default timezone for the output string (string, default is `UTC`) 22 | - **default_from_timestamp_unit**: default time unit such as `sec` (for second), `ms` (for 
milli second), `us` (for micro second), `ns` (for nano second) for the input unixtimestamp (string, default is `second`) 23 | - **default_to_timestamp_unit**: default time unit such as `sec` (for second), `ms` (for milli second), `us` (for micro second), `ns` (for nano second) for the output unixtimestamp (string, default is `second`) 24 | - **stop_on_invalid_record**: stop bulk load transaction if an invalid record is found (boolean, default is `false`) 25 | - **timestamp_parser** (experimental): set `auto_java` to try to convert ruby format to java format to use faster java timestamp parser (string, default is `auto`) 26 | 27 | ## Example 28 | 29 | Say example.jsonl is as follows (this is a typical format on exporting a BigQuery table): 30 | 31 | ``` 32 | {"timestamp":"2015-07-12 15:00:00 UTC","nested":{"timestamp":"2015-07-12 15:00:00 UTC"}} 33 | {"timestamp":"2015-07-12 15:00:00.1 UTC","nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"}} 34 | ``` 35 | 36 | ```yaml 37 | in: 38 | type: file 39 | path_prefix: example/example.jsonl 40 | parser: 41 | type: jsonl # not json parser 42 | columns: 43 | - {name: timestamp, type: string} 44 | - {name: nested, type: json} 45 | filters: 46 | - type: timestamp_format 47 | default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"] 48 | default_to_timezone: "Asia/Tokyo" 49 | default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N" 50 | columns: 51 | - {name: timestamp, type: long, to_unit: ms} 52 | - {name: $.nested.timestamp} 53 | out: 54 | type: stdout 55 | ``` 56 | 57 | Output will be as: 58 | 59 | ``` 60 | {"timestamp":1436713200000,"nested":{"timestamp":"2015-07-13 00:00:00.0"}} 61 | {"timestamp":1436713200100,"nested":{"timestamp":"2015-07-13 00:00:00.1"}} 62 | ``` 63 | 64 | See [./example](./example) for more examples. 
65 | 66 | ## JSONPath 67 | 68 | For `type: json` column, you can specify [JSONPath](http://goessner.net/articles/JsonPath/) for column's name as: 69 | 70 | ``` 71 | name: $.payload.key1 72 | name: "$.payload.array[0]" 73 | name: "$.payload.array[*]" 74 | name: $['payload']['key1.key2'] 75 | ``` 76 | 77 | Following operators of JSONPath are not supported: 78 | 79 | * Multiple properties such as `['name','name']` 80 | * Multiple array indexes such as `[1,2]` 81 | * Array slice such as `[1:2]` 82 | * Filter expression such as `[?()]` 83 | 84 | ## JRuby Timestamp Parser Performance Issue 85 | 86 | **NEWS: (2017/07/10) embulk 0.8.27 is released with a fast Timestamp jruby parser. This issue should be resolved, so Java Timestamp parser support will be dropped in future releases.** 87 | 88 | Embulk's timestamp parser originally uses jruby implementation, but it is slow. 89 | To improve performance, this plugin also supports Java's Joda-Time [DateTimeFormat](http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) format as: 90 | 91 | ```yaml 92 | in: 93 | type: file 94 | path_prefix: example/example.jsonl 95 | parser: 96 | type: jsonl 97 | columns: 98 | - {name: timestamp, type: string} 99 | - {name: nested, type: json} 100 | filters: 101 | - type: timestamp_format 102 | default_from_timestamp_format: ["yyyy-MM-dd HH:mm:ss.SSS z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"] 103 | default_to_timezone: "Asia/Taipei" 104 | default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.SSS Z" 105 | columns: 106 | - {name: timestamp, type: long, to_unit: ms} 107 | - {name: $.nested.timestamp} 108 | out: 109 | type: stdout 110 | ``` 111 | 112 | If format strings contain `%`, jruby parser/formatter is used. 
Otherwise, java parser/formatter is used 113 | 114 | **Automatic Conversion of Ruby Timestamp Format to Java Timestamp Format** (experimental) 115 | 116 | If you configure `timestamp_parser: auto_java`, this plugin tries to convert ruby format into java format automatically to use faster java timestamp parser. 117 | 118 | **COMPARISON:** 119 | 120 | Benchmark test sets are available at [./bench](./bench). In my environment (Mac Book Pro), for 1000000 timestamps: 121 | 122 | * java parser + java formatter: 1.3s 123 | * java parser + jruby formatter: 1.4s 124 | * jruby parser + java formatter: 64.52s 125 | * jruby parser + jruby formatter: 65.06s 126 | 127 | JRuby parser is slow, but JRuby formatter is not so slow. 128 | 129 | ## Nano Resolution 130 | 131 | JRuby parser has micro second resolution. Java (Joda-Time) parser has milli second resolution. 132 | 133 | Nano second resolution is partially supported by this plugin itself. Use parser format `nnnnnnnnn` for Java parser as 134 | 135 | ``` 136 | yyyy-MM-dd HH:mm:ss.nnnnnnnnn z 137 | ``` 138 | 139 | This plugin finds places of nano second from texts with regular expression `\.(\d+)`. 140 | 141 | For formatter, you can use `nnnnnnnnn` for nano and `nnnnnn` for micro as 142 | 143 | ``` 144 | yyyy-MM-dd HH:mm:ss.nnnnnnnnn z 145 | yyyy-MM-dd HH:mm:ss.nnnnnn z 146 | ``` 147 | 148 | FYI: Java8's DateTimeFormatter supports nano second resolution, but we cannot use it because embulk supports Java7. 
149 | 150 | ## ToDo 151 | 152 | * Write test 153 | 154 | ## Development 155 | 156 | Run example: 157 | 158 | ``` 159 | $ ./gradlew classpath 160 | $ embulk preview -I lib example/example.yml 161 | ``` 162 | 163 | Run test: 164 | 165 | ``` 166 | $ ./gradlew test 167 | ``` 168 | 169 | Run checkstyle: 170 | 171 | ``` 172 | $ ./gradlew check 173 | ``` 174 | 175 | Release gem: 176 | 177 | ``` 178 | $ ./gradlew gemPush 179 | ``` 180 | -------------------------------------------------------------------------------- /bench/config_java.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: bench/dummy 4 | parser: 5 | type: csv 6 | columns: 7 | - {name: timestamp, type: string} 8 | filters: 9 | - type: timestamp_format 10 | stop_on_invalid_record: true 11 | columns: 12 | - {name: timestamp, from_format: ["yyyy-MM-dd HH:mm:ss.SSSSSSSSS"], to_format: "yyyy-MM-dd"} 13 | out: 14 | type: "null" 15 | -------------------------------------------------------------------------------- /bench/config_jruby.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: bench/dummy 4 | parser: 5 | type: csv 6 | columns: 7 | - {name: timestamp, type: string} 8 | filters: 9 | - type: timestamp_format 10 | stop_on_invalid_record: true 11 | columns: 12 | - {name: timestamp, from_format: ["%Y-%m-%d %H:%M:%S.%N"], to_format: "%Y-%m-%d"} 13 | 14 | out: 15 | type: "null" 16 | -------------------------------------------------------------------------------- /bench/config_jruby_formatter.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: bench/dummy 4 | parser: 5 | type: csv 6 | columns: 7 | - {name: timestamp, type: string} 8 | filters: 9 | - type: timestamp_format 10 | stop_on_invalid_record: true 11 | columns: 12 | - {name: timestamp, from_format: ["yyyy-MM-dd HH:mm:ss.SSSSSSSSS"], to_format: 
"%Y-%m-%d"} 13 | out: 14 | type: "null" 15 | -------------------------------------------------------------------------------- /bench/config_jruby_parser.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: bench/dummy 4 | parser: 5 | type: csv 6 | columns: 7 | - {name: timestamp, type: string} 8 | filters: 9 | - type: timestamp_format 10 | stop_on_invalid_record: true 11 | columns: 12 | - {name: timestamp, from_format: ["%Y-%m-%d %H:%M:%S.%N"], to_format: "yyyy-MM-dd"} 13 | out: 14 | type: "null" 15 | -------------------------------------------------------------------------------- /bench/config_nano.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: bench/dummy 4 | parser: 5 | type: csv 6 | columns: 7 | - {name: timestamp, type: string} 8 | filters: 9 | - type: timestamp_format 10 | stop_on_invalid_record: true 11 | columns: 12 | - {name: timestamp, from_format: ["yyyy-MM-dd HH:mm:ss.nnnnnnnnn"], to_format: "yyyy-MM-dd HH:mm:ss.nnnnnn"} 13 | out: 14 | type: "null" 15 | -------------------------------------------------------------------------------- /bench/gen_dummy.rb: -------------------------------------------------------------------------------- 1 | File::open('bench/dummy.csv', 'w') { |f| 2 | (1..1000000).each { 3 | f.puts(Time.now.strftime('%Y-%m-%d %H:%M:%S.%9N')) 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id "java" 3 | id "checkstyle" 4 | id "maven-publish" 5 | id "org.embulk.embulk-plugins" version "0.4.2" 6 | } 7 | 8 | repositories { 9 | mavenCentral() 10 | jcenter() 11 | } 12 | 13 | group = "io.github.sonots" 14 | version = "0.4.0" 15 | description = "A filter plugin for Embulk to change timestamp format." 
16 | 17 | sourceCompatibility = 1.8 18 | targetCompatibility = 1.8 19 | 20 | dependencies { 21 | compileOnly "org.embulk:embulk-core:0.9.23" 22 | compile("io.github.medjed:JsonPathCompiler:0.1.2") { 23 | exclude group: "org.apache.commons", module: "commons-lang3" 24 | exclude group: "org.slf4j", module: "slf4j-api" 25 | } 26 | 27 | testCompile "org.embulk:embulk-core:0.9.23:tests" 28 | testCompile "org.embulk:embulk-standards:0.9.23" 29 | // TODO: Remove them. 30 | // These `testCompile` are a tentative workaround. It will be covered in Embulk core's testing mechanism. 31 | testCompile "org.embulk:embulk-deps-buffer:0.9.23" 32 | testCompile "org.embulk:embulk-deps-config:0.9.23" 33 | testCompile "junit:junit:4.+" 34 | } 35 | 36 | checkstyle { 37 | toolVersion = '6.7' 38 | } 39 | 40 | embulkPlugin { 41 | mainClass = "org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin" 42 | category = "filter" 43 | type = "timestamp_format" 44 | } 45 | 46 | // TODO: enable when the pure-java plugin publish. 47 | //publishing { 48 | // publications { 49 | // embulkPluginMaven(MavenPublication) { // Publish it with "publishEmbulkPluginMavenPublicationToMavenRepository". 50 | // from components.java // Must be "components.java". The dependency modification works only for it. 
51 | // } 52 | // } 53 | // repositories { 54 | // maven { 55 | // url = "${project.buildDir}/mavenPublishLocal" 56 | // } 57 | // } 58 | //} 59 | 60 | gem { 61 | from("LICENSE.txt") 62 | authors = [ "Naotoshi Seo" ] 63 | email = [ "sonots@gmail.com" ] 64 | summary = "A filter plugin for Embulk to change timestamp format" 65 | homepage = "https://github.com/sonots/embulk-filter-timestamp_format" 66 | licenses = [ "MIT" ] 67 | } 68 | 69 | gemPush { 70 | host = "https://rubygems.org" 71 | } 72 | -------------------------------------------------------------------------------- /config/checkstyle/checkstyle.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /example/bracket_notation.txt: -------------------------------------------------------------------------------- 1 | 2017-08-23 17:24:55.119 +0900: Embulk v0.8.30 2 | 2017-08-23 17:24:59.552 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path 3 | 2017-08-23 17:24:59.617 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl' 4 | 2017-08-23 17:24:59.618 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped. 
5 | 2017-08-23 17:24:59.625 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl] 6 | 2017-08-23 17:24:59.636 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source 7 | +-----------------------------------------------------------------------------------------------------------------------------------------------------------+ 8 | | record:json | 9 | +-----------------------------------------------------------------------------------------------------------------------------------------------------------+ 10 | | {"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"},"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"timestamp":1436713200000} | 11 | | {"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"},"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"timestamp":1436713200100} | 12 | +-----------------------------------------------------------------------------------------------------------------------------------------------------------+ 13 | -------------------------------------------------------------------------------- /example/bracket_notation.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: example/nested.jsonl 4 | parser: 5 | type: json 6 | filters: 7 | - type: timestamp_format 8 | default_to_timezone: "Asia/Tokyo" 9 | default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N" 10 | columns: 11 | - {name: "$['record']['timestamp']", type: long, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_unit: ms} 12 | - {name: "$['record']['nested']['nested'][0]['timestamp']", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]} 13 | out: 14 | type: "null" 15 | -------------------------------------------------------------------------------- /example/empty.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: 
example/example.jsonl 4 | parser: 5 | type: json 6 | filters: 7 | - type: timestamp_format 8 | out: 9 | type: "null" 10 | -------------------------------------------------------------------------------- /example/example.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: example/from_string.csv 4 | parser: 5 | type: csv 6 | columns: 7 | - {name: string1, type: string} 8 | - {name: string2, type: string} 9 | - {name: string3, type: string} 10 | - {name: string4, type: string} 11 | - {name: record, type: json} 12 | filters: 13 | - type: timestamp_format 14 | default_from_timezone: "Asia/Taipei" 15 | default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %z", "%Y-%m-%d"] 16 | default_to_timezone: "Asia/Taipei" 17 | default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N" 18 | columns: 19 | - {name: string1, type: string} 20 | - {name: string2, type: long, to_unit: ms} 21 | - {name: string3, type: double, to_unit: ms} 22 | - {name: string4, type: timestamp} 23 | - {name: $.record.string1, to_timezone: "Asia/Taipei", to_format: "%Y-%m-%d %H:%M:%S.%N"} 24 | - {name: $.record.string2, type: long, to_unit: ms} 25 | - {name: $.record.string3, type: double, to_unit: ms} 26 | out: 27 | type: "null" 28 | -------------------------------------------------------------------------------- /example/from_double.csv: -------------------------------------------------------------------------------- 1 | 1436713200100.2,1436713200100.2,1436713200100.2,1436713200100.2,"{""double1"":1436713200100.2,""double2"":1436713200100.2,""double3"":1436713200100.2}" 2 | -------------------------------------------------------------------------------- /example/from_double.txt: -------------------------------------------------------------------------------- 1 | 2017-08-23 17:25:14.951 +0900: Embulk v0.8.30 2 | 2017-08-23 17:25:19.079 +0900 [INFO] (0001:preview): Loaded plugin 
embulk/filter/timestamp_format from a load path 3 | 2017-08-23 17:25:19.132 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_double.csv' 4 | 2017-08-23 17:25:19.133 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped. 5 | 2017-08-23 17:25:19.138 +0900 [INFO] (0001:preview): Loading files [example/from_double.csv] 6 | 2017-08-23 17:25:19.151 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source 7 | +-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+ 8 | | double1:string | double2:long | double3:double | double4:timestamp | record:json | 9 | +-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+ 10 | | 2015-07-13 00:00:00.100199936 | 1,436,713,200 | 1.4367132001002E9 | 2015-07-12 15:00:00.100199936 UTC | {"double2":1436713200,"double3":1.4367132001002E9,"double1":"2015-07-13 00:00:00.100199936"} | 11 | +-------------------------------+---------------+-------------------+-----------------------------------+----------------------------------------------------------------------------------------------+ 12 | -------------------------------------------------------------------------------- /example/from_double.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: example/from_double.csv 4 | parser: 5 | type: csv 6 | columns: 7 | - {name: double1, type: double} 8 | - {name: double2, type: double} 9 | - {name: double3, type: double} 10 | - {name: double4, type: double} 11 | - {name: record, type: json} 12 | filters: 13 | - type: timestamp_format 14 | default_from_timestamp_unit: 
ms 15 | columns: 16 | - {name: double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"} 17 | - {name: double2, type: long} 18 | - {name: double3, type: double} 19 | - {name: double4, type: timestamp} 20 | - {name: $.record.double1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"} 21 | - {name: $.record.double2, type: long} 22 | - {name: $.record.double3, type: double} 23 | out: 24 | type: "null" 25 | -------------------------------------------------------------------------------- /example/from_long.csv: -------------------------------------------------------------------------------- 1 | 1436713200100,1436713200100,1436713200100,1436713200100,"{""long1"":1436713200100,""long2"":1436713200100,""long3"":1436713200100}" 2 | -------------------------------------------------------------------------------- /example/from_long.txt: -------------------------------------------------------------------------------- 1 | 2017-08-23 17:25:28.989 +0900: Embulk v0.8.30 2 | 2017-08-23 17:25:33.716 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path 3 | 2017-08-23 17:25:33.758 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_long.csv' 4 | 2017-08-23 17:25:33.760 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped. 
5 | 2017-08-23 17:25:33.767 +0900 [INFO] (0001:preview): Loading files [example/from_long.csv] 6 | 2017-08-23 17:25:33.780 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source 7 | +-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+ 8 | | long1:string | long2:long | long3:double | long4:timestamp | record:json | 9 | +-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+ 10 | | 2015-07-13 00:00:00.100000000 | 1,436,713,200 | 1.4367132E9 | 2015-07-12 15:00:00.100 UTC | {"long3":1.4367132E9,"long2":1436713200,"long1":"2015-07-13 00:00:00.100000000"} | 11 | +-------------------------------+---------------+--------------+-----------------------------+----------------------------------------------------------------------------------+ 12 | -------------------------------------------------------------------------------- /example/from_long.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: example/from_long.csv 4 | parser: 5 | type: csv 6 | columns: 7 | - {name: long1, type: long} 8 | - {name: long2, type: long} 9 | - {name: long3, type: long} 10 | - {name: long4, type: long} 11 | - {name: record, type: json} 12 | filters: 13 | - type: timestamp_format 14 | default_from_timestamp_unit: ms 15 | columns: 16 | - {name: long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"} 17 | - {name: long2, type: long} 18 | - {name: long3, type: double} 19 | - {name: long4, type: timestamp} 20 | - {name: $.record.long1, to_timezone: "Asia/Tokyo", to_format: "%Y-%m-%d %H:%M:%S.%N"} 21 | - {name: $.record.long2, type: long} 22 | - {name: $.record.long3, type: double} 23 | out: 24 | type: "null" 25 | 
-------------------------------------------------------------------------------- /example/from_string.csv: -------------------------------------------------------------------------------- 1 | 2015-07-13,2015-07-13,2015-07-13,2015-07-13,"{""string1"":""2015-07-13"" ,""string2"":""2015-07-13"" ,""string3"":""2015-07-13"" }" 2 | 2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC,2015-07-13 UTC,"{""string1"":""2015-07-13 UTC"" ,""string2"":""2015-07-13 UTC"" ,""string3"":""2015-07-13 UTC"" }" 3 | 2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,2015-07-13 00:00:00,"{""string1"":""2015-07-13 00:00:00"" ,""string2"":""2015-07-13 00:00:00"" ,""string3"":""2015-07-13 00:00:00"" }" 4 | 2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,2015-07-12 16:00:00 UTC,"{""string1"":""2015-07-12 16:00:00 UTC"" ,""string2"":""2015-07-12 16:00:00 UTC"" ,""string3"":""2015-07-12 16:00:00 UTC"" }" 5 | 2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,2015-07-12 16:00:00.1 UTC,"{""string1"":""2015-07-12 16:00:00.1 UTC"" ,""string2"":""2015-07-12 16:00:00.1 UTC"" ,""string3"":""2015-07-12 16:00:00.1 UTC"" }" 6 | 2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,2015-07-12 16:00:00.12 UTC,"{""string1"":""2015-07-12 16:00:00.12 UTC"" ,""string2"":""2015-07-12 16:00:00.12 UTC"" ,""string3"":""2015-07-12 16:00:00.12 UTC"" }" 7 | 2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,2015-07-12 16:00:00.123 UTC,"{""string1"":""2015-07-12 16:00:00.123 UTC"" ,""string2"":""2015-07-12 16:00:00.123 UTC"" ,""string3"":""2015-07-12 16:00:00.123 UTC"" }" 8 | 2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,2015-07-12 16:00:00.1234 UTC,"{""string1"":""2015-07-12 16:00:00.1234 UTC"" ,""string2"":""2015-07-12 16:00:00.1234 UTC"" ,""string3"":""2015-07-12 16:00:00.1234 UTC"" }" 9 | 2015-07-12 16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,2015-07-12 
16:00:00.12345 UTC,2015-07-12 16:00:00.12345 UTC,"{""string1"":""2015-07-12 16:00:00.12345 UTC"" ,""string2"":""2015-07-12 16:00:00.12345 UTC"" ,""string3"":""2015-07-12 16:00:00.12345 UTC"" }" 10 | 2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,2015-07-12 16:00:00.123456 UTC,"{""string1"":""2015-07-12 16:00:00.123456 UTC"" ,""string2"":""2015-07-12 16:00:00.123456 UTC"" ,""string3"":""2015-07-12 16:00:00.123456 UTC"" }" 11 | 2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,2015-07-12 16:00:00.1234567 UTC,"{""string1"":""2015-07-12 16:00:00.1234567 UTC"" ,""string2"":""2015-07-12 16:00:00.1234567 UTC"" ,""string3"":""2015-07-12 16:00:00.1234567 UTC"" }" 12 | 2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,2015-07-12 16:00:00.12345678 UTC,"{""string1"":""2015-07-12 16:00:00.12345678 UTC"" ,""string2"":""2015-07-12 16:00:00.12345678 UTC"" ,""string3"":""2015-07-12 16:00:00.12345678 UTC"" }" 13 | 2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,2015-07-12 16:00:00.123456789 UTC,"{""string1"":""2015-07-12 16:00:00.123456789 UTC"",""string2"":""2015-07-12 16:00:00.123456789 UTC"",""string3"":""2015-07-12 16:00:00.123456789 UTC""}" 14 | 15 | -------------------------------------------------------------------------------- /example/from_string.txt: -------------------------------------------------------------------------------- 1 | 2017-08-23 17:25:45.974 +0900: Embulk v0.8.30 2 | 2017-08-23 17:25:50.111 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path 3 | 2017-08-23 17:25:50.154 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv' 4 | 2017-08-23 17:25:50.155 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped. 
5 | 2017-08-23 17:25:50.160 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv] 6 | 2017-08-23 17:25:50.172 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source 7 | +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+ 8 | | string1:string | string2:long | string3:double | string4:timestamp | record:json | 9 | +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+ 10 | | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} | 11 | | 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} | 12 | | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} | 13 | | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} | 14 | | 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} | 15 | | 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} | 16 | | 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 
16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} | 17 | | 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} | 18 | | 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} | 19 | | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} | 20 | | 2015-07-13 00:00:00.123456700 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456700 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456700"} | 21 | | 2015-07-13 00:00:00.123456780 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456780 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456780"} | 22 | | 2015-07-13 00:00:00.123456789 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456789 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456789"} | 23 | +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+ 24 | -------------------------------------------------------------------------------- /example/from_string.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: example/from_string.csv 4 | parser: 5 | type: csv 6 | columns: 7 | - {name: string1, type: string} 8 | - {name: string2, 
type: string} 9 | - {name: string3, type: string} 10 | - {name: string4, type: string} 11 | - {name: record, type: json} 12 | filters: 13 | - type: timestamp_format 14 | default_from_timezone: "Asia/Taipei" 15 | default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d %z", "%Y-%m-%d"] 16 | default_to_timezone: "Asia/Taipei" 17 | default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N" 18 | columns: 19 | - {name: string1, type: string} 20 | - {name: string2, type: long, to_unit: ms} 21 | - {name: string3, type: double, to_unit: ms} 22 | - {name: string4, type: timestamp} 23 | - {name: $.record.string1, to_timezone: "Asia/Taipei", to_format: "%Y-%m-%d %H:%M:%S.%N"} 24 | - {name: $.record.string2, type: long, to_unit: ms} 25 | - {name: $.record.string3, type: double, to_unit: ms} 26 | out: 27 | type: "null" 28 | -------------------------------------------------------------------------------- /example/from_string_auto_java.txt: -------------------------------------------------------------------------------- 1 | 2017-08-23 17:26:34.305 +0900: Embulk v0.8.30 2 | 2017-08-23 17:26:38.614 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path 3 | 2017-08-23 17:26:38.702 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv' 4 | 2017-08-23 17:26:38.704 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped. 
5 | 2017-08-23 17:26:38.711 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv] 6 | 2017-08-23 17:26:38.728 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source 7 | +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+ 8 | | string1:string | string2:long | string3:double | string4:timestamp | record:json | 9 | +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+ 10 | | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} | 11 | | 2015-07-13 08:00:00.000000000 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000000"} | 12 | | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} | 13 | | 2015-07-13 00:00:00.000000000 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000000"} | 14 | | 2015-07-13 00:00:00.100000000 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000000"} | 15 | | 2015-07-13 00:00:00.120000000 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000000"} | 16 | | 2015-07-13 00:00:00.123000000 | 1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 
16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000000"} | 17 | | 2015-07-13 00:00:00.123400000 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400000"} | 18 | | 2015-07-13 00:00:00.123450000 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450000"} | 19 | | 2015-07-13 00:00:00.123456000 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456000"} | 20 | | 2015-07-13 00:00:00.123456700 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456700 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456700"} | 21 | | 2015-07-13 00:00:00.123456780 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456780 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456780"} | 22 | | 2015-07-13 00:00:00.123456789 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456789 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456789"} | 23 | +-------------------------------+-------------------+-----------------------+-----------------------------------+-----------------------------------------------------------------------------------------------------+ 24 | -------------------------------------------------------------------------------- /example/from_string_auto_java.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: example/from_string.csv 4 | parser: 5 | type: csv 6 | columns: 7 | - {name: string1, type: string} 8 | - {name: 
string2, type: string} 9 | - {name: string3, type: string} 10 | - {name: string4, type: string} 11 | - {name: record, type: json} 12 | filters: 13 | - type: timestamp_format 14 | default_from_timezone: "Asia/Taipei" 15 | default_from_timestamp_format: ["%Y-%m-%d", "%Y-%m-%d %Z", "%Y-%m-%d %H:%M:%S.%N %Z", "%Y-%m-%d %H:%M:%S %Z", "%Y-%m-%d %H:%M:%S"] 16 | default_to_timezone: "Asia/Taipei" 17 | default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N" 18 | timestamp_parser: auto_java 19 | columns: 20 | - {name: string1} 21 | - {name: string2, type: long, to_unit: ms} 22 | - {name: string3, type: double, to_unit: ms} 23 | - {name: string4, type: timestamp} 24 | - {name: $.record.string1, to_timezone: "Asia/Taipei", to_format: "%Y-%m-%d %H:%M:%S.%N"} 25 | - {name: $.record.string2, type: long, to_unit: ms} 26 | - {name: $.record.string3, type: double, to_unit: ms} 27 | out: 28 | type: "null" 29 | -------------------------------------------------------------------------------- /example/from_string_java.txt: -------------------------------------------------------------------------------- 1 | 2017-08-23 17:26:56.132 +0900: Embulk v0.8.30 2 | 2017-08-23 17:27:00.403 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path 3 | 2017-08-23 17:27:00.454 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_string.csv' 4 | 2017-08-23 17:27:00.455 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped. 
5 | 2017-08-23 17:27:00.460 +0900 [INFO] (0001:preview): Loading files [example/from_string.csv] 6 | 2017-08-23 17:27:00.474 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source 7 | +----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+ 8 | | string1:string | string2:long | string3:double | string4:timestamp | record:json | 9 | +----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+ 10 | | 2015-07-13 00:00:00.000000 +0800 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000 +0800"} | 11 | | 2015-07-13 08:00:00.000000 +0800 | 1,436,745,600,000 | 1.4367456E12 | 2015-07-13 00:00:00 UTC | {"string3":1.4367456E12,"string2":1436745600000,"string1":"2015-07-13 08:00:00.000000 +0800"} | 12 | | 2015-07-13 00:00:00.000000 +0800 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000 +0800"} | 13 | | 2015-07-13 00:00:00.000000 +0800 | 1,436,716,800,000 | 1.4367168E12 | 2015-07-12 16:00:00 UTC | {"string3":1.4367168E12,"string2":1436716800000,"string1":"2015-07-13 00:00:00.000000 +0800"} | 14 | | 2015-07-13 00:00:00.100000 +0800 | 1,436,716,800,100 | 1.4367168001E12 | 2015-07-12 16:00:00.100 UTC | {"string3":1.4367168001E12,"string2":1436716800100,"string1":"2015-07-13 00:00:00.100000 +0800"} | 15 | | 2015-07-13 00:00:00.120000 +0800 | 1,436,716,800,120 | 1.43671680012E12 | 2015-07-12 16:00:00.120 UTC | {"string3":1.43671680012E12,"string2":1436716800120,"string1":"2015-07-13 00:00:00.120000 +0800"} | 16 | | 2015-07-13 00:00:00.123000 +0800 | 
1,436,716,800,123 | 1.436716800123E12 | 2015-07-12 16:00:00.123 UTC | {"string3":1.436716800123E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123000 +0800"} | 17 | | 2015-07-13 00:00:00.123400 +0800 | 1,436,716,800,123 | 1.4367168001234E12 | 2015-07-12 16:00:00.123400 UTC | {"string3":1.4367168001234E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123400 +0800"} | 18 | | 2015-07-13 00:00:00.123450 +0800 | 1,436,716,800,123 | 1.43671680012345E12 | 2015-07-12 16:00:00.123450 UTC | {"string3":1.43671680012345E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123450 +0800"} | 19 | | 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.436716800123456E12 | 2015-07-12 16:00:00.123456 UTC | {"string3":1.436716800123456E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} | 20 | | 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456700 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} | 21 | | 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456780 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} | 22 | | 2015-07-13 00:00:00.123456 +0800 | 1,436,716,800,123 | 1.4367168001234568E12 | 2015-07-12 16:00:00.123456789 UTC | {"string3":1.4367168001234568E12,"string2":1436716800123,"string1":"2015-07-13 00:00:00.123456 +0800"} | 23 | +----------------------------------+-------------------+-----------------------+-----------------------------------+--------------------------------------------------------------------------------------------------------+ 24 | -------------------------------------------------------------------------------- /example/from_string_java.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: 
example/from_string.csv 4 | parser: 5 | type: csv 6 | columns: 7 | - {name: string1, type: string} 8 | - {name: string2, type: string} 9 | - {name: string3, type: string} 10 | - {name: string4, type: string} 11 | - {name: record, type: json} 12 | filters: 13 | - type: timestamp_format 14 | default_from_timezone: "Asia/Taipei" 15 | default_from_timestamp_format: ["yyyy-MM-dd", "yyyy-MM-dd z", "yyyy-MM-dd HH:mm:ss.nnnnnnnnn z", "yyyy-MM-dd HH:mm:ss z", "yyyy-MM-dd HH:mm:ss"] 16 | default_to_timezone: "Asia/Taipei" 17 | default_to_timestamp_format: "yyyy-MM-dd HH:mm:ss.nnnnnn Z" 18 | columns: 19 | - {name: string1} 20 | - {name: string2, type: long, to_unit: ms} 21 | - {name: string3, type: double, to_unit: ms} 22 | - {name: string4, type: timestamp} 23 | - {name: $.record.string1} 24 | - {name: $.record.string2, type: long, to_unit: ms} 25 | - {name: $.record.string3, type: double, to_unit: ms} 26 | out: 27 | type: "null" 28 | -------------------------------------------------------------------------------- /example/from_timestamp.csv: -------------------------------------------------------------------------------- 1 | 2015-07-12 15:00:00.1 UTC,2015-07-12 15:00:00.1 UTC,2015-07-12 15:00:00.1 UTC,2015-07-12 15:00:00.1 UTC 2 | 3 | -------------------------------------------------------------------------------- /example/from_timestamp.txt: -------------------------------------------------------------------------------- 1 | 2017-08-23 17:27:14.804 +0900: Embulk v0.8.30 2 | 2017-08-23 17:27:19.493 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path 3 | 2017-08-23 17:27:19.591 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'from_timestamp.csv' 4 | 2017-08-23 17:27:19.592 +0900 [INFO] (0001:preview): "follow_symlinks" is set false. Note that symbolic links to directories are skipped. 
5 | 2017-08-23 17:27:19.600 +0900 [INFO] (0001:preview): Loading files [example/from_timestamp.csv] 6 | 2017-08-23 17:27:19.623 +0900 [INFO] (0001:preview): Try to read 32,768 bytes from input source 7 | +-------------------------------+-------------------+-------------------+-----------------------------+ 8 | | timestamp1:string | timestamp2:long | timestamp3:double | timestamp4:timestamp | 9 | +-------------------------------+-------------------+-------------------+-----------------------------+ 10 | | 2015-07-13 00:00:00.100000000 | 1,436,713,200,100 | 1.4367132001E12 | 2015-07-12 15:00:00.100 UTC | 11 | +-------------------------------+-------------------+-------------------+-----------------------------+ 12 | -------------------------------------------------------------------------------- /example/from_timestamp.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: example/from_timestamp.csv 4 | parser: 5 | type: csv 6 | default_timestamp_format: "%Y-%m-%d %H:%M:%S.%N %z" 7 | columns: 8 | - {name: timestamp1, type: timestamp} 9 | - {name: timestamp2, type: timestamp} 10 | - {name: timestamp3, type: timestamp} 11 | - {name: timestamp4, type: timestamp} 12 | filters: 13 | - type: timestamp_format 14 | default_to_timezone: "Asia/Tokyo" 15 | default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N" 16 | columns: 17 | - {name: timestamp1, to_format: "%Y-%m-%d %H:%M:%S.%N"} 18 | - {name: timestamp2, type: long, to_unit: ms} 19 | - {name: timestamp3, type: double, to_unit: ms} 20 | - {name: timestamp4, type: timestamp} 21 | out: 22 | type: "null" 23 | -------------------------------------------------------------------------------- /example/nested.jsonl: -------------------------------------------------------------------------------- 1 | {"timestamp":"2015-07-12 15:00:00 UTC","nested":{"nested":[{"timestamp":"2015-07-12 15:00:00 UTC"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"}} 2 | 
{"timestamp":"2015-07-12 15:00:00.1 UTC","nested":{"nested":[{"timestamp":"2015-07-12 15:00:00.1 UTC"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"}} 3 | -------------------------------------------------------------------------------- /example/nested.txt: -------------------------------------------------------------------------------- 1 | 2016-11-06 14:25:21.964 +0900: Embulk v0.8.6 2 | 2016-11-06 14:25:22.829 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path 3 | 2016-11-06 14:25:22.844 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'nested.jsonl' 4 | 2016-11-06 14:25:22.850 +0900 [INFO] (0001:preview): Loading files [example/nested.jsonl] 5 | +-----------------------------------------------------------------------------------------------------------------------------------------------------------+ 6 | | record:json | 7 | +-----------------------------------------------------------------------------------------------------------------------------------------------------------+ 8 | | {"timestamp":1436713200000,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.000000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00 UTC"}} | 9 | | {"timestamp":1436713200100,"nested":{"nested":[{"timestamp":"2015-07-13 00:00:00.100000000"}]},"ignore_nested":{"timestamp":"2015-07-12 15:00:00.1 UTC"}} | 10 | +-----------------------------------------------------------------------------------------------------------------------------------------------------------+ 11 | -------------------------------------------------------------------------------- /example/nested.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: example/nested.jsonl 4 | parser: 5 | type: json 6 | filters: 7 | - type: timestamp_format 8 | default_to_timezone: "Asia/Tokyo" 9 | default_to_timestamp_format: "%Y-%m-%d %H:%M:%S.%N" 
10 | columns: 11 | - {name: "$.record.timestamp", type: long, from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"], to_unit: ms} 12 | - {name: "$.record.nested.nested[0].timestamp", from_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d %H:%M:%S %z"]} 13 | out: 14 | type: "null" 15 | -------------------------------------------------------------------------------- /example/timezone.csv: -------------------------------------------------------------------------------- 1 | 2015-07-12 15:00:00 UTC,2015-07-12 15:00:00 UTC 2 | 2015-07-12 15:00:00.1 UTC,2015-07-12 15:00:00.1 UTC 3 | -------------------------------------------------------------------------------- /example/timezone.txt: -------------------------------------------------------------------------------- 1 | 2016-11-06 14:25:02.170 +0900: Embulk v0.8.6 2 | 2016-11-06 14:25:03.024 +0900 [INFO] (0001:preview): Loaded plugin embulk/filter/timestamp_format from a load path 3 | 2016-11-06 14:25:03.039 +0900 [INFO] (0001:preview): Listing local files at directory 'example' filtering filename by prefix 'timezone.csv' 4 | 2016-11-06 14:25:03.043 +0900 [INFO] (0001:preview): Loading files [example/timezone.csv] 5 | +----------------+-------------------------------------+ 6 | | string1:string | string2:string | 7 | +----------------+-------------------------------------+ 8 | | 2015-07-13 | 2015-07-13 00:00:00.000000000 +0900 | 9 | | 2015-07-13 | 2015-07-13 00:00:00.100000000 +0900 | 10 | +----------------+-------------------------------------+ 11 | -------------------------------------------------------------------------------- /example/timezone.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: example/timezone.csv 4 | parser: 5 | type: csv 6 | columns: 7 | - {name: string1, type: string} 8 | - {name: string2, type: string} 9 | filters: 10 | - type: timestamp_format 11 | default_from_timestamp_format: ["%Y-%m-%d %H:%M:%S.%N %z", "%Y-%m-%d 
%H:%M:%S %z"] 12 | columns: 13 | - {name: string1, to_format: "%Y-%m-%d", to_timezone: "Asia/Tokyo"} 14 | - {name: string2, to_format: "%Y-%m-%d %H:%M:%S.%N %z", to_timezone: "Asia/Tokyo"} 15 | out: 16 | type: "null" 17 | -------------------------------------------------------------------------------- /gradle/dependency-locks/embulkPluginRuntime.lockfile: -------------------------------------------------------------------------------- 1 | # This is a Gradle generated file for dependency locking. 2 | # Manual edits can break the build and are not advised. 3 | # This file is expected to be part of source control. 4 | io.github.medjed:JsonPathCompiler:0.1.2 5 | net.minidev:accessors-smart:1.1 6 | net.minidev:json-smart:2.2.1 7 | org.ow2.asm:asm:5.0.3 8 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sonots/embulk-filter-timestamp_format/0d8609c8536d32e204cd53783e72b945e53cd4fb/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | zipStoreBase=GRADLE_USER_HOME 4 | zipStorePath=wrapper/dists 5 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.8.2-bin.zip 6 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # 
Resolve links: $0 may be a link 11 | PRG="$0" 12 | # Need this for relative symlinks. 13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 31 | DEFAULT_JVM_OPTS="" 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 34 | MAX_FD="maximum" 35 | 36 | warn () { 37 | echo "$*" 38 | } 39 | 40 | die () { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! -x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 
86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? -ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 
| eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save () { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | 
set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 17 | set DEFAULT_JVM_OPTS= 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 
55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'embulk-filter-timestamp_format' 2 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/filter/timestamp_format/ColumnCaster.java: -------------------------------------------------------------------------------- 1 | package org.embulk.filter.timestamp_format; 2 | 3 | import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken; 4 | import org.embulk.filter.timestamp_format.cast.DoubleCast; 5 | import org.embulk.filter.timestamp_format.cast.LongCast; 6 | import org.embulk.filter.timestamp_format.cast.StringCast; 7 | import org.embulk.filter.timestamp_format.cast.TimestampCast; 8 | import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.ColumnConfig; 9 | import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.PluginTask; 10 | import org.embulk.spi.Column; 11 | import org.embulk.spi.Exec; 12 | import 
org.embulk.spi.PageBuilder; 13 | import org.embulk.spi.PageReader; 14 | import org.embulk.spi.Schema; 15 | import org.embulk.spi.time.Timestamp; 16 | import org.embulk.spi.type.DoubleType; 17 | import org.embulk.spi.type.LongType; 18 | import org.embulk.spi.type.StringType; 19 | import org.embulk.spi.type.TimestampType; 20 | import org.embulk.spi.type.Type; 21 | import org.joda.time.DateTimeZone; 22 | import org.msgpack.value.Value; 23 | import org.slf4j.Logger; 24 | 25 | import java.util.ArrayList; 26 | import java.util.HashMap; 27 | import java.util.List; 28 | 29 | public class ColumnCaster 30 | { 31 | private static final Logger logger = Exec.getLogger(TimestampFormatFilterPlugin.class); 32 | private final PluginTask task; 33 | private final Schema inputSchema; 34 | private final Schema outputSchema; 35 | private final PageReader pageReader; 36 | private final PageBuilder pageBuilder; 37 | private final HashMap timestampParserMap = new HashMap<>(); 38 | private final HashMap timestampFormatterMap = new HashMap<>(); 39 | private final HashMap fromTimestampUnitMap = new HashMap<>(); 40 | private final HashMap toTimestampUnitMap = new HashMap<>(); 41 | private final JsonVisitor jsonVisitor; 42 | 43 | ColumnCaster(PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, PageBuilder pageBuilder) 44 | { 45 | this.task = task; 46 | this.inputSchema = inputSchema; 47 | this.outputSchema = outputSchema; 48 | this.pageReader = pageReader; 49 | this.pageBuilder = pageBuilder; 50 | 51 | buildTimestampParserMap(); 52 | buildTimestampFormatterMap(); 53 | buildFromTimestampUnitMap(); 54 | buildToTimestampUnitMap(); 55 | 56 | JsonCaster jsonCaster = new JsonCaster(task, timestampParserMap, timestampFormatterMap, fromTimestampUnitMap, toTimestampUnitMap); 57 | this.jsonVisitor = new JsonVisitor(task, jsonCaster); 58 | } 59 | 60 | private void buildTimestampParserMap() 61 | { 62 | // columnName or jsonPath => TimestampParser 63 | // we do not know input 
type of json here, so creates anyway 64 | for (ColumnConfig columnConfig : task.getColumns()) { 65 | TimestampParser parser = getTimestampParser(columnConfig, task); 66 | this.timestampParserMap.put(columnConfig.getName(), parser); 67 | } 68 | } 69 | 70 | private TimestampParser getTimestampParser(ColumnConfig columnConfig, PluginTask task) 71 | { 72 | DateTimeZone timezone = columnConfig.getFromTimeZone().or(task.getDefaultFromTimeZone()); 73 | List formatList = columnConfig.getFromFormat().or(task.getDefaultFromTimestampFormat()); 74 | List newFormatList = new ArrayList<>(formatList); 75 | String name = columnConfig.getName(); 76 | if (task.getTimeStampParser().equals("auto_java")) { 77 | for (int i = 0; i < formatList.size(); i++) { 78 | String format = formatList.get(i); 79 | if (!format.contains("%")) { 80 | continue; 81 | } 82 | String javaFormat = TimestampFormatConverter.toJavaFormat(format); 83 | if (javaFormat == null) { 84 | logger.info(String.format("%s: Failed to convert ruby parser to java parser: \"%s\", Use ruby parser as is", name, format)); 85 | } else { 86 | logger.debug(String.format("%s: Convert ruby parser \"%s\" to java parser \"%s\"", name, format, javaFormat)); 87 | newFormatList.set(i, javaFormat); 88 | } 89 | } 90 | } 91 | return new TimestampParser(newFormatList, timezone); 92 | } 93 | 94 | private void buildTimestampFormatterMap() 95 | { 96 | // columnName or jsonPath => TimestampFormatter 97 | for (ColumnConfig columnConfig : task.getColumns()) { 98 | if (columnConfig.getType() instanceof StringType) { 99 | TimestampFormatter parser = getTimestampFormatter(columnConfig, task); 100 | this.timestampFormatterMap.put(columnConfig.getName(), parser); 101 | } 102 | } 103 | } 104 | 105 | private TimestampFormatter getTimestampFormatter(ColumnConfig columnConfig, PluginTask task) 106 | { 107 | String format = columnConfig.getToFormat().or(task.getDefaultToTimestampFormat()); 108 | DateTimeZone timezone = 
columnConfig.getToTimeZone().or(task.getDefaultToTimeZone()); 109 | return new TimestampFormatter(format, timezone); 110 | } 111 | 112 | private void buildFromTimestampUnitMap() 113 | { 114 | // columnName or jsonPath => TimestampUnit 115 | // we do not know input type of json here, so creates anyway 116 | for (ColumnConfig columnConfig : task.getColumns()) { 117 | TimestampUnit unit = getFromTimestampUnit(columnConfig, task); 118 | this.fromTimestampUnitMap.put(columnConfig.getName(), unit); 119 | } 120 | } 121 | 122 | private TimestampUnit getFromTimestampUnit(ColumnConfig columnConfig, PluginTask task) 123 | { 124 | return columnConfig.getFromUnit().or(task.getDefaultFromTimestampUnit()); 125 | } 126 | 127 | private void buildToTimestampUnitMap() 128 | { 129 | // columnName or jsonPath => TimestampUnit 130 | for (ColumnConfig columnConfig : task.getColumns()) { 131 | Type type = columnConfig.getType(); 132 | if (type instanceof LongType || type instanceof DoubleType) { 133 | TimestampUnit unit = getToTimestampUnit(columnConfig, task); 134 | this.toTimestampUnitMap.put(columnConfig.getName(), unit); 135 | } 136 | } 137 | } 138 | 139 | private TimestampUnit getToTimestampUnit(ColumnConfig columnConfig, PluginTask task) 140 | { 141 | return columnConfig.getToUnit().or(task.getDefaultToTimestampUnit()); 142 | } 143 | 144 | public void setFromLong(Column outputColumn, long value) 145 | { 146 | Type outputType = outputColumn.getType(); 147 | TimestampUnit fromUnit = fromTimestampUnitMap.get(outputColumn.getName()); 148 | if (outputType instanceof StringType) { 149 | TimestampFormatter timestampFormatter = timestampFormatterMap.get(outputColumn.getName()); 150 | pageBuilder.setString(outputColumn, LongCast.asString(value, fromUnit, timestampFormatter)); 151 | } 152 | else if (outputType instanceof TimestampType) { 153 | pageBuilder.setTimestamp(outputColumn, LongCast.asTimestamp(value, fromUnit)); 154 | } 155 | else if (outputType instanceof LongType) { 156 | 
TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName()); 157 | pageBuilder.setLong(outputColumn, LongCast.asLong(value, fromUnit, toUnit)); 158 | } 159 | else if (outputType instanceof DoubleType) { 160 | TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName()); 161 | pageBuilder.setDouble(outputColumn, LongCast.asDouble(value, fromUnit, toUnit)); 162 | } 163 | else { 164 | assert false; 165 | } 166 | } 167 | 168 | public void setFromDouble(Column outputColumn, double value) 169 | { 170 | Type outputType = outputColumn.getType(); 171 | TimestampUnit fromUnit = fromTimestampUnitMap.get(outputColumn.getName()); 172 | if (outputType instanceof StringType) { 173 | TimestampFormatter timestampFormatter = timestampFormatterMap.get(outputColumn.getName()); 174 | pageBuilder.setString(outputColumn, DoubleCast.asString(value, fromUnit, timestampFormatter)); 175 | } 176 | else if (outputType instanceof TimestampType) { 177 | pageBuilder.setTimestamp(outputColumn, DoubleCast.asTimestamp(value, fromUnit)); 178 | } 179 | else if (outputType instanceof LongType) { 180 | TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName()); 181 | pageBuilder.setLong(outputColumn, DoubleCast.asLong(value, fromUnit, toUnit)); 182 | } 183 | else if (outputType instanceof DoubleType) { 184 | TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName()); 185 | pageBuilder.setDouble(outputColumn, DoubleCast.asDouble(value, fromUnit, toUnit)); 186 | } 187 | else { 188 | assert false; 189 | } 190 | } 191 | 192 | public void setFromString(Column outputColumn, String value) 193 | { 194 | Type outputType = outputColumn.getType(); 195 | TimestampParser timestampParser = timestampParserMap.get(outputColumn.getName()); 196 | if (outputType instanceof StringType) { 197 | TimestampFormatter timestampFormatter = timestampFormatterMap.get(outputColumn.getName()); 198 | pageBuilder.setString(outputColumn, StringCast.asString(value, timestampParser, 
timestampFormatter)); 199 | } 200 | else if (outputType instanceof TimestampType) { 201 | pageBuilder.setTimestamp(outputColumn, StringCast.asTimestamp(value, timestampParser)); 202 | } 203 | else if (outputType instanceof LongType) { 204 | TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName()); 205 | pageBuilder.setLong(outputColumn, StringCast.asLong(value, timestampParser, toUnit)); 206 | } 207 | else if (outputType instanceof DoubleType) { 208 | TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName()); 209 | pageBuilder.setDouble(outputColumn, StringCast.asDouble(value, timestampParser, toUnit)); 210 | } 211 | else { 212 | assert false; 213 | } 214 | } 215 | 216 | public void setFromTimestamp(Column outputColumn, Timestamp value) 217 | { 218 | Type outputType = outputColumn.getType(); 219 | if (outputType instanceof StringType) { 220 | TimestampFormatter timestampFormatter = timestampFormatterMap.get(outputColumn.getName()); 221 | pageBuilder.setString(outputColumn, TimestampCast.asString(value, timestampFormatter)); 222 | } 223 | else if (outputType instanceof TimestampType) { 224 | pageBuilder.setTimestamp(outputColumn, value); 225 | } 226 | else if (outputType instanceof LongType) { 227 | TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName()); 228 | pageBuilder.setLong(outputColumn, TimestampCast.asLong(value, toUnit)); 229 | } 230 | else if (outputType instanceof DoubleType) { 231 | TimestampUnit toUnit = toTimestampUnitMap.get(outputColumn.getName()); 232 | pageBuilder.setDouble(outputColumn, TimestampCast.asDouble(value, toUnit)); 233 | } 234 | else { 235 | assert false; 236 | } 237 | } 238 | 239 | public void setFromJson(Column outputColumn, Value value) 240 | { 241 | String pathFragment = PropertyPathToken.getPathFragment(outputColumn.getName()); 242 | String jsonPath = new StringBuilder("$").append(pathFragment).toString(); 243 | pageBuilder.setJson(outputColumn, jsonVisitor.visit(jsonPath, value)); 244 | } 
245 | } 246 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/filter/timestamp_format/ColumnVisitorImpl.java: -------------------------------------------------------------------------------- 1 | package org.embulk.filter.timestamp_format; 2 | 3 | import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler; 4 | import org.embulk.spi.DataException; 5 | import org.embulk.spi.PageReader; 6 | import org.embulk.spi.Schema; 7 | 8 | import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.ColumnConfig; 9 | import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.PluginTask; 10 | 11 | import org.embulk.spi.Column; 12 | import org.embulk.spi.ColumnVisitor; 13 | import org.embulk.spi.Exec; 14 | import org.embulk.spi.PageBuilder; 15 | import org.slf4j.Logger; 16 | 17 | import java.util.HashMap; 18 | import java.util.HashSet; 19 | 20 | public class ColumnVisitorImpl 21 | implements ColumnVisitor 22 | { 23 | private static final Logger logger = Exec.getLogger(TimestampFormatFilterPlugin.class); 24 | private final PluginTask task; 25 | private final Schema inputSchema; 26 | private final Schema outputSchema; 27 | private final PageReader pageReader; 28 | private final PageBuilder pageBuilder; 29 | private final HashSet shouldCastSet = new HashSet<>(); 30 | private final HashMap outputColumnMap = new HashMap<>(); 31 | private final ColumnCaster columnCaster; 32 | 33 | ColumnVisitorImpl(PluginTask task, Schema inputSchema, Schema outputSchema, 34 | PageReader pageReader, PageBuilder pageBuilder) 35 | { 36 | this.task = task; 37 | this.inputSchema = inputSchema; 38 | this.outputSchema = outputSchema; 39 | this.pageReader = pageReader; 40 | this.pageBuilder = pageBuilder; 41 | 42 | buildShouldCastSet(); 43 | buildOutputColumnMap(); 44 | this.columnCaster = new ColumnCaster(task, inputSchema, outputSchema, pageReader, pageBuilder); 45 | } 46 | 47 | private void buildShouldCastSet() 48 | { 
49 | // columnName => Boolean to avoid unnecessary cast 50 | for (ColumnConfig columnConfig : task.getColumns()) { 51 | String name = columnConfig.getName(); 52 | if (PathCompiler.isProbablyJsonPath(name)) { 53 | String columnName = JsonPathUtil.getColumnName(name); 54 | shouldCastSet.add(columnName); 55 | continue; 56 | } 57 | shouldCastSet.add(name); 58 | } 59 | } 60 | 61 | private boolean shouldCast(String name) 62 | { 63 | return shouldCastSet.contains(name); 64 | } 65 | 66 | private void buildOutputColumnMap() 67 | { 68 | // columnName => outputColumn 69 | for (Column column : outputSchema.getColumns()) { 70 | this.outputColumnMap.put(column.getName(), column); 71 | } 72 | } 73 | 74 | private interface PageBuildable 75 | { 76 | public void run() throws DataException; 77 | } 78 | 79 | private void withStopOnInvalidRecord(final PageBuildable op, 80 | final Column inputColumn, final Column outputColumn) throws DataException 81 | { 82 | if (pageReader.isNull(inputColumn)) { 83 | pageBuilder.setNull(outputColumn); 84 | } 85 | else { 86 | if (task.getStopOnInvalidRecord()) { 87 | op.run(); 88 | } 89 | else { 90 | try { 91 | op.run(); 92 | } 93 | catch (final DataException ex) { 94 | logger.warn(ex.getMessage()); 95 | pageBuilder.setNull(outputColumn); 96 | } 97 | } 98 | } 99 | } 100 | 101 | @Override 102 | public void booleanColumn(final Column inputColumn) 103 | { 104 | if (pageReader.isNull(inputColumn)) { 105 | pageBuilder.setNull(inputColumn); 106 | } 107 | else { 108 | pageBuilder.setBoolean(inputColumn, pageReader.getBoolean(inputColumn)); 109 | } 110 | } 111 | 112 | @Override 113 | public void longColumn(final Column inputColumn) 114 | { 115 | String name = inputColumn.getName(); 116 | if (! 
shouldCast(name)){ 117 | if (pageReader.isNull(inputColumn)) { 118 | pageBuilder.setNull(inputColumn); 119 | } 120 | else { 121 | pageBuilder.setLong(inputColumn, pageReader.getLong(inputColumn)); 122 | } 123 | } 124 | else { 125 | final Column outputColumn = outputColumnMap.get(name); 126 | PageBuildable op = new PageBuildable() { 127 | public void run() throws DataException { 128 | columnCaster.setFromLong(outputColumn, pageReader.getLong(inputColumn)); 129 | } 130 | }; 131 | withStopOnInvalidRecord(op, inputColumn, outputColumn); 132 | } 133 | } 134 | 135 | @Override 136 | public void doubleColumn(final Column inputColumn) 137 | { 138 | String name = inputColumn.getName(); 139 | if (! shouldCast(name)){ 140 | if (pageReader.isNull(inputColumn)) { 141 | pageBuilder.setNull(inputColumn); 142 | } 143 | else { 144 | pageBuilder.setDouble(inputColumn, pageReader.getDouble(inputColumn)); 145 | } 146 | } 147 | else { 148 | final Column outputColumn = outputColumnMap.get(inputColumn.getName()); 149 | PageBuildable op = new PageBuildable() { 150 | public void run() throws DataException { 151 | columnCaster.setFromDouble(outputColumn, pageReader.getDouble(inputColumn)); 152 | } 153 | }; 154 | withStopOnInvalidRecord(op, inputColumn, outputColumn); 155 | } 156 | } 157 | 158 | @Override 159 | public void stringColumn(final Column inputColumn) 160 | { 161 | String name = inputColumn.getName(); 162 | if (! 
shouldCast(name)){ 163 | if (pageReader.isNull(inputColumn)) { 164 | pageBuilder.setNull(inputColumn); 165 | } 166 | else { 167 | pageBuilder.setString(inputColumn, pageReader.getString(inputColumn)); 168 | } 169 | } 170 | else { 171 | final Column outputColumn = outputColumnMap.get(inputColumn.getName()); 172 | PageBuildable op = new PageBuildable() { 173 | public void run() throws DataException { 174 | columnCaster.setFromString(outputColumn, pageReader.getString(inputColumn)); 175 | } 176 | }; 177 | withStopOnInvalidRecord(op, inputColumn, outputColumn); 178 | } 179 | } 180 | 181 | @Override 182 | public void timestampColumn(final Column inputColumn) 183 | { 184 | String name = inputColumn.getName(); 185 | if (! shouldCast(name)){ 186 | if (pageReader.isNull(inputColumn)) { 187 | pageBuilder.setNull(inputColumn); 188 | } 189 | else { 190 | pageBuilder.setTimestamp(inputColumn, pageReader.getTimestamp(inputColumn)); 191 | } 192 | } 193 | else { 194 | final Column outputColumn = outputColumnMap.get(inputColumn.getName()); 195 | PageBuildable op = new PageBuildable() { 196 | public void run() throws DataException { 197 | columnCaster.setFromTimestamp(outputColumn, pageReader.getTimestamp(inputColumn)); 198 | } 199 | }; 200 | withStopOnInvalidRecord(op, inputColumn, outputColumn); 201 | } 202 | } 203 | 204 | @Override 205 | public void jsonColumn(final Column inputColumn) 206 | { 207 | String name = inputColumn.getName(); 208 | if (! 
shouldCast(name)){ 209 | if (pageReader.isNull(inputColumn)) { 210 | pageBuilder.setNull(inputColumn); 211 | } 212 | else { 213 | pageBuilder.setJson(inputColumn, pageReader.getJson(inputColumn)); 214 | } 215 | } 216 | else { 217 | final Column outputColumn = outputColumnMap.get(inputColumn.getName()); 218 | PageBuildable op = new PageBuildable() { 219 | public void run() throws DataException { 220 | columnCaster.setFromJson(outputColumn, pageReader.getJson(inputColumn)); 221 | } 222 | }; 223 | withStopOnInvalidRecord(op, inputColumn, outputColumn); 224 | } 225 | } 226 | } 227 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/filter/timestamp_format/JsonCaster.java: -------------------------------------------------------------------------------- 1 | package org.embulk.filter.timestamp_format; 2 | 3 | import org.embulk.config.ConfigException; 4 | import org.embulk.filter.timestamp_format.cast.DoubleCast; 5 | import org.embulk.filter.timestamp_format.cast.LongCast; 6 | import org.embulk.filter.timestamp_format.cast.StringCast; 7 | import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.ColumnConfig; 8 | import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.PluginTask; 9 | import org.embulk.spi.Exec; 10 | import org.embulk.spi.type.DoubleType; 11 | import org.embulk.spi.type.LongType; 12 | import org.embulk.spi.type.StringType; 13 | import org.embulk.spi.type.Type; 14 | import org.msgpack.value.FloatValue; 15 | import org.msgpack.value.IntegerValue; 16 | import org.msgpack.value.StringValue; 17 | import org.msgpack.value.Value; 18 | import org.msgpack.value.ValueFactory; 19 | 20 | import org.slf4j.Logger; 21 | 22 | import java.util.HashMap; 23 | 24 | class JsonCaster 25 | { 26 | private static final Logger logger = Exec.getLogger(TimestampFormatFilterPlugin.class); 27 | private final PluginTask task; 28 | private final HashMap timestampParserMap; 29 | private final HashMap 
timestampFormatterMap; 30 | private final HashMap fromTimestampUnitMap; 31 | private final HashMap toTimestampUnitMap; 32 | 33 | JsonCaster(PluginTask task, 34 | HashMap timestampParserMap, 35 | HashMap timestampFormatterMap, 36 | HashMap fromTimestampUnitMap, 37 | HashMap toTimestampUnitMap) 38 | { 39 | this.task = task; 40 | this.timestampParserMap = timestampParserMap; 41 | this.timestampFormatterMap = timestampFormatterMap; 42 | this.fromTimestampUnitMap = fromTimestampUnitMap; 43 | this.toTimestampUnitMap = toTimestampUnitMap; 44 | } 45 | 46 | public Value fromLong(ColumnConfig columnConfig, IntegerValue value) 47 | { 48 | Type outputType = columnConfig.getType(); 49 | TimestampUnit fromUnit = fromTimestampUnitMap.get(columnConfig.getName()); 50 | if (outputType instanceof StringType) { 51 | TimestampFormatter formatter = timestampFormatterMap.get(columnConfig.getName()); 52 | return ValueFactory.newString(LongCast.asString(value.asLong(), fromUnit, formatter)); 53 | } 54 | else if (outputType instanceof LongType) { 55 | TimestampUnit toUnit = toTimestampUnitMap.get(columnConfig.getName()); 56 | return ValueFactory.newInteger(LongCast.asLong(value.asLong(), fromUnit, toUnit)); 57 | } 58 | else if (outputType instanceof DoubleType) { 59 | TimestampUnit toUnit = toTimestampUnitMap.get(columnConfig.getName()); 60 | return ValueFactory.newFloat(LongCast.asDouble(value.asLong(), fromUnit, toUnit)); 61 | } 62 | else { 63 | assert false; 64 | throw new RuntimeException(); 65 | } 66 | } 67 | 68 | public Value fromDouble(ColumnConfig columnConfig, FloatValue value) 69 | { 70 | Type outputType = columnConfig.getType(); 71 | TimestampUnit fromUnit = fromTimestampUnitMap.get(columnConfig.getName()); 72 | if (outputType instanceof StringType) { 73 | TimestampFormatter formatter = timestampFormatterMap.get(columnConfig.getName()); 74 | return ValueFactory.newString(DoubleCast.asString(value.toDouble(), fromUnit, formatter)); 75 | } 76 | else if (outputType instanceof 
LongType) { 77 | TimestampUnit toUnit = toTimestampUnitMap.get(columnConfig.getName()); 78 | return ValueFactory.newInteger(DoubleCast.asLong(value.toDouble(), fromUnit, toUnit)); 79 | } 80 | else if (outputType instanceof DoubleType) { 81 | TimestampUnit toUnit = toTimestampUnitMap.get(columnConfig.getName()); 82 | return ValueFactory.newFloat(DoubleCast.asDouble(value.toDouble(), fromUnit, toUnit)); 83 | } 84 | else { 85 | assert false; 86 | throw new RuntimeException(); 87 | } 88 | } 89 | 90 | public Value fromString(ColumnConfig columnConfig, StringValue value) 91 | { 92 | Type outputType = columnConfig.getType(); 93 | TimestampParser parser = timestampParserMap.get(columnConfig.getName()); 94 | if (outputType instanceof StringType) { 95 | TimestampFormatter formatter = timestampFormatterMap.get(columnConfig.getName()); 96 | return ValueFactory.newString(StringCast.asString(value.asString(), parser, formatter)); 97 | } 98 | else if (outputType instanceof LongType) { 99 | TimestampUnit toUnit = toTimestampUnitMap.get(columnConfig.getName()); 100 | return ValueFactory.newInteger(StringCast.asLong(value.asString(), parser, toUnit)); 101 | } 102 | else if (outputType instanceof DoubleType) { 103 | TimestampUnit toUnit = toTimestampUnitMap.get(columnConfig.getName()); 104 | return ValueFactory.newFloat(StringCast.asDouble(value.asString(), parser, toUnit)); 105 | } 106 | else { 107 | assert false; 108 | throw new RuntimeException(); 109 | } 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/filter/timestamp_format/JsonPathUtil.java: -------------------------------------------------------------------------------- 1 | package org.embulk.filter.timestamp_format; 2 | 3 | import io.github.medjed.jsonpathcompiler.InvalidPathException; 4 | import io.github.medjed.jsonpathcompiler.expressions.Path; 5 | import io.github.medjed.jsonpathcompiler.expressions.path.ArrayIndexOperation; 6 | import 
io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken; 7 | import io.github.medjed.jsonpathcompiler.expressions.path.FunctionPathToken; 8 | import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler; 9 | import io.github.medjed.jsonpathcompiler.expressions.path.PathToken; 10 | import io.github.medjed.jsonpathcompiler.expressions.path.PredicatePathToken; 11 | import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken; 12 | import io.github.medjed.jsonpathcompiler.expressions.path.ScanPathToken; 13 | import org.embulk.config.ConfigException; 14 | 15 | public class JsonPathUtil 16 | { 17 | private JsonPathUtil() {} 18 | 19 | public static String getColumnName(String jsonPath) 20 | { 21 | Path compiledPath; 22 | try { 23 | compiledPath = PathCompiler.compile(jsonPath); 24 | } 25 | catch (InvalidPathException e) { 26 | throw new ConfigException(String.format("jsonpath %s, %s", jsonPath, e.getMessage())); 27 | } 28 | PathToken pathToken = compiledPath.getRoot(); 29 | pathToken = pathToken.next(); // skip $ 30 | return ((PropertyPathToken) pathToken).getProperties().get(0); 31 | } 32 | 33 | public static void assertJsonPathFormat(String path) 34 | { 35 | Path compiledPath; 36 | try { 37 | compiledPath = PathCompiler.compile(path); 38 | } 39 | catch (InvalidPathException e) { 40 | throw new ConfigException(String.format("jsonpath %s, %s", path, e.getMessage())); 41 | } 42 | PathToken pathToken = compiledPath.getRoot(); 43 | while (true) { 44 | assertSupportedPathToken(pathToken, path); 45 | if (pathToken.isLeaf()) { 46 | break; 47 | } 48 | pathToken = pathToken.next(); 49 | } 50 | } 51 | 52 | protected static void assertSupportedPathToken(PathToken pathToken, String path) 53 | { 54 | if (pathToken instanceof ArrayPathToken) { 55 | ArrayIndexOperation arrayIndexOperation = ((ArrayPathToken) pathToken).getArrayIndexOperation(); 56 | assertSupportedArrayPathToken(arrayIndexOperation, path); 57 | } 58 | else if (pathToken instanceof 
ScanPathToken) { 59 | throw new ConfigException(String.format("scan path token is not supported \"%s\"", path)); 60 | } 61 | else if (pathToken instanceof FunctionPathToken) { 62 | throw new ConfigException(String.format("function path token is not supported \"%s\"", path)); 63 | } 64 | else if (pathToken instanceof PredicatePathToken) { 65 | throw new ConfigException(String.format("predicate path token is not supported \"%s\"", path)); 66 | } 67 | } 68 | 69 | protected static void assertSupportedArrayPathToken(ArrayIndexOperation arrayIndexOperation, String path) 70 | { 71 | if (arrayIndexOperation == null) { 72 | throw new ConfigException(String.format("Array Slice Operation is not supported \"%s\"", path)); 73 | } 74 | else if (!arrayIndexOperation.isSingleIndexOperation()) { 75 | throw new ConfigException(String.format("Multi Array Indexes is not supported \"%s\"", path)); 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/filter/timestamp_format/JsonVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.filter.timestamp_format; 2 | 3 | import io.github.medjed.jsonpathcompiler.expressions.Path; 4 | import io.github.medjed.jsonpathcompiler.expressions.path.ArrayPathToken; 5 | import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler; 6 | import io.github.medjed.jsonpathcompiler.expressions.path.PathToken; 7 | import io.github.medjed.jsonpathcompiler.expressions.path.PropertyPathToken; 8 | import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.ColumnConfig; 9 | import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.PluginTask; 10 | 11 | import org.embulk.spi.Exec; 12 | import org.msgpack.value.ArrayValue; 13 | import org.msgpack.value.MapValue; 14 | import org.msgpack.value.Value; 15 | import org.msgpack.value.ValueFactory; 16 | 17 | import org.slf4j.Logger; 18 | 19 | import 
java.util.HashMap; 20 | import java.util.HashSet; 21 | import java.util.Map; 22 | 23 | public class JsonVisitor 24 | { 25 | private static final Logger logger = Exec.getLogger(TimestampFormatFilterPlugin.class); 26 | private final PluginTask task; 27 | private final JsonCaster jsonCaster; 28 | private final HashMap jsonPathColumnConfigMap = new HashMap<>(); 29 | private final HashSet shouldVisitSet = new HashSet<>(); 30 | 31 | JsonVisitor(PluginTask task, JsonCaster jsonCaster) 32 | { 33 | this.task = task; 34 | this.jsonCaster = jsonCaster; 35 | 36 | assertJsonPathFormat(); 37 | buildJsonPathColumnConfigMap(); 38 | buildShouldVisitSet(); 39 | } 40 | 41 | private void assertJsonPathFormat() 42 | { 43 | for (ColumnConfig columnConfig : task.getColumns()) { 44 | String name = columnConfig.getName(); 45 | if (!PathCompiler.isProbablyJsonPath(name)) { 46 | continue; 47 | } 48 | JsonPathUtil.assertJsonPathFormat(name); 49 | } 50 | } 51 | 52 | private void buildJsonPathColumnConfigMap() 53 | { 54 | // json path => Type 55 | for (ColumnConfig columnConfig : task.getColumns()) { 56 | String name = columnConfig.getName(); 57 | if (!PathCompiler.isProbablyJsonPath(name)) { 58 | continue; 59 | } 60 | Path compiledPath = PathCompiler.compile(name); 61 | this.jsonPathColumnConfigMap.put(compiledPath.toString(), columnConfig); 62 | } 63 | } 64 | 65 | private void buildShouldVisitSet() 66 | { 67 | // json partial path => Boolean to avoid unnecessary type: json visit 68 | for (ColumnConfig columnConfig : task.getColumns()) { 69 | String name = columnConfig.getName(); 70 | if (! PathCompiler.isProbablyJsonPath(name)) { 71 | continue; 72 | } 73 | Path compiledPath = PathCompiler.compile(name); 74 | PathToken parts = compiledPath.getRoot(); 75 | StringBuilder partialPath = new StringBuilder("$"); 76 | while (! 
parts.isLeaf()) { 77 | parts = parts.next(); // first next() skips "$" 78 | partialPath.append(parts.getPathFragment()); 79 | this.shouldVisitSet.add(partialPath.toString()); 80 | } 81 | } 82 | } 83 | 84 | private boolean shouldVisit(String jsonPath) 85 | { 86 | return shouldVisitSet.contains(jsonPath); 87 | } 88 | 89 | public Value visit(String rootPath, Value value) 90 | { 91 | if (!shouldVisit(rootPath)) { 92 | return value; 93 | } 94 | if (value.isArrayValue()) { 95 | ArrayValue arrayValue = value.asArrayValue(); 96 | int size = arrayValue.size(); 97 | Value[] newValue = new Value[size]; 98 | for (int i = 0; i < size; i++) { 99 | String pathFragment = ArrayPathToken.getPathFragment(i); 100 | String k = new StringBuilder(rootPath).append(pathFragment).toString(); 101 | if (!shouldVisit(k)) { 102 | k = new StringBuilder(rootPath).append("[*]").toString(); // try [*] too 103 | } 104 | Value v = arrayValue.get(i); 105 | newValue[i] = visit(k, v); 106 | } 107 | return ValueFactory.newArray(newValue, true); 108 | } 109 | else if (value.isMapValue()) { 110 | MapValue mapValue = value.asMapValue(); 111 | int size = mapValue.size() * 2; 112 | Value[] newValue = new Value[size]; 113 | int i = 0; 114 | for (Map.Entry entry : mapValue.entrySet()) { 115 | Value k = entry.getKey(); 116 | Value v = entry.getValue(); 117 | String pathFragment = PropertyPathToken.getPathFragment(k.asStringValue().asString()); 118 | String newPath = new StringBuilder(rootPath).append(pathFragment).toString(); 119 | Value r = visit(newPath, v); 120 | newValue[i++] = k; 121 | newValue[i++] = r; 122 | } 123 | return ValueFactory.newMap(newValue, true); 124 | } 125 | else if (value.isIntegerValue()) { 126 | ColumnConfig columnConfig = jsonPathColumnConfigMap.get(rootPath); 127 | return jsonCaster.fromLong(columnConfig, value.asIntegerValue()); 128 | } 129 | else if (value.isFloatValue()) { 130 | ColumnConfig columnConfig = jsonPathColumnConfigMap.get(rootPath); 131 | return 
package org.embulk.filter.timestamp_format;

// Converts a JRuby (strftime-style) time format into a Java (Joda-Time) pattern.
// Aimed only at the parser side: the JRuby format language is richer than the
// Java one when formatting, so the reverse direction is not attempted.

import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class TimestampFormatConverter
{
    // JRuby directive letter => Joda-Time pattern fragment ("d" => "dd", ...).
    public static final HashMap<String, String> RUBY_TO_JAVA_FORMAT_TABLE = new HashMap<>();
    // Matches a convertible "%<flags><letter>" directive, capturing the letter.
    private static final Pattern IDENTIFIER_PATTERN;
    // Matches a whitespace-delimited token that is not a %-directive, so it can be quoted.
    private static final Pattern NON_IDENTIFIER_PATTERN;

    static
    {
        RUBY_TO_JAVA_FORMAT_TABLE.put("A", "EEEE");        // %A<Sunday>        => EEEE<Sunday>
        RUBY_TO_JAVA_FORMAT_TABLE.put("a", "EEE");         // %a<Sun>           => EEE<Sun>
        RUBY_TO_JAVA_FORMAT_TABLE.put("B", "MMMM");        // %B<May>           => MMMM<May>
        RUBY_TO_JAVA_FORMAT_TABLE.put("b", "MMM");         // %b<May>           => MMM<May>
        RUBY_TO_JAVA_FORMAT_TABLE.put("C", "CC");          // %C<20>            => CC<20>
        RUBY_TO_JAVA_FORMAT_TABLE.put("c", "EEE MMM dd HH:mm:ss yyyy");
        RUBY_TO_JAVA_FORMAT_TABLE.put("D", "MM/dd/yy");    // %D<05/13/16>
        RUBY_TO_JAVA_FORMAT_TABLE.put("d", "dd");          // %d<13>
        RUBY_TO_JAVA_FORMAT_TABLE.put("e", "dd");          // %e<13>
        RUBY_TO_JAVA_FORMAT_TABLE.put("F", "yyyy-MM-dd");  // %F<2016-05-13>
        RUBY_TO_JAVA_FORMAT_TABLE.put("H", "HH");          // %H<09>
        RUBY_TO_JAVA_FORMAT_TABLE.put("h", "MMM");         // %h<May>
        RUBY_TO_JAVA_FORMAT_TABLE.put("I", "hh");          // %I<09>
        RUBY_TO_JAVA_FORMAT_TABLE.put("j", "DDD");         // %j<134>
        //RUBY_TO_JAVA_FORMAT_TABLE.put("k", "HH");        // " 9" fails with HH
        RUBY_TO_JAVA_FORMAT_TABLE.put("L", "SSS");         // %L<123>
        //RUBY_TO_JAVA_FORMAT_TABLE.put("l", "hh");        // " 9" fails with hh
        RUBY_TO_JAVA_FORMAT_TABLE.put("M", "mm");          // %M<02>
        RUBY_TO_JAVA_FORMAT_TABLE.put("m", "MM");          // %m<05>
        //RUBY_TO_JAVA_FORMAT_TABLE.put("n", "");
        RUBY_TO_JAVA_FORMAT_TABLE.put("N", "nnnnnnnnn");   // %N<123456789>
        RUBY_TO_JAVA_FORMAT_TABLE.put("P", "a");           // %P<am>
        RUBY_TO_JAVA_FORMAT_TABLE.put("p", "a");           // %p<AM>
        RUBY_TO_JAVA_FORMAT_TABLE.put("R", "HH:mm");       // %R<09:02>
        RUBY_TO_JAVA_FORMAT_TABLE.put("r", "hh:mm:ss a");  // %r<09:02:39 AM>
        RUBY_TO_JAVA_FORMAT_TABLE.put("S", "ss");          // %S<39>
        //RUBY_TO_JAVA_FORMAT_TABLE.put("s", "");          // N/A
        RUBY_TO_JAVA_FORMAT_TABLE.put("T", "HH:mm:ss");    // %T<09:02:39>
        //RUBY_TO_JAVA_FORMAT_TABLE.put("t", "");
        RUBY_TO_JAVA_FORMAT_TABLE.put("U", "w");           // %U<19>
        RUBY_TO_JAVA_FORMAT_TABLE.put("u", "e");           // %u<5>
        RUBY_TO_JAVA_FORMAT_TABLE.put("v", "dd-MMM-yyyy"); // %v<13-MAY-2016>
        RUBY_TO_JAVA_FORMAT_TABLE.put("V", "w");           // %V<19>
        RUBY_TO_JAVA_FORMAT_TABLE.put("W", "w");           // %W<19>
        RUBY_TO_JAVA_FORMAT_TABLE.put("w", "e");           // %w<5>
        RUBY_TO_JAVA_FORMAT_TABLE.put("X", "HH:mm:ss");    // %X<09:02:39>
        RUBY_TO_JAVA_FORMAT_TABLE.put("x", "MM/dd/yy");    // %x<05/13/16>
        RUBY_TO_JAVA_FORMAT_TABLE.put("Y", "yyyy");        // %Y<2016>
        RUBY_TO_JAVA_FORMAT_TABLE.put("y", "yy");          // %y<16>
        RUBY_TO_JAVA_FORMAT_TABLE.put("Z", "z");           // %Z<UTC>
        RUBY_TO_JAVA_FORMAT_TABLE.put("z", "Z");           // %z<+0000>, %:z, %::z
        //RUBY_TO_JAVA_FORMAT_TABLE.put("%", "");

        // All keys are single characters with no regex meta-characters, so they
        // can be concatenated directly into a character class.
        String keys = String.join("", RUBY_TO_JAVA_FORMAT_TABLE.keySet());
        IDENTIFIER_PATTERN = Pattern.compile("%[-_^#0-9:]*([" + keys + "])");

        NON_IDENTIFIER_PATTERN = Pattern.compile("(^|\\s)([^%\\s]\\S*)");
    }

    /**
     * Converts a JRuby strftime-style format into a Joda-Time pattern.
     *
     * @return the Joda-Time pattern, or null when some directive has no Java
     *         equivalent (the caller falls back to the JRuby parser)
     */
    public static String toJavaFormat(String rubyFormat)
    {
        String quotedFormat = quoteFormat(rubyFormat);
        Matcher match = IDENTIFIER_PATTERN.matcher(quotedFormat);
        StringBuffer buf = new StringBuffer();
        while (match.find()) {
            // Table values are constants without '$' or '\', safe as replacements.
            match.appendReplacement(buf, RUBY_TO_JAVA_FORMAT_TABLE.get(match.group(1)));
        }
        match.appendTail(buf);
        String javaFormat = buf.toString();

        // Any remaining "%" means an unconvertible directive; give up.
        return javaFormat.contains("%") ? null : javaFormat;
    }

    // Wraps each whitespace-separated literal token (one not starting with '%')
    // in single quotes so Joda-Time does not interpret its letters as pattern
    // characters, e.g. "%Y JST" => "%Y 'JST'".
    private static String quoteFormat(String rubyFormat)
    {
        Matcher match = NON_IDENTIFIER_PATTERN.matcher(rubyFormat);
        StringBuffer buf = new StringBuffer();
        while (match.find()) {
            String replacement = match.group(1) + "'" + match.group(2) + "'";
            // quoteReplacement: a literal token containing '$' or '\' used to
            // throw from appendReplacement (illegal group reference).
            match.appendReplacement(buf, Matcher.quoteReplacement(replacement));
        }
        match.appendTail(buf);
        return buf.toString();
    }
}
package org.embulk.filter.timestamp_format;

import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import io.github.medjed.jsonpathcompiler.expressions.path.PathCompiler;
import org.embulk.config.Config;
import org.embulk.config.ConfigDefault;
import org.embulk.config.ConfigException;
import org.embulk.config.ConfigInject;
import org.embulk.config.ConfigSource;
import org.embulk.config.Task;
import org.embulk.config.TaskSource;

import org.embulk.spi.Column;
import org.embulk.spi.Exec;
import org.embulk.spi.FilterPlugin;
import org.embulk.spi.Page;
import org.embulk.spi.PageBuilder;
import org.embulk.spi.PageOutput;
import org.embulk.spi.PageReader;
import org.embulk.spi.Schema;

import org.embulk.spi.type.BooleanType;
import org.embulk.spi.type.JsonType;
import org.embulk.spi.type.TimestampType;
import org.embulk.spi.type.Type;
import org.slf4j.Logger;

import java.util.List;

/**
 * Embulk filter plugin that re-formats timestamp-like columns: it reads
 * string/long/double/timestamp values and emits them in a configured output
 * type, format, time zone, and epoch unit.
 */
public class TimestampFormatFilterPlugin implements FilterPlugin
{
    private static final Logger logger = Exec.getLogger(TimestampFormatFilterPlugin.class);

    public TimestampFormatFilterPlugin() {}

    // NOTE: This is not spi.ColumnConfig
    interface ColumnConfig extends Task,
            TimestampParser.TimestampColumnOption, TimestampFormatter.TimestampColumnOption
    {
        // Plain column name, or a JSONPath addressing inside a json-typed column.
        @Config("name")
        String getName();

        // Output type of the column.
        @Config("type")
        @ConfigDefault("\"string\"")
        Type getType();

        // Epoch unit of numeric input values.
        @Config("from_unit")
        @ConfigDefault("null")
        Optional<TimestampUnit> getFromUnit();

        // Epoch unit of numeric output values.
        @Config("to_unit")
        @ConfigDefault("null")
        Optional<TimestampUnit> getToUnit();
    }

    interface PluginTask extends Task,
            TimestampParser.Task, TimestampFormatter.Task
    {
        @Config("columns")
        @ConfigDefault("[]")
        List<ColumnConfig> getColumns();

        @Config("stop_on_invalid_record")
        @ConfigDefault("false")
        Boolean getStopOnInvalidRecord();

        @Config("timestamp_parser")
        @ConfigDefault("\"auto\"") // or auto_java
        String getTimeStampParser();

        @Config("default_from_timestamp_unit")
        @ConfigDefault("\"second\"")
        TimestampUnit getDefaultFromTimestampUnit();

        @Config("default_to_timestamp_unit")
        @ConfigDefault("\"second\"")
        TimestampUnit getDefaultToTimestampUnit();
    }

    @Override
    public void transaction(final ConfigSource config, final Schema inputSchema,
            final FilterPlugin.Control control)
    {
        PluginTask task = config.loadConfig(PluginTask.class);

        configure(task, inputSchema);
        Schema outputSchema = buildOutputSchema(task, inputSchema);
        control.run(task.dump(), outputSchema);
    }

    // Validates the configured column list against the input schema up front.
    private void configure(PluginTask task, Schema inputSchema)
    {
        List<ColumnConfig> columns = task.getColumns();

        // throw if column does not exist
        for (ColumnConfig columnConfig : columns) {
            String name = columnConfig.getName();
            if (PathCompiler.isProbablyJsonPath(name)) {
                String columnName = JsonPathUtil.getColumnName(name);
                inputSchema.lookupColumn(columnName);
            }
            else {
                inputSchema.lookupColumn(name);
            }
        }

        // throw if column type is not supported
        for (ColumnConfig columnConfig : columns) {
            String name = columnConfig.getName();
            Type type = columnConfig.getType();
            if (type instanceof BooleanType) {
                throw new ConfigException(String.format("casting to boolean is not available: \"%s\"", name));
            }
            if (type instanceof JsonType) {
                throw new ConfigException(String.format("casting to json is not available: \"%s\"", name));
            }
            if (PathCompiler.isProbablyJsonPath(name) && type instanceof TimestampType) {
                throw new ConfigException(String.format("casting a json path into timestamp is not available: \"%s\"", name));
            }
        }
    }

    // Copies the input schema, overriding the type of each configured column.
    // (Renamed from the original "buildOuputSchema" typo; the method is private.)
    private Schema buildOutputSchema(final PluginTask task, final Schema inputSchema)
    {
        List<ColumnConfig> columnConfigs = task.getColumns();
        ImmutableList.Builder<Column> builder = ImmutableList.builder();
        int i = 0;
        for (Column inputColumn : inputSchema.getColumns()) {
            String name = inputColumn.getName();
            Type type = inputColumn.getType();
            ColumnConfig columnConfig = getColumnConfig(name, columnConfigs);
            if (columnConfig != null) {
                type = columnConfig.getType();
            }
            builder.add(new Column(i++, name, type));
        }
        return new Schema(builder.build());
    }

    // Linear scan is acceptable: the list is tiny and this runs once per column
    // at transaction time.
    private ColumnConfig getColumnConfig(String name, List<ColumnConfig> columnConfigs)
    {
        for (ColumnConfig columnConfig : columnConfigs) {
            if (columnConfig.getName().equals(name)) {
                return columnConfig;
            }
        }
        return null;
    }

    @Override
    public PageOutput open(final TaskSource taskSource, final Schema inputSchema,
            final Schema outputSchema, final PageOutput output)
    {
        final PluginTask task = taskSource.loadTask(PluginTask.class);

        return new PageOutput() {
            private PageReader pageReader = new PageReader(inputSchema);
            private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
            private ColumnVisitorImpl visitor = new ColumnVisitorImpl(task, inputSchema, outputSchema, pageReader, pageBuilder);

            @Override
            public void finish()
            {
                pageBuilder.finish();
            }

            @Override
            public void close()
            {
                pageBuilder.close();
            }

            @Override
            public void add(Page page)
            {
                pageReader.setPage(page);

                while (pageReader.nextRecord()) {
                    inputSchema.visitColumns(visitor);
                    pageBuilder.addRecord();
                }
            }
        };
    }
}
package org.embulk.filter.timestamp_format;

import com.google.common.base.Optional;

import org.embulk.config.Config;
import org.embulk.config.ConfigDefault;

import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.PluginTask;

import org.embulk.spi.time.Timestamp;
import org.embulk.spi.util.LineEncoder;

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.jruby.util.RubyDateFormat;

import java.util.Locale;

import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

/**
 * Formats an Embulk {@link Timestamp} either with a JRuby strftime format
 * (when the format string contains "%") or with a Joda-Time pattern.
 * For Joda patterns, the tokens "nnnnnnnnn"/"nnnnnn" are handled specially to
 * print nano/micro seconds, which Joda-Time does not support natively.
 */
public class TimestampFormatter
{
    public interface Task
    {
        @Config("default_to_timezone")
        @ConfigDefault("\"UTC\"")
        DateTimeZone getDefaultToTimeZone();

        @Config("default_to_timestamp_format")
        @ConfigDefault("\"%Y-%m-%d %H:%M:%S.%6N %z\"")
        String getDefaultToTimestampFormat();
    }

    public interface TimestampColumnOption
    {
        @Config("to_timezone")
        @ConfigDefault("null")
        Optional<DateTimeZone> getToTimeZone();

        @Config("to_format")
        @ConfigDefault("null")
        Optional<String> getToFormat();
    }

    private final RubyDateFormat jrubyFormatter;   // non-null iff format contains "%"
    private final DateTimeFormatter javaFormatter; // non-null otherwise
    private boolean handleNanoResolution = false;  // Joda pattern contained "nnnnnnnnn"
    private boolean handleMicroResolution = false; // Joda pattern contained "nnnnnn"
    private final DateTimeZone toTimeZone;

    public TimestampFormatter(PluginTask task, Optional<? extends TimestampColumnOption> columnOption)
    {
        this(resolveFormat(task, columnOption), resolveTimeZone(task, columnOption));
    }

    // Column-level option wins over the task-level default.
    private static String resolveFormat(PluginTask task, Optional<? extends TimestampColumnOption> columnOption)
    {
        if (columnOption.isPresent()) {
            return columnOption.get().getToFormat().or(task.getDefaultToTimestampFormat());
        }
        return task.getDefaultToTimestampFormat();
    }

    // Column-level option wins over the task-level default.
    private static DateTimeZone resolveTimeZone(PluginTask task, Optional<? extends TimestampColumnOption> columnOption)
    {
        if (columnOption.isPresent()) {
            return columnOption.get().getToTimeZone().or(task.getDefaultToTimeZone());
        }
        return task.getDefaultToTimeZone();
    }

    public TimestampFormatter(String format, DateTimeZone toTimeZone)
    {
        this.toTimeZone = toTimeZone;
        if (format.contains("%")) {
            // JRuby strftime-style format
            this.jrubyFormatter = new RubyDateFormat(format, Locale.ENGLISH, true);
            this.javaFormatter = null;
        }
        else {
            this.jrubyFormatter = null;
            String pattern = format;
            if (pattern.contains("nnnnnnnnn")) {
                // Joda cannot print nano seconds: emit a quoted %09d placeholder
                // and fill it in later with String.format (see javaFormat()).
                this.handleNanoResolution = true;
                pattern = pattern.replaceAll("nnnnnnnnn", "'%09d'");
            }
            else if (pattern.contains("nnnnnn")) {
                // Same trick for micro seconds.
                this.handleMicroResolution = true;
                pattern = pattern.replaceAll("nnnnnn", "'%06d'");
            }
            this.javaFormatter = DateTimeFormat.forPattern(pattern).withLocale(Locale.ENGLISH).withZone(toTimeZone);
        }
    }

    public DateTimeZone getToTimeZone()
    {
        return toTimeZone;
    }

    public void format(Timestamp value, LineEncoder encoder)
    {
        // TODO optimize by directly appending to internal buffer
        encoder.addText(format(value));
    }

    /** Formats the timestamp with whichever formatter the constructor selected. */
    public String format(Timestamp value)
    {
        if (jrubyFormatter != null) {
            return jrubyFormat(value);
        }
        if (javaFormatter != null) {
            return javaFormat(value);
        }
        assert false; // constructor always sets exactly one formatter
        throw new RuntimeException();
    }

    private String jrubyFormat(Timestamp value)
    {
        // TODO optimize by using reused StringBuilder
        jrubyFormatter.setDateTime(new DateTime(value.getEpochSecond() * 1000, toTimeZone));
        jrubyFormatter.setNSec(value.getNano());
        return jrubyFormatter.format(null);
    }

    private String javaFormat(Timestamp value)
    {
        long epochMilli = value.getEpochSecond() * 1000;
        if (handleNanoResolution) {
            // Fill the quoted %09d placeholder with the nano part.
            return String.format(javaFormatter.print(epochMilli), value.getNano());
        }
        if (handleMicroResolution) {
            return String.format(javaFormatter.print(epochMilli), value.getNano() / 1000);
        }
        return javaFormatter.print(epochMilli + value.getNano() / 1000000);
    }
}
package org.embulk.filter.timestamp_format;

import com.google.common.base.Optional;

import org.embulk.config.Config;
import org.embulk.config.ConfigDefault;

import org.embulk.filter.timestamp_format.TimestampFormatFilterPlugin.PluginTask;

import org.embulk.spi.time.Timestamp;

import org.embulk.spi.time.TimestampParseException;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormatter;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.joda.time.format.DateTimeFormat;
import org.jruby.embed.ScriptingContainer;

/**
 * Parses timestamp strings with either JRuby strftime formats ("%"-style) or
 * Joda-Time patterns, trying each configured format in order until one succeeds.
 */
public class TimestampParser {
    public interface Task {
        @Config("default_from_timezone")
        @ConfigDefault("\"UTC\"")
        DateTimeZone getDefaultFromTimeZone();

        @Config("default_from_timestamp_format")
        @ConfigDefault("[\"%Y-%m-%d %H:%M:%S.%N %z\"]")
        List<String> getDefaultFromTimestampFormat();
    }

    public interface TimestampColumnOption {
        @Config("from_timezone")
        @ConfigDefault("null")
        Optional<DateTimeZone> getFromTimeZone();

        @Config("from_format")
        @ConfigDefault("null")
        Optional<List<String>> getFromFormat();
    }

    // Parsers for "%"-style formats, tried in order by jrubyParse().
    private final List<org.embulk.spi.time.TimestampParser> jrubyParserList = new ArrayList<>();
    // Parsers for Joda-Time patterns, tried in order by javaParse().
    private final List<DateTimeFormatter> javaParserList = new ArrayList<>();
    // Parallel to javaParserList: whether the i-th pattern needs the "nnnnnnnnn"
    // nano-second workaround (Joda-Time itself only resolves milli seconds).
    private final List<Boolean> handleNanoResolutionList = new ArrayList<>();
    private final DateTimeZone defaultFromTimeZone;
    // Captures the fractional-second digits, e.g. "123" in "00:00:00.123".
    private final Pattern nanoSecPattern = Pattern.compile("\\.(\\d+)");

    TimestampParser(PluginTask task) {
        this(task.getDefaultFromTimestampFormat(), task.getDefaultFromTimeZone());
    }

    public TimestampParser(PluginTask task, TimestampColumnOption columnOption) {
        this(columnOption.getFromFormat().or(task.getDefaultFromTimestampFormat()),
                columnOption.getFromTimeZone().or(task.getDefaultFromTimeZone()));
    }

    public TimestampParser(List<String> formatList, DateTimeZone defaultFromTimeZone) {
        // TODO get default current time from ExecTask.getExecTimestamp
        for (String format : formatList) {
            if (format.contains("%")) {
                org.embulk.spi.time.TimestampParser parser = createTimestampParser(format, defaultFromTimeZone);
                this.jrubyParserList.add(parser);
            } else {
                // Special treatment for nano resolution: "n" is not supported by
                // Joda-Time, so map it to "S" and recover the nano part separately
                // in javaParse() via parseNano().
                if (format.contains("nnnnnnnnn")) {
                    this.handleNanoResolutionList.add(true);
                    String newFormat = format.replaceAll("n", "S");
                    DateTimeFormatter parser = DateTimeFormat.forPattern(newFormat).withLocale(Locale.ENGLISH).withZone(defaultFromTimeZone);
                    this.javaParserList.add(parser);
                }
                else {
                    this.handleNanoResolutionList.add(false);
                    DateTimeFormatter parser = DateTimeFormat.forPattern(format).withLocale(Locale.ENGLISH).withZone(defaultFromTimeZone);
                    this.javaParserList.add(parser);
                }
            }
        }
        this.defaultFromTimeZone = defaultFromTimeZone;
    }

    public DateTimeZone getDefaultFromTimeZone() {
        return defaultFromTimeZone;
    }

    /**
     * Parses text with the configured formats, trying each in order.
     *
     * @throws TimestampParseException  when every JRuby format fails
     * @throws IllegalArgumentException when every Joda format fails
     */
    public Timestamp parse(String text) throws TimestampParseException, IllegalArgumentException {
        if (!jrubyParserList.isEmpty()) {
            return jrubyParse(text);
        } else if (!javaParserList.isEmpty()) {
            return javaParse(text);
        } else {
            assert false; // the constructor always fills one of the two lists
            throw new RuntimeException();
        }
    }

    private Timestamp jrubyParse(String text) throws TimestampParseException {
        Timestamp timestamp = null;
        TimestampParseException exception = null;

        for (org.embulk.spi.time.TimestampParser parser : jrubyParserList) {
            try {
                // NOTE: embulk >= 0.8.27 uses new faster jruby timestamp parser, and it supports nano second
                // NOTE: embulk < 0.8.27 uses old slower jruby timestamp parser, and it supports micro second
                timestamp = parser.parse(text);
                break;
            } catch (TimestampParseException ex) {
                exception = ex;
            }
        }
        if (timestamp == null) {
            throw exception;
        }
        return timestamp;
    }

    private Timestamp javaParse(String text) throws IllegalArgumentException {
        // FIX: the previous code used "msec == -1" as the failure sentinel, so an
        // input legitimately parsing to epoch -1 ms was treated as a failure and
        // "throw exception" could even throw null. Track success explicitly.
        boolean parsed = false;
        long msec = 0;
        long nsec = -1;
        boolean handleNanoResolution = false;
        IllegalArgumentException exception = null;

        for (int i = 0; i < javaParserList.size(); i++) {
            DateTimeFormatter parser = javaParserList.get(i);
            handleNanoResolution = handleNanoResolutionList.get(i);
            try {
                if (handleNanoResolution) {
                    nsec = parseNano(text);
                }
                DateTime dateTime = parser.parseDateTime(text);
                msec = dateTime.getMillis(); // NOTE: milli second resolution
                parsed = true;
                break;
            } catch (IllegalArgumentException ex) {
                exception = ex;
            }
        }
        if (!parsed) {
            throw exception;
        }

        if (handleNanoResolution) {
            long sec = msec / 1000;
            return Timestamp.ofEpochSecond(sec, nsec);
        }
        else {
            long nanoAdjustment = msec * 1000000;
            return Timestamp.ofEpochSecond(0, nanoAdjustment);
        }
    }

    // Extracts the fractional-second digits and right-pads them to nano seconds,
    // e.g. ".123" => 123000000. Returns -1 when no fraction is present.
    private long parseNano(String text) {
        long nsec = -1;
        Matcher m = nanoSecPattern.matcher(text);
        if (m.find()) {
            String nanoStr = m.group(1);
            nsec = Long.parseLong(nanoStr) * (long) Math.pow(10, 9 - nanoStr.length());
        }
        return nsec;
    }

    // Minimal Task implementation to drive the old (pre-0.8.29) embulk
    // TimestampParser constructor.
    private class TimestampParserTaskImpl implements org.embulk.spi.time.TimestampParser.Task
    {
        private final DateTimeZone defaultTimeZone;
        private final String defaultTimestampFormat;
        private final String defaultDate;
        public TimestampParserTaskImpl(
                DateTimeZone defaultTimeZone,
                String defaultTimestampFormat,
                String defaultDate)
        {
            this.defaultTimeZone = defaultTimeZone;
            this.defaultTimestampFormat = defaultTimestampFormat;
            this.defaultDate = defaultDate;
        }
        @Override
        public DateTimeZone getDefaultTimeZone()
        {
            return this.defaultTimeZone;
        }
        @Override
        public String getDefaultTimeZoneId()
        {
            return this.defaultTimeZone.getID();
        }
        @Override
        public String getDefaultTimestampFormat()
        {
            return this.defaultTimestampFormat;
        }
        @Override
        public String getDefaultDate()
        {
            return this.defaultDate;
        }
        public ScriptingContainer getJRuby()
        {
            return null;
        }
    }

    // Minimal TimestampColumnOption implementation; mirrors the values held by
    // TimestampParserTaskImpl.
    private class TimestampParserColumnOptionImpl implements org.embulk.spi.time.TimestampParser.TimestampColumnOption
    {
        private final Optional<DateTimeZone> timeZone;
        private final Optional<String> format;
        private final Optional<String> date;
        public TimestampParserColumnOptionImpl(
                Optional<DateTimeZone> timeZone,
                Optional<String> format,
                Optional<String> date)
        {
            this.timeZone = timeZone;
            this.format = format;
            this.date = date;
        }
        @Override
        public Optional<DateTimeZone> getTimeZone()
        {
            return this.timeZone;
        }
        @Override
        public Optional<String> getTimeZoneId()
        {
            if (this.timeZone.isPresent()) {
                return Optional.of(this.timeZone.get().getID());
            }
            else {
                return Optional.absent();
            }
        }
        @Override
        public Optional<String> getFormat()
        {
            return this.format;
        }
        @Override
        public Optional<String> getDate()
        {
            return this.date;
        }
    }

    // ToDo: Replace with `TimestampParser.of(Task, TimestampColumnOption)`
    // after deciding to drop supporting embulk < 0.8.29.
    private org.embulk.spi.time.TimestampParser createTimestampParser(String format, DateTimeZone timezone)
    {
        return createTimestampParser(format, timezone, "1970-01-01");
    }

    // ToDo: Replace with `TimestampParser.of(Task, TimestampColumnOption)`
    // after deciding to drop supporting embulk < 0.8.29.
    private org.embulk.spi.time.TimestampParser createTimestampParser(String format, DateTimeZone timezone, String date)
    {
        TimestampParserTaskImpl task = new TimestampParserTaskImpl(timezone, format, date);
        TimestampParserColumnOptionImpl columnOption = new TimestampParserColumnOptionImpl(
                Optional.of(timezone), Optional.of(format), Optional.of(date));
        return new org.embulk.spi.time.TimestampParser(task, columnOption);
    }
}
259 | private org.embulk.spi.time.TimestampParser createTimestampParser(String format, DateTimeZone timezone, String date) 260 | { 261 | TimestampParserTaskImpl task = new TimestampParserTaskImpl(timezone, format, date); 262 | TimestampParserColumnOptionImpl columnOption = new TimestampParserColumnOptionImpl( 263 | Optional.of(timezone), Optional.of(format), Optional.of(date)); 264 | return new org.embulk.spi.time.TimestampParser(task, columnOption); 265 | } 266 | } 267 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/filter/timestamp_format/TimestampUnit.java: -------------------------------------------------------------------------------- 1 | package org.embulk.filter.timestamp_format; 2 | 3 | import com.fasterxml.jackson.databind.annotation.JsonDeserialize; 4 | import org.embulk.spi.time.Timestamp; 5 | 6 | @JsonDeserialize(using=TimestampUnitDeserializer.class) 7 | public enum TimestampUnit { 8 | Second { 9 | @Override 10 | public int scale() { 11 | return 1; 12 | } 13 | 14 | @Override 15 | public int scaleToNano() { 16 | return 1000000000; 17 | } 18 | }, 19 | MilliSecond { 20 | @Override 21 | public int scale() { 22 | return 1000; 23 | } 24 | 25 | @Override 26 | public int scaleToNano() { 27 | return 1000000; 28 | } 29 | }, 30 | MicroSecond { 31 | @Override 32 | public int scale() { 33 | return 1000000; 34 | } 35 | 36 | @Override 37 | public int scaleToNano() { 38 | return 1000; 39 | } 40 | }, 41 | NanoSecond { 42 | @Override 43 | public int scale() { 44 | return 1000000000; 45 | } 46 | 47 | @Override 48 | public int scaleToNano() { 49 | return 1; 50 | } 51 | }; 52 | 53 | public abstract int scale(); 54 | public abstract int scaleToNano(); 55 | 56 | public static Timestamp toTimestamp(long value, TimestampUnit fromUnit) 57 | { 58 | long nanoAdjustment = value * fromUnit.scaleToNano(); 59 | return Timestamp.ofEpochSecond(0, nanoAdjustment); 60 | } 61 | 62 | public static Timestamp toTimestamp(double 
value, TimestampUnit fromUnit) 63 | { 64 | long nanoAdjustment = (long) (value * fromUnit.scaleToNano()); 65 | return Timestamp.ofEpochSecond(0, nanoAdjustment); 66 | } 67 | 68 | public static long toLong(Timestamp value, TimestampUnit toUnit) 69 | { 70 | long epochSecond = value.getEpochSecond() * toUnit.scale(); 71 | long nanoIntegerPart = value.getNano() / toUnit.scaleToNano(); 72 | return epochSecond + nanoIntegerPart; 73 | 74 | } 75 | public static double toDouble(Timestamp value, TimestampUnit toUnit) 76 | { 77 | long epochSecond = value.getEpochSecond() * toUnit.scale(); 78 | long nanoIntegerPart = value.getNano() / toUnit.scaleToNano(); 79 | long nanoDecimalPart = value.getNano() - (nanoIntegerPart * toUnit.scaleToNano()); 80 | return epochSecond + nanoIntegerPart + (nanoDecimalPart / (double) toUnit.scaleToNano()); 81 | } 82 | 83 | public static long changeUnit(long value, TimestampUnit fromUnit, TimestampUnit toUnit) 84 | { 85 | if (fromUnit.scale() == toUnit.scale()) { 86 | return value; 87 | } 88 | else if (fromUnit.scale() < toUnit.scale()) { 89 | long factor = toUnit.scale() / fromUnit.scale(); 90 | return value * factor; 91 | } 92 | else { 93 | long divideFactor = fromUnit.scale() / toUnit.scale(); 94 | return value / divideFactor; 95 | } 96 | } 97 | 98 | public static double changeUnit(double value, TimestampUnit fromUnit, TimestampUnit toUnit) 99 | { 100 | if (fromUnit.scale() == toUnit.scale()) { 101 | return value; 102 | } 103 | else if (fromUnit.scale() < toUnit.scale()) { 104 | long factor = toUnit.scale() / fromUnit.scale(); 105 | return value * factor; 106 | } 107 | else { 108 | long divideFactor = fromUnit.scale() / toUnit.scale(); 109 | return value / (double)divideFactor; 110 | } 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/filter/timestamp_format/TimestampUnitDeserializer.java: 
package org.embulk.filter.timestamp_format;

import java.io.IOException;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.deser.std.FromStringDeserializer;
import com.fasterxml.jackson.databind.JsonMappingException;

/**
 * Jackson deserializer mapping config strings like "sec", "ms", "usec", "nano"
 * to {@link TimestampUnit} values.
 */
public class TimestampUnitDeserializer
        extends FromStringDeserializer<TimestampUnit>
{
    // Alias => unit. Built with a LinkedHashMap (instead of the previous Guava
    // ImmutableMap) so insertion order is preserved and the error message below
    // lists aliases in a stable, readable order; wrapped unmodifiable.
    private static final Map<String, TimestampUnit> stringToTimestampUnitMap;

    static {
        Map<String, TimestampUnit> map = new LinkedHashMap<>();
        map.put("Second", TimestampUnit.Second);
        map.put("second", TimestampUnit.Second);
        map.put("sec", TimestampUnit.Second);
        map.put("MilliSecond", TimestampUnit.MilliSecond);
        map.put("millisecond", TimestampUnit.MilliSecond);
        map.put("milli_second", TimestampUnit.MilliSecond);
        map.put("milli", TimestampUnit.MilliSecond);
        map.put("msec", TimestampUnit.MilliSecond);
        map.put("ms", TimestampUnit.MilliSecond);
        map.put("MicroSecond", TimestampUnit.MicroSecond);
        map.put("microsecond", TimestampUnit.MicroSecond);
        map.put("micro_second", TimestampUnit.MicroSecond);
        map.put("micro", TimestampUnit.MicroSecond);
        map.put("usec", TimestampUnit.MicroSecond);
        map.put("us", TimestampUnit.MicroSecond);
        map.put("NanoSecond", TimestampUnit.NanoSecond);
        map.put("nanosecond", TimestampUnit.NanoSecond);
        map.put("nano_second", TimestampUnit.NanoSecond);
        map.put("nano", TimestampUnit.NanoSecond);
        map.put("nsec", TimestampUnit.NanoSecond);
        map.put("ns", TimestampUnit.NanoSecond);
        stringToTimestampUnitMap = Collections.unmodifiableMap(map);
    }

    public TimestampUnitDeserializer()
    {
        super(TimestampUnit.class);
    }

    @Override
    protected TimestampUnit _deserialize(String value, DeserializationContext context)
            throws IOException
    {
        TimestampUnit t = stringToTimestampUnitMap.get(value);
        if (t == null) {
            throw new JsonMappingException(
                    String.format("Unknown type name '%s'. Supported types are: %s",
                            value,
                            String.join(", ", stringToTimestampUnitMap.keySet())));
        }
        return t;
    }
}
super(TimestampUnit.class); 45 | } 46 | 47 | @Override 48 | protected TimestampUnit _deserialize(String value, DeserializationContext context) 49 | throws IOException 50 | { 51 | TimestampUnit t = stringToTimestampUnitMap.get(value); 52 | if (t == null) { 53 | throw new JsonMappingException( 54 | String.format("Unknown type name '%s'. Supported types are: %s", 55 | value, 56 | Joiner.on(", ").join(stringToTimestampUnitMap.keySet()))); 57 | } 58 | return t; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/filter/timestamp_format/cast/DoubleCast.java: -------------------------------------------------------------------------------- 1 | package org.embulk.filter.timestamp_format.cast; 2 | 3 | import org.embulk.filter.timestamp_format.TimestampFormatter; 4 | import org.embulk.filter.timestamp_format.TimestampUnit; 5 | import org.embulk.spi.DataException; 6 | import org.embulk.spi.time.Timestamp; 7 | 8 | public class DoubleCast 9 | { 10 | private DoubleCast() {} 11 | 12 | public static String asString(double value, TimestampUnit fromUnit, TimestampFormatter formatter) throws DataException 13 | { 14 | Timestamp timestamp = TimestampUnit.toTimestamp(value, fromUnit); 15 | return formatter.format(timestamp); 16 | } 17 | 18 | public static Timestamp asTimestamp(double value, TimestampUnit fromUnit) throws DataException 19 | { 20 | return TimestampUnit.toTimestamp(value, fromUnit); 21 | } 22 | 23 | public static long asLong(double value, TimestampUnit fromUnit, TimestampUnit toUnit) throws DataException 24 | { 25 | return (long) TimestampUnit.changeUnit(value, fromUnit, toUnit); 26 | } 27 | 28 | public static double asDouble(double value, TimestampUnit fromUnit, TimestampUnit toUnit) throws DataException 29 | { 30 | return TimestampUnit.changeUnit(value, fromUnit, toUnit); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- 
/src/main/java/org/embulk/filter/timestamp_format/cast/LongCast.java: -------------------------------------------------------------------------------- 1 | package org.embulk.filter.timestamp_format.cast; 2 | 3 | import org.embulk.filter.timestamp_format.TimestampFormatter; 4 | import org.embulk.filter.timestamp_format.TimestampUnit; 5 | import org.embulk.spi.DataException; 6 | import org.embulk.spi.time.Timestamp; 7 | 8 | public class LongCast 9 | { 10 | private LongCast() {} 11 | 12 | public static String asString(long value, TimestampUnit fromUnit, TimestampFormatter formatter) throws DataException 13 | { 14 | Timestamp timestamp = TimestampUnit.toTimestamp(value, fromUnit); 15 | return formatter.format(timestamp); 16 | } 17 | 18 | public static Timestamp asTimestamp(long value, TimestampUnit fromUnit) throws DataException 19 | { 20 | return TimestampUnit.toTimestamp(value, fromUnit); 21 | } 22 | 23 | public static long asLong(long value, TimestampUnit fromUnit, TimestampUnit toUnit) throws DataException 24 | { 25 | return TimestampUnit.changeUnit(value, fromUnit, toUnit); 26 | } 27 | 28 | public static double asDouble(long value, TimestampUnit fromUnit, TimestampUnit toUnit) throws DataException 29 | { 30 | return (double) TimestampUnit.changeUnit(value, fromUnit, toUnit); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/filter/timestamp_format/cast/StringCast.java: -------------------------------------------------------------------------------- 1 | package org.embulk.filter.timestamp_format.cast; 2 | 3 | import org.embulk.filter.timestamp_format.TimestampFormatter; 4 | import org.embulk.filter.timestamp_format.TimestampParser; 5 | import org.embulk.filter.timestamp_format.TimestampUnit; 6 | import org.embulk.spi.DataException; 7 | import org.embulk.spi.time.Timestamp; 8 | import org.embulk.spi.time.TimestampParseException; 9 | 10 | public class StringCast 11 | { 12 | private 
StringCast() {} 13 | 14 | private static String buildErrorMessage(String value) 15 | { 16 | return String.format("failed to parse string: \"%s\"", value); 17 | } 18 | 19 | public static String asString(String value, TimestampParser parser, TimestampFormatter formatter) throws DataException 20 | { 21 | try { 22 | Timestamp timestamp = parser.parse(value); 23 | return formatter.format(timestamp); 24 | } 25 | catch (TimestampParseException ex) { 26 | throw new DataException(buildErrorMessage(value), ex); 27 | } 28 | catch (IllegalArgumentException ex) { 29 | throw new DataException(buildErrorMessage(value), ex); 30 | } 31 | } 32 | 33 | public static Timestamp asTimestamp(String value, TimestampParser parser) throws DataException 34 | { 35 | try { 36 | return parser.parse(value); 37 | } 38 | catch (TimestampParseException ex) { 39 | throw new DataException(buildErrorMessage(value), ex); 40 | } 41 | catch (IllegalArgumentException ex) { 42 | throw new DataException(buildErrorMessage(value), ex); 43 | } 44 | } 45 | 46 | public static long asLong(String value, TimestampParser parser, TimestampUnit toUnit) throws DataException 47 | { 48 | try { 49 | Timestamp timestamp = parser.parse(value); 50 | return TimestampUnit.toLong(timestamp, toUnit); 51 | } 52 | catch (TimestampParseException ex) { 53 | throw new DataException(buildErrorMessage(value), ex); 54 | } 55 | catch (IllegalArgumentException ex) { 56 | throw new DataException(buildErrorMessage(value), ex); 57 | } 58 | } 59 | 60 | public static double asDouble(String value, TimestampParser parser, TimestampUnit toUnit) throws DataException 61 | { 62 | try { 63 | Timestamp timestamp = parser.parse(value); 64 | return TimestampUnit.toDouble(timestamp, toUnit); 65 | } 66 | catch (TimestampParseException ex) { 67 | throw new DataException(buildErrorMessage(value), ex); 68 | } 69 | catch (IllegalArgumentException ex) { 70 | throw new DataException(buildErrorMessage(value), ex); 71 | } 72 | } 73 | } 74 | 
-------------------------------------------------------------------------------- /src/main/java/org/embulk/filter/timestamp_format/cast/TimestampCast.java: -------------------------------------------------------------------------------- 1 | package org.embulk.filter.timestamp_format.cast; 2 | 3 | import org.embulk.filter.timestamp_format.TimestampFormatter; 4 | import org.embulk.filter.timestamp_format.TimestampUnit; 5 | import org.embulk.spi.DataException; 6 | import org.embulk.spi.time.Timestamp; 7 | 8 | public class TimestampCast 9 | { 10 | private TimestampCast() {} 11 | 12 | public static String asString(Timestamp value, TimestampFormatter formatter) throws DataException 13 | { 14 | return formatter.format(value); 15 | } 16 | 17 | public static Timestamp asTimestamp(Timestamp value) throws DataException 18 | { 19 | return value; 20 | } 21 | 22 | public static long asLong(Timestamp value, TimestampUnit toUnit) throws DataException 23 | { 24 | return TimestampUnit.toLong(value, toUnit); 25 | } 26 | 27 | public static double asDouble(Timestamp value, TimestampUnit toUnit) throws DataException 28 | { 29 | return TimestampUnit.toDouble(value, toUnit); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/filter/timestamp_format/TestTimestampFormatConverter.java: -------------------------------------------------------------------------------- 1 | package org.embulk.filter.timestamp_format; 2 | 3 | import org.embulk.EmbulkTestRuntime; 4 | 5 | import org.embulk.spi.time.Timestamp; 6 | import org.joda.time.DateTimeZone; 7 | 8 | import org.junit.Before; 9 | import org.junit.Rule; 10 | import org.junit.Test; 11 | 12 | import java.util.Arrays; 13 | import java.util.Map; 14 | 15 | import static org.junit.Assert.assertEquals; 16 | import static org.junit.Assert.fail; 17 | 18 | public class TestTimestampFormatConverter 19 | { 20 | @Rule 21 | public EmbulkTestRuntime runtime = new EmbulkTestRuntime(); 22 
| public DateTimeZone zone; 23 | public Timestamp timestamp; 24 | 25 | @Before 26 | public void createResource() 27 | { 28 | zone = DateTimeZone.UTC; 29 | timestamp = Timestamp.ofEpochSecond(1463130159, 123456789); 30 | } 31 | 32 | @Test 33 | public void testRUBY_TO_JAVA_FORMAT_TABLE() 34 | { 35 | for(Map.Entry entry : TimestampFormatConverter.RUBY_TO_JAVA_FORMAT_TABLE.entrySet()) { 36 | String rubyFormat = "%" + entry.getKey(); 37 | String javaFormat = entry.getValue(); 38 | 39 | TimestampFormatter rubyFormatter = new TimestampFormatter(rubyFormat, zone); 40 | TimestampFormatter javaFormatter = new TimestampFormatter(javaFormat, zone); 41 | String rubyFormatted = rubyFormatter.format(timestamp); 42 | String javaFormatted = javaFormatter.format(timestamp); 43 | // System.out.println(String.format("%s<%s> %s<%s>", rubyFormat, rubyFormatted, javaFormat, javaFormatted)); 44 | 45 | TimestampParser rubyParser = new TimestampParser(Arrays.asList("." + rubyFormat), zone); 46 | TimestampParser javaParser = new TimestampParser(Arrays.asList("." + javaFormat), zone); 47 | Timestamp rubyParsed = rubyParser.parse("." + rubyFormatted); 48 | try { 49 | Timestamp javaParsed = javaParser.parse("." 
+ rubyFormatted); 50 | } 51 | catch (IllegalArgumentException ex) { 52 | fail(String.format("Parse \"%s\" with java format \"%s\" failed (corresponding ruby format \"%s\")", rubyFormatted, javaFormat, rubyFormat)); 53 | } 54 | } 55 | } 56 | 57 | @Test 58 | public void testToJavaFormat() 59 | { 60 | for(Map.Entry entry : TimestampFormatConverter.RUBY_TO_JAVA_FORMAT_TABLE.entrySet()) { 61 | String rubyFormat = "%-2" + entry.getKey(); 62 | String javaFormat = entry.getValue(); 63 | assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat)); 64 | } 65 | } 66 | 67 | @Test 68 | public void testToJavaFormats() 69 | { 70 | { 71 | String rubyFormat = "%Y-%m-%d %H:%M:%S.%6N %:z"; 72 | String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn Z"; 73 | assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat)); 74 | 75 | TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone); 76 | try { 77 | parser.parse("2016-05-12 20:14:13.123456789 +09:00"); 78 | } 79 | catch (IllegalArgumentException ex) { 80 | fail(); 81 | } 82 | } 83 | { 84 | String rubyFormat = "%Y-%m-%d %H:%M:%S.%6N UTC"; 85 | String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn 'UTC'"; 86 | assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat)); 87 | 88 | TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone); 89 | try { 90 | parser.parse("2016-05-12 20:14:13.123456789 UTC"); 91 | } 92 | catch (IllegalArgumentException ex) { 93 | fail(); 94 | } 95 | } 96 | { 97 | String rubyFormat = "%Y-%m-%d %H:%M:%S.%6N +00:00"; 98 | String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn '+00:00'"; 99 | assertEquals(javaFormat, TimestampFormatConverter.toJavaFormat(rubyFormat)); 100 | 101 | TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone); 102 | try { 103 | parser.parse("2016-05-12 20:14:13.123456789 +00:00"); 104 | } 105 | catch (IllegalArgumentException ex) { 106 | fail(); 107 | } 108 | } 109 | } 110 | } 111 | 112 | 
-------------------------------------------------------------------------------- /src/test/java/org/embulk/filter/timestamp_format/TestTimestampParser.java: -------------------------------------------------------------------------------- 1 | package org.embulk.filter.timestamp_format; 2 | 3 | import org.embulk.EmbulkTestRuntime; 4 | 5 | import org.embulk.spi.time.Timestamp; 6 | import org.joda.time.DateTimeZone; 7 | 8 | import org.junit.Before; 9 | import org.junit.Rule; 10 | import org.junit.Test; 11 | 12 | import java.util.Arrays; 13 | 14 | import static org.junit.Assert.assertEquals; 15 | import static org.junit.Assert.fail; 16 | 17 | public class TestTimestampParser 18 | { 19 | public DateTimeZone zone; 20 | public Timestamp expected; 21 | 22 | @Before 23 | public void createResource() 24 | { 25 | zone = DateTimeZone.UTC; 26 | expected = Timestamp.ofEpochSecond(1463065359, 123456789); 27 | } 28 | 29 | @Test 30 | public void testJRubyParser() 31 | { 32 | String rubyFormat = "%Y-%m-%d %H:%M:%S.%N %:z"; 33 | 34 | TimestampParser parser = new TimestampParser(Arrays.asList(rubyFormat), zone); 35 | try { 36 | Timestamp actual = parser.parse("2016-05-13 00:02:39.123456789 +09:00"); 37 | // embulk >= 0.8.27 uses new faster jruby Timestamp parser, and it support nano second 38 | // embulk < 0.8.27 uses old slow jruby Timestamp parser, and it does not support nano seconds 39 | //assertEquals(expected, actual); 40 | } 41 | catch (IllegalArgumentException ex) { 42 | fail(); 43 | } 44 | } 45 | 46 | @Test 47 | public void testJavaParser() 48 | { 49 | String javaFormat = "yyyy-MM-dd HH:mm:ss.nnnnnnnnn Z"; 50 | 51 | TimestampParser parser = new TimestampParser(Arrays.asList(javaFormat), zone); 52 | try { 53 | Timestamp actual = parser.parse("2016-05-13 00:02:39.123456789 +09:00"); 54 | assertEquals(expected, actual); 55 | } 56 | catch (IllegalArgumentException ex) { 57 | fail(); 58 | } 59 | } 60 | } 61 | 
-------------------------------------------------------------------------------- /src/test/java/org/embulk/filter/timestamp_format/TestTimestampUnit.java: -------------------------------------------------------------------------------- 1 | package org.embulk.filter.timestamp_format; 2 | 3 | import org.embulk.spi.time.Timestamp; 4 | import org.junit.Test; 5 | 6 | import static org.junit.Assert.assertEquals; 7 | 8 | public class TestTimestampUnit 9 | { 10 | @Test 11 | public void testLongToTimestamp() 12 | { 13 | long epochSecond = 1462087147L; 14 | long epochNanoSecond = 1462087147100200300L; 15 | Timestamp timestamp; 16 | 17 | timestamp = TimestampUnit.toTimestamp(epochNanoSecond / 1000000000, TimestampUnit.Second); 18 | assertEquals(epochSecond, timestamp.getEpochSecond()); 19 | assertEquals(0, timestamp.getNano()); 20 | 21 | timestamp = TimestampUnit.toTimestamp(epochNanoSecond / 1000000, TimestampUnit.MilliSecond); 22 | assertEquals(epochSecond, timestamp.getEpochSecond()); 23 | assertEquals(100000000, timestamp.getNano()); 24 | 25 | timestamp = TimestampUnit.toTimestamp(epochNanoSecond / 1000, TimestampUnit.MicroSecond); 26 | assertEquals(epochSecond, timestamp.getEpochSecond()); 27 | assertEquals(100200000, timestamp.getNano()); 28 | 29 | timestamp = TimestampUnit.toTimestamp(epochNanoSecond, TimestampUnit.NanoSecond); 30 | assertEquals(epochSecond, timestamp.getEpochSecond()); 31 | assertEquals(100200300, timestamp.getNano()); 32 | } 33 | 34 | @Test 35 | public void testDoubleToTimestamp() 36 | { 37 | long epochSecond = 1462087147L; 38 | double epochNanoSecond = 1462087147100200192.0; 39 | Timestamp timestamp; 40 | 41 | timestamp = TimestampUnit.toTimestamp(epochNanoSecond / 1000000000, TimestampUnit.Second); 42 | assertEquals(epochSecond, timestamp.getEpochSecond()); 43 | assertEquals(100200192, timestamp.getNano(), 200); 44 | 45 | timestamp = TimestampUnit.toTimestamp(epochNanoSecond / 1000000, TimestampUnit.MilliSecond); 46 | assertEquals(epochSecond, 
timestamp.getEpochSecond()); 47 | assertEquals(100200192, timestamp.getNano(), 200); 48 | 49 | timestamp = TimestampUnit.toTimestamp(epochNanoSecond / 1000, TimestampUnit.MicroSecond); 50 | assertEquals(epochSecond, timestamp.getEpochSecond()); 51 | assertEquals(100200192, timestamp.getNano(), 200); 52 | 53 | timestamp = TimestampUnit.toTimestamp(epochNanoSecond, TimestampUnit.NanoSecond); 54 | assertEquals(epochSecond, timestamp.getEpochSecond()); 55 | assertEquals(100200192, timestamp.getNano()); 56 | } 57 | 58 | @Test 59 | public void testTimestampToLong() 60 | { 61 | long epochNanoSecond = 1462087147100200300L; 62 | Timestamp timestamp = Timestamp.ofEpochSecond(0, epochNanoSecond); 63 | long value; 64 | 65 | value = TimestampUnit.toLong(timestamp, TimestampUnit.Second); 66 | assertEquals(epochNanoSecond / 1000000000, value); 67 | 68 | value = TimestampUnit.toLong(timestamp, TimestampUnit.MilliSecond); 69 | assertEquals(epochNanoSecond / 1000000, value); 70 | 71 | value = TimestampUnit.toLong(timestamp, TimestampUnit.MicroSecond); 72 | assertEquals(epochNanoSecond / 1000, value); 73 | 74 | value = TimestampUnit.toLong(timestamp, TimestampUnit.NanoSecond); 75 | assertEquals(epochNanoSecond, value); 76 | } 77 | 78 | @Test 79 | public void testTimestampToDouble() 80 | { 81 | long epochNanoSecond = 1462087147100200192L; 82 | Timestamp timestamp = Timestamp.ofEpochSecond(0, epochNanoSecond); 83 | double value; 84 | 85 | value = TimestampUnit.toDouble(timestamp, TimestampUnit.Second); 86 | assertEquals(epochNanoSecond / 1000000000.0, value, 2); 87 | 88 | value = TimestampUnit.toDouble(timestamp, TimestampUnit.MilliSecond); 89 | assertEquals(epochNanoSecond / 1000000.0, value, 2); 90 | 91 | value = TimestampUnit.toDouble(timestamp, TimestampUnit.MicroSecond); 92 | assertEquals(epochNanoSecond / 1000.0, value, 2); 93 | 94 | value = TimestampUnit.toDouble(timestamp, TimestampUnit.NanoSecond); 95 | assertEquals(epochNanoSecond / 1.0, value, 2); 96 | } 97 | 98 | @Test 99 | 
public void testLongChangeUnit() 100 | { 101 | long epochNanoSecond = 1462087147100200300L; 102 | long value; 103 | 104 | // from second 105 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000000000, TimestampUnit.Second, TimestampUnit.Second); 106 | assertEquals(epochNanoSecond / 1000000000, value); 107 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000000000, TimestampUnit.Second, TimestampUnit.MilliSecond); 108 | assertEquals(epochNanoSecond / 1000000000 * 1000, value); 109 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000000000, TimestampUnit.Second, TimestampUnit.MicroSecond); 110 | assertEquals(epochNanoSecond / 1000000000 * 1000000, value); 111 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000000000, TimestampUnit.Second, TimestampUnit.NanoSecond); 112 | assertEquals(epochNanoSecond / 1000000000 * 1000000000, value); 113 | 114 | // from milli second 115 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000000, TimestampUnit.MilliSecond, TimestampUnit.Second); 116 | assertEquals(epochNanoSecond / 1000000 / 1000, value); 117 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000000, TimestampUnit.MilliSecond, TimestampUnit.MilliSecond); 118 | assertEquals(epochNanoSecond / 1000000, value); 119 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000000, TimestampUnit.MilliSecond, TimestampUnit.MicroSecond); 120 | assertEquals(epochNanoSecond / 1000000 * 1000, value); 121 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000000, TimestampUnit.MilliSecond, TimestampUnit.NanoSecond); 122 | assertEquals(epochNanoSecond / 1000000 * 1000000, value); 123 | 124 | // from micro second 125 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000, TimestampUnit.MicroSecond, TimestampUnit.Second); 126 | assertEquals(epochNanoSecond / 1000 / 1000000, value); 127 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000, TimestampUnit.MicroSecond, TimestampUnit.MilliSecond); 128 | assertEquals(epochNanoSecond / 1000 / 1000, value); 
129 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000, TimestampUnit.MicroSecond, TimestampUnit.MicroSecond); 130 | assertEquals(epochNanoSecond / 1000, value); 131 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000, TimestampUnit.MicroSecond, TimestampUnit.NanoSecond); 132 | assertEquals(epochNanoSecond / 1000 * 1000, value); 133 | 134 | // from nano second 135 | value = TimestampUnit.changeUnit(epochNanoSecond, TimestampUnit.NanoSecond, TimestampUnit.Second); 136 | assertEquals(epochNanoSecond / 1000000000, value); 137 | value = TimestampUnit.changeUnit(epochNanoSecond, TimestampUnit.NanoSecond, TimestampUnit.MilliSecond); 138 | assertEquals(epochNanoSecond / 1000000, value); 139 | value = TimestampUnit.changeUnit(epochNanoSecond, TimestampUnit.NanoSecond, TimestampUnit.MicroSecond); 140 | assertEquals(epochNanoSecond / 1000, value); 141 | value = TimestampUnit.changeUnit(epochNanoSecond, TimestampUnit.NanoSecond, TimestampUnit.NanoSecond); 142 | assertEquals(epochNanoSecond, value); 143 | } 144 | 145 | @Test 146 | public void testDoubleChangeUnit() 147 | { 148 | double epochNanoSecond = 1462087147100200192L; 149 | double value; 150 | 151 | // from second 152 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000000000, TimestampUnit.Second, TimestampUnit.Second); 153 | assertEquals(epochNanoSecond / 1000000000, value, 2); 154 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000000000, TimestampUnit.Second, TimestampUnit.MilliSecond); 155 | assertEquals(epochNanoSecond / 1000000000 * 1000, value, 2); 156 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000000000, TimestampUnit.Second, TimestampUnit.MicroSecond); 157 | assertEquals(epochNanoSecond / 1000000000 * 1000000, value, 2); 158 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000000000, TimestampUnit.Second, TimestampUnit.NanoSecond); 159 | assertEquals(epochNanoSecond / 1000000000 * 1000000000, value, 2); 160 | 161 | // from milli second 162 | value = 
TimestampUnit.changeUnit(epochNanoSecond / 1000000, TimestampUnit.MilliSecond, TimestampUnit.Second); 163 | assertEquals(epochNanoSecond / 1000000 / 1000, value ,2); 164 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000000, TimestampUnit.MilliSecond, TimestampUnit.MilliSecond); 165 | assertEquals(epochNanoSecond / 1000000, value, 2); 166 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000000, TimestampUnit.MilliSecond, TimestampUnit.MicroSecond); 167 | assertEquals(epochNanoSecond / 1000000 * 1000, value, 2); 168 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000000, TimestampUnit.MilliSecond, TimestampUnit.NanoSecond); 169 | assertEquals(epochNanoSecond / 1000000 * 1000000, value, 2); 170 | 171 | // from micro second 172 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000, TimestampUnit.MicroSecond, TimestampUnit.Second); 173 | assertEquals(epochNanoSecond / 1000 / 1000000, value, 2); 174 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000, TimestampUnit.MicroSecond, TimestampUnit.MilliSecond); 175 | assertEquals(epochNanoSecond / 1000 / 1000, value, 2); 176 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000, TimestampUnit.MicroSecond, TimestampUnit.MicroSecond); 177 | assertEquals(epochNanoSecond / 1000, value, 2); 178 | value = TimestampUnit.changeUnit(epochNanoSecond / 1000, TimestampUnit.MicroSecond, TimestampUnit.NanoSecond); 179 | assertEquals(epochNanoSecond / 1000 * 1000, value, 2); 180 | 181 | // from nano second 182 | value = TimestampUnit.changeUnit(epochNanoSecond, TimestampUnit.NanoSecond, TimestampUnit.Second); 183 | assertEquals(epochNanoSecond / 1000000000, value, 2); 184 | value = TimestampUnit.changeUnit(epochNanoSecond, TimestampUnit.NanoSecond, TimestampUnit.MilliSecond); 185 | assertEquals(epochNanoSecond / 1000000, value, 2); 186 | value = TimestampUnit.changeUnit(epochNanoSecond, TimestampUnit.NanoSecond, TimestampUnit.MicroSecond); 187 | assertEquals(epochNanoSecond / 1000, value, 2); 188 | value = 
TimestampUnit.changeUnit(epochNanoSecond, TimestampUnit.NanoSecond, TimestampUnit.NanoSecond); 189 | assertEquals(epochNanoSecond, value, 2); 190 | } 191 | 192 | } 193 | --------------------------------------------------------------------------------