├── .gitignore ├── CHANGELOG.md ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── bench ├── gen_dummy.rb ├── typecast.yml └── without_typecast.yml ├── build.gradle ├── embulk-parser-jsonl.gemspec ├── example ├── compat.yml ├── example.yml ├── example_without_typecast.yml └── sample.json ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── lib └── embulk │ ├── guess │ └── jsonl.rb │ └── parser │ └── jsonl.rb ├── settings.gradle └── src ├── main └── java │ └── org │ └── embulk │ └── parser │ └── jsonl │ ├── ColumnCaster.java │ ├── ColumnVisitorImpl.java │ ├── JsonRecordValidateException.java │ ├── JsonlParserPlugin.java │ └── cast │ ├── BooleanCast.java │ ├── DoubleCast.java │ ├── JsonCast.java │ ├── LongCast.java │ └── StringCast.java └── test ├── java └── org │ └── embulk │ └── parser │ └── jsonl │ ├── TestColumnCaster.java │ ├── TestJsonlParserPlugin.java │ └── cast │ ├── TestBooleanCast.java │ ├── TestDoubleCast.java │ ├── TestJsonCast.java │ ├── TestLongCast.java │ └── TestStringCast.java └── resources └── org └── embulk └── parser └── jsonl └── use_column_options.yml /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | /pkg/ 3 | /tmp/ 4 | /.bundle/ 5 | build/ 6 | /classpath/ 7 | /.gradle 8 | /Gemfile.lock 9 | 10 | /.idea 11 | *.iml 12 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.2.0 - 2016-05-28 2 | 3 | [new feature] Support typecast option [#9](https://github.com/shun0102/embulk-parser-jsonl/pull/9) 4 | 5 | ## 0.1.2 - 2016-03-27 6 | 7 | [new feature] Support column_options option [#4](https://github.com/shun0102/embulk-parser-jsonl/pull/4) 8 | [maintenance] Upgrade Embulk v0.8.8 [#6](https://github.com/shun0102/embulk-parser-jsonl/pull/6) 9 | 10 | ## 0.1.1 - 2016-03-17 11 | 12 | 
[fix bug] Avoid org.embulk.spi.json.JsonParseException: Unable to parse empty string [#5](https://github.com/shun0102/embulk-parser-jsonl/pull/5) 13 | 14 | ## 0.1.0 - 2016-02-22 15 | 16 | Upgrade Embulk v0.8 and support Json type in Java [#3](https://github.com/shun0102/embulk-parser-jsonl/pull/3) 17 | 18 | ## 0.0.1 - 2015-04-04 19 | 20 | The first release!! 21 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org/' 2 | gemspec 3 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | MIT License 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining 5 | a copy of this software and associated documentation files (the 6 | "Software"), to deal in the Software without restriction, including 7 | without limitation the rights to use, copy, modify, merge, publish, 8 | distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so, subject to 10 | the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Announcement 2 | 3 | Embulk has shipped with a [built-in json parser](https://www.embulk.org/docs/built-in.html) since v0.9.16 (released on 2019/03/11). 4 | 5 | * The embulk built-in json parser has the same behavior for the columns option. 6 | * The embulk built-in json parser does not support automatic type conversion with the default_typecast option. 7 | 8 | For more details, see this [article](https://qiita.com/kamatama_41/items/ebb2ae0548e2a4863397#fn1). 9 | 10 | # Jsonl parser plugin for Embulk 11 | 12 | [JSONL (JSON Lines)](http://jsonlines.org/) parser plugin for Embulk 13 | 14 | ## Overview 15 | 16 | * **Plugin type**: parser 17 | * **Guess supported**: yes 18 | 19 | ## Configuration 20 | 21 | - **type**: Specify this parser as jsonl 22 | - **columns**: Specify column name and type. See below (array, required) 23 | * **stop_on_invalid_record**: Stop the bulk load transaction if a file includes an invalid record (such as an invalid timestamp) (boolean, default: false) 24 | * **default_timezone**: Default timezone of the timestamp (string, default: UTC) 25 | * **default_timestamp_format**: Default timestamp format of the timestamp (string, default: `%Y-%m-%d %H:%M:%S.%N %z`) 26 | * **newline**: Newline character (CRLF, LF or CR) (string, default: CRLF) 27 | * **charset**: Character encoding (eg. 
ISO-8859-1, UTF-8) (string, default: UTF-8) 28 | * **default_typecast**: Specify whether to cast values automatically to the specified types or not (boolean, default: true) 29 | * **min_rows_for_guess**: Minimum rows for guess (integer, default: 4) 30 | 31 | ### columns 32 | 33 | * **name**: Name of the column (string, required) 34 | * **type**: Type of the column (string, required) 35 | * **timezone**: Timezone of the timestamp if type is timestamp (string, default: default_timezone) 36 | * **format**: Format of the timestamp if type is timestamp (string, default: default_timestamp_format) 37 | * **typecast**: Whether to cast values or not (boolean, default: default_typecast) 38 | 39 | ## Guess 40 | 41 | Because guess is supported, you don't have to write the `parser:` section in the configuration file. After writing the `in:` section, you can let embulk guess the `parser:` section using this command: 42 | 43 | ``` 44 | $ embulk gem install embulk-parser-jsonl 45 | $ embulk guess -g jsonl config.yml -o guessed.yml 46 | ``` 47 | 48 | ## Build 49 | 50 | ``` 51 | $ ./gradlew gem classpath 52 | ``` 53 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | 3 | task default: :build 4 | -------------------------------------------------------------------------------- /bench/gen_dummy.rb: -------------------------------------------------------------------------------- 1 | File::open('bench/sample.jsonl', 'w') { |f| 2 | (1..1000000).each { 3 | f.puts(%Q[{"foo":"foo","bool":true,"bool_str":"true","int":10,"int_str":"20","double":1.5,"double_str":"2.5","array":[1,2,3]}]) 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /bench/typecast.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: "bench/sample" 4 | parser: 5 | type: jsonl 6 | 
default_typecast: true 7 | columns: 8 | - {name: "foo", type: "string"} 9 | - {name: "bool", type: "boolean"} 10 | - {name: "bool_str", type: "boolean"} 11 | - {name: "int", type: "long"} 12 | - {name: "int_str", type: "long"} 13 | - {name: "double", type: "double"} 14 | - {name: "double_str", type: "double"} 15 | - {name: "array", type: "json"} 16 | out: 17 | type: "null" 18 | -------------------------------------------------------------------------------- /bench/without_typecast.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: "bench/sample" 4 | parser: 5 | type: jsonl 6 | default_typecast: false 7 | columns: 8 | - {name: "foo", type: "string"} 9 | - {name: "bool", type: "boolean"} 10 | - {name: "bool_str", type: "string"} 11 | - {name: "int", type: "long"} 12 | - {name: "int_str", type: "string"} 13 | - {name: "double", type: "double"} 14 | - {name: "double_str", type: "string"} 15 | - {name: "array", type: "json"} 16 | out: 17 | type: "null" 18 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id "com.jfrog.bintray" version "1.1" 3 | id "com.github.jruby-gradle.base" version "0.1.5" 4 | id "java" 5 | id "jacoco" 6 | } 7 | import com.github.jrubygradle.JRubyExec 8 | repositories { 9 | mavenCentral() 10 | jcenter() 11 | } 12 | configurations { 13 | provided 14 | } 15 | 16 | version = "0.2.0" 17 | 18 | compileJava.options.encoding = 'UTF-8' // source encoding 19 | sourceCompatibility = 1.7 20 | targetCompatibility = 1.7 21 | 22 | dependencies { 23 | compile "org.embulk:embulk-core:0.8.8" 24 | provided "org.embulk:embulk-core:0.8.8" 25 | 26 | testCompile "junit:junit:4.+" 27 | testCompile "org.embulk:embulk-core:0.8.8:tests" 28 | testCompile "org.embulk:embulk-standards:0.8.8" 29 | } 30 | 31 | task classpath(type: Copy, dependsOn: ["jar"]) { 32 
| doFirst { file("classpath").deleteDir() } 33 | from (configurations.runtime - configurations.provided + files(jar.archivePath)) 34 | into "classpath" 35 | } 36 | clean { delete "classpath" } 37 | 38 | task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) { 39 | jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build" 40 | script "${project.name}.gemspec" 41 | doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") } 42 | } 43 | 44 | task gemPush(type: JRubyExec, dependsOn: ["gem"]) { 45 | jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push" 46 | script "pkg/${project.name}-${project.version}.gem" 47 | } 48 | 49 | task "package"(dependsOn: ["gemspec", "classpath"]) << { 50 | println "> Build succeeded." 51 | println "> You can run embulk with '-L ${file(".").absolutePath}' argument." 52 | } 53 | 54 | task gemspec { 55 | ext.gemspecFile = file("${project.name}.gemspec") 56 | inputs.file "build.gradle" 57 | outputs.file gemspecFile 58 | doLast { gemspecFile.write($/ 59 | Gem::Specification.new do |spec| 60 | spec.name = "${project.name}" 61 | spec.version = "${project.version}" 62 | spec.authors = ["Shunsuke Mikami"] 63 | spec.summary = "Jsonl parser plugin for Embulk" 64 | spec.description = "Parses Jsonl files read by other file input plugins." 
65 | spec.email = ["shun0102@gmail.com"] 66 | spec.licenses = ["MIT"] 67 | spec.homepage = "https://github.com/shun0102/embulk-parser-jsonl" 68 | 69 | spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] 70 | spec.test_files = spec.files.grep(%r{^(test|spec)/}) 71 | spec.require_paths = ["lib"] 72 | 73 | spec.add_development_dependency 'bundler', ['~> 1.0'] 74 | spec.add_development_dependency 'rake', ['~> 10.0'] 75 | end 76 | /$) 77 | } 78 | } 79 | clean { delete "${project.name}.gemspec" } 80 | -------------------------------------------------------------------------------- /embulk-parser-jsonl.gemspec: -------------------------------------------------------------------------------- 1 | 2 | Gem::Specification.new do |spec| 3 | spec.name = "embulk-parser-jsonl" 4 | spec.version = "0.2.0" 5 | spec.authors = ["Shunsuke Mikami"] 6 | spec.summary = "Jsonl parser plugin for Embulk" 7 | spec.description = "Parses Jsonl files read by other file input plugins." 8 | spec.email = ["shun0102@gmail.com"] 9 | spec.licenses = ["MIT"] 10 | spec.homepage = "https://github.com/shun0102/embulk-parser-jsonl" 11 | 12 | spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] 13 | spec.test_files = spec.files.grep(%r{^(test|spec)/}) 14 | spec.require_paths = ["lib"] 15 | 16 | spec.add_development_dependency 'bundler', ['~> 1.0'] 17 | spec.add_development_dependency 'rake', ['~> 10.0'] 18 | end 19 | -------------------------------------------------------------------------------- /example/compat.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: "example/sample" 4 | parser: 5 | type: jsonl 6 | columns: 7 | - {name: "foo", type: "string"} 8 | - {name: "bool", type: "boolean"} 9 | - {name: "bool_str", type: "boolean"} 10 | - {name: "int", type: "string"} 11 | - {name: "int_str", type: "string"} 12 | - {name: "time", type: "timestamp", format: '%Y-%m-%d %H:%M:%S'} 13 | - {name: "double", type: 
"double"} 14 | - {name: "double_str", type: "string"} 15 | - {name: "array", type: "json"} 16 | column_options: 17 | bool_str: {type: "boolean"} 18 | int_str: {type: "long"} 19 | double_str: {type: "double"} 20 | out: 21 | type: stdout 22 | -------------------------------------------------------------------------------- /example/example.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: "example/sample" 4 | parser: 5 | type: jsonl 6 | # default_typecast: true # default: true 7 | columns: 8 | - {name: "foo", type: "string"} 9 | - {name: "bool", type: "boolean"} 10 | - {name: "bool_str", type: "boolean"} 11 | - {name: "int", type: "long"} 12 | - {name: "int_str", type: "long"} 13 | - {name: "time", type: "timestamp", format: '%Y-%m-%d %H:%M:%S'} 14 | - {name: "double", type: "double"} 15 | - {name: "double_str", type: "double"} 16 | - {name: "array", type: "json"} 17 | out: 18 | type: stdout 19 | -------------------------------------------------------------------------------- /example/example_without_typecast.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: file 3 | path_prefix: "example/sample" 4 | parser: 5 | type: jsonl 6 | default_typecast: false 7 | columns: 8 | - {name: "foo", type: "string"} 9 | - {name: "bool", type: "boolean"} 10 | - {name: "bool_str", type: "string"} 11 | - {name: "int", type: "long"} 12 | - {name: "int_str", type: "string"} 13 | - {name: "time", type: "timestamp", format: '%Y-%m-%d %H:%M:%S'} 14 | - {name: "double", type: "double"} 15 | - {name: "double_str", type: "string"} 16 | - {name: "array", type: "json"} 17 | out: 18 | type: stdout 19 | -------------------------------------------------------------------------------- /example/sample.json: -------------------------------------------------------------------------------- 1 | {"foo": "bar", "bool": true, "bool_str": "true", "int": 42, "int_str": "42", "time": 
"2016-3-2 00:39:18", "double": 2.4, "double_str": "2.4", "array": [1, 2, 3]} 2 | {"foo": null, "bool": false, "bool_str": "false", "int": 42, "int_str": "42", "time": "2016-3-2 00:39:18", "double": 2.4, "double_str": "2.4", "array": [{"inner": "hoge"}, {"inner": 1.5}]} 3 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shun0102/embulk-parser-jsonl/21499b6b6f3614e7b86ea8027153aa8cd1f63db0/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Tue Aug 11 00:26:20 PDT 2015 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.6-bin.zip 7 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 10 | DEFAULT_JVM_OPTS="" 11 | 12 | APP_NAME="Gradle" 13 | APP_BASE_NAME=`basename "$0"` 14 | 15 | # Use the maximum available, or set MAX_FD != -1 to use that value. 16 | MAX_FD="maximum" 17 | 18 | warn ( ) { 19 | echo "$*" 20 | } 21 | 22 | die ( ) { 23 | echo 24 | echo "$*" 25 | echo 26 | exit 1 27 | } 28 | 29 | # OS specific support (must be 'true' or 'false'). 
30 | cygwin=false 31 | msys=false 32 | darwin=false 33 | case "`uname`" in 34 | CYGWIN* ) 35 | cygwin=true 36 | ;; 37 | Darwin* ) 38 | darwin=true 39 | ;; 40 | MINGW* ) 41 | msys=true 42 | ;; 43 | esac 44 | 45 | # For Cygwin, ensure paths are in UNIX format before anything is touched. 46 | if $cygwin ; then 47 | [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"` 48 | fi 49 | 50 | # Attempt to set APP_HOME 51 | # Resolve links: $0 may be a link 52 | PRG="$0" 53 | # Need this for relative symlinks. 54 | while [ -h "$PRG" ] ; do 55 | ls=`ls -ld "$PRG"` 56 | link=`expr "$ls" : '.*-> \(.*\)$'` 57 | if expr "$link" : '/.*' > /dev/null; then 58 | PRG="$link" 59 | else 60 | PRG=`dirname "$PRG"`"/$link" 61 | fi 62 | done 63 | SAVED="`pwd`" 64 | cd "`dirname \"$PRG\"`/" >&- 65 | APP_HOME="`pwd -P`" 66 | cd "$SAVED" >&- 67 | 68 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 69 | 70 | # Determine the Java command to use to start the JVM. 71 | if [ -n "$JAVA_HOME" ] ; then 72 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 73 | # IBM's JDK on AIX uses strange locations for the executables 74 | JAVACMD="$JAVA_HOME/jre/sh/java" 75 | else 76 | JAVACMD="$JAVA_HOME/bin/java" 77 | fi 78 | if [ ! -x "$JAVACMD" ] ; then 79 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 80 | 81 | Please set the JAVA_HOME variable in your environment to match the 82 | location of your Java installation." 83 | fi 84 | else 85 | JAVACMD="java" 86 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 87 | 88 | Please set the JAVA_HOME variable in your environment to match the 89 | location of your Java installation." 90 | fi 91 | 92 | # Increase the maximum file descriptors if we can. 93 | if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then 94 | MAX_FD_LIMIT=`ulimit -H -n` 95 | if [ $? 
-eq 0 ] ; then 96 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 97 | MAX_FD="$MAX_FD_LIMIT" 98 | fi 99 | ulimit -n $MAX_FD 100 | if [ $? -ne 0 ] ; then 101 | warn "Could not set maximum file descriptor limit: $MAX_FD" 102 | fi 103 | else 104 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 105 | fi 106 | fi 107 | 108 | # For Darwin, add options to specify how the application appears in the dock 109 | if $darwin; then 110 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 111 | fi 112 | 113 | # For Cygwin, switch paths to Windows format before running java 114 | if $cygwin ; then 115 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 116 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- 
"$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules 158 | function splitJvmOpts() { 159 | JVM_OPTS=("$@") 160 | } 161 | eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS 162 | JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME" 163 | 164 | exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@" 165 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 12 | set DEFAULT_JVM_OPTS= 13 | 14 | set DIRNAME=%~dp0 15 | if "%DIRNAME%" == "" set DIRNAME=. 16 | set APP_BASE_NAME=%~n0 17 | set APP_HOME=%DIRNAME% 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 
29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windowz variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | if "%@eval[2+2]" == "4" goto 4NT_args 53 | 54 | :win9xME_args 55 | @rem Slurp the command line arguments. 56 | set CMD_LINE_ARGS= 57 | set _SKIP=2 58 | 59 | :win9xME_args_slurp 60 | if "x%~1" == "x" goto execute 61 | 62 | set CMD_LINE_ARGS=%* 63 | goto execute 64 | 65 | :4NT_args 66 | @rem Get arguments from the 4NT Shell from JP Software 67 | set CMD_LINE_ARGS=%$ 68 | 69 | :execute 70 | @rem Setup the command line 71 | 72 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 73 | 74 | @rem Execute Gradle 75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 76 | 77 | :end 78 | @rem End local scope for the variables with windows NT shell 79 | if "%ERRORLEVEL%"=="0" goto mainEnd 80 | 81 | :fail 82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 83 | rem the _cmd.exe /c_ return code! 
84 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 85 | exit /b 1 86 | 87 | :mainEnd 88 | if "%OS%"=="Windows_NT" endlocal 89 | 90 | :omega 91 | -------------------------------------------------------------------------------- /lib/embulk/guess/jsonl.rb: -------------------------------------------------------------------------------- 1 | require 'json' 2 | require "embulk/parser/jsonl.rb" 3 | 4 | module Embulk 5 | module Guess 6 | # $ embulk guess -g "jsonl" partial-config.yml 7 | 8 | class Jsonl < LineGuessPlugin # TODO should use GuessPlugin instead of LineGuessPlugin 9 | Plugin.register_guess("jsonl", self) 10 | 11 | def guess_lines(config, sample_lines) 12 | #return {} unless config.fetch("parser", {}).fetch("type", "jsonl") == "jsonl" 13 | 14 | rows = [] 15 | 16 | columns = {} 17 | sample_lines.each do |line| 18 | rows << JSON.parse(line) 19 | end 20 | 21 | min_rows_for_guess = config.fetch("parser", {}).fetch("min_rows_for_guess", 4) 22 | return {} if rows.size < min_rows_for_guess 23 | 24 | columns = Embulk::Guess::SchemaGuess.from_hash_records(rows).map do |c| 25 | column = {name: c.name, type: c.type} 26 | column[:format] = c.format if c.format 27 | column 28 | end 29 | parser_guessed = {"type" => "jsonl"} 30 | parser_guessed["columns"] = columns 31 | return {"parser" => parser_guessed} 32 | end 33 | end 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /lib/embulk/parser/jsonl.rb: -------------------------------------------------------------------------------- 1 | Embulk::JavaPlugin.register_parser( 2 | "jsonl", "org.embulk.parser.jsonl.JsonlParserPlugin", 3 | File.expand_path('../../../../classpath', __FILE__)) -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'embulk-parser-jsonl' 2 | 
-------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/jsonl/ColumnCaster.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.jsonl; 2 | 3 | import org.embulk.parser.jsonl.cast.BooleanCast; 4 | import org.embulk.parser.jsonl.cast.DoubleCast; 5 | import org.embulk.parser.jsonl.cast.JsonCast; 6 | import org.embulk.parser.jsonl.cast.LongCast; 7 | import org.embulk.parser.jsonl.cast.StringCast; 8 | import org.embulk.spi.DataException; 9 | import org.embulk.spi.time.Timestamp; 10 | import org.embulk.spi.time.TimestampParser; 11 | import org.msgpack.value.Value; 12 | 13 | class ColumnCaster 14 | { 15 | ColumnCaster() {} 16 | 17 | public static boolean asBoolean(Value value) throws DataException 18 | { 19 | if (value.isBooleanValue()) { 20 | return value.asBooleanValue().getBoolean(); 21 | } 22 | else if (value.isIntegerValue()) { 23 | return LongCast.asBoolean(value.asIntegerValue().asLong()); 24 | } 25 | else if (value.isFloatValue()) { 26 | return DoubleCast.asBoolean(value.asFloatValue().toDouble()); 27 | } 28 | else if (value.isStringValue()) { 29 | return StringCast.asBoolean(value.asStringValue().asString()); 30 | } 31 | else { 32 | return JsonCast.asBoolean(value); 33 | } 34 | } 35 | 36 | public static long asLong(Value value) throws DataException 37 | { 38 | if (value.isBooleanValue()) { 39 | return BooleanCast.asLong(value.asBooleanValue().getBoolean()); 40 | } 41 | else if (value.isIntegerValue()) { 42 | return value.asIntegerValue().asLong(); 43 | } 44 | else if (value.isFloatValue()) { 45 | return DoubleCast.asLong(value.asFloatValue().toDouble()); 46 | } 47 | else if (value.isStringValue()) { 48 | return StringCast.asLong(value.asStringValue().asString()); 49 | } 50 | else { 51 | return JsonCast.asLong(value); 52 | } 53 | } 54 | 55 | public static double asDouble(Value value) throws DataException 56 | { 57 | if (value.isBooleanValue()) 
{ 58 | return BooleanCast.asDouble(value.asBooleanValue().getBoolean()); 59 | } 60 | else if (value.isIntegerValue()) { 61 | return LongCast.asDouble(value.asIntegerValue().asLong()); 62 | } 63 | else if (value.isFloatValue()) { 64 | return value.asFloatValue().toDouble(); 65 | } 66 | else if (value.isStringValue()) { 67 | return StringCast.asDouble(value.asStringValue().asString()); 68 | } 69 | else { 70 | return JsonCast.asDouble(value); 71 | } 72 | } 73 | 74 | public static String asString(Value value) throws DataException 75 | { 76 | return value.toString(); 77 | } 78 | 79 | public static Timestamp asTimestamp(Value value, TimestampParser parser) throws DataException 80 | { 81 | if (value.isBooleanValue()) { 82 | return BooleanCast.asTimestamp(value.asBooleanValue().getBoolean()); 83 | } 84 | else if (value.isIntegerValue()) { 85 | return LongCast.asTimestamp(value.asIntegerValue().asLong()); 86 | } 87 | else if (value.isFloatValue()) { 88 | return DoubleCast.asTimestamp(value.asFloatValue().toDouble()); 89 | } 90 | else if (value.isStringValue()) { 91 | return StringCast.asTimestamp(value.asStringValue().asString(), parser); 92 | } 93 | else { 94 | return JsonCast.asTimestamp(value); 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/jsonl/ColumnVisitorImpl.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.jsonl; 2 | 3 | import com.google.common.base.Optional; 4 | import org.embulk.parser.jsonl.JsonlParserPlugin.PluginTask; 5 | import org.embulk.parser.jsonl.JsonlParserPlugin.TypecastColumnOption; 6 | 7 | import org.embulk.spi.Column; 8 | import org.embulk.spi.ColumnConfig; 9 | import org.embulk.spi.ColumnVisitor; 10 | import org.embulk.spi.PageBuilder; 11 | import org.embulk.spi.Schema; 12 | import org.embulk.spi.SchemaConfig; 13 | import org.embulk.spi.time.Timestamp; 14 | import 
org.embulk.spi.time.TimestampParser;
import org.msgpack.core.MessageTypeException;
import org.msgpack.value.Value;

/**
 * Column visitor that writes the JSON value most recently supplied through
 * {@link #setValue(Value)} into the current record of a {@link PageBuilder}.
 *
 * <p>For boolean, long, double and string columns the value is either
 * converted through {@code ColumnCaster} (typecast enabled) or read with the
 * strict msgpack accessor (typecast disabled). The per-column switch is
 * resolved once in the constructor.
 */
public class ColumnVisitorImpl implements ColumnVisitor {
    protected final PluginTask task;
    protected final Schema schema;
    protected final PageBuilder pageBuilder;
    protected final TimestampParser[] timestampParsers;
    // Indexed by column index; true when the column goes through ColumnCaster.
    protected final Boolean autoTypecasts[];

    // Value of the column about to be visited; replaced before every visit.
    protected Value value;

    /**
     * @param task plugin task providing the default and per-column typecast settings
     * @param schema output schema; its column indexes address the arrays below
     * @param pageBuilder destination of the converted values
     * @param timestampParsers parsers indexed by column index, used for timestamp columns
     */
    public ColumnVisitorImpl(PluginTask task, Schema schema, PageBuilder pageBuilder, TimestampParser[] timestampParsers)
    {
        this.task = task;
        this.schema = schema;
        this.pageBuilder = pageBuilder;
        this.timestampParsers = timestampParsers;
        this.autoTypecasts = new Boolean[schema.size()];
        buildAutoTypecasts();
    }

    /**
     * Fills {@link #autoTypecasts}: every column starts with the task-wide
     * default, then per-column {@code typecast} options override it.
     *
     * <p>The column list is taken from the "columns" option, falling back to
     * the deprecated "schema" option so that per-column settings are honoured
     * whichever of the two the user supplied.
     */
    private void buildAutoTypecasts()
    {
        for (Column column : schema.getColumns()) {
            this.autoTypecasts[column.getIndex()] = task.getDefaultTypecast();
        }

        if (task.getSchemaConfig().isPresent()) {
            applyColumnTypecasts(task.getSchemaConfig().get().getColumns());
        }
        else if (task.getOldSchemaConfig().isPresent()) {
            applyColumnTypecasts(task.getOldSchemaConfig().get().getColumns());
        }
    }

    /** Applies the {@code typecast} option of each configured column, if set. */
    private void applyColumnTypecasts(Iterable<ColumnConfig> columnConfigs)
    {
        for (ColumnConfig columnConfig : columnConfigs) {
            TypecastColumnOption columnOption = columnConfig.getOption().loadConfig(TypecastColumnOption.class);
            Boolean autoTypecast = columnOption.getTypecast().or(task.getDefaultTypecast());
            Column column = schema.lookupColumn(columnConfig.getName());
            this.autoTypecasts[column.getIndex()] = autoTypecast;
        }
    }

    /** Sets the value that the next {@code xxxColumn} call will write. */
    public void setValue(Value value)
    {
        this.value = value;
    }

    /**
     * Writes the current value as a boolean, or null when it is absent/nil.
     *
     * @throws JsonRecordValidateException if the msgpack accessor rejects the value
     */
    @Override
    public void booleanColumn(Column column)
    {
        if (isNil(value)) {
            pageBuilder.setNull(column);
        }
        else {
            try {
                boolean booleanValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asBoolean(value) : value.asBooleanValue().getBoolean();
                pageBuilder.setBoolean(column, booleanValue);
            }
            catch (MessageTypeException e) {
                throw new JsonRecordValidateException(String.format("failed to get \"%s\" as Boolean", value), e);
            }
        }
    }

    /**
     * Writes the current value as a long, or null when it is absent/nil.
     *
     * @throws JsonRecordValidateException if the msgpack accessor rejects the value
     */
    @Override
    public void longColumn(Column column)
    {
        if (isNil(value)) {
            pageBuilder.setNull(column);
        }
        else {
            try {
                long longValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asLong(value) : value.asIntegerValue().toLong();
                pageBuilder.setLong(column, longValue);
            }
            catch (MessageTypeException e) {
                throw new JsonRecordValidateException(String.format("failed to get \"%s\" as Long", value), e);
            }
        }
    }

    /**
     * Writes the current value as a double, or null when it is absent/nil.
     *
     * @throws JsonRecordValidateException if the msgpack accessor rejects the value
     */
    @Override
    public void doubleColumn(Column column)
    {
        if (isNil(value)) {
            pageBuilder.setNull(column);
        }
        else {
            try {
                double doubleValue = autoTypecasts[column.getIndex()] ? ColumnCaster.asDouble(value) : value.asFloatValue().toDouble();
                pageBuilder.setDouble(column, doubleValue);
            }
            catch (MessageTypeException e) {
                // Message aligned with the other column types ("failed to get ...").
                throw new JsonRecordValidateException(String.format("failed to get \"%s\" as Double", value), e);
            }
        }
    }

    /**
     * Writes the current value as a string, or null when it is absent/nil.
     *
     * @throws JsonRecordValidateException if the msgpack accessor rejects the value
     */
    @Override
    public void stringColumn(Column column)
    {
        if (isNil(value)) {
            pageBuilder.setNull(column);
        }
        else {
            try {
                String string = autoTypecasts[column.getIndex()] ? ColumnCaster.asString(value) : value.asStringValue().toString();
                pageBuilder.setString(column, string);
            }
            catch (MessageTypeException e) {
                throw new JsonRecordValidateException(String.format("failed to get \"%s\" as String", value), e);
            }
        }
    }

    /**
     * Writes the current value as a timestamp, or null when it is absent/nil.
     * Conversion always goes through {@code ColumnCaster} with the parser
     * registered for this column; the typecast switch is not consulted.
     *
     * @throws JsonRecordValidateException if a MessageTypeException is raised
     */
    @Override
    public void timestampColumn(Column column)
    {
        if (isNil(value)) {
            pageBuilder.setNull(column);
        }
        else {
            try {
                Timestamp timestamp = ColumnCaster.asTimestamp(value, timestampParsers[column.getIndex()]);
                pageBuilder.setTimestamp(column, timestamp);
            }
            catch (MessageTypeException e) {
                throw new JsonRecordValidateException(String.format("failed to get \"%s\" as Timestamp", value), e);
            }
        }
    }

    /**
     * Writes the current value unchanged as a JSON column, or null when it is
     * absent/nil.
     *
     * @throws JsonRecordValidateException if a MessageTypeException is raised
     */
    @Override
    public void jsonColumn(Column column)
    {
        if (isNil(value)) {
            pageBuilder.setNull(column);
        }
        else {
            try {
                pageBuilder.setJson(column, value);
            }
            catch (MessageTypeException e) {
                throw new JsonRecordValidateException(String.format("failed to get \"%s\" as Json", value), e);
            }
        }
    }

    /** Returns true for a missing key (Java null) as well as an explicit JSON null. */
    protected boolean isNil(Value v)
    {
        return v == null || v.isNilValue();
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/jsonl/JsonRecordValidateException.java:
--------------------------------------------------------------------------------
package org.embulk.parser.jsonl;

import org.embulk.spi.DataException;

/**
 * Raised when a parsed JSON line cannot be turned into a record, for example
 * because it is not a map or a column value has an unexpected type.
 */
public class JsonRecordValidateException
        extends DataException
{
    public JsonRecordValidateException(String message)
    {
        super(message);
    }

    public JsonRecordValidateException(String message, Throwable cause)
    {
        super(message, cause);
    }

    public JsonRecordValidateException(Throwable cause)
    {
        super(cause);
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/jsonl/JsonlParserPlugin.java:
--------------------------------------------------------------------------------
package org.embulk.parser.jsonl;

import com.google.common.base.Optional;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.embulk.config.Config;
import org.embulk.config.ConfigDefault;
import org.embulk.config.ConfigException;
import org.embulk.config.ConfigSource;
import org.embulk.config.Task;
import org.embulk.config.TaskSource;
import org.embulk.spi.Column;
import org.embulk.spi.ColumnConfig;
import org.embulk.spi.DataException;
import org.embulk.spi.Exec;
import org.embulk.spi.FileInput;
import org.embulk.spi.PageBuilder;
import org.embulk.spi.PageOutput;
import org.embulk.spi.ParserPlugin;
import org.embulk.spi.Schema;
import org.embulk.spi.SchemaConfig;
import org.embulk.spi.json.JsonParseException;
import org.embulk.spi.json.JsonParser;
import org.embulk.spi.time.TimestampParser;
import org.embulk.spi.type.Type;
import org.embulk.spi.util.LineDecoder;
import org.embulk.spi.util.Timestamps;
import org.msgpack.value.Value;
import org.slf4j.Logger;

import java.util.Map;

import static org.msgpack.value.ValueFactory.newString;

/**
 * Embulk parser plugin for JSON Lines input: every line is parsed as one JSON
 * object and its members are mapped onto the configured columns.
 */
public class JsonlParserPlugin
        implements ParserPlugin
{
    /** Per-column entry of the deprecated "column_options" option. */
    @Deprecated
    public interface JsonlColumnOption
            extends Task
    {
        @Config("type")
        @ConfigDefault("null")
        Optional<Type> getType();
    }

    /** Per-column option carried inside a "columns" entry. */
    public interface TypecastColumnOption
            extends Task
    {
        @Config("typecast")
        @ConfigDefault("null")
        public Optional<Boolean> getTypecast();
    }

    public interface PluginTask
            extends Task, LineDecoder.DecoderTask, TimestampParser.Task
    {
        @Config("columns")
        @ConfigDefault("null")
        Optional<SchemaConfig> getSchemaConfig();

        @Config("schema")
        @ConfigDefault("null")
        @Deprecated
        Optional<SchemaConfig> getOldSchemaConfig();

        @Config("stop_on_invalid_record")
        @ConfigDefault("false")
        boolean getStopOnInvalidRecord();

        @Config("default_typecast")
        @ConfigDefault("true")
        Boolean getDefaultTypecast();

        @Config("column_options")
        @ConfigDefault("{}")
        @Deprecated
        Map<String, JsonlColumnOption> getColumnOptions();
    }

    private final Logger log;

    // Column name -> msgpack string key, built once per run() so that record
    // lookups do not allocate a new key Value per column per line.
    private Map<String, Value> columnNameValues;

    public JsonlParserPlugin()
    {
        this.log = Exec.getLogger(JsonlParserPlugin.class);
    }

    /**
     * Loads the task configuration, derives the output schema from the
     * configured columns and hands both to {@code control}.
     */
    @Override
    public void transaction(ConfigSource configSource, Control control)
    {
        PluginTask task = configSource.loadConfig(PluginTask.class);

        if (!task.getColumnOptions().isEmpty()) {
            log.warn("embulk-parser-jsonl: \"column_options\" option is deprecated, specify type directly to \"columns\" option with typecast: true (default: true).");
        }

        SchemaConfig schemaConfig = getSchemaConfig(task);
        ImmutableList.Builder<Column> columns = ImmutableList.builder();
        for (int i = 0; i < schemaConfig.getColumnCount(); i++) {
            ColumnConfig columnConfig = schemaConfig.getColumn(i);
            Type type = getType(task, columnConfig);
            columns.add(new Column(i, columnConfig.getName(), type));
        }
        control.run(task.dump(), new Schema(columns.build()));
    }

    /**
     * Resolves a column's type: a type given through the deprecated
     * "column_options" entry for that column wins over the type in "columns".
     */
    private static Type getType(PluginTask task, ColumnConfig columnConfig)
    {
        JsonlColumnOption columnOption = columnOptionOf(task.getColumnOptions(), columnConfig.getName());
        return columnOption.getType().or(columnConfig.getType());
    }

    // this method is to keep the backward compatibility of 'schema' option.
    /**
     * Returns the column configuration, preferring "columns" over the
     * deprecated "schema". A warning is logged whenever "schema" is set.
     *
     * @throws ConfigException if neither option is set
     */
    private SchemaConfig getSchemaConfig(PluginTask task)
    {
        if (task.getOldSchemaConfig().isPresent()) {
            log.warn("Please use 'columns' option instead of 'schema' because the 'schema' option is deprecated. The next version will stop 'schema' option support.");
        }

        if (task.getSchemaConfig().isPresent()) {
            return task.getSchemaConfig().get();
        }
        else if (task.getOldSchemaConfig().isPresent()) {
            return task.getOldSchemaConfig().get();
        }
        else {
            throw new ConfigException("Attribute 'columns' is required but not set");
        }
    }

    /**
     * Reads every file of {@code input} line by line and emits one record per
     * line that parses to a JSON object.
     *
     * <p>A line that is not valid JSON, is not a JSON object, or fails column
     * validation is skipped with a warning, unless "stop_on_invalid_record" is
     * true, in which case a {@link DataException} naming the line is thrown.
     */
    @Override
    public void run(TaskSource taskSource, Schema schema, FileInput input, PageOutput output)
    {
        PluginTask task = taskSource.loadTask(PluginTask.class);

        setColumnNameValues(schema);

        final SchemaConfig schemaConfig = getSchemaConfig(task);
        final TimestampParser[] timestampParsers = Timestamps.newTimestampColumnParsers(task, schemaConfig);
        final LineDecoder decoder = newLineDecoder(input, task);
        final JsonParser jsonParser = newJsonParser();
        final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();

        try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
            ColumnVisitorImpl visitor = new ColumnVisitorImpl(task, schema, pageBuilder, timestampParsers);

            while (decoder.nextFile()) { // TODO this implementation should be improved with new JsonParser API on Embulk v0.8.3
                // Line state is local to this invocation; numbering restarts per file.
                long lineNumber = 0;
                String line;

                while ((line = decoder.poll()) != null) {
                    lineNumber++;

                    try {
                        Value value = jsonParser.parse(line);

                        if (!value.isMapValue()) {
                            throw new JsonRecordValidateException("Json string is not representing map value.");
                        }

                        final Map<Value, Value> record = value.asMapValue().map();
                        for (Column column : schema.getColumns()) {
                            // A missing key yields null, which the visitor writes as a null column.
                            Value v = record.get(getColumnNameValue(column));
                            visitor.setValue(v);
                            column.visit(visitor);
                        }

                        pageBuilder.addRecord();
                    }
                    // NOTE(review): only these two exception types are treated as
                    // "invalid record". If ColumnCaster raises a plain DataException
                    // on a failed cast, it is not caught here and aborts the run even
                    // when stop_on_invalid_record is false — confirm this is intended.
                    catch (JsonRecordValidateException | JsonParseException e) {
                        if (stopOnInvalidRecord) {
                            throw new DataException(String.format("Invalid record at line %d: %s", lineNumber, line), e);
                        }
                        log.warn(String.format("Skipped line %d (%s): %s", lineNumber, e.getMessage(), line));
                    }
                }
            }

            pageBuilder.finish();
        }
    }

    /** Precomputes the msgpack string key for every column name of {@code schema}. */
    private void setColumnNameValues(Schema schema)
    {
        ImmutableMap.Builder<String, Value> builder = ImmutableMap.builder();
        for (Column column : schema.getColumns()) {
            String name = column.getName();
            builder.put(name, newString(name));
        }
        columnNameValues = builder.build();
    }

    private Value getColumnNameValue(Column column)
    {
        return columnNameValues.get(column.getName());
    }

    /** Factory method, public so the decoder can be replaced by a subclass. */
    public LineDecoder newLineDecoder(FileInput input, PluginTask task)
    {
        return new LineDecoder(input, task);
    }

    /** Factory method, public so the JSON parser can be replaced by a subclass. */
    public JsonParser newJsonParser()
    {
        return new JsonParser();
    }

    /**
     * Returns the "column_options" entry for {@code columnName}, or an option
     * object loaded from an empty config (all defaults) when there is none.
     */
    private static JsonlColumnOption columnOptionOf(Map<String, JsonlColumnOption> columnOptions, String columnName)
    {
        return Optional.fromNullable(columnOptions.get(columnName)).or(
                // default column option
                new Supplier<JsonlColumnOption>()
                {
                    public JsonlColumnOption get()
                    {
                        return Exec.newConfigSource().loadConfig(JsonlColumnOption.class);
                    }
                });
    }

}
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/jsonl/cast/BooleanCast.java:
--------------------------------------------------------------------------------
package org.embulk.parser.jsonl.cast;

import org.embulk.spi.DataException;
import org.embulk.spi.time.Timestamp;

/**
 * Conversions from a boolean source value to each column type. Unsupported
 * targets (double, timestamp) throw {@link DataException}.
 */
public class BooleanCast
{
    private BooleanCast() {}

    private static String buildErrorMessage(String as, boolean value)
    {
        return String.format("cannot cast boolean to %s: \"%s\"", as, value);
    }

    public static boolean asBoolean(boolean value) throws DataException
    {
        return value;
    }

    /** Maps true to 1 and false to 0. */
    public static long asLong(boolean value) throws DataException
    {
        return value ? 1 : 0;
    }

    /** Always throws: boolean to double is not supported. */
    public static double asDouble(boolean value) throws DataException
    {
        throw new DataException(buildErrorMessage("double", value));
    }

    /** Returns "true" or "false". */
    public static String asString(boolean value) throws DataException
    {
        return value ? "true" : "false";
    }

    /** Always throws: boolean to timestamp is not supported. */
    public static Timestamp asTimestamp(boolean value) throws DataException
    {
        throw new DataException(buildErrorMessage("timestamp", value));
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/jsonl/cast/DoubleCast.java:
--------------------------------------------------------------------------------
package org.embulk.parser.jsonl.cast;

import org.embulk.spi.DataException;
import org.embulk.spi.time.Timestamp;

/**
 * Conversions from a double source value to each column type. Casting to
 * boolean is not supported and throws {@link DataException}.
 */
public class DoubleCast
{
    // 2^63: the smallest double that no longer fits in a long.
    private static final double LONG_EXCLUSIVE_UPPER_BOUND = 0x1.0p63;
    // -2^63: exactly Long.MIN_VALUE, the smallest double that still fits.
    private static final double LONG_INCLUSIVE_LOWER_BOUND = -0x1.0p63;

    private DoubleCast() {}

    private static String buildErrorMessage(String as, double value)
    {
        return String.format("cannot cast double to %s: \"%s\"", as, value);
    }

    /** Always throws: double to boolean is not supported. */
    public static boolean asBoolean(double value) throws DataException
    {
        throw new DataException(buildErrorMessage("boolean", value));
    }

    /**
     * Truncates the fractional part toward zero.
     *
     * @throws DataException if {@code value} is NaN, infinite, or outside the
     *         long range; a plain {@code (long)} cast would silently turn
     *         those into 0 or Long.MIN_VALUE/Long.MAX_VALUE
     */
    public static long asLong(double value) throws DataException
    {
        // NaN fails both comparisons below, so it is rejected as well.
        if (!(value >= LONG_INCLUSIVE_LOWER_BOUND && value < LONG_EXCLUSIVE_UPPER_BOUND)) {
            throw new DataException(buildErrorMessage("long", value));
        }
        return (long) value;
    }

    public static double asDouble(double value) throws DataException
    {
        return value;
    }

    public static String asString(double value) throws DataException
    {
        return String.valueOf(value);
    }

    /**
     * Interprets {@code value} as seconds since the epoch; the fractional
     * part becomes the nanosecond adjustment.
     */
    public static Timestamp asTimestamp(double value) throws DataException
    {
        long epochSecond = (long) value;
        long nanoAdjustMent = (long) ((value - epochSecond) * 1000000000);
        return Timestamp.ofEpochSecond(epochSecond, nanoAdjustMent);
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/jsonl/cast/JsonCast.java:
--------------------------------------------------------------------------------
package org.embulk.parser.jsonl.cast;

import org.embulk.spi.DataException;
import org.embulk.spi.time.Timestamp;
import org.msgpack.value.Value;

/**
 * Conversions from a JSON (msgpack {@link Value}) source to each column type.
 * Only the string conversion is supported; every other target throws
 * {@link DataException}.
 */
public class JsonCast
{
    private JsonCast() {}

    private static String buildErrorMessage(String as, Value value)
    {
        return String.format("cannot cast Json to %s: \"%s\"", as, value);
    }

    /** Always throws: JSON to boolean is not supported. */
    public static boolean asBoolean(Value value) throws DataException
    {
        throw new DataException(buildErrorMessage("boolean", value));
    }

    /** Always throws: JSON to long is not supported. */
    public static long asLong(Value value) throws DataException
    {
        throw new DataException(buildErrorMessage("long", value));
    }

    /** Always throws: JSON to double is not supported. */
    public static double asDouble(Value value) throws DataException
    {
        throw new DataException(buildErrorMessage("double", value));
    }

    /** Returns {@code value.toString()}. */
    public static String asString(Value value) throws DataException
    {
        return value.toString();
    }

    /** Always throws: JSON to timestamp is not supported. */
    public static Timestamp asTimestamp(Value value) throws DataException
    {
        throw new DataException(buildErrorMessage("timestamp", value));
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/jsonl/cast/LongCast.java:
--------------------------------------------------------------------------------
package org.embulk.parser.jsonl.cast;

import org.embulk.spi.DataException;
import org.embulk.spi.time.Timestamp;

/** Conversions from a long source value to each column type. */
public class LongCast
{
    private LongCast() {}

    private static String buildErrorMessage(String as, long value)
    {
        return String.format("cannot cast long to %s: \"%s\"", as, value);
    }

    /**
     * Maps 1 to true and 0 to false.
     *
     * @throws DataException for any other value
     */
    public static boolean asBoolean(long value) throws DataException
    {
        if (value == 1) {
            return true;
        }
        else if (value == 0) {
            return false;
        }
        else {
            throw new DataException(buildErrorMessage("boolean", value));
        }
    }

    public static long asLong(long value) throws DataException
    {
        return value;
    }

    public static double asDouble(long value) throws DataException
    {
        return (double) value;
    }

    public static String asString(long value) throws DataException
    {
        return String.valueOf(value);
    }

    /** Interprets {@code value} as whole seconds since the epoch. */
    public static Timestamp asTimestamp(long value) throws DataException
    {
        return Timestamp.ofEpochSecond(value);
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/jsonl/cast/StringCast.java:
--------------------------------------------------------------------------------
package org.embulk.parser.jsonl.cast;

import com.google.common.collect.ImmutableSet;
import org.embulk.spi.DataException;
import org.embulk.spi.time.Timestamp;
import org.embulk.spi.time.TimestampParseException;
import org.embulk.spi.time.TimestampParser;

/** Conversions from a string source value to each column type. */
public class StringCast
{
    // copy from csv plugin
    public static final ImmutableSet<String> TRUE_STRINGS =
            ImmutableSet.of(
                    "true", "True", "TRUE",
                    "yes", "Yes", "YES",
                    "t", "T", "y", "Y",
                    "on", "On", "ON",
                    "1");

    public static final ImmutableSet<String> FALSE_STRINGS =
            ImmutableSet.of(
                    "false", "False", "FALSE",
                    "no", "No", "NO",
                    "f", "F", "n", "N",
                    "off", "Off", "OFF",
                    "0");

    private StringCast() {}

    private static String buildErrorMessage(String as, String value)
    {
        return String.format("cannot cast String to %s: \"%s\"", as, value);
    }

    /**
     * Returns true/false when {@code value} is exactly one of the spellings in
     * {@link #TRUE_STRINGS} / {@link #FALSE_STRINGS}.
     *
     * @throws DataException for any other string
     */
    public static boolean asBoolean(String value) throws DataException
    {
        if (TRUE_STRINGS.contains(value)) {
            return true;
        }
        else if (FALSE_STRINGS.contains(value)) {
            return false;
        }
        else {
            throw new DataException(buildErrorMessage("boolean", value));
        }
    }

    /**
     * Parses with {@link Long#parseLong(String)}.
     *
     * @throws DataException wrapping the NumberFormatException on failure
     */
    public static long asLong(String value) throws DataException
    {
        try {
            return Long.parseLong(value);
        }
        catch (NumberFormatException ex) {
            throw new DataException(buildErrorMessage("long", value), ex);
        }
    }

    /**
     * Parses with {@link Double#parseDouble(String)}.
     *
     * @throws DataException wrapping the NumberFormatException on failure
     */
    public static double asDouble(String value) throws DataException
    {
        try {
            return Double.parseDouble(value);
        }
        catch (NumberFormatException ex) {
            throw new DataException(buildErrorMessage("double", value), ex);
        }
    }

    public static String asString(String value) throws DataException
    {
        return value;
    }

    /**
     * Parses {@code value} with the supplied parser.
     *
     * @throws DataException wrapping the TimestampParseException on failure
     */
    public static Timestamp asTimestamp(String value, TimestampParser parser) throws DataException
    {
        try {
            return parser.parse(value);
        }
        catch (TimestampParseException ex) {
            throw new DataException(buildErrorMessage("timestamp", value), ex);
        }
    }
}
--------------------------------------------------------------------------------
/src/test/java/org/embulk/parser/jsonl/TestColumnCaster.java:
--------------------------------------------------------------------------------
package org.embulk.parser.jsonl;

import org.embulk.EmbulkTestRuntime;
import org.embulk.spi.DataException;
import org.embulk.spi.time.Timestamp;
import org.embulk.spi.time.TimestampParser;
import org.joda.time.DateTimeZone;
import org.jruby.embed.ScriptingContainer;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.msgpack.value.MapValue;
import org.msgpack.value.Value;
import org.msgpack.value.ValueFactory;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

/**
 * Exercises every source-type / target-type combination of ColumnCaster.
 * The asDoubleFromXxx tests call {@code ColumnCaster.asDouble}; they
 * previously duplicated the asLong tests, leaving asDouble untested.
 */
public class TestColumnCaster
{
    @Rule
    public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
    public MapValue mapValue;
    public DataException thrown;
    public ScriptingContainer jruby;
    public TimestampParser parser;

    @Before
    public void createResource()
    {
        jruby = new ScriptingContainer();
        thrown = new DataException("any");
        Value[] kvs = new Value[2];
        kvs[0] = ValueFactory.newString("k");
        kvs[1] = ValueFactory.newString("v");
        mapValue = ValueFactory.newMap(kvs);
        parser = new TimestampParser(jruby, "%Y-%m-%d %H:%M:%S.%N", DateTimeZone.UTC);
    }

    @Test
    public void asBooleanFromBoolean()
    {
        assertEquals(true, ColumnCaster.asBoolean(ValueFactory.newBoolean(true)));
    }

    @Test
    public void asBooleanFromInteger()
    {
        assertEquals(true, ColumnCaster.asBoolean(ValueFactory.newInteger(1)));
        try {
            ColumnCaster.asBoolean(ValueFactory.newInteger(2));
            fail();
        }
        catch (Throwable t) {
            assertTrue(t instanceof DataException);
        }
    }

    @Test
    public void asBooleanFromFloat()
    {
        try {
            ColumnCaster.asBoolean(ValueFactory.newFloat(1.1));
            fail();
        }
        catch (Throwable t) {
            assertTrue(t instanceof DataException);
        }
    }

    @Test
    public void asBooleanFromString()
    {
        assertEquals(true, ColumnCaster.asBoolean(ValueFactory.newString("true")));
        try {
            ColumnCaster.asBoolean(ValueFactory.newString("foo"));
            fail();
        }
        catch (Throwable t) {
            assertTrue(t instanceof DataException);
        }
    }

    @Test
    public void asBooleanFromJson()
    {
        try {
            ColumnCaster.asBoolean(mapValue);
            fail();
        }
        catch (Throwable t) {
            assertTrue(t instanceof DataException);
        }
    }

    @Test
    public void asLongFromBoolean()
    {
        assertEquals(1, ColumnCaster.asLong(ValueFactory.newBoolean(true)));
    }

    @Test
    public void asLongFromInteger()
    {
        assertEquals(1, ColumnCaster.asLong(ValueFactory.newInteger(1)));
    }

    @Test
    public void asLongFromFloat()
    {
        assertEquals(1, ColumnCaster.asLong(ValueFactory.newFloat(1.5)));
    }

    @Test
    public void asLongFromString()
    {
        assertEquals(1, ColumnCaster.asLong(ValueFactory.newString("1")));
        try {
            ColumnCaster.asLong(ValueFactory.newString("foo"));
            fail();
        }
        catch (Throwable t) {
            assertTrue(t instanceof DataException);
        }
    }

    @Test
    public void asLongFromJson()
    {
        try {
            ColumnCaster.asLong(mapValue);
            fail();
        }
        catch (Throwable t) {
            assertTrue(t instanceof DataException);
        }
    }

    @Test
    public void asDoubleFromBoolean()
    {
        // BooleanCast.asDouble always throws; presumably ColumnCaster.asDouble
        // delegates to it for boolean values — TODO confirm against ColumnCaster.
        try {
            ColumnCaster.asDouble(ValueFactory.newBoolean(true));
            fail();
        }
        catch (Throwable t) {
            assertTrue(t instanceof DataException);
        }
    }

    @Test
    public void asDoubleFromInteger()
    {
        assertEquals(1.0, ColumnCaster.asDouble(ValueFactory.newInteger(1)), 0.0);
    }

    @Test
    public void asDoubleFromFloat()
    {
        assertEquals(1.5, ColumnCaster.asDouble(ValueFactory.newFloat(1.5)), 0.0);
    }

    @Test
    public void asDoubleFromString()
    {
        assertEquals(1.0, ColumnCaster.asDouble(ValueFactory.newString("1")), 0.0);
        try {
            ColumnCaster.asDouble(ValueFactory.newString("foo"));
            fail();
        }
        catch (Throwable t) {
            assertTrue(t instanceof DataException);
        }
    }

    @Test
    public void asDoubleFromJson()
    {
        try {
            ColumnCaster.asDouble(mapValue);
            fail();
        }
        catch (Throwable t) {
            assertTrue(t instanceof DataException);
        }
    }

    @Test
    public void asStringFromBoolean()
    {
        assertEquals("true", ColumnCaster.asString(ValueFactory.newBoolean(true)));
    }

    @Test
    public void asStringFromInteger()
    {
        assertEquals("1", ColumnCaster.asString(ValueFactory.newInteger(1)));
    }

    @Test
    public void asStringFromFloat()
    {
        assertEquals("1.5", ColumnCaster.asString(ValueFactory.newFloat(1.5)));
    }

    @Test
    public void asStringFromString()
    {
        assertEquals("1", ColumnCaster.asString(ValueFactory.newString("1")));
    }

    @Test
    public void asStringFromJson()
    {
        assertEquals("{\"k\":\"v\"}", ColumnCaster.asString(mapValue));
    }

    @Test
    public void asTimestampFromBoolean()
    {
        try {
            ColumnCaster.asTimestamp(ValueFactory.newBoolean(true), parser);
            fail();
        }
        catch (Throwable t) {
            assertTrue(t instanceof DataException);
        }
    }

    @Test
    public void asTimestampFromInteger()
    {
        assertEquals(1, ColumnCaster.asTimestamp(ValueFactory.newInteger(1), parser).getEpochSecond());
    }

    @Test
    public void asTimestampFromFloat()
    {
        Timestamp expected = Timestamp.ofEpochSecond(1463084053, 500000000);
        assertEquals(expected, ColumnCaster.asTimestamp(ValueFactory.newFloat(1463084053.5), parser));
    }

    @Test
    public void asTimestampFromString()
    {
        Timestamp expected = Timestamp.ofEpochSecond(1463084053, 500000000);
        assertEquals(expected, ColumnCaster.asTimestamp(ValueFactory.newString("2016-05-12 20:14:13.5"), parser));
    }

    @Test
    public void asTimestampFromJson()
    {
        try {
            ColumnCaster.asTimestamp(mapValue, parser);
            fail();
        }
        catch (Throwable t) {
            assertTrue(t instanceof DataException);
        }
    }
}
--------------------------------------------------------------------------------
/src/test/java/org/embulk/parser/jsonl/TestJsonlParserPlugin.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.jsonl; 2 | 3 | import com.google.common.collect.ImmutableList; 4 | import com.google.common.collect.Lists; 5 | import org.embulk.EmbulkTestRuntime; 6 | import org.embulk.config.ConfigLoader; 7 | import org.embulk.config.ConfigSource; 8 | import org.embulk.config.TaskSource; 9 | import org.embulk.spi.ColumnConfig; 10 | import org.embulk.spi.DataException; 11 | import org.embulk.spi.Exec; 12 | import org.embulk.spi.FileInput; 13 | import org.embulk.spi.ParserPlugin; 14 | import org.embulk.spi.Schema; 15 | import org.embulk.spi.SchemaConfig; 16 | import org.embulk.spi.TestPageBuilderReader.MockPageOutput; 17 | import org.embulk.spi.time.Timestamp; 18 | import org.embulk.spi.type.Type; 19 | import org.embulk.spi.util.InputStreamFileInput; 20 | import org.embulk.spi.util.Pages; 21 | import org.junit.Before; 22 | import org.junit.Rule; 23 | import org.junit.Test; 24 | 25 | import java.io.ByteArrayInputStream; 26 | import java.io.File; 27 | import java.io.IOException; 28 | import java.io.InputStream; 29 | import java.util.List; 30 | 31 | import static org.embulk.spi.type.Types.BOOLEAN; 32 | import static org.embulk.spi.type.Types.DOUBLE; 33 | import static org.embulk.spi.type.Types.JSON; 34 | import static org.embulk.spi.type.Types.LONG; 35 | import static org.embulk.spi.type.Types.STRING; 36 | import static org.embulk.spi.type.Types.TIMESTAMP; 37 | import static org.junit.Assert.assertEquals; 38 | import static org.junit.Assert.assertNull; 39 | import static org.junit.Assert.assertTrue; 40 | import static org.junit.Assert.fail; 41 | import static org.msgpack.value.ValueFactory.newArray; 42 | import static org.msgpack.value.ValueFactory.newMap; 43 | import static org.msgpack.value.ValueFactory.newString; 44 | 45 | public class TestJsonlParserPlugin 46 | { 47 | @Rule 48 | public EmbulkTestRuntime 
runtime = new EmbulkTestRuntime(); 49 | 50 | private ConfigSource config; 51 | private JsonlParserPlugin plugin; 52 | private MockPageOutput output; 53 | 54 | @Before 55 | public void createResource() 56 | { 57 | config = config().set("type", "jsonl"); 58 | plugin = new JsonlParserPlugin(); 59 | recreatePageOutput(); 60 | } 61 | 62 | private void recreatePageOutput() 63 | { 64 | output = new MockPageOutput(); 65 | } 66 | 67 | @Test 68 | public void skipRecords() 69 | throws Exception 70 | { 71 | SchemaConfig schema = schema( 72 | column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE), 73 | column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON)); 74 | ConfigSource config = this.config.deepCopy().set("columns", schema); 75 | 76 | transaction(config, fileInput( 77 | "[]", 78 | "\"embulk\"", 79 | "10", 80 | "true", 81 | "false", 82 | "null", 83 | " " 84 | )); 85 | 86 | List records = Pages.toObjects(schema.toSchema(), output.pages); 87 | assertEquals(0, records.size()); 88 | } 89 | 90 | @Test 91 | public void throwDataException() 92 | throws Exception 93 | { 94 | SchemaConfig schema = schema( 95 | column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE), 96 | column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON)); 97 | ConfigSource config = this.config.deepCopy().set("columns", schema).set("stop_on_invalid_record", true); 98 | 99 | try { 100 | transaction(config, fileInput( 101 | "\"not_map_value\"" 102 | )); 103 | fail(); 104 | } 105 | catch (Throwable t) { 106 | assertTrue(t instanceof DataException); 107 | } 108 | } 109 | 110 | @Test 111 | public void writeNils() 112 | throws Exception 113 | { 114 | SchemaConfig schema = schema( 115 | column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE), 116 | column("_c3", STRING), column("_c4", TIMESTAMP), column("_c5", JSON)); 117 | ConfigSource config = this.config.deepCopy().set("columns", schema); 118 | 119 | transaction(config, fileInput( 120 | "{}", 121 | 
"{\"_c0\":null,\"_c1\":null,\"_c2\":null}", 122 | "{\"_c3\":null,\"_c4\":null,\"_c5\":null}", 123 | "{}" 124 | )); 125 | 126 | List records = Pages.toObjects(schema.toSchema(), output.pages); 127 | assertEquals(4, records.size()); 128 | 129 | for (Object[] record : records) { 130 | for (int i = 0; i < 6; i++) { 131 | assertNull(record[i]); 132 | } 133 | } 134 | } 135 | 136 | @Test 137 | public void useNormal() 138 | throws Exception 139 | { 140 | SchemaConfig schema = schema( 141 | column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE), 142 | column("_c3", STRING), column("_c4", TIMESTAMP, config().set("format", "%Y-%m-%d %H:%M:%S %Z")), column("_c5", JSON)); 143 | List configs = Lists.newArrayList( 144 | this.config.deepCopy().set("columns", schema), 145 | this.config.deepCopy().set("schema", schema) 146 | ); 147 | 148 | for (ConfigSource config : configs) { 149 | transaction(config, fileInput( 150 | "{\"_c0\":true,\"_c1\":10,\"_c2\":0.1,\"_c3\":\"embulk\",\"_c4\":\"2016-01-01 00:00:00 UTC\",\"_c5\":{\"k\":\"v\"}}", 151 | "[1, 2, 3]", 152 | "{\"_c0\":false,\"_c1\":-10,\"_c2\":1.0,\"_c3\":\"エンバルク\",\"_c4\":\"2016-01-01 00:00:00 +0000\",\"_c5\":[\"e0\",\"e1\"]}" 153 | )); 154 | 155 | List records = Pages.toObjects(schema.toSchema(), output.pages); 156 | assertEquals(2, records.size()); 157 | 158 | Object[] record; 159 | { 160 | record = records.get(0); 161 | assertEquals(true, record[0]); 162 | assertEquals(10L, record[1]); 163 | assertEquals(0.1, (Double) record[2], 0.0001); 164 | assertEquals("embulk", record[3]); 165 | assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]); 166 | assertEquals(newMap(newString("k"), newString("v")), record[5]); 167 | } 168 | { 169 | record = records.get(1); 170 | assertEquals(false, record[0]); 171 | assertEquals(-10L, record[1]); 172 | assertEquals(1.0, (Double) record[2], 0.0001); 173 | assertEquals("エンバルク", record[3]); 174 | assertEquals(Timestamp.ofEpochSecond(1451606400L), record[4]); 175 | 
assertEquals(newArray(newString("e0"), newString("e1")), record[5]); 176 | } 177 | 178 | /* replace the page output before the next config is run */ recreatePageOutput(); 179 | } 180 | } 181 | 182 | /* Loads use_column_options.yml, feeds records whose fields are all JSON strings, and reads the pages back as BOOLEAN / LONG / DOUBLE: the values are expected to arrive already cast ("true" -> true, "10" -> 10L, "0.1" -> 0.1). */ @Test 183 | public void useColumnOptions() 184 | throws Exception 185 | { 186 | 187 | SchemaConfig schema = schema( 188 | column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", DOUBLE)); 189 | File yamlFile = getResourceFile("use_column_options.yml"); 190 | ConfigSource config = getConfigFromYamlFile(yamlFile); 191 | 192 | transaction(config, fileInput( 193 | "{\"_c0\":\"true\",\"_c1\":\"10\",\"_c2\":\"0.1\"}", 194 | "{\"_c0\":\"false\",\"_c1\":\"-10\",\"_c2\":\"1.0\"}" 195 | )); 196 | 197 | List records = Pages.toObjects(schema.toSchema(), output.pages); 198 | assertEquals(2, records.size()); 199 | 200 | Object[] record; 201 | { 202 | record = records.get(0); 203 | assertEquals(true, record[0]); 204 | assertEquals(10L, record[1]); 205 | assertEquals(0.1, (Double) record[2], 0.0001); 206 | } 207 | { 208 | record = records.get(1); 209 | assertEquals(false, record[0]); 210 | assertEquals(-10L, record[1]); 211 | assertEquals(1.0, (Double) record[2], 0.0001); 212 | } 213 | } 214 | 215 | /* Returns a new ConfigSource obtained from the test runtime's Exec. */ private ConfigSource config() 216 | { 217 | return runtime.getExec().newConfigSource(); 218 | } 219 | 220 | /* Looks the resource up relative to this test class and wraps the URL's file part in a File. NOTE(review): URL.getFile() is not percent-decoded, so a checkout path containing spaces may not resolve - confirm. */ private File getResourceFile(String resourceName) 221 | throws IOException 222 | { 223 | return new File(this.getClass().getResource(resourceName).getFile()); 224 | } 225 | 226 | /* Reads the YAML file into a ConfigSource through a ConfigLoader built on Exec's model manager. */ private ConfigSource getConfigFromYamlFile(File yamlFile) 227 | throws IOException 228 | { 229 | ConfigLoader loader = new ConfigLoader(Exec.getModelManager()); 230 | return loader.fromYamlFile(yamlFile); 231 | } 232 | 233 | /* Runs plugin.transaction with a Control whose run() parses the given input into the output field. */ private void transaction(ConfigSource config, final FileInput input) 234 | { 235 | plugin.transaction(config, new ParserPlugin.Control() 236 | { 237 | @Override 238 | public void run(TaskSource taskSource, Schema schema) 239 | { 240 | plugin.run(taskSource, schema, input, output); 241 | } 242 | }); 243 | } 244 | 245 | /* Builds a FileInput whose content is the given lines, each terminated by a newline. The text is encoded explicitly as UTF-8 so the fixture bytes do not depend on the JVM default charset (the test data includes non-ASCII text). NOTE(review): assumes the parser under test decodes UTF-8 - confirm against the plugin's charset setting. */ private FileInput
fileInput(String... lines) 246 | throws Exception 247 | { 248 | StringBuilder sb = new StringBuilder(); 249 | for (String line : lines) { 250 | sb.append(line).append("\n"); 251 | } 252 | 253 | ByteArrayInputStream in = new ByteArrayInputStream(sb.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8)); 254 | return new InputStreamFileInput(runtime.getBufferAllocator(), provider(in)); 255 | } 256 | 257 | /* Wraps the given streams, in order, in an InputStreamFileInput.IteratorProvider. */ private InputStreamFileInput.IteratorProvider provider(InputStream... inputStreams) 258 | throws IOException 259 | { 260 | return new InputStreamFileInput.IteratorProvider( 261 | ImmutableList.copyOf(inputStreams)); 262 | } 263 | 264 | /* Builds a SchemaConfig from the given columns, in order. */ private SchemaConfig schema(ColumnConfig... columns) 265 | { 266 | return new SchemaConfig(Lists.newArrayList(columns)); 267 | } 268 | 269 | /* Column with no explicit option: delegates to the three-argument overload with a new ConfigSource. */ private ColumnConfig column(String name, Type type) 270 | { 271 | return column(name, type, config()); 272 | } 273 | 274 | /* Column with the given per-column option (e.g. a timestamp format). */ private ColumnConfig column(String name, Type type, ConfigSource option) 275 | { 276 | return new ColumnConfig(name, type, option); 277 | } 278 | } 279 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/jsonl/cast/TestBooleanCast.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.jsonl.cast; 2 | 3 | import org.embulk.spi.DataException; 4 | import org.junit.Test; 5 | 6 | import static org.junit.Assert.assertEquals; 7 | import static org.junit.Assert.assertTrue; 8 | import static org.junit.Assert.fail; 9 | 10 | /* Unit tests pinning what BooleanCast returns (or throws) for each target type. */ public class TestBooleanCast 11 | { 12 | /* boolean -> boolean is expected to return the value unchanged */ @Test 13 | public void asBoolean() 14 | { 15 | assertEquals(true, BooleanCast.asBoolean(true)); 16 | assertEquals(false, BooleanCast.asBoolean(false)); 17 | } 18 | 19 | /* boolean -> long: true is expected to give 1 and false 0 */ @Test 20 | public void asLong() 21 | { 22 | assertEquals(1, BooleanCast.asLong(true)); 23 | assertEquals(0, BooleanCast.asLong(false)); 24 | } 25 | 26 | /* boolean -> double must throw DataException; fail() trips the test if the call returns normally */ @Test 27 | public void asDouble() 28 | { 29 | try { 30 | BooleanCast.asDouble(true); 31 | fail(); 32 | } 33 | catch 
(Throwable t) { 34 | assertTrue(t instanceof DataException); 35 | } 36 | } 37 | 38 | /* boolean -> string is expected to give the strings "true" and "false" */ @Test 39 | public void asString() 40 | { 41 | assertEquals("true", BooleanCast.asString(true)); 42 | assertEquals("false", BooleanCast.asString(false)); 43 | } 44 | 45 | /* boolean -> timestamp must throw DataException; fail() trips the test if the call returns normally */ @Test 46 | public void asTimestamp() 47 | { 48 | try { 49 | BooleanCast.asTimestamp(true); 50 | fail(); 51 | } 52 | catch (Throwable t) { 53 | assertTrue(t instanceof DataException); 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/jsonl/cast/TestDoubleCast.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.jsonl.cast; 2 | 3 | import org.embulk.spi.DataException; 4 | import org.embulk.spi.time.Timestamp; 5 | import org.junit.Test; 6 | 7 | import static org.junit.Assert.assertEquals; 8 | import static org.junit.Assert.assertTrue; 9 | import static org.junit.Assert.fail; 10 | 11 | /* Unit tests pinning what DoubleCast returns (or throws) for a sample double. */ public class TestDoubleCast 12 | { 13 | /* double -> boolean must throw DataException; fail() trips the test if the call returns normally */ @Test 14 | public void asBoolean() 15 | { 16 | try { 17 | DoubleCast.asBoolean(0.5); 18 | fail(); 19 | } 20 | catch (Throwable t) { 21 | assertTrue(t instanceof DataException); 22 | } 23 | } 24 | 25 | /* 0.5 is expected to come back as 0; this single sample does not tell truncation apart from other rounding modes */ @Test 26 | public void asLong() 27 | { 28 | assertEquals(0, DoubleCast.asLong(0.5)); 29 | } 30 | 31 | /* double -> double is expected to return the value exactly (zero tolerance) */ @Test 32 | public void asDouble() 33 | { 34 | assertEquals(0.5, DoubleCast.asDouble(0.5), 0.0); 35 | } 36 | 37 | /* 0.5 is expected to render as the string "0.5" */ @Test 38 | public void asString() 39 | { 40 | assertEquals("0.5", DoubleCast.asString(0.5)); 41 | } 42 | 43 | /* 1.5 is expected to equal Timestamp.ofEpochSecond(1, 500000000), i.e. whole seconds plus a fractional part in the second argument (presumably nanoseconds - confirm against Timestamp) */ @Test 44 | public void asTimestamp() 45 | { 46 | Timestamp expected = Timestamp.ofEpochSecond(1, 500000000); 47 | assertEquals(expected, DoubleCast.asTimestamp(1.5)); 48 | } 49 | } 50 | 51 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/jsonl/cast/TestJsonCast.java: -------------------------------------------------------------------------------- 1 | package 
org.embulk.parser.jsonl.cast; 2 | 3 | import org.embulk.spi.DataException; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | import org.msgpack.value.Value; 7 | import org.msgpack.value.ValueFactory; 8 | 9 | import static org.junit.Assert.assertEquals; 10 | import static org.junit.Assert.assertTrue; 11 | import static org.junit.Assert.fail; 12 | 13 | /* Unit tests pinning what JsonCast returns (or throws) for a small map value. */ public class TestJsonCast 14 | { 15 | /* fixture rebuilt before every test by createResource() */ public Value value; 16 | 17 | /* Builds the fixture from the two strings "k" and "v" via ValueFactory.newMap; asString below expects it to render as {"k":"v"}. */ @Before 18 | public void createResource() 19 | { 20 | Value[] kvs = new Value[2]; 21 | kvs[0] = ValueFactory.newString("k"); 22 | kvs[1] = ValueFactory.newString("v"); 23 | value = ValueFactory.newMap(kvs); 24 | } 25 | 26 | /* json -> boolean must throw DataException; fail() trips the test if the call returns normally */ @Test 27 | public void asBoolean() 28 | { 29 | try { 30 | JsonCast.asBoolean(value); 31 | fail(); 32 | } 33 | catch (Throwable t) { 34 | assertTrue(t instanceof DataException); 35 | } 36 | } 37 | 38 | /* json -> long must throw DataException */ @Test 39 | public void asLong() 40 | { 41 | try { 42 | JsonCast.asLong(value); 43 | fail(); 44 | } 45 | catch (Throwable t) { 46 | assertTrue(t instanceof DataException); 47 | } 48 | } 49 | 50 | /* json -> double must throw DataException */ @Test 51 | public void asDouble() 52 | { 53 | try { 54 | JsonCast.asDouble(value); 55 | fail(); 56 | } 57 | catch (Throwable t) { 58 | assertTrue(t instanceof DataException); 59 | } 60 | } 61 | 62 | /* json -> string is expected to give the compact JSON text of the map */ @Test 63 | public void asString() 64 | { 65 | assertEquals("{\"k\":\"v\"}", JsonCast.asString(value)); 66 | } 67 | 68 | /* json -> timestamp must throw DataException */ @Test 69 | public void asTimestamp() 70 | { 71 | try { 72 | JsonCast.asTimestamp(value); 73 | fail(); 74 | } 75 | catch (Throwable t) { 76 | assertTrue(t instanceof DataException); 77 | } 78 | } 79 | } 80 | 81 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/jsonl/cast/TestLongCast.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.jsonl.cast; 2 | 3 | import org.embulk.spi.time.Timestamp; 4 | import org.junit.Test; 5 | 6 | import static org.junit.Assert.assertEquals; 7 | 8 | /* Unit tests pinning what LongCast returns for sample longs. */ public class 
TestLongCast 9 | { 10 | /* long -> boolean: 1 is expected to give true and 0 false */ @Test 11 | public void asBoolean() 12 | { 13 | assertEquals(true, LongCast.asBoolean(1)); 14 | assertEquals(false, LongCast.asBoolean(0)); 15 | } 16 | 17 | /* long -> long is expected to return the value unchanged */ @Test 18 | public void asLong() 19 | { 20 | assertEquals(1, LongCast.asLong(1)); 21 | } 22 | 23 | /* 1 is expected to become exactly 1.0 (zero tolerance) */ @Test 24 | public void asDouble() 25 | { 26 | assertEquals(1.0, LongCast.asDouble(1), 0.0); 27 | } 28 | 29 | /* 1 is expected to render as the string "1" */ @Test 30 | public void asString() 31 | { 32 | assertEquals("1", LongCast.asString(1)); 33 | } 34 | 35 | /* 1 is expected to equal Timestamp.ofEpochSecond(1), i.e. the long is read as epoch seconds */ @Test 36 | public void asTimestamp() 37 | { 38 | Timestamp expected = Timestamp.ofEpochSecond(1); 39 | assertEquals(expected, LongCast.asTimestamp(1)); 40 | } 41 | } 42 | 43 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/jsonl/cast/TestStringCast.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.jsonl.cast; 2 | 3 | import org.embulk.EmbulkTestRuntime; 4 | import org.embulk.spi.DataException; 5 | import org.embulk.spi.time.Timestamp; 6 | import org.embulk.spi.time.TimestampParser; 7 | import org.joda.time.DateTimeZone; 8 | import org.jruby.embed.ScriptingContainer; 9 | import org.junit.Before; 10 | import org.junit.Rule; 11 | import org.junit.Test; 12 | 13 | import static org.junit.Assert.assertEquals; 14 | import static org.junit.Assert.assertTrue; 15 | import static org.junit.Assert.fail; 16 | 17 | /* Unit tests pinning what StringCast returns (or throws) for sample strings; the jruby field is handed to the TimestampParser built in asTimestamp. */ public class TestStringCast 18 | { 19 | @Rule 20 | public EmbulkTestRuntime runtime = new EmbulkTestRuntime(); 21 | public ScriptingContainer jruby; 22 | 23 | /* creates a new ScriptingContainer before every test */ @Before 24 | public void createResource() 25 | { 26 | jruby = new ScriptingContainer(); 27 | } 28 | 29 | /* Every entry of TRUE_STRINGS must cast to true and every entry of FALSE_STRINGS to false; the string "foo" must be rejected with DataException. */ @Test 30 | public void asBoolean() 31 | { 32 | for (String str : StringCast.TRUE_STRINGS) { 33 | assertEquals(true, StringCast.asBoolean(str)); 34 | } 35 | for (String str : StringCast.FALSE_STRINGS) { 36 | assertEquals(false, StringCast.asBoolean(str)); 37 | } 38 | try { 39 |
StringCast.asBoolean("foo"); 40 | /* reached only if "foo" was accepted: without this the test would pass when no exception is thrown; the resulting AssertionError is caught below and fails the instanceof check */ fail(); 41 | } 42 | catch (Throwable t) { 43 | assertTrue(t instanceof DataException); 44 | } 45 | } 46 | 47 | /* "1" is expected to give 1; the non-integer "1.5" and the non-numeric "foo" must both throw DataException */ @Test 48 | public void asLong() 49 | { 50 | assertEquals(1, StringCast.asLong("1")); 51 | try { 52 | StringCast.asLong("1.5"); 53 | fail(); 54 | } 55 | catch (Throwable t) { 56 | assertTrue(t instanceof DataException); 57 | } 58 | try { 59 | StringCast.asLong("foo"); 60 | fail(); 61 | } 62 | catch (Throwable t) { 63 | assertTrue(t instanceof DataException); 64 | } 65 | } 66 | 67 | /* "1" and "1.5" are expected to give exactly 1.0 and 1.5; "foo" must throw DataException */ @Test 68 | public void asDouble() 69 | { 70 | assertEquals(1.0, StringCast.asDouble("1"), 0.0); 71 | assertEquals(1.5, StringCast.asDouble("1.5"), 0.0); 72 | try { 73 | StringCast.asDouble("foo"); 74 | fail(); 75 | } 76 | catch (Throwable t) { 77 | assertTrue(t instanceof DataException); 78 | } 79 | } 80 | 81 | /* string -> string is expected to return the input unchanged */ @Test 82 | public void asString() 83 | { 84 | assertEquals("1", StringCast.asString("1")); 85 | assertEquals("1.5", StringCast.asString("1.5")); 86 | assertEquals("foo", StringCast.asString("foo")); 87 | } 88 | 89 | /* Parses a microsecond-precision text with a UTC TimestampParser and expects epoch second 1463084053 with 123456000 as the second argument; text that does not match the format must throw DataException. */ @Test 90 | public void asTimestamp() 91 | { 92 | Timestamp expected = Timestamp.ofEpochSecond(1463084053, 123456000); 93 | TimestampParser parser = new TimestampParser(jruby, "%Y-%m-%d %H:%M:%S.%N", DateTimeZone.UTC); 94 | assertEquals(expected, StringCast.asTimestamp("2016-05-12 20:14:13.123456", parser)); 95 | 96 | try { 97 | StringCast.asTimestamp("foo", parser); 98 | fail(); 99 | } 100 | catch (Throwable t) { 101 | assertTrue(t instanceof DataException); 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/test/resources/org/embulk/parser/jsonl/use_column_options.yml: -------------------------------------------------------------------------------- 1 | type: jsonl 2 | columns: 3 | - {name: "_c0", type: "string"} 4 | - {name: "_c1", type: "string"} 5 | - {name: "_c2", type: "string"} 6 | column_options: 7 | _c0: {type: "boolean"} 8 | _c1: {type: "long"} 9 | _c2: {type: 
"double"} 10 | --------------------------------------------------------------------------------