├── .gitignore ├── LICENSE.txt ├── README.md ├── build.gradle ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── lib └── embulk │ ├── guess │ └── poi_excel.rb │ └── parser │ └── poi_excel.rb └── src ├── main └── java │ └── org │ └── embulk │ └── parser │ └── poi_excel │ ├── PoiExcelColumnValueType.java │ ├── PoiExcelParserPlugin.java │ ├── bean │ ├── PoiExcelColumnBean.java │ ├── PoiExcelColumnIndex.java │ ├── PoiExcelSheetBean.java │ ├── record │ │ ├── PoiExcelRecord.java │ │ ├── PoiExcelRecordColumn.java │ │ ├── PoiExcelRecordRow.java │ │ ├── PoiExcelRecordSheet.java │ │ └── RecordType.java │ └── util │ │ ├── PoiExcelCellAddress.java │ │ └── SearchMergedCell.java │ └── visitor │ ├── AbstractPoiExcelCellAttributeVisitor.java │ ├── PoiExcelCellCommentVisitor.java │ ├── PoiExcelCellFontVisitor.java │ ├── PoiExcelCellStyleVisitor.java │ ├── PoiExcelCellTypeVisitor.java │ ├── PoiExcelCellValueVisitor.java │ ├── PoiExcelClientAnchorVisitor.java │ ├── PoiExcelColorVisitor.java │ ├── PoiExcelColumnVisitor.java │ ├── PoiExcelVisitorFactory.java │ ├── PoiExcelVisitorValue.java │ ├── embulk │ ├── BooleanCellVisitor.java │ ├── CellVisitor.java │ ├── DoubleCellVisitor.java │ ├── LongCellVisitor.java │ ├── StringCellVisitor.java │ └── TimestampCellVisitor.java │ └── util │ ├── MergedRegionFinder.java │ ├── MergedRegionList.java │ ├── MergedRegionMap.java │ └── MergedRegionNothing.java └── test ├── java └── org │ └── embulk │ └── parser │ ├── EmbulkPluginTester.java │ ├── EmbulkTestFileInputPlugin.java │ ├── EmbulkTestOutputPlugin.java │ ├── EmbulkTestParserConfig.java │ └── poi_excel │ ├── TestPoiExcelParserPlugin.java │ ├── TestPoiExcelParserPlugin_cellAddress.java │ ├── TestPoiExcelParserPlugin_cellComment.java │ ├── TestPoiExcelParserPlugin_cellError.java │ ├── TestPoiExcelParserPlugin_cellFont.java │ ├── TestPoiExcelParserPlugin_cellStyle.java │ ├── TestPoiExcelParserPlugin_cellType.java │ ├── 
TestPoiExcelParserPlugin_columnNumber.java │ ├── TestPoiExcelParserPlugin_constant.java │ ├── TestPoiExcelParserPlugin_convertError.java │ ├── TestPoiExcelParserPlugin_formula.java │ ├── TestPoiExcelParserPlugin_mergedCell.java │ ├── TestPoiExcelParserPlugin_recordType.java │ └── TestPoiExcelParserPlugin_sheets.java └── resources └── org └── embulk └── parser └── poi_excel ├── test1.xls └── test2.xlsx /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | /pkg/ 3 | /tmp/ 4 | *.gemspec 5 | .gradle/ 6 | /classpath/ 7 | build/ 8 | .idea 9 | .project 10 | .classpath 11 | .settings 12 | /bin 13 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | MIT License 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining 5 | a copy of this software and associated documentation files (the 6 | "Software"), to deal in the Software without restriction, including 7 | without limitation the rights to use, copy, modify, merge, publish, 8 | distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so, subject to 10 | the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apache POI Excel parser plugin for Embulk 2 | 3 | Parses Microsoft Excel files(xls, xlsx) read by other file input plugins. 4 | This plugin uses Apache POI. 5 | 6 | ## Overview 7 | 8 | * **Plugin type**: parser 9 | * **Guess supported**: no 10 | * Embulk 0.9 or earlier (refer to https://github.com/hishidama/embulk-parser-excel-poi for 0.10 and later) 11 | 12 | 13 | ## Example 14 | 15 | ```yaml 16 | in: 17 | type: any file input plugin type 18 | parser: 19 | type: poi_excel 20 | sheets: ["DQ10-orb"] 21 | skip_header_lines: 1 # first row is header. 22 | columns: 23 | - {name: row, type: long, value: row_number} 24 | - {name: get_date, type: timestamp, cell_column: A, value: cell_value} 25 | - {name: orb_type, type: string} 26 | - {name: orb_name, type: string} 27 | - {name: orb_shape, type: long} 28 | - {name: drop_monster_name, type: string} 29 | ``` 30 | 31 | If **value** is omitted, `cell_value` is used. 32 | If **cell_column** is omitted when **value** is `cell_value`, the next column is used. 33 | 34 | 35 | ## Configuration 36 | 37 | * **sheets**: sheet name. can use wildcards `*`, `?`. (list of string, required) 38 | * **record_type**: record type. (`row`, `column` or `sheet`. default: `row`) 39 | * **skip_header_lines**: skip rows when **record_type**=`row` (skip columns when **record_type**=`column`). ignored when **record_type**=`sheet`. (integer, default: `0`) 40 | * **columns**: column definition. see below. (hash, required) 41 | * **sheet_options**: sheet option. see below. (hash, default: null) 42 | 43 | ### columns 44 | 45 | * **name**: Embulk column name. (string, required) 46 | * **type**: Embulk column type. (string, required) 47 | * **value**: value type. see below. (string, default: `cell_value`) 48 | * **column_number**: same as **cell_column**. 
49 | * **cell_column**: Excel column number. see below. (string, default: next column when **record_type**=`row`) 50 | * **cell_row**: Excel row number. see below. (integer, default: next row when **record_type**=`column`) 51 | * **cell_address**: Excel cell address such as `A1`, `Sheet1!B3`. (string, not required) 52 | * **numeric_format**: format of numeric(double) to string such as `%4.2f`. (default: Java's Double.toString()) 53 | * **attribute_name**: use with value `cell_style`, `cell_font`, etc. see below. (list of string) 54 | * **on_cell_error**: processing method of Cell error. see below. (string, default: `constant`) 55 | * **formula_handling**: processing method of formula. see below. (`evaluate` or `cashed_value`. default: `evaluate`) 56 | * **on_evaluate_error**: processing method of evaluate formula error. see below. (string, default: `exception`) 57 | * **formula_replace**: replace formula before evaluate. see below. 58 | * **on_convert_error**: processing method of convert error. see below. (string, default: `exception`) 59 | * **search_merged_cell**: search merged cell when cell is BLANK. (`none`, `linear_search`, `tree_search` or `hash_search`, default: `hash_search`) 60 | 61 | ### value 62 | 63 | * `cell_value`: value in cell. 64 | * `cell_formula`: formula in cell. (if the cell is not a formula, same as `cell_value`.) 65 | * `cell_style`: all cell style attributes. returns a JSON string. see **attribute_name**. (**type** must be `string`) 66 | * `cell_font`: all cell font attributes. returns a JSON string. see **attribute_name**. (**type** must be `string`) 67 | * `cell_comment`: all cell comment attributes. returns a JSON string. see **attribute_name**. (**type** must be `string`) 68 | * `cell_type`: cell type. returns Cell.getCellType() of POI. 69 | * `cell_cached_type`: cell cached formula result type. returns Cell.getCachedFormulaResultType() of POI when CellType==FORMULA, otherwise same as `cell_type` (returns Cell.getCellType()). 
70 | * `sheet_name`: sheet name. 71 | * `row_number`: row number(1 origin). 72 | * `column_number`: column number(1 origin). 73 | * `constant`: constant value. 74 | 75 | * `constant.`*value*: specified value. 76 | * `constant`: null. 77 | 78 | ### cell_column 79 | 80 | Basically used for **record_type**=`row`. 81 | 82 | * `A`,`B`,`C`,...: column number of "A1 format". 83 | * *number*: column number (1 origin). 84 | * `+`: next column. 85 | * `+`*name*: next column of name. 86 | * `+`*number*: number next column. 87 | * `-`: previous column. 88 | * `-`*name*: previous column of name. 89 | * `-`*number*: number previous column. 90 | * `=`: same column. 91 | * `=`*name*: same column of name. 92 | 93 | ### cell_row 94 | 95 | Basically used for **record_type**=`column`. 96 | 97 | * *number*: row number (1 origin). 98 | 99 | ### attribute_name 100 | 101 | **value**が`cell_style`, `cell_font`, `cell_comment`のとき、デフォルトでは、全属性を取得してJSON文字列に変換します。 102 | (JSON文字列を返すので、**type**は`string`である必要があります) 103 | 104 | ```yaml 105 | columns: 106 | - {name: foo, type: string, cell_column: A, value: cell_style} 107 | ``` 108 | 109 | 110 | attribute_nameを指定することで、指定された属性だけを取得してJSON文字列に変換します。 111 | 112 | * **attribute_name**: attribute names. (list of string) 113 | 114 | ```yaml 115 | columns: 116 | - {name: foo, type: string, cell_column: A, value: cell_style, attribute_name: [border_top, border_bottom, border_left, border_right]} 117 | ``` 118 | 119 | 120 | また、`cell_style`や`cell_font`の直後にピリオドを付けて属性名を指定することにより、その属性だけを取得することが出来ます。 121 | この場合はJSON文字列にはならず、属性の型に合う**type**を指定する必要があります。 122 | 123 | ```yaml 124 | columns: 125 | - {name: foo, type: long, value: cell_style.border} 126 | - {name: bar, type: long, value: cell_font.color} 127 | ``` 128 | 129 | なお、`cell_style`や`cell_font`では、**cell_column**を省略した場合は直前と同じ列を対象とします。 130 | (`cell_value`では、**cell_column**を省略すると次の列に移る) 131 | 132 | 133 | ### on_cell_error 134 | 135 | Processing method of Cell error (`#DIV/0!`, `#REF!`, etc). 
136 | 137 | ```yaml 138 | columns: 139 | - {name: foo, type: string, cell_column: A, value: cell_value, on_cell_error: error_code} 140 | ``` 141 | 142 | * `constant`: set null. (default) 143 | * `constant.`*value*: set specified value. 144 | * `error_code`: set error code. 145 | * `exception`: throw exception. 146 | 147 | 148 | ### formula_handling 149 | 150 | Processing method of formula. 151 | 152 | ```yaml 153 | columns: 154 | - {name: foo, type: string, cell_column: A, value: cell_value, formula_handling: cashed_value} 155 | ``` 156 | 157 | * `evaluate`: evaluate formula. (default) 158 | * `cashed_value`: cashed value in cell. 159 | 160 | 161 | ### on_evaluate_error 162 | 163 | Processing method of evaluate formula error. 164 | 165 | ```yaml 166 | columns: 167 | - {name: foo, type: string, cell_column: A, value: cell_value, on_evaluate_error: constant} 168 | ``` 169 | 170 | * `constant`: set null. 171 | * `constant.`*value*: set specified value. 172 | * `exception`: throw exception. (default) 173 | 174 | 175 | ### formula_replace 176 | 177 | Replace formula before evaluate. 178 | 179 | ```yaml 180 | columns: 181 | - {name: foo, type: string, cell_column: A, value: cell_value, formula_replace: [{regex: aaa, to: "A${row}"}, {regex: bbb, to: "B${row}"}]} 182 | ``` 183 | 184 | `${row}` is replaced with the current row number. 185 | `${column}` is replaced with the current column string. 186 | 187 | 188 | ### on_convert_error 189 | 190 | Processing method of convert error. ex) Excel boolean to Embulk timestamp 191 | 192 | ```yaml 193 | columns: 194 | - {name: foo, type: timestamp, format: "%Y/%m/%d", cell_column: A, value: cell_value, on_convert_error: constant.9999/12/31} 195 | ``` 196 | 197 | * `constant`: set null. 198 | * `constant.`*value*: set specified value. 199 | * `exception`: throw exception. (default) 200 | 201 | 202 | ### sheet_options 203 | 204 | Options of individual sheet. 
205 | 206 | ```yaml 207 | parser: 208 | type: poi_excel 209 | sheets: [Sheet1, Sheet2] 210 | columns: 211 | - {name: date, type: timestamp, cell_column: A} 212 | - {name: foo, type: string} 213 | - {name: bar, type: long} 214 | sheet_options: 215 | Sheet1: 216 | skip_header_lines: 1 217 | columns: 218 | foo: {cell_column: B} 219 | bar: {cell_column: C} 220 | Sheet2: 221 | skip_header_lines: 0 222 | columns: 223 | foo: {cell_column: D} 224 | bar: {value: constant.0} 225 | ``` 226 | 227 | **sheet_options** is a map keyed by sheet name. 228 | Each map value may contain **skip_header_lines** and **columns**. 229 | 230 | **columns** is a map keyed by column name. 231 | Each map value takes the same options as **columns** in **parser** (excluding `name` and `type`). 232 | 233 | 234 | ## Install 235 | 236 | ``` 237 | $ embulk gem install embulk-parser-poi_excel 238 | ``` 239 | 240 | 241 | ## Build 242 | 243 | ``` 244 | $ ./gradlew test 245 | $ ./gradlew package 246 | ``` 247 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id "com.jfrog.bintray" version "1.1" 3 | id "com.github.jruby-gradle.base" version "1.5.0" 4 | id "java" 5 | id "eclipse" 6 | } 7 | import com.github.jrubygradle.JRubyExec 8 | repositories { 9 | mavenCentral() 10 | jcenter() 11 | } 12 | configurations { 13 | provided 14 | } 15 | 16 | version = "0.1.12" 17 | 18 | sourceCompatibility = 1.7 19 | targetCompatibility = 1.7 20 | 21 | dependencies { 22 | compile "org.embulk:embulk-core:0.7.5" 23 | provided "org.embulk:embulk-core:0.7.5" 24 | compile "org.embulk:embulk-standards:0.7.5" 25 | // compile "YOUR_JAR_DEPENDENCY_GROUP:YOUR_JAR_DEPENDENCY_MODULE:YOUR_JAR_DEPENDENCY_VERSION" 26 | compile group: 'org.apache.poi', name : 'poi', version: '3.17' 27 | compile(group: 'org.apache.poi', name : 'poi-ooxml', version: '3.17') { 28 | exclude group: 'stax', module: 'stax-api' 29 | } 30 | testCompile "junit:junit:4.+" 31 | } 
32 | 33 | task classpath(type: Copy, dependsOn: ["jar"]) { 34 | doFirst { file("classpath").deleteDir() } 35 | from (configurations.runtime - configurations.provided + files(jar.archivePath)) 36 | into "classpath" 37 | } 38 | clean { delete "classpath" } 39 | 40 | task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) { 41 | jrubyArgs "-S" 42 | script "gem" 43 | scriptArgs "build", "${project.name}.gemspec" 44 | doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") } 45 | } 46 | 47 | task gemPush(type: JRubyExec, dependsOn: ["gem"]) { 48 | jrubyArgs "-S" 49 | script "gem" 50 | scriptArgs "push", "pkg/${project.name}-${project.version}.gem" 51 | } 52 | 53 | task "package"(dependsOn: ["gemspec", "classpath"]) { 54 | doLast { 55 | println "> Build succeeded." 56 | println "> You can run embulk with '-L ${file(".").absolutePath}' argument." 57 | } 58 | } 59 | 60 | task gemUnpack(type: JRubyExec) { 61 | jrubyArgs "-S" 62 | script "gem" 63 | scriptArgs "unpack", "pkg/${project.name}-${project.version}.gem" 64 | } 65 | 66 | task gemspec { 67 | ext.gemspecFile = file("${project.name}.gemspec") 68 | inputs.file "build.gradle" 69 | outputs.file gemspecFile 70 | doLast { gemspecFile.write($/ 71 | Gem::Specification.new do |spec| 72 | spec.name = "${project.name}" 73 | spec.version = "${project.version}" 74 | spec.authors = ["hishidama"] 75 | spec.summary = %[Apache POI Excel parser plugin for Embulk] 76 | spec.description = %[Parses Excel files(xls, xlsx) read by other file input plugins.] 
77 | spec.email = ["hishi.dama@asahi.email.ne.jp"] 78 | spec.licenses = ["MIT"] 79 | spec.homepage = "https://github.com/hishidama/embulk-parser-poi_excel" 80 | 81 | spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"] 82 | spec.test_files = spec.files.grep(%r"^(test|spec)/") 83 | spec.require_paths = ["lib"] 84 | 85 | #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION'] 86 | spec.add_development_dependency 'bundler', ['~> 1.0'] 87 | spec.add_development_dependency 'rake', ['>= 10.0'] 88 | end 89 | /$) 90 | } 91 | } 92 | clean { delete "${project.name}.gemspec" } 93 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hishidama/embulk-parser-poi_excel/2a279661543f0552b285ac37467b1c39d7f81cf6/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | zipStoreBase=GRADLE_USER_HOME 4 | zipStorePath=wrapper/dists 5 | distributionUrl=https\://services.gradle.org/distributions/gradle-4.1-bin.zip 6 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # Resolve links: $0 may be a link 11 | PRG="$0" 12 | # Need this for relative symlinks. 
13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 31 | DEFAULT_JVM_OPTS="" 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 34 | MAX_FD="maximum" 35 | 36 | warn () { 37 | echo "$*" 38 | } 39 | 40 | die () { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! -x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 
89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? -ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) 
set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save () { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 
13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 17 | set DEFAULT_JVM_OPTS= 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 
78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /lib/embulk/guess/poi_excel.rb: -------------------------------------------------------------------------------- 1 | module Embulk 2 | module Guess 3 | 4 | # TODO implement guess plugin to make this command work: 5 | # $ embulk guess -g "poi_excel" partial-config.yml 6 | # 7 | # Depending on the file format the plugin uses, you can use choose 8 | # one of binary guess (GuessPlugin), text guess (TextGuessPlugin), 9 | # or line guess (LineGuessPlugin). 10 | 11 | #class PoiExcel < GuessPlugin 12 | # Plugin.register_guess("poi_excel", self) 13 | # 14 | # def guess(config, sample_buffer) 15 | # if sample_buffer[0,2] == GZIP_HEADER 16 | # guessed = {} 17 | # guessed["type"] = "poi_excel" 18 | # guessed["property1"] = "guessed-value" 19 | # return {"parser" => guessed} 20 | # else 21 | # return {} 22 | # end 23 | # end 24 | #end 25 | 26 | #class PoiExcel < TextGuessPlugin 27 | # Plugin.register_guess("poi_excel", self) 28 | # 29 | # def guess_text(config, sample_text) 30 | # js = JSON.parse(sample_text) rescue nil 31 | # if js && js["mykeyword"] == "keyword" 32 | # guessed = {} 33 | # guessed["type"] = "poi_excel" 34 | # guessed["property1"] = "guessed-value" 35 | # return {"parser" => guessed} 36 | # else 37 | # return {} 38 | # end 39 | # end 40 | #end 41 | 42 | #class PoiExcel < LineGuessPlugin 43 | # Plugin.register_guess("poi_excel", self) 44 | # 45 | # def guess_lines(config, sample_lines) 46 | # all_line_matched = sample_lines.all? 
do |line| 47 | # line =~ /mypattern/ 48 | # end 49 | # if all_line_matched 50 | # guessed = {} 51 | # guessed["type"] = "poi_excel" 52 | # guessed["property1"] = "guessed-value" 53 | # return {"parser" => guessed} 54 | # else 55 | # return {} 56 | # end 57 | # end 58 | #end 59 | 60 | end 61 | end 62 | -------------------------------------------------------------------------------- /lib/embulk/parser/poi_excel.rb: -------------------------------------------------------------------------------- 1 | Embulk::JavaPlugin.register_parser( 2 | "poi_excel", "org.embulk.parser.poi_excel.PoiExcelParserPlugin", 3 | File.expand_path('../../../../classpath', __FILE__)) 4 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/PoiExcelColumnValueType.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel; 2 | 3 | import org.embulk.parser.poi_excel.bean.record.RecordType; 4 | 5 | public enum PoiExcelColumnValueType { 6 | /** cell value */ 7 | CELL_VALUE(true, true), 8 | /** cell formula */ 9 | CELL_FORMULA(true, true), 10 | /** cell style */ 11 | CELL_STYLE(true, false), 12 | /** cell font */ 13 | CELL_FONT(true, false), 14 | /** cell comment */ 15 | CELL_COMMENT(true, false), 16 | /** cell type */ 17 | CELL_TYPE(true, false), 18 | /** cell CachedFormulaResultType */ 19 | CELL_CACHED_TYPE(true, false), 20 | /** sheet name */ 21 | SHEET_NAME(false, false), 22 | /** row number (1 origin) */ 23 | ROW_NUMBER(false, false) { 24 | @Override 25 | public boolean useCell(RecordType recordType) { 26 | if (recordType == RecordType.COLUMN) { 27 | return true; 28 | } 29 | return super.useCell(recordType); 30 | } 31 | }, 32 | /** column number (1 origin) */ 33 | COLUMN_NUMBER(true, false) { 34 | @Override 35 | public boolean useCell(RecordType recordType) { 36 | if (recordType == RecordType.ROW) { 37 | return true; 38 | } 39 | return 
super.useCell(recordType); 40 | } 41 | }, 42 | /** constant */ 43 | CONSTANT(false, false); 44 | 45 | private final boolean useCell; 46 | private final boolean nextIndex; 47 | 48 | PoiExcelColumnValueType(boolean useCell, boolean nextIndex) { 49 | this.useCell = useCell; 50 | this.nextIndex = nextIndex; 51 | } 52 | 53 | public boolean useCell(RecordType recordType) { 54 | return useCell; 55 | } 56 | 57 | public boolean nextIndex() { 58 | return nextIndex; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/PoiExcelParserPlugin.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.LinkedHashSet; 6 | import java.util.List; 7 | import java.util.Map; 8 | import java.util.Set; 9 | import java.util.regex.Pattern; 10 | 11 | import org.apache.poi.EncryptedDocumentException; 12 | import org.apache.poi.openxml4j.exceptions.InvalidFormatException; 13 | import org.apache.poi.ss.usermodel.Sheet; 14 | import org.apache.poi.ss.usermodel.Workbook; 15 | import org.apache.poi.ss.usermodel.WorkbookFactory; 16 | import org.embulk.config.Config; 17 | import org.embulk.config.ConfigDefault; 18 | import org.embulk.config.ConfigException; 19 | import org.embulk.config.ConfigSource; 20 | import org.embulk.config.Task; 21 | import org.embulk.config.TaskSource; 22 | import org.embulk.parser.poi_excel.bean.PoiExcelSheetBean; 23 | import org.embulk.parser.poi_excel.bean.record.PoiExcelRecord; 24 | import org.embulk.parser.poi_excel.visitor.PoiExcelColumnVisitor; 25 | import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorFactory; 26 | import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorValue; 27 | import org.embulk.spi.Exec; 28 | import org.embulk.spi.FileInput; 29 | import org.embulk.spi.PageBuilder; 30 | import 
org.embulk.spi.PageOutput; 31 | import org.embulk.spi.ParserPlugin; 32 | import org.embulk.spi.Schema; 33 | import org.embulk.spi.SchemaConfig; 34 | import org.embulk.spi.time.TimestampParser; 35 | import org.embulk.spi.util.FileInputInputStream; 36 | import org.slf4j.Logger; 37 | 38 | import com.google.common.base.Optional; 39 | import com.ibm.icu.text.MessageFormat; 40 | 41 | public class PoiExcelParserPlugin implements ParserPlugin { 42 | private final Logger log = Exec.getLogger(getClass()); 43 | 44 | public static final String TYPE = "poi_excel"; 45 | 46 | public interface PluginTask extends Task, TimestampParser.Task, SheetCommonOptionTask { 47 | @Config("sheet") 48 | @ConfigDefault("null") 49 | public Optional getSheet(); 50 | 51 | @Config("sheets") 52 | @ConfigDefault("[]") 53 | public List getSheets(); 54 | 55 | @Config("ignore_sheet_not_found") 56 | @ConfigDefault("false") 57 | public boolean getIgnoreSheetNotFound(); 58 | 59 | @Config("sheet_options") 60 | @ConfigDefault("{}") 61 | public Map getSheetOptions(); 62 | 63 | @Config("columns") 64 | public SchemaConfig getColumns(); 65 | 66 | @Config("flush_count") 67 | @ConfigDefault("100") 68 | public int getFlushCount(); 69 | } 70 | 71 | public interface SheetCommonOptionTask extends Task, ColumnCommonOptionTask { 72 | 73 | @Config("record_type") 74 | @ConfigDefault("null") 75 | public Optional getRecordType(); 76 | 77 | @Config("skip_header_lines") 78 | @ConfigDefault("null") 79 | public Optional getSkipHeaderLines(); 80 | } 81 | 82 | public interface SheetOptionTask extends Task, SheetCommonOptionTask { 83 | 84 | @Config("columns") 85 | @ConfigDefault("null") 86 | public Optional> getColumns(); 87 | } 88 | 89 | public interface ColumnOptionTask extends Task, ColumnCommonOptionTask { 90 | 91 | /** 92 | * @see PoiExcelColumnValueType 93 | * @return value_type 94 | */ 95 | @Config("value") 96 | @ConfigDefault("null") 97 | public Optional getValueType(); 98 | 99 | // same as cell_column 100 | 
@Config("column_number") 101 | @ConfigDefault("null") 102 | public Optional getColumnNumber(); 103 | 104 | public static final String CELL_COLUMN = "cell_column"; 105 | 106 | // A,B,... or number(1 origin) 107 | @Config(CELL_COLUMN) 108 | @ConfigDefault("null") 109 | public Optional getCellColumn(); 110 | 111 | public static final String CELL_ROW = "cell_row"; 112 | 113 | // number(1 origin) 114 | @Config(CELL_ROW) 115 | @ConfigDefault("null") 116 | public Optional getCellRow(); 117 | 118 | // A1,B2,... or Sheet1!A1 119 | @Config("cell_address") 120 | @ConfigDefault("null") 121 | public Optional getCellAddress(); 122 | 123 | // use when value_type=cell_style, cell_font, ... 124 | @Config("attribute_name") 125 | @ConfigDefault("null") 126 | public Optional> getAttributeName(); 127 | } 128 | 129 | public interface ColumnCommonOptionTask extends Task { 130 | // format of numeric(double) to string 131 | @Config("numeric_format") 132 | @ConfigDefault("null") 133 | public Optional getNumericFormat(); 134 | 135 | // search merged cell if cellType=BLANK 136 | @Config("search_merged_cell") 137 | @ConfigDefault("null") 138 | public Optional getSearchMergedCell(); 139 | 140 | @Config("formula_handling") 141 | @ConfigDefault("null") 142 | public Optional getFormulaHandling(); 143 | 144 | @Config("formula_replace") 145 | @ConfigDefault("null") 146 | public Optional> getFormulaReplace(); 147 | 148 | @Config("on_evaluate_error") 149 | @ConfigDefault("null") 150 | public Optional getOnEvaluateError(); 151 | 152 | @Config("on_cell_error") 153 | @ConfigDefault("null") 154 | public Optional getOnCellError(); 155 | 156 | @Config("on_convert_error") 157 | @ConfigDefault("null") 158 | public Optional getOnConvertError(); 159 | } 160 | 161 | public interface FormulaReplaceTask extends Task { 162 | 163 | @Config("regex") 164 | public String getRegex(); 165 | 166 | // replace string 167 | // can use variable: "${row}" 168 | @Config("to") 169 | public String getTo(); 170 | } 171 | 172 | 
@Override 173 | public void transaction(ConfigSource config, ParserPlugin.Control control) { 174 | PluginTask task = config.loadConfig(PluginTask.class); 175 | 176 | Schema schema = task.getColumns().toSchema(); 177 | 178 | control.run(task.dump(), schema); 179 | } 180 | 181 | @Override 182 | public void run(TaskSource taskSource, Schema schema, FileInput input, PageOutput output) { 183 | PluginTask task = taskSource.loadTask(PluginTask.class); 184 | 185 | List sheetNames = new ArrayList<>(); 186 | Optional sheetOption = task.getSheet(); 187 | if (sheetOption.isPresent()) { 188 | sheetNames.add(sheetOption.get()); 189 | } 190 | sheetNames.addAll(task.getSheets()); 191 | if (sheetNames.isEmpty()) { 192 | throw new ConfigException("Attribute sheets is required but not set"); 193 | } 194 | 195 | try (FileInputInputStream is = new FileInputInputStream(input)) { 196 | while (is.nextFile()) { 197 | Workbook workbook; 198 | try { 199 | workbook = WorkbookFactory.create(is); 200 | } catch (IOException | EncryptedDocumentException | InvalidFormatException e) { 201 | throw new RuntimeException(e); 202 | } 203 | 204 | List list = resolveSheetName(workbook, sheetNames); 205 | if (log.isDebugEnabled()) { 206 | log.debug("resolved sheet names={}", list); 207 | } 208 | run(task, schema, workbook, list, output); 209 | } 210 | } 211 | } 212 | 213 | private List resolveSheetName(Workbook workbook, List sheetNames) { 214 | Set set = new LinkedHashSet<>(); 215 | for (String s : sheetNames) { 216 | if (s.contains("*") || s.contains("?")) { 217 | int length = s.length(); 218 | StringBuilder sb = new StringBuilder(length * 2); 219 | StringBuilder buf = new StringBuilder(32); 220 | for (int i = 0; i < length;) { 221 | int c = s.codePointAt(i); 222 | switch (c) { 223 | case '*': 224 | if (buf.length() > 0) { 225 | sb.append(Pattern.quote(buf.toString())); 226 | buf.setLength(0); 227 | } 228 | sb.append(".*"); 229 | break; 230 | case '?': 231 | if (buf.length() > 0) { 232 | 
sb.append(Pattern.quote(buf.toString())); 233 | buf.setLength(0); 234 | } 235 | sb.append("."); 236 | break; 237 | default: 238 | buf.appendCodePoint(c); 239 | break; 240 | } 241 | i += Character.charCount(c); 242 | } 243 | if (buf.length() > 0) { 244 | sb.append(Pattern.quote(buf.toString())); 245 | } 246 | String regex = sb.toString(); 247 | for (Sheet sheet : workbook) { 248 | String name = sheet.getSheetName(); 249 | if (name.matches(regex)) { 250 | set.add(name); 251 | } 252 | } 253 | } else { 254 | set.add(s); 255 | } 256 | } 257 | return new ArrayList<>(set); 258 | } 259 | 260 | protected void run(PluginTask task, Schema schema, Workbook workbook, List sheetNames, PageOutput output) { 261 | final int flushCount = task.getFlushCount(); 262 | 263 | try (PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) { 264 | for (String sheetName : sheetNames) { 265 | Sheet sheet = workbook.getSheet(sheetName); 266 | if (sheet == null) { 267 | if (task.getIgnoreSheetNotFound()) { 268 | log.info("ignore: not found sheet={}", sheetName); 269 | continue; 270 | } else { 271 | throw new RuntimeException(MessageFormat.format("not found sheet={0}", sheetName)); 272 | } 273 | } 274 | 275 | log.info("sheet={}", sheetName); 276 | PoiExcelVisitorFactory factory = newPoiExcelVisitorFactory(task, schema, sheet, pageBuilder); 277 | PoiExcelColumnVisitor visitor = factory.getPoiExcelColumnVisitor(); 278 | PoiExcelSheetBean sheetBean = factory.getVisitorValue().getSheetBean(); 279 | final int skipHeaderLines = sheetBean.getSkipHeaderLines(); 280 | 281 | PoiExcelRecord record = sheetBean.getRecordType().newPoiExcelRecord(); 282 | record.initialize(sheet, skipHeaderLines); 283 | visitor.setRecord(record); 284 | 285 | int count = 0; 286 | for (; record.exists(); record.moveNext()) { 287 | record.logStart(); 288 | 289 | schema.visitColumns(visitor); // use record 290 | pageBuilder.addRecord(); 291 | 292 | if (++count >= flushCount) { 293 | log.trace("flush"); 
294 | pageBuilder.flush(); 295 | count = 0; 296 | } 297 | 298 | record.logEnd(); 299 | } 300 | pageBuilder.flush(); 301 | } 302 | pageBuilder.finish(); 303 | } 304 | } 305 | 306 | protected PoiExcelVisitorFactory newPoiExcelVisitorFactory(PluginTask task, Schema schema, Sheet sheet, 307 | PageBuilder pageBuilder) { 308 | PoiExcelVisitorValue visitorValue = new PoiExcelVisitorValue(task, schema, sheet, pageBuilder); 309 | return new PoiExcelVisitorFactory(visitorValue); 310 | } 311 | } 312 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/bean/PoiExcelSheetBean.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.bean; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | import java.util.Map.Entry; 8 | 9 | import org.apache.poi.ss.usermodel.Sheet; 10 | import org.embulk.parser.poi_excel.PoiExcelParserPlugin.ColumnOptionTask; 11 | import org.embulk.parser.poi_excel.PoiExcelParserPlugin.PluginTask; 12 | import org.embulk.parser.poi_excel.PoiExcelParserPlugin.SheetCommonOptionTask; 13 | import org.embulk.parser.poi_excel.PoiExcelParserPlugin.SheetOptionTask; 14 | import org.embulk.parser.poi_excel.bean.record.RecordType; 15 | import org.embulk.spi.Column; 16 | import org.embulk.spi.ColumnConfig; 17 | import org.embulk.spi.Schema; 18 | 19 | import com.google.common.base.Optional; 20 | 21 | public class PoiExcelSheetBean { 22 | 23 | protected final Sheet sheet; 24 | 25 | private final List sheetTaskList = new ArrayList<>(2); 26 | 27 | private final List columnBeanList = new ArrayList<>(); 28 | 29 | public PoiExcelSheetBean(PluginTask task, Schema schema, Sheet sheet) { 30 | this.sheet = sheet; 31 | 32 | initializeSheetTask(task); 33 | initializeColumnBean(task, schema); 34 | } 35 | 36 | private void initializeSheetTask(PluginTask task) { 37 | String name 
= sheet.getSheetName(); 38 | Map map = task.getSheetOptions(); 39 | SheetOptionTask s = map.get(name); 40 | if (s != null) { 41 | sheetTaskList.add(s); 42 | } else { 43 | loop: for (Entry entry : map.entrySet()) { 44 | String[] ss = entry.getKey().split("/"); 45 | for (String key : ss) { 46 | if (key.trim().equalsIgnoreCase(name)) { 47 | sheetTaskList.add(entry.getValue()); 48 | break loop; 49 | } 50 | } 51 | } 52 | } 53 | sheetTaskList.add(task); 54 | } 55 | 56 | private void initializeColumnBean(PluginTask task, Schema schema) { 57 | List list = task.getColumns().getColumns(); 58 | 59 | Map map = new HashMap<>(); 60 | List slist = getSheetOption(); 61 | for (int i = slist.size() - 1; i >= 0; i--) { 62 | SheetCommonOptionTask s = slist.get(i); 63 | if (s instanceof SheetOptionTask) { 64 | Optional> option = ((SheetOptionTask) s).getColumns(); 65 | if (option.isPresent()) { 66 | map.putAll(option.get()); 67 | } 68 | } 69 | } 70 | 71 | for (Column column : schema.getColumns()) { 72 | String name = column.getName(); 73 | ColumnConfig c = list.get(column.getIndex()); 74 | ColumnOptionTask t = c.getOption().loadConfig(ColumnOptionTask.class); 75 | PoiExcelColumnBean bean = new PoiExcelColumnBean(this, column, t, map.get(name)); 76 | columnBeanList.add(bean); 77 | } 78 | 79 | new PoiExcelColumnIndex(this).initializeColumnIndex(task, columnBeanList); 80 | } 81 | 82 | public final List getSheetOption() { 83 | return sheetTaskList; 84 | } 85 | 86 | public RecordType getRecordType() { 87 | List list = getSheetOption(); 88 | for (SheetCommonOptionTask sheetTask : list) { 89 | Optional value = sheetTask.getRecordType(); 90 | if (value.isPresent()) { 91 | return RecordType.of(value.get()); 92 | } 93 | } 94 | return RecordType.ROW; 95 | } 96 | 97 | public int getSkipHeaderLines() { 98 | List list = getSheetOption(); 99 | for (SheetCommonOptionTask sheetTask : list) { 100 | Optional value = sheetTask.getSkipHeaderLines(); 101 | if (value.isPresent()) { 102 | return value.get(); 
103 | } 104 | } 105 | return 0; 106 | } 107 | 108 | public final List getColumnBeans() { 109 | return columnBeanList; 110 | } 111 | 112 | public final PoiExcelColumnBean getColumnBean(Column column) { 113 | List list = getColumnBeans(); 114 | return list.get(column.getIndex()); 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecord.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.bean.record; 2 | 3 | import org.apache.poi.ss.usermodel.Cell; 4 | import org.apache.poi.ss.usermodel.Sheet; 5 | import org.apache.poi.ss.util.CellReference; 6 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 7 | 8 | public abstract class PoiExcelRecord { 9 | 10 | // loop record 11 | 12 | private Sheet sheet; 13 | 14 | public final void initialize(Sheet sheet, int skipHeaderLines) { 15 | this.sheet = sheet; 16 | initializeLoop(skipHeaderLines); 17 | } 18 | 19 | protected abstract void initializeLoop(int skipHeaderLines); 20 | 21 | public final Sheet getSheet() { 22 | return sheet; 23 | } 24 | 25 | public abstract boolean exists(); 26 | 27 | public abstract void moveNext(); 28 | 29 | // current record 30 | 31 | public final void logStart() { 32 | logStartEnd("start"); 33 | } 34 | 35 | public final void logEnd() { 36 | logStartEnd("end"); 37 | } 38 | 39 | protected abstract void logStartEnd(String part); 40 | 41 | public abstract int getRowIndex(PoiExcelColumnBean bean); 42 | 43 | public abstract int getColumnIndex(PoiExcelColumnBean bean); 44 | 45 | public abstract Cell getCell(PoiExcelColumnBean bean); 46 | 47 | public CellReference getCellReference(PoiExcelColumnBean bean) { 48 | int rowIndex = getRowIndex(bean); 49 | int columnIndex = getColumnIndex(bean); 50 | return new CellReference(rowIndex, columnIndex); 51 | } 52 | } 53 | 
-------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordColumn.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.bean.record; 2 | 3 | import org.apache.poi.ss.usermodel.Cell; 4 | import org.apache.poi.ss.usermodel.Row; 5 | import org.apache.poi.ss.usermodel.Sheet; 6 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 7 | import org.embulk.spi.Exec; 8 | import org.slf4j.Logger; 9 | 10 | public class PoiExcelRecordColumn extends PoiExcelRecord { 11 | private final Logger log = Exec.getLogger(getClass()); 12 | 13 | private int maxColumnIndex; 14 | private int currentColumnIndex; 15 | 16 | @Override 17 | protected void initializeLoop(int skipHeaderLines) { 18 | int minColumnIndex = Integer.MAX_VALUE; 19 | maxColumnIndex = 0; 20 | Sheet sheet = getSheet(); 21 | for (Row row : sheet) { 22 | int firstIndex = row.getFirstCellNum(); 23 | if (firstIndex >= 0) { 24 | minColumnIndex = Math.min(minColumnIndex, firstIndex); 25 | } 26 | maxColumnIndex = Math.max(maxColumnIndex, row.getLastCellNum()); 27 | } 28 | 29 | this.currentColumnIndex = maxColumnIndex; 30 | for (int i = minColumnIndex; i < maxColumnIndex; i++) { 31 | if (i < skipHeaderLines) { 32 | if (log.isDebugEnabled()) { 33 | log.debug("column({}) skipped", i); 34 | } 35 | continue; 36 | } 37 | 38 | this.currentColumnIndex = i; 39 | break; 40 | } 41 | } 42 | 43 | @Override 44 | public boolean exists() { 45 | return currentColumnIndex < maxColumnIndex; 46 | } 47 | 48 | @Override 49 | public void moveNext() { 50 | currentColumnIndex++; 51 | } 52 | 53 | @Override 54 | protected void logStartEnd(String part) { 55 | if (log.isDebugEnabled()) { 56 | log.debug("column({}) {}", currentColumnIndex, part); 57 | } 58 | } 59 | 60 | @Override 61 | public int getRowIndex(PoiExcelColumnBean bean) { 62 | return bean.getColumnIndex(); 63 | } 64 | 65 | 
@Override 66 | public int getColumnIndex(PoiExcelColumnBean bean) { 67 | return currentColumnIndex; 68 | } 69 | 70 | @Override 71 | public Cell getCell(PoiExcelColumnBean bean) { 72 | int rowIndex = getRowIndex(bean); 73 | Row row = getSheet().getRow(rowIndex); 74 | if (row == null) { 75 | return null; 76 | } 77 | int columnIndex = getColumnIndex(bean); 78 | return row.getCell(columnIndex); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordRow.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.bean.record; 2 | 3 | import java.util.Iterator; 4 | 5 | import org.apache.poi.ss.usermodel.Cell; 6 | import org.apache.poi.ss.usermodel.Row; 7 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 8 | import org.embulk.spi.Exec; 9 | import org.slf4j.Logger; 10 | 11 | public class PoiExcelRecordRow extends PoiExcelRecord { 12 | private final Logger log = Exec.getLogger(getClass()); 13 | 14 | private Iterator rowIterator; 15 | private Row currentRow; 16 | 17 | @Override 18 | protected void initializeLoop(int skipHeaderLines) { 19 | this.rowIterator = getSheet().iterator(); 20 | this.currentRow = null; 21 | while (rowIterator.hasNext()) { 22 | Row row = rowIterator.next(); 23 | 24 | int rowIndex = row.getRowNum(); 25 | if (rowIndex < skipHeaderLines) { 26 | if (log.isDebugEnabled()) { 27 | log.debug("row({}) skipped", rowIndex); 28 | } 29 | continue; 30 | } 31 | 32 | this.currentRow = row; 33 | break; 34 | } 35 | } 36 | 37 | @Override 38 | public boolean exists() { 39 | return currentRow != null; 40 | } 41 | 42 | @Override 43 | public void moveNext() { 44 | if (rowIterator.hasNext()) { 45 | this.currentRow = rowIterator.next(); 46 | } else { 47 | this.currentRow = null; 48 | } 49 | } 50 | 51 | @Override 52 | protected void logStartEnd(String part) { 53 | assert currentRow != 
null; 54 | if (log.isDebugEnabled()) { 55 | log.debug("row({}) {}", currentRow.getRowNum(), part); 56 | } 57 | } 58 | 59 | @Override 60 | public int getRowIndex(PoiExcelColumnBean bean) { 61 | assert currentRow != null; 62 | return currentRow.getRowNum(); 63 | } 64 | 65 | @Override 66 | public int getColumnIndex(PoiExcelColumnBean bean) { 67 | return bean.getColumnIndex(); 68 | } 69 | 70 | @Override 71 | public Cell getCell(PoiExcelColumnBean bean) { 72 | assert currentRow != null; 73 | int columnIndex = getColumnIndex(bean); 74 | return currentRow.getCell(columnIndex); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/bean/record/PoiExcelRecordSheet.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.bean.record; 2 | 3 | import org.apache.poi.ss.usermodel.Cell; 4 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 5 | import org.embulk.spi.Exec; 6 | import org.slf4j.Logger; 7 | 8 | public class PoiExcelRecordSheet extends PoiExcelRecord { 9 | private final Logger log = Exec.getLogger(getClass()); 10 | 11 | private boolean exists; 12 | 13 | @Override 14 | protected void initializeLoop(int skipHeaderLines) { 15 | this.exists = true; 16 | } 17 | 18 | @Override 19 | public boolean exists() { 20 | return exists; 21 | } 22 | 23 | @Override 24 | public void moveNext() { 25 | this.exists = false; 26 | } 27 | 28 | @Override 29 | protected void logStartEnd(String part) { 30 | if (log.isDebugEnabled()) { 31 | log.debug("sheet({}) {}", getSheet().getSheetName(), part); 32 | } 33 | } 34 | 35 | @Override 36 | public int getRowIndex(PoiExcelColumnBean bean) { 37 | throw new UnsupportedOperationException("unsupported at record_type=sheet"); 38 | } 39 | 40 | @Override 41 | public int getColumnIndex(PoiExcelColumnBean bean) { 42 | throw new UnsupportedOperationException("unsupported at record_type=sheet"); 
43 | } 44 | 45 | @Override 46 | public Cell getCell(PoiExcelColumnBean bean) { 47 | throw new UnsupportedOperationException("unsupported at record_type=sheet"); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/bean/record/RecordType.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.bean.record; 2 | 3 | import java.text.MessageFormat; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | 7 | import org.embulk.config.ConfigException; 8 | import org.embulk.parser.poi_excel.PoiExcelParserPlugin.ColumnOptionTask; 9 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 10 | 11 | import com.google.common.base.Optional; 12 | 13 | public enum RecordType { 14 | ROW { 15 | @Override 16 | public Optional getRecordOption(PoiExcelColumnBean bean) { 17 | return bean.getRowNumber(); 18 | } 19 | 20 | @Override 21 | public String getRecordOptionName() { 22 | return ColumnOptionTask.CELL_ROW; 23 | } 24 | 25 | @Override 26 | public Optional getNumberOption(PoiExcelColumnBean bean) { 27 | return bean.getColumnNumber(); 28 | } 29 | 30 | @Override 31 | public String getNumberOptionName() { 32 | return ColumnOptionTask.CELL_COLUMN; 33 | } 34 | 35 | @Override 36 | public PoiExcelRecord newPoiExcelRecord() { 37 | return new PoiExcelRecordRow(); 38 | } 39 | }, 40 | COLUMN { 41 | @Override 42 | public Optional getRecordOption(PoiExcelColumnBean bean) { 43 | return bean.getColumnNumber(); 44 | } 45 | 46 | @Override 47 | public String getRecordOptionName() { 48 | return ColumnOptionTask.CELL_COLUMN; 49 | } 50 | 51 | @Override 52 | public Optional getNumberOption(PoiExcelColumnBean bean) { 53 | return bean.getRowNumber(); 54 | } 55 | 56 | @Override 57 | public String getNumberOptionName() { 58 | return ColumnOptionTask.CELL_ROW; 59 | } 60 | 61 | @Override 62 | public PoiExcelRecord newPoiExcelRecord() { 63 | 
return new PoiExcelRecordColumn(); 64 | } 65 | }, 66 | SHEET { 67 | @Override 68 | public Optional getRecordOption(PoiExcelColumnBean bean) { 69 | return Optional.absent(); 70 | } 71 | 72 | @Override 73 | public String getRecordOptionName() { 74 | return "-"; 75 | } 76 | 77 | @Override 78 | public Optional getNumberOption(PoiExcelColumnBean bean) { 79 | return Optional.absent(); 80 | } 81 | 82 | @Override 83 | public String getNumberOptionName() { 84 | return "-"; 85 | } 86 | 87 | @Override 88 | public PoiExcelRecord newPoiExcelRecord() { 89 | return new PoiExcelRecordSheet(); 90 | } 91 | }; 92 | 93 | public abstract Optional getRecordOption(PoiExcelColumnBean bean); 94 | 95 | public abstract String getRecordOptionName(); 96 | 97 | public abstract Optional getNumberOption(PoiExcelColumnBean bean); 98 | 99 | public abstract String getNumberOptionName(); 100 | 101 | public abstract PoiExcelRecord newPoiExcelRecord(); 102 | 103 | public static RecordType of(String value) { 104 | try { 105 | return RecordType.valueOf(value.toUpperCase()); 106 | } catch (Exception e) { 107 | List list = new ArrayList<>(); 108 | for (RecordType s : RecordType.values()) { 109 | list.add(s.name().toLowerCase()); 110 | } 111 | throw new ConfigException(MessageFormat.format("illegal record_type={0}. 
expected={1}", value, list), e); 112 | } 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/bean/util/PoiExcelCellAddress.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.bean.util; 2 | 3 | import java.text.MessageFormat; 4 | 5 | import org.apache.poi.ss.usermodel.Cell; 6 | import org.apache.poi.ss.usermodel.Row; 7 | import org.apache.poi.ss.usermodel.Sheet; 8 | import org.apache.poi.ss.usermodel.Workbook; 9 | import org.apache.poi.ss.util.CellReference; 10 | import org.embulk.parser.poi_excel.bean.record.PoiExcelRecord; 11 | 12 | public class PoiExcelCellAddress { 13 | private final CellReference cellReference; 14 | 15 | public PoiExcelCellAddress(CellReference cellReference) { 16 | this.cellReference = cellReference; 17 | } 18 | 19 | public String getSheetName() { 20 | return cellReference.getSheetName(); 21 | } 22 | 23 | public Sheet getSheet(PoiExcelRecord record) { 24 | String sheetName = getSheetName(); 25 | if (sheetName != null) { 26 | Workbook book = record.getSheet().getWorkbook(); 27 | Sheet sheet = book.getSheet(sheetName); 28 | if (sheet == null) { 29 | throw new RuntimeException(MessageFormat.format("not found sheet. 
sheetName={0}", sheetName)); 30 | } 31 | return sheet; 32 | } else { 33 | return record.getSheet(); 34 | } 35 | } 36 | 37 | public int getRowIndex() { 38 | return cellReference.getRow(); 39 | } 40 | 41 | public int getColumnIndex() { 42 | return cellReference.getCol(); 43 | } 44 | 45 | public Cell getCell(PoiExcelRecord record) { 46 | Sheet sheet = getSheet(record); 47 | 48 | Row row = sheet.getRow(getRowIndex()); 49 | if (row == null) { 50 | return null; 51 | } 52 | 53 | return row.getCell(getColumnIndex()); 54 | } 55 | 56 | public String getString() { 57 | return cellReference.formatAsString(); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/bean/util/SearchMergedCell.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.bean.util; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | import java.util.TreeMap; 6 | 7 | import org.apache.poi.ss.util.CellRangeAddress; 8 | import org.embulk.parser.poi_excel.visitor.util.MergedRegionFinder; 9 | import org.embulk.parser.poi_excel.visitor.util.MergedRegionList; 10 | import org.embulk.parser.poi_excel.visitor.util.MergedRegionMap; 11 | import org.embulk.parser.poi_excel.visitor.util.MergedRegionNothing; 12 | 13 | public enum SearchMergedCell { 14 | NONE { 15 | @Override 16 | public MergedRegionFinder createMergedRegionFinder() { 17 | return new MergedRegionNothing(); 18 | } 19 | }, 20 | LINEAR_SEARCH { 21 | @Override 22 | public MergedRegionFinder createMergedRegionFinder() { 23 | return new MergedRegionList(); 24 | } 25 | }, 26 | TREE_SEARCH { 27 | @Override 28 | public MergedRegionFinder createMergedRegionFinder() { 29 | return new MergedRegionMap() { 30 | 31 | @Override 32 | protected Map> newRowMap() { 33 | return new TreeMap<>(); 34 | } 35 | 36 | @Override 37 | protected Map newColumnMap() { 38 | return new TreeMap<>(); 39 | } 40 | }; 41 | } 
42 | }, 43 | HASH_SEARCH { 44 | @Override 45 | public MergedRegionFinder createMergedRegionFinder() { 46 | return new MergedRegionMap() { 47 | 48 | @Override 49 | protected Map> newRowMap() { 50 | return new HashMap<>(); 51 | } 52 | 53 | @Override 54 | protected Map newColumnMap() { 55 | return new HashMap<>(); 56 | } 57 | }; 58 | } 59 | }; 60 | 61 | private MergedRegionFinder mergedRegionFinder; 62 | 63 | public MergedRegionFinder getMergedRegionFinder() { 64 | if (mergedRegionFinder == null) { 65 | this.mergedRegionFinder = createMergedRegionFinder(); 66 | } 67 | return mergedRegionFinder; 68 | } 69 | 70 | protected abstract MergedRegionFinder createMergedRegionFinder(); 71 | } -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/AbstractPoiExcelCellAttributeVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor; 2 | 3 | import java.text.MessageFormat; 4 | import java.util.Collection; 5 | import java.util.LinkedHashMap; 6 | import java.util.List; 7 | import java.util.Map; 8 | import java.util.TreeMap; 9 | import java.util.TreeSet; 10 | 11 | import org.apache.poi.ss.usermodel.Cell; 12 | import org.apache.poi.ss.usermodel.Color; 13 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 14 | import org.embulk.parser.poi_excel.visitor.embulk.CellVisitor; 15 | import org.embulk.spi.Column; 16 | import org.embulk.spi.PageBuilder; 17 | import org.embulk.spi.type.StringType; 18 | 19 | import com.fasterxml.jackson.core.JsonProcessingException; 20 | import com.fasterxml.jackson.databind.ObjectMapper; 21 | 22 | public abstract class AbstractPoiExcelCellAttributeVisitor { 23 | 24 | protected final PoiExcelVisitorValue visitorValue; 25 | protected final PageBuilder pageBuilder; 26 | 27 | public AbstractPoiExcelCellAttributeVisitor(PoiExcelVisitorValue visitorValue) { 28 | this.visitorValue = visitorValue; 
29 | this.pageBuilder = visitorValue.getPageBuilder(); 30 | } 31 | 32 | public void visit(PoiExcelColumnBean bean, Cell cell, CellVisitor visitor) { 33 | A source = getAttributeSource(bean, cell); 34 | if (source == null) { 35 | Column column = bean.getColumn(); 36 | pageBuilder.setNull(column); 37 | return; 38 | } 39 | 40 | String suffix = bean.getValueTypeSuffix(); 41 | if (suffix != null) { 42 | visitKey(bean, suffix, cell, source, visitor); 43 | } else { 44 | visitJson(bean, cell, source, visitor); 45 | } 46 | } 47 | 48 | protected abstract A getAttributeSource(PoiExcelColumnBean bean, Cell cell); 49 | 50 | private void visitKey(PoiExcelColumnBean bean, String key, Cell cell, A source, CellVisitor visitor) { 51 | Column column = bean.getColumn(); 52 | Object value = getAttributeValue(column, cell, source, key); 53 | if (value == null) { 54 | pageBuilder.setNull(column); 55 | } else if (value instanceof String) { 56 | visitor.visitCellValueString(column, source, (String) value); 57 | } else if (value instanceof Long) { 58 | visitor.visitValueLong(column, source, (Long) value); 59 | } else if (value instanceof Boolean) { 60 | visitor.visitCellValueBoolean(column, source, (Boolean) value); 61 | } else if (value instanceof Double) { 62 | visitor.visitCellValueNumeric(column, source, (Double) value); 63 | } else if (value instanceof Map) { 64 | visitor.visitCellValueString(column, source, convertJsonString(value)); 65 | } else { 66 | throw new IllegalStateException(MessageFormat.format("unsupported conversion. 
type={0}, value={1}", value 67 | .getClass().getName(), value)); 68 | } 69 | } 70 | 71 | private void visitJson(PoiExcelColumnBean bean, Cell cell, A source, CellVisitor visitor) { 72 | Column column = bean.getColumn(); 73 | 74 | Map result; 75 | 76 | List list = bean.getAttributeName(); 77 | if (!list.isEmpty()) { 78 | result = getSpecifiedValues(column, cell, source, list); 79 | } else { 80 | result = getAllValues(column, cell, source); 81 | } 82 | 83 | String json = convertJsonString(result); 84 | visitor.visitCellValueString(column, cell, json); 85 | } 86 | 87 | protected final Map getSpecifiedValues(Column column, Cell cell, A source, List keyList) { 88 | Map result = new LinkedHashMap<>(); 89 | 90 | for (String key : keyList) { 91 | Object value = getAttributeValue(column, cell, source, key); 92 | result.put(key, value); 93 | } 94 | 95 | return result; 96 | } 97 | 98 | protected final Map getAllValues(Column column, Cell cell, A source) { 99 | Map result = new TreeMap<>(); 100 | 101 | Collection keys = getAttributeSupplierMap().keySet(); 102 | for (String key : keys) { 103 | if (acceptKey(key)) { 104 | Object value = getAttributeValue(column, cell, source, key); 105 | result.put(key, value); 106 | } 107 | } 108 | 109 | return result; 110 | } 111 | 112 | protected boolean acceptKey(String key) { 113 | return true; 114 | } 115 | 116 | protected final Object getAttributeValue(Column column, Cell cell, A source, String key) { 117 | Map> map = getAttributeSupplierMap(); 118 | AttributeSupplier supplier = map.get(key.toLowerCase()); 119 | if (supplier == null) { 120 | throw new UnsupportedOperationException(MessageFormat.format( 121 | "unsupported attribute name={0}, choose in {1}", key, new TreeSet<>(map.keySet()))); 122 | } 123 | Object value = supplier.get(column, cell, source); 124 | 125 | if (value instanceof Color) { 126 | int rgb = PoiExcelColorVisitor.getRGB((Color) value); 127 | if (rgb < 0) { 128 | return null; 129 | } 130 | if (column.getType() 
instanceof StringType) { 131 | value = String.format("%06x", rgb); 132 | } else { 133 | value = (long) rgb; 134 | } 135 | } 136 | return value; 137 | } 138 | 139 | // @FunctionalInterface 140 | protected static interface AttributeSupplier { 141 | public Object get(Column column, Cell cell, A source); 142 | } 143 | 144 | protected abstract Map> getAttributeSupplierMap(); 145 | 146 | protected final String convertJsonString(Object result) { 147 | try { 148 | ObjectMapper mapper = new ObjectMapper(); 149 | return mapper.writeValueAsString(result); 150 | } catch (JsonProcessingException e) { 151 | throw new RuntimeException(e); 152 | } 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellCommentVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor; 2 | 3 | import java.util.Collections; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | 7 | import org.apache.poi.ss.usermodel.Cell; 8 | import org.apache.poi.ss.usermodel.ClientAnchor; 9 | import org.apache.poi.ss.usermodel.Comment; 10 | import org.apache.poi.ss.usermodel.RichTextString; 11 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 12 | import org.embulk.spi.Column; 13 | 14 | public class PoiExcelCellCommentVisitor extends AbstractPoiExcelCellAttributeVisitor { 15 | 16 | public PoiExcelCellCommentVisitor(PoiExcelVisitorValue visitorValue) { 17 | super(visitorValue); 18 | } 19 | 20 | @Override 21 | protected Comment getAttributeSource(PoiExcelColumnBean bean, Cell cell) { 22 | return cell.getCellComment(); 23 | } 24 | 25 | protected boolean acceptKey(String key) { 26 | if (key.equals("client_anchor")) { 27 | return false; 28 | } 29 | return true; 30 | } 31 | 32 | @Override 33 | protected Map> getAttributeSupplierMap() { 34 | return SUPPLIER_MAP; 35 | } 36 | 37 | private final Map> SUPPLIER_MAP; 38 
| { 39 | Map> map = new HashMap<>(32); 40 | map.put("author", new AttributeSupplier() { 41 | @Override 42 | public Object get(Column column, Cell cell, Comment comment) { 43 | return comment.getAuthor(); 44 | } 45 | }); 46 | map.put("column", new AttributeSupplier() { 47 | @Override 48 | public Object get(Column column, Cell cell, Comment comment) { 49 | return (long) comment.getColumn(); 50 | } 51 | }); 52 | map.put("row", new AttributeSupplier() { 53 | @Override 54 | public Object get(Column column, Cell cell, Comment comment) { 55 | return (long) comment.getRow(); 56 | } 57 | }); 58 | map.put("is_visible", new AttributeSupplier() { 59 | @Override 60 | public Object get(Column column, Cell cell, Comment comment) { 61 | return comment.isVisible(); 62 | } 63 | }); 64 | map.put("string", new AttributeSupplier() { 65 | @Override 66 | public Object get(Column column, Cell cell, Comment comment) { 67 | RichTextString rich = comment.getString(); 68 | return rich.getString(); 69 | } 70 | }); 71 | map.put("client_anchor", new AttributeSupplier() { 72 | @Override 73 | public Object get(Column column, Cell cell, Comment comment) { 74 | return getClientAnchorValue(column, cell, comment, null); 75 | } 76 | }); 77 | for (final String key : PoiExcelClientAnchorVisitor.getKeys()) { 78 | map.put("client_anchor." 
+ key, new AttributeSupplier() { 79 | @Override 80 | public Object get(Column column, Cell cell, Comment comment) { 81 | return getClientAnchorValue(column, cell, comment, key); 82 | } 83 | }); 84 | } 85 | SUPPLIER_MAP = Collections.unmodifiableMap(map); 86 | } 87 | 88 | final Object getClientAnchorValue(Column column, Cell cell, Comment comment, String key) { 89 | ClientAnchor anchor = comment.getClientAnchor(); 90 | PoiExcelVisitorFactory factory = visitorValue.getVisitorFactory(); 91 | PoiExcelClientAnchorVisitor delegator = factory.getPoiExcelClientAnchorVisitor(); 92 | return delegator.getClientAnchorValue(column, cell, anchor, key); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellFontVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor; 2 | 3 | import java.util.Collections; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | 7 | import org.apache.poi.ss.usermodel.Cell; 8 | import org.apache.poi.ss.usermodel.CellStyle; 9 | import org.apache.poi.ss.usermodel.Font; 10 | import org.apache.poi.ss.usermodel.Workbook; 11 | import org.apache.poi.xssf.usermodel.XSSFFont; 12 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 13 | import org.embulk.spi.Column; 14 | 15 | public class PoiExcelCellFontVisitor extends AbstractPoiExcelCellAttributeVisitor { 16 | 17 | public PoiExcelCellFontVisitor(PoiExcelVisitorValue visitorValue) { 18 | super(visitorValue); 19 | } 20 | 21 | @Override 22 | protected Font getAttributeSource(PoiExcelColumnBean bean, Cell cell) { 23 | CellStyle style = cell.getCellStyle(); 24 | short index = style.getFontIndex(); 25 | Workbook book = visitorValue.getSheet().getWorkbook(); 26 | return book.getFontAt(index); 27 | } 28 | 29 | @Override 30 | protected Map> getAttributeSupplierMap() { 31 | return SUPPLIER_MAP; 32 | } 33 | 34 | 
private static final Map> SUPPLIER_MAP; 35 | static { 36 | Map> map = new HashMap<>(32); 37 | map.put("font_name", new AttributeSupplier() { 38 | @Override 39 | public Object get(Column column, Cell cell, Font font) { 40 | return font.getFontName(); 41 | } 42 | }); 43 | map.put("font_height", new AttributeSupplier() { 44 | @Override 45 | public Object get(Column column, Cell cell, Font font) { 46 | return (long) font.getFontHeight(); 47 | } 48 | }); 49 | map.put("font_height_in_points", new AttributeSupplier() { 50 | @Override 51 | public Object get(Column column, Cell cell, Font font) { 52 | return (long) font.getFontHeightInPoints(); 53 | } 54 | }); 55 | map.put("italic", new AttributeSupplier() { 56 | @Override 57 | public Object get(Column column, Cell cell, Font font) { 58 | return font.getItalic(); 59 | } 60 | }); 61 | map.put("strikeout", new AttributeSupplier() { 62 | @Override 63 | public Object get(Column column, Cell cell, Font font) { 64 | return font.getStrikeout(); 65 | } 66 | }); 67 | map.put("color", new AttributeSupplier() { 68 | @Override 69 | public Object get(Column column, Cell cell, Font font) { 70 | if (font instanceof XSSFFont) { 71 | return ((XSSFFont) font).getXSSFColor(); 72 | } else { 73 | Workbook book = cell.getSheet().getWorkbook(); 74 | short color = font.getColor(); 75 | return PoiExcelColorVisitor.getHssfColor(book, color); 76 | } 77 | } 78 | }); 79 | map.put("type_offset", new AttributeSupplier() { 80 | @Override 81 | public Object get(Column column, Cell cell, Font font) { 82 | return (long) font.getTypeOffset(); 83 | } 84 | }); 85 | map.put("underline", new AttributeSupplier() { 86 | @Override 87 | public Object get(Column column, Cell cell, Font font) { 88 | return (long) font.getUnderline(); 89 | } 90 | }); 91 | map.put("char_set", new AttributeSupplier() { 92 | @Override 93 | public Object get(Column column, Cell cell, Font font) { 94 | return (long) font.getCharSet(); 95 | } 96 | }); 97 | map.put("index", new 
AttributeSupplier() { 98 | @Override 99 | public Object get(Column column, Cell cell, Font font) { 100 | return (long) font.getIndex(); 101 | } 102 | }); 103 | map.put("bold", new AttributeSupplier() { 104 | @Override 105 | public Object get(Column column, Cell cell, Font font) { 106 | return font.getBold(); 107 | } 108 | }); 109 | SUPPLIER_MAP = Collections.unmodifiableMap(map); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellStyleVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor; 2 | 3 | import java.util.Collections; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | 7 | import org.apache.poi.ss.usermodel.Cell; 8 | import org.apache.poi.ss.usermodel.CellStyle; 9 | import org.apache.poi.ss.usermodel.Workbook; 10 | import org.apache.poi.xssf.usermodel.XSSFCellStyle; 11 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 12 | import org.embulk.spi.Column; 13 | import org.embulk.spi.type.StringType; 14 | 15 | public class PoiExcelCellStyleVisitor extends AbstractPoiExcelCellAttributeVisitor { 16 | 17 | public PoiExcelCellStyleVisitor(PoiExcelVisitorValue visitorValue) { 18 | super(visitorValue); 19 | } 20 | 21 | @Override 22 | protected CellStyle getAttributeSource(PoiExcelColumnBean bean, Cell cell) { 23 | return cell.getCellStyle(); 24 | } 25 | 26 | protected boolean acceptKey(String key) { 27 | if (key.equals("border")) { 28 | return false; 29 | } 30 | return true; 31 | } 32 | 33 | @Override 34 | protected Map> getAttributeSupplierMap() { 35 | return SUPPLIER_MAP; 36 | } 37 | 38 | private static final Map> SUPPLIER_MAP; 39 | static { 40 | Map> map = new HashMap<>(32); 41 | map.put("alignment", new AttributeSupplier() { 42 | @Override 43 | public Object get(Column column, Cell cell, CellStyle style) { 44 | return (long) 
style.getAlignmentEnum().getCode(); 45 | } 46 | }); 47 | map.put("border", new AttributeSupplier() { 48 | @Override 49 | public Object get(Column column, Cell cell, CellStyle style) { 50 | int n0 = style.getBorderTopEnum().getCode(); 51 | int n1 = style.getBorderBottomEnum().getCode(); 52 | int n2 = style.getBorderLeftEnum().getCode(); 53 | int n3 = style.getBorderRightEnum().getCode(); 54 | if (column.getType() instanceof StringType) { 55 | return String.format("%02x%02x%02x%02x", n0, n1, n2, n3); 56 | } 57 | return (long) ((n0 << 24) | (n1 << 16) | (n2 << 8) | n3); 58 | } 59 | }); 60 | map.put("border_bottom", new AttributeSupplier() { 61 | @Override 62 | public Object get(Column column, Cell cell, CellStyle style) { 63 | return (long) style.getBorderBottomEnum().getCode(); 64 | } 65 | }); 66 | map.put("border_left", new AttributeSupplier() { 67 | @Override 68 | public Object get(Column column, Cell cell, CellStyle style) { 69 | return (long) style.getBorderLeftEnum().getCode(); 70 | } 71 | }); 72 | map.put("border_right", new AttributeSupplier() { 73 | @Override 74 | public Object get(Column column, Cell cell, CellStyle style) { 75 | return (long) style.getBorderRightEnum().getCode(); 76 | } 77 | }); 78 | map.put("border_top", new AttributeSupplier() { 79 | @Override 80 | public Object get(Column column, Cell cell, CellStyle style) { 81 | return (long) style.getBorderTopEnum().getCode(); 82 | } 83 | }); 84 | map.put("border_bottom_color", new AttributeSupplier() { 85 | @Override 86 | public Object get(Column column, Cell cell, CellStyle style) { 87 | if (style instanceof XSSFCellStyle) { 88 | return ((XSSFCellStyle) style).getBottomBorderXSSFColor(); 89 | } else { 90 | Workbook book = cell.getSheet().getWorkbook(); 91 | short color = style.getBottomBorderColor(); 92 | return PoiExcelColorVisitor.getHssfColor(book, color); 93 | } 94 | } 95 | }); 96 | map.put("border_left_color", new AttributeSupplier() { 97 | @Override 98 | public Object get(Column column, Cell 
cell, CellStyle style) { 99 | if (style instanceof XSSFCellStyle) { 100 | return ((XSSFCellStyle) style).getLeftBorderXSSFColor(); 101 | } else { 102 | Workbook book = cell.getSheet().getWorkbook(); 103 | short color = style.getLeftBorderColor(); 104 | return PoiExcelColorVisitor.getHssfColor(book, color); 105 | } 106 | } 107 | }); 108 | map.put("border_right_color", new AttributeSupplier() { 109 | @Override 110 | public Object get(Column column, Cell cell, CellStyle style) { 111 | if (style instanceof XSSFCellStyle) { 112 | return ((XSSFCellStyle) style).getRightBorderXSSFColor(); 113 | } else { 114 | Workbook book = cell.getSheet().getWorkbook(); 115 | short color = style.getRightBorderColor(); 116 | return PoiExcelColorVisitor.getHssfColor(book, color); 117 | } 118 | } 119 | }); 120 | map.put("border_top_color", new AttributeSupplier() { 121 | @Override 122 | public Object get(Column column, Cell cell, CellStyle style) { 123 | if (style instanceof XSSFCellStyle) { 124 | return ((XSSFCellStyle) style).getTopBorderXSSFColor(); 125 | } else { 126 | Workbook book = cell.getSheet().getWorkbook(); 127 | short color = style.getTopBorderColor(); 128 | return PoiExcelColorVisitor.getHssfColor(book, color); 129 | } 130 | } 131 | }); 132 | map.put("data_format", new AttributeSupplier() { 133 | @Override 134 | public Object get(Column column, Cell cell, CellStyle style) { 135 | if (column.getType() instanceof StringType) { 136 | return style.getDataFormatString(); 137 | } else { 138 | return (long) style.getDataFormat(); 139 | } 140 | } 141 | }); 142 | map.put("fill_background_color", new AttributeSupplier() { 143 | @Override 144 | public Object get(Column column, Cell cell, CellStyle style) { 145 | return style.getFillBackgroundColorColor(); 146 | } 147 | }); 148 | map.put("fill_foreground_color", new AttributeSupplier() { 149 | @Override 150 | public Object get(Column column, Cell cell, CellStyle style) { 151 | return style.getFillForegroundColorColor(); 152 | } 153 | }); 
154 | map.put("fill_pattern", new AttributeSupplier() { 155 | @Override 156 | public Object get(Column column, Cell cell, CellStyle style) { 157 | return (long) style.getFillPatternEnum().getCode(); 158 | } 159 | }); 160 | map.put("font_index", new AttributeSupplier() { 161 | @Override 162 | public Object get(Column column, Cell cell, CellStyle style) { 163 | return (long) style.getFontIndex(); 164 | } 165 | }); 166 | map.put("hidden", new AttributeSupplier() { 167 | @Override 168 | public Object get(Column column, Cell cell, CellStyle style) { 169 | return style.getHidden(); 170 | } 171 | }); 172 | map.put("indention", new AttributeSupplier() { 173 | @Override 174 | public Object get(Column column, Cell cell, CellStyle style) { 175 | return (long) style.getIndention(); 176 | } 177 | }); 178 | map.put("locked", new AttributeSupplier() { 179 | @Override 180 | public Object get(Column column, Cell cell, CellStyle style) { 181 | return style.getLocked(); 182 | } 183 | }); 184 | map.put("rotation", new AttributeSupplier() { 185 | @Override 186 | public Object get(Column column, Cell cell, CellStyle style) { 187 | return (long) style.getRotation(); 188 | } 189 | }); 190 | map.put("vertical_alignment", new AttributeSupplier() { 191 | @Override 192 | public Object get(Column column, Cell cell, CellStyle style) { 193 | return (long) style.getVerticalAlignmentEnum().getCode(); 194 | } 195 | }); 196 | map.put("wrap_text", new AttributeSupplier() { 197 | @Override 198 | public Object get(Column column, Cell cell, CellStyle style) { 199 | return style.getWrapText(); 200 | } 201 | }); 202 | SUPPLIER_MAP = Collections.unmodifiableMap(map); 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellTypeVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor; 2 | 3 | import 
org.apache.poi.ss.usermodel.Cell; 4 | import org.apache.poi.ss.usermodel.CellType; 5 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 6 | import org.embulk.parser.poi_excel.visitor.embulk.CellVisitor; 7 | import org.embulk.spi.Column; 8 | import org.embulk.spi.PageBuilder; 9 | import org.embulk.spi.type.StringType; 10 | 11 | public class PoiExcelCellTypeVisitor { 12 | protected final PoiExcelVisitorValue visitorValue; 13 | protected final PageBuilder pageBuilder; 14 | 15 | public PoiExcelCellTypeVisitor(PoiExcelVisitorValue visitorValue) { 16 | this.visitorValue = visitorValue; 17 | this.pageBuilder = visitorValue.getPageBuilder(); 18 | } 19 | 20 | public void visit(PoiExcelColumnBean bean, Cell cell, CellType cellType, CellVisitor visitor) { 21 | assert cell != null; 22 | 23 | Column column = bean.getColumn(); 24 | if (column.getType() instanceof StringType) { 25 | String type = cellType.name(); 26 | visitor.visitCellValueString(column, cell, type); 27 | return; 28 | } 29 | 30 | int code = getCode(cellType); 31 | visitor.visitCellValueNumeric(column, cell, code); 32 | } 33 | 34 | private static int getCode(CellType cellType) { 35 | switch (cellType) { 36 | case NUMERIC: 37 | return 0; 38 | case STRING: 39 | return 1; 40 | case FORMULA: 41 | return 2; 42 | case BLANK: 43 | return 3; 44 | case BOOLEAN: 45 | return 4; 46 | case ERROR: 47 | return 5; 48 | default: 49 | return -1; 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelCellValueVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor; 2 | 3 | import java.text.MessageFormat; 4 | import java.util.List; 5 | 6 | import org.apache.poi.ss.usermodel.Cell; 7 | import org.apache.poi.ss.usermodel.CellType; 8 | import org.apache.poi.ss.usermodel.CellValue; 9 | import 
org.apache.poi.ss.usermodel.CreationHelper; 10 | import org.apache.poi.ss.usermodel.FormulaError; 11 | import org.apache.poi.ss.usermodel.FormulaEvaluator; 12 | import org.apache.poi.ss.usermodel.Row; 13 | import org.apache.poi.ss.usermodel.Sheet; 14 | import org.apache.poi.ss.usermodel.Workbook; 15 | import org.apache.poi.ss.util.CellRangeAddress; 16 | import org.apache.poi.ss.util.CellReference; 17 | import org.embulk.parser.poi_excel.PoiExcelColumnValueType; 18 | import org.embulk.parser.poi_excel.PoiExcelParserPlugin.FormulaReplaceTask; 19 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 20 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean.ErrorStrategy; 21 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean.FormulaHandling; 22 | import org.embulk.parser.poi_excel.visitor.embulk.CellVisitor; 23 | import org.embulk.parser.poi_excel.visitor.util.MergedRegionFinder; 24 | import org.embulk.spi.Column; 25 | import org.embulk.spi.Exec; 26 | import org.embulk.spi.PageBuilder; 27 | import org.slf4j.Logger; 28 | 29 | public class PoiExcelCellValueVisitor { 30 | private final Logger log = Exec.getLogger(getClass()); 31 | 32 | protected final PoiExcelVisitorValue visitorValue; 33 | protected final PageBuilder pageBuilder; 34 | 35 | public PoiExcelCellValueVisitor(PoiExcelVisitorValue visitorValue) { 36 | this.visitorValue = visitorValue; 37 | this.pageBuilder = visitorValue.getPageBuilder(); 38 | } 39 | 40 | public void visitCellValue(PoiExcelColumnBean bean, Cell cell, CellVisitor visitor) { 41 | assert cell != null; 42 | 43 | Column column = bean.getColumn(); 44 | 45 | CellType cellType = cell.getCellTypeEnum(); 46 | switch (cellType) { 47 | case NUMERIC: 48 | visitor.visitCellValueNumeric(column, cell, cell.getNumericCellValue()); 49 | return; 50 | case STRING: 51 | visitor.visitCellValueString(column, cell, cell.getStringCellValue()); 52 | return; 53 | case FORMULA: 54 | PoiExcelColumnValueType valueType = bean.getValueType(); 55 | if 
(valueType == PoiExcelColumnValueType.CELL_FORMULA) { 56 | visitor.visitCellFormula(column, cell); 57 | } else { 58 | visitCellValueFormula(bean, cell, visitor); 59 | } 60 | return; 61 | case BLANK: 62 | visitCellValueBlank(bean, cell, visitor); 63 | return; 64 | case BOOLEAN: 65 | visitor.visitCellValueBoolean(column, cell, cell.getBooleanCellValue()); 66 | return; 67 | case ERROR: 68 | visitCellValueError(bean, cell, cell.getErrorCellValue(), visitor); 69 | return; 70 | default: 71 | throw new IllegalStateException(MessageFormat.format("unsupported POI cellType={0}", cellType)); 72 | } 73 | } 74 | 75 | protected void visitCellValueBlank(PoiExcelColumnBean bean, Cell cell, CellVisitor visitor) { 76 | assert cell.getCellTypeEnum() == CellType.BLANK; 77 | 78 | Column column = bean.getColumn(); 79 | 80 | CellRangeAddress region = findRegion(bean, cell); 81 | if (region != null) { 82 | Row firstRow = cell.getSheet().getRow(region.getFirstRow()); 83 | if (firstRow == null) { 84 | visitCellNull(column); 85 | return; 86 | } 87 | Cell firstCell = firstRow.getCell(region.getFirstColumn()); 88 | if (firstCell == null) { 89 | visitCellNull(column); 90 | return; 91 | } 92 | 93 | if (firstCell.getRowIndex() != cell.getRowIndex() || firstCell.getColumnIndex() != cell.getColumnIndex()) { 94 | visitCellValue(bean, firstCell, visitor); 95 | return; 96 | } 97 | } 98 | 99 | visitor.visitCellValueBlank(column, cell); 100 | } 101 | 102 | protected CellRangeAddress findRegion(PoiExcelColumnBean bean, Cell cell) { 103 | Sheet sheet = cell.getSheet(); 104 | int r = cell.getRowIndex(); 105 | int c = cell.getColumnIndex(); 106 | 107 | MergedRegionFinder finder = bean.getMergedRegionFinder(); 108 | return finder.get(sheet, r, c); 109 | } 110 | 111 | protected void visitCellValueFormula(PoiExcelColumnBean bean, Cell cell, CellVisitor visitor) { 112 | assert cell.getCellTypeEnum() == CellType.FORMULA; 113 | 114 | FormulaHandling handling = bean.getFormulaHandling(); 115 | switch (handling) { 
116 | case CASHED_VALUE: 117 | visitCellValueFormulaCashedValue(bean, cell, visitor); 118 | break; 119 | default: 120 | visitCellValueFormulaEvaluate(bean, cell, visitor); 121 | break; 122 | } 123 | } 124 | 125 | protected void visitCellValueFormulaCashedValue(PoiExcelColumnBean bean, Cell cell, CellVisitor visitor) { 126 | Column column = bean.getColumn(); 127 | 128 | CellType cellType = cell.getCachedFormulaResultTypeEnum(); 129 | switch (cellType) { 130 | case NUMERIC: 131 | visitor.visitCellValueNumeric(column, cell, cell.getNumericCellValue()); 132 | return; 133 | case STRING: 134 | visitor.visitCellValueString(column, cell, cell.getStringCellValue()); 135 | return; 136 | case BLANK: 137 | visitCellValueBlank(bean, cell, visitor); 138 | return; 139 | case BOOLEAN: 140 | visitor.visitCellValueBoolean(column, cell, cell.getBooleanCellValue()); 141 | return; 142 | case ERROR: 143 | visitCellValueError(bean, cell, cell.getErrorCellValue(), visitor); 144 | return; 145 | case FORMULA: 146 | default: 147 | throw new IllegalStateException(MessageFormat.format("unsupported POI cellType={0}", cellType)); 148 | } 149 | } 150 | 151 | protected void visitCellValueFormulaEvaluate(PoiExcelColumnBean bean, Cell cell, CellVisitor visitor) { 152 | Column column = bean.getColumn(); 153 | 154 | List list = bean.getFormulaReplace(); 155 | if (!list.isEmpty()) { 156 | String formula = cell.getCellFormula(); 157 | String old = formula; 158 | 159 | for (FormulaReplaceTask replace : list) { 160 | String regex = replace.getRegex(); 161 | String replacement = replace.getTo(); 162 | 163 | if (replacement.contains("${row}")) { 164 | replacement = replacement.replace("${row}", Integer.toString(cell.getRowIndex() + 1)); 165 | } 166 | if (replacement.contains("${column}")) { 167 | replacement = replacement.replace("${column}", 168 | CellReference.convertNumToColString(cell.getColumnIndex() + 1)); 169 | } 170 | 171 | formula = formula.replaceAll(regex, replacement); 172 | } 173 | 174 | if 
(!formula.equals(old)) { 175 | log.debug("formula replaced. old=\"{}\", new=\"{}\"", old, formula); 176 | try { 177 | cell.setCellFormula(formula); 178 | } catch (Exception e) { 179 | throw new RuntimeException(MessageFormat.format("setCellFormula error. formula={0}", formula), e); 180 | } 181 | } 182 | } 183 | 184 | CellValue cellValue; 185 | try { 186 | Workbook book = cell.getSheet().getWorkbook(); 187 | CreationHelper helper = book.getCreationHelper(); 188 | FormulaEvaluator evaluator = helper.createFormulaEvaluator(); 189 | cellValue = evaluator.evaluate(cell); 190 | } catch (Exception e) { 191 | ErrorStrategy strategy = bean.getEvaluateErrorStrategy(); 192 | switch (strategy.getStrategy()) { 193 | default: 194 | break; 195 | case CONSTANT: 196 | String value = strategy.getValue(); 197 | if (value == null) { 198 | pageBuilder.setNull(column); 199 | } else { 200 | visitor.visitCellValueString(column, cell, value); 201 | } 202 | return; 203 | } 204 | 205 | throw new RuntimeException(MessageFormat.format("evaluate error. 
formula={0}", cell.getCellFormula()), e); 206 | } 207 | 208 | CellType cellType = cellValue.getCellTypeEnum(); 209 | switch (cellType) { 210 | case NUMERIC: 211 | visitor.visitCellValueNumeric(column, cellValue, cellValue.getNumberValue()); 212 | return; 213 | case STRING: 214 | visitor.visitCellValueString(column, cellValue, cellValue.getStringValue()); 215 | return; 216 | case BLANK: 217 | visitor.visitCellValueBlank(column, cellValue); 218 | return; 219 | case BOOLEAN: 220 | visitor.visitCellValueBoolean(column, cellValue, cellValue.getBooleanValue()); 221 | return; 222 | case ERROR: 223 | visitCellValueError(bean, cellValue, cellValue.getErrorValue(), visitor); 224 | return; 225 | case FORMULA: 226 | default: 227 | throw new IllegalStateException(MessageFormat.format("unsupported POI cellType={0}", cellType)); 228 | } 229 | } 230 | 231 | protected void visitCellValueError(PoiExcelColumnBean bean, Object cell, int errorCode, CellVisitor visitor) { 232 | Column column = bean.getColumn(); 233 | 234 | ErrorStrategy strategy = bean.getCellErrorStrategy(); 235 | switch (strategy.getStrategy()) { 236 | default: 237 | pageBuilder.setNull(column); 238 | return; 239 | case CONSTANT: 240 | String value = strategy.getValue(); 241 | if (value == null) { 242 | pageBuilder.setNull(column); 243 | } else { 244 | visitor.visitCellValueString(column, cell, value); 245 | } 246 | return; 247 | case ERROR_CODE: 248 | break; 249 | case EXCEPTION: 250 | FormulaError error = FormulaError.forInt((byte) errorCode); 251 | throw new RuntimeException(MessageFormat.format("encount cell error. 
error_code={0}({1})", errorCode, 252 | error.getString())); 253 | } 254 | 255 | visitor.visitCellValueError(column, cell, errorCode); 256 | } 257 | 258 | protected void visitCellNull(Column column) { 259 | pageBuilder.setNull(column); 260 | } 261 | } 262 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelClientAnchorVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor; 2 | 3 | import java.util.Collection; 4 | import java.util.Collections; 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | 8 | import org.apache.poi.ss.usermodel.Cell; 9 | import org.apache.poi.ss.usermodel.ClientAnchor; 10 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 11 | import org.embulk.spi.Column; 12 | 13 | public class PoiExcelClientAnchorVisitor extends AbstractPoiExcelCellAttributeVisitor { 14 | 15 | public PoiExcelClientAnchorVisitor(PoiExcelVisitorValue visitorValue) { 16 | super(visitorValue); 17 | } 18 | 19 | public Object getClientAnchorValue(Column column, Cell cell, ClientAnchor anchor, String key) { 20 | if (key == null || key.isEmpty()) { 21 | return getAllValues(column, cell, anchor); 22 | } 23 | 24 | return getAttributeValue(column, cell, anchor, key); 25 | } 26 | 27 | @Override 28 | protected ClientAnchor getAttributeSource(PoiExcelColumnBean bean, Cell cell) { 29 | throw new UnsupportedOperationException(); 30 | } 31 | 32 | @Override 33 | protected Map> getAttributeSupplierMap() { 34 | return SUPPLIER_MAP; 35 | } 36 | 37 | private static final Map> SUPPLIER_MAP; 38 | static { 39 | Map> map = new HashMap<>(16); 40 | map.put("anchor_type", new AttributeSupplier() { 41 | @Override 42 | public Object get(Column column, Cell cell, ClientAnchor anchor) { 43 | return (long) anchor.getAnchorType().value; 44 | } 45 | }); 46 | map.put("col1", new AttributeSupplier() { 47 | @Override 48 | 
public Object get(Column column, Cell cell, ClientAnchor anchor) { 49 | return (long) anchor.getCol1(); 50 | } 51 | }); 52 | map.put("col2", new AttributeSupplier() { 53 | @Override 54 | public Object get(Column column, Cell cell, ClientAnchor anchor) { 55 | return (long) anchor.getCol2(); 56 | } 57 | }); 58 | map.put("dx1", new AttributeSupplier() { 59 | @Override 60 | public Object get(Column column, Cell cell, ClientAnchor anchor) { 61 | return (long) anchor.getDx1(); 62 | } 63 | }); 64 | map.put("dx2", new AttributeSupplier() { 65 | @Override 66 | public Object get(Column column, Cell cell, ClientAnchor anchor) { 67 | return (long) anchor.getDx2(); 68 | } 69 | }); 70 | map.put("dy1", new AttributeSupplier() { 71 | @Override 72 | public Object get(Column column, Cell cell, ClientAnchor anchor) { 73 | return (long) anchor.getDy1(); 74 | } 75 | }); 76 | map.put("dy2", new AttributeSupplier() { 77 | @Override 78 | public Object get(Column column, Cell cell, ClientAnchor anchor) { 79 | return (long) anchor.getDy2(); 80 | } 81 | }); 82 | map.put("row1", new AttributeSupplier() { 83 | @Override 84 | public Object get(Column column, Cell cell, ClientAnchor anchor) { 85 | return (long) anchor.getRow1(); 86 | } 87 | }); 88 | map.put("row2", new AttributeSupplier() { 89 | @Override 90 | public Object get(Column column, Cell cell, ClientAnchor anchor) { 91 | return (long) anchor.getRow2(); 92 | } 93 | }); 94 | SUPPLIER_MAP = Collections.unmodifiableMap(map); 95 | } 96 | 97 | public static Collection getKeys() { 98 | return SUPPLIER_MAP.keySet(); 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColorVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor; 2 | 3 | import java.text.MessageFormat; 4 | 5 | import org.apache.poi.hssf.usermodel.HSSFPalette; 6 | import 
org.apache.poi.hssf.usermodel.HSSFWorkbook; 7 | import org.apache.poi.hssf.util.HSSFColor; 8 | import org.apache.poi.ss.usermodel.Color; 9 | import org.apache.poi.ss.usermodel.Workbook; 10 | import org.apache.poi.xssf.usermodel.XSSFColor; 11 | import org.embulk.parser.poi_excel.visitor.embulk.CellVisitor; 12 | import org.embulk.spi.Column; 13 | import org.embulk.spi.PageBuilder; 14 | import org.embulk.spi.type.StringType; 15 | 16 | public class PoiExcelColorVisitor { 17 | 18 | protected final PoiExcelVisitorValue visitorValue; 19 | 20 | public PoiExcelColorVisitor(PoiExcelVisitorValue visitorValue) { 21 | this.visitorValue = visitorValue; 22 | } 23 | 24 | public void visitCellColor(Column column, short colorIndex, CellVisitor visitor) { 25 | Color color = getHssfColor(colorIndex); 26 | visitCellColor(column, color, visitor); 27 | } 28 | 29 | public void visitCellColor(Column column, Color color, CellVisitor visitor) { 30 | int rgb = getRGB(color); 31 | if (rgb < 0) { 32 | PageBuilder pageBuilder = visitorValue.getPageBuilder(); 33 | pageBuilder.setNull(column); 34 | return; 35 | } 36 | 37 | if (column.getType() instanceof StringType) { 38 | String s = String.format("%06x", rgb); 39 | visitor.visitCellValueString(column, color, s); 40 | } else { 41 | visitor.visitValueLong(column, color, rgb); 42 | } 43 | } 44 | 45 | public Color getHssfColor(short colorIndex) { 46 | HSSFWorkbook book = (HSSFWorkbook) visitorValue.getSheet().getWorkbook(); 47 | return getHssfColor(book, colorIndex); 48 | } 49 | 50 | public static Color getHssfColor(Workbook workbook, short colorIndex) { 51 | HSSFWorkbook book = (HSSFWorkbook) workbook; 52 | HSSFPalette palette = book.getCustomPalette(); 53 | HSSFColor color = palette.getColor(colorIndex); 54 | return color; 55 | } 56 | 57 | public static int getRGB(Color color) { 58 | if (color == null) { 59 | return -1; 60 | } 61 | 62 | int[] rgb = new int[3]; 63 | if (color instanceof HSSFColor) { 64 | HSSFColor hssf = (HSSFColor) color; 65 | 
short[] s = hssf.getTriplet(); 66 | rgb[0] = s[0] & 0xff; 67 | rgb[1] = s[1] & 0xff; 68 | rgb[2] = s[2] & 0xff; 69 | } else if (color instanceof XSSFColor) { 70 | XSSFColor xssf = (XSSFColor) color; 71 | byte[] b = xssf.getRGB(); 72 | if (b == null) { 73 | return -1; 74 | } 75 | rgb[0] = b[0] & 0xff; 76 | rgb[1] = b[1] & 0xff; 77 | rgb[2] = b[2] & 0xff; 78 | } else { 79 | throw new IllegalStateException(MessageFormat.format("unsupported POI color={0}", color)); 80 | } 81 | 82 | return (rgb[0] << 16) | (rgb[1] << 8) | rgb[2]; 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelColumnVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor; 2 | 3 | import java.text.MessageFormat; 4 | 5 | import org.apache.poi.ss.usermodel.Cell; 6 | import org.apache.poi.ss.usermodel.CellType; 7 | import org.apache.poi.ss.usermodel.Sheet; 8 | import org.embulk.parser.poi_excel.PoiExcelColumnValueType; 9 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 10 | import org.embulk.parser.poi_excel.bean.record.PoiExcelRecord; 11 | import org.embulk.parser.poi_excel.bean.util.PoiExcelCellAddress; 12 | import org.embulk.parser.poi_excel.visitor.embulk.CellVisitor; 13 | import org.embulk.spi.Column; 14 | import org.embulk.spi.ColumnVisitor; 15 | import org.embulk.spi.Exec; 16 | import org.embulk.spi.PageBuilder; 17 | import org.slf4j.Logger; 18 | 19 | public class PoiExcelColumnVisitor implements ColumnVisitor { 20 | private final Logger log = Exec.getLogger(getClass()); 21 | 22 | protected final PoiExcelVisitorValue visitorValue; 23 | protected final PageBuilder pageBuilder; 24 | protected final PoiExcelVisitorFactory factory; 25 | 26 | protected PoiExcelRecord record; 27 | 28 | public PoiExcelColumnVisitor(PoiExcelVisitorValue visitorValue) { 29 | this.visitorValue = visitorValue; 30 | 
this.pageBuilder = visitorValue.getPageBuilder(); 31 | this.factory = visitorValue.getVisitorFactory(); 32 | } 33 | 34 | public void setRecord(PoiExcelRecord record) { 35 | this.record = record; 36 | } 37 | 38 | @Override 39 | public final void booleanColumn(Column column) { 40 | visitCell0(column, factory.getBooleanCellVisitor()); 41 | } 42 | 43 | @Override 44 | public final void longColumn(Column column) { 45 | visitCell0(column, factory.getLongCellVisitor()); 46 | } 47 | 48 | @Override 49 | public final void doubleColumn(Column column) { 50 | visitCell0(column, factory.getDoubleCellVisitor()); 51 | } 52 | 53 | @Override 54 | public final void stringColumn(Column column) { 55 | visitCell0(column, factory.getStringCellVisitor()); 56 | } 57 | 58 | @Override 59 | public final void timestampColumn(Column column) { 60 | visitCell0(column, factory.getTimestampCellVisitor()); 61 | } 62 | 63 | protected final void visitCell0(Column column, CellVisitor visitor) { 64 | if (log.isTraceEnabled()) { 65 | log.trace("{} start", column); 66 | } 67 | try { 68 | visitCell(column, visitor); 69 | } catch (Exception e) { 70 | String sheetName = visitorValue.getSheet().getSheetName(); 71 | String ref = record.getCellReference(visitorValue.getColumnBean(column)).formatAsString(); 72 | throw new RuntimeException(MessageFormat.format("error at {0} cell={1}!{2}. 
{3}", column, sheetName, ref, 73 | e.getMessage()), e); 74 | } 75 | if (log.isTraceEnabled()) { 76 | log.trace("{} end", column); 77 | } 78 | } 79 | 80 | protected void visitCell(Column column, CellVisitor visitor) { 81 | PoiExcelColumnBean bean = visitorValue.getColumnBean(column); 82 | PoiExcelColumnValueType valueType = bean.getValueType(); 83 | PoiExcelCellAddress cellAddress = bean.getCellAddress(); 84 | 85 | switch (valueType) { 86 | case SHEET_NAME: 87 | if (cellAddress != null) { 88 | Sheet sheet = cellAddress.getSheet(record); 89 | visitor.visitSheetName(column, sheet); 90 | } else { 91 | visitor.visitSheetName(column); 92 | } 93 | return; 94 | case ROW_NUMBER: 95 | int rowIndex; 96 | if (cellAddress != null) { 97 | rowIndex = cellAddress.getRowIndex(); 98 | } else { 99 | rowIndex = record.getRowIndex(bean); 100 | } 101 | visitor.visitRowNumber(column, rowIndex + 1); 102 | return; 103 | case COLUMN_NUMBER: 104 | int columnIndex; 105 | if (cellAddress != null) { 106 | columnIndex = cellAddress.getColumnIndex(); 107 | } else { 108 | columnIndex = record.getColumnIndex(bean); 109 | } 110 | visitor.visitColumnNumber(column, columnIndex + 1); 111 | return; 112 | case CONSTANT: 113 | visitCellConstant(column, bean.getValueTypeSuffix(), visitor); 114 | return; 115 | default: 116 | break; 117 | } 118 | 119 | // assert valueType.useCell(); 120 | Cell cell; 121 | if (cellAddress != null) { 122 | cell = cellAddress.getCell(record); 123 | } else { 124 | cell = record.getCell(bean); 125 | } 126 | if (cell == null) { 127 | visitCellNull(column); 128 | return; 129 | } 130 | switch (valueType) { 131 | case CELL_VALUE: 132 | case CELL_FORMULA: 133 | visitCellValue(bean, cell, visitor); 134 | return; 135 | case CELL_STYLE: 136 | visitCellStyle(bean, cell, visitor); 137 | return; 138 | case CELL_FONT: 139 | visitCellFont(bean, cell, visitor); 140 | return; 141 | case CELL_COMMENT: 142 | visitCellComment(bean, cell, visitor); 143 | return; 144 | case CELL_TYPE: 145 | 
visitCellType(bean, cell, cell.getCellTypeEnum(), visitor); 146 | return; 147 | case CELL_CACHED_TYPE: 148 | if (cell.getCellTypeEnum() == CellType.FORMULA) { 149 | visitCellType(bean, cell, cell.getCachedFormulaResultTypeEnum(), visitor); 150 | } else { 151 | visitCellType(bean, cell, cell.getCellTypeEnum(), visitor); 152 | } 153 | return; 154 | default: 155 | throw new UnsupportedOperationException(MessageFormat.format("unsupported value_type={0}", valueType)); 156 | } 157 | } 158 | 159 | protected void visitCellConstant(Column column, String value, CellVisitor visitor) { 160 | if (value == null) { 161 | pageBuilder.setNull(column); 162 | return; 163 | } 164 | visitor.visitCellValueString(column, null, value); 165 | } 166 | 167 | protected void visitCellNull(Column column) { 168 | pageBuilder.setNull(column); 169 | } 170 | 171 | private void visitCellValue(PoiExcelColumnBean bean, Cell cell, CellVisitor visitor) { 172 | PoiExcelCellValueVisitor delegator = factory.getPoiExcelCellValueVisitor(); 173 | delegator.visitCellValue(bean, cell, visitor); 174 | } 175 | 176 | private void visitCellStyle(PoiExcelColumnBean bean, Cell cell, CellVisitor visitor) { 177 | PoiExcelCellStyleVisitor delegator = factory.getPoiExcelCellStyleVisitor(); 178 | delegator.visit(bean, cell, visitor); 179 | } 180 | 181 | private void visitCellFont(PoiExcelColumnBean bean, Cell cell, CellVisitor visitor) { 182 | PoiExcelCellFontVisitor delegator = factory.getPoiExcelCellFontVisitor(); 183 | delegator.visit(bean, cell, visitor); 184 | } 185 | 186 | private void visitCellComment(PoiExcelColumnBean bean, Cell cell, CellVisitor visitor) { 187 | PoiExcelCellCommentVisitor delegator = factory.getPoiExcelCellCommentVisitor(); 188 | delegator.visit(bean, cell, visitor); 189 | } 190 | 191 | private void visitCellType(PoiExcelColumnBean bean, Cell cell, CellType cellType, CellVisitor visitor) { 192 | PoiExcelCellTypeVisitor delegator = factory.getPoiExcelCellTypeVisitor(); 193 | delegator.visit(bean, 
cell, cellType, visitor); 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelVisitorFactory.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor; 2 | 3 | import org.embulk.parser.poi_excel.visitor.embulk.BooleanCellVisitor; 4 | import org.embulk.parser.poi_excel.visitor.embulk.DoubleCellVisitor; 5 | import org.embulk.parser.poi_excel.visitor.embulk.LongCellVisitor; 6 | import org.embulk.parser.poi_excel.visitor.embulk.StringCellVisitor; 7 | import org.embulk.parser.poi_excel.visitor.embulk.TimestampCellVisitor; 8 | 9 | /** Creates each visitor on first request and caches it; every visitor is built from the same PoiExcelVisitorValue. The getXxx methods are final and do the caching, while the protected newXxx methods only construct and can be overridden to substitute an implementation. NOTE(review): the null checks are unsynchronized, so this looks intended for use from a single thread - confirm. */ public class PoiExcelVisitorFactory { 10 | 11 | protected final PoiExcelVisitorValue visitorValue; 12 | 13 | /** Stores the shared visitor value and registers this factory on it through setVisitorFactory. */ public PoiExcelVisitorFactory(PoiExcelVisitorValue visitorValue) { 14 | this.visitorValue = visitorValue; 15 | visitorValue.setVisitorFactory(this); 16 | } 17 | 18 | /** Returns the visitor value passed to the constructor. */ public final PoiExcelVisitorValue getVisitorValue() { 19 | return visitorValue; 20 | } 21 | 22 | // visitor root (Embulk ColumnVisitor) 23 | private PoiExcelColumnVisitor poiExcelColumnVisitor; 24 | 25 | public final PoiExcelColumnVisitor getPoiExcelColumnVisitor() { 26 | if (poiExcelColumnVisitor == null) { 27 | poiExcelColumnVisitor = newPoiExcelColumnVisitor(); 28 | } 29 | return poiExcelColumnVisitor; 30 | } 31 | 32 | protected PoiExcelColumnVisitor newPoiExcelColumnVisitor() { 33 | return new PoiExcelColumnVisitor(visitorValue); 34 | } 35 | 36 | // Embulk boolean 37 | private BooleanCellVisitor booleanCellVisitor; 38 | 39 | public final BooleanCellVisitor getBooleanCellVisitor() { 40 | if (booleanCellVisitor == null) { 41 | booleanCellVisitor = newBooleanCellVisitor(); 42 | } 43 | return booleanCellVisitor; 44 | } 45 | 46 | protected BooleanCellVisitor newBooleanCellVisitor() { 47 | return new BooleanCellVisitor(visitorValue); 48 | } 49 | 50 | // Embulk long 51 | private LongCellVisitor
longCellVisitor; 52 | 53 | /** Returns the cached visitor for Embulk long columns, creating it on first use. */ public final LongCellVisitor getLongCellVisitor() { 54 | if (longCellVisitor == null) { 55 | longCellVisitor = newLongCellVisitor(); 56 | } 57 | return longCellVisitor; 58 | } 59 | 60 | /** Construction hook for the long visitor. */ protected LongCellVisitor newLongCellVisitor() { 61 | return new LongCellVisitor(visitorValue); 62 | } 63 | 64 | // Embulk double 65 | private DoubleCellVisitor doubleCellVisitor; 66 | 67 | public final DoubleCellVisitor getDoubleCellVisitor() { 68 | if (doubleCellVisitor == null) { 69 | doubleCellVisitor = newDoubleCellVisitor(); 70 | } 71 | return doubleCellVisitor; 72 | } 73 | 74 | protected DoubleCellVisitor newDoubleCellVisitor() { 75 | return new DoubleCellVisitor(visitorValue); 76 | } 77 | 78 | // Embulk string 79 | private StringCellVisitor stringCellVisitor; 80 | 81 | public final StringCellVisitor getStringCellVisitor() { 82 | if (stringCellVisitor == null) { 83 | stringCellVisitor = newStringCellVisitor(); 84 | } 85 | return stringCellVisitor; 86 | } 87 | 88 | protected StringCellVisitor newStringCellVisitor() { 89 | return new StringCellVisitor(visitorValue); 90 | } 91 | 92 | // Embulk timestamp 93 | private TimestampCellVisitor timestampCellVisitor; 94 | 95 | public final TimestampCellVisitor getTimestampCellVisitor() { 96 | if (timestampCellVisitor == null) { 97 | timestampCellVisitor = newTimestampCellVisitor(); 98 | } 99 | return timestampCellVisitor; 100 | } 101 | 102 | protected TimestampCellVisitor newTimestampCellVisitor() { 103 | return new TimestampCellVisitor(visitorValue); 104 | } 105 | 106 | // cell value/formula 107 | private PoiExcelCellValueVisitor poiExcelCellValueVisitor; 108 | 109 | public final PoiExcelCellValueVisitor getPoiExcelCellValueVisitor() { 110 | if (poiExcelCellValueVisitor == null) { 111 | poiExcelCellValueVisitor = newPoiExcelCellValueVisitor(); 112 | } 113 | return poiExcelCellValueVisitor; 114 | } 115 | 116 | protected PoiExcelCellValueVisitor newPoiExcelCellValueVisitor() { 117 | return new
PoiExcelCellValueVisitor(visitorValue); 118 | } 119 | 120 | // cell style 121 | private PoiExcelCellStyleVisitor poiExcelCellStyleVisitor; 122 | 123 | public final PoiExcelCellStyleVisitor getPoiExcelCellStyleVisitor() { 124 | if (poiExcelCellStyleVisitor == null) { 125 | poiExcelCellStyleVisitor = newPoiExcelCellStyleVisitor(); 126 | } 127 | return poiExcelCellStyleVisitor; 128 | } 129 | 130 | protected PoiExcelCellStyleVisitor newPoiExcelCellStyleVisitor() { 131 | return new PoiExcelCellStyleVisitor(visitorValue); 132 | } 133 | 134 | // cell font 135 | private PoiExcelCellFontVisitor poiExcelCellFontVisitor; 136 | 137 | public final PoiExcelCellFontVisitor getPoiExcelCellFontVisitor() { 138 | if (poiExcelCellFontVisitor == null) { 139 | poiExcelCellFontVisitor = newPoiExcelCellFontVisitor(); 140 | } 141 | return poiExcelCellFontVisitor; 142 | } 143 | 144 | protected PoiExcelCellFontVisitor newPoiExcelCellFontVisitor() { 145 | return new PoiExcelCellFontVisitor(visitorValue); 146 | } 147 | 148 | // cell comment 149 | private PoiExcelCellCommentVisitor poiExcelCellCommentVisitor; 150 | 151 | public final PoiExcelCellCommentVisitor getPoiExcelCellCommentVisitor() { 152 | if (poiExcelCellCommentVisitor == null) { 153 | poiExcelCellCommentVisitor = newPoiExcelCellCommentVisitor(); 154 | } 155 | return poiExcelCellCommentVisitor; 156 | } 157 | 158 | protected PoiExcelCellCommentVisitor newPoiExcelCellCommentVisitor() { 159 | return new PoiExcelCellCommentVisitor(visitorValue); 160 | } 161 | 162 | // cell type 163 | private PoiExcelCellTypeVisitor poiExcelCellTypeVisitor; 164 | 165 | public final PoiExcelCellTypeVisitor getPoiExcelCellTypeVisitor() { 166 | if (poiExcelCellTypeVisitor == null) { 167 | poiExcelCellTypeVisitor = newPoiExcelCellTypeVisitor(); 168 | } 169 | return poiExcelCellTypeVisitor; 170 | } 171 | 172 | protected PoiExcelCellTypeVisitor newPoiExcelCellTypeVisitor() { 173 | return new PoiExcelCellTypeVisitor(visitorValue); 174 | } 175 | 176 | //
ClientAnchor 177 | private PoiExcelClientAnchorVisitor poiExcelClientAnchorVisitor; 178 | 179 | /** Returns the cached ClientAnchor visitor, creating it on first use. */ public final PoiExcelClientAnchorVisitor getPoiExcelClientAnchorVisitor() { 180 | if (poiExcelClientAnchorVisitor == null) { 181 | poiExcelClientAnchorVisitor = newPoiExcelClientAnchorVisitor(); 182 | } 183 | return poiExcelClientAnchorVisitor; 184 | } 185 | 186 | protected PoiExcelClientAnchorVisitor newPoiExcelClientAnchorVisitor() { 187 | return new PoiExcelClientAnchorVisitor(visitorValue); 188 | } 189 | 190 | // color 191 | private PoiExcelColorVisitor poiExcelColorVisitor; 192 | 193 | public final PoiExcelColorVisitor getPoiExcelColorVisitor() { 194 | if (poiExcelColorVisitor == null) { 195 | poiExcelColorVisitor = newPoiExcelColorVisitor(); 196 | } 197 | return poiExcelColorVisitor; 198 | } 199 | 200 | protected PoiExcelColorVisitor newPoiExcelColorVisitor() { 201 | return new PoiExcelColorVisitor(visitorValue); 202 | } 203 | } 204 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/PoiExcelVisitorValue.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor; 2 | 3 | import org.apache.poi.ss.usermodel.Sheet; 4 | import org.embulk.parser.poi_excel.PoiExcelParserPlugin.PluginTask; 5 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 6 | import org.embulk.parser.poi_excel.bean.PoiExcelSheetBean; 7 | import org.embulk.spi.Column; 8 | import org.embulk.spi.PageBuilder; 9 | import org.embulk.spi.Schema; 10 | 11 | /** Context object handed to every visitor: the plugin task, the POI sheet, the Embulk page builder, a sheet bean built from them, and a reference to the visitor factory that is set after construction. */ public class PoiExcelVisitorValue { 12 | private final PluginTask task; 13 | private final Sheet sheet; 14 | private final PageBuilder pageBuilder; 15 | private final PoiExcelSheetBean sheetBean; 16 | private PoiExcelVisitorFactory factory; 17 | 18 | /** Stores task, sheet and page builder, and builds the PoiExcelSheetBean from task, schema and sheet. The factory field stays null until setVisitorFactory is called. */ public PoiExcelVisitorValue(PluginTask task, Schema schema, Sheet sheet, PageBuilder pageBuilder) { 19 | this.task = task; 20 | this.sheet =
sheet; 21 | this.pageBuilder = pageBuilder; 22 | this.sheetBean = new PoiExcelSheetBean(task, schema, sheet); 23 | } 24 | 25 | public PluginTask getPluginTask() { 26 | return task; 27 | } 28 | 29 | public Sheet getSheet() { 30 | return sheet; 31 | } 32 | 33 | public PageBuilder getPageBuilder() { 34 | return pageBuilder; 35 | } 36 | 37 | /** Records the factory; the PoiExcelVisitorFactory constructor calls this with itself. */ public void setVisitorFactory(PoiExcelVisitorFactory factory) { 38 | this.factory = factory; 39 | } 40 | 41 | /** Returns the factory recorded by setVisitorFactory, or null if none was set yet. */ public PoiExcelVisitorFactory getVisitorFactory() { 42 | return factory; 43 | } 44 | 45 | public PoiExcelSheetBean getSheetBean() { 46 | return sheetBean; 47 | } 48 | 49 | /** Looks up the bean for the given Embulk column through the sheet bean. */ public PoiExcelColumnBean getColumnBean(Column column) { 50 | return sheetBean.getColumnBean(column); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/embulk/BooleanCellVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor.embulk; 2 | 3 | import org.apache.poi.ss.usermodel.Sheet; 4 | import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorValue; 5 | import org.embulk.spi.Column; 6 | 7 | /** Writes Excel values into an Embulk boolean column. */ public class BooleanCellVisitor extends CellVisitor { 8 | 9 | public BooleanCellVisitor(PoiExcelVisitorValue visitorValue) { 10 | super(visitorValue); 11 | } 12 | 13 | /** Any number other than zero is stored as true. */ @Override 14 | public void visitCellValueNumeric(Column column, Object source, double value) { 15 | pageBuilder.setBoolean(column, value != 0d); 16 | } 17 | 18 | /** Uses Boolean.parseBoolean, so text that is not the word true (ignoring case) is stored as false and never raises a convert error. */ @Override 19 | public void visitCellValueString(Column column, Object source, String value) { 20 | pageBuilder.setBoolean(column, Boolean.parseBoolean(value)); 21 | } 22 | 23 | @Override 24 | public void visitCellValueBoolean(Column column, Object source, boolean value) { 25 | pageBuilder.setBoolean(column, value); 26 | } 27 | 28 | /** An error cell is stored as null; the error code is ignored. */ @Override 29 | public void visitCellValueError(Column column, Object source, int code) { 30 | pageBuilder.setNull(column); 31 | }
32 | 33 | /** Any long other than zero is stored as true. */ @Override 34 | public void visitValueLong(Column column, Object source, long value) { 35 | pageBuilder.setBoolean(column, value != 0); 36 | } 37 | 38 | /** Uses the sheet held by the visitor value. */ @Override 39 | public void visitSheetName(Column column) { 40 | Sheet sheet = visitorValue.getSheet(); 41 | visitSheetName(column, sheet); 42 | } 43 | 44 | /** Stores whether the index of the sheet in its workbook is non-zero, i.e. false only for the sheet at index 0. */ @Override 45 | public void visitSheetName(Column column, Sheet sheet) { 46 | int index = sheet.getWorkbook().getSheetIndex(sheet); 47 | pageBuilder.setBoolean(column, index != 0); 48 | } 49 | 50 | @Override 51 | public void visitRowNumber(Column column, int index1) { 52 | pageBuilder.setBoolean(column, index1 != 0); 53 | } 54 | 55 | @Override 56 | public void visitColumnNumber(Column column, int index1) { 57 | pageBuilder.setBoolean(column, index1 != 0); 58 | } 59 | 60 | /** Writes the convert-error constant using Boolean.parseBoolean. */ @Override 61 | protected void doConvertErrorConstant(Column column, String value) throws Exception { 62 | pageBuilder.setBoolean(column, Boolean.parseBoolean(value)); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/embulk/CellVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor.embulk; 2 | 3 | import java.text.MessageFormat; 4 | 5 | import org.apache.poi.ss.usermodel.Cell; 6 | import org.apache.poi.ss.usermodel.Sheet; 7 | import org.embulk.config.ConfigException; 8 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 9 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean.ErrorStrategy; 10 | import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorValue; 11 | import org.embulk.spi.Column; 12 | import org.embulk.spi.PageBuilder; 13 | 14 | /** Base class of the visitors that write one value into the page builder for a column. There is one subclass per Embulk column type, and each decides how a numeric, string, boolean or error cell value, or a sheet, row or column attribute, is converted. */ public abstract class CellVisitor { 15 | 16 | protected final PoiExcelVisitorValue visitorValue; 17 | protected final PageBuilder pageBuilder; 18 | 19 | /** Keeps the visitor value and caches its page builder. */ public CellVisitor(PoiExcelVisitorValue visitorValue) { 20 | this.visitorValue = visitorValue;
21 | this.pageBuilder = visitorValue.getPageBuilder(); 22 | } 23 | 24 | public abstract void visitCellValueNumeric(Column column, Object source, double value); 25 | 26 | public abstract void visitCellValueString(Column column, Object source, String value); 27 | 28 | /** Default for blank cells: store null. */ public void visitCellValueBlank(Column column, Object source) { 29 | pageBuilder.setNull(column); 30 | } 31 | 32 | public abstract void visitCellValueBoolean(Column column, Object source, boolean value); 33 | 34 | /** Receives the numeric error code of an error cell. */ public abstract void visitCellValueError(Column column, Object source, int code); 35 | 36 | /** Default implementation stores the formula text of the cell with setString. */ public void visitCellFormula(Column column, Cell cell) { 37 | pageBuilder.setString(column, cell.getCellFormula()); 38 | } 39 | 40 | public abstract void visitValueLong(Column column, Object source, long value); 41 | 42 | public abstract void visitSheetName(Column column); 43 | 44 | public abstract void visitSheetName(Column column, Sheet sheet); 45 | 46 | /** NOTE(review): the parameter name index1 suggests a 1-based number - confirm against the callers. */ public abstract void visitRowNumber(Column column, int index1); 47 | 48 | public abstract void visitColumnNumber(Column column, int index1); 49 | 50 | /** Applies the convert-error strategy configured for the column. For CONSTANT it stores null when no constant is configured, otherwise writes the constant through doConvertErrorConstant (a failure there is rethrown as ConfigException), and then returns. For every other strategy it falls out of the switch and throws a RuntimeException that carries srcValue in the message and t as the cause. */ protected void doConvertError(Column column, Object srcValue, Throwable t) { 51 | PoiExcelColumnBean bean = visitorValue.getColumnBean(column); 52 | ErrorStrategy strategy = bean.getConvertErrorStrategy(); 53 | switch (strategy.getStrategy()) { 54 | default: 55 | break; 56 | case CONSTANT: 57 | String value = strategy.getValue(); 58 | if (value == null) { 59 | pageBuilder.setNull(column); 60 | } else { 61 | try { 62 | doConvertErrorConstant(column, value); 63 | } catch (Exception e) { 64 | throw new ConfigException(MessageFormat.format("constant value convert error. value={0}", value), e); 65 | } 66 | } 67 | return; 68 | } 69 | 70 | throw new RuntimeException(MessageFormat.format("convert error. 
value={0}", srcValue), t); 71 | } 72 | 73 | /** Writes the configured convert-error constant into the column in the target type of the subclass; may throw when the constant itself cannot be converted. */ protected abstract void doConvertErrorConstant(Column column, String value) throws Exception; 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/embulk/DoubleCellVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor.embulk; 2 | 3 | import org.apache.poi.ss.usermodel.Sheet; 4 | import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorValue; 5 | import org.embulk.spi.Column; 6 | 7 | /** Writes Excel values into an Embulk double column. */ public class DoubleCellVisitor extends CellVisitor { 8 | 9 | public DoubleCellVisitor(PoiExcelVisitorValue visitorValue) { 10 | super(visitorValue); 11 | } 12 | 13 | @Override 14 | public void visitCellValueNumeric(Column column, Object source, double value) { 15 | pageBuilder.setDouble(column, value); 16 | } 17 | 18 | /** Parses the text with Double.parseDouble; a NumberFormatException is handed to doConvertError and nothing else is written by this method. */ @Override 19 | public void visitCellValueString(Column column, Object source, String value) { 20 | double d; 21 | try { 22 | d = Double.parseDouble(value); 23 | } catch (NumberFormatException e) { 24 | doConvertError(column, value, e); 25 | return; 26 | } 27 | pageBuilder.setDouble(column, d); 28 | } 29 | 30 | /** Stores 1 for true and 0 for false. */ @Override 31 | public void visitCellValueBoolean(Column column, Object source, boolean value) { 32 | pageBuilder.setDouble(column, value ?
1 : 0); 33 | } 34 | 35 | /** Stores the numeric error code itself as the value. */ @Override 36 | public void visitCellValueError(Column column, Object source, int code) { 37 | pageBuilder.setDouble(column, code); 38 | } 39 | 40 | @Override 41 | public void visitValueLong(Column column, Object source, long value) { 42 | pageBuilder.setDouble(column, value); 43 | } 44 | 45 | @Override 46 | public void visitSheetName(Column column) { 47 | Sheet sheet = visitorValue.getSheet(); 48 | visitSheetName(column, sheet); 49 | } 50 | 51 | /** Stores the index of the sheet within its workbook instead of a name. */ @Override 52 | public void visitSheetName(Column column, Sheet sheet) { 53 | int index = sheet.getWorkbook().getSheetIndex(sheet); 54 | pageBuilder.setDouble(column, index); 55 | } 56 | 57 | @Override 58 | public void visitRowNumber(Column column, int index1) { 59 | pageBuilder.setDouble(column, index1); 60 | } 61 | 62 | @Override 63 | public void visitColumnNumber(Column column, int index1) { 64 | pageBuilder.setDouble(column, index1); 65 | } 66 | 67 | /** Writes the convert-error constant parsed with Double.parseDouble. */ @Override 68 | protected void doConvertErrorConstant(Column column, String value) throws Exception { 69 | pageBuilder.setDouble(column, Double.parseDouble(value)); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/embulk/LongCellVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor.embulk; 2 | 3 | import org.apache.poi.ss.usermodel.Sheet; 4 | import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorValue; 5 | import org.embulk.spi.Column; 6 | 7 | /** Writes Excel values into an Embulk long column. */ public class LongCellVisitor extends CellVisitor { 8 | 9 | public LongCellVisitor(PoiExcelVisitorValue visitorValue) { 10 | super(visitorValue); 11 | } 12 | 13 | /** Casts the double to long, which discards any fractional part without rounding. */ @Override 14 | public void visitCellValueNumeric(Column column, Object source, double value) { 15 | pageBuilder.setLong(column, (long) value); 16 | } 17 | 18 | /** Parses the text with Long.parseLong; a NumberFormatException is handed to doConvertError. */ @Override 19 | public void visitCellValueString(Column column, Object source, String value) { 20 |
long l; 21 | try { 22 | l = Long.parseLong(value); 23 | } catch (NumberFormatException e) { 24 | doConvertError(column, value, e); 25 | return; 26 | } 27 | pageBuilder.setLong(column, l); 28 | } 29 | 30 | /** Stores 1 for true and 0 for false. */ @Override 31 | public void visitCellValueBoolean(Column column, Object source, boolean value) { 32 | pageBuilder.setLong(column, value ? 1 : 0); 33 | } 34 | 35 | /** Stores the numeric error code itself as the value. */ @Override 36 | public void visitCellValueError(Column column, Object source, int code) { 37 | pageBuilder.setLong(column, code); 38 | } 39 | 40 | @Override 41 | public void visitValueLong(Column column, Object source, long value) { 42 | pageBuilder.setLong(column, value); 43 | } 44 | 45 | @Override 46 | public void visitSheetName(Column column) { 47 | Sheet sheet = visitorValue.getSheet(); 48 | visitSheetName(column, sheet); 49 | } 50 | 51 | /** Stores the index of the sheet within its workbook instead of a name. */ @Override 52 | public void visitSheetName(Column column, Sheet sheet) { 53 | int index = sheet.getWorkbook().getSheetIndex(sheet); 54 | pageBuilder.setLong(column, index); 55 | } 56 | 57 | @Override 58 | public void visitRowNumber(Column column, int index1) { 59 | pageBuilder.setLong(column, index1); 60 | } 61 | 62 | @Override 63 | public void visitColumnNumber(Column column, int index1) { 64 | pageBuilder.setLong(column, index1); 65 | } 66 | 67 | /** Writes the convert-error constant parsed with Long.parseLong. */ @Override 68 | protected void doConvertErrorConstant(Column column, String value) throws Exception { 69 | pageBuilder.setLong(column, Long.parseLong(value)); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/embulk/StringCellVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor.embulk; 2 | 3 | import java.text.MessageFormat; 4 | 5 | import org.apache.poi.ss.usermodel.FormulaError; 6 | import org.apache.poi.ss.usermodel.Sheet; 7 | import org.apache.poi.ss.util.CellReference; 8 | import org.embulk.parser.poi_excel.bean.PoiExcelColumnBean; 9
| import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorValue; 10 | import org.embulk.spi.Column; 11 | 12 | /** Writes Excel values into an Embulk string column. */ public class StringCellVisitor extends CellVisitor { 13 | 14 | public StringCellVisitor(PoiExcelVisitorValue visitorValue) { 15 | super(visitorValue); 16 | } 17 | 18 | @Override 19 | public void visitCellValueNumeric(Column column, Object source, double value) { 20 | String s = toString(column, source, value); 21 | pageBuilder.setString(column, s); 22 | } 23 | 24 | /** Renders a numeric value as text. When the column has a non-empty numeric format it is applied with String.format, and any failure is rethrown as IllegalArgumentException naming the format. Otherwise Double.toString is used and a trailing .0 is removed so whole numbers print without a fraction. */ protected String toString(Column column, Object source, double value) { 25 | String format = getNumericFormat(column); 26 | if (!format.isEmpty()) { 27 | try { 28 | return String.format(format, value); 29 | } catch (Exception e) { 30 | throw new IllegalArgumentException(MessageFormat.format( 31 | "illegal String.format for double. numeric_format=\"{0}\"", format), e); 32 | } 33 | } 34 | 35 | String s = Double.toString(value); 36 | if (s.endsWith(".0")) { 37 | return s.substring(0, s.length() - 2); 38 | } 39 | return s; 40 | } 41 | 42 | /** Reads the numeric format from the column bean. NOTE(review): toString calls isEmpty on the result, so this presumably never returns null - confirm in PoiExcelColumnBean. */ protected String getNumericFormat(Column column) { 43 | PoiExcelColumnBean bean = visitorValue.getColumnBean(column); 44 | return bean.getNumericFormat(); 45 | } 46 | 47 | @Override 48 | public void visitCellValueString(Column column, Object source, String value) { 49 | pageBuilder.setString(column, value); 50 | } 51 | 52 | @Override 53 | public void visitCellValueBoolean(Column column, Object source, boolean value) { 54 | pageBuilder.setString(column, Boolean.toString(value)); 55 | } 56 | 57 | /** Stores the text that FormulaError reports for the code, after narrowing the code to a byte. */ @Override 58 | public void visitCellValueError(Column column, Object source, int code) { 59 | FormulaError error = FormulaError.forInt((byte) code); 60 | String value = error.getString(); 61 | pageBuilder.setString(column, value); 62 | } 63 | 64 | @Override 65 | public void visitValueLong(Column column, Object source, long value) { 66 | String s = Long.toString(value); 67 | pageBuilder.setString(column, s); 68 | } 69 | 70 | @Override 71 | public void visitSheetName(Column
column) { 72 | Sheet sheet = visitorValue.getSheet(); 73 | visitSheetName(column, sheet); 74 | } 75 | 76 | /** Stores the actual sheet name. */ @Override 77 | public void visitSheetName(Column column, Sheet sheet) { 78 | pageBuilder.setString(column, sheet.getSheetName()); 79 | } 80 | 81 | @Override 82 | public void visitRowNumber(Column column, int index1) { 83 | pageBuilder.setString(column, Integer.toString(index1)); 84 | } 85 | 86 | /** Stores the column letters produced by CellReference.convertNumToColString for index1 minus one. NOTE(review): the subtraction suggests index1 is 1-based - confirm against the callers. */ @Override 87 | public void visitColumnNumber(Column column, int index1) { 88 | String value = CellReference.convertNumToColString(index1 - 1); 89 | pageBuilder.setString(column, value); 90 | } 91 | 92 | /** The convert-error constant is stored unchanged. */ @Override 93 | protected void doConvertErrorConstant(Column column, String value) throws Exception { 94 | pageBuilder.setString(column, value); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/embulk/TimestampCellVisitor.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor.embulk; 2 | 3 | import java.util.Date; 4 | import java.util.TimeZone; 5 | 6 | import org.apache.poi.ss.usermodel.DateUtil; 7 | import org.apache.poi.ss.usermodel.Sheet; 8 | import org.embulk.parser.poi_excel.PoiExcelParserPlugin.PluginTask; 9 | import org.embulk.parser.poi_excel.visitor.PoiExcelVisitorValue; 10 | import org.embulk.spi.Column; 11 | import org.embulk.spi.time.Timestamp; 12 | import org.embulk.spi.time.TimestampParseException; 13 | import org.embulk.spi.time.TimestampParser; 14 | import org.embulk.spi.util.Timestamps; 15 | 16 | /** Writes Excel values into an Embulk timestamp column. Numbers, strings and longs are converted; booleans, error codes and the sheet, row and column attributes are reported through doConvertError. */ public class TimestampCellVisitor extends CellVisitor { 17 | 18 | public TimestampCellVisitor(PoiExcelVisitorValue visitorValue) { 19 | super(visitorValue); 20 | } 21 | 22 | /** Converts the number with DateUtil.getJavaDate using the default time zone of the timestamp parser of the column, then stores the resulting epoch milliseconds. */ @Override 23 | public void visitCellValueNumeric(Column column, Object source, double value) { 24 | TimestampParser parser = getTimestampParser(column); 25 | TimeZone tz =
parser.getDefaultTimeZone().toTimeZone(); 26 | Date date = DateUtil.getJavaDate(value, tz); 27 | Timestamp t = Timestamp.ofEpochMilli(date.getTime()); 28 | pageBuilder.setTimestamp(column, t); 29 | } 30 | 31 | /** Parses the text with the timestamp parser of the column; a TimestampParseException is handed to doConvertError. */ @Override 32 | public void visitCellValueString(Column column, Object source, String value) { 33 | Timestamp t; 34 | try { 35 | TimestampParser parser = getTimestampParser(column); 36 | t = parser.parse(value); 37 | } catch (TimestampParseException e) { 38 | doConvertError(column, value, e); 39 | return; 40 | } 41 | pageBuilder.setTimestamp(column, t); 42 | } 43 | 44 | /** Not convertible: always goes through doConvertError. */ @Override 45 | public void visitCellValueBoolean(Column column, Object source, boolean value) { 46 | doConvertError(column, value, new UnsupportedOperationException( 47 | "unsupported conversion Excel boolean to Embulk timestamp")); 48 | } 49 | 50 | /** Not convertible: always goes through doConvertError. */ @Override 51 | public void visitCellValueError(Column column, Object source, int code) { 52 | doConvertError(column, code, new UnsupportedOperationException( 53 | "unsupported conversion Excel Cell error code to Embulk timestamp")); 54 | } 55 | 56 | /** Treats the long as epoch milliseconds. */ @Override 57 | public void visitValueLong(Column column, Object source, long value) { 58 | pageBuilder.setTimestamp(column, Timestamp.ofEpochMilli(value)); 59 | } 60 | 61 | @Override 62 | public void visitSheetName(Column column) { 63 | Sheet sheet = visitorValue.getSheet(); 64 | visitSheetName(column, sheet); 65 | } 66 | 67 | /** Not convertible: always goes through doConvertError with the sheet name as the source value. */ @Override 68 | public void visitSheetName(Column column, Sheet sheet) { 69 | doConvertError(column, sheet.getSheetName(), new UnsupportedOperationException( 70 | "unsupported conversion sheet_name to Embulk timestamp")); 71 | } 72 | 73 | @Override 74 | public void visitRowNumber(Column column, int index1) { 75 | doConvertError(column, index1, new UnsupportedOperationException( 76 | "unsupported conversion row_number to Embulk timestamp")); 77 | } 78 | 79 | @Override 80 | public void visitColumnNumber(Column column, int index1) { 81 | doConvertError(column, index1, new
UnsupportedOperationException( 82 | "unsupported conversion column_number to Embulk timestamp")); 83 | } 84 | 85 | /** Writes the convert-error constant parsed with the timestamp parser of the column. */ @Override 86 | protected void doConvertErrorConstant(Column column, String value) throws Exception { 87 | TimestampParser parser = getTimestampParser(column); 88 | pageBuilder.setTimestamp(column, parser.parse(value)); 89 | } 90 | 91 | private TimestampParser[] timestampParsers; 92 | 93 | /** Builds the parser array from the plugin task and its columns on first use, then returns the entry at the index of the given column. */ protected final TimestampParser getTimestampParser(Column column) { 94 | if (timestampParsers == null) { 95 | PluginTask task = visitorValue.getPluginTask(); 96 | timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getColumns()); 97 | } 98 | return timestampParsers[column.getIndex()]; 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionFinder.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor.util; 2 | 3 | import org.apache.poi.ss.usermodel.Sheet; 4 | import org.apache.poi.ss.util.CellRangeAddress; 5 | 6 | /** Strategy for locating the merged region of a sheet that contains a given cell. */ public interface MergedRegionFinder { 7 | 8 | /** Returns the merged region containing the cell at rowIndex and columnIndex. The implementations in this package return null when the cell is in no merged region. */ public CellRangeAddress get(Sheet sheet, int rowIndex, int columnIndex); 9 | } 10 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionList.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor.util; 2 | 3 | import org.apache.poi.ss.usermodel.Sheet; 4 | import org.apache.poi.ss.util.CellRangeAddress; 5 | 6 | /** Finder that scans the merged regions of the sheet in index order on every call and returns the first one containing the cell. */ public class MergedRegionList implements MergedRegionFinder { 7 | 8 | @Override 9 | public CellRangeAddress get(Sheet sheet, int rowIndex, int columnIndex) { 10 | int size = sheet.getNumMergedRegions(); 11 | for (int i = 0; i < size; i++) { 12 | CellRangeAddress region = sheet.getMergedRegion(i); 13 | if
(region.isInRange(rowIndex, columnIndex)) { 14 | return region; 15 | } 16 | } 17 | 18 | return null; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionMap.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor.util; 2 | 3 | import java.util.Map; 4 | import java.util.concurrent.ConcurrentHashMap; 5 | 6 | import org.apache.poi.ss.usermodel.Sheet; 7 | import org.apache.poi.ss.util.CellRangeAddress; 8 | 9 | /** Finder that answers from a lookup table (row index, then column index, to region) built once per sheet and cached in a ConcurrentHashMap keyed by sheet. Subclasses choose the map implementations through newRowMap and newColumnMap. NOTE(review): the generic type arguments of the Map declarations look truncated in this text - verify against the original source. */ public abstract class MergedRegionMap implements MergedRegionFinder { 10 | 11 | private final Map>> sheetMap = new ConcurrentHashMap<>(); 12 | 13 | /** Returns the region cached for the cell, or null when the row or the column has no entry. NOTE(review): the null check on the cache is made outside the synchronized block and not repeated inside it, so two threads can both build the table for the same sheet; the tables would be equal and only the work is duplicated - confirm this is acceptable. */ @Override 14 | public CellRangeAddress get(Sheet sheet, int rowIndex, int columnIndex) { 15 | Map> rowMap = sheetMap.get(sheet); 16 | if (rowMap == null) { 17 | synchronized (sheet) { 18 | rowMap = createRowMap(sheet); 19 | sheetMap.put(sheet, rowMap); 20 | } 21 | } 22 | 23 | Map columnMap = rowMap.get(rowIndex); 24 | if (columnMap == null) { 25 | return null; 26 | } 27 | return columnMap.get(columnIndex); 28 | } 29 | 30 | /** Builds the table for one sheet by writing every cell of every merged region. Regions are visited from the highest index down to 0, so where regions overlap the entry of the lowest-indexed region is written last and is the one kept. */ protected Map> createRowMap(Sheet sheet) { 31 | Map> rowMap = newRowMap(); 32 | 33 | for (int i = sheet.getNumMergedRegions() - 1; i >= 0; i--) { 34 | CellRangeAddress region = sheet.getMergedRegion(i); 35 | 36 | for (int r = region.getFirstRow(); r <= region.getLastRow(); r++) { 37 | Map columnMap = rowMap.get(r); 38 | if (columnMap == null) { 39 | columnMap = newColumnMap(); 40 | rowMap.put(r, columnMap); 41 | } 42 | 43 | for (int c = region.getFirstColumn(); c <= region.getLastColumn(); c++) { 44 | columnMap.put(c, region); 45 | } 46 | } 47 | } 48 | 49 | return rowMap; 50 | } 51 | 52 | /** Supplies an empty map for the row level of the table. */ protected abstract Map> newRowMap(); 53 | 54 | /** Supplies an empty map for the column level of one row. */ protected abstract Map newColumnMap(); 55 | } 56 | --------------------------------------------------------------------------------
/src/main/java/org/embulk/parser/poi_excel/visitor/util/MergedRegionNothing.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel.visitor.util; 2 | 3 | import org.apache.poi.ss.usermodel.Sheet; 4 | import org.apache.poi.ss.util.CellRangeAddress; 5 | 6 | /** Finder that never reports a merged region. */ public class MergedRegionNothing implements MergedRegionFinder { 7 | 8 | /** Always returns null. */ @Override 9 | public CellRangeAddress get(Sheet sheet, int rowIndex, int columnIndex) { 10 | return null; 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/EmbulkPluginTester.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser; 2 | 3 | import java.io.Closeable; 4 | import java.io.File; 5 | import java.net.URISyntaxException; 6 | import java.net.URL; 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | import org.embulk.EmbulkEmbed; 11 | import org.embulk.EmbulkEmbed.Bootstrap; 12 | import org.embulk.config.ConfigLoader; 13 | import org.embulk.config.ConfigSource; 14 | import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord; 15 | import org.embulk.plugin.InjectedPluginSource; 16 | import org.embulk.spi.InputPlugin; 17 | import org.embulk.spi.OutputPlugin; 18 | import org.embulk.spi.ParserPlugin; 19 | 20 | import com.google.inject.Binder; 21 | import com.google.inject.Module; 22 | import com.google.inject.Provider; 23 | 24 | // @see https://github.com/embulk/embulk-input-jdbc/blob/master/embulk-input-mysql/src/test/java/org/embulk/input/mysql/EmbulkPluginTester.java 25 | /** Test helper that starts an embedded Embulk with the test input and output plugins plus any plugins added by the test, runs a configuration, and returns the records collected by the test output plugin. Close it to destroy the embedded Embulk. */ public class EmbulkPluginTester implements Closeable { 26 | 27 | /** One plugin registration: interface, name and implementation class. */ protected static class PluginDefinition { 28 | public final Class iface; 29 | public final String name; 30 | public final Class impl; 31 | 32 | public PluginDefinition(Class iface, String name, Class impl) { 33 | this.iface = iface; 34 | this.name = name; 35 | this.impl = impl;
36 | } 37 | } 38 | 39 | private final List plugins = new ArrayList<>(); 40 | 41 | private EmbulkEmbed embulk; 42 | 43 | private ConfigLoader configLoader; 44 | 45 | private EmbulkTestFileInputPlugin embulkTestFileInputPlugin = new EmbulkTestFileInputPlugin(); 46 | 47 | private EmbulkTestOutputPlugin embulkTestOutputPlugin = new EmbulkTestOutputPlugin(); 48 | 49 | public EmbulkPluginTester() { 50 | } 51 | 52 | /** Convenience constructor that registers one plugin. */ public EmbulkPluginTester(Class iface, String name, Class impl) { 53 | addPlugin(iface, name, impl); 54 | } 55 | 56 | /** Queues a plugin for registration; it takes effect when the embedded Embulk is first created by getEmbulkEmbed. */ public void addPlugin(Class iface, String name, Class impl) { 57 | plugins.add(new PluginDefinition(iface, name, impl)); 58 | } 59 | 60 | public void addParserPlugin(String name, Class impl) { 61 | addPlugin(ParserPlugin.class, name, impl); 62 | } 63 | 64 | /** Creates the embedded Embulk on first use. The module it installs first calls configurePlugin and then registers every queued plugin definition. */ protected EmbulkEmbed getEmbulkEmbed() { 65 | if (embulk == null) { 66 | Bootstrap bootstrap = new EmbulkEmbed.Bootstrap(); 67 | bootstrap.addModules(new Module() { 68 | @Override 69 | public void configure(Binder binder) { 70 | EmbulkPluginTester.this.configurePlugin(binder); 71 | 72 | for (PluginDefinition plugin : plugins) { 73 | InjectedPluginSource.registerPluginTo(binder, plugin.iface, plugin.name, plugin.impl); 74 | } 75 | } 76 | }); 77 | embulk = bootstrap.initializeCloseable(); 78 | } 79 | return embulk; 80 | } 81 | 82 | /** Registers the test input and output plugins and binds each plugin class to the single instance held by this tester, so the test can feed input to and read results from the same objects Embulk uses. */ protected void configurePlugin(Binder binder) { 83 | // input plugins 84 | InjectedPluginSource.registerPluginTo(binder, InputPlugin.class, EmbulkTestFileInputPlugin.TYPE, 85 | EmbulkTestFileInputPlugin.class); 86 | binder.bind(EmbulkTestFileInputPlugin.class).toProvider(new Provider() { 87 | 88 | @Override 89 | public EmbulkTestFileInputPlugin get() { 90 | return embulkTestFileInputPlugin; 91 | } 92 | }); 93 | 94 | // output plugins 95 | InjectedPluginSource.registerPluginTo(binder, OutputPlugin.class, EmbulkTestOutputPlugin.TYPE, 96 | EmbulkTestOutputPlugin.class); 97 | binder.bind(EmbulkTestOutputPlugin.class).toProvider(new Provider() { 98 | 99 | @Override 100 |
public EmbulkTestOutputPlugin get() { 101 | return embulkTestOutputPlugin; 102 | } 103 | }); 104 | } 105 | 106 | /** Returns the config loader of the embedded Embulk, creating it on first use. */ public ConfigLoader getConfigLoader() { 107 | if (configLoader == null) { 108 | configLoader = getEmbulkEmbed().newConfigLoader(); 109 | } 110 | return configLoader; 111 | } 112 | 113 | public ConfigSource newConfigSource() { 114 | return getConfigLoader().newConfigSource(); 115 | } 116 | 117 | /** Creates a parser config with its type already set. */ public EmbulkTestParserConfig newParserConfig(String type) { 118 | EmbulkTestParserConfig parser = new EmbulkTestParserConfig(); 119 | parser.setType(type); 120 | return parser; 121 | } 122 | 123 | /** Converts the URL to a File (a URISyntaxException is rethrown as RuntimeException) and runs the parser on it. */ public List runParser(URL inFile, EmbulkTestParserConfig parser) { 124 | File file; 125 | try { 126 | file = new File(inFile.toURI()); 127 | } catch (URISyntaxException e) { 128 | throw new RuntimeException(e); 129 | } 130 | return runParser(file, parser); 131 | } 132 | 133 | /** Runs the parser with the input type set to file and path_prefix set to the absolute path of the given file. */ public List runParser(File inFile, EmbulkTestParserConfig parser) { 134 | ConfigSource in = newConfigSource(); 135 | in.set("type", "file"); 136 | in.set("path_prefix", inFile.getAbsolutePath()); 137 | in.set("parser", parser); 138 | return runInput(in); 139 | } 140 | 141 | /** Runs the parser over the given list, which is handed to the EmbulkTestFileInputPlugin instance through setText. */ public List runParser(List list, EmbulkTestParserConfig parser) { 142 | ConfigSource in = newConfigSource(); 143 | in.set("type", EmbulkTestFileInputPlugin.TYPE); 144 | in.set("parser", parser); 145 | 146 | embulkTestFileInputPlugin.setText(list); 147 | return runInput(in); 148 | } 149 | 150 | /** Clears earlier results, runs the input into EmbulkTestOutputPlugin, and returns what that plugin collected. */ public List runInput(ConfigSource in) { 151 | ConfigSource out = newConfigSource(); 152 | out.set("type", EmbulkTestOutputPlugin.TYPE); 153 | 154 | embulkTestOutputPlugin.clearResult(); 155 | run(in, out); 156 | return embulkTestOutputPlugin.getResult(); 157 | } 158 | 159 | /** Wraps the in and out sections into one config and runs it. */ public void run(ConfigSource in, ConfigSource out) { 160 | ConfigSource config = newConfigSource(); 161 | config.set("in", in); 162 | config.set("out", out); 163 | run(config); 164 | } 165 | 166 | public void run(ConfigSource config) { 167 |
getEmbulkEmbed().run(config); 168 | } 169 | 170 | @Override 171 | public void close() { 172 | if (embulk != null) { 173 | embulk.destroy(); 174 | } 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/EmbulkTestFileInputPlugin.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser; 2 | 3 | import java.nio.charset.StandardCharsets; 4 | import java.util.List; 5 | 6 | import org.embulk.config.ConfigDiff; 7 | import org.embulk.config.ConfigSource; 8 | import org.embulk.config.Task; 9 | import org.embulk.config.TaskReport; 10 | import org.embulk.config.TaskSource; 11 | import org.embulk.spi.Buffer; 12 | import org.embulk.spi.Exec; 13 | import org.embulk.spi.FileInputPlugin; 14 | import org.embulk.spi.TransactionalFileInput; 15 | 16 | public class EmbulkTestFileInputPlugin implements FileInputPlugin { 17 | 18 | public static final String TYPE = "EmbulkTestFileInputPlugin"; 19 | 20 | public interface PluginTask extends Task { 21 | } 22 | 23 | private List list; 24 | 25 | public void setText(List list) { 26 | this.list = list; 27 | } 28 | 29 | @Override 30 | public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control control) { 31 | PluginTask task = config.loadConfig(PluginTask.class); 32 | 33 | int taskCount = 1; 34 | return resume(task.dump(), taskCount, control); 35 | } 36 | 37 | @Override 38 | public ConfigDiff resume(TaskSource taskSource, int taskCount, FileInputPlugin.Control control) { 39 | control.run(taskSource, taskCount); 40 | return Exec.newConfigDiff(); 41 | } 42 | 43 | @Override 44 | public void cleanup(TaskSource taskSource, int taskCount, List successTaskReports) { 45 | } 46 | 47 | @Override 48 | public TransactionalFileInput open(TaskSource taskSource, int taskIndex) { 49 | return new TransactionalFileInput() { 50 | private boolean eof = false; 51 | private int index = 0; 52 | 53 | @Override 54 | 
public Buffer poll() { 55 | if (index < list.size()) { 56 | String s = list.get(index++) + "\n"; 57 | return Buffer.copyOf(s.getBytes(StandardCharsets.UTF_8)); 58 | } 59 | 60 | eof = true; 61 | return null; 62 | } 63 | 64 | @Override 65 | public boolean nextFile() { 66 | return !eof; 67 | } 68 | 69 | @Override 70 | public void close() { 71 | } 72 | 73 | @Override 74 | public void abort() { 75 | } 76 | 77 | @Override 78 | public TaskReport commit() { 79 | return Exec.newTaskReport(); 80 | } 81 | }; 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/EmbulkTestOutputPlugin.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser; 2 | 3 | import java.text.MessageFormat; 4 | import java.util.LinkedHashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | import java.util.concurrent.CopyOnWriteArrayList; 8 | 9 | import org.embulk.config.ConfigDiff; 10 | import org.embulk.config.ConfigSource; 11 | import org.embulk.config.Task; 12 | import org.embulk.config.TaskReport; 13 | import org.embulk.config.TaskSource; 14 | import org.embulk.spi.Column; 15 | import org.embulk.spi.ColumnVisitor; 16 | import org.embulk.spi.Exec; 17 | import org.embulk.spi.OutputPlugin; 18 | import org.embulk.spi.Page; 19 | import org.embulk.spi.PageReader; 20 | import org.embulk.spi.Schema; 21 | import org.embulk.spi.TransactionalPageOutput; 22 | import org.embulk.spi.time.Timestamp; 23 | import org.embulk.spi.time.TimestampFormatter; 24 | 25 | public class EmbulkTestOutputPlugin implements OutputPlugin { 26 | 27 | public static final String TYPE = "EmbulkTestOutputPlugin"; 28 | 29 | public interface PluginTask extends Task, TimestampFormatter.Task { 30 | } 31 | 32 | public static class OutputRecord { 33 | private Map map = new LinkedHashMap<>(); 34 | 35 | public void set(String name, Object value) { 36 | map.put(name, value); 37 | } 38 | 39 | public 
String getAsString(String name) { 40 | try { 41 | return (String) map.get(name); 42 | } catch (Exception e) { 43 | throw new RuntimeException(MessageFormat.format("name={0}", name), e); 44 | } 45 | } 46 | 47 | public Long getAsLong(String name) { 48 | try { 49 | return (Long) map.get(name); 50 | } catch (Exception e) { 51 | throw new RuntimeException(MessageFormat.format("name={0}", name), e); 52 | } 53 | } 54 | 55 | public Double getAsDouble(String name) { 56 | try { 57 | return (Double) map.get(name); 58 | } catch (Exception e) { 59 | throw new RuntimeException(MessageFormat.format("name={0}", name), e); 60 | } 61 | } 62 | 63 | public Boolean getAsBoolean(String name) { 64 | try { 65 | return (Boolean) map.get(name); 66 | } catch (Exception e) { 67 | throw new RuntimeException(MessageFormat.format("name={0}", name), e); 68 | } 69 | } 70 | 71 | public Timestamp getAsTimestamp(String name) { 72 | try { 73 | return (Timestamp) map.get(name); 74 | } catch (Exception e) { 75 | throw new RuntimeException(MessageFormat.format("name={0}", name), e); 76 | } 77 | } 78 | 79 | @Override 80 | public String toString() { 81 | return map.toString(); 82 | } 83 | } 84 | 85 | private final List result = new CopyOnWriteArrayList<>(); 86 | 87 | @Override 88 | public ConfigDiff transaction(ConfigSource config, Schema schema, int taskCount, OutputPlugin.Control control) { 89 | final PluginTask task = config.loadConfig(PluginTask.class); 90 | return resume(task.dump(), schema, taskCount, control); 91 | } 92 | 93 | @Override 94 | public ConfigDiff resume(TaskSource taskSource, Schema schema, int taskCount, OutputPlugin.Control control) { 95 | control.run(taskSource); 96 | return Exec.newConfigDiff(); 97 | } 98 | 99 | @Override 100 | public void cleanup(TaskSource taskSource, Schema schema, int taskCount, List successTaskReports) { 101 | } 102 | 103 | @Override 104 | public TransactionalPageOutput open(TaskSource taskSource, final Schema schema, int taskIndex) { 105 | return new 
TransactionalPageOutput() { 106 | private final PageReader reader = new PageReader(schema); 107 | 108 | @Override 109 | public void add(Page page) { 110 | reader.setPage(page); 111 | while (reader.nextRecord()) { 112 | final OutputRecord record = new OutputRecord(); 113 | for (Column column : schema.getColumns()) { 114 | column.visit(new ColumnVisitor() { 115 | 116 | @Override 117 | public void timestampColumn(Column column) { 118 | if (reader.isNull(column)) { 119 | record.set(column.getName(), null); 120 | return; 121 | } 122 | record.set(column.getName(), reader.getTimestamp(column)); 123 | } 124 | 125 | @Override 126 | public void stringColumn(Column column) { 127 | if (reader.isNull(column)) { 128 | record.set(column.getName(), null); 129 | return; 130 | } 131 | record.set(column.getName(), reader.getString(column)); 132 | } 133 | 134 | @Override 135 | public void longColumn(Column column) { 136 | if (reader.isNull(column)) { 137 | record.set(column.getName(), null); 138 | return; 139 | } 140 | record.set(column.getName(), reader.getLong(column)); 141 | } 142 | 143 | @Override 144 | public void doubleColumn(Column column) { 145 | if (reader.isNull(column)) { 146 | record.set(column.getName(), null); 147 | return; 148 | } 149 | record.set(column.getName(), reader.getDouble(column)); 150 | } 151 | 152 | @Override 153 | public void booleanColumn(Column column) { 154 | if (reader.isNull(column)) { 155 | record.set(column.getName(), null); 156 | return; 157 | } 158 | record.set(column.getName(), reader.getBoolean(column)); 159 | } 160 | }); 161 | } 162 | result.add(record); 163 | } 164 | } 165 | 166 | @Override 167 | public void finish() { 168 | } 169 | 170 | @Override 171 | public void close() { 172 | reader.close(); 173 | } 174 | 175 | @Override 176 | public void abort() { 177 | } 178 | 179 | @Override 180 | public TaskReport commit() { 181 | return Exec.newTaskReport(); 182 | } 183 | }; 184 | } 185 | 186 | public void clearResult() { 187 | result.clear(); 188 | 
/**
 * Map-backed parser configuration for tests.
 *
 * <p>Entries are plain key/value pairs; the {@code "columns"} entry holds the
 * list of {@link EmbulkTestColumn} definitions added through
 * {@link #addColumn(String, String)}.
 */
@SuppressWarnings("serial")
public class EmbulkTestParserConfig extends HashMap<String, Object> {

    /**
     * Sets the {@code "type"} entry.
     *
     * @param type parser type name; {@code null} removes the entry
     */
    public void setType(String type) {
        set("type", type);
    }

    /**
     * Stores {@code value} under {@code key}; a {@code null} value removes the key
     * instead of storing a null entry.
     *
     * @param key   configuration key
     * @param value value to store, or {@code null} to remove the key
     */
    public void set(String key, Object value) {
        if (value == null) {
            super.remove(key);
        } else {
            super.put(key, value);
        }
    }

    /**
     * Returns the live list stored under {@code "columns"}, creating and
     * registering an empty list on first access.
     *
     * @return the mutable column list held by this configuration
     */
    public List<EmbulkTestColumn> getColumns() {
        // Only this class writes the "columns" entry, always as a List<EmbulkTestColumn>.
        @SuppressWarnings("unchecked")
        List<EmbulkTestColumn> columns = (List<EmbulkTestColumn>) super.get("columns");
        if (columns == null) {
            columns = new ArrayList<>();
            super.put("columns", columns);
        }
        return columns;
    }

    /**
     * Appends a column definition with the given name and type.
     *
     * @param name column name
     * @param type column type name
     * @return the new column, so further options can be chained with {@code set}
     */
    public EmbulkTestColumn addColumn(String name, String type) {
        EmbulkTestColumn column = new EmbulkTestColumn();
        column.set("name", name);
        column.set("type", type);
        getColumns().add(column);
        return column;
    }

    /** Map-backed column definition whose {@code set} can be chained. */
    public static class EmbulkTestColumn extends HashMap<String, Object> {

        /**
         * Stores {@code value} under {@code key}; a {@code null} value removes the key.
         *
         * @param key   option key
         * @param value value to store, or {@code null} to remove the key
         * @return this column
         */
        public EmbulkTestColumn set(String key, Object value) {
            if (value == null) {
                super.remove(key);
            } else {
                super.put(key, value);
            }
            return this;
        }
    }
}
java.text.SimpleDateFormat; 9 | import java.util.Arrays; 10 | import java.util.List; 11 | import java.util.TimeZone; 12 | 13 | import org.embulk.parser.EmbulkPluginTester; 14 | import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord; 15 | import org.embulk.parser.EmbulkTestParserConfig; 16 | import org.embulk.spi.time.Timestamp; 17 | import org.junit.experimental.theories.DataPoints; 18 | import org.junit.experimental.theories.Theories; 19 | import org.junit.experimental.theories.Theory; 20 | import org.junit.runner.RunWith; 21 | 22 | @RunWith(Theories.class) 23 | public class TestPoiExcelParserPlugin { 24 | 25 | @DataPoints 26 | public static String[] FILES = { "test1.xls", "test2.xlsx" }; 27 | 28 | @Theory 29 | public void test1(String excelFile) throws ParseException { 30 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 31 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 32 | 33 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 34 | parser.set("sheets", Arrays.asList("test1")); 35 | parser.set("skip_header_lines", 1); 36 | parser.set("default_timezone", "Asia/Tokyo"); 37 | parser.addColumn("boolean", "boolean"); 38 | parser.addColumn("long", "long"); 39 | parser.addColumn("double", "double"); 40 | parser.addColumn("string", "string"); 41 | parser.addColumn("timestamp", "timestamp").set("format", "%Y/%m/%d"); 42 | 43 | URL inFile = getClass().getResource(excelFile); 44 | List result = tester.runParser(inFile, parser); 45 | 46 | assertThat(result.size(), is(7)); 47 | check1(result, 0, true, 123L, 123.4d, "abc", "2015/10/4"); 48 | check1(result, 1, false, 456L, 456.7d, "def", "2015/10/5"); 49 | check1(result, 2, false, 123L, 123d, "456", "2015/10/6"); 50 | check1(result, 3, true, 123L, 123.4d, "abc", "2015/10/7"); 51 | check1(result, 4, true, 123L, 123.4d, "abc", "2015/10/4"); 52 | check1(result, 5, true, 1L, 1d, "true", null); 53 | check1(result, 6, null, null, null, null, null); 
54 | } 55 | } 56 | 57 | private SimpleDateFormat sdf; 58 | { 59 | sdf = new SimpleDateFormat("yyyy/MM/dd"); 60 | sdf.setTimeZone(TimeZone.getTimeZone("Asia/Tokyo")); 61 | } 62 | 63 | private void check1(List result, int index, Boolean b, Long l, Double d, String s, String t) 64 | throws ParseException { 65 | Timestamp timestamp = (t != null) ? Timestamp.ofEpochMilli(sdf.parse(t).getTime()) : null; 66 | 67 | OutputRecord r = result.get(index); 68 | // System.out.println(r); 69 | assertThat(r.getAsBoolean("boolean"), is(b)); 70 | assertThat(r.getAsLong("long"), is(l)); 71 | assertThat(r.getAsDouble("double"), is(d)); 72 | assertThat(r.getAsString("string"), is(s)); 73 | assertThat(r.getAsTimestamp("timestamp"), is(timestamp)); 74 | } 75 | 76 | @Theory 77 | public void testNumricFormat(String excelFile) throws ParseException { 78 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 79 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 80 | 81 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 82 | parser.set("sheets", Arrays.asList("test1")); 83 | parser.set("skip_header_lines", 1); 84 | parser.addColumn("value", "string").set("column_number", "C").set("numeric_format", "%.2f"); 85 | 86 | URL inFile = getClass().getResource(excelFile); 87 | List result = tester.runParser(inFile, parser); 88 | 89 | assertThat(result.size(), is(7)); 90 | checkNumricFormat(result, 0, "123.40"); 91 | checkNumricFormat(result, 1, "456.70"); 92 | checkNumricFormat(result, 2, "123.00"); 93 | } 94 | } 95 | 96 | private void checkNumricFormat(List result, int index, String s) { 97 | OutputRecord r = result.get(index); 98 | // System.out.println(r); 99 | assertThat(r.getAsString("value"), is(s)); 100 | } 101 | 102 | @Theory 103 | public void testRowNumber(String excelFile) throws ParseException { 104 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 105 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, 
PoiExcelParserPlugin.class); 106 | 107 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 108 | parser.set("sheets", Arrays.asList("test1")); 109 | parser.set("skip_header_lines", 1); 110 | parser.addColumn("sheet", "string").set("value", "sheet_name"); 111 | parser.addColumn("sheet-n", "long").set("value", "sheet_name"); 112 | parser.addColumn("row", "long").set("value", "row_number"); 113 | parser.addColumn("flag", "boolean"); 114 | parser.addColumn("col-n", "long").set("value", "column_number"); 115 | parser.addColumn("col-s", "string").set("value", "column_number"); 116 | 117 | URL inFile = getClass().getResource(excelFile); 118 | List result = tester.runParser(inFile, parser); 119 | 120 | assertThat(result.size(), is(7)); 121 | check4(result, 0, "test1", true); 122 | check4(result, 1, "test1", false); 123 | check4(result, 2, "test1", false); 124 | check4(result, 3, "test1", true); 125 | check4(result, 4, "test1", true); 126 | check4(result, 5, "test1", true); 127 | check4(result, 6, "test1", null); 128 | } 129 | } 130 | 131 | private void check4(List result, int index, String sheetName, Boolean b) { 132 | OutputRecord r = result.get(index); 133 | // System.out.println(r); 134 | assertThat(r.getAsString("sheet"), is(sheetName)); 135 | assertThat(r.getAsLong("sheet-n"), is(0L)); 136 | assertThat(r.getAsLong("row"), is((long) (index + 2))); 137 | assertThat(r.getAsBoolean("flag"), is(b)); 138 | assertThat(r.getAsLong("col-n"), is(1L)); 139 | assertThat(r.getAsString("col-s"), is("A")); 140 | } 141 | 142 | @Theory 143 | public void test_sheets(String excelFile) throws ParseException { 144 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 145 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 146 | 147 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 148 | parser.set("sheets", Arrays.asList("formula_replace", "merged_cell")); 149 | parser.addColumn("a", 
"string"); 150 | 151 | URL inFile = getClass().getResource(excelFile); 152 | List result = tester.runParser(inFile, parser); 153 | 154 | assertThat(result.size(), is(2 + 4)); 155 | assertThat(result.get(0).getAsString("a"), is("boolean")); 156 | assertThat(result.get(1).getAsString("a"), is("test2-b1")); 157 | assertThat(result.get(2).getAsString("a"), is("test3-a1")); 158 | assertThat(result.get(3).getAsString("a"), is("data")); 159 | } 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellAddress.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel; 2 | 3 | import static org.hamcrest.CoreMatchers.is; 4 | import static org.hamcrest.MatcherAssert.assertThat; 5 | 6 | import java.net.URL; 7 | import java.text.ParseException; 8 | import java.util.List; 9 | 10 | import org.embulk.parser.EmbulkPluginTester; 11 | import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord; 12 | import org.embulk.parser.EmbulkTestParserConfig; 13 | import org.junit.experimental.theories.DataPoints; 14 | import org.junit.experimental.theories.Theories; 15 | import org.junit.experimental.theories.Theory; 16 | import org.junit.runner.RunWith; 17 | 18 | @RunWith(Theories.class) 19 | public class TestPoiExcelParserPlugin_cellAddress { 20 | 21 | @DataPoints 22 | public static String[] FILES = { "test1.xls", "test2.xlsx" }; 23 | 24 | @Theory 25 | public void testCellAddress(String excelFile) throws ParseException { 26 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 27 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 28 | 29 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 30 | parser.set("sheet", "test1"); 31 | parser.set("skip_header_lines", 1); 32 | parser.addColumn("text", "string").set("column_number", "D"); 33 | 
parser.addColumn("fix_value", "string").set("cell_address", "B1").set("value", "cell_value"); 34 | parser.addColumn("fix_sheet", "string").set("cell_address", "B1").set("value", "sheet_name"); 35 | parser.addColumn("fix_row", "long").set("cell_address", "B1").set("value", "row_number"); 36 | parser.addColumn("fix_col", "long").set("cell_address", "B1").set("value", "column_number"); 37 | parser.addColumn("other_sheet_value", "string").set("cell_address", "style!B5").set("value", "cell_value"); 38 | parser.addColumn("other_sheet_name", "string").set("cell_address", "style!B5").set("value", "sheet_name"); 39 | parser.addColumn("other_sheet_row", "long").set("cell_address", "style!B5").set("value", "row_number"); 40 | parser.addColumn("other_sheet_col", "string").set("cell_address", "style!B5").set("value", "column_number"); 41 | 42 | URL inFile = getClass().getResource(excelFile); 43 | List result = tester.runParser(inFile, parser); 44 | 45 | assertThat(result.size(), is(7)); 46 | check1(result, 0, "abc"); 47 | check1(result, 1, "def"); 48 | check1(result, 2, "456"); 49 | check1(result, 3, "abc"); 50 | check1(result, 4, "abc"); 51 | check1(result, 5, "true"); 52 | check1(result, 6, null); 53 | } 54 | } 55 | 56 | private void check1(List result, int index, String text) { 57 | OutputRecord record = result.get(index); 58 | // System.out.println(record); 59 | assertThat(record.getAsString("text"), is(text)); 60 | assertThat(record.getAsString("fix_value"), is("long")); 61 | assertThat(record.getAsString("fix_sheet"), is("test1")); 62 | assertThat(record.getAsLong("fix_row"), is(1L)); 63 | assertThat(record.getAsLong("fix_col"), is(2L)); 64 | assertThat(record.getAsString("other_sheet_value"), is("bottom")); 65 | assertThat(record.getAsString("other_sheet_name"), is("style")); 66 | assertThat(record.getAsLong("other_sheet_row"), is(5L)); 67 | assertThat(record.getAsString("other_sheet_col"), is("B")); 68 | } 69 | } 70 | 
-------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellComment.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel; 2 | 3 | import static org.hamcrest.CoreMatchers.is; 4 | import static org.hamcrest.CoreMatchers.nullValue; 5 | import static org.hamcrest.MatcherAssert.assertThat; 6 | import static org.junit.Assert.fail; 7 | 8 | import java.net.URL; 9 | import java.text.ParseException; 10 | import java.util.Arrays; 11 | import java.util.List; 12 | 13 | import org.embulk.parser.EmbulkPluginTester; 14 | import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord; 15 | import org.embulk.parser.EmbulkTestParserConfig; 16 | import org.junit.experimental.theories.DataPoints; 17 | import org.junit.experimental.theories.Theories; 18 | import org.junit.experimental.theories.Theory; 19 | import org.junit.runner.RunWith; 20 | 21 | @RunWith(Theories.class) 22 | public class TestPoiExcelParserPlugin_cellComment { 23 | 24 | @DataPoints 25 | public static String[] FILES = { "test1.xls", "test2.xlsx" }; 26 | 27 | @Theory 28 | public void testComment_key(String excelFile) throws ParseException { 29 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 30 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 31 | 32 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 33 | parser.set("sheet", "comment"); 34 | parser.addColumn("author", "string").set("value", "cell_comment.author"); 35 | parser.addColumn("comment", "string").set("value", "cell_comment.string"); 36 | 37 | URL inFile = getClass().getResource(excelFile); 38 | List result = tester.runParser(inFile, parser); 39 | 40 | assertThat(result.size(), is(2)); 41 | check1(result, 0, "hishidama", "hishidama:\nmy comment"); 42 | check1(result, 1, null, null); 43 | } 44 | } 45 | 46 | private void 
check1(List result, int index, String author, String comment) { 47 | OutputRecord record = result.get(index); 48 | // System.out.println(record); 49 | assertThat(record.getAsString("comment"), is(comment)); 50 | assertThat(record.getAsString("author"), is(author)); 51 | } 52 | 53 | @Theory 54 | public void testComment_all(String excelFile) throws ParseException { 55 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 56 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 57 | 58 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 59 | parser.set("sheet", "comment"); 60 | parser.addColumn("comment", "string").set("value", "cell_comment"); 61 | 62 | URL inFile = getClass().getResource(excelFile); 63 | List result = tester.runParser(inFile, parser); 64 | 65 | assertThat(result.size(), is(2)); 66 | check2(result, 0, "hishidama", "hishidama:\\nmy comment"); 67 | check2(result, 1, null, null); 68 | } 69 | } 70 | 71 | private void check2(List result, int index, String author, String comment) { 72 | OutputRecord record = result.get(index); 73 | // System.out.println(record); 74 | String s = record.getAsString("comment"); 75 | if (author == null && comment == null) { 76 | assertThat(s, is(nullValue())); 77 | return; 78 | } 79 | 80 | if (!s.contains(String.format("\"author\":\"%s\"", author))) { 81 | fail(s); 82 | } 83 | if (!s.contains(String.format("\"string\":\"%s\"", comment))) { 84 | fail(s); 85 | } 86 | } 87 | 88 | @Theory 89 | public void testComment_keys(String excelFile) throws ParseException { 90 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 91 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 92 | 93 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 94 | parser.set("sheet", "comment"); 95 | parser.addColumn("comment", "string").set("value", "cell_comment") 96 | .set("attribute_name", Arrays.asList("author", 
"string")); 97 | 98 | URL inFile = getClass().getResource(excelFile); 99 | List result = tester.runParser(inFile, parser); 100 | 101 | assertThat(result.size(), is(2)); 102 | check2(result, 0, "hishidama", "hishidama:\\nmy comment"); 103 | check2(result, 1, null, null); 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellError.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel; 2 | 3 | import static org.hamcrest.CoreMatchers.is; 4 | import static org.hamcrest.CoreMatchers.nullValue; 5 | import static org.hamcrest.MatcherAssert.assertThat; 6 | import static org.junit.Assert.fail; 7 | 8 | import java.net.URL; 9 | import java.text.SimpleDateFormat; 10 | import java.util.List; 11 | 12 | import org.apache.poi.ss.usermodel.FormulaError; 13 | import org.embulk.parser.EmbulkPluginTester; 14 | import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord; 15 | import org.embulk.parser.EmbulkTestParserConfig; 16 | import org.embulk.spi.time.Timestamp; 17 | import org.junit.experimental.theories.DataPoints; 18 | import org.junit.experimental.theories.Theories; 19 | import org.junit.experimental.theories.Theory; 20 | import org.junit.runner.RunWith; 21 | 22 | @RunWith(Theories.class) 23 | public class TestPoiExcelParserPlugin_cellError { 24 | 25 | @DataPoints 26 | public static String[] FILES = { "test1.xls", "test2.xlsx" }; 27 | 28 | @Theory 29 | public void testCellError_default(String excelFile) throws Exception { 30 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 31 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 32 | 33 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 34 | parser.set("sheet", "test1"); 35 | parser.set("skip_header_lines", 7); 36 | parser.addColumn("b", 
"boolean").set("column_number", "A"); 37 | parser.addColumn("l", "long").set("column_number", "A"); 38 | parser.addColumn("d", "double").set("column_number", "A"); 39 | parser.addColumn("s", "string").set("column_number", "A"); 40 | parser.addColumn("t", "timestamp").set("column_number", "A"); 41 | 42 | URL inFile = getClass().getResource(excelFile); 43 | List result = tester.runParser(inFile, parser); 44 | 45 | assertThat(result.size(), is(1)); 46 | OutputRecord r = result.get(0); 47 | assertThat(r.getAsBoolean("b"), is(nullValue())); 48 | assertThat(r.getAsLong("l"), is(nullValue())); 49 | assertThat(r.getAsDouble("d"), is(nullValue())); 50 | assertThat(r.getAsString("s"), is(nullValue())); 51 | assertThat(r.getAsString("t"), is(nullValue())); 52 | } 53 | } 54 | 55 | @Theory 56 | public void testCellError_code(String excelFile) throws Exception { 57 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 58 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 59 | 60 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 61 | parser.set("sheet", "test1"); 62 | parser.set("skip_header_lines", 7); 63 | parser.set("on_cell_error", "error_code"); 64 | parser.addColumn("b", "boolean").set("column_number", "A"); 65 | parser.addColumn("l", "long").set("column_number", "A"); 66 | parser.addColumn("d", "double").set("column_number", "A"); 67 | parser.addColumn("s", "string").set("column_number", "A"); 68 | 69 | URL inFile = getClass().getResource(excelFile); 70 | List result = tester.runParser(inFile, parser); 71 | 72 | OutputRecord r = result.get(0); 73 | assertThat(r.getAsBoolean("b"), is(nullValue())); 74 | assertThat(r.getAsLong("l"), is((long) FormulaError.DIV0.getCode())); 75 | assertThat(r.getAsDouble("d"), is((double) FormulaError.DIV0.getCode())); 76 | assertThat(r.getAsString("s"), is("#DIV/0!")); 77 | } 78 | } 79 | 80 | @Theory 81 | public void testCellError_null(String excelFile) throws Exception 
{ 82 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 83 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 84 | 85 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 86 | parser.set("sheet", "test1"); 87 | parser.set("skip_header_lines", 7); 88 | parser.set("on_cell_error", "constant"); 89 | parser.addColumn("b", "boolean").set("column_number", "A"); 90 | parser.addColumn("l", "long").set("column_number", "A"); 91 | parser.addColumn("d", "double").set("column_number", "A"); 92 | parser.addColumn("s", "string").set("column_number", "A"); 93 | parser.addColumn("t", "timestamp").set("column_number", "A"); 94 | 95 | URL inFile = getClass().getResource(excelFile); 96 | List result = tester.runParser(inFile, parser); 97 | 98 | assertThat(result.size(), is(1)); 99 | OutputRecord r = result.get(0); 100 | assertThat(r.getAsBoolean("b"), is(nullValue())); 101 | assertThat(r.getAsLong("l"), is(nullValue())); 102 | assertThat(r.getAsDouble("d"), is(nullValue())); 103 | assertThat(r.getAsString("s"), is(nullValue())); 104 | assertThat(r.getAsString("t"), is(nullValue())); 105 | } 106 | } 107 | 108 | @Theory 109 | public void testCellError_empty(String excelFile) throws Exception { 110 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 111 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 112 | 113 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 114 | parser.set("sheet", "test1"); 115 | parser.set("skip_header_lines", 7); 116 | parser.set("on_cell_error", "constant.zzz"); 117 | parser.addColumn("s1", "string").set("column_number", "A"); 118 | parser.addColumn("s2", "string").set("column_number", "A").set("on_cell_error", "constant"); 119 | parser.addColumn("s3", "string").set("column_number", "A").set("on_cell_error", "constant."); 120 | parser.addColumn("s4", "string").set("column_number", "A").set("on_cell_error", 
"constant. "); 121 | 122 | URL inFile = getClass().getResource(excelFile); 123 | List result = tester.runParser(inFile, parser); 124 | 125 | assertThat(result.size(), is(1)); 126 | OutputRecord r = result.get(0); 127 | assertThat(r.getAsString("s1"), is("zzz")); 128 | assertThat(r.getAsString("s2"), is(nullValue())); 129 | assertThat(r.getAsString("s3"), is("")); 130 | assertThat(r.getAsString("s4"), is(" ")); 131 | } 132 | } 133 | 134 | @Theory 135 | public void testCellError_constant(String excelFile) throws Exception { 136 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 137 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 138 | 139 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 140 | parser.set("sheet", "test1"); 141 | parser.set("skip_header_lines", 7); 142 | parser.set("on_cell_error", "constant.0"); 143 | parser.addColumn("b", "boolean").set("column_number", "A"); 144 | parser.addColumn("l", "long").set("column_number", "A"); 145 | parser.addColumn("d", "double").set("column_number", "A"); 146 | parser.addColumn("s", "string").set("column_number", "A"); 147 | parser.addColumn("t", "timestamp").set("column_number", "A").set("format", "%Y/%m/%d") 148 | .set("on_cell_error", "constant.2000/1/1"); 149 | 150 | URL inFile = getClass().getResource(excelFile); 151 | List result = tester.runParser(inFile, parser); 152 | 153 | OutputRecord r = result.get(0); 154 | assertThat(r.getAsBoolean("b"), is(false)); 155 | assertThat(r.getAsLong("l"), is(0L)); 156 | assertThat(r.getAsDouble("d"), is(0d)); 157 | assertThat(r.getAsString("s"), is("0")); 158 | assertThat(r.getAsTimestamp("t"), 159 | is(Timestamp.ofEpochMilli(new SimpleDateFormat("yyyy/MM/dd z").parse("2000/01/01 UTC").getTime()))); 160 | } 161 | } 162 | 163 | @Theory 164 | public void testCellError_exception(String excelFile) throws Exception { 165 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 166 | 
tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 167 | 168 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 169 | parser.set("sheet", "test1"); 170 | parser.set("skip_header_lines", 7); 171 | parser.set("on_cell_error", "exception"); 172 | parser.addColumn("b", "boolean").set("column_number", "A"); 173 | parser.addColumn("l", "long").set("column_number", "A"); 174 | parser.addColumn("d", "double").set("column_number", "A"); 175 | parser.addColumn("s", "string").set("column_number", "A"); 176 | parser.addColumn("t", "timestamp").set("column_number", "A"); 177 | 178 | URL inFile = getClass().getResource(excelFile); 179 | try { 180 | tester.runParser(inFile, parser); 181 | } catch (Exception e) { 182 | Throwable c1 = e.getCause(); 183 | assertThat(c1.getMessage().contains("error at Column"), is(true)); 184 | Throwable c2 = c1.getCause(); 185 | assertThat(c2.getMessage().contains("encount cell error"), is(true)); 186 | return; // success 187 | } 188 | fail("must throw Exception"); 189 | } 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellFont.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel; 2 | 3 | import static org.hamcrest.CoreMatchers.is; 4 | import static org.hamcrest.CoreMatchers.nullValue; 5 | import static org.hamcrest.MatcherAssert.assertThat; 6 | import static org.junit.Assert.fail; 7 | 8 | import java.net.URL; 9 | import java.text.ParseException; 10 | import java.util.Arrays; 11 | import java.util.List; 12 | 13 | import org.embulk.parser.EmbulkPluginTester; 14 | import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord; 15 | import org.embulk.parser.EmbulkTestParserConfig; 16 | import org.junit.experimental.theories.DataPoints; 17 | import org.junit.experimental.theories.Theories; 18 | 
import org.junit.experimental.theories.Theory; 19 | import org.junit.runner.RunWith; 20 | 21 | @RunWith(Theories.class) 22 | public class TestPoiExcelParserPlugin_cellFont { 23 | 24 | @DataPoints 25 | public static String[] FILES = { "test1.xls", "test2.xlsx" }; 26 | 27 | @Theory 28 | public void testFont_key(String excelFile) throws ParseException { 29 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 30 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 31 | 32 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 33 | parser.set("sheet", "style"); 34 | parser.addColumn("color-text", "string"); 35 | parser.addColumn("font-color", "long").set("column_number", "C").set("value", "cell_font.color"); 36 | parser.addColumn("font-bold", "boolean").set("value", "cell_font.bold"); 37 | 38 | URL inFile = getClass().getResource(excelFile); 39 | List result = tester.runParser(inFile, parser); 40 | 41 | assertThat(result.size(), is(5)); 42 | check1(result, 0, "red", null, false); 43 | check1(result, 1, "green", 0xff0000L, true); 44 | check1(result, 2, "blue", null, null); 45 | check1(result, 3, "white", null, null); 46 | check1(result, 4, "black", null, null); 47 | } 48 | } 49 | 50 | private void check1(List result, int index, String colorText, Long fontColor, Boolean fontBold) { 51 | OutputRecord record = result.get(index); 52 | // System.out.println(record); 53 | assertThat(record.getAsString("color-text"), is(colorText)); 54 | assertThat(record.getAsLong("font-color"), is(fontColor)); 55 | assertThat(record.getAsBoolean("font-bold"), is(fontBold)); 56 | } 57 | 58 | @Theory 59 | public void testFont_all(String excelFile) throws ParseException { 60 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 61 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 62 | 63 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 64 | 
parser.set("sheet", "style"); 65 | parser.addColumn("color-text", "string"); 66 | parser.addColumn("color-font", "string").set("column_number", "C").set("value", "cell_font"); 67 | 68 | URL inFile = getClass().getResource(excelFile); 69 | List result = tester.runParser(inFile, parser); 70 | 71 | assertThat(result.size(), is(5)); 72 | check2(result, 0, "red", null, false); 73 | check2(result, 1, "green", 0xff0000L, true); 74 | check2(result, 2, "blue", null, null); 75 | check2(result, 3, "white", null, null); 76 | check2(result, 4, "black", null, null); 77 | } 78 | } 79 | 80 | private void check2(List result, int index, String colorText, Long fontColor, Boolean fontBold) { 81 | OutputRecord record = result.get(index); 82 | // System.out.println(record); 83 | assertThat(record.getAsString("color-text"), is(colorText)); 84 | String font = record.getAsString("color-font"); 85 | if (fontColor == null && fontBold == null) { 86 | assertThat(font, is(nullValue())); 87 | return; 88 | } 89 | 90 | if (fontColor == null) { 91 | if (!font.contains("\"color\":null")) { 92 | fail(font); 93 | } 94 | } else { 95 | if (!font.contains(String.format("\"color\":\"%06x\"", fontColor))) { 96 | fail(font); 97 | } 98 | } 99 | if (fontBold == null) { 100 | if (!font.contains("\"bold\":null")) { 101 | fail(font); 102 | } 103 | } else { 104 | if (!font.contains(String.format("\"bold\":%b", fontBold))) { 105 | fail(font); 106 | } 107 | } 108 | } 109 | 110 | @Theory 111 | public void testFont_keys(String excelFile) throws ParseException { 112 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 113 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 114 | 115 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 116 | parser.set("sheet", "style"); 117 | parser.addColumn("color-text", "string"); 118 | parser.addColumn("color-font", "string").set("column_number", "C").set("value", "cell_font") 119 | .set("attribute_name", 
Arrays.asList("color", "bold")); 120 | 121 | URL inFile = getClass().getResource(excelFile); 122 | List result = tester.runParser(inFile, parser); 123 | 124 | assertThat(result.size(), is(5)); 125 | check2(result, 0, "red", null, false); 126 | check2(result, 1, "green", 0xff0000L, true); 127 | check2(result, 2, "blue", null, null); 128 | check2(result, 3, "white", null, null); 129 | check2(result, 4, "black", null, null); 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellStyle.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel; 2 | 3 | import static org.hamcrest.CoreMatchers.is; 4 | import static org.hamcrest.MatcherAssert.assertThat; 5 | import static org.junit.Assert.fail; 6 | 7 | import java.net.URL; 8 | import java.text.ParseException; 9 | import java.util.Arrays; 10 | import java.util.List; 11 | 12 | import org.apache.poi.ss.usermodel.BorderStyle; 13 | import org.embulk.parser.EmbulkPluginTester; 14 | import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord; 15 | import org.embulk.parser.EmbulkTestParserConfig; 16 | import org.junit.experimental.theories.DataPoints; 17 | import org.junit.experimental.theories.Theories; 18 | import org.junit.experimental.theories.Theory; 19 | import org.junit.runner.RunWith; 20 | 21 | @RunWith(Theories.class) 22 | public class TestPoiExcelParserPlugin_cellStyle { 23 | 24 | @DataPoints 25 | public static String[] FILES = { "test1.xls", "test2.xlsx" }; 26 | 27 | @Theory 28 | public void testStyle_key(String excelFile) throws ParseException { 29 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 30 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 31 | 32 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 33 | parser.set("sheet", "style"); 34 | 
parser.addColumn("color-text", "string"); 35 | parser.addColumn("color", "string").set("value", "cell_style.fill_foreground_color"); 36 | parser.addColumn("border-text", "string"); 37 | parser.addColumn("border-top", "long").set("value", "cell_style.border_top"); 38 | parser.addColumn("border-bottom", "long").set("value", "cell_style.border_bottom"); 39 | parser.addColumn("border-left", "long").set("value", "cell_style.border_left"); 40 | parser.addColumn("border-right", "long").set("value", "cell_style.border_right"); 41 | parser.addColumn("border-all", "long").set("value", "cell_style.border"); 42 | 43 | URL inFile = getClass().getResource(excelFile); 44 | List result = tester.runParser(inFile, parser); 45 | 46 | assertThat(result.size(), is(5)); 47 | check1(result, 0, "red", 255, 0, 0, "top", BorderStyle.THIN.getCode(), 0, 0, 0); 48 | check1(result, 1, "green", 0, 128, 0, null, 0, 0, 0, 0); 49 | check1(result, 2, "blue", 0, 0, 255, "left", 0, 0, BorderStyle.THIN.getCode(), 0); 50 | check1(result, 3, "white", 255, 255, 255, "right", 0, 0, 0, BorderStyle.THIN.getCode()); 51 | check1(result, 4, "black", 0, 0, 0, "bottom", 0, BorderStyle.MEDIUM.getCode(), 0, 0); 52 | } 53 | } 54 | 55 | private void check1(List result, int index, String colorText, int r, int g, int b, String borderText, 56 | long top, long bottom, long left, long right) { 57 | OutputRecord record = result.get(index); 58 | // System.out.println(record); 59 | assertThat(record.getAsString("color-text"), is(colorText)); 60 | assertThat(record.getAsString("color"), is(String.format("%02x%02x%02x", r, g, b))); 61 | assertThat(record.getAsString("border-text"), is(borderText)); 62 | assertThat(record.getAsLong("border-top"), is(top)); 63 | assertThat(record.getAsLong("border-bottom"), is(bottom)); 64 | assertThat(record.getAsLong("border-left"), is(left)); 65 | assertThat(record.getAsLong("border-right"), is(right)); 66 | assertThat(record.getAsLong("border-all"), is(top << 24 | bottom << 16 | left << 8 | 
right)); 67 | } 68 | 69 | @Theory 70 | public void testStyle_all(String excelFile) throws ParseException { 71 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 72 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 73 | 74 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 75 | parser.set("sheet", "style"); 76 | parser.addColumn("color-text", "string"); 77 | parser.addColumn("color-style", "string").set("column_number", "A").set("value", "cell_style"); 78 | parser.addColumn("border-style", "string").set("column_number", "B").set("value", "cell_style"); 79 | 80 | URL inFile = getClass().getResource(excelFile); 81 | List result = tester.runParser(inFile, parser); 82 | 83 | assertThat(result.size(), is(5)); 84 | check2(result, 0, "red", 255, 0, 0, "top", BorderStyle.THIN.getCode(), 0, 0, 0); 85 | check2(result, 1, "green", 0, 128, 0, null, 0, 0, 0, 0); 86 | check2(result, 2, "blue", 0, 0, 255, "left", 0, 0, BorderStyle.THIN.getCode(), 0); 87 | check2(result, 3, "white", 255, 255, 255, "right", 0, 0, 0, BorderStyle.THIN.getCode()); 88 | check2(result, 4, "black", 0, 0, 0, "bottom", 0, BorderStyle.MEDIUM.getCode(), 0, 0); 89 | } 90 | } 91 | 92 | private void check2(List result, int index, String colorText, int r, int g, int b, String borderText, 93 | long top, long bottom, long left, long right) { 94 | OutputRecord record = result.get(index); 95 | // System.out.println(record); 96 | assertThat(record.getAsString("color-text"), is(colorText)); 97 | String color = record.getAsString("color-style"); 98 | if (!color.contains(String.format("\"fill_foreground_color\":\"%02x%02x%02x\"", r, g, b))) { 99 | fail(color); 100 | } 101 | String border = record.getAsString("border-style"); 102 | if (!border.contains(String.format("\"border_top\":%d", top))) { 103 | fail(border); 104 | } 105 | if (!border.contains(String.format("\"border_bottom\":%d", bottom))) { 106 | fail(border); 107 | } 108 | if 
(!border.contains(String.format("\"border_left\":%d", left))) { 109 | fail(border); 110 | } 111 | if (!border.contains(String.format("\"border_right\":%d", right))) { 112 | fail(border); 113 | } 114 | } 115 | 116 | @Theory 117 | public void testStyle_keys(String excelFile) throws ParseException { 118 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 119 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 120 | 121 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 122 | parser.set("sheet", "style"); 123 | parser.addColumn("color-text", "string"); 124 | parser.addColumn("color-style", "string").set("column_number", "A").set("value", "cell_style") 125 | .set("cell_style_name", Arrays.asList("fill_foreground_color")); 126 | parser.addColumn("border-style", "string").set("column_number", "B").set("value", "cell_style") 127 | .set("attribute_name", Arrays.asList("border_top", "border_bottom", "border_left", "border_right")); 128 | 129 | URL inFile = getClass().getResource(excelFile); 130 | List result = tester.runParser(inFile, parser); 131 | 132 | assertThat(result.size(), is(5)); 133 | check2(result, 0, "red", 255, 0, 0, "top", BorderStyle.THIN.getCode(), 0, 0, 0); 134 | check2(result, 1, "green", 0, 128, 0, null, 0, 0, 0, 0); 135 | check2(result, 2, "blue", 0, 0, 255, "left", 0, 0, BorderStyle.THIN.getCode(), 0); 136 | check2(result, 3, "white", 255, 255, 255, "right", 0, 0, 0, BorderStyle.THIN.getCode()); 137 | check2(result, 4, "black", 0, 0, 0, "bottom", 0, BorderStyle.MEDIUM.getCode(), 0, 0); 138 | } 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_cellType.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel; 2 | 3 | import static org.hamcrest.CoreMatchers.is; 4 | import static 
org.hamcrest.MatcherAssert.assertThat; 5 | 6 | import java.net.URL; 7 | import java.text.ParseException; 8 | import java.util.Arrays; 9 | import java.util.List; 10 | 11 | import org.apache.poi.ss.usermodel.CellType; 12 | import org.embulk.parser.EmbulkPluginTester; 13 | import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord; 14 | import org.embulk.parser.EmbulkTestParserConfig; 15 | import org.junit.experimental.theories.DataPoints; 16 | import org.junit.experimental.theories.Theories; 17 | import org.junit.experimental.theories.Theory; 18 | import org.junit.runner.RunWith; 19 | 20 | @RunWith(Theories.class) 21 | public class TestPoiExcelParserPlugin_cellType { 22 | 23 | @DataPoints 24 | public static String[] FILES = { "test1.xls", "test2.xlsx" }; 25 | 26 | @Theory 27 | public void testCellType(String excelFile) throws ParseException { 28 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 29 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 30 | 31 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 32 | parser.set("sheets", Arrays.asList("test1")); 33 | parser.set("skip_header_lines", 3); 34 | parser.addColumn("long", "long").set("column_number", "A").set("value", "cell_type"); 35 | parser.addColumn("string", "string").set("column_number", "A").set("value", "cell_type"); 36 | 37 | URL inFile = getClass().getResource(excelFile); 38 | List result = tester.runParser(inFile, parser); 39 | 40 | assertThat(result.size(), is(5)); 41 | check1(result, 0, CellType.NUMERIC, "NUMERIC"); 42 | check1(result, 1, CellType.STRING, "STRING"); 43 | check1(result, 2, CellType.FORMULA, "FORMULA"); 44 | check1(result, 3, CellType.BOOLEAN, "BOOLEAN"); 45 | check1(result, 4, CellType.FORMULA, "FORMULA"); 46 | } 47 | } 48 | 49 | @Theory 50 | public void testCellCachedType(String excelFile) throws ParseException { 51 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 52 | 
tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 53 | 54 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 55 | parser.set("sheets", Arrays.asList("test1")); 56 | parser.set("skip_header_lines", 3); 57 | parser.addColumn("long", "long").set("column_number", "A").set("value", "cell_cached_type"); 58 | parser.addColumn("string", "string").set("column_number", "A").set("value", "cell_cached_type"); 59 | 60 | URL inFile = getClass().getResource(excelFile); 61 | List result = tester.runParser(inFile, parser); 62 | 63 | assertThat(result.size(), is(5)); 64 | check1(result, 0, CellType.NUMERIC, "NUMERIC"); 65 | check1(result, 1, CellType.STRING, "STRING"); 66 | check1(result, 2, CellType.BOOLEAN, "BOOLEAN"); 67 | check1(result, 3, CellType.BOOLEAN, "BOOLEAN"); 68 | check1(result, 4, CellType.ERROR, "ERROR"); 69 | } 70 | } 71 | 72 | @SuppressWarnings("deprecation") 73 | private void check1(List result, int index, CellType cellType, String s) throws ParseException { 74 | OutputRecord r = result.get(index); 75 | // System.out.println(r); 76 | assertThat(r.getAsLong("long"), is((long) cellType.getCode())); 77 | assertThat(r.getAsString("string"), is(s)); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_columnNumber.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel; 2 | 3 | import static org.hamcrest.CoreMatchers.is; 4 | import static org.hamcrest.CoreMatchers.nullValue; 5 | import static org.hamcrest.MatcherAssert.assertThat; 6 | 7 | import java.net.URL; 8 | import java.text.ParseException; 9 | import java.util.List; 10 | 11 | import org.embulk.parser.EmbulkPluginTester; 12 | import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord; 13 | import org.embulk.parser.EmbulkTestParserConfig; 14 | import 
org.junit.experimental.theories.DataPoints; 15 | import org.junit.experimental.theories.Theories; 16 | import org.junit.experimental.theories.Theory; 17 | import org.junit.runner.RunWith; 18 | 19 | @RunWith(Theories.class) 20 | public class TestPoiExcelParserPlugin_columnNumber { 21 | 22 | @DataPoints 23 | public static String[] FILES = { "test1.xls", "test2.xlsx" }; 24 | 25 | @Theory 26 | public void testColumnNumber_string(String excelFile) throws Exception { 27 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 28 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 29 | 30 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 31 | parser.set("sheet", "test1"); 32 | parser.set("skip_header_lines", 1); 33 | parser.addColumn("text", "string").set("column_number", "D"); 34 | 35 | URL inFile = getClass().getResource(excelFile); 36 | List result = tester.runParser(inFile, parser); 37 | 38 | assertThat(result.size(), is(7)); 39 | assertThat(result.get(0).getAsString("text"), is("abc")); 40 | assertThat(result.get(1).getAsString("text"), is("def")); 41 | assertThat(result.get(2).getAsString("text"), is("456")); 42 | assertThat(result.get(3).getAsString("text"), is("abc")); 43 | assertThat(result.get(4).getAsString("text"), is("abc")); 44 | assertThat(result.get(5).getAsString("text"), is("true")); 45 | assertThat(result.get(6).getAsString("text"), is(nullValue())); 46 | } 47 | } 48 | 49 | @Theory 50 | public void testColumnNumber_int(String excelFile) throws Exception { 51 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 52 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 53 | 54 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 55 | parser.set("sheet", "test1"); 56 | parser.set("skip_header_lines", 1); 57 | parser.addColumn("long", "long").set("column_number", 2); 58 | parser.addColumn("double", "double"); 59 | 60 | 
URL inFile = getClass().getResource(excelFile); 61 | List result = tester.runParser(inFile, parser); 62 | 63 | assertThat(result.size(), is(7)); 64 | check_int(result, 0, 123L, 123.4d); 65 | check_int(result, 1, 456L, 456.7d); 66 | check_int(result, 2, 123L, 123d); 67 | check_int(result, 3, 123L, 123.4d); 68 | check_int(result, 4, 123L, 123.4d); 69 | check_int(result, 5, 1L, 1d); 70 | check_int(result, 6, null, null); 71 | } 72 | } 73 | 74 | private void check_int(List result, int index, Long l, Double d) throws ParseException { 75 | OutputRecord r = result.get(index); 76 | // System.out.println(r); 77 | assertThat(r.getAsLong("long"), is(l)); 78 | assertThat(r.getAsDouble("double"), is(d)); 79 | } 80 | 81 | @Theory 82 | public void testColumnNumber_move(String excelFile) throws Exception { 83 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 84 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 85 | 86 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 87 | parser.set("sheet", "test1"); 88 | parser.set("skip_header_lines", 1); 89 | parser.addColumn("long1", "long").set("column_number", 2); 90 | parser.addColumn("long2", "long").set("column_number", "="); 91 | parser.addColumn("double1", "double").set("column_number", "+"); 92 | parser.addColumn("double2", "double").set("column_number", "="); 93 | parser.addColumn("long3", "long").set("column_number", "-"); 94 | 95 | URL inFile = getClass().getResource(excelFile); 96 | List result = tester.runParser(inFile, parser); 97 | 98 | assertThat(result.size(), is(7)); 99 | check_move(result, 0, 123L, 123.4d); 100 | check_move(result, 1, 456L, 456.7d); 101 | check_move(result, 2, 123L, 123d); 102 | check_move(result, 3, 123L, 123.4d); 103 | check_move(result, 4, 123L, 123.4d); 104 | check_move(result, 5, 1L, 1d); 105 | check_move(result, 6, null, null); 106 | } 107 | } 108 | 109 | private void check_move(List result, int index, Long l, Double d) 
throws ParseException { 110 | OutputRecord r = result.get(index); 111 | // System.out.println(r); 112 | assertThat(r.getAsLong("long1"), is(l)); 113 | assertThat(r.getAsLong("long2"), is(l)); 114 | assertThat(r.getAsLong("long3"), is(l)); 115 | assertThat(r.getAsDouble("double1"), is(d)); 116 | assertThat(r.getAsDouble("double2"), is(d)); 117 | } 118 | 119 | @Theory 120 | public void testColumnNumber_move2(String excelFile) throws Exception { 121 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 122 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 123 | 124 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 125 | parser.set("sheet", "test1"); 126 | parser.set("skip_header_lines", 1); 127 | parser.addColumn("long1", "long").set("column_number", 2); 128 | parser.addColumn("string1", "string").set("column_number", "+2"); 129 | parser.addColumn("long2", "long").set("column_number", "=long1"); 130 | parser.addColumn("string2", "string").set("column_number", "=string1"); 131 | parser.addColumn("long3", "long").set("column_number", "-2"); 132 | 133 | URL inFile = getClass().getResource(excelFile); 134 | List result = tester.runParser(inFile, parser); 135 | 136 | assertThat(result.size(), is(7)); 137 | check_move2(result, 0, 123L, "abc"); 138 | check_move2(result, 1, 456L, "def"); 139 | check_move2(result, 2, 123L, "456"); 140 | check_move2(result, 3, 123L, "abc"); 141 | check_move2(result, 4, 123L, "abc"); 142 | check_move2(result, 5, 1L, "true"); 143 | check_move2(result, 6, null, null); 144 | } 145 | } 146 | 147 | private void check_move2(List result, int index, Long l, String s) throws ParseException { 148 | OutputRecord r = result.get(index); 149 | // System.out.println(r); 150 | assertThat(r.getAsLong("long1"), is(l)); 151 | assertThat(r.getAsLong("long2"), is(l)); 152 | assertThat(r.getAsLong("long3"), is(l)); 153 | assertThat(r.getAsString("string1"), is(s)); 154 | 
assertThat(r.getAsString("string2"), is(s)); 155 | } 156 | 157 | @Theory 158 | public void testColumnNumber_moveName(String excelFile) throws Exception { 159 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 160 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 161 | 162 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 163 | parser.set("sheet", "test1"); 164 | parser.set("skip_header_lines", 1); 165 | parser.addColumn("long1", "long").set("column_number", 2); 166 | parser.addColumn("double1", "double").set("column_number", "+long1"); 167 | parser.addColumn("long2", "long").set("column_number", "=long1"); 168 | parser.addColumn("boolean1", "boolean").set("column_number", "-long1"); 169 | 170 | URL inFile = getClass().getResource(excelFile); 171 | List result = tester.runParser(inFile, parser); 172 | 173 | assertThat(result.size(), is(7)); 174 | check_moveName(result, 0, true, 123L, 123.4d); 175 | check_moveName(result, 1, false, 456L, 456.7d); 176 | check_moveName(result, 2, false, 123L, 123d); 177 | check_moveName(result, 3, true, 123L, 123.4d); 178 | check_moveName(result, 4, true, 123L, 123.4d); 179 | check_moveName(result, 5, true, 1L, 1d); 180 | check_moveName(result, 6, null, null, null); 181 | } 182 | } 183 | 184 | private void check_moveName(List result, int index, Boolean b, Long l, Double d) 185 | throws ParseException { 186 | OutputRecord r = result.get(index); 187 | // System.out.println(r); 188 | assertThat(r.getAsLong("long1"), is(l)); 189 | assertThat(r.getAsLong("long2"), is(l)); 190 | assertThat(r.getAsDouble("double1"), is(d)); 191 | assertThat(r.getAsBoolean("boolean1"), is(b)); 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_constant.java: -------------------------------------------------------------------------------- 1 | package 
org.embulk.parser.poi_excel; 2 | 3 | import static org.hamcrest.CoreMatchers.is; 4 | import static org.hamcrest.CoreMatchers.nullValue; 5 | import static org.hamcrest.MatcherAssert.assertThat; 6 | 7 | import java.net.URL; 8 | import java.text.ParseException; 9 | import java.util.List; 10 | 11 | import org.embulk.parser.EmbulkPluginTester; 12 | import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord; 13 | import org.embulk.parser.EmbulkTestParserConfig; 14 | import org.junit.experimental.theories.DataPoints; 15 | import org.junit.experimental.theories.Theories; 16 | import org.junit.experimental.theories.Theory; 17 | import org.junit.runner.RunWith; 18 | 19 | @RunWith(Theories.class) 20 | public class TestPoiExcelParserPlugin_constant { 21 | 22 | @DataPoints 23 | public static String[] FILES = { "test1.xls", "test2.xlsx" }; 24 | 25 | @Theory 26 | public void testConstant(String excelFile) throws Exception { 27 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 28 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 29 | 30 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 31 | parser.set("sheet", "style"); 32 | parser.addColumn("const-s", "string").set("value", "constant.zzz"); 33 | parser.addColumn("const-n", "long").set("value", "constant.-1"); 34 | parser.addColumn("space", "string").set("value", "constant. 
"); 35 | parser.addColumn("empty", "string").set("value", "constant."); 36 | parser.addColumn("null", "string").set("value", "constant"); 37 | parser.addColumn("cell", "string"); 38 | 39 | URL inFile = getClass().getResource(excelFile); 40 | List result = tester.runParser(inFile, parser); 41 | 42 | assertThat(result.size(), is(5)); 43 | check(result, 0, "red"); 44 | check(result, 1, "green"); 45 | check(result, 2, "blue"); 46 | check(result, 3, "white"); 47 | check(result, 4, "black"); 48 | } 49 | } 50 | 51 | private void check(List result, int index, String s) throws ParseException { 52 | OutputRecord r = result.get(index); 53 | // System.out.println(r); 54 | assertThat(r.getAsString("const-s"), is("zzz")); 55 | assertThat(r.getAsLong("const-n"), is(-1L)); 56 | assertThat(r.getAsString("space"), is(" ")); 57 | assertThat(r.getAsString("empty"), is("")); 58 | assertThat(r.getAsString("null"), is(nullValue())); 59 | assertThat(r.getAsString("cell"), is(s)); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_convertError.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel; 2 | 3 | import static org.hamcrest.CoreMatchers.is; 4 | import static org.hamcrest.CoreMatchers.nullValue; 5 | import static org.hamcrest.MatcherAssert.assertThat; 6 | import static org.junit.Assert.fail; 7 | 8 | import java.net.URL; 9 | import java.text.SimpleDateFormat; 10 | import java.util.List; 11 | 12 | import org.embulk.parser.EmbulkPluginTester; 13 | import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord; 14 | import org.embulk.parser.EmbulkTestParserConfig; 15 | import org.embulk.spi.time.Timestamp; 16 | import org.junit.experimental.theories.DataPoints; 17 | import org.junit.experimental.theories.Theories; 18 | import org.junit.experimental.theories.Theory; 19 | import org.junit.runner.RunWith; 
20 | 21 | @RunWith(Theories.class) 22 | public class TestPoiExcelParserPlugin_convertError { 23 | 24 | @DataPoints 25 | public static String[] FILES = { "test1.xls", "test2.xlsx" }; 26 | 27 | @Theory 28 | public void testConvertError_default(String excelFile) throws Exception { 29 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 30 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 31 | 32 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 33 | parser.set("sheet", "style"); 34 | parser.addColumn("t", "timestamp").set("column_number", "A"); 35 | 36 | URL inFile = getClass().getResource(excelFile); 37 | try { 38 | tester.runParser(inFile, parser); 39 | } catch (Exception e) { 40 | Throwable c1 = e.getCause(); 41 | assertThat(c1.getMessage().contains("error at Column"), is(true)); 42 | Throwable c2 = c1.getCause(); 43 | assertThat(c2.getMessage().contains("convert error"), is(true)); 44 | return; // success 45 | } 46 | fail("must throw Exception"); 47 | } 48 | } 49 | 50 | @Theory 51 | public void testConvertError_exception(String excelFile) throws Exception { 52 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 53 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 54 | 55 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 56 | parser.set("sheet", "style"); 57 | parser.set("on_convert_error", "exception"); 58 | parser.addColumn("t", "timestamp").set("column_number", "A"); 59 | 60 | URL inFile = getClass().getResource(excelFile); 61 | try { 62 | tester.runParser(inFile, parser); 63 | } catch (Exception e) { 64 | Throwable c1 = e.getCause(); 65 | assertThat(c1.getMessage().contains("error at Column"), is(true)); 66 | Throwable c2 = c1.getCause(); 67 | assertThat(c2.getMessage().contains("convert error"), is(true)); 68 | return; // success 69 | } 70 | fail("must throw Exception"); 71 | } 72 | } 73 | 74 | @Theory 75 | 
public void testConvertError_null(String excelFile) throws Exception { 76 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 77 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 78 | 79 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 80 | parser.set("sheet", "comment"); 81 | parser.set("on_convert_error", "constant"); 82 | parser.addColumn("t", "timestamp").set("column_number", "A"); 83 | 84 | URL inFile = getClass().getResource(excelFile); 85 | List result = tester.runParser(inFile, parser); 86 | 87 | assertThat(result.size(), is(2)); 88 | assertThat(result.get(0).getAsTimestamp("t"), is(nullValue())); 89 | assertThat(result.get(1).getAsTimestamp("t"), is(nullValue())); 90 | } 91 | } 92 | 93 | @Theory 94 | public void testConvertError_constant(String excelFile) throws Exception { 95 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 96 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 97 | 98 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 99 | parser.set("sheet", "comment"); 100 | parser.set("on_convert_error", "constant.0"); 101 | parser.addColumn("b", "boolean").set("column_number", "A"); 102 | parser.addColumn("l", "long").set("column_number", "A"); 103 | parser.addColumn("d", "double").set("column_number", "A"); 104 | parser.addColumn("t", "timestamp").set("column_number", "A").set("format", "%Y/%m/%d") 105 | .set("on_convert_error", "constant.2000/1/1"); 106 | 107 | URL inFile = getClass().getResource(excelFile); 108 | List result = tester.runParser(inFile, parser); 109 | 110 | assertThat(result.size(), is(2)); 111 | for (OutputRecord r : result) { 112 | assertThat(r.getAsBoolean("b"), is(false)); 113 | assertThat(r.getAsLong("l"), is(0L)); 114 | assertThat(r.getAsDouble("d"), is(0d)); 115 | assertThat(r.getAsTimestamp("t"), is(Timestamp.ofEpochMilli(new SimpleDateFormat("yyyy/MM/dd z").parse( 116 | 
"2000/01/01 UTC").getTime()))); 117 | } 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_formula.java: -------------------------------------------------------------------------------- 1 | package org.embulk.parser.poi_excel; 2 | 3 | import static org.hamcrest.CoreMatchers.is; 4 | import static org.hamcrest.MatcherAssert.assertThat; 5 | 6 | import java.net.URL; 7 | import java.text.ParseException; 8 | import java.util.Arrays; 9 | import java.util.List; 10 | 11 | import org.embulk.config.ConfigSource; 12 | import org.embulk.parser.EmbulkPluginTester; 13 | import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord; 14 | import org.embulk.parser.EmbulkTestParserConfig; 15 | import org.junit.experimental.theories.DataPoints; 16 | import org.junit.experimental.theories.Theories; 17 | import org.junit.experimental.theories.Theory; 18 | import org.junit.runner.RunWith; 19 | 20 | @RunWith(Theories.class) 21 | public class TestPoiExcelParserPlugin_formula { 22 | 23 | @DataPoints 24 | public static String[] FILES = { "test1.xls", "test2.xlsx" }; 25 | 26 | @Theory 27 | public void testForumlaHandlingCashedValue(String excelFile) throws ParseException { 28 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 29 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 30 | 31 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 32 | parser.set("sheet", "formula_replace"); 33 | 34 | parser.addColumn("text", "string").set("formula_handling", "cashed_value"); 35 | 36 | URL inFile = getClass().getResource(excelFile); 37 | List result = tester.runParser(inFile, parser); 38 | 39 | assertThat(result.size(), is(2)); 40 | assertThat(result.get(0).getAsString("text"), is("boolean")); 41 | assertThat(result.get(1).getAsString("text"), is("test2-b1")); 42 | } 43 | } 44 | 45 | @Theory 46 | public 
void testForumlaHandlingEvaluate(String excelFile) throws ParseException { 47 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 48 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 49 | 50 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 51 | parser.set("sheet", "formula_replace"); 52 | 53 | parser.addColumn("text", "string").set("formula_handling", "evaluate"); 54 | 55 | URL inFile = getClass().getResource(excelFile); 56 | List result = tester.runParser(inFile, parser); 57 | 58 | assertThat(result.size(), is(2)); 59 | assertThat(result.get(0).getAsString("text"), is("boolean")); 60 | assertThat(result.get(1).getAsString("text"), is("test2-b1")); 61 | } 62 | } 63 | 64 | @Theory 65 | public void testForumlaReplace(String excelFile) throws ParseException { 66 | try (EmbulkPluginTester tester = new EmbulkPluginTester()) { 67 | tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class); 68 | 69 | EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE); 70 | parser.set("sheet", "formula_replace"); 71 | 72 | ConfigSource replace0 = tester.newConfigSource(); 73 | replace0.set("regex", "test1"); 74 | replace0.set("to", "merged_cell"); 75 | ConfigSource replace1 = tester.newConfigSource(); 76 | replace1.set("regex", "B1"); 77 | replace1.set("to", "B${row}"); 78 | parser.set("formula_replace", Arrays.asList(replace0, replace1)); 79 | 80 | parser.addColumn("text", "string"); 81 | 82 | URL inFile = getClass().getResource(excelFile); 83 | List result = tester.runParser(inFile, parser); 84 | 85 | assertThat(result.size(), is(2)); 86 | assertThat(result.get(0).getAsString("text"), is("test3-a1")); 87 | assertThat(result.get(1).getAsString("text"), is("test2-b2")); 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_mergedCell.java: 
--------------------------------------------------------------------------------
package org.embulk.parser.poi_excel;

import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;

import java.net.URL;
import java.text.ParseException;
import java.util.List;

import org.embulk.parser.EmbulkPluginTester;
import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord;
import org.embulk.parser.EmbulkTestParserConfig;
import org.junit.experimental.theories.DataPoints;
import org.junit.experimental.theories.Theories;
import org.junit.experimental.theories.Theory;
import org.junit.runner.RunWith;

/**
 * Tests the {@code search_merged_cell} option of {@link PoiExcelParserPlugin} against the
 * {@code merged_cell} sheet of every workbook listed in {@link #FILES}.
 */
@RunWith(Theories.class)
public class TestPoiExcelParserPlugin_mergedCell {

    /** Workbook resources (resolved relative to this class) that each theory is run against. */
    @DataPoints
    public static String[] FILES = { "test1.xls", "test2.xlsx" };

    /** Option left unset: the merged value is expected in column {@code b}. */
    @Theory
    public void testSearchMergedCell_default(String excelFile) throws ParseException {
        test(excelFile, null, true);
    }

    /** Boolean {@code true}: the merged value is expected in column {@code b}. */
    @Theory
    public void testSearchMergedCell_true(String excelFile) throws ParseException {
        // compatibility ver 0.1.7
        test(excelFile, true, true);
    }

    /** Boolean {@code false}: column {@code b} of the first record is expected to be null. */
    @Theory
    public void testSearchMergedCell_false(String excelFile) throws ParseException {
        // compatibility ver 0.1.7
        test(excelFile, false, false);
    }

    /** {@code "none"}: column {@code b} of the first record is expected to be null. */
    @Theory
    public void testSearchMergedCell_none(String excelFile) throws ParseException {
        test(excelFile, "none", false);
    }

    /** {@code "linear_search"}: the merged value is expected in column {@code b}. */
    @Theory
    public void testSearchMergedCell_linear(String excelFile) throws ParseException {
        test(excelFile, "linear_search", true);
    }

    /** {@code "tree_search"}: the merged value is expected in column {@code b}. */
    @Theory
    public void testSearchMergedCell_tree(String excelFile) throws ParseException {
        test(excelFile, "tree_search", true);
    }

    /** {@code "hash_search"}: the merged value is expected in column {@code b}. */
    @Theory
    public void testSearchMergedCell_hash(String excelFile) throws ParseException {
        test(excelFile, "hash_search", true);
    }

    /**
     * Parses the {@code merged_cell} sheet into string columns {@code a} and {@code b} and checks
     * the four resulting records.
     *
     * @param excelFile resource name of the workbook under test
     * @param arg       value for {@code search_merged_cell}; {@code null} leaves the option unset
     * @param search    whether the first record's {@code b} column should carry the merged value
     *                  ({@code "test3-a1"}) rather than null
     */
    private void test(String excelFile, Object arg, boolean search) {
        try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
            tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);

            EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
            parser.set("sheet", "merged_cell");
            if (arg != null) {
                parser.set("search_merged_cell", arg);
            }
            parser.addColumn("a", "string");
            parser.addColumn("b", "string");

            URL inFile = getClass().getResource(excelFile);
            List<OutputRecord> result = tester.runParser(inFile, parser);

            assertThat(result.size(), is(4));
            if (search) {
                check6(result, 0, "test3-a1", "test3-a1");
            } else {
                check6(result, 0, "test3-a1", null);
            }
            check6(result, 1, "data", "0");
            check6(result, 2, null, null);
            check6(result, 3, null, null);
        }
    }

    /** Asserts the {@code a} and {@code b} string values of the record at {@code index}. */
    private void check6(List<OutputRecord> result, int index, String a, String b) {
        OutputRecord r = result.get(index);
        // System.out.println(r);
        assertThat(r.getAsString("a"), is(a));
        assertThat(r.getAsString("b"), is(b));
    }
}
--------------------------------------------------------------------------------
/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_recordType.java:
--------------------------------------------------------------------------------
package org.embulk.parser.poi_excel;

import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;

import java.net.URL;
import java.text.ParseException;
import java.util.List;

import org.embulk.parser.EmbulkPluginTester;
import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord;
import org.embulk.parser.EmbulkTestParserConfig;
import org.junit.experimental.theories.DataPoints;
import org.junit.experimental.theories.Theories;
import org.junit.experimental.theories.Theory;
import org.junit.runner.RunWith;

/**
 * Tests the {@code record_type} option ({@code row}, {@code column}, {@code sheet}) of
 * {@link PoiExcelParserPlugin} against every workbook listed in {@link #FILES}.
 */
@RunWith(Theories.class)
public class TestPoiExcelParserPlugin_recordType {

    /** Workbook resources (resolved relative to this class) that each theory is run against. */
    @DataPoints
    public static String[] FILES = { "test1.xls", "test2.xlsx" };

    /**
     * {@code record_type=row} on sheet {@code test1} with one header line skipped: expects seven
     * records whose {@code text} column comes from spreadsheet column D.
     *
     * @param excelFile resource name of the workbook under test
     */
    @Theory
    public void testRecordType_row(String excelFile) throws ParseException {
        try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
            tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);

            EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
            parser.set("sheet", "test1");
            parser.set("record_type", "row");
            parser.set("skip_header_lines", 1);
            parser.addColumn("text", "string").set("cell_column", "D");
            parser.addColumn("text_row", "long").set("cell_column", "D").set("value", "row_number");
            parser.addColumn("text_col", "long").set("cell_column", "D").set("value", "column_number");
            parser.addColumn("text2", "string").set("cell_row", "4");
            parser.addColumn("text2_row", "long").set("cell_row", "4").set("value", "row_number");
            parser.addColumn("text2_col", "long").set("cell_row", "4").set("value", "column_number");
            parser.addColumn("address1", "string").set("cell_address", "B1").set("value", "cell_value");
            parser.addColumn("address2", "string").set("cell_column", "D").set("cell_row", "2");
            parser.addColumn("fix_row", "long").set("cell_address", "B1").set("value", "row_number");
            parser.addColumn("fix_col", "long").set("cell_address", "B1").set("value", "column_number");

            URL inFile = getClass().getResource(excelFile);
            List<OutputRecord> result = tester.runParser(inFile, parser);

            assertThat(result.size(), is(7));
            check1(result, 0, "abc");
            check1(result, 1, "def");
            check1(result, 2, "456");
            check1(result, 3, "abc");
            check1(result, 4, "abc");
            check1(result, 5, "true");
            check1(result, 6, null);
        }
    }

    /**
     * Asserts one record of the {@code row} test: {@code text} varies per record and
     * {@code text_row} is {@code index + 2}; every other column is the same for all records.
     */
    private void check1(List<OutputRecord> result, int index, String text) {
        OutputRecord record = result.get(index);
        // System.out.println(record);
        assertThat(record.getAsString("text"), is(text));
        assertThat(record.getAsLong("text_row"), is((long) index + 2));
        assertThat(record.getAsLong("text_col"), is(4L));
        assertThat(record.getAsString("text2"), is("42283"));
        assertThat(record.getAsLong("text2_row"), is(4L));
        assertThat(record.getAsLong("text2_col"), is(5L));
        assertThat(record.getAsString("address1"), is("long"));
        assertThat(record.getAsString("address2"), is("abc"));
        assertThat(record.getAsLong("fix_row"), is(1L));
        assertThat(record.getAsLong("fix_col"), is(2L));
    }

    /**
     * {@code record_type=column} on sheet {@code test1} without {@code skip_header_lines}: expects
     * seven records, with {@code text_col} starting at 1.
     *
     * @param excelFile resource name of the workbook under test
     */
    @Theory
    public void testRecordType_column0(String excelFile) throws ParseException {
        try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
            tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);

            EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
            parser.set("sheet", "test1");
            parser.set("record_type", "column");
            // parser.set("skip_header_lines", 0);
            addColumnRecordColumns(parser);

            URL inFile = getClass().getResource(excelFile);
            List<OutputRecord> result = tester.runParser(inFile, parser);

            assertThat(result.size(), is(7));
            int z = 1; // offset added to the record index to get the expected text_col
            check2(result, 0, z, "true");
            check2(result, 1, z, "123");
            check2(result, 2, z, "123.4");
            check2(result, 3, z, "abc");
            check2(result, 4, z, "2015/10/07");
            check2(result, 5, z, null);
            check2(result, 6, z, "CELL_TYPE_STRING");
        }
    }

    /**
     * {@code record_type=column} on sheet {@code test1} with {@code skip_header_lines=1}: expects
     * six records, with {@code text_col} starting at 2.
     *
     * @param excelFile resource name of the workbook under test
     */
    @Theory
    public void testRecordType_column1(String excelFile) throws ParseException {
        try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
            tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);

            EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
            parser.set("sheet", "test1");
            parser.set("record_type", "column");
            parser.set("skip_header_lines", 1);
            addColumnRecordColumns(parser);

            URL inFile = getClass().getResource(excelFile);
            List<OutputRecord> result = tester.runParser(inFile, parser);

            assertThat(result.size(), is(6));
            int z = 2; // offset added to the record index to get the expected text_col
            check2(result, 0, z, "123");
            check2(result, 1, z, "123.4");
            check2(result, 2, z, "abc");
            check2(result, 3, z, "2015/10/07");
            check2(result, 4, z, null);
            check2(result, 5, z, "CELL_TYPE_STRING");
        }
    }

    /** Declares the ten output columns shared by both {@code record_type=column} tests. */
    private void addColumnRecordColumns(EmbulkTestParserConfig parser) {
        parser.addColumn("text", "string").set("cell_row", "5");
        parser.addColumn("text_row", "long").set("cell_row", "5").set("value", "row_number");
        parser.addColumn("text_col", "long").set("cell_row", "5").set("value", "column_number");
        parser.addColumn("text2", "string").set("cell_column", "D");
        parser.addColumn("text2_row", "long").set("cell_column", "D").set("value", "row_number");
        parser.addColumn("text2_col", "long").set("cell_column", "D").set("value", "column_number");
        parser.addColumn("address1", "string").set("cell_address", "B1").set("value", "cell_value");
        parser.addColumn("address2", "string").set("cell_column", "D").set("cell_row", "2");
        parser.addColumn("fix_row", "long").set("cell_address", "B1").set("value", "row_number");
        parser.addColumn("fix_col", "long").set("cell_address", "B1").set("value", "column_number");
    }

    /**
     * Asserts one record of the {@code column} tests: {@code text} varies per record and
     * {@code text_col} is {@code index + z}; every other column is the same for all records.
     */
    private void check2(List<OutputRecord> result, int index, int z, String text) {
        OutputRecord record = result.get(index);
        // System.out.println(record);
        assertThat(record.getAsString("text"), is(text));
        assertThat(record.getAsLong("text_row"), is(5L));
        assertThat(record.getAsLong("text_col"), is((long) index + z));
        assertThat(record.getAsString("text2"), is("abc"));
        assertThat(record.getAsLong("text2_row"), is(6L));
        assertThat(record.getAsLong("text2_col"), is(4L));
        assertThat(record.getAsString("address1"), is("long"));
        assertThat(record.getAsString("address2"), is("abc"));
        assertThat(record.getAsLong("fix_row"), is(1L));
        assertThat(record.getAsLong("fix_col"), is(2L));
    }

    /**
     * {@code record_type=sheet} on sheet {@code style}: expects exactly one record.
     *
     * @param excelFile resource name of the workbook under test
     */
    @Theory
    public void testRecordType_sheet(String excelFile) throws ParseException {
        try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
            tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);

            EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
            parser.set("sheet", "style");
            parser.set("record_type", "sheet");
            parser.addColumn("text", "string");
            parser.addColumn("text_row", "long").set("cell_row", "5").set("value", "row_number");
            parser.addColumn("text_col", "long").set("cell_column", "6").set("value", "column_number");
            parser.addColumn("address1", "string").set("cell_address", "B1").set("value", "cell_value");
            parser.addColumn("address2", "string").set("cell_column", "A").set("cell_row", "4");
            parser.addColumn("fix_row", "long").set("cell_address", "B1").set("value", "row_number");
            parser.addColumn("fix_col", "long").set("cell_address", "B1").set("value", "column_number");

            URL inFile = getClass().getResource(excelFile);
            List<OutputRecord> result = tester.runParser(inFile, parser);

            assertThat(result.size(), is(1));
            check3(result, 0, "red");
        }
    }

    /** Asserts the single record produced by the {@code sheet} test. */
    private void check3(List<OutputRecord> result, int index, String text) {
        OutputRecord record = result.get(index);
        // System.out.println(record);
        assertThat(record.getAsString("text"), is(text));
        assertThat(record.getAsLong("text_row"), is(5L));
        assertThat(record.getAsLong("text_col"), is(6L));
        assertThat(record.getAsString("address1"), is("top"));
        assertThat(record.getAsString("address2"), is("white"));
        assertThat(record.getAsLong("fix_row"), is(1L));
        assertThat(record.getAsLong("fix_col"), is(2L));
    }
}
--------------------------------------------------------------------------------
/src/test/java/org/embulk/parser/poi_excel/TestPoiExcelParserPlugin_sheets.java:
--------------------------------------------------------------------------------
package org.embulk.parser.poi_excel;

import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;

import java.net.URL;
import java.text.ParseException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.embulk.parser.EmbulkPluginTester;
import org.embulk.parser.EmbulkTestOutputPlugin.OutputRecord;
import org.embulk.parser.EmbulkTestParserConfig;
import org.junit.experimental.theories.DataPoints;
import org.junit.experimental.theories.Theories;
import org.junit.experimental.theories.Theory;
import org.junit.runner.RunWith;

/**
 * Tests the multi-sheet options ({@code sheets}, {@code sheet_options}) of
 * {@link PoiExcelParserPlugin} against every workbook listed in {@link #FILES}.
 */
@RunWith(Theories.class)
public class TestPoiExcelParserPlugin_sheets {

    /** Workbook resources (resolved relative to this class) that each theory is run against. */
    @DataPoints
    public static String[] FILES = { "test1.xls", "test2.xlsx" };

    /**
     * Reads three sheets in one run, each with its own {@code sheet_options} entry, and expects
     * eight records in total.
     *
     * @param excelFile resource name of the workbook under test
     */
    @Theory
    public void testSheets(String excelFile) throws ParseException {
        try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
            tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);

            EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
            parser.set("sheets", Arrays.asList("test1", "formula_replace", "style"));
            parser.addColumn("text", "string");
            parser.addColumn("number", "long");

            Map<String, Object> sheetOptions = new HashMap<>();
            {
                Map<String, Object> sheet = new HashMap<>();
                sheet.put("skip_header_lines", "5");
                Map<String, Object> columns = new HashMap<>();
                columns.put("text", newMap("column_number", "D"));
                columns.put("number", newMap("column_number", "B"));
                sheet.put("columns", columns);
                sheetOptions.put("test1", sheet);
            }
            {
                Map<String, Object> sheet = new HashMap<>();
                Map<String, Object> columns = new HashMap<>();
                columns.put("number", newMap("value", "constant.0"));
                sheet.put("columns", columns);
                sheetOptions.put("formula_replace", sheet);
            }
            {
                Map<String, Object> sheet = new HashMap<>();
                sheet.put("skip_header_lines", "2");
                Map<String, Object> columns = new HashMap<>();
                columns.put("text", newMap("column_number", "B"));
                columns.put("number", newMap("value", "constant.-1"));
                sheet.put("columns", columns);
                sheetOptions.put("style", sheet);
            }
            parser.set("sheet_options", sheetOptions);

            URL inFile = getClass().getResource(excelFile);
            List<OutputRecord> result = tester.runParser(inFile, parser);

            assertThat(result.size(), is(8));
            check1(result, 0, "abc", 123L);
            check1(result, 1, "true", 1L);
            check1(result, 2, null, null);
            check1(result, 3, "boolean", 0L);
            check1(result, 4, "test2-b1", 0L);
            check1(result, 5, "left", -1L);
            check1(result, 6, "right", -1L);
            check1(result, 7, "bottom", -1L);
        }
    }

    /** Returns a new mutable map holding the single entry {@code key -> value}. */
    private Map<String, Object> newMap(String key, Object value) {
        Map<String, Object> map = new HashMap<>();
        map.put(key, value);
        return map;
    }

    /** Asserts the {@code text} and {@code number} values of the record at {@code index}. */
    private void check1(List<OutputRecord> result, int index, String text, Long number) {
        OutputRecord record = result.get(index);
        // System.out.println(record);
        assertThat(record.getAsString("text"), is(text));
        assertThat(record.getAsLong("number"), is(number));
    }

    /**
     * Uses the sheet pattern {@code "*es*"} and expects the first record's {@code sheet_name}
     * value to be {@code "test1"}.
     *
     * @param excelFile resource name of the workbook under test
     */
    @Theory
    public void testResolveSheetName1(String excelFile) throws ParseException {
        try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
            tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);

            EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
            parser.set("sheets", Arrays.asList("*es*"));
            parser.addColumn("name", "string").set("value", "sheet_name");

            URL inFile = getClass().getResource(excelFile);
            List<OutputRecord> result = tester.runParser(inFile, parser);

            OutputRecord record = result.get(0);
            assertThat(record.getAsString("name"), is("test1"));
        }
    }

    /**
     * Uses the sheet pattern {@code "test?"} and expects the first record's {@code sheet_name}
     * value to be {@code "test1"}.
     *
     * @param excelFile resource name of the workbook under test
     */
    @Theory
    public void testResolveSheetName2(String excelFile) throws ParseException {
        try (EmbulkPluginTester tester = new EmbulkPluginTester()) {
            tester.addParserPlugin(PoiExcelParserPlugin.TYPE, PoiExcelParserPlugin.class);

            EmbulkTestParserConfig parser = tester.newParserConfig(PoiExcelParserPlugin.TYPE);
            parser.set("sheets", Arrays.asList("test?"));
            parser.addColumn("name", "string").set("value", "sheet_name");

            URL inFile = getClass().getResource(excelFile);
            List<OutputRecord> result = tester.runParser(inFile, parser);

            OutputRecord record = result.get(0);
            assertThat(record.getAsString("name"), is("test1"));
        }
    }
}
--------------------------------------------------------------------------------
/src/test/resources/org/embulk/parser/poi_excel/test1.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hishidama/embulk-parser-poi_excel/2a279661543f0552b285ac37467b1c39d7f81cf6/src/test/resources/org/embulk/parser/poi_excel/test1.xls -------------------------------------------------------------------------------- /src/test/resources/org/embulk/parser/poi_excel/test2.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hishidama/embulk-parser-poi_excel/2a279661543f0552b285ac37467b1c39d7f81cf6/src/test/resources/org/embulk/parser/poi_excel/test2.xlsx --------------------------------------------------------------------------------