├── .circleci └── config.yml ├── .github ├── CODEOWNERS ├── dependabot.yml └── workflows │ ├── build.yml │ └── codeql.yml ├── .gitignore ├── CHANGELOG.md ├── README.md ├── build.gradle ├── config └── checkstyle │ └── checkstyle.xml ├── gradle ├── dependency-locks │ └── embulkPluginRuntime.lockfile └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── settings.gradle └── src ├── main └── java │ └── org │ └── embulk │ └── output │ └── td │ ├── FinalizableExecutorService.java │ ├── MsgpackGZFileBuilder.java │ ├── RecordWriter.java │ ├── TdOutputPlugin.java │ ├── TimeValueConfig.java │ ├── TimeValueGenerator.java │ └── writer │ ├── ArrayFieldWriter.java │ ├── BooleanFieldWriter.java │ ├── DoubleFieldWriter.java │ ├── FieldWriter.java │ ├── FieldWriterSet.java │ ├── IFieldWriter.java │ ├── JsonFieldWriter.java │ ├── LongFieldWriter.java │ ├── MapFieldWriter.java │ ├── StringFieldWriter.java │ ├── TimestampFieldLongDuplicator.java │ ├── UnixTimestampFieldDuplicator.java │ └── UnixTimestampLongFieldWriter.java └── test └── java └── org └── embulk └── output └── td ├── TestRecordWriter.java ├── TestTdOutputPlugin.java ├── TestTimeValueGenerator.java └── writer ├── TestArrayFieldWriter.java ├── TestBooleanFieldWriter.java ├── TestDoubleFieldWriter.java ├── TestFieldWriterSet.java ├── TestJsonFieldWriter.java ├── TestLongFieldWriter.java ├── TestMapFieldWriter.java ├── TestStringFieldWriter.java ├── TestTimestampFieldLongDuplicator.java ├── TestUnixTimestampFieldDuplicator.java └── TestUnixTimestampLongFieldWriter.java /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | jobs: 3 | build: 4 | docker: 5 | - image: alpine:3.15 6 | steps: 7 | - run: 8 | name: Init Build 9 | command: | 10 | echo 'init CI build' -------------------------------------------------------------------------------- /.github/CODEOWNERS: 
-------------------------------------------------------------------------------- 1 | * @treasure-data/integrations 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 2 3 | updates: 4 | # Enable workflow version updates for GitHub Actions 5 | - package-ecosystem: "github-actions" 6 | directory: "/" 7 | schedule: 8 | interval: "daily" 9 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | 3 | on: push 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v4 12 | 13 | - name: Set up JDK 1.8 14 | uses: actions/setup-java@v4 15 | with: 16 | java-version: 8 17 | distribution: "zulu" 18 | 19 | - name: Grant execute permission for gradlew 20 | run: chmod +x gradlew 21 | 22 | - name: Build with testing 23 | run: ./gradlew clean check jacocoTestReport --console verbose 24 | 25 | - uses: actions/upload-artifact@v4 26 | if: always() 27 | with: 28 | name: tests 29 | path: ./build/reports/tests/test 30 | 31 | - uses: actions/upload-artifact@v4 32 | if: success() 33 | with: 34 | name: jacoco 35 | path: ./build/reports/jacoco/test 36 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: "CodeQL" 3 | 4 | on: 5 | schedule: 6 | - cron: '26 19 * * 0' 7 | 8 | jobs: 9 | analyze: 10 | name: Analyze 11 | runs-on: ubuntu-latest 12 | 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | language: [java] 17 | 18 | steps: 19 | - name: Checkout repository 20 | uses: actions/checkout@v4 21 | 22 | - name: Cache gradle repository 23 | uses: actions/cache@v4 24 | with: 
25 | path: | 26 | ~/.gradle/caches 27 | ~/.gradle/wrapper 28 | key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} 29 | restore-keys: | 30 | ${{ runner.os }}-gradle- 31 | 32 | - name: Initialize CodeQL 33 | uses: github/codeql-action/init@v3 34 | with: 35 | languages: ${{ matrix.language }} 36 | queries: +security-extended 37 | 38 | - name: build 39 | run: | 40 | ./gradlew build 41 | 42 | - name: Perform CodeQL Analysis 43 | uses: github/codeql-action/analyze@v3 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.iml 3 | .idea 4 | build/ 5 | /classpath/ 6 | /.gradle 7 | /pkg/ 8 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | Look at [Git annotated tags](https://github.com/treasure-data/embulk-output-td/tags). 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TD output plugin for Embulk 2 | 3 | [Treasure Data Service](https://www.treasuredata.com/) output plugin for [Embulk](https://github.com/embulk/embulk) 4 | 5 | **NOTICE**: 6 | * embulk-output-td v0.8.0+ only supports **Embulk v0.9.24**. Embulk v0.9.23 does not work. 7 | * embulk-output-td v0.5.0+ requires Java 1.8 or higher. 8 | * embulk-output-td v0.4.0+ only supports **Embulk v0.8.22+**. 9 | 10 | ## Overview 11 | 12 | * **Plugin type**: output 13 | * **Load all or nothing**: yes 14 | * **Resume supported**: no 15 | 16 | ## Configuration 17 | 18 | - **apikey**: apikey (string, required) 19 | - **endpoint**: hostname (string, default='api.treasuredata.com') 20 | - **http_proxy**: http proxy configuration (tuple of host, port, useSsl, user, and password. 
default is null) 21 | - **use_ssl**: the flag (boolean, default=true) 22 | - **auto_create_table**: the flag for creating the database and/or the table if they don't exist (boolean, default=true) 23 | - **mode**: 'append', 'replace' and 'truncate' (string, default='append') 24 | - **database**: database name (string, required) 25 | - **table**: table name (string, required) 26 | - **session**: bulk_import session name (string, optional) 27 | - **pool_name**: bulk_import session pool name (string, optional) 28 | - **time_column**: user-defined time column (string, optional) 29 | - **unix_timestamp_unit**: if type of "time" or **time_column** is long, it's considered unix timestamp. This option specifies its unit in sec, milli, micro or nano (enum, default: `sec`) 30 | - **tmpdir**: temporary directory (string, optional); if set to null, the plugin will use a directory obtained from the system property 31 | - **upload_concurrency**: upload concurrency (int, default=2). max concurrency is 8. 32 | - **file_split_size**: split size (long, default=16384 (16MB)). 33 | - **stop_on_invalid_record**: stop bulk load transaction if a file includes invalid record (such as invalid timestamp) (boolean, default=false). 34 | - **displayed_error_records_count_limit**: limit the count of the shown error records skipped by the perform job (int, default=10). 35 | - **default_timestamp_type_convert_to**: configure output type of timestamp columns. Available options are "sec" (convert timestamp to UNIX timestamp in seconds) and "string" (convert timestamp to string). (string, default: `"string"`) 36 | - **default_timezone**: default timezone (string, default='UTC') 37 | - **default_timestamp_format**: default timestamp format (string, default=`%Y-%m-%d %H:%M:%S.%6N`) 38 | - **column_options**: advanced: key-value pairs where key is a column name and value is options for the column. 39 | - **type**: The type of column when this plugin adds a new column to a TD's table (e.g. `array`). 
Available options are: `int`, `long`, `float`, `double`, `string`, `array<int>`, `array<long>`, `array<double>`, `array<string>`, `array<array<long>>`. More information can be found: https://tddocs.atlassian.net/wiki/spaces/PD/pages/1083743/Schema+Management. (string, optional) 40 | - **value_type**: This plugin converts Embulk input data type to msgpack data type that is uploaded to TD. This option controls the msgpack data type which Embulk data in the column is converted to. Available options are: `boolean`, `long`, `double`, `string`, `timestamp`, `array`, `map`. (string, optional) 41 | - **timezone**: If input column type (embulk type) is timestamp, this plugin needs to format the timestamp value into a SQL string. In these cases, this timezone option is used to control the timezone. (string, value of default_timezone option is used by default) 42 | - **format**: If input column type (embulk type) is timestamp, this plugin needs to format the timestamp value into a string. This `format` option is used to control the format of the timestamp. (string, value of default_timestamp_format option is used by default) 43 | - **retry_limit**: indicates how many retries are allowed (int, default: 20) 44 | - **retry_initial_interval_millis**: the initial intervals (int, default: 1000) 45 | - **retry_max_interval_millis**: the maximum intervals. The interval doubles every retry until retry_max_interval_millis is reached. (int, default: 90000) 46 | - **additional_http_headers**: add additional headers to the requests (a key & value map, default: null) 47 | - **port**: set port for Http requests. By default will connect to port 443 or 80 if `use_ssl: false` (int, optional) 48 | - **ignore_alternative_time_if_time_exists**: ignore `time_column` and `time_value` in the configuration if a `time` column exists in the input schema. (boolean, default: false) 49 | - **default_boolean_type_convert_to**: configure output TD's type from Embulk's BOOLEAN columns. 
Available options are "long" (convert Embulk's BOOLEAN to TD's long) and "string" (convert Embulk's BOOLEAN to TD's string). (string, default: `"long"`) 50 | 51 | ## Modes 52 | * **append**: 53 | - Uploads data to existing table directly. 54 | * **replace**: 55 | - Creates new temp table and uploads data to the temp table first. 56 | - After uploading finished, the table specified as 'table' option is replaced with the temp table. 57 | - Schema in existing table is not migrated to the replaced table. 58 | * **truncate**: 59 | - Creates new temp table and uploads data to the temp table first. 60 | - After uploading finished, the table specified as 'table' option is replaced with the temp table. 61 | - Schema in existing table is added to the replaced table. 62 | 63 | ## Example 64 | Here is sample configuration for TD output plugin. 65 | ```yaml 66 | out: 67 | type: td 68 | apikey: 69 | endpoint: api.treasuredata.com 70 | database: my_db 71 | table: my_table 72 | time_column: created_at 73 | auto_create_table: true 74 | mode: append 75 | ``` 76 | 77 | ### Http Proxy Configuration 78 | If you want to add your Http Proxy configuration, you can use `http_proxy` parameter: 79 | ```yaml 80 | out: 81 | type: td 82 | apikey: 83 | endpoint: api.treasuredata.com 84 | http_proxy: {host: localhost, port: 8080, use_ssl: false, user: "proxyuser", password: "PASSWORD"} 85 | database: my_db 86 | table: my_table 87 | time_column: created_at 88 | auto_create_table: true 89 | mode: append 90 | ``` 91 | 92 | ### Additional Http headers 93 | ```yaml 94 | out: 95 | type: td 96 | apikey: 97 | endpoint: api.treasuredata.com 98 | database: my_db 99 | table: my_table 100 | time_column: created_at 101 | auto_create_table: true 102 | mode: append 103 | additional_http_headers: 104 | Content_Type: 'application/json' 105 | foo: bar 106 | ``` 107 | 108 | ### Column options 109 | ```yaml 110 | out: 111 | type: td 112 | apikey: 113 | endpoint: api.treasuredata.com 114 | database: my_db 115 | 
table: my_table 116 | time_column: created_at 117 | auto_create_table: true 118 | mode: append 119 | column_options: 120 | col_array: 121 | type: array 122 | value_type: array 123 | col_long: 124 | type: string 125 | value_type: long 126 | col_timestamp: 127 | type: string 128 | value_type: timestamp 129 | timestamp_format: `%Y-%m-%d %H:%M:%S %z` 130 | timezone: '-0700' 131 | ``` 132 | 133 | ## Install 134 | 135 | ``` 136 | $ embulk gem install embulk-output-td 137 | ``` 138 | 139 | ## Build 140 | 141 | ### Build by Gradle 142 | ``` 143 | $ git clone https://github.com/treasure-data/embulk-output-td.git 144 | $ cd embulk-output-td 145 | $ ./gradlew gem classpath 146 | ``` 147 | 148 | ### Run on Embulk 149 | $ bin/embulk run -I embulk-output-td/lib/ config.yml 150 | 151 | ## Release 152 | 153 | ### Upload gem to Rubygems.org 154 | 155 | ``` 156 | $ ./gradlew gem # create .gem file under pkg/ directory 157 | $ ./gradlew gemPush # create and publish .gem file 158 | ``` 159 | 160 | Repo URL: https://rubygems.org/gems/embulk-output-td 161 | 162 | ### Upload jars to Bintray.com 163 | 164 | ``` 165 | $ ./gradlew bintrayUpload 166 | ``` 167 | 168 | Repo URL: https://bintray.com/embulk-output-td/maven/embulk-output-td 169 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id "java" 3 | id "maven-publish" 4 | id "signing" 5 | id "org.embulk.embulk-plugins" version "0.4.2" 6 | id "checkstyle" 7 | id "pmd" 8 | id "jacoco" 9 | } 10 | 11 | repositories { 12 | mavenCentral() 13 | } 14 | 15 | group = "com.treasuredata.embulk.plugins" 16 | version = "0.8.2-SNAPSHOT" 17 | description = "TreasureData output plugin is an Embulk plugin that loads records to Treasure Data read by any input plugins." 
18 | 19 | sourceCompatibility = 1.8 20 | targetCompatibility = 1.8 21 | 22 | tasks.withType(JavaCompile) { 23 | options.encoding = "UTF-8" 24 | options.compilerArgs << "-Xlint:deprecation" << "-Xlint:unchecked" 25 | } 26 | 27 | java { 28 | withJavadocJar() 29 | withSourcesJar() 30 | } 31 | 32 | dependencies { 33 | compileOnly "org.embulk:embulk-api:0.10.34" 34 | compileOnly "org.embulk:embulk-spi:0.10.34" 35 | 36 | compile("com.treasuredata.client:td-client:0.9.5") { 37 | // td-client:0.9.5 transitively depends on Jackson 2.9.10 and Guava 24.1.1. 38 | // 39 | // They could conflict with Jackson 2.6.7 and Guava 18.0 in embulk-core. 40 | // It has been a known potential problem. It has been working so far, fortunately. 41 | // 42 | // The conflict has been accepted now proved by time. This plugin will be able to start using "compliant" Jackson and Guava 43 | // once Embulk removes Jackson (in v0.10.32) and Guava (planned for in v0.10.35+) from embulk-core. 44 | 45 | // Jackson 2.6.7 is included in embulk-core's dependencies before Embulk v0.10.32. 46 | // They are once excluded here, and added independently with explicit versions specified. 47 | exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations" 48 | exclude group: "com.fasterxml.jackson.core", module: "jackson-core" 49 | exclude group: "com.fasterxml.jackson.core", module: "jackson-databind" 50 | 51 | // Guice 4.0 is included in embulk-core's dependencies before Embulk v0.10.33. 52 | // They are once excluded here, and added independently with explicit versions specified. 53 | exclude group: "com.google.inject", module: "guice" 54 | exclude group: "com.google.inject.extensions", module: "guice-multibindings" 55 | exclude group: "aopalliance", module: "aopalliance" 56 | exclude group: "javax.inject", module: "javax.inject" 57 | 58 | // slf4j-api is included in embulk-api's dependencies. 
59 | exclude group: "org.slf4j", module: "slf4j-api" 60 | } 61 | compile "com.squareup.okhttp3:okhttp:4.9.3" 62 | 63 | compile("org.embulk:embulk-util-config:0.3.1") { 64 | // They conflict with embulk-core. 65 | // They are once excluded here, and added independently with explicit versions specified. 66 | exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations" 67 | exclude group: "com.fasterxml.jackson.core", module: "jackson-core" 68 | exclude group: "com.fasterxml.jackson.core", module: "jackson-databind" 69 | exclude group: "com.fasterxml.jackson.datatype", module: "jackson-datatype-jdk8" 70 | exclude group: "javax.validation", module: "validation-api" 71 | } 72 | 73 | // Guice is in embulk-core's dependencies before Embulk v0.10.33, and to be removed. 74 | // They, whose versions are the 100% same with embulk-core's, are explicitly included here to work even before/after v0.10.33. 75 | compile "com.google.inject.extensions:guice-multibindings:4.0" // td-client:0.9.5's dependency is 3.0, but embulk-core on 4.0. 76 | compile "com.google.inject:guice:4.0" 77 | compile "aopalliance:aopalliance:1.0" 78 | compile "javax.inject:javax.inject:1" 79 | 80 | // Jackson is in embulk-core's dependencies before Embulk v0.10.32, and to be removed. 81 | // are once excluded from transitive dependencies of other dependencies, 82 | // and added explicitly with versions that were used in embulk-output-td:0.7.2. 
83 | compile "com.fasterxml.jackson.core:jackson-annotations:2.9.10" 84 | compile "com.fasterxml.jackson.core:jackson-core:2.9.10" 85 | compile "com.fasterxml.jackson.core:jackson-databind:2.9.10.5" 86 | compile "com.fasterxml.jackson.datatype:jackson-datatype-guava:2.9.10" 87 | compile "com.fasterxml.jackson.datatype:jackson-datatype-json-org:2.9.10" 88 | compile "javax.validation:validation-api:1.1.0.Final" 89 | 90 | compile "com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.9.10" 91 | 92 | compile "org.embulk:embulk-util-timestamp:0.2.1" 93 | 94 | testCompile "junit:junit:4.13.2" 95 | testCompile "org.bigtesting:fixd:1.0.0" 96 | testCompile "org.embulk:embulk-core:0.10.34" 97 | testCompile "org.embulk:embulk-deps:0.10.34" 98 | testCompile "org.embulk:embulk-junit4:0.10.34" 99 | testCompile "org.mockito:mockito-core:1.9.5" 100 | testCompile "com.github.tomakehurst:wiremock-jre8:2.25.1" 101 | } 102 | 103 | embulkPlugin { 104 | mainClass = "org.embulk.output.td.TdOutputPlugin" 105 | category = "output" 106 | type = "td" 107 | } 108 | 109 | javadoc { 110 | options { 111 | locale = 'en_US' 112 | encoding = 'UTF-8' 113 | } 114 | } 115 | 116 | publishing { 117 | publications { 118 | maven(MavenPublication) { 119 | groupId = project.group 120 | artifactId = project.name 121 | 122 | from components.java // Must be "components.java". The dependency modification works only for it. 123 | // javadocJar and sourcesJar are added by java.withJavadocJar() and java.withSourcesJar() above. 
124 | // See: https://docs.gradle.org/current/javadoc/org/gradle/api/plugins/JavaPluginExtension.html 125 | 126 | pom { // https://central.sonatype.org/pages/requirements.html 127 | packaging "jar" 128 | 129 | name = project.name 130 | description = project.description 131 | url = "https://github.com/treasure-data/embulk-output-td" 132 | 133 | licenses { 134 | license { 135 | // http://central.sonatype.org/pages/requirements.html#license-information 136 | name = "The Apache License, Version 2.0" 137 | url = "https://www.apache.org/licenses/LICENSE-2.0.txt" 138 | } 139 | } 140 | 141 | developers { 142 | developer { 143 | name = "Muga Nishizawa" 144 | email = "muga.nishizawa@gmail.com" 145 | } 146 | developer { 147 | name = "Sadayuki Furuhashi" 148 | email = "frsyuki@gmail.com" 149 | } 150 | developer { 151 | name = "Satoshi Akama" 152 | email = "satoshiakama@gmail.com" 153 | } 154 | developer { 155 | name = "Dai MIKURUBE" 156 | email = "dmikurube@treasure-data.com" 157 | } 158 | developer { 159 | name = "Trung Huynh" 160 | email = "httrung90@gmail.com" 161 | } 162 | developer { 163 | name = "Tuan Bui" 164 | email = "xuantuan58@gmail.com" 165 | } 166 | developer { 167 | name = "Serhii Himadieiev" 168 | email = "gimadeevsv@gmail.com" 169 | } 170 | } 171 | 172 | scm { 173 | connection = "scm:git:git://github.com/treasure-data/embulk-output-td.git" 174 | developerConnection = "scm:git:git@github.com:treasure-data/embulk-output-td.git" 175 | url = "https://github.com/treasure-data/embulk-output-td" 176 | } 177 | } 178 | } 179 | } 180 | 181 | repositories { 182 | maven { // publishMavenPublicationToMavenCentralRepository 183 | name = "mavenCentral" 184 | if (project.version.endsWith("-SNAPSHOT")) { 185 | url "https://oss.sonatype.org/content/repositories/snapshots" 186 | } else { 187 | url "https://oss.sonatype.org/service/local/staging/deploy/maven2" 188 | } 189 | 190 | credentials { 191 | username = project.hasProperty("ossrhUsername") ? 
ossrhUsername : "" 192 | password = project.hasProperty("ossrhPassword") ? ossrhPassword : "" 193 | } 194 | } 195 | } 196 | } 197 | 198 | signing { 199 | sign publishing.publications.maven 200 | } 201 | 202 | gem { 203 | authors = [ "Muga Nishizawa" ] 204 | email = [ "muga.nishizawa@gmail.com" ] 205 | summary = "TreasureData output plugin for Embulk" 206 | homepage = "https://github.com/treasure-data/embulk-output-td" 207 | licenses = [ "Apache-2.0" ] 208 | } 209 | 210 | gemPush { 211 | host = "https://rubygems.org" 212 | outputs.upToDateWhen { false } 213 | } 214 | 215 | test { 216 | testLogging { 217 | events "passed", "skipped", "failed", "standardOut", "standardError" 218 | 219 | exceptionFormat = org.gradle.api.tasks.testing.logging.TestExceptionFormat.FULL 220 | showCauses = true 221 | showExceptions = true 222 | showStackTraces = true 223 | showStandardStreams = true 224 | 225 | outputs.upToDateWhen { false } 226 | } 227 | } 228 | 229 | // Checkstyle 230 | checkstyle { 231 | ignoreFailures = true 232 | // @see https://github.com/facebook/presto/blob/master/src/checkstyle/checks.xml 233 | //configFile = rootProject.file('./checkstyle.xml') // default {project.projectDir}/config/checkstyle/checkstyle.xml 234 | } 235 | 236 | // PMD 237 | tasks.withType(Pmd) { 238 | ignoreFailures = true 239 | reports.html.enabled true 240 | } 241 | 242 | // JaCoCo 243 | jacocoTestReport { // will use td-client v0.6.x 244 | afterEvaluate { 245 | classDirectories.from = files(classDirectories.files.collect { 246 | fileTree(dir: it, exclude: 'com/treasuredata/api/**') 247 | }) 248 | } 249 | } 250 | -------------------------------------------------------------------------------- /config/checkstyle/checkstyle.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 41 | 42 | 43 | 44 | 
45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /gradle/dependency-locks/embulkPluginRuntime.lockfile: -------------------------------------------------------------------------------- 1 | # This is a Gradle generated file for dependency locking. 2 | # Manual edits can break the build and are not advised. 3 | # This file is expected to be part of source control. 4 | aopalliance:aopalliance:1.0 5 | com.fasterxml.jackson.core:jackson-annotations:2.9.10 6 | com.fasterxml.jackson.core:jackson-core:2.9.10 7 | com.fasterxml.jackson.core:jackson-databind:2.9.10.5 8 | com.fasterxml.jackson.datatype:jackson-datatype-guava:2.9.10 9 | com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.9.10 10 | com.fasterxml.jackson.datatype:jackson-datatype-json-org:2.9.10 11 | com.google.code.findbugs:jsr305:1.3.9 12 | com.google.errorprone:error_prone_annotations:2.1.3 13 | com.google.guava:guava:24.1.1-jre 14 | com.google.inject.extensions:guice-multibindings:4.0 15 | com.google.inject:guice:4.0 16 | com.google.j2objc:j2objc-annotations:1.1 17 | com.googlecode.json-simple:json-simple:1.1.1 18 | com.squareup.okhttp3:okhttp-urlconnection:3.14.7 19 | com.squareup.okhttp3:okhttp:4.9.3 20 | com.squareup.okio:okio:2.8.0 21 | com.treasuredata.client:td-client:0.9.5 22 | javax.annotation:javax.annotation-api:1.2 23 | javax.inject:javax.inject:1 24 | javax.validation:validation-api:1.1.0.Final 25 | junit:junit:4.10 26 | org.checkerframework:checker-compat-qual:2.0.0 27 | org.codehaus.mojo:animal-sniffer-annotations:1.14 28 | org.embulk:embulk-util-config:0.3.1 29 | org.embulk:embulk-util-rubytime:0.3.2 30 | 
org.embulk:embulk-util-timestamp:0.2.1 31 | org.hamcrest:hamcrest-core:1.1 32 | org.jetbrains.kotlin:kotlin-stdlib-common:1.4.10 33 | org.jetbrains.kotlin:kotlin-stdlib:1.4.10 34 | org.jetbrains:annotations:13.0 35 | org.json:json:20171018 36 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treasure-data/embulk-output-td/bc5a21cba2337cd34dbccb2891e3c50bb90af684/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.2.1-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | # 4 | # Copyright 2015 the original author or authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | 19 | ############################################################################## 20 | ## 21 | ## Gradle start up script for UN*X 22 | ## 23 | ############################################################################## 24 | 25 | # Attempt to set APP_HOME 26 | # Resolve links: $0 may be a link 27 | PRG="$0" 28 | # Need this for relative symlinks. 29 | while [ -h "$PRG" ] ; do 30 | ls=`ls -ld "$PRG"` 31 | link=`expr "$ls" : '.*-> \(.*\)$'` 32 | if expr "$link" : '/.*' > /dev/null; then 33 | PRG="$link" 34 | else 35 | PRG=`dirname "$PRG"`"/$link" 36 | fi 37 | done 38 | SAVED="`pwd`" 39 | cd "`dirname \"$PRG\"`/" >/dev/null 40 | APP_HOME="`pwd -P`" 41 | cd "$SAVED" >/dev/null 42 | 43 | APP_NAME="Gradle" 44 | APP_BASE_NAME=`basename "$0"` 45 | 46 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 47 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 48 | 49 | # Use the maximum available, or set MAX_FD != -1 to use that value. 50 | MAX_FD="maximum" 51 | 52 | warn () { 53 | echo "$*" 54 | } 55 | 56 | die () { 57 | echo 58 | echo "$*" 59 | echo 60 | exit 1 61 | } 62 | 63 | # OS specific support (must be 'true' or 'false'). 64 | cygwin=false 65 | msys=false 66 | darwin=false 67 | nonstop=false 68 | case "`uname`" in 69 | CYGWIN* ) 70 | cygwin=true 71 | ;; 72 | Darwin* ) 73 | darwin=true 74 | ;; 75 | MINGW* ) 76 | msys=true 77 | ;; 78 | NONSTOP* ) 79 | nonstop=true 80 | ;; 81 | esac 82 | 83 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 84 | 85 | # Determine the Java command to use to start the JVM. 86 | if [ -n "$JAVA_HOME" ] ; then 87 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 88 | # IBM's JDK on AIX uses strange locations for the executables 89 | JAVACMD="$JAVA_HOME/jre/sh/java" 90 | else 91 | JAVACMD="$JAVA_HOME/bin/java" 92 | fi 93 | if [ ! 
-x "$JAVACMD" ] ; then 94 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 95 | 96 | Please set the JAVA_HOME variable in your environment to match the 97 | location of your Java installation." 98 | fi 99 | else 100 | JAVACMD="java" 101 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 102 | 103 | Please set the JAVA_HOME variable in your environment to match the 104 | location of your Java installation." 105 | fi 106 | 107 | # Increase the maximum file descriptors if we can. 108 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 109 | MAX_FD_LIMIT=`ulimit -H -n` 110 | if [ $? -eq 0 ] ; then 111 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 112 | MAX_FD="$MAX_FD_LIMIT" 113 | fi 114 | ulimit -n $MAX_FD 115 | if [ $? -ne 0 ] ; then 116 | warn "Could not set maximum file descriptor limit: $MAX_FD" 117 | fi 118 | else 119 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 120 | fi 121 | fi 122 | 123 | # For Darwin, add options to specify how the application appears in the dock 124 | if $darwin; then 125 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 126 | fi 127 | 128 | # For Cygwin or MSYS, switch paths to Windows format before running java 129 | if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then 130 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 131 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 132 | JAVACMD=`cygpath --unix "$JAVACMD"` 133 | 134 | # We build the pattern for arguments to be converted via cygpath 135 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 136 | SEP="" 137 | for dir in $ROOTDIRSRAW ; do 138 | ROOTDIRS="$ROOTDIRS$SEP$dir" 139 | SEP="|" 140 | done 141 | OURCYGPATTERN="(^($ROOTDIRS))" 142 | # Add a user-defined pattern to the cygpath arguments 143 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 144 | 
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 145 | fi 146 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 147 | i=0 148 | for arg in "$@" ; do 149 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 150 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 151 | 152 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 153 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 154 | else 155 | eval `echo args$i`="\"$arg\"" 156 | fi 157 | i=`expr $i + 1` 158 | done 159 | case $i in 160 | 0) set -- ;; 161 | 1) set -- "$args0" ;; 162 | 2) set -- "$args0" "$args1" ;; 163 | 3) set -- "$args0" "$args1" "$args2" ;; 164 | 4) set -- "$args0" "$args1" "$args2" "$args3" ;; 165 | 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 166 | 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 167 | 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 168 | 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 169 | 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 170 | esac 171 | fi 172 | 173 | # Escape application args 174 | save () { 175 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 176 | echo " " 177 | } 178 | APP_ARGS=`save "$@"` 179 | 180 | # Collect all arguments for the java command, following the shell quoting and substitution rules 181 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 182 | 183 | exec "$JAVACMD" "$@" 184 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 
3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if "%ERRORLEVEL%" == "0" goto init 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 
50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto init 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 64 | 65 | goto fail 66 | 67 | :init 68 | @rem Get command-line arguments, handling Windows variants 69 | 70 | if not "%OS%" == "Windows_NT" goto win9xME_args 71 | 72 | :win9xME_args 73 | @rem Slurp the command line arguments. 74 | set CMD_LINE_ARGS= 75 | set _SKIP=2 76 | 77 | :win9xME_args_slurp 78 | if "x%~1" == "x" goto execute 79 | 80 | set CMD_LINE_ARGS=%* 81 | 82 | :execute 83 | @rem Setup the command line 84 | 85 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 86 | 87 | @rem Execute Gradle 88 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 89 | 90 | :end 91 | @rem End local scope for the variables with windows NT shell 92 | if "%ERRORLEVEL%"=="0" goto mainEnd 93 | 94 | :fail 95 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 96 | rem the _cmd.exe /c_ return code! 
97 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 98 | exit /b 1 99 | 100 | :mainEnd 101 | if "%OS%"=="Windows_NT" endlocal 102 | 103 | :omega 104 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'embulk-output-td' 2 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/output/td/FinalizableExecutorService.java: -------------------------------------------------------------------------------- 1 | package org.embulk.output.td; 2 | 3 | import java.io.Closeable; 4 | import java.io.IOException; 5 | import java.util.LinkedList; 6 | import java.util.Queue; 7 | import java.util.concurrent.Callable; 8 | import java.util.concurrent.ExecutionException; 9 | import java.util.concurrent.ExecutorService; 10 | import java.util.concurrent.Executors; 11 | import java.util.concurrent.Future; 12 | 13 | public class FinalizableExecutorService 14 | { 15 | public static class NotCloseable 16 | implements Closeable 17 | { 18 | @Override 19 | public void close() 20 | throws IOException 21 | { 22 | // ignore 23 | } 24 | } 25 | 26 | protected ExecutorService threads; 27 | protected Queue runningTasks; 28 | 29 | public FinalizableExecutorService() 30 | { 31 | this.threads = Executors.newCachedThreadPool(); 32 | this.runningTasks = new LinkedList<>(); 33 | } 34 | 35 | private static class RunningTask 36 | { 37 | private Future future; 38 | private Closeable finalizer; 39 | 40 | RunningTask(Future future, Closeable finalizer) 41 | { 42 | this.future = future; 43 | this.finalizer = finalizer; 44 | } 45 | 46 | public void join() 47 | throws IOException 48 | { 49 | try { 50 | future.get(); 51 | } 52 | catch (InterruptedException e) { 53 | throw new IOException(e); 54 | } 55 | catch (ExecutionException e) { 56 | throw new IOException(e.getCause()); 57 | } 58 | finalizer.close(); 59 | } 60 | 
61 | public void abort() 62 | throws IOException 63 | { 64 | finalizer.close(); 65 | } 66 | } 67 | 68 | public void submit(Callable task, Closeable finalizer) 69 | { 70 | Future future = threads.submit(task); 71 | runningTasks.add(new RunningTask(future, finalizer)); 72 | } 73 | 74 | public void joinPartial(long upto) 75 | throws IOException 76 | { 77 | while (runningTasks.size() > upto) { 78 | runningTasks.peek().join(); 79 | runningTasks.remove(); 80 | } 81 | } 82 | 83 | public void joinAll() 84 | throws IOException 85 | { 86 | joinPartial(0); 87 | } 88 | 89 | public void shutdown() 90 | throws IOException 91 | { 92 | try { 93 | joinAll(); 94 | } 95 | finally { 96 | threads.shutdown(); 97 | for (RunningTask task : runningTasks) { 98 | task.abort(); 99 | } 100 | } 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/output/td/MsgpackGZFileBuilder.java: -------------------------------------------------------------------------------- 1 | package org.embulk.output.td; 2 | 3 | import org.msgpack.core.MessagePack; 4 | import org.msgpack.core.MessagePacker; 5 | import org.msgpack.value.Value; 6 | 7 | import java.io.BufferedOutputStream; 8 | import java.io.Closeable; 9 | import java.io.File; 10 | import java.io.FileOutputStream; 11 | import java.io.FilterOutputStream; 12 | import java.io.IOException; 13 | import java.io.OutputStream; 14 | import java.util.zip.GZIPOutputStream; 15 | 16 | import static com.google.common.base.Preconditions.checkNotNull; 17 | 18 | public class MsgpackGZFileBuilder 19 | implements Closeable 20 | { 21 | static class DataSizeFilter 22 | extends FilterOutputStream 23 | { 24 | private long size = 0; 25 | 26 | public DataSizeFilter(OutputStream out) 27 | { 28 | super(out); 29 | } 30 | 31 | @Override 32 | public void write(int b) 33 | throws IOException 34 | { 35 | size += 1; 36 | super.write(b); 37 | } 38 | 39 | @Override 40 | public void write(byte[] b, int off, int len) 
41 | throws IOException 42 | { 43 | size += len; 44 | super.write(b, off, len); 45 | } 46 | 47 | @Override 48 | public void close() 49 | throws IOException 50 | { 51 | super.close(); 52 | } 53 | 54 | public long size() 55 | { 56 | return size; 57 | } 58 | } 59 | 60 | private final File file; 61 | private final DataSizeFilter out; 62 | private final GZIPOutputStream gzout; 63 | 64 | private MessagePacker packer; 65 | private long recordCount; 66 | 67 | public MsgpackGZFileBuilder(File file) 68 | throws IOException 69 | { 70 | this.file = checkNotNull(file); 71 | this.out = new DataSizeFilter(new BufferedOutputStream(new FileOutputStream(file))); 72 | this.gzout = new GZIPOutputStream(this.out); 73 | this.packer = MessagePack.newDefaultPacker(this.gzout); 74 | 75 | this.recordCount = 0; 76 | } 77 | 78 | public long getRecordCount() 79 | { 80 | return recordCount; 81 | } 82 | 83 | public long getWrittenSize() 84 | { 85 | return out.size(); 86 | } 87 | 88 | public File getFile() 89 | { 90 | return file; 91 | } 92 | 93 | public boolean delete() 94 | { 95 | return file.delete(); 96 | } 97 | 98 | public void finish() 99 | throws IOException 100 | { 101 | try { 102 | packer.flush(); 103 | } 104 | finally { 105 | close(); 106 | } 107 | } 108 | 109 | @Override 110 | public void close() 111 | throws IOException 112 | { 113 | if (packer != null) { 114 | packer.close(); 115 | packer = null; 116 | } 117 | } 118 | 119 | public void writeNil() 120 | throws IOException 121 | { 122 | packer.packNil(); 123 | } 124 | 125 | public void writeMapBegin(int size) 126 | throws IOException 127 | { 128 | packer.packMapHeader(size); 129 | } 130 | 131 | public void writeMapEnd() 132 | throws IOException 133 | { 134 | recordCount++; 135 | } 136 | 137 | public void writeString(String v) 138 | throws IOException 139 | { 140 | packer.packString(v); 141 | } 142 | 143 | public void writeBoolean(boolean v) 144 | throws IOException 145 | { 146 | packer.packBoolean(v); 147 | } 148 | 149 | public void 
writeLong(long v) 150 | throws IOException 151 | { 152 | packer.packLong(v); 153 | } 154 | 155 | public void writeDouble(double v) 156 | throws IOException 157 | { 158 | packer.packDouble(v); 159 | } 160 | 161 | public void writeValue(Value v) throws IOException { 162 | packer.packValue(v); 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/output/td/RecordWriter.java: -------------------------------------------------------------------------------- 1 | package org.embulk.output.td; 2 | 3 | import com.google.common.annotations.VisibleForTesting; 4 | import com.google.common.base.Stopwatch; 5 | import com.treasuredata.client.TDClient; 6 | import org.embulk.config.TaskReport; 7 | import org.embulk.output.td.writer.FieldWriterSet; 8 | import org.embulk.spi.Exec; 9 | import org.embulk.spi.Page; 10 | import org.embulk.spi.PageReader; 11 | import org.embulk.spi.Schema; 12 | import org.embulk.spi.TransactionalPageOutput; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | 16 | import java.io.File; 17 | import java.io.Closeable; 18 | import java.io.IOException; 19 | import java.util.Locale; 20 | import java.text.NumberFormat; 21 | import java.util.concurrent.Callable; 22 | import java.util.concurrent.TimeUnit; 23 | 24 | import static com.google.common.base.Preconditions.checkNotNull; 25 | 26 | public class RecordWriter 27 | implements TransactionalPageOutput 28 | { 29 | private static final Logger log = LoggerFactory.getLogger(RecordWriter.class); 30 | private final TDClient client; 31 | private final String sessionName; 32 | private final int taskIndex; 33 | 34 | private final FieldWriterSet fieldWriters; 35 | private final File tempDir; 36 | 37 | private int partSeqId = 0; 38 | private PageReader pageReader; 39 | private MsgpackGZFileBuilder builder; 40 | 41 | private final FinalizableExecutorService executor; 42 | private final int uploadConcurrency; 43 | private final long 
fileSplitSize; // unit: kb 44 | 45 | public RecordWriter(TdOutputPlugin.PluginTask task, int taskIndex, TDClient client, FieldWriterSet fieldWriters) 46 | { 47 | this.client = checkNotNull(client); 48 | this.sessionName = task.getSessionName(); 49 | this.taskIndex = taskIndex; 50 | 51 | this.fieldWriters = fieldWriters; 52 | this.tempDir = new File(task.getTempDir().get()); 53 | this.executor = new FinalizableExecutorService(); 54 | this.uploadConcurrency = task.getUploadConcurrency(); 55 | this.fileSplitSize = task.getFileSplitSize() * 1024; 56 | } 57 | 58 | @VisibleForTesting 59 | public void open(final Schema schema) 60 | throws IOException 61 | { 62 | this.pageReader = getPageReader(checkNotNull(schema)); 63 | prepareNextBuilder(); 64 | } 65 | 66 | private void prepareNextBuilder() 67 | throws IOException 68 | { 69 | String prefix = String.format("%s-", sessionName); 70 | File tempFile = File.createTempFile(prefix, ".msgpack.gz", tempDir); 71 | this.builder = new MsgpackGZFileBuilder(tempFile); 72 | } 73 | 74 | @VisibleForTesting 75 | public MsgpackGZFileBuilder getBuilder() 76 | { 77 | return builder; 78 | } 79 | 80 | @Override 81 | public void add(final Page page) 82 | { 83 | pageReader.setPage(checkNotNull(page)); 84 | 85 | try { 86 | while (pageReader.nextRecord()) { 87 | fieldWriters.addRecord(builder, pageReader); 88 | 89 | if (builder.getWrittenSize() > fileSplitSize) { 90 | flush(); 91 | prepareNextBuilder(); 92 | } 93 | } 94 | 95 | } 96 | catch (IOException e) { 97 | throw new RuntimeException(e); 98 | } 99 | } 100 | 101 | public void flush() throws IOException 102 | { 103 | if (builder != null && builder.getRecordCount() > 0) { 104 | builder.finish(); 105 | 106 | log.info("{uploading: {rows: {}, size: {} bytes (compressed)}}", 107 | builder.getRecordCount(), 108 | NumberFormat.getNumberInstance().format(builder.getWrittenSize())); 109 | upload(builder, String.format(Locale.ENGLISH, "task-%d_%d", taskIndex, partSeqId)); 110 | partSeqId++; 111 | builder 
= null; 112 | } 113 | } 114 | 115 | private void upload(final MsgpackGZFileBuilder builder, final String uniquePartName) 116 | throws IOException 117 | { 118 | executor.joinPartial(uploadConcurrency - 1); 119 | executor.submit(new Callable() { 120 | @Override 121 | public Void call() throws Exception 122 | { 123 | File file = builder.getFile(); 124 | 125 | log.debug("{uploading: {file: {}}}", file.getAbsolutePath()); 126 | Stopwatch stopwatch = Stopwatch.createStarted(); 127 | 128 | client.uploadBulkImportPart(sessionName, uniquePartName, builder.getFile()); 129 | 130 | stopwatch.stop(); 131 | stopwatch.elapsed(TimeUnit.MILLISECONDS); 132 | log.debug("{uploaded: {file: {}, time: {}}}", file.getAbsolutePath(), stopwatch); 133 | return null; 134 | } 135 | }, 136 | new Closeable() { 137 | public void close() throws IOException 138 | { 139 | builder.close(); 140 | if (!builder.delete()) { 141 | log.warn("Failed to delete local temporary file {}. Ignoring.", builder.getFile()); 142 | } 143 | } 144 | }); 145 | } 146 | 147 | @Override 148 | public void finish() 149 | { 150 | try { 151 | flush(); 152 | } 153 | catch (IOException e) { 154 | throw new RuntimeException(e); 155 | } 156 | finally { 157 | close(); 158 | } 159 | } 160 | 161 | @Override 162 | public void close() 163 | { 164 | try { 165 | try { 166 | executor.joinAll(); 167 | executor.shutdown(); // shutdown calls joinAll 168 | } 169 | finally { 170 | if (builder != null) { 171 | builder.close(); 172 | builder.delete(); 173 | builder = null; 174 | } 175 | 176 | if (client != null) { 177 | client.close(); 178 | } 179 | } 180 | } 181 | catch (IOException e) { 182 | throw new RuntimeException(e); 183 | } 184 | } 185 | 186 | @Override 187 | public void abort() 188 | { 189 | // do nothing 190 | } 191 | 192 | @Override 193 | public TaskReport commit() 194 | { 195 | final TaskReport report = TdOutputPlugin.CONFIG_MAPPER_FACTORY.newTaskReport() 196 | .set(TdOutputPlugin.TASK_REPORT_UPLOADED_PART_NUMBER, partSeqId); 197 | 
return report; 198 | } 199 | 200 | @SuppressWarnings("deprecation") 201 | private static PageReader getPageReader(final Schema schema) 202 | { 203 | if (HAS_EXEC_GET_PAGE_READER) { 204 | return Exec.getPageReader(schema); 205 | } 206 | else { 207 | return new PageReader(schema); 208 | } 209 | } 210 | 211 | private static boolean hasExecGetPageReader() 212 | { 213 | try { 214 | Exec.class.getMethod("getPageReader", Schema.class); 215 | } 216 | catch (final NoSuchMethodException ex) { 217 | return false; 218 | } 219 | return true; 220 | } 221 | 222 | private static final boolean HAS_EXEC_GET_PAGE_READER = hasExecGetPageReader(); 223 | } 224 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/output/td/TdOutputPlugin.java: -------------------------------------------------------------------------------- 1 | package org.embulk.output.td; 2 | 3 | import com.fasterxml.jackson.annotation.JsonCreator; 4 | import com.fasterxml.jackson.annotation.JsonValue; 5 | import com.google.common.annotations.VisibleForTesting; 6 | import com.google.common.base.Function; 7 | import com.google.common.collect.ArrayListMultimap; 8 | import com.google.common.collect.Lists; 9 | import com.google.common.collect.Maps; 10 | import com.google.common.collect.Multimap; 11 | import com.treasuredata.client.ProxyConfig; 12 | import com.treasuredata.client.TDClient; 13 | import com.treasuredata.client.TDClientBuilder; 14 | import com.treasuredata.client.TDClientHttpConflictException; 15 | import com.treasuredata.client.TDClientHttpNotFoundException; 16 | import com.treasuredata.client.model.TDBulkImportSession; 17 | import com.treasuredata.client.model.TDBulkImportSession.ImportStatus; 18 | import com.treasuredata.client.model.TDColumn; 19 | import com.treasuredata.client.model.TDColumnType; 20 | import com.treasuredata.client.model.TDTable; 21 | import org.embulk.config.ConfigDiff; 22 | import org.embulk.config.ConfigException; 23 | import 
org.embulk.config.ConfigSource; 24 | import org.embulk.config.TaskReport; 25 | import org.embulk.config.TaskSource; 26 | import org.embulk.output.td.writer.FieldWriterSet; 27 | import org.embulk.spi.Column; 28 | import org.embulk.spi.ColumnVisitor; 29 | import org.embulk.spi.DataException; 30 | import org.embulk.spi.Exec; 31 | import org.embulk.spi.OutputPlugin; 32 | import org.embulk.spi.Schema; 33 | import org.embulk.spi.TransactionalPageOutput; 34 | import org.embulk.util.config.Config; 35 | import org.embulk.util.config.ConfigDefault; 36 | import org.embulk.util.config.ConfigMapper; 37 | import org.embulk.util.config.ConfigMapperFactory; 38 | import org.embulk.util.config.Task; 39 | import org.embulk.util.config.TaskMapper; 40 | import org.msgpack.core.MessagePack; 41 | import org.msgpack.core.MessageUnpacker; 42 | import org.msgpack.value.Value; 43 | import org.slf4j.Logger; 44 | import org.slf4j.LoggerFactory; 45 | 46 | import javax.validation.constraints.Max; 47 | import javax.validation.constraints.Min; 48 | 49 | import java.io.IOException; 50 | import java.io.InputStream; 51 | import java.nio.charset.StandardCharsets; 52 | import java.time.Instant; 53 | import java.time.ZoneOffset; 54 | import java.time.format.DateTimeFormatter; 55 | import java.util.ArrayList; 56 | import java.util.HashMap; 57 | import java.util.LinkedHashMap; 58 | import java.util.List; 59 | import java.util.Map; 60 | import java.util.Optional; 61 | import java.util.Properties; 62 | import java.util.UUID; 63 | import java.util.regex.Pattern; 64 | import java.util.zip.GZIPInputStream; 65 | 66 | import static java.lang.Integer.parseInt; 67 | 68 | public class TdOutputPlugin 69 | implements OutputPlugin 70 | { 71 | public interface PluginTask 72 | extends Task 73 | { 74 | @Config("apikey") 75 | String getApiKey(); 76 | 77 | @Config("endpoint") 78 | @ConfigDefault("\"api.treasuredata.com\"") 79 | String getEndpoint(); 80 | 81 | @Config("use_ssl") 82 | @ConfigDefault("true") 83 | boolean 
getUseSsl(); 84 | 85 | @Config("http_proxy") 86 | @ConfigDefault("null") 87 | Optional getHttpProxy(); 88 | 89 | // TODO connect_timeout, read_timeout, send_timeout 90 | 91 | @Config("mode") 92 | @ConfigDefault("\"append\"") 93 | Mode getMode(); 94 | 95 | @Config("auto_create_table") 96 | @ConfigDefault("true") 97 | boolean getAutoCreateTable(); 98 | 99 | @Config("database") 100 | String getDatabase(); 101 | 102 | @Config("table") 103 | String getTable(); 104 | 105 | void setLoadTargetTableName(String name); 106 | String getLoadTargetTableName(); 107 | 108 | @Config("session") 109 | @ConfigDefault("null") 110 | Optional getSession(); 111 | 112 | @Config("default_timestamp_type_convert_to") 113 | @ConfigDefault("\"string\"") 114 | ConvertTimestampType getConvertTimestampType(); 115 | 116 | @Config("time_column") 117 | @ConfigDefault("null") 118 | Optional getTimeColumn(); 119 | 120 | @Config("time_value") 121 | @ConfigDefault("null") 122 | Optional getTimeValue(); // TODO allow timestamp format such as {from: "2015-01-01 00:00:00 UTC", to: "2015-01-02 00:00:00 UTC"} as well as unixtime integer 123 | void setTimeValue(Optional timeValue); 124 | 125 | @Config("ignore_alternative_time_if_time_exists") 126 | @ConfigDefault("false") 127 | boolean getIgnoreAlternativeTimeIfTimeExists(); 128 | 129 | @Config("unix_timestamp_unit") 130 | @ConfigDefault("\"sec\"") 131 | UnixTimestampUnit getUnixTimestampUnit(); 132 | 133 | @Config("tmpdir") 134 | @ConfigDefault("null") 135 | Optional getTempDir(); 136 | void setTempDir(Optional dir); 137 | 138 | @Config("upload_concurrency") 139 | @ConfigDefault("2") 140 | @Min(1) 141 | @Max(8) 142 | int getUploadConcurrency(); 143 | 144 | @Config("file_split_size") 145 | @ConfigDefault("16384") // default 16MB (unit: kb) 146 | long getFileSplitSize(); 147 | 148 | // From org.embulk.spi.time.TimestampFormatter.Task. 
149 | @Config("default_timezone") 150 | @ConfigDefault("\"UTC\"") 151 | String getDefaultTimeZoneId(); 152 | 153 | // From org.embulk.spi.time.TimestampFormatter.Task, but modified to have a different @ConfigDefault. 154 | @Config("default_timestamp_format") 155 | // SQL timestamp with milliseconds is, by defualt, used because Hive and Presto use 156 | // those format. As timestamp type, Presto 157 | // * cannot parse SQL timestamp with timezone like '2015-02-03 04:05:06.789 UTC' 158 | // * cannot parse SQL timestamp with nanoseconds like '2015-02-03 04:05:06.789012345' 159 | // * cannot parse SQL timestamp with microseconds like '2015-02-03 04:05:06.789012' 160 | // * can parse SQL timestamp with milliseconds like '2015-02-03 04:05:06.789' 161 | // On the other hand, Hive 162 | // * cannot parse SQL timestamp with timezone like '2015-02-03 04:05:06.789 UTC' 163 | // * can parse SQL timestamp with nanoseconds like '2015-02-03 04:05:06.789012345' 164 | // * can parse SQL timestamp with microseconds like '2015-02-03 04:05:06.789012' 165 | // * can parse SQL timestamp with milliseconds like '2015-02-03 04:05:06.789' 166 | @ConfigDefault("\"%Y-%m-%d %H:%M:%S.%3N\"") 167 | String getDefaultTimestampFormat(); 168 | 169 | @Config("column_options") 170 | @ConfigDefault("{}") 171 | Map getColumnOptions(); 172 | 173 | @Config("stop_on_invalid_record") 174 | @ConfigDefault("false") 175 | boolean getStopOnInvalidRecord(); 176 | 177 | @Config("displayed_error_records_count_limit") 178 | @ConfigDefault("10") 179 | @Min(0) 180 | int getDisplayedErrorRecordsCountLimit(); 181 | 182 | @Config("retry_limit") 183 | @ConfigDefault("20") 184 | int getRetryLimit(); 185 | 186 | @Config("retry_initial_interval_millis") 187 | @ConfigDefault("1000") 188 | int getRetryInitialIntervalMillis(); 189 | 190 | @Config("retry_max_interval_millis") 191 | @ConfigDefault("90000") 192 | int getRetryMaxIntervalMillis(); 193 | 194 | @Config("pool_name") 195 | @ConfigDefault("null") 196 | Optional 
getPoolName(); 197 | 198 | @Config("additional_http_headers") 199 | @ConfigDefault("null") 200 | Optional> getAdditionalHttpHeaders(); 201 | 202 | @Config("port") 203 | @ConfigDefault("null") 204 | Optional getPort(); 205 | void setPort(Optional port); 206 | 207 | @Config("default_boolean_type_convert_to") 208 | @ConfigDefault("\"long\"") 209 | ConvertBooleanType getConvertBooleanType(); 210 | 211 | boolean getDoUpload(); 212 | void setDoUpload(boolean doUpload); 213 | 214 | String getSessionName(); 215 | void setSessionName(String session); 216 | } 217 | 218 | public interface ColumnOption 219 | extends Task 220 | { 221 | // From org.embulk.spi.time.TimestampFormatter.TimestampColumnOption. 222 | @Config("timezone") 223 | @ConfigDefault("null") 224 | Optional getTimeZoneId(); 225 | 226 | // From org.embulk.spi.time.TimestampFormatter.TimestampColumnOption. 227 | @Config("format") 228 | @ConfigDefault("null") 229 | Optional getFormat(); 230 | 231 | // It was in an internal interface TdOutputPlugin.TypeColumnOption, but merged directly in TdOutputPlugin.ColumnOption. 232 | // keep backward compatible 233 | @Config("type") 234 | @ConfigDefault("null") 235 | Optional getType(); 236 | 237 | // It was in an internal interface TdOutputPlugin.TypeColumnOption, but merged directly in TdOutputPlugin.ColumnOption. 238 | // keep backward compatible 239 | @Config("value_type") 240 | @ConfigDefault("null") 241 | Optional getValueType(); 242 | } 243 | 244 | public enum Mode 245 | { 246 | APPEND, REPLACE, TRUNCATE; 247 | 248 | @JsonCreator 249 | public static Mode fromConfig(String value) 250 | { 251 | switch(value) { 252 | case "append": 253 | return APPEND; 254 | case "replace": 255 | return REPLACE; 256 | case "truncate": 257 | return TRUNCATE; 258 | default: 259 | throw new ConfigException(String.format("Unknown mode '%s'. 
Supported modes are [append, replace, truncate]", value)); 260 | } 261 | } 262 | 263 | @JsonValue 264 | public String toString() 265 | { 266 | switch(this) { 267 | case APPEND: 268 | return "append"; 269 | case REPLACE: 270 | return "replace"; 271 | case TRUNCATE: 272 | return "truncate"; 273 | default: 274 | throw new IllegalStateException(); 275 | } 276 | } 277 | } 278 | 279 | public interface HttpProxyTask 280 | extends Task 281 | { 282 | @Config("host") 283 | String getHost(); 284 | 285 | @Config("port") 286 | int getPort(); 287 | 288 | @Config("use_ssl") 289 | @ConfigDefault("false") 290 | boolean getUseSsl(); 291 | 292 | @Config("user") 293 | @ConfigDefault("null") 294 | Optional getUser(); 295 | 296 | @Config("password") 297 | @ConfigDefault("null") 298 | Optional getPassword(); 299 | } 300 | 301 | public static enum ConvertTimestampType 302 | { 303 | STRING(-1), 304 | //SEC_DOUBLE(-1), // TODO 305 | SEC(1); 306 | //MILLI(1000), // TODO 307 | //MICRO(1000000), // TODO 308 | //NANO(1000000000); // TODO 309 | 310 | private final int unit; 311 | 312 | private ConvertTimestampType(int unit) 313 | { 314 | this.unit = unit; 315 | } 316 | 317 | public int getFractionUnit() 318 | { 319 | return unit; 320 | } 321 | 322 | @JsonCreator 323 | public static ConvertTimestampType of(String s) 324 | { 325 | switch (s) { 326 | case "string": return STRING; 327 | //case "sec_double": return SEC_DOUBLE; 328 | case "sec": return SEC; 329 | //case "milli": return MILLI; 330 | //case "micro": return MICRO; 331 | //case "nano": return NANO; 332 | default: 333 | throw new ConfigException( 334 | String.format("Unknown convert_timestamp_type '%s'. 
Supported units are string, sec, milli, micro, nano, and sec_double", s)); 335 | } 336 | } 337 | 338 | @JsonValue 339 | @Override 340 | public String toString() 341 | { 342 | return name().toLowerCase(); 343 | } 344 | } 345 | 346 | public static enum ConvertBooleanType 347 | { 348 | STRING(TDColumnType.STRING), 349 | LONG(TDColumnType.LONG); 350 | 351 | private final TDColumnType outputColumnType; 352 | 353 | private ConvertBooleanType(TDColumnType outputColumnType) 354 | { 355 | this.outputColumnType = outputColumnType; 356 | } 357 | 358 | public TDColumnType getOutputColumnType() 359 | { 360 | return outputColumnType; 361 | } 362 | 363 | @JsonCreator 364 | public static ConvertBooleanType of(String value) 365 | { 366 | final String loweredCaseValue = value.toLowerCase(); 367 | switch (loweredCaseValue) { 368 | case "long": return LONG; 369 | case "string": return STRING; 370 | default: 371 | throw new ConfigException(String.format("Unknown convert_boolean_type '%s'. Supported types are [long, string]", loweredCaseValue)); 372 | } 373 | } 374 | } 375 | 376 | public static enum UnixTimestampUnit 377 | { 378 | SEC(1), 379 | MILLI(1000), 380 | MICRO(1000000), 381 | NANO(1000000000); 382 | 383 | private final int unit; 384 | 385 | private UnixTimestampUnit(int unit) 386 | { 387 | this.unit = unit; 388 | } 389 | 390 | public int getFractionUnit() 391 | { 392 | return unit; 393 | } 394 | 395 | @JsonCreator 396 | public static UnixTimestampUnit of(String s) 397 | { 398 | switch (s) { 399 | case "sec": return SEC; 400 | case "milli": return MILLI; 401 | case "micro": return MICRO; 402 | case "nano": return NANO; 403 | default: 404 | throw new ConfigException( 405 | String.format("Unknown unix_timestamp_unit '%s'. 
Supported units are sec, milli, micro, and nano", s));
        }
    }

    @JsonValue
    @Override
    public String toString()
    {
        return name().toLowerCase();
    }
}

// NOTE(review): generic type parameters appear to have been stripped from this
// dump (raw Map/Optional/List below); restore from version control when editing.

// Task-report key carrying the number of uploaded bulk-import parts.
static final String TASK_REPORT_UPLOADED_PART_NUMBER = "uploaded_part_number";

private static final Logger log = LoggerFactory.getLogger(TdOutputPlugin.class);

/**
 * Entry point of the output transaction: validates configuration, prepares the
 * destination (and, for replace/truncate, a temporary) table, then delegates
 * the upload flow to {@link #doRun}.
 */
public ConfigDiff transaction(final ConfigSource config, final Schema schema, int processorCount,
        OutputPlugin.Control control)
{
    final PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class);

    // Fail fast if column_options names a column missing from the schema.
    checkColumnOptions(schema, task.getColumnOptions());

    // Generate a session name unless one was given explicitly.
    task.setSessionName(buildBulkImportSessionName(task));

    if (!task.getTempDir().isPresent()) {
        task.setTempDir(Optional.of(getEnvironmentTempDirectory()));
    }

    try (TDClient client = newTDClient(task)) {
        final String databaseName = task.getDatabase();
        final String tableName = task.getTable();

        switch (task.getMode()) {
        case APPEND:
            if (task.getAutoCreateTable()) {
                // auto_create_table is honored only in append mode.
                createTableIfNotExists(client, databaseName, tableName);
            }
            else {
                // Otherwise the database/table must already exist.
                validateTableExists(client, databaseName, tableName);
            }
            task.setLoadTargetTableName(tableName);
            break;

        case REPLACE:
        case TRUNCATE:
            // replace/truncate always create the destination table if missing,
            // and load into a temporary table that is renamed on commit.
            createTableIfNotExists(client, databaseName, tableName);
            task.setLoadTargetTableName(
                    createTemporaryTableWithPrefix(client, databaseName, makeTablePrefix(task)));
            break;
        }

        // Validate the FieldWriterSet configuration before the transaction starts.
        validateFieldWriterSet(task, schema);

        return doRun(client, schema, task, control);
    }
}

/** Resumed transaction: re-maps the saved task and re-runs the upload flow. */
public ConfigDiff resume(TaskSource taskSource,
        Schema schema, int processorCount,
        OutputPlugin.Control control)
{
    final PluginTask task = TASK_MAPPER.map(taskSource, PluginTask.class);
    try (TDClient client = newTDClient(task)) {
        return doRun(client, schema, task, control);
    }
}

/**
 * Runs the workers, then either performs/commits the bulk import session or,
 * when no parts were uploaded at all, only pushes the schema update.
 */
@VisibleForTesting
ConfigDiff doRun(TDClient client, Schema schema, PluginTask task, OutputPlugin.Control control)
{
    // false means every part is already uploaded; workers may skip uploading.
    final boolean doUpload = startBulkImportSession(
            client, task.getSessionName(), task.getDatabase(), task.getLoadTargetTableName());
    task.setDoUpload(doUpload);
    final List taskReports = control.run(task.toTaskSource());

    if (isNoUploadedParts(taskReports)) {
        // No parts: skip perform/commit, but still append new columns to the table.
        log.info("Skip performing and committing bulk import session '{}' since no parts are uploaded.", task.getSessionName());
        Map newColumns = updateSchema(client, schema, task);
        printNewAddedColumns(newColumns);
    }
    else {
        completeBulkImportSession(client, schema, task, 0); // TODO perform job priority
    }

    // commit phase
    switch (task.getMode()) {
    case APPEND:
        // already done
        break;
    case REPLACE:
    case TRUNCATE:
        // Swap the temporary table into place.
        renameTable(client, task.getDatabase(), task.getLoadTargetTableName(), task.getTable());
    }

    final ConfigDiff configDiff = CONFIG_MAPPER_FACTORY.newConfigDiff();
    configDiff.set("last_session", task.getSessionName());
    return configDiff;
}

/** Deletes the bulk import session after the transaction finishes. */
public void cleanup(TaskSource taskSource,
        Schema schema, int processorCount,
        List successTaskReports)
{
    final PluginTask task = TASK_MAPPER.map(taskSource, PluginTask.class);
    try (TDClient client = newTDClient(task)) {
        final String sessionName = task.getSessionName();
        log.info("Deleting bulk import session '{}'", sessionName);
        client.deleteBulkImportSession(sessionName);
    }
}

// Prefix for the temporary load table: "<table>_<session>".
private String makeTablePrefix(PluginTask task)
{
    return task.getTable() + "_" + task.getSessionName();
}

/**
 * Ensures every column named in column_options exists in the input schema.
 * Schema.lookupColumn throws SchemaConfigException for unknown names.
 */
@VisibleForTesting
void checkColumnOptions(Schema schema, Map columnOptions)
{
    for (String columnName : columnOptions.keySet()) {
        schema.lookupColumn(columnName); // throws SchemaConfigException
    }
}

/** Builds a TDClient from the task's connection, retry, and proxy settings. */
@VisibleForTesting
public TDClient newTDClient(final PluginTask task)
{
    final TDClientBuilder builder = TDClient.newBuilder();
    builder.setApiKey(task.getApiKey());
    builder.setEndpoint(task.getEndpoint());
    builder.setUseSSL(task.getUseSsl());
    builder.setConnectTimeoutMillis(60000); // default 15000
    builder.setReadTimeoutMillis(60000); // default 60000
    builder.setRetryLimit(task.getRetryLimit());
    builder.setRetryInitialIntervalMillis(task.getRetryInitialIntervalMillis());
    builder.setRetryMaxIntervalMillis(task.getRetryMaxIntervalMillis());

    if (task.getPort().isPresent()) {
        builder.setPort(task.getPort().get());
    }

    if (task.getAdditionalHttpHeaders().isPresent()) {
        builder.setHeaders(buildMultiMapHeaders(task.getAdditionalHttpHeaders().get()));
    }

    final Optional proxyConfig = newProxyConfig(task.getHttpProxy());
    if (proxyConfig.isPresent()) {
        builder.setProxy(proxyConfig.get());
    }

    return builder.build();
}

// Converts a plain header map into the Multimap shape TDClientBuilder expects.
private Multimap buildMultiMapHeaders(Map headers)
{
    final Multimap headerMultimap = ArrayListMultimap.create();
    for (Map.Entry entry : headers.entrySet()) {
        headerMultimap.put(entry.getKey(), entry.getValue());
    }
    return headerMultimap;
}

/**
 * Resolves proxy settings for TDClient. Precedence:
 *   1. JVM system properties (http[s].proxyHost and friends)
 *   2. the plugin's http_proxy config option
 */
@VisibleForTesting
Optional newProxyConfig(Optional task)
{
    final Properties sysProps = System.getProperties();
    if (sysProps.containsKey("http.proxyHost") || sysProps.containsKey("https.proxyHost")) {
        final boolean useSsl = sysProps.containsKey("https.proxyHost");
        final String scheme = useSsl ? "https" : "http";
        final String host = sysProps.getProperty(scheme + ".proxyHost");
        final int port = parseInt(sysProps.getProperty(scheme + ".proxyPort", useSsl ? "443" : "80"));
        final com.google.common.base.Optional user =
                com.google.common.base.Optional.fromNullable(sysProps.getProperty(scheme + ".proxyUser"));
        final com.google.common.base.Optional password =
                com.google.common.base.Optional.fromNullable(sysProps.getProperty(scheme + ".proxyPassword"));
        return Optional.of(new ProxyConfig(host, port, useSsl, user, password));
    }
    if (task.isPresent()) {
        final HttpProxyTask proxyTask = task.get();
        return Optional.of(new ProxyConfig(proxyTask.getHost(), proxyTask.getPort(), proxyTask.getUseSsl(),
                com.google.common.base.Optional.fromNullable(proxyTask.getUser().orElse(null)),
                com.google.common.base.Optional.fromNullable(proxyTask.getPassword().orElse(null))));
    }
    return Optional.empty();
}

/**
 * Creates the table (and, if necessary, the database) unless they exist.
 * Conflict responses mean "already there" and are deliberately ignored.
 */
@VisibleForTesting
void createTableIfNotExists(TDClient client, String databaseName, String tableName)
{
    log.debug("Creating table \"{}\".\"{}\" if not exists", databaseName, tableName);
    try {
        client.createTable(databaseName, tableName);
        log.debug("Created table \"{}\".\"{}\"", databaseName, tableName);
    }
    catch (TDClientHttpNotFoundException e) {
        // Database missing: create it, then retry the table.
        try {
            client.createDatabase(databaseName);
            log.debug("Created database \"{}\"", databaseName);
        }
        catch (TDClientHttpConflictException ex) {
            // ignorable error
        }
        try {
            client.createTable(databaseName, tableName);
            log.debug("Created table \"{}\".\"{}\"", databaseName, tableName);
        }
        catch (TDClientHttpConflictException exe) {
            // ignorable error
        }
    }
    catch (TDClientHttpConflictException e) {
        // ignorable error
    }
}

/**
 * Creates a fresh temporary table, appending "_" to the name until a
 * non-conflicting name is found. Returns the name actually created.
 */
@VisibleForTesting
String createTemporaryTableWithPrefix(TDClient client, String databaseName, String tablePrefix)
        throws TDClientHttpConflictException
{
    String candidate = tablePrefix;
    while (true) {
        log.debug("Creating temporal table \"{}\".\"{}\"", databaseName, candidate);
        try {
            client.createTable(databaseName, candidate);
            log.debug("Created temporal table \"{}\".\"{}\"", databaseName, candidate);
            return candidate;
        }
        catch (TDClientHttpConflictException e) {
            log.debug("\"{}\".\"{}\" table already exists. Renaming temporal table.", databaseName, candidate);
            candidate += "_";
        }
    }
}

/** Fails with ConfigException when the database or table does not exist. */
@VisibleForTesting
void validateTableExists(TDClient client, String databaseName, String tableName)
{
    try {
        client.showTable(databaseName, tableName);
    }
    catch (TDClientHttpNotFoundException ex) {
        throw new ConfigException(String.format("Database \"%s\" or table \"%s\" doesn't exist", databaseName, tableName), ex);
    }
}

/**
 * Returns the explicit session name when configured; otherwise derives a
 * unique one from the transaction time and a random UUID.
 */
@VisibleForTesting
String buildBulkImportSessionName(PluginTask task)
{
    if (task.getSession().isPresent()) {
        return task.getSession().get();
    }
    final Instant transactionTime = getTransactionTime();
    final DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")
            .withZone(ZoneOffset.UTC);
    return String.format("embulk_%s_%09d_%s",
            dateTimeFormatter.format(transactionTime),
            transactionTime.getNano(), UUID.randomUUID().toString().replace('-', '_'));
}
678 | 679 | // return false if all files are already uploaded 680 | @VisibleForTesting 681 | boolean startBulkImportSession(TDClient client, 682 | String sessionName, String databaseName, String tableName) 683 | { 684 | log.info("Create bulk_import session {}", sessionName); 685 | TDBulkImportSession session; 686 | try { 687 | client.createBulkImportSession(sessionName, databaseName, tableName); 688 | } 689 | catch (TDClientHttpConflictException ex) { 690 | // ignorable error 691 | } 692 | session = client.getBulkImportSession(sessionName); 693 | // TODO check associated databaseName and tableName 694 | 695 | switch (session.getStatus()) { 696 | case UPLOADING: 697 | if (session.isUploadFrozen()) { 698 | return false; 699 | } 700 | return true; 701 | case PERFORMING: 702 | return false; 703 | case READY: 704 | return false; 705 | case COMMITTING: 706 | return false; 707 | case COMMITTED: 708 | return false; 709 | case UNKNOWN: 710 | default: 711 | throw new RuntimeException("Unknown bulk import status"); 712 | } 713 | } 714 | 715 | @VisibleForTesting 716 | void completeBulkImportSession(TDClient client, Schema schema, PluginTask task, int priority) 717 | { 718 | String sessionName = task.getSessionName(); 719 | TDBulkImportSession session = client.getBulkImportSession(sessionName); 720 | 721 | switch (session.getStatus()) { 722 | case UPLOADING: 723 | if (!session.isUploadFrozen()) { 724 | // freeze 725 | try { 726 | client.freezeBulkImportSession(sessionName); 727 | } 728 | catch (TDClientHttpConflictException e) { 729 | // ignorable error 730 | } 731 | } 732 | // perform 733 | client.performBulkImportSession( 734 | sessionName, 735 | com.google.common.base.Optional.fromNullable(task.getPoolName().orElse(null))); // TODO use priority 736 | 737 | // pass 738 | case PERFORMING: 739 | log.info("Performing bulk import session '{}'", sessionName); 740 | session = waitForStatusChange(client, sessionName, 741 | ImportStatus.PERFORMING, ImportStatus.READY, 742 | 
"perform"); 743 | log.info(" job id: {}", session.getJobId()); 744 | 745 | // pass 746 | case READY: 747 | // TODO add an option to make the transaction failed if error_records or error_parts is too large 748 | 749 | // add Embulk's columns to the table schema 750 | Map newColumns = updateSchema(client, schema, task); 751 | log.info("Committing bulk import session '{}'", sessionName); 752 | log.info(" valid records: {}", session.getValidRecords()); 753 | log.info(" error records: {}", session.getErrorRecords()); 754 | log.info(" valid parts: {}", session.getValidParts()); 755 | log.info(" error parts: {}", session.getErrorParts()); 756 | printNewAddedColumns(newColumns); 757 | 758 | if (session.getErrorRecords() > 0L) { 759 | showBulkImportErrorRecords(client, sessionName, (int) Math.min(session.getErrorRecords(), task.getDisplayedErrorRecordsCountLimit())); 760 | } 761 | 762 | if (session.getErrorRecords() > 0 && task.getStopOnInvalidRecord()) { 763 | throw new DataException(String.format("Stop committing because the perform job skipped %d error records", session.getErrorRecords())); 764 | } 765 | 766 | // commit 767 | client.commitBulkImportSession(sessionName); 768 | 769 | // pass 770 | case COMMITTING: 771 | session = waitForStatusChange(client, sessionName, 772 | ImportStatus.COMMITTING, ImportStatus.COMMITTED, 773 | "commit"); 774 | 775 | // pass 776 | case COMMITTED: 777 | return; 778 | 779 | case UNKNOWN: 780 | throw new RuntimeException("Unknown bulk import status"); 781 | } 782 | } 783 | 784 | Map updateSchema(TDClient client, Schema inputSchema, PluginTask task) 785 | { 786 | String databaseName = task.getDatabase(); 787 | TDTable table = findTable(client, databaseName, task.getTable()); 788 | 789 | final Map guessedSchema = new LinkedHashMap<>(); 790 | inputSchema.visitColumns(new ColumnVisitor() { 791 | public void booleanColumn(Column column) 792 | { 793 | guessedSchema.put(column.getName(), task.getConvertBooleanType().getOutputColumnType()); 794 | } 
795 | 796 | public void longColumn(Column column) 797 | { 798 | guessedSchema.put(column.getName(), TDColumnType.LONG); 799 | } 800 | 801 | public void doubleColumn(Column column) 802 | { 803 | guessedSchema.put(column.getName(), TDColumnType.DOUBLE); 804 | } 805 | 806 | public void stringColumn(Column column) 807 | { 808 | guessedSchema.put(column.getName(), TDColumnType.STRING); 809 | } 810 | 811 | public void timestampColumn(Column column) 812 | { 813 | guessedSchema.put(column.getName(), TDColumnType.STRING); 814 | } 815 | 816 | public void jsonColumn(Column column) 817 | { 818 | guessedSchema.put(column.getName(), TDColumnType.STRING); 819 | } 820 | }); 821 | 822 | Map usedNames = new HashMap<>(); 823 | if (task.getMode() != Mode.REPLACE) { 824 | for (TDColumn existent : table.getColumns()) { 825 | usedNames.put(existent.getName(), 1); 826 | guessedSchema.remove(existent.getKeyString()); // don't change type of existent columns 827 | } 828 | } 829 | guessedSchema.remove("time"); // don't change type of 'time' column 830 | // 'v' column is special column in TD's table, it is reserved column to be used in Hive 831 | // by executing `SELECT *` query, thus it must not be appended 832 | // otherwise 422 response code will be responded. 
833 | guessedSchema.remove("v"); 834 | 835 | List newSchema; 836 | if (task.getMode() != Mode.REPLACE) { 837 | newSchema = new ArrayList<>(table.getColumns()); 838 | } 839 | else { 840 | newSchema = Lists.newArrayList(); 841 | } 842 | 843 | final Map appliedColumnOptionSchema = applyColumnOptions(guessedSchema, task.getColumnOptions()); 844 | for (Map.Entry pair : appliedColumnOptionSchema.entrySet()) { 845 | String key = renameColumn(pair.getKey()); 846 | 847 | if (!usedNames.containsKey(key)) { 848 | usedNames.put(key, 1); 849 | } 850 | else { 851 | int next = usedNames.get(key); 852 | key = key + "_" + next; 853 | usedNames.put(key, next + 1); 854 | } 855 | 856 | newSchema.add(new TDColumn(key, pair.getValue(), pair.getKey().getBytes(StandardCharsets.UTF_8))); 857 | } 858 | 859 | client.appendTableSchema(databaseName, task.getLoadTargetTableName(), newSchema); 860 | return appliedColumnOptionSchema; 861 | } 862 | 863 | void printNewAddedColumns(Map newColumns) 864 | { 865 | if (!newColumns.isEmpty()) { 866 | log.info(" new columns:"); 867 | } 868 | for (Map.Entry pair : newColumns.entrySet()) { 869 | log.info(" - {}: {}", pair.getKey(), pair.getValue()); 870 | } 871 | } 872 | 873 | private static TDTable findTable(TDClient client, String databaseName, String tableName) 874 | { 875 | try { 876 | return client.showTable(databaseName, tableName); 877 | } 878 | catch (TDClientHttpNotFoundException e) { 879 | return null; 880 | } 881 | } 882 | 883 | public static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory.builder().addDefaultModules().build(); 884 | public static final ConfigMapper CONFIG_MAPPER = CONFIG_MAPPER_FACTORY.createConfigMapper(); 885 | static final TaskMapper TASK_MAPPER = CONFIG_MAPPER_FACTORY.createTaskMapper(); 886 | 887 | private static final Pattern COLUMN_NAME_PATTERN = Pattern.compile("\\A[a-z_][a-z0-9_]*\\z"); 888 | private static final Pattern COLUMN_NAME_SQUASH_PATTERN = 
Pattern.compile("(?:[^a-zA-Z0-9_]|(?:\\A[^a-zA-Z_]))+"); 889 | 890 | private static String renameColumn(String origName) 891 | { 892 | if (COLUMN_NAME_PATTERN.matcher(origName).matches()) { 893 | return origName; 894 | } 895 | return COLUMN_NAME_SQUASH_PATTERN.matcher(origName).replaceAll("_").toLowerCase(); 896 | } 897 | 898 | void showBulkImportErrorRecords(TDClient client, String sessionName, final int recordCountLimit) 899 | { 900 | log.info("Show {} error records", recordCountLimit); 901 | client.getBulkImportErrorRecords(sessionName, new Function() 902 | { 903 | @Override 904 | public Void apply(InputStream input) 905 | { 906 | int errorRecordCount = 0; 907 | try (MessageUnpacker unpacker = MessagePack.newDefaultUnpacker(new GZIPInputStream(input))) { 908 | while (unpacker.hasNext()) { 909 | Value v = unpacker.unpackValue(); 910 | log.info(" {}", v.toJson()); 911 | errorRecordCount += 1; 912 | 913 | if (errorRecordCount >= recordCountLimit) { 914 | break; 915 | } 916 | } 917 | } 918 | catch (IOException ignored) { 919 | log.info("Stop downloading error records"); 920 | } 921 | return null; 922 | } 923 | }); 924 | } 925 | 926 | private Map applyColumnOptions(Map schema, Map columnOptions) 927 | { 928 | return Maps.asMap(schema.keySet(), key -> { 929 | if (columnOptions.containsKey(key)) { 930 | Optional columnType = columnOptions.get(key).getType(); 931 | if (columnType.isPresent()) { 932 | return TDColumnType.parseColumnType(columnType.get()); 933 | } 934 | } 935 | 936 | return schema.get(key); 937 | }); 938 | } 939 | 940 | @VisibleForTesting 941 | TDBulkImportSession waitForStatusChange(TDClient client, String sessionName, 942 | ImportStatus current, ImportStatus expecting, String operation) 943 | { 944 | TDBulkImportSession importSession; 945 | while (true) { 946 | importSession = client.getBulkImportSession(sessionName); 947 | 948 | if (importSession.getStatus() == expecting) { 949 | return importSession; 950 | 951 | } 952 | else if 
(importSession.getStatus() == current) { 953 | // in progress 954 | 955 | } 956 | else { 957 | throw new RuntimeException(String.format("Failed to %s bulk import session '%s'", 958 | operation, sessionName)); 959 | } 960 | 961 | try { 962 | Thread.sleep(3000); 963 | } 964 | catch (InterruptedException e) { 965 | } 966 | } 967 | } 968 | 969 | boolean isNoUploadedParts(List taskReports) 970 | { 971 | int partNumber = 0; 972 | for (TaskReport taskReport : taskReports) { 973 | if (!taskReport.has(TASK_REPORT_UPLOADED_PART_NUMBER)) { 974 | return false; 975 | } 976 | partNumber += taskReport.get(int.class, TASK_REPORT_UPLOADED_PART_NUMBER); 977 | } 978 | return partNumber == 0; 979 | } 980 | 981 | @VisibleForTesting 982 | void renameTable(TDClient client, String databaseName, String oldName, String newName) 983 | { 984 | log.debug("Renaming table \"{}\".\"{}\" to \"{}\"", databaseName, oldName, newName); 985 | client.renameTable(databaseName, oldName, newName, true); 986 | } 987 | 988 | @Override 989 | public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex) 990 | { 991 | final PluginTask task = TASK_MAPPER.map(taskSource, PluginTask.class); 992 | 993 | RecordWriter closeLater = null; 994 | try { 995 | final FieldWriterSet fieldWriters = createFieldWriterSet(task, schema); 996 | closeLater = new RecordWriter(task, taskIndex, newTDClient(task), fieldWriters); 997 | RecordWriter recordWriter = closeLater; 998 | recordWriter.open(schema); 999 | closeLater = null; 1000 | return recordWriter; 1001 | 1002 | } 1003 | catch (IOException e) { 1004 | throw new RuntimeException(e); 1005 | } 1006 | finally { 1007 | if (closeLater != null) { 1008 | closeLater.close(); 1009 | } 1010 | } 1011 | } 1012 | 1013 | @VisibleForTesting 1014 | String getEnvironmentTempDirectory() 1015 | { 1016 | return System.getProperty("java.io.tmpdir"); 1017 | } 1018 | 1019 | protected FieldWriterSet createFieldWriterSet(PluginTask task, Schema schema) 1020 | { 1021 | return 
FieldWriterSet.createWithValidation(task, schema, true); 1022 | } 1023 | 1024 | protected void validateFieldWriterSet(PluginTask task, Schema schema) 1025 | { 1026 | FieldWriterSet.createWithValidation(task, schema, false); 1027 | } 1028 | 1029 | @SuppressWarnings("deprecation") 1030 | private static Instant getTransactionTime() 1031 | { 1032 | if (HAS_EXEC_GET_TRANSACTION_TIME_INSTANT) { 1033 | return Exec.getTransactionTimeInstant(); 1034 | } 1035 | return Exec.getTransactionTime().getInstant(); 1036 | } 1037 | 1038 | private static boolean hasExecGetTransactionTimeInstant() 1039 | { 1040 | try { 1041 | Exec.class.getMethod("getTransactionTimeInstant"); 1042 | } 1043 | catch (final NoSuchMethodException ex) { 1044 | return false; 1045 | } 1046 | return true; 1047 | } 1048 | 1049 | private static final boolean HAS_EXEC_GET_TRANSACTION_TIME_INSTANT = hasExecGetTransactionTimeInstant(); 1050 | } 1051 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/output/td/TimeValueConfig.java: -------------------------------------------------------------------------------- 1 | package org.embulk.output.td; 2 | 3 | import java.util.Optional; 4 | import org.embulk.util.config.Config; 5 | import org.embulk.util.config.ConfigDefault; 6 | import org.embulk.util.config.Task; 7 | 8 | public interface TimeValueConfig 9 | extends Task 10 | { 11 | @Config("mode") 12 | @ConfigDefault("\"incremental_time\"") 13 | String getMode(); 14 | 15 | @Config("value") 16 | @ConfigDefault("null") 17 | Optional getValue(); 18 | 19 | @Config("from") 20 | @ConfigDefault("null") 21 | Optional getFrom(); 22 | 23 | @Config("to") 24 | @ConfigDefault("null") 25 | Optional getTo(); 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/output/td/TimeValueGenerator.java: -------------------------------------------------------------------------------- 1 | package org.embulk.output.td; 2 | 3 | 
import java.util.Optional;
import org.embulk.config.ConfigException;

/**
 * Generates unix-time values for the "time" column when the input lacks one.
 * Two modes: incremental_time cycles through [from, to]; fixed_time always
 * returns the configured value.
 */
public abstract class TimeValueGenerator
{
    /** Returns the next generated unix-time value (seconds). */
    public abstract long next();

    /**
     * Builds a generator for the configured mode, validating that exactly the
     * options required by that mode are set and in the supported time range.
     *
     * @throws ConfigException on an unknown mode or inconsistent options
     */
    public static TimeValueGenerator newGenerator(final TimeValueConfig config)
    {
        switch (config.getMode()) {
        case "incremental_time": // default mode
            require(config.getFrom(), "'from', 'to'");
            validateTimeRange(config.getFrom().get(), "'from'");
            require(config.getTo(), "'to'");
            validateTimeRange(config.getTo().get(), "'to'");
            reject(config.getValue(), "'value'");

            return new IncrementalTimeValueGenerator(config);

        case "fixed_time":
            require(config.getValue(), "'value'");
            validateTimeRange(config.getValue().get(), "'value'");
            reject(config.getFrom(), "'from'");
            reject(config.getTo(), "'to'");

            return new FixedTimeValueGenerator(config);

        default:
            // FIX: corrected typo "Unknwon" -> "Unknown" in the error message.
            throw new ConfigException(String.format("Unknown mode '%s'. Supported methods are incremental_time, fixed_time.", config.getMode()));
        }
    }

    /** Cycles through [from, to] inclusive, wrapping back to from after to. */
    public static class IncrementalTimeValueGenerator
            extends TimeValueGenerator
    {
        private final long from;
        private final long to;

        private long current;

        public IncrementalTimeValueGenerator(final TimeValueConfig config)
        {
            this.from = config.getFrom().get();
            this.to = config.getTo().get();
            this.current = from;
        }

        @Override
        public long next()
        {
            // Return the current value, then advance (wrapping past `to`).
            try {
                return current++;
            }
            finally {
                if (current > to) {
                    current = from;
                }
            }
        }
    }

    /** Always returns the single configured value. */
    public static class FixedTimeValueGenerator
            extends TimeValueGenerator
    {
        private final long value;

        public FixedTimeValueGenerator(final TimeValueConfig config)
        {
            value = config.getValue().get();
        }

        @Override
        public long next()
        {
            return value;
        }
    }

    // ported from embulk-input-s3: fail unless the option is present.
    private static T require(Optional value, String message)
    {
        if (value.isPresent()) {
            return value.get();
        }
        else {
            throw new ConfigException("Required option is not set: " + message);
        }
    }

    // Valid range is [1970-01-01 00:00:00, 9999-12-31 23:59:59] UTC.
    private static void validateTimeRange(long value, String message)
    {
        if (value < 0 || 253402300799L < value) {
            throw new ConfigException("The option value must be within [0, 253402300799L]: " + message);
        }
    }

    // ported from embulk-input-s3: fail if an option invalid for the mode is set.
    private static void reject(Optional value, String message)
    {
        if (value.isPresent()) {
            throw new ConfigException("Invalid option is set: " + message);
        }
    }
}

--------------------------------------------------------------------------------
/src/main/java/org/embulk/output/td/writer/ArrayFieldWriter.java:
--------------------------------------------------------------------------------
package org.embulk.output.td.writer;

import org.embulk.output.td.MsgpackGZFileBuilder;
import org.embulk.spi.Column;
import org.embulk.spi.PageReader;

import java.io.IOException;

/** Writes an array-typed json column as its msgpack value, verbatim. */
public class ArrayFieldWriter
        extends JsonFieldWriter
{
    public ArrayFieldWriter(String keyName)
    {
        super(keyName);
    }

    @Override
    public void writeJsonValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
            throws IOException
    {
        builder.writeValue(reader.getJson(column));
    }
}

--------------------------------------------------------------------------------
/src/main/java/org/embulk/output/td/writer/BooleanFieldWriter.java:
--------------------------------------------------------------------------------
package org.embulk.output.td.writer;

import org.embulk.output.td.MsgpackGZFileBuilder;
import org.embulk.spi.Column;
import org.embulk.spi.DataException;
import org.embulk.spi.PageReader;

import java.io.IOException;

/**
 * Writes a column as a msgpack boolean, coercing numeric and string inputs:
 * non-zero numbers and non-empty strings become true. Timestamp and json
 * inputs are rejected with DataException.
 */
public class BooleanFieldWriter
        extends FieldWriter
{
    public BooleanFieldWriter(String keyName)
    {
        super(keyName);
    }

    @Override
    protected void writeBooleanValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException
    {
        builder.writeBoolean(reader.getBoolean(column));
    }

    @Override
    protected void writeLongValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException
    {
        // Any non-zero long is true.
        builder.writeBoolean(reader.getLong(column) != 0);
    }

    @Override
    protected void writeDoubleValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException
    {
        // Truncate toward zero first, so e.g. 0.5 -> 0 -> false (matches the
        // original Double.longValue() conversion).
        final long truncated = (long) reader.getDouble(column);
        builder.writeBoolean(truncated != 0);
    }

    @Override
    protected void writeStringValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException
    {
        // Any non-empty string is true.
        builder.writeBoolean(!reader.getString(column).isEmpty());
    }

    @Override
    protected void writeTimestampValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
    {
        throw new DataException("It is not able to convert from timestamp to boolean.");
    }

    @Override
    protected void writeJsonValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
    {
        throw new DataException("It is not able to convert from json to boolean.");
    }
}

--------------------------------------------------------------------------------
/src/main/java/org/embulk/output/td/writer/DoubleFieldWriter.java:
--------------------------------------------------------------------------------
package org.embulk.output.td.writer;

import org.embulk.output.td.MsgpackGZFileBuilder;
import org.embulk.spi.Column;
import org.embulk.spi.DataException;
import org.embulk.spi.PageReader;
import java.io.IOException;

/**
 * Writes a column as a msgpack double, coercing boolean (1.0/0.0), long, and
 * numeric-string inputs. Timestamp and json inputs are rejected with
 * DataException.
 */
public class DoubleFieldWriter
        extends FieldWriter
{
    public DoubleFieldWriter(String keyName)
    {
        super(keyName);
    }

    @Override
    protected void writeBooleanValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException
    {
        final double numeric = reader.getBoolean(column) ? 1.0 : 0.0;
        builder.writeDouble(numeric);
    }

    @Override
    protected void writeLongValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException
    {
        builder.writeDouble(reader.getLong(column));
    }

    @Override
    protected void writeDoubleValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException
    {
        builder.writeDouble(reader.getDouble(column));
    }

    @Override
    protected void writeStringValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException
    {
        // Parses the string; a malformed number raises NumberFormatException,
        // same as the original Double.valueOf-based conversion.
        builder.writeDouble(Double.parseDouble(reader.getString(column)));
    }

    @Override
    protected void writeTimestampValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
    {
        throw new DataException("It is not able to convert from timestamp to double.");
    }

    @Override
    protected void writeJsonValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
    {
        throw new DataException("It is not able to convert from json to double.");
    }
}

--------------------------------------------------------------------------------
/src/main/java/org/embulk/output/td/writer/FieldWriter.java:
--------------------------------------------------------------------------------
package org.embulk.output.td.writer;

import org.embulk.output.td.MsgpackGZFileBuilder;
import org.embulk.spi.Column;
import org.embulk.spi.PageReader;
import org.embulk.spi.type.BooleanType;
import org.embulk.spi.type.DoubleType;
import org.embulk.spi.type.JsonType;
import org.embulk.spi.type.LongType;
import org.embulk.spi.type.StringType;
import org.embulk.spi.type.TimestampType;

import java.io.IOException;
import java.time.Instant;

/**
 * Base class for all field writers: emits the msgpack key, then dispatches on
 * the Embulk column type to one of the type-specific write methods, which
 * subclasses implement to perform their target-type conversion.
 */
public abstract class FieldWriter
        implements IFieldWriter
{
    // msgpack map key written before each value.
    private final String keyName;

    protected FieldWriter(String keyName)
    {
        this.keyName = keyName;
    }

    /**
     * Writes "keyName: value" into the msgpack stream. Null cells become nil;
     * otherwise the column's Embulk type selects the write* hook.
     */
    public void writeKeyValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
            throws IOException
    {
        builder.writeString(keyName);

        if (reader.isNull(column)) {
            builder.writeNil();
            return;
        }

        final org.embulk.spi.type.Type columnType = column.getType();
        if (columnType instanceof BooleanType) {
            writeBooleanValue(builder, reader, column);
        }
        else if (columnType instanceof LongType) {
            writeLongValue(builder, reader, column);
        }
        else if (columnType instanceof DoubleType) {
            writeDoubleValue(builder, reader, column);
        }
        else if (columnType instanceof StringType) {
            writeStringValue(builder, reader, column);
        }
        else if (columnType instanceof TimestampType) {
            writeTimestampValue(builder, reader, column);
        }
        else if (columnType instanceof JsonType) {
            writeJsonValue(builder, reader, column);
        }
        else {
            // Unreachable for the column types Embulk currently defines.
            throw new IllegalArgumentException(String.format("Column: %s contains unsupported type: %s",
                    column.getName(), columnType.getName()));
        }
    }

    protected abstract void writeBooleanValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
            throws IOException;

    protected abstract void writeLongValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
            throws IOException;

    protected abstract void writeDoubleValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
            throws IOException;

    protected abstract void writeStringValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
            throws IOException;

    protected abstract void writeTimestampValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
            throws IOException;

    protected abstract void writeJsonValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
            throws IOException;

    // Prefers the non-deprecated getTimestampInstant on newer Embulk cores.
    @SuppressWarnings("deprecation") // org.embulk.spi.time.Timestamp
    Instant getTimestamp(final PageReader reader, final Column column)
    {
        if (HAS_GET_TIMESTAMP_INSTANT) {
            return reader.getTimestampInstant(column);
        }
        return reader.getTimestamp(column).getInstant();
    }

    // Reflection probe: older PageReader lacks getTimestampInstant(Column).
    private static boolean hasGetTimestampInstant()
    {
        try {
            PageReader.class.getMethod("getTimestampInstant", Column.class);
        }
        catch (final NoSuchMethodException ex) {
            return false;
        }
        return true;
    }

    private static final boolean HAS_GET_TIMESTAMP_INSTANT = hasGetTimestampInstant();
}

-------------------------------------------------------------------------------- /src/main/java/org/embulk/output/td/writer/FieldWriterSet.java:
--------------------------------------------------------------------------------
package org.embulk.output.td.writer;

import java.io.IOException;
import java.util.Optional;
import java.util.Map;
import org.embulk.config.ConfigException;
import org.embulk.config.ConfigSource;
import org.embulk.output.td.TdOutputPlugin;
import org.embulk.output.td.TdOutputPlugin.ConvertTimestampType;
import org.embulk.output.td.TimeValueConfig;
import org.embulk.output.td.TimeValueGenerator;
import org.embulk.spi.Column;
import org.embulk.spi.ColumnVisitor;
import org.embulk.spi.DataException;
import org.embulk.spi.PageReader;
import org.embulk.spi.Schema;
import org.embulk.spi.type.BooleanType;
import org.embulk.spi.type.DoubleType;
import org.embulk.spi.type.JsonType;
import org.embulk.spi.type.LongType;
import org.embulk.spi.type.StringType;
import org.embulk.spi.type.TimestampType;
import org.embulk.spi.type.Type;
import org.embulk.spi.type.Types;
import org.embulk.output.td.MsgpackGZFileBuilder;
import org.embulk.util.timestamp.TimestampFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Decides how each input column is serialized into the msgpack.gz records
 * uploaded to Treasure Data, and writes whole records through {@link #addRecord}.
 *
 * <p>The central concern is the 'time' primary (partitioning) key: it may come
 * from an existing 'time' column, be duplicated from a user-specified
 * {@code time_column}, be generated from a {@code time_value} config, or be
 * generated from the upload time as a last resort.
 */
public class FieldWriterSet
{
    /** How a single column relates to the 'time' primary key. */
    private enum ColumnWriterMode
    {
        PRIMARY_KEY,            // the column itself becomes 'time'
        SIMPLE_VALUE,           // written as-is under its (possibly renamed) name
        ADVANCED_VALUE,         // written with a user-forced value_type
        DUPLICATE_PRIMARY_KEY;  // written as-is AND duplicated into 'time'
    }

    private static final Logger log = LoggerFactory.getLogger(FieldWriterSet.class);

    // Number of key-value pairs per record. Includes the extra 'time' entry
    // when one is generated or duplicated (hence the fc + 1 returns below).
    private final int fieldCount;
    private final IFieldWriter[] fieldWriters;
    // Present when 'time' is generated per record rather than read from a column.
    private final Optional<TimeValueGenerator> staticTimeValue;

    protected FieldWriterSet(int fieldCount, IFieldWriter[] fieldWriters, Optional<TimeValueGenerator> staticTimeValue)
    {
        this.fieldCount = fieldCount;
        this.fieldWriters = fieldWriters;
        this.staticTimeValue = staticTimeValue;
    }

    /**
     * Builds a validated FieldWriterSet for the given schema.
     *
     * @param task      plugin configuration; may be mutated (setTimeValue) when
     *                  'time' has to be generated from the upload time
     * @param schema    input schema
     * @param runStage  true while running tasks; suppresses log output that was
     *                  already emitted during the transaction stage
     * @throws ConfigException when time_column/time_value conflict, the
     *         time_column does not exist, or a column type is unsupported
     */
    public static FieldWriterSet createWithValidation(final TdOutputPlugin.PluginTask task, final Schema schema,
            final boolean runStage)
    {
        // When an actual 'time' column exists and the user asked to ignore
        // alternatives, drop time_column/time_value and use the column itself.
        boolean isIgnoreAlternativeTime = task.getIgnoreAlternativeTimeIfTimeExists()
                && schema.getColumns().stream().anyMatch(column -> "time".equals(column.getName()));
        final Optional<String> userDefinedPrimaryKeySourceColumnName = isIgnoreAlternativeTime ? Optional.empty() : task.getTimeColumn();
        ConvertTimestampType convertTimestampType = task.getConvertTimestampType();
        final Optional<TimeValueConfig> timeValueConfig = isIgnoreAlternativeTime ? Optional.empty() : task.getTimeValue();

        if (timeValueConfig.isPresent() && userDefinedPrimaryKeySourceColumnName.isPresent()) {
            throw new ConfigException("Setting both time_column and time_value is invalid");
        }

        boolean foundPrimaryKey = false;
        int duplicatePrimaryKeySourceIndex = -1;

        int fc = 0;
        IFieldWriter[] createdFieldWriters = new IFieldWriter[schema.size()];
        final TimestampFormatter[] timestampFormatters = newTimestampColumnFormatters(task, schema, task.getColumnOptions());

        for (int i = 0; i < schema.size(); i++) {
            String columnName = schema.getColumnName(i);
            Type columnType = schema.getColumnType(i);

            // choose the mode
            final ColumnWriterMode mode;

            if (userDefinedPrimaryKeySourceColumnName.isPresent() &&
                    columnName.equals(userDefinedPrimaryKeySourceColumnName.get())) {
                // found time_column
                if ("time".equals(userDefinedPrimaryKeySourceColumnName.get())) {
                    mode = ColumnWriterMode.PRIMARY_KEY;
                }
                else {
                    mode = ColumnWriterMode.DUPLICATE_PRIMARY_KEY;
                }
            }
            else if ("time".equals(columnName)) {
                // the column name is same with the primary key name.
                if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
                    // 'time' will be produced from time_column; rename the clashing column.
                    columnName = newColumnUniqueName(columnName, schema);
                    mode = ColumnWriterMode.SIMPLE_VALUE;
                    if (!runStage) {
                        log.warn("time_column '{}' is set but 'time' column also exists. The existent 'time' column is renamed to {}",
                                userDefinedPrimaryKeySourceColumnName.get(), columnName);
                    }
                }
                else if (timeValueConfig.isPresent()) {
                    // 'time' will be generated from time_value; rename the clashing column.
                    columnName = newColumnUniqueName(columnName, schema);
                    mode = ColumnWriterMode.SIMPLE_VALUE;
                    if (!runStage) {
                        log.warn("time_value is set but 'time' column also exists. The existent 'time' column is renamed to {}",
                                columnName);
                    }
                }
                else {
                    mode = ColumnWriterMode.PRIMARY_KEY;
                }
            }
            else if (task.getColumnOptions().containsKey(columnName) && task.getColumnOptions().get(columnName).getValueType().isPresent()) {
                mode = ColumnWriterMode.ADVANCED_VALUE;
            }
            else {
                mode = ColumnWriterMode.SIMPLE_VALUE;
            }

            // create the field writer depending on the mode
            final FieldWriter writer;

            switch (mode) {
            case PRIMARY_KEY:
                if (!runStage) {
                    log.info("Using {}:{} column as the data partitioning key", columnName, columnType);
                }
                if (columnType instanceof LongType) {
                    if (task.getUnixTimestampUnit() != TdOutputPlugin.UnixTimestampUnit.SEC) {
                        if (!runStage) {
                            log.warn("time column is converted from {} to seconds", task.getUnixTimestampUnit());
                        }
                    }
                    writer = new UnixTimestampLongFieldWriter(columnName, task.getUnixTimestampUnit().getFractionUnit());
                    foundPrimaryKey = true;
                }
                else if (columnType instanceof TimestampType) {
                    writer = new LongFieldWriter(columnName);
                    foundPrimaryKey = true;
                }
                else {
                    throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
                            columnName, columnType));
                }
                break;

            case SIMPLE_VALUE:
                writer = newSimpleFieldWriter(columnName, columnType, convertTimestampType, timestampFormatters[i]);
                break;

            case ADVANCED_VALUE:
                writer = newAdvancedFieldWriter(columnName, task.getColumnOptions().get(columnName).getValueType().get(),
                        convertTimestampType, timestampFormatters[i]);
                break;

            case DUPLICATE_PRIMARY_KEY:
                duplicatePrimaryKeySourceIndex = i;
                writer = null; // handle later
                break;

            default:
                throw new AssertionError();
            }

            createdFieldWriters[i] = writer;
            fc += 1;
        }

        if (foundPrimaryKey) {
            // appropriate 'time' column is found
            return new FieldWriterSet(fc, createdFieldWriters, Optional.empty());
        }

        if (timeValueConfig.isPresent()) {
            // 'time_value' option is specified
            return new FieldWriterSet(fc + 1, createdFieldWriters, Optional.of(TimeValueGenerator.newGenerator(timeValueConfig.get())));
        }

        if (!foundPrimaryKey && duplicatePrimaryKeySourceIndex >= 0) {
            // 'time_column' option is correctly specified

            String columnName = schema.getColumnName(duplicatePrimaryKeySourceIndex);
            Type columnType = schema.getColumnType(duplicatePrimaryKeySourceIndex);

            IFieldWriter writer;
            if (columnType instanceof LongType) {
                if (!runStage) {
                    log.info("Duplicating {}:{} column (unix timestamp {}) to 'time' column as seconds for the data partitioning",
                            columnName, columnType, task.getUnixTimestampUnit());
                }
                IFieldWriter fw = new LongFieldWriter(columnName);
                writer = new UnixTimestampFieldDuplicator(fw, "time", task.getUnixTimestampUnit().getFractionUnit());
            }
            else if (columnType instanceof TimestampType) {
                if (!runStage) {
                    log.info("Duplicating {}:{} column to 'time' column as seconds for the data partitioning",
                            columnName, columnType);
                }
                IFieldWriter fw = newSimpleTimestampFieldWriter(columnName, columnType, convertTimestampType, timestampFormatters[duplicatePrimaryKeySourceIndex]);
                writer = new TimestampFieldLongDuplicator(fw, "time");
            }
            else {
                throw new ConfigException(String.format("Type of '%s' column must be long or timestamp but got %s",
                        columnName, columnType));
            }

            // replace existing writer
            createdFieldWriters[duplicatePrimaryKeySourceIndex] = writer;
            return new FieldWriterSet(fc + 1, createdFieldWriters, Optional.empty());
        }

        if (!foundPrimaryKey) {
            // primary key is not found yet

            if (userDefinedPrimaryKeySourceColumnName.isPresent()) {
                throw new ConfigException(String.format("A specified time_column '%s' does not exist", userDefinedPrimaryKeySourceColumnName.get()));
            }

            // Fall back to the upload time; store it into the task so all tasks
            // share the same fixed value.
            long uploadTime = System.currentTimeMillis() / 1000;
            if (!runStage) {
                log.info("'time' column is generated and is set to a unix time {}", uploadTime);
            }
            final ConfigSource newConfigSource =
                    TdOutputPlugin.CONFIG_MAPPER_FACTORY.newConfigSource().set("mode", "fixed_time").set("value", uploadTime);
            final TimeValueConfig newConfig =
                    TdOutputPlugin.CONFIG_MAPPER.map(newConfigSource, TimeValueConfig.class);
            task.setTimeValue(Optional.of(newConfig));
            return new FieldWriterSet(fc + 1, createdFieldWriters, Optional.of(TimeValueGenerator.newGenerator(newConfig)));
        }

        throw new AssertionError("Cannot select primary key");
    }

    /** Appends '_' to the name until it no longer clashes with any schema column. */
    private static String newColumnUniqueName(String originalName, Schema schema)
    {
        String name = originalName;
        do {
            name += "_";
        }
        while (containsColumnName(schema, name));
        return name;
    }

    private static boolean containsColumnName(Schema schema, String name)
    {
        for (Column c : schema.getColumns()) {
            if (c.getName().equals(name)) {
                return true;
            }
        }
        return false;
    }

    /** Creates a writer matching the column's declared Embulk type. */
    protected static FieldWriter newSimpleFieldWriter(String columnName, Type columnType, ConvertTimestampType convertTimestampType, TimestampFormatter timestampFormatter)
    {
        if (columnType instanceof BooleanType) {
            return new BooleanFieldWriter(columnName);
        }
        else if (columnType instanceof LongType) {
            return new LongFieldWriter(columnName);
        }
        else if (columnType instanceof DoubleType) {
            return new DoubleFieldWriter(columnName);
        }
        else if (columnType instanceof StringType) {
            return new StringFieldWriter(columnName, timestampFormatter);
        }
        else if (columnType instanceof TimestampType) {
            return newSimpleTimestampFieldWriter(columnName, columnType, convertTimestampType, timestampFormatter);
        }
        else if (columnType instanceof JsonType) {
            return new JsonFieldWriter(columnName);
        }
        else {
            throw new ConfigException("Unsupported type: " + columnType);
        }
    }

    /** Creates a writer for a user-forced value_type from column_options. */
    protected static FieldWriter newAdvancedFieldWriter(String columnName, String valueType, ConvertTimestampType convertTimestampType, TimestampFormatter timestampFormatter)
    {
        switch (valueType) {
        case "string":
            return new StringFieldWriter(columnName, timestampFormatter);
        case "long":
            return new LongFieldWriter(columnName);
        case "boolean":
            return new BooleanFieldWriter(columnName);
        case "double":
            return new DoubleFieldWriter(columnName);
        case "timestamp":
            return newSimpleTimestampFieldWriter(columnName, Types.TIMESTAMP, convertTimestampType, timestampFormatter);
        case "array":
            return new ArrayFieldWriter(columnName);
        case "map":
            return new MapFieldWriter(columnName);

        default:
            throw new DataException("Unsupported value: " + valueType);
        }
    }

    /** Creates a timestamp writer honoring convert_timestamp_type (string or epoch seconds). */
    protected static FieldWriter newSimpleTimestampFieldWriter(String columnName, Type columnType, ConvertTimestampType convertTimestampType, TimestampFormatter timestampFormatter)
    {
        switch (convertTimestampType) {
        case STRING:
            return new StringFieldWriter(columnName, timestampFormatter);

        case SEC:
            return new LongFieldWriter(columnName);

        default:
            // Thread of control doesn't come here but, just in case, it throws ConfigException.
            // Note: String.format uses %s, not the SLF4J-style {} placeholder.
            throw new ConfigException(String.format("Unknown option %s as convert_timestamp_type", convertTimestampType));
        }
    }

    public IFieldWriter getFieldWriter(int index)
    {
        return fieldWriters[index];
    }

    /**
     * Writes one record (the reader's current row) as a msgpack map,
     * prepending a generated 'time' entry when staticTimeValue is present.
     */
    public void addRecord(final MsgpackGZFileBuilder builder, final PageReader reader)
            throws IOException
    {
        beginRecord(builder);

        reader.getSchema().visitColumns(new ColumnVisitor() {
            @Override
            public void booleanColumn(Column column)
            {
                addColumn(builder, reader, column);
            }

            @Override
            public void longColumn(Column column)
            {
                addColumn(builder, reader, column);
            }

            @Override
            public void doubleColumn(Column column)
            {
                addColumn(builder, reader, column);
            }

            @Override
            public void stringColumn(Column column)
            {
                addColumn(builder, reader, column);
            }

            @Override
            public void timestampColumn(Column column)
            {
                addColumn(builder, reader, column);
            }

            @Override
            public void jsonColumn(Column column)
            {
                addColumn(builder, reader, column);
            }
        });

        endRecord(builder);
    }

    private void beginRecord(MsgpackGZFileBuilder builder)
            throws IOException
    {
        builder.writeMapBegin(fieldCount);
        if (staticTimeValue.isPresent()) {
            builder.writeString("time");
            builder.writeLong(staticTimeValue.get().next());
        }
    }

    private void endRecord(MsgpackGZFileBuilder builder)
            throws IOException
    {
        builder.writeMapEnd();
    }

    private void addColumn(MsgpackGZFileBuilder builder, PageReader reader, Column column)
    {
        try {
            fieldWriters[column.getIndex()].writeKeyValue(builder, reader, column);
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds one formatter per timestamp column (other slots stay null),
     * resolving format and time zone from column_options with task defaults.
     */
    private static TimestampFormatter[] newTimestampColumnFormatters(
            final TdOutputPlugin.PluginTask task, final Schema schema, final Map<String, TdOutputPlugin.ColumnOption> columnOptions)
    {
        final TimestampFormatter[] formatters = new TimestampFormatter[schema.getColumnCount()];
        int i = 0;
        for (final Column column : schema.getColumns()) {
            if (column.getType() instanceof TimestampType) {
                final Optional<TdOutputPlugin.ColumnOption> columnOption = Optional.ofNullable(columnOptions.get(column.getName()));

                final String pattern;
                if (columnOption.isPresent()) {
                    pattern = columnOption.get().getFormat().orElse(task.getDefaultTimestampFormat());
                }
                else {
                    pattern = task.getDefaultTimestampFormat();
                }

                final String zoneIdString;
                if (columnOption.isPresent()) {
                    zoneIdString = columnOption.get().getTimeZoneId().orElse(task.getDefaultTimeZoneId());
                }
                else {
                    zoneIdString = task.getDefaultTimeZoneId();
                }
                formatters[i] = TimestampFormatter.builder(pattern, true).setDefaultZoneFromString(zoneIdString).build();
            }
            i++;
        }
        return formatters;
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/embulk/output/td/writer/IFieldWriter.java:
--------------------------------------------------------------------------------
package org.embulk.output.td.writer;

import org.embulk.output.td.MsgpackGZFileBuilder;
import org.embulk.spi.Column;
import org.embulk.spi.PageReader;

import java.io.IOException;

/**
 * Writes one key-value pair (the column's key name and its converted value)
 * into the msgpack record currently being built.
 */
public interface IFieldWriter
{
    void writeKeyValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
            throws IOException;
}
-------------------------------------------------------------------------------- /src/main/java/org/embulk/output/td/writer/JsonFieldWriter.java: -------------------------------------------------------------------------------- 1 | package org.embulk.output.td.writer; 2 | 3 | import org.embulk.output.td.MsgpackGZFileBuilder; 4 | import org.embulk.spi.Column; 5 | import org.embulk.spi.DataException; 6 | import org.embulk.spi.PageReader; 7 | 8 | import java.io.IOException; 9 | 10 | public class JsonFieldWriter 11 | extends FieldWriter 12 | { 13 | public JsonFieldWriter(String keyName) 14 | { 15 | super(keyName); 16 | } 17 | 18 | @Override 19 | protected void writeBooleanValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) 20 | { 21 | throw new DataException("It is not able to convert from boolean to json."); 22 | } 23 | 24 | @Override 25 | protected void writeLongValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) 26 | { 27 | throw new DataException("It is not able to convert from long to json."); 28 | } 29 | 30 | @Override 31 | protected void writeDoubleValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) 32 | { 33 | throw new DataException("It is not able to convert from double to json."); 34 | } 35 | 36 | @Override 37 | protected void writeStringValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) 38 | { 39 | throw new DataException("It is not able to convert from string to json."); 40 | } 41 | 42 | @Override 43 | protected void writeTimestampValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) 44 | { 45 | throw new DataException("It is not able to convert from timestamp to json."); 46 | } 47 | 48 | @Override 49 | protected void writeJsonValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException 50 | { 51 | builder.writeString(reader.getJson(column).toJson()); 52 | } 53 | } 54 | 
-------------------------------------------------------------------------------- /src/main/java/org/embulk/output/td/writer/LongFieldWriter.java: -------------------------------------------------------------------------------- 1 | package org.embulk.output.td.writer; 2 | 3 | import org.embulk.output.td.MsgpackGZFileBuilder; 4 | import org.embulk.spi.Column; 5 | import org.embulk.spi.DataException; 6 | import org.embulk.spi.PageReader; 7 | 8 | import java.io.IOException; 9 | 10 | public class LongFieldWriter 11 | extends FieldWriter 12 | { 13 | public LongFieldWriter(String keyName) 14 | { 15 | super(keyName); 16 | } 17 | 18 | @Override 19 | protected void writeBooleanValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException 20 | { 21 | builder.writeLong(reader.getBoolean(column) ? 1 : 0); 22 | } 23 | 24 | @Override 25 | protected void writeLongValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException 26 | { 27 | builder.writeLong(reader.getLong(column)); 28 | } 29 | 30 | @Override 31 | protected void writeDoubleValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException 32 | { 33 | builder.writeLong(Double.valueOf(reader.getDouble(column)).longValue()); 34 | } 35 | 36 | @Override 37 | protected void writeStringValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException 38 | { 39 | builder.writeLong(Long.valueOf(reader.getString(column))); 40 | } 41 | 42 | @Override 43 | protected void writeTimestampValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException 44 | { 45 | builder.writeLong(getTimestamp(reader, column).getEpochSecond()); 46 | } 47 | 48 | @Override 49 | protected void writeJsonValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) 50 | { 51 | throw new DataException("It is not able to convert from json to long."); 52 | } 53 | } 54 | 
--------------------------------------------------------------------------------
/src/main/java/org/embulk/output/td/writer/MapFieldWriter.java:
--------------------------------------------------------------------------------
package org.embulk.output.td.writer;

import org.embulk.output.td.MsgpackGZFileBuilder;
import org.embulk.spi.Column;
import org.embulk.spi.PageReader;

import java.io.IOException;

/**
 * Like JsonFieldWriter, but stores the json column as a native msgpack value
 * (a map/array structure) instead of its JSON string form.
 */
public class MapFieldWriter
        extends JsonFieldWriter
{
    public MapFieldWriter(String keyName)
    {
        super(keyName);
    }

    @Override
    public void writeJsonValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
            throws IOException
    {
        builder.writeValue(reader.getJson(column));
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/embulk/output/td/writer/StringFieldWriter.java:
--------------------------------------------------------------------------------
package org.embulk.output.td.writer;

import org.embulk.output.td.MsgpackGZFileBuilder;
import org.embulk.spi.Column;
import org.embulk.spi.PageReader;
import org.embulk.util.timestamp.TimestampFormatter;

import java.io.IOException;

/**
 * Writes a column value as a msgpack string. Non-string inputs are stringified;
 * timestamps are rendered with the column's configured TimestampFormatter.
 */
public class StringFieldWriter
        extends FieldWriter
{
    private final TimestampFormatter formatter;

    public StringFieldWriter(String keyName, TimestampFormatter formatter)
    {
        super(keyName);
        this.formatter = formatter;
    }

    @Override
    protected void writeBooleanValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException
    {
        builder.writeString(reader.getBoolean(column) ? "true" : "false");
    }

    @Override
    protected void writeLongValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException
    {
        builder.writeString(String.valueOf(reader.getLong(column)));
    }

    @Override
    protected void writeDoubleValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException
    {
        builder.writeString(String.valueOf(reader.getDouble(column)));
    }

    @Override
    protected void writeStringValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException
    {
        builder.writeString(reader.getString(column));
    }

    @Override
    protected void writeTimestampValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException
    {
        // Render with the per-column (or default) pattern and time zone.
        builder.writeString(formatter.format(getTimestamp(reader, column)));
    }

    @Override
    protected void writeJsonValue(MsgpackGZFileBuilder builder, PageReader reader, Column column) throws IOException
    {
        builder.writeString(reader.getJson(column).toString());
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/embulk/output/td/writer/TimestampFieldLongDuplicator.java:
--------------------------------------------------------------------------------
package org.embulk.output.td.writer;

import org.embulk.output.td.MsgpackGZFileBuilder;
import org.embulk.spi.Column;
import org.embulk.spi.PageReader;

import java.io.IOException;

/**
 * Decorator used when time_column points at a timestamp column: writes the
 * original column through nextWriter, then writes the same timestamp again
 * under the duplicate key (typically "time") as epoch seconds.
 */
public class TimestampFieldLongDuplicator
        implements IFieldWriter
{
    private final IFieldWriter nextWriter;
    private final LongFieldWriter timeFieldWriter;

    public TimestampFieldLongDuplicator(IFieldWriter nextWriter, String duplicateKeyName)
    {
        this.nextWriter = nextWriter;
        timeFieldWriter = new LongFieldWriter(duplicateKeyName);
    }

    // @Override added for consistency with the other IFieldWriter implementations.
    @Override
    public void writeKeyValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
            throws IOException
    {
        nextWriter.writeKeyValue(builder, reader, column);
        timeFieldWriter.writeKeyValue(builder, reader, column);
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/embulk/output/td/writer/UnixTimestampFieldDuplicator.java:
--------------------------------------------------------------------------------
package org.embulk.output.td.writer;

import org.embulk.output.td.MsgpackGZFileBuilder;
import org.embulk.spi.Column;
import org.embulk.spi.PageReader;

import java.io.IOException;

/**
 * Decorator used when time_column points at a long (unix time) column: writes
 * the original column through nextWriter, then writes the value again under
 * the duplicate key, scaled down to seconds by fractionUnit.
 */
public class UnixTimestampFieldDuplicator
        implements IFieldWriter
{
    private final IFieldWriter nextWriter;
    private final UnixTimestampLongFieldWriter timeFieldWriter;

    public UnixTimestampFieldDuplicator(IFieldWriter nextWriter, String duplicateKeyName, int fractionUnit)
    {
        this.nextWriter = nextWriter;
        timeFieldWriter = new UnixTimestampLongFieldWriter(duplicateKeyName, fractionUnit);
    }

    // @Override added for consistency with the other IFieldWriter implementations.
    @Override
    public void writeKeyValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
            throws IOException
    {
        nextWriter.writeKeyValue(builder, reader, column);
        timeFieldWriter.writeKeyValue(builder, reader, column);
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/embulk/output/td/writer/UnixTimestampLongFieldWriter.java:
--------------------------------------------------------------------------------
package org.embulk.output.td.writer;

import org.embulk.output.td.MsgpackGZFileBuilder;
import org.embulk.spi.Column;
import org.embulk.spi.PageReader;

import java.io.IOException;

/**
 * LongFieldWriter variant that converts a unix timestamp in milli/micro/etc.
 * units to seconds by integer division with fractionUnit (1 for SEC).
 */
public class UnixTimestampLongFieldWriter
        extends LongFieldWriter
{
    private final int fractionUnit;

    public UnixTimestampLongFieldWriter(String keyName, int fractionUnit)
    {
        super(keyName);
        this.fractionUnit = fractionUnit;
    }

    @Override
    public void writeLongValue(MsgpackGZFileBuilder builder, PageReader reader, Column column)
            throws IOException
    {
        builder.writeLong(reader.getLong(column) / fractionUnit);
    }
}
--------------------------------------------------------------------------------
/src/test/java/org/embulk/output/td/TestRecordWriter.java:
--------------------------------------------------------------------------------
package org.embulk.output.td;

import com.google.common.collect.ImmutableMap;
import com.treasuredata.client.TDClient;
import org.embulk.EmbulkTestRuntime;
import org.embulk.output.td.TdOutputPlugin.PluginTask;
import org.embulk.spi.Page;
import org.embulk.spi.PageTestUtils;
import org.embulk.spi.Schema;
import org.embulk.spi.type.Types;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.msgpack.core.MessagePack;
import org.msgpack.core.MessageUnpacker;
import org.msgpack.value.Value;

import java.io.File;
import java.io.FileInputStream;
import java.time.Instant;
import java.util.Map;
import java.util.Optional;
import java.util.zip.GZIPInputStream;

import static org.embulk.output.td.TestTdOutputPlugin.config;
import static org.embulk.output.td.TestTdOutputPlugin.fieldWriters;
import static org.embulk.output.td.TestTdOutputPlugin.plugin;
import static org.embulk.output.td.TestTdOutputPlugin.pluginTask;
import static org.embulk.output.td.TestTdOutputPlugin.schema;
import static org.embulk.output.td.TestTdOutputPlugin.recordWriter;
import static org.embulk.output.td.TestTdOutputPlugin.tdClient;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static
org.mockito.Matchers.any;
import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.spy;
import static org.msgpack.value.ValueFactory.newString;

/**
 * Tests RecordWriter end-to-end: open/close lifecycle, flush/finish with a
 * stubbed TDClient upload, msgpack record contents for non-null and null
 * values, the generated 'time' column from time_value, abort, and the task
 * report. Records are verified by unpacking the gzipped msgpack file.
 */
public class TestRecordWriter
{
    @Rule
    public EmbulkTestRuntime runtime = new EmbulkTestRuntime();

    private Schema schema;
    private TdOutputPlugin plugin; // mock
    private PluginTask task;
    private RecordWriter recordWriter; // mock

    @Before
    public void createResources()
    {
        // 'time' column present so the plugin uses it as the partitioning key.
        schema = schema("time", Types.LONG, "_c0", Types.LONG, "_c1", Types.STRING,
                "_c2", Types.BOOLEAN, "_c3", Types.DOUBLE, "_c4", Types.TIMESTAMP);

        plugin = plugin();
        task = pluginTask(config().set("session_name", "my_session")
                .set("tmpdir", Optional.of(plugin.getEnvironmentTempDirectory())));
    }

    // open followed by close must not raise.
    @Test
    public void checkOpenAndClose()
            throws Exception
    {
        recordWriter = recordWriter(task, tdClient(plugin, task), fieldWriters(task, schema));

        // confirm that no error happens
        try {
            recordWriter.open(schema);
        }
        finally {
            recordWriter.close();
        }
    }

    // finish() must work both with zero records and with one buffered record
    // (the actual part upload is stubbed out with doNothing).
    @Test
    public void checkFlushAndFinish()
            throws Exception
    {
        TDClient client = spy(plugin.newTDClient(task));
        recordWriter = recordWriter(task, client, fieldWriters(task, schema));

        { // add no record
            RecordWriter recordWriter = recordWriter(task, client, fieldWriters(task, schema));
            try {
                recordWriter.open(schema);
            }
            finally {
                recordWriter.finish();
            }
        }

        { // add 1 record
            doNothing().when(client).uploadBulkImportPart(anyString(), anyString(), any(File.class));

            RecordWriter recordWriter = recordWriter(task, client, fieldWriters(task, schema));
            try {
                recordWriter.open(schema);

                // values are not null
                for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
                        1442595600L, 0L, "v", true, 0.0, Instant.ofEpochSecond(1442595600L))) {
                    recordWriter.add(page);
                }
            }
            finally {
                recordWriter.finish();
            }
        }
    }

    // One record with non-null values: unpack the msgpack.gz output and check
    // every field round-trips with the expected representation.
    @Test
    public void addNonNullValues()
            throws Exception
    {
        recordWriter = recordWriter(task, tdClient(plugin, task), fieldWriters(task, schema));

        try {
            recordWriter.open(schema);

            // values are not null
            for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
                    1442595600L, 0L, "v", true, 0.0, Instant.ofEpochSecond(1442595600L))) {
                recordWriter.add(page);
            }

            MsgpackGZFileBuilder builder = recordWriter.getBuilder();
            builder.finish();

            // record count 1
            assertEquals(1, builder.getRecordCount());

            MessageUnpacker u = MessagePack.newDefaultUnpacker(new GZIPInputStream(new FileInputStream(builder.getFile())));
            Map v = u.unpackValue().asMapValue().map();

            // compare actual values
            assertEquals(1442595600L, v.get(newString("time")).asIntegerValue().toLong());
            assertEquals(0L, v.get(newString("_c0")).asIntegerValue().toLong());
            assertEquals("v", v.get(newString("_c1")).asStringValue().toString());
            assertEquals(true, v.get(newString("_c2")).asBooleanValue().getBoolean());
            assertEquals(0.0, v.get(newString("_c3")).asFloatValue().toFloat(), 0.000001);
            // timestamp rendered as a string with the default format/zone
            assertEquals("2015-09-18 17:00:00.000", v.get(newString("_c4")).asStringValue().toString());

        }
        finally {
            recordWriter.close();
        }
    }

    // Null inputs must be stored as msgpack nil values (only 'time' is non-null).
    @Test
    public void addNullValues()
            throws Exception
    {
        recordWriter = recordWriter(task, tdClient(plugin, task), fieldWriters(task, schema));

        try {
            recordWriter.open(schema);

            // values are not null
            for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
                    1442595600L, null, null, null, null, null)) {
                recordWriter.add(page);
            }

            MsgpackGZFileBuilder builder = recordWriter.getBuilder();
            builder.finish();

            // record count 1
            assertEquals(1, builder.getRecordCount());

            MessageUnpacker u = MessagePack.newDefaultUnpacker(new GZIPInputStream(new FileInputStream(builder.getFile())));
            Map v = u.unpackValue().asMapValue().map();

            // compare actual values
            assertTrue(v.get(newString("_c0")).isNilValue());
            assertTrue(v.get(newString("_c1")).isNilValue());
            assertTrue(v.get(newString("_c2")).isNilValue());
            assertTrue(v.get(newString("_c3")).isNilValue());
            assertTrue(v.get(newString("_c4")).isNilValue());

        }
        finally {
            recordWriter.close();
        }
    }

    // No 'time' column in the schema: time_value config must generate one.
    @Test
    public void checkGeneratedTimeValueByOption()
            throws Exception
    {
        schema = schema("_c0", Types.LONG, "_c1", Types.STRING,
                "_c2", Types.BOOLEAN, "_c3", Types.DOUBLE, "_c4", Types.TIMESTAMP);
        task = pluginTask(config()
                .set("session_name", "my_session")
                .set("time_value", ImmutableMap.of("from", 0L, "to", 0L))
                .set("tmpdir", Optional.of(plugin.getEnvironmentTempDirectory()))
        );
        recordWriter = recordWriter(task, tdClient(plugin, task), fieldWriters(task, schema));

        try {
            recordWriter.open(schema);

            // values are not null
            for (Page page : PageTestUtils.buildPage(runtime.getBufferAllocator(), schema,
                    0L, "v", true, 0.0, Instant.ofEpochSecond(1442595600L))) {
                recordWriter.add(page);
            }

            MsgpackGZFileBuilder builder = recordWriter.getBuilder();
            builder.finish();

            // record count 1
            assertEquals(1, builder.getRecordCount());

            MessageUnpacker u = MessagePack.newDefaultUnpacker(new GZIPInputStream(new FileInputStream(builder.getFile())));
            Map v = u.unpackValue().asMapValue().map();

            // compare actual values
            // 'time' generated from the from=0/to=0 time_value range
            assertEquals(0L, v.get(newString("time")).asIntegerValue().toLong());
            assertEquals(0L, v.get(newString("_c0")).asIntegerValue().toLong());
            assertEquals("v", v.get(newString("_c1")).asStringValue().toString());
            assertEquals(true, v.get(newString("_c2")).asBooleanValue().getBoolean());
            assertEquals(0.0, v.get(newString("_c3")).asFloatValue().toFloat(), 0.000001);
            assertEquals("2015-09-18 17:00:00.000", v.get(newString("_c4")).asStringValue().toString());

        }
        finally {
            recordWriter.close();
        }
    }

    // abort() before any work must be a no-op.
    @Test
    public void doAbortNorthing()
    {
        recordWriter = recordWriter(task, tdClient(plugin, task), fieldWriters(task, schema));
        recordWriter.abort();
        // no error happen
    }

    // commit() must report the number of uploaded parts.
    @Test
    public void checkTaskReport()
    {
        recordWriter = recordWriter(task, tdClient(plugin, task), fieldWriters(task, schema));
        assertTrue(recordWriter.commit().has(TdOutputPlugin.TASK_REPORT_UPLOADED_PART_NUMBER));
    }
}
--------------------------------------------------------------------------------
/src/test/java/org/embulk/output/td/TestTdOutputPlugin.java:
--------------------------------------------------------------------------------
package org.embulk.output.td;

import com.github.tomakehurst.wiremock.junit.WireMockRule;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.treasuredata.client.ProxyConfig;
import com.treasuredata.client.TDClient;
import com.treasuredata.client.TDClientHttpConflictException;
import com.treasuredata.client.TDClientHttpNotFoundException;
import com.treasuredata.client.model.TDBulkImportSession;
import com.treasuredata.client.model.TDBulkImportSession.ImportStatus;
import
com.treasuredata.client.model.TDColumn; 13 | import com.treasuredata.client.model.TDColumnType; 14 | import com.treasuredata.client.model.TDTable; 15 | import com.treasuredata.client.model.TDTableType; 16 | import org.apache.http.Header; 17 | import org.apache.http.message.BasicHeader; 18 | import org.embulk.EmbulkTestRuntime; 19 | import org.embulk.config.ConfigDiff; 20 | import org.embulk.config.ConfigException; 21 | import org.embulk.config.ConfigSource; 22 | import org.embulk.config.TaskReport; 23 | import org.embulk.config.TaskSource; 24 | import org.embulk.output.td.TdOutputPlugin.HttpProxyTask; 25 | import org.embulk.output.td.TdOutputPlugin.PluginTask; 26 | import org.embulk.output.td.TdOutputPlugin.ColumnOption; 27 | import org.embulk.output.td.TdOutputPlugin.UnixTimestampUnit; 28 | import org.embulk.output.td.writer.FieldWriterSet; 29 | import org.embulk.spi.Column; 30 | import org.embulk.spi.Exec; 31 | import org.embulk.spi.OutputPlugin; 32 | import org.embulk.spi.Schema; 33 | import org.embulk.spi.SchemaConfigException; 34 | import org.embulk.spi.TransactionalPageOutput; 35 | import org.embulk.spi.type.Type; 36 | import org.embulk.spi.type.Types; 37 | import org.junit.Before; 38 | import org.junit.Rule; 39 | import org.junit.Test; 40 | import org.mockito.ArgumentCaptor; 41 | import org.slf4j.Logger; 42 | 43 | import java.util.HashMap; 44 | import java.util.List; 45 | import java.util.Map; 46 | import java.util.Optional; 47 | 48 | import static com.github.tomakehurst.wiremock.client.WireMock.aResponse; 49 | import static com.github.tomakehurst.wiremock.client.WireMock.equalTo; 50 | import static com.github.tomakehurst.wiremock.client.WireMock.get; 51 | import static com.github.tomakehurst.wiremock.client.WireMock.getRequestedFor; 52 | import static com.github.tomakehurst.wiremock.client.WireMock.stubFor; 53 | import static com.github.tomakehurst.wiremock.client.WireMock.urlMatching; 54 | import static 
com.github.tomakehurst.wiremock.client.WireMock.verify; 55 | import static com.github.tomakehurst.wiremock.core.WireMockConfiguration.options; 56 | import static com.treasuredata.client.model.TDBulkImportSession.ImportStatus.COMMITTED; 57 | import static com.treasuredata.client.model.TDBulkImportSession.ImportStatus.COMMITTING; 58 | import static com.treasuredata.client.model.TDBulkImportSession.ImportStatus.PERFORMING; 59 | import static com.treasuredata.client.model.TDBulkImportSession.ImportStatus.READY; 60 | import static com.treasuredata.client.model.TDBulkImportSession.ImportStatus.UNKNOWN; 61 | import static com.treasuredata.client.model.TDBulkImportSession.ImportStatus.UPLOADING; 62 | import static org.junit.Assert.assertEquals; 63 | import static org.junit.Assert.assertFalse; 64 | import static org.junit.Assert.assertTrue; 65 | import static org.junit.Assert.fail; 66 | import static org.mockito.Matchers.any; 67 | import static org.mockito.Matchers.anyInt; 68 | import static org.mockito.Matchers.anyString; 69 | import static org.mockito.Mockito.doNothing; 70 | import static org.mockito.Mockito.doReturn; 71 | import static org.mockito.Mockito.doThrow; 72 | import static org.mockito.Mockito.mock; 73 | import static org.mockito.Mockito.spy; 74 | 75 | public class TestTdOutputPlugin 76 | { 77 | @Rule 78 | public EmbulkTestRuntime runtime = new EmbulkTestRuntime(); 79 | 80 | private final int wireMockPort = 10888; 81 | 82 | @Rule 83 | public WireMockRule wireMockRule = new WireMockRule(options().port(wireMockPort), false); 84 | 85 | private ConfigSource config; // not mock 86 | private TdOutputPlugin plugin; // mock 87 | 88 | @Before 89 | public void createResources() 90 | { 91 | config = config(); 92 | plugin = plugin(); 93 | } 94 | 95 | @Test 96 | public void checkDefaultValues() 97 | { 98 | ConfigSource config = this.config.deepCopy(); 99 | final PluginTask task = TdOutputPlugin.CONFIG_MAPPER.map(config, PluginTask.class); 100 | assertEquals(true, 
task.getUseSsl()); 101 | assertFalse(task.getHttpProxy().isPresent()); 102 | assertEquals(TdOutputPlugin.Mode.APPEND, task.getMode()); 103 | assertEquals(true, task.getAutoCreateTable()); 104 | assertFalse(task.getSession().isPresent()); 105 | assertEquals(TdOutputPlugin.ConvertTimestampType.STRING, task.getConvertTimestampType()); 106 | assertFalse(task.getTimeColumn().isPresent()); 107 | assertFalse(task.getTimeValue().isPresent()); 108 | assertEquals(TdOutputPlugin.UnixTimestampUnit.SEC, task.getUnixTimestampUnit()); 109 | assertFalse(task.getTempDir().isPresent()); 110 | assertEquals(2, task.getUploadConcurrency()); 111 | assertEquals(16384, task.getFileSplitSize()); 112 | assertEquals("%Y-%m-%d %H:%M:%S.%3N", task.getDefaultTimestampFormat()); 113 | assertTrue(task.getColumnOptions().isEmpty()); 114 | assertFalse(task.getStopOnInvalidRecord()); 115 | assertEquals(10, task.getDisplayedErrorRecordsCountLimit()); 116 | assertEquals(20, task.getRetryLimit()); 117 | assertEquals(1000, task.getRetryInitialIntervalMillis()); 118 | assertEquals(90000, task.getRetryMaxIntervalMillis()); 119 | } 120 | 121 | @Test 122 | public void checkRetryValues() 123 | { 124 | ConfigSource config = this.config.deepCopy() 125 | .set("retry_limit", 17) 126 | .set("retry_initial_interval_millis", 4822) 127 | .set("retry_max_interval_millis", 19348); 128 | final PluginTask task = TdOutputPlugin.CONFIG_MAPPER.map(config, PluginTask.class); 129 | assertEquals(17, task.getRetryLimit()); 130 | assertEquals(4822, task.getRetryInitialIntervalMillis()); 131 | assertEquals(19348, task.getRetryMaxIntervalMillis()); 132 | } 133 | 134 | @Test 135 | public void checkUnixTimestampUnit() 136 | { 137 | { // sec 138 | assertEquals(UnixTimestampUnit.SEC, UnixTimestampUnit.of("sec")); 139 | assertEquals(UnixTimestampUnit.SEC.toString(), "sec"); 140 | assertEquals(UnixTimestampUnit.SEC.getFractionUnit(), 1); 141 | } 142 | 143 | { // milli 144 | assertEquals(UnixTimestampUnit.MILLI, 
UnixTimestampUnit.of("milli")); 145 | assertEquals(UnixTimestampUnit.MILLI.toString(), "milli"); 146 | assertEquals(UnixTimestampUnit.MILLI.getFractionUnit(), 1000); 147 | } 148 | 149 | { // micro 150 | assertEquals(UnixTimestampUnit.MICRO, UnixTimestampUnit.of("micro")); 151 | assertEquals(UnixTimestampUnit.MICRO.toString(), "micro"); 152 | assertEquals(UnixTimestampUnit.MICRO.getFractionUnit(), 1000000); 153 | } 154 | 155 | { // nano 156 | assertEquals(UnixTimestampUnit.NANO, UnixTimestampUnit.of("nano")); 157 | assertEquals(UnixTimestampUnit.NANO.toString(), "nano"); 158 | assertEquals(UnixTimestampUnit.NANO.getFractionUnit(), 1000000000); 159 | } 160 | 161 | { // invalid_unit 162 | try { 163 | UnixTimestampUnit.of("invalid_unit"); 164 | fail(); 165 | } 166 | catch (Throwable e) { 167 | e.printStackTrace(); 168 | assertTrue(e instanceof ConfigException); 169 | } 170 | } 171 | } 172 | 173 | @Test 174 | public void transaction() 175 | { 176 | doReturn("session_name").when(plugin).buildBulkImportSessionName(any(PluginTask.class)); 177 | ConfigDiff configDiff = TdOutputPlugin.CONFIG_MAPPER_FACTORY.newConfigDiff().set("last_session", "session_name"); 178 | doReturn(configDiff).when(plugin).doRun(any(TDClient.class), any(Schema.class), any(PluginTask.class), any(OutputPlugin.Control.class)); 179 | Schema schema = schema("time", Types.LONG, "c0", Types.STRING, "c1", Types.STRING); 180 | 181 | { // auto_create_table is true 182 | ConfigSource config = this.config.deepCopy().set("auto_create_table", "true"); 183 | doNothing().when(plugin).createTableIfNotExists(any(TDClient.class), anyString(), anyString()); 184 | assertEquals("session_name", plugin.transaction(config, schema, 0, new OutputPlugin.Control() 185 | { 186 | @Override 187 | public List run(TaskSource taskSource) 188 | { 189 | return Lists.newArrayList(TdOutputPlugin.CONFIG_MAPPER_FACTORY.newTaskReport()); 190 | } 191 | }).get(String.class, "last_session")); 192 | } 193 | 194 | { // auto_create_table is false 
195 | ConfigSource config = this.config.deepCopy().set("auto_create_table", "false"); 196 | doNothing().when(plugin).validateTableExists(any(TDClient.class), anyString(), anyString()); 197 | assertEquals("session_name", plugin.transaction(config, schema, 0, new OutputPlugin.Control() 198 | { 199 | @Override 200 | public List run(TaskSource taskSource) 201 | { 202 | return Lists.newArrayList(TdOutputPlugin.CONFIG_MAPPER_FACTORY.newTaskReport()); 203 | } 204 | }).get(String.class, "last_session")); 205 | } 206 | } 207 | 208 | @Test 209 | public void resume() 210 | throws Exception 211 | { 212 | PluginTask task = pluginTask(config); 213 | task.setSessionName("session_name"); 214 | task.setLoadTargetTableName("my_table"); 215 | task.setDoUpload(true); 216 | doReturn(true).when(plugin).startBulkImportSession(any(TDClient.class), anyString(), anyString(), anyString()); 217 | doNothing().when(plugin).completeBulkImportSession(any(TDClient.class), any(Schema.class), any(PluginTask.class), anyInt()); 218 | Schema schema = schema("time", Types.LONG, "c0", Types.STRING, "c1", Types.STRING); 219 | 220 | ConfigDiff configDiff = plugin.resume(task.dump(), schema, 0, new OutputPlugin.Control() 221 | { 222 | @Override 223 | public List run(TaskSource taskSource) 224 | { 225 | return Lists.newArrayList(TdOutputPlugin.CONFIG_MAPPER_FACTORY.newTaskReport()); 226 | } 227 | }); 228 | 229 | assertEquals("session_name", configDiff.get(String.class, "last_session")); 230 | } 231 | 232 | @Test 233 | public void cleanup() 234 | { 235 | PluginTask task = pluginTask(config); 236 | task.setSessionName("session_name"); 237 | task.setLoadTargetTableName("my_table"); 238 | task.setDoUpload(true); 239 | TDClient client = spy(plugin.newTDClient(task)); 240 | doNothing().when(client).deleteBulkImportSession(anyString()); 241 | doReturn(client).when(plugin).newTDClient(task); 242 | Schema schema = schema("time", Types.LONG, "c0", Types.STRING, "c1", Types.STRING); 243 | 244 | 
plugin.cleanup(task.dump(), schema, 0, Lists.newArrayList(TdOutputPlugin.CONFIG_MAPPER_FACTORY.newTaskReport())); 245 | // no error happens 246 | } 247 | 248 | @Test 249 | public void checkColumnOptions() 250 | { 251 | final ColumnOption columnOption = TdOutputPlugin.CONFIG_MAPPER.map(config, ColumnOption.class); 252 | ImmutableMap columnOptions = ImmutableMap.of( 253 | "c0", columnOption, "c1", columnOption 254 | ); 255 | 256 | { // schema includes column options' keys 257 | Schema schema = schema("c0", Types.LONG, "c1", Types.LONG); 258 | plugin.checkColumnOptions(schema, columnOptions); 259 | // no error happens 260 | } 261 | 262 | { // schema doesn't include one of column options' keys 263 | Schema schema = schema("c0", Types.LONG); 264 | try { 265 | plugin.checkColumnOptions(schema, columnOptions); 266 | fail(); 267 | } 268 | catch (Throwable t) { 269 | assertTrue(t instanceof SchemaConfigException); 270 | } 271 | } 272 | } 273 | 274 | @Test 275 | public void newTDClient() 276 | { 277 | { // no proxy setting 278 | PluginTask task = pluginTask(config); 279 | TDClient client = plugin.newTDClient(task); 280 | // Expect no error happens 281 | } 282 | 283 | { // proxy setting 284 | PluginTask task = pluginTask(config.deepCopy() 285 | .set("http_proxy", ImmutableMap.of("host", "xxx", "port", "8080", "user", "foo", "password", "PASSWORD"))); 286 | TDClient client = plugin.newTDClient(task); 287 | // Expect no error happens 288 | } 289 | 290 | { // proxy setting without user/password 291 | PluginTask task = pluginTask(config.deepCopy() 292 | .set("http_proxy", ImmutableMap.of("host", "xxx", "port", "8080"))); 293 | TDClient client = plugin.newTDClient(task); 294 | // Expect no error happens 295 | } 296 | } 297 | 298 | @Test 299 | public void createTableIfNotExists() 300 | { 301 | PluginTask task = pluginTask(config); 302 | TDClient client = spy(plugin.newTDClient(task)); 303 | 304 | { // database exists but table doesn't exist 305 | 
doNothing().when(client).createTable(anyString(), anyString()); 306 | plugin.createTableIfNotExists(client, "my_db", "my_table"); 307 | // no error happens 308 | } 309 | 310 | { // table already exists 311 | doThrow(conflict()).when(client).createTable(anyString(), anyString()); 312 | plugin.createTableIfNotExists(client, "my_db", "my_table"); 313 | // no error happens 314 | } 315 | 316 | { // database and table don't exist 317 | { // createTable -> createDB -> createTable 318 | doThrow(notFound()).doNothing().when(client).createTable(anyString(), anyString()); 319 | doNothing().when(client).createDatabase(anyString()); 320 | plugin.createTableIfNotExists(client, "my_db", "my_table"); 321 | // no error happens 322 | } 323 | 324 | { // createTable -> createDB -> createTable 325 | doThrow(notFound()).doNothing().when(client).createTable(anyString(), anyString()); 326 | doThrow(conflict()).when(client).createDatabase(anyString()); 327 | plugin.createTableIfNotExists(client, "my_db", "my_table"); 328 | // no error happens 329 | } 330 | 331 | { // createTable -> createDB -> createTable 332 | doThrow(notFound()).doThrow(conflict()).when(client).createTable(anyString(), anyString()); 333 | doNothing().when(client).createDatabase(anyString()); 334 | plugin.createTableIfNotExists(client, "my_db", "my_table"); 335 | // no error happens 336 | } 337 | 338 | { // createTable -> createDB -> createTable 339 | doThrow(notFound()).doThrow(conflict()).when(client).createTable(anyString(), anyString()); 340 | doThrow(conflict()).when(client).createDatabase(anyString()); 341 | plugin.createTableIfNotExists(client, "my_db", "my_table"); 342 | // no error happens 343 | } 344 | } 345 | } 346 | 347 | @Test 348 | public void validateTableExists() 349 | { 350 | PluginTask task = pluginTask(config); 351 | TDClient client = spy(plugin.newTDClient(task)); 352 | TDTable table = newTable("my_table", "[]"); 353 | 354 | { // table exists 355 | doReturn(table).when(client).showTable(anyString(), 
anyString()); 356 | plugin.validateTableExists(client, "my_db", "my_table"); 357 | // no error happens 358 | } 359 | 360 | { // database or table doesn't exist 361 | doThrow(notFound()).when(client).showTable(anyString(), anyString()); 362 | try { 363 | plugin.validateTableExists(client, "my_db", "my_table"); 364 | fail(); 365 | } 366 | catch (Throwable t) { 367 | assertTrue(t instanceof ConfigException); 368 | } 369 | } 370 | } 371 | 372 | @Test 373 | public void buildBulkImportSessionName() 374 | { 375 | { // session option is specified 376 | PluginTask task = pluginTask(config.deepCopy().set("session", "my_session")); 377 | assertEquals("my_session", plugin.buildBulkImportSessionName(task)); 378 | } 379 | 380 | { // session is not specified as option 381 | PluginTask task = pluginTask(config); 382 | assertTrue(plugin.buildBulkImportSessionName(task).startsWith("embulk_")); 383 | } 384 | } 385 | 386 | @Test 387 | public void startBulkImportSession() 388 | { 389 | PluginTask task = pluginTask(config); 390 | TDClient client = spy(plugin.newTDClient(task)); 391 | doNothing().when(client).createBulkImportSession(anyString(), anyString(), anyString()); 392 | 393 | { // status is uploading and unfrozen 394 | doReturn(session(UPLOADING, true)).when(client).getBulkImportSession("my_session"); 395 | assertEquals(false, plugin.startBulkImportSession(client, "my_session", "my_db", "my_table")); 396 | } 397 | 398 | { // status is uploading and frozen 399 | doReturn(session(UPLOADING, false)).when(client).getBulkImportSession("my_session"); 400 | assertEquals(true, plugin.startBulkImportSession(client, "my_session", "my_db", "my_table")); 401 | } 402 | 403 | { // status is performing 404 | doReturn(session(PERFORMING, false)).when(client).getBulkImportSession("my_session"); 405 | assertEquals(false, plugin.startBulkImportSession(client, "my_session", "my_db", "my_table")); 406 | } 407 | 408 | { // status is ready 409 | doReturn(session(READY, 
false)).when(client).getBulkImportSession("my_session"); 410 | assertEquals(false, plugin.startBulkImportSession(client, "my_session", "my_db", "my_table")); 411 | } 412 | 413 | { // status is committing 414 | doReturn(session(COMMITTING, false)).when(client).getBulkImportSession("my_session"); 415 | assertEquals(false, plugin.startBulkImportSession(client, "my_session", "my_db", "my_table")); 416 | } 417 | 418 | { // status is committed 419 | doReturn(session(COMMITTED, false)).when(client).getBulkImportSession("my_session"); 420 | assertEquals(false, plugin.startBulkImportSession(client, "my_session", "my_db", "my_table")); 421 | } 422 | 423 | { // status is unkown 424 | doReturn(session(UNKNOWN, false)).when(client).getBulkImportSession("my_session"); 425 | try { 426 | plugin.startBulkImportSession(client, "my_session", "my_db", "my_table"); 427 | fail(); 428 | } 429 | catch (Throwable t) { 430 | } 431 | } 432 | 433 | { // if createBulkImportSession got 409, it can be ignoreable. 434 | doThrow(conflict()).when(client).createBulkImportSession(anyString(), anyString(), anyString()); 435 | doReturn(session(UPLOADING, true)).when(client).getBulkImportSession("my_session"); 436 | assertEquals(false, plugin.startBulkImportSession(client, "my_session", "my_db", "my_table")); 437 | } 438 | } 439 | 440 | @Test 441 | public void newProxyConfig() 442 | { 443 | // confirm if proxy system properties override proxy setting by http_proxy config option. 
444 | 445 | final HttpProxyTask proxyTask = TdOutputPlugin.CONFIG_MAPPER.map(TdOutputPlugin.CONFIG_MAPPER_FACTORY.newConfigSource() 446 | .set("host", "option_host") 447 | .set("port", 8080), 448 | HttpProxyTask.class); 449 | 450 | String originalProxyHost = System.getProperty("http.proxyHost"); 451 | try { 452 | System.setProperty("http.proxyHost", "property_host"); 453 | Optional proxyConfig = plugin.newProxyConfig(Optional.of(proxyTask)); 454 | assertEquals("property_host", proxyConfig.get().getHost()); 455 | assertEquals(80, proxyConfig.get().getPort()); 456 | } 457 | finally { 458 | if (originalProxyHost != null) { 459 | System.setProperty("http.proxyHost", originalProxyHost); 460 | } 461 | } 462 | } 463 | 464 | @Test 465 | public void completeBulkImportSession() 466 | { 467 | PluginTask task = pluginTask(config); 468 | Schema schema = schema("c0", Types.LONG); 469 | 470 | doReturn(session(UNKNOWN, false)).when(plugin).waitForStatusChange(any(TDClient.class), anyString(), any(ImportStatus.class), any(ImportStatus.class), anyString()); 471 | doReturn(new HashMap()).when(plugin).updateSchema(any(TDClient.class), any(Schema.class), any(PluginTask.class)); 472 | 473 | TDClient client = spy(plugin.newTDClient(task)); 474 | doNothing().when(client).freezeBulkImportSession(anyString()); 475 | doNothing().when(client).performBulkImportSession(anyString(), any(com.google.common.base.Optional.class)); 476 | doNothing().when(client).commitBulkImportSession(anyString()); 477 | 478 | { // uploading + unfreeze 479 | doReturn(session(UPLOADING, false)).when(client).getBulkImportSession(anyString()); 480 | plugin.completeBulkImportSession(client, schema, task, 0); 481 | // no error happens 482 | } 483 | 484 | { // uploading + frozen 485 | doReturn(session(UPLOADING, true)).when(client).getBulkImportSession(anyString()); 486 | plugin.completeBulkImportSession(client, schema, task, 0); 487 | // no error happens 488 | } 489 | 490 | { // performing 491 | 
doReturn(session(PERFORMING, false)).when(client).getBulkImportSession(anyString()); 492 | plugin.completeBulkImportSession(client, schema, task, 0); 493 | // no error happens 494 | } 495 | 496 | { // ready 497 | doReturn(session(READY, false)).when(client).getBulkImportSession(anyString()); 498 | plugin.completeBulkImportSession(client, schema, task, 0); 499 | // no error happens 500 | } 501 | 502 | { // committing 503 | doReturn(session(COMMITTING, false)).when(client).getBulkImportSession(anyString()); 504 | plugin.completeBulkImportSession(client, schema, task, 0); 505 | // no error happens 506 | } 507 | 508 | { // committed 509 | doReturn(session(COMMITTED, false)).when(client).getBulkImportSession(anyString()); 510 | plugin.completeBulkImportSession(client, schema, task, 0); 511 | // no error happens 512 | } 513 | 514 | { // unknown 515 | doReturn(session(UNKNOWN, false)).when(client).getBulkImportSession(anyString()); 516 | try { 517 | plugin.completeBulkImportSession(client, schema, task, 0); 518 | fail(); 519 | } 520 | catch (Throwable t) { 521 | } 522 | } 523 | 524 | { // if freezeBulkImportSession got 409, it can be ignoreable. 
525 | doThrow(conflict()).when(client).freezeBulkImportSession(anyString()); 526 | doReturn(session(UPLOADING, true)).when(client).getBulkImportSession(anyString()); 527 | plugin.completeBulkImportSession(client, schema, task, 0); 528 | // no error happens 529 | } 530 | } 531 | 532 | @Test 533 | public void waitForStatusChange() 534 | { 535 | PluginTask task = pluginTask(config); 536 | TDClient client = spy(plugin.newTDClient(task)); 537 | 538 | { // performing -> ready 539 | doReturn(session(PERFORMING, false)).doReturn(session(READY, false)).when(client).getBulkImportSession("my_session"); 540 | plugin.waitForStatusChange(client, "my_session", PERFORMING, READY, ""); 541 | } 542 | 543 | { // committing -> committed 544 | doReturn(session(COMMITTING, false)).doReturn(session(COMMITTED, false)).when(client).getBulkImportSession("my_session"); 545 | plugin.waitForStatusChange(client, "my_session", COMMITTING, COMMITTED, ""); 546 | } 547 | } 548 | 549 | @Test 550 | public void open() 551 | { 552 | PluginTask task = pluginTask(config); 553 | task.setSessionName("session_name"); 554 | task.setLoadTargetTableName("my_table"); 555 | task.setDoUpload(true); 556 | task.setTempDir(Optional.of(plugin.getEnvironmentTempDirectory())); 557 | Schema schema = schema("time", Types.LONG, "c0", Types.STRING, "c1", Types.STRING); 558 | 559 | TransactionalPageOutput output = plugin.open(task.dump(), schema, 0); 560 | // Expect no error happens. 
561 | } 562 | 563 | @Test 564 | public void testUpdateSchemaWillPreserveIndex() 565 | { 566 | final String dbName = "test_db"; 567 | final String tblName = "test_tbl"; 568 | PluginTask task = mock(PluginTask.class); 569 | doReturn(dbName).when(task).getDatabase(); 570 | doReturn(tblName).when(task).getTable(); 571 | doReturn(tblName).when(task).getLoadTargetTableName(); 572 | 573 | TDTable table = mock(TDTable.class); 574 | 575 | TDClient client = mock(TDClient.class); 576 | doReturn(table).when(client).showTable(anyString(), anyString()); 577 | 578 | Schema schema = schema("col3", Types.LONG, "col0", Types.STRING, "col1", Types.STRING); 579 | 580 | // capture param of client append schema to check for columns order 581 | ArgumentCaptor> schemaCaptor = ArgumentCaptor.forClass((Class) List.class); 582 | doNothing().when(client).appendTableSchema(anyString(), anyString(), schemaCaptor.capture()); 583 | 584 | plugin.updateSchema(client, schema, task); 585 | 586 | List inputCols = schema.getColumns(); 587 | List uploadedCols = schemaCaptor.getValue(); 588 | 589 | assertEquals(inputCols.get(0).getName(), uploadedCols.get(0).getName()); 590 | assertEquals(inputCols.get(1).getName(), uploadedCols.get(1).getName()); 591 | assertEquals(inputCols.get(2).getName(), uploadedCols.get(2).getName()); 592 | } 593 | 594 | @Test 595 | public void testUpdateSchemaWillApplyColumnOption() 596 | { 597 | final String dbName = "test_db"; 598 | final String tblName = "test_tbl"; 599 | PluginTask task = mock(PluginTask.class); 600 | doReturn(dbName).when(task).getDatabase(); 601 | doReturn(tblName).when(task).getTable(); 602 | doReturn(tblName).when(task).getLoadTargetTableName(); 603 | 604 | TDTable table = mock(TDTable.class); 605 | TDClient client = mock(TDClient.class); 606 | doReturn(table).when(client).showTable(anyString(), anyString()); 607 | 608 | Schema schema = schema("col1", Types.LONG, "col2", Types.JSON, "col3", Types.JSON, "col4", Types.JSON); 609 | // capture param of client 
append schema to check for columns order 610 | ArgumentCaptor> schemaCaptor = ArgumentCaptor.forClass((Class) List.class); 611 | doNothing().when(client).appendTableSchema(anyString(), anyString(), schemaCaptor.capture()); 612 | 613 | ImmutableMap columnOptions = ImmutableMap.of( 614 | "col2", TdOutputPlugin.CONFIG_MAPPER.map(TdOutputPlugin.CONFIG_MAPPER_FACTORY.newConfigSource().set("type", "array").set("value_type", "array"), ColumnOption.class), 615 | "col3", TdOutputPlugin.CONFIG_MAPPER.map(TdOutputPlugin.CONFIG_MAPPER_FACTORY.newConfigSource().set("type", "string").set("value_type", "map"), ColumnOption.class) 616 | ); 617 | doReturn(columnOptions).when(task).getColumnOptions(); 618 | 619 | plugin.updateSchema(client, schema, task); 620 | 621 | List uploadedCols = schemaCaptor.getValue(); 622 | assertEquals(4, uploadedCols.size()); 623 | assertEquals("array", uploadedCols.get(1).getType().toString()); 624 | assertEquals("string", uploadedCols.get(2).getType().toString()); 625 | } 626 | 627 | @Test 628 | public void testTDClientSendsExtraHeader() 629 | { 630 | final String urlRegx = "/v3/table/show/.*"; 631 | final Header header1 = new BasicHeader("h_name", "h_value"); 632 | final Header header2 = new BasicHeader("ABC", "XYZ"); 633 | final Map headers = ImmutableMap.of( 634 | header1.getName(), header1.getValue(), 635 | header2.getName(), header2.getValue()); 636 | 637 | PluginTask task = TdOutputPlugin.CONFIG_MAPPER.map(config() 638 | .set("endpoint", "localhost") 639 | .set("port", wireMockPort) 640 | .set("use_ssl", "false") // ssl disabled for wiremock 641 | .set("additional_http_headers", headers), 642 | PluginTask.class); 643 | 644 | stubFor(get(urlMatching(urlRegx)) 645 | .willReturn(aResponse() 646 | .withBody("{\n" + 647 | " \"id\": 46732,\n" + 648 | " \"name\": \"sample_csv11\",\n" + 649 | " \"estimated_storage_size\": 0,\n" + 650 | " \"counter_updated_at\": null,\n" + 651 | " \"last_log_timestamp\": null,\n" + 652 | " \"delete_protected\": false,\n" 
+ 653 | " \"created_at\": \"2019-10-23 07:49:02 UTC\",\n" + 654 | " \"updated_at\": \"2019-10-23 07:51:13 UTC\",\n" + 655 | " \"type\": \"log\",\n" + 656 | " \"include_v\": true,\n" + 657 | " \"count\": 15,\n" + 658 | " \"schema\": \"[]\",\n" + 659 | " \"expire_days\": null\n" + 660 | "}"))); 661 | 662 | TDClient client = plugin.newTDClient(task); 663 | 664 | // issue an API request 665 | plugin.validateTableExists(client, "tmp_db", "tmp_table"); 666 | 667 | verify(getRequestedFor(urlMatching(urlRegx)) 668 | .withHeader(header1.getName(), equalTo(header1.getValue())) 669 | .withHeader(header2.getName(), equalTo(header2.getValue()))); 670 | } 671 | 672 | public static ConfigSource config() 673 | { 674 | return TdOutputPlugin.CONFIG_MAPPER_FACTORY.newConfigSource() 675 | .set("apikey", "xxx") 676 | .set("endpoint", "api.treasuredata.com") 677 | .set("database", "my_db") 678 | .set("table", "my_table"); 679 | } 680 | 681 | public static Schema schema(Object... nameAndTypes) 682 | { 683 | Schema.Builder builder = Schema.builder(); 684 | for (int i = 0; i < nameAndTypes.length; i += 2) { 685 | String name = (String) nameAndTypes[i]; 686 | Type type = (Type) nameAndTypes[i + 1]; 687 | builder.add(name, type); 688 | } 689 | return builder.build(); 690 | } 691 | 692 | public static PluginTask pluginTask(ConfigSource config) 693 | { 694 | return TdOutputPlugin.CONFIG_MAPPER.map(config, PluginTask.class); 695 | } 696 | 697 | public static TdOutputPlugin plugin() 698 | { 699 | return spy(new TdOutputPlugin()); 700 | } 701 | 702 | public static TDClient tdClient(TdOutputPlugin plugin, PluginTask task) 703 | { 704 | return spy(plugin.newTDClient(task)); 705 | } 706 | 707 | public static TDTable newTable(String name, String schema) 708 | { 709 | return new TDTable("", name, TDTableType.LOG, schema, 0, 0, "", "", "", ""); 710 | } 711 | 712 | public static FieldWriterSet fieldWriters(PluginTask task, Schema schema) 713 | { 714 | return spy(FieldWriterSet.createWithValidation(task, 
schema, false)); 715 | } 716 | 717 | public static RecordWriter recordWriter(PluginTask task, TDClient client, FieldWriterSet fieldWriters) 718 | { 719 | return spy(new RecordWriter(task, 0, client, fieldWriters)); 720 | } 721 | 722 | static TDClientHttpNotFoundException notFound() 723 | { 724 | return new TDClientHttpNotFoundException("not found"); 725 | } 726 | 727 | static TDClientHttpConflictException conflict() 728 | { 729 | return new TDClientHttpConflictException("conflict"); 730 | } 731 | 732 | private static TDBulkImportSession session(ImportStatus status, boolean uploadFrozen) 733 | { 734 | return new TDBulkImportSession("my_session", "my_db", "my_table", status, uploadFrozen, "0", 0, 0, 0, 0); 735 | } 736 | } 737 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/output/td/TestTimeValueGenerator.java: -------------------------------------------------------------------------------- 1 | package org.embulk.output.td; 2 | 3 | import com.google.common.collect.ImmutableMap; 4 | import org.embulk.EmbulkTestRuntime; 5 | import org.embulk.config.ConfigException; 6 | import org.embulk.config.ConfigSource; 7 | import org.embulk.output.td.writer.FieldWriterSet; 8 | import org.embulk.spi.Schema; 9 | import org.embulk.spi.type.Types; 10 | import org.junit.Before; 11 | import org.junit.Rule; 12 | import org.junit.Test; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | 16 | import static org.embulk.output.td.TestTdOutputPlugin.config; 17 | import static org.embulk.output.td.TestTdOutputPlugin.pluginTask; 18 | import static org.embulk.output.td.TestTdOutputPlugin.schema; 19 | import static org.junit.Assert.assertTrue; 20 | import static org.junit.Assert.fail; 21 | 22 | public class TestTimeValueGenerator 23 | { 24 | @Rule 25 | public EmbulkTestRuntime runtime = new EmbulkTestRuntime(); 26 | 27 | private Logger log; 28 | private ConfigSource config; 29 | private Schema schema; 30 | 31 | 
@Before 32 | public void createResources() 33 | { 34 | log = LoggerFactory.getLogger(TestTimeValueGenerator.class); 35 | config = config(); 36 | } 37 | 38 | @Test 39 | public void validateTimeValue() 40 | { 41 | // incremental_time 42 | { // {from: 0, to: 0} # default incremental_time 43 | schema = schema("_c0", Types.STRING, "_c1", Types.LONG); 44 | FieldWriterSet.createWithValidation(pluginTask(config.set("time_value", ImmutableMap.of("from", 0L, "to", 0L))), schema, false); 45 | } 46 | { // {from: 0} # default incremental_time 47 | schema = schema("_c0", Types.STRING, "_c1", Types.LONG); 48 | try { 49 | FieldWriterSet.createWithValidation(pluginTask(config.set("time_value", ImmutableMap.of("from", 0L))), schema, false); 50 | fail(); 51 | } 52 | catch (Throwable t) { 53 | assertTrue(t instanceof ConfigException); 54 | } 55 | } 56 | { // {to: 0} # default incremental_time 57 | schema = schema("_c0", Types.STRING, "_c1", Types.LONG); 58 | try { 59 | FieldWriterSet.createWithValidation(pluginTask(config.set("time_value", ImmutableMap.of("to", 0L))), schema, false); 60 | fail(); 61 | } 62 | catch (Throwable t) { 63 | assertTrue(t instanceof ConfigException); 64 | } 65 | } 66 | { // {from: 0, to: 0, mode: incremental_time} 67 | schema = schema("_c0", Types.STRING, "_c1", Types.LONG); 68 | FieldWriterSet.createWithValidation(pluginTask(config.set("time_value", ImmutableMap.of("from", 0L, "to", 0L, "mode", "incremental_time"))), schema, false); 69 | } 70 | { // {from: 0, mode: incremental_time} 71 | schema = schema("_c0", Types.STRING, "_c1", Types.LONG); 72 | try { 73 | FieldWriterSet.createWithValidation(pluginTask(config.set("time_value", ImmutableMap.of("from", 0L, "mode", "incremental_time"))), schema, false); 74 | fail(); 75 | } 76 | catch (Throwable t) { 77 | assertTrue(t instanceof ConfigException); 78 | } 79 | } 80 | { // {to: 0, mode: incremental_time} 81 | schema = schema("_c0", Types.STRING, "_c1", Types.LONG); 82 | try { 83 | 
FieldWriterSet.createWithValidation(pluginTask(config.set("time_value", ImmutableMap.of("to", 0L, "mode", "incremental_time"))), schema, false); 84 | fail(); 85 | } 86 | catch (Throwable t) { 87 | assertTrue(t instanceof ConfigException); 88 | } 89 | } 90 | { // {mode: incremental_time} 91 | schema = schema("_c0", Types.STRING, "_c1", Types.LONG); 92 | try { 93 | FieldWriterSet.createWithValidation(pluginTask(config.set("time_value", ImmutableMap.of("mode", "incremental_time"))), schema, false); 94 | fail(); 95 | } 96 | catch (Throwable t) { 97 | assertTrue(t instanceof ConfigException); 98 | } 99 | } 100 | 101 | // fixed_time 102 | { // {value: 0, mode: fixed_time} 103 | schema = schema("_c0", Types.STRING, "_c1", Types.LONG); 104 | FieldWriterSet.createWithValidation(pluginTask(config.set("time_value", ImmutableMap.of("value", 0L, "mode", "fixed_time"))), schema, false); 105 | } 106 | { // {mode: fixed_time} 107 | schema = schema("_c0", Types.STRING, "_c1", Types.LONG); 108 | try { 109 | FieldWriterSet.createWithValidation(pluginTask(config.set("time_value", ImmutableMap.of("mode", "fixed_time"))), schema, false); 110 | } 111 | catch (Throwable t) { 112 | assertTrue(t instanceof ConfigException); 113 | } 114 | } 115 | { // {value: 0} 116 | schema = schema("_c0", Types.STRING, "_c1", Types.LONG); 117 | try { 118 | FieldWriterSet.createWithValidation(pluginTask(config.set("time_value", ImmutableMap.of("value", 0L))), schema, false); 119 | } 120 | catch (Throwable t) { 121 | assertTrue(t instanceof ConfigException); 122 | } 123 | } 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/output/td/writer/TestArrayFieldWriter.java: -------------------------------------------------------------------------------- 1 | package org.embulk.output.td.writer; 2 | 3 | import org.embulk.output.td.MsgpackGZFileBuilder; 4 | import org.embulk.spi.Column; 5 | import org.embulk.spi.PageReader; 6 | import 
org.junit.Test; 7 | import org.junit.runner.RunWith; 8 | import org.mockito.Mock; 9 | import org.mockito.runners.MockitoJUnitRunner; 10 | import org.msgpack.value.impl.ImmutableStringValueImpl; 11 | 12 | import java.io.IOException; 13 | 14 | import static org.mockito.Mockito.verify; 15 | import static org.mockito.Mockito.when; 16 | 17 | @RunWith(MockitoJUnitRunner.class) 18 | public class TestArrayFieldWriter 19 | { 20 | private static final String KEY_NAME = "key_name"; 21 | 22 | @Mock 23 | private MsgpackGZFileBuilder builder; 24 | 25 | @Mock 26 | private PageReader reader; 27 | 28 | @Mock 29 | private Column column; 30 | 31 | private ArrayFieldWriter writer = new ArrayFieldWriter(KEY_NAME); 32 | 33 | @Test 34 | public void testWriteJsonValue() throws IOException 35 | { 36 | ImmutableStringValueImpl value = new ImmutableStringValueImpl("json_value"); 37 | when(reader.getJson(column)).thenReturn(value); 38 | writer.writeJsonValue(builder, reader, column); 39 | verify(builder).writeValue(value); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/output/td/writer/TestBooleanFieldWriter.java: -------------------------------------------------------------------------------- 1 | package org.embulk.output.td.writer; 2 | 3 | import org.apache.commons.lang3.StringUtils; 4 | import org.embulk.output.td.MsgpackGZFileBuilder; 5 | import org.embulk.spi.Column; 6 | import org.embulk.spi.DataException; 7 | import org.embulk.spi.PageReader; 8 | import org.junit.Test; 9 | import org.junit.runner.RunWith; 10 | import org.mockito.Mock; 11 | import org.mockito.runners.MockitoJUnitRunner; 12 | 13 | import java.io.IOException; 14 | 15 | import static org.mockito.Mockito.verify; 16 | import static org.mockito.Mockito.when; 17 | 18 | @RunWith(MockitoJUnitRunner.class) 19 | public class TestBooleanFieldWriter 20 | { 21 | private static final String KEY_NAME = "key_name"; 22 | 23 | @Mock 24 | private 
@RunWith(MockitoJUnitRunner.class)
public class TestBooleanFieldWriter
{
    private static final String KEY_NAME = "key_name";

    // Mocked output sink; tests only verify which write* call it receives.
    @Mock
    private MsgpackGZFileBuilder builder;

    @Mock
    private PageReader reader;

    @Mock
    private Column column;

    private BooleanFieldWriter writer = new BooleanFieldWriter(KEY_NAME);

    @Test
    public void testWriteBooleanValue() throws IOException
    {
        when(reader.getBoolean(column)).thenReturn(true);
        writer.writeBooleanValue(builder, reader, column);
        verify(builder).writeBoolean(true);
    }

    @Test
    public void testWriteLongValue() throws IOException
    {
        // write true if the long value is not equal to 0
        {
            when(reader.getLong(column)).thenReturn(10L);
            writer.writeLongValue(builder, reader, column);
            verify(builder).writeBoolean(true);
        }

        // write false if long value is equal to 0
        {
            when(reader.getLong(column)).thenReturn(0L);
            writer.writeLongValue(builder, reader, column);
            verify(builder).writeBoolean(false);
        }
    }

    @Test
    public void testWriteDoubleValue() throws IOException
    {
        // write true if the truncation of double value is not equal to 0
        {
            when(reader.getDouble(column)).thenReturn(10.0);
            writer.writeDoubleValue(builder, reader, column);
            verify(builder).writeBoolean(true);
        }

        // write false if the truncation of the double value is equal to 0
        // (0.5 truncates to 0 and maps to false — this is truncation,
        // not round-to-nearest, as the assertion below shows)
        {
            when(reader.getDouble(column)).thenReturn(0.5);
            writer.writeDoubleValue(builder, reader, column);
            verify(builder).writeBoolean(false);
        }
    }

    @Test
    public void testWriteStringValue() throws IOException
    {
        // write true if the length of string value is larger than 0
        {
            when(reader.getString(column)).thenReturn("larger_than_0");
            writer.writeStringValue(builder, reader, column);
            verify(builder).writeBoolean(true);
        }

        // write false if the length of string value is equal to 0
        {
            when(reader.getString(column)).thenReturn(StringUtils.EMPTY);
            writer.writeStringValue(builder, reader, column);
            verify(builder).writeBoolean(false);
        }
    }

    // Timestamp -> boolean conversion is unsupported and must raise DataException.
    @Test(expected = DataException.class)
    public void testWriteTimestampValue()
    {
        writer.writeTimestampValue(builder, reader, column);
    }

    // JSON -> boolean conversion is unsupported and must raise DataException.
    @Test(expected = DataException.class)
    public void testWriteJsonValue()
    {
        writer.writeJsonValue(builder, reader, column);
    }
}
column); 46 | verify(builder).writeDouble(0.0); 47 | } 48 | } 49 | 50 | @Test 51 | public void testWriteLongValue() throws IOException 52 | { 53 | when(reader.getLong(column)).thenReturn(10L); 54 | writer.writeLongValue(builder, reader, column); 55 | verify(builder).writeDouble(10); 56 | } 57 | 58 | @Test 59 | public void testWriteDoubleValue() throws IOException 60 | { 61 | when(reader.getDouble(column)).thenReturn(50.5); 62 | writer.writeDoubleValue(builder, reader, column); 63 | verify(builder).writeDouble(50.5); 64 | } 65 | 66 | @Test 67 | public void testWriteStringValue() throws IOException 68 | { 69 | when(reader.getString(column)).thenReturn("100.1"); 70 | writer.writeStringValue(builder, reader, column); 71 | verify(builder).writeDouble(100.1); 72 | } 73 | 74 | @Test(expected = DataException.class) 75 | public void testWriteTimestampValue() 76 | { 77 | writer.writeTimestampValue(builder, reader, column); 78 | } 79 | 80 | @Test(expected = DataException.class) 81 | public void testWriteJsonValue() 82 | { 83 | writer.writeJsonValue(builder, reader, column); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/output/td/writer/TestFieldWriterSet.java: -------------------------------------------------------------------------------- 1 | package org.embulk.output.td.writer; 2 | 3 | import com.fasterxml.jackson.databind.node.JsonNodeFactory; 4 | import com.fasterxml.jackson.databind.node.ObjectNode; 5 | import com.google.common.collect.ImmutableMap; 6 | import org.embulk.EmbulkTestRuntime; 7 | import org.embulk.config.ConfigException; 8 | import org.embulk.config.ConfigSource; 9 | import org.embulk.spi.Schema; 10 | import org.embulk.spi.type.Types; 11 | import org.junit.Before; 12 | import org.junit.Rule; 13 | import org.junit.Test; 14 | import org.slf4j.Logger; 15 | import org.slf4j.LoggerFactory; 16 | 17 | import static org.embulk.output.td.TestTdOutputPlugin.config; 18 | import static 
public class TestFieldWriterSet
{
    @Rule
    public EmbulkTestRuntime runtime = new EmbulkTestRuntime();

    private Logger log;
    private ConfigSource config;
    private Schema schema;

    @Before
    public void createResources()
    {
        log = LoggerFactory.getLogger(TestFieldWriterSet.class);
        config = config();
    }

    // Invalid time-column configurations must be rejected with a ConfigException.
    @Test
    public void validateFieldWriterSet()
    {
        { // if schema doesn't have appropriate time column, it throws ConfigError.
            schema = schema("_c0", Types.STRING, "time", Types.STRING); // not long or timestamp
            try {
                FieldWriterSet.createWithValidation(pluginTask(config), schema, false);
                fail();
            }
            catch (Throwable t) {
                assertTrue(t instanceof ConfigException);
            }
        }

        { // if schema doesn't have a column specified as time_column column, it throws ConfigError
            schema = schema("_c0", Types.STRING, "_c1", Types.STRING);
            try {
                FieldWriterSet.createWithValidation(pluginTask(config.deepCopy().set("time_column", "_c2")), schema, false);
                fail();
            }
            catch (Throwable t) {
                assertTrue(t instanceof ConfigException);
            }
        }

        { // if time_column column is not appropriate column type, it throws ConfigError.
            schema = schema("_c0", Types.STRING, "_c1", Types.STRING);
            try {
                FieldWriterSet.createWithValidation(pluginTask(config.deepCopy().set("time_column", "_c1")), schema, false);
                fail();
            }
            catch (Throwable t) {
                assertTrue(t instanceof ConfigException);
            }
        }

        { // if both of time_column and time_value are specified, it throws ConfigError.
            schema = schema("_c0", Types.STRING, "_c1", Types.LONG);
            try {
                FieldWriterSet.createWithValidation(pluginTask(config.deepCopy().set("time_column", "_c1").set("time_value", ImmutableMap.of("from", 0L, "to", 0L))), schema, false);
                fail();
            }
            catch (Throwable t) {
                assertTrue(t instanceof ConfigException);
            }
        }
    }

    @Test
    public void hasTimeColumn()
    {
        { // time column (timestamp type) exists
            Schema schema = schema("time", Types.TIMESTAMP, "_c0", Types.TIMESTAMP);
            FieldWriterSet writers = FieldWriterSet.createWithValidation(pluginTask(config), schema, false);

            assertTrue(writers.getFieldWriter(0) instanceof LongFieldWriter);
        }

        { // time column (long type) exists
            Schema schema = schema("time", Types.LONG, "_c0", Types.TIMESTAMP);
            FieldWriterSet writers = FieldWriterSet.createWithValidation(pluginTask(config), schema, false);

            assertTrue(writers.getFieldWriter(0) instanceof UnixTimestampLongFieldWriter);
        }
    }

    @Test
    public void specifiedTimeColumn()
    {
        { // time_column option (timestamp type)
            Schema schema = schema("_c0", Types.TIMESTAMP, "_c1", Types.STRING);
            FieldWriterSet writers = FieldWriterSet.createWithValidation(pluginTask(config.deepCopy().set("time_column", "_c0")), schema, false);

            assertTrue(writers.getFieldWriter(0) instanceof TimestampFieldLongDuplicator);
        }

        { // time_column option (long type)
            Schema schema = schema("_c0", Types.LONG, "_c1", Types.STRING);
            FieldWriterSet writers = FieldWriterSet.createWithValidation(pluginTask(config.deepCopy().set("time_column", "_c0")), schema, false);

            assertTrue(writers.getFieldWriter(0) instanceof UnixTimestampFieldDuplicator);
        }

        { // time_column option (timestamp type) if time column exists
            Schema schema = schema("_c0", Types.TIMESTAMP, "time", Types.TIMESTAMP);
            FieldWriterSet writers = FieldWriterSet.createWithValidation(pluginTask(config.deepCopy().set("time_column", "_c0")), schema, false);

            assertTrue(writers.getFieldWriter(0) instanceof TimestampFieldLongDuplicator); // c0
            assertTrue(writers.getFieldWriter(1) instanceof StringFieldWriter); // renamed column
        }

        { // time_column option (long type) if time column exists
            Schema schema = schema("_c0", Types.LONG, "time", Types.TIMESTAMP);
            FieldWriterSet writers = FieldWriterSet.createWithValidation(pluginTask(config.deepCopy().set("time_column", "_c0")), schema, false);

            assertTrue(writers.getFieldWriter(0) instanceof UnixTimestampFieldDuplicator); // c0
            assertTrue(writers.getFieldWriter(1) instanceof StringFieldWriter); // renamed column
        }

        { // time_column option (long type) is ignored if time column exists and ignore_alternative_time is enabled
            Schema schema = schema("_c0", Types.LONG, "time", Types.TIMESTAMP);
            FieldWriterSet writers = FieldWriterSet.createWithValidation(pluginTask(config
                    .deepCopy()
                    .set("time_column", "_c0")
                    .set("ignore_alternative_time_if_time_exists", true)), schema, false);

            assertTrue(writers.getFieldWriter(0) instanceof LongFieldWriter); // c0
            assertTrue(writers.getFieldWriter(1) instanceof LongFieldWriter); // time primary key
        }
    }

    @Test
    public void useDefaultTimestampTypeConvertTo()
    {
        { // if not specify default_timestamp_type_convert_to, use string by default
            Schema schema = schema("_c0", Types.TIMESTAMP, "time", Types.TIMESTAMP);
            FieldWriterSet writers = FieldWriterSet.createWithValidation(pluginTask(config.deepCopy()), schema, false);

            assertTrue(writers.getFieldWriter(0) instanceof StringFieldWriter); // c0
            assertTrue(writers.getFieldWriter(1) instanceof LongFieldWriter); // time
        }

        { // and use time_column option
            Schema schema = schema("_c0", Types.TIMESTAMP, "time", Types.TIMESTAMP);
            FieldWriterSet writers = FieldWriterSet.createWithValidation(pluginTask(config.deepCopy().set("time_column", "_c0")), schema, false);

            assertTrue(writers.getFieldWriter(0) instanceof TimestampFieldLongDuplicator); // c0
            assertTrue(writers.getFieldWriter(1) instanceof StringFieldWriter); // time renamed
        }

        { // if default_timestamp_type_convert_to is string, use string
            Schema schema = schema("_c0", Types.TIMESTAMP, "time", Types.TIMESTAMP);
            FieldWriterSet writers = FieldWriterSet.createWithValidation(pluginTask(config.deepCopy().set("default_timestamp_type_convert_to", "string")), schema, false);

            assertTrue(writers.getFieldWriter(0) instanceof StringFieldWriter); // c0
            assertTrue(writers.getFieldWriter(1) instanceof LongFieldWriter); // time
        }

        { // and use time_column option
            Schema schema = schema("_c0", Types.TIMESTAMP, "time", Types.TIMESTAMP);
            FieldWriterSet writers = FieldWriterSet.createWithValidation(pluginTask(config.deepCopy().set("default_timestamp_type_convert_to", "string").set("time_column", "_c0")), schema, false);

            assertTrue(writers.getFieldWriter(0) instanceof TimestampFieldLongDuplicator); // c0
            assertTrue(writers.getFieldWriter(1) instanceof StringFieldWriter); // time renamed
        }

        { // if default_timestamp_type_convert_to is sec, use long
            Schema schema = schema("_c0", Types.TIMESTAMP, "time", Types.TIMESTAMP);
            FieldWriterSet writers = FieldWriterSet.createWithValidation(pluginTask(config.deepCopy().set("default_timestamp_type_convert_to", "sec")), schema, false);

            assertTrue(writers.getFieldWriter(0) instanceof LongFieldWriter); // c0
            assertTrue(writers.getFieldWriter(1) instanceof LongFieldWriter); // time
        }

        { // and use time_column option
            Schema schema = schema("_c0", Types.TIMESTAMP, "time", Types.TIMESTAMP);
            FieldWriterSet writers = FieldWriterSet.createWithValidation(pluginTask(config.deepCopy().set("default_timestamp_type_convert_to", "sec").set("time_column", "_c0")), schema, false);

            assertTrue(writers.getFieldWriter(0) instanceof TimestampFieldLongDuplicator); // c0
            assertTrue(writers.getFieldWriter(1) instanceof LongFieldWriter); // time renamed
        }
    }

    // Without an explicit time column, the first timestamp column is not promoted:
    // it is written as a string and the long column stays a long.
    @Test
    public void useFirstTimestampColumn()
            throws Exception
    {
        Schema schema = schema("_c0", Types.TIMESTAMP, "_c1", Types.LONG);
        FieldWriterSet writers = FieldWriterSet.createWithValidation(pluginTask(config), schema, false);

        assertTrue(writers.getFieldWriter(0) instanceof StringFieldWriter); // c0
        assertTrue(writers.getFieldWriter(1) instanceof LongFieldWriter); // c1
    }

    // column_options.value_type overrides the writer chosen from the input type.
    @Test
    public void useColumnOptions()
    {
        Schema schema = schema("col_long", Types.LONG,
                "col_val_type_double", Types.JSON,
                "col_val_type_long", Types.STRING,
                "col_val_type_boolean", Types.STRING,
                "col_val_type_string", Types.JSON,
                "col_val_type_timestamp", Types.STRING
        );
        ImmutableMap columnOptions = ImmutableMap.of(
                "col_val_type_double", newObjectNode().put("type", "double").put("value_type", "double"),
                "col_val_type_long", newObjectNode().put("type", "long").put("value_type", "long"),
                "col_val_type_boolean", newObjectNode().put("type", "boolean").put("value_type", "boolean"),
                "col_val_type_string", newObjectNode().put("type", "string").put("value_type", "string"),
                "col_val_type_timestamp", newObjectNode().put("type", "timestamp").put("value_type", "timestamp")
        );

        FieldWriterSet writers = FieldWriterSet.createWithValidation(pluginTask(config.deepCopy()
                .set("column_options", columnOptions)
                .set("default_timestamp_type_convert_to", "string")), schema, false);

        assertTrue(writers.getFieldWriter(0) instanceof LongFieldWriter);
        assertTrue(writers.getFieldWriter(1) instanceof DoubleFieldWriter);
        assertTrue(writers.getFieldWriter(2) instanceof LongFieldWriter);
        assertTrue(writers.getFieldWriter(3) instanceof BooleanFieldWriter);
        assertTrue(writers.getFieldWriter(4) instanceof StringFieldWriter);
        assertTrue(writers.getFieldWriter(5) instanceof StringFieldWriter);
    }

    // Fresh Jackson object node for building per-column options.
    static ObjectNode newObjectNode()
    {
        return JsonNodeFactory.instance.objectNode();
    }
}
writer.writeLongValue(builder, reader, column); 44 | } 45 | 46 | @Test(expected = DataException.class) 47 | public void testWriteDoubleValue() 48 | { 49 | writer.writeDoubleValue(builder, reader, column); 50 | } 51 | 52 | @Test(expected = DataException.class) 53 | public void testWriteStringValue() 54 | { 55 | writer.writeStringValue(builder, reader, column); 56 | } 57 | 58 | @Test(expected = DataException.class) 59 | public void testWriteTimestampValue() 60 | { 61 | writer.writeTimestampValue(builder, reader, column); 62 | } 63 | 64 | @Test 65 | public void testWriteJsonValue() throws IOException 66 | { 67 | when(reader.getJson(column)).thenReturn(new ImmutableStringValueImpl("json_value")); 68 | writer.writeJsonValue(builder, reader, column); 69 | verify(builder).writeString("\"json_value\""); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/output/td/writer/TestLongFieldWriter.java: -------------------------------------------------------------------------------- 1 | package org.embulk.output.td.writer; 2 | 3 | import org.embulk.output.td.MsgpackGZFileBuilder; 4 | import org.embulk.spi.Column; 5 | import org.embulk.spi.DataException; 6 | import org.embulk.spi.PageReader; 7 | import org.junit.Test; 8 | import org.junit.runner.RunWith; 9 | import org.mockito.Mock; 10 | import org.mockito.runners.MockitoJUnitRunner; 11 | 12 | import java.io.IOException; 13 | import java.time.Instant; 14 | 15 | import static org.mockito.Mockito.verify; 16 | import static org.mockito.Mockito.when; 17 | 18 | @RunWith(MockitoJUnitRunner.class) 19 | public class TestLongFieldWriter 20 | { 21 | private static final String KEY_NAME = "key_name"; 22 | 23 | @Mock 24 | private MsgpackGZFileBuilder builder; 25 | 26 | @Mock 27 | private PageReader reader; 28 | 29 | @Mock 30 | private Column column; 31 | 32 | private LongFieldWriter writer = new LongFieldWriter(KEY_NAME); 33 | 34 | @Test 35 | public void 
testWriteBooleanValue() throws IOException 36 | { 37 | // write 1 if the boolean value is true 38 | { 39 | when(reader.getBoolean(column)).thenReturn(true); 40 | writer.writeBooleanValue(builder, reader, column); 41 | verify(builder).writeLong(1); 42 | } 43 | // write 0 if the boolean value is false 44 | { 45 | when(reader.getBoolean(column)).thenReturn(false); 46 | writer.writeBooleanValue(builder, reader, column); 47 | verify(builder).writeLong(0); 48 | } 49 | } 50 | 51 | @Test 52 | public void testWriteLongValue() throws IOException 53 | { 54 | when(reader.getLong(column)).thenReturn(10L); 55 | writer.writeLongValue(builder, reader, column); 56 | verify(builder).writeLong(10); 57 | } 58 | 59 | @Test 60 | public void testWriteDoubleValue() throws IOException 61 | { 62 | when(reader.getDouble(column)).thenReturn(50.5); 63 | writer.writeDoubleValue(builder, reader, column); 64 | verify(builder).writeLong(50); 65 | } 66 | 67 | @Test 68 | public void testWriteStringValue() throws IOException 69 | { 70 | when(reader.getString(column)).thenReturn("100"); 71 | writer.writeStringValue(builder, reader, column); 72 | verify(builder).writeLong(100); 73 | } 74 | 75 | @Test 76 | public void testWriteTimestampValue() throws IOException 77 | { 78 | when(reader.getTimestampInstant(column)).thenReturn(Instant.ofEpochSecond(200)); 79 | writer.writeTimestampValue(builder, reader, column); 80 | verify(builder).writeLong(200); 81 | } 82 | 83 | @Test(expected = DataException.class) 84 | public void testWriteJsonValue() 85 | { 86 | writer.writeJsonValue(builder, reader, column); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/output/td/writer/TestMapFieldWriter.java: -------------------------------------------------------------------------------- 1 | package org.embulk.output.td.writer; 2 | 3 | import org.embulk.output.td.MsgpackGZFileBuilder; 4 | import org.embulk.spi.Column; 5 | import 
org.embulk.spi.PageReader;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;
import org.msgpack.value.impl.ImmutableStringValueImpl;

import java.io.IOException;

import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

/**
 * Unit tests for {@link MapFieldWriter}.
 *
 * <p>A map-typed column is written by handing the msgpack value through to the
 * builder unchanged.
 */
@RunWith(MockitoJUnitRunner.class)
public class TestMapFieldWriter
{
    private static final String KEY_NAME = "key_name";

    @Mock
    private MsgpackGZFileBuilder builder;

    @Mock
    private PageReader reader;

    @Mock
    private Column column;

    private final MapFieldWriter writer = new MapFieldWriter(KEY_NAME);

    @Test
    public void testWriteJsonValue() throws IOException
    {
        // The JSON value read from the page must be forwarded to the builder as-is.
        ImmutableStringValueImpl jsonValue = new ImmutableStringValueImpl("json_value");
        when(reader.getJson(column)).thenReturn(jsonValue);

        writer.writeJsonValue(builder, reader, column);

        verify(builder).writeValue(jsonValue);
    }
}
-------------------------------------------------------------------------------- /src/test/java/org/embulk/output/td/writer/TestStringFieldWriter.java: --------------------------------------------------------------------------------
package org.embulk.output.td.writer;

import org.embulk.output.td.MsgpackGZFileBuilder;
import org.embulk.spi.Column;
import org.embulk.spi.PageReader;
import org.embulk.util.timestamp.TimestampFormatter;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;
import org.msgpack.value.impl.ImmutableStringValueImpl;

import java.io.IOException;
import java.time.Instant;

import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

/**
 * Unit tests for {@link StringFieldWriter}.
 *
 * <p>Each input column type (boolean, long, double, string, timestamp, json)
 * must be converted to its string representation before being written to the
 * builder.
 */
@RunWith(MockitoJUnitRunner.class)
public class TestStringFieldWriter
{
    private static final String KEY_NAME = "key_name";

    @Mock
    private MsgpackGZFileBuilder builder;

    @Mock
    private PageReader reader;

    @Mock
    private Column column;

    // Default writer has no timestamp formatter; the timestamp test replaces it.
    private StringFieldWriter writer = new StringFieldWriter(KEY_NAME, null);

    @Test
    public void testWriteBooleanValue() throws IOException
    {
        // A true boolean is rendered as the string "true".
        when(reader.getBoolean(column)).thenReturn(true);
        writer.writeBooleanValue(builder, reader, column);
        verify(builder).writeString("true");

        // A false boolean is rendered as the string "false".
        when(reader.getBoolean(column)).thenReturn(false);
        writer.writeBooleanValue(builder, reader, column);
        verify(builder).writeString("false");
    }

    @Test
    public void testWriteLongValue() throws IOException
    {
        when(reader.getLong(column)).thenReturn(10L);

        writer.writeLongValue(builder, reader, column);

        verify(builder).writeString("10");
    }

    @Test
    public void testWriteDoubleValue() throws IOException
    {
        when(reader.getDouble(column)).thenReturn(50.5);

        writer.writeDoubleValue(builder, reader, column);

        verify(builder).writeString("50.5");
    }

    @Test
    public void testWriteStringValue() throws IOException
    {
        when(reader.getString(column)).thenReturn("a string");

        writer.writeStringValue(builder, reader, column);

        verify(builder).writeString("a string");
    }

    @Test
    public void testWriteTimestampValue() throws IOException
    {
        // Epoch second 200 is still 1970-01-01 under the date-only pattern.
        writer = new StringFieldWriter(KEY_NAME, TimestampFormatter.builderWithJava("yyyy-MM-dd").build());
        when(reader.getTimestampInstant(column)).thenReturn(Instant.ofEpochSecond(200));

        writer.writeTimestampValue(builder, reader, column);

        verify(builder).writeString("1970-01-01");
    }

    @Test
    public void testWriteJsonValue() throws IOException
    {
        when(reader.getJson(column)).thenReturn(new ImmutableStringValueImpl("json_value"));

        writer.writeJsonValue(builder, reader, column);

        verify(builder).writeString("json_value");
    }
}
-------------------------------------------------------------------------------- /src/test/java/org/embulk/output/td/writer/TestTimestampFieldLongDuplicator.java: --------------------------------------------------------------------------------
package org.embulk.output.td.writer;

import org.embulk.output.td.MsgpackGZFileBuilder;
import org.embulk.spi.Column;
import org.embulk.spi.PageReader;
import org.embulk.spi.type.Types;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;

import java.io.IOException;

import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

/**
 * Unit tests for {@link TimestampFieldLongDuplicator}.
 *
 * <p>The duplicator must delegate to the wrapped writer and additionally write
 * the same long value under its own key.
 */
@RunWith(MockitoJUnitRunner.class)
public class TestTimestampFieldLongDuplicator
{
    private static final String KEY_NAME = "key_name";

    @Mock
    private IFieldWriter nextWriter;

    @Mock
    private MsgpackGZFileBuilder builder;

    @Mock
    private PageReader reader;

    @Mock
    private Column column;

    private TimestampFieldLongDuplicator duplicator;

    @Before
    public void setUp()
    {
        duplicator = new TimestampFieldLongDuplicator(nextWriter, KEY_NAME);
    }

    @Test
    public void testWriteKeyValue() throws IOException
    {
        when(column.getType()).thenReturn(Types.LONG);
        when(reader.getLong(column)).thenReturn(10L);

        duplicator.writeKeyValue(builder, reader, column);

        // Both the delegate and the duplicated long field must be written.
        verify(nextWriter).writeKeyValue(builder, reader, column);
        verify(builder).writeLong(10);
    }
}
-------------------------------------------------------------------------------- /src/test/java/org/embulk/output/td/writer/TestUnixTimestampFieldDuplicator.java: --------------------------------------------------------------------------------
package org.embulk.output.td.writer;

import org.embulk.output.td.MsgpackGZFileBuilder;
import org.embulk.output.td.TdOutputPlugin;
import org.embulk.spi.Column;
import org.embulk.spi.PageReader;
import org.embulk.spi.type.Types;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;

import java.io.IOException;

import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

/**
 * Unit tests for {@link UnixTimestampFieldDuplicator}.
 *
 * <p>The duplicator must delegate to the wrapped writer and additionally write
 * the long value scaled down by the configured fraction unit under its own key.
 */
@RunWith(MockitoJUnitRunner.class)
public class TestUnixTimestampFieldDuplicator
{
    private static final String KEY_NAME = "key_name";

    @Mock
    private IFieldWriter nextWriter;

    @Mock
    private MsgpackGZFileBuilder builder;

    @Mock
    private PageReader reader;

    @Mock
    private Column column;

    // Renamed from "timestampFieldLongDuplicator" (copy-paste from the sibling
    // TestTimestampFieldLongDuplicator); the field holds a UnixTimestampFieldDuplicator.
    private UnixTimestampFieldDuplicator duplicator;

    @Before
    public void setUp()
    {
        duplicator = new UnixTimestampFieldDuplicator(nextWriter, KEY_NAME, TdOutputPlugin.UnixTimestampUnit.MILLI.getFractionUnit());
    }

    @Test
    public void testWriteKeyValue() throws IOException
    {
        when(reader.getLong(column)).thenReturn(1000L);
        when(column.getType()).thenReturn(Types.LONG);

        duplicator.writeKeyValue(builder, reader, column);

        // Delegation happens unchanged; the duplicated value is 1000 ms -> 1 s.
        verify(nextWriter).writeKeyValue(builder, reader, column);
        verify(builder).writeLong(1);
    }
}
-------------------------------------------------------------------------------- /src/test/java/org/embulk/output/td/writer/TestUnixTimestampLongFieldWriter.java:
--------------------------------------------------------------------------------
package org.embulk.output.td.writer;

import org.embulk.output.td.MsgpackGZFileBuilder;
import org.embulk.output.td.TdOutputPlugin;
import org.embulk.spi.Column;
import org.embulk.spi.PageReader;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;

import java.io.IOException;

import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

/**
 * Unit tests for {@link UnixTimestampLongFieldWriter}.
 *
 * <p>A long value is divided by the configured fraction unit before being
 * written, converting e.g. milliseconds to seconds.
 */
@RunWith(MockitoJUnitRunner.class)
public class TestUnixTimestampLongFieldWriter
{
    private static final String KEY_NAME = "key_name";

    @Mock
    private MsgpackGZFileBuilder builder;

    @Mock
    private PageReader reader;

    @Mock
    private Column column;

    private final UnixTimestampLongFieldWriter writer =
            new UnixTimestampLongFieldWriter(KEY_NAME, TdOutputPlugin.UnixTimestampUnit.MILLI.getFractionUnit());

    @Test
    public void testWriteLongValue() throws IOException
    {
        // 1000 ms scales down to 1 under the MILLI fraction unit.
        when(reader.getLong(column)).thenReturn(1000L);

        writer.writeLongValue(builder, reader, column);

        verify(builder).writeLong(1);
    }
}
--------------------------------------------------------------------------------