├── .github └── workflows │ └── build.yml ├── .gitignore ├── LICENSE ├── NOTICE ├── NOTICE_GEM ├── README.md ├── build.gradle ├── config └── checkstyle │ ├── checkstyle.xml │ └── default.xml ├── gradle ├── dependency-locks │ └── embulkPluginRuntime.lockfile └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat └── src ├── main └── java │ └── org │ └── embulk │ └── output │ └── gcs │ ├── AuthMethod.java │ ├── GcsAuthentication.java │ ├── GcsOutputPlugin.java │ ├── GcsTransactionalFileOutput.java │ └── PluginTask.java └── test ├── java └── org │ └── embulk │ └── output │ └── gcs │ ├── TestGcsAuthentication.java │ └── TestGcsOutputPlugin.java └── resources └── sample_01.csv /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | on: push 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v1 8 | - name: Set up JDK 1.8 9 | uses: actions/setup-java@v1 10 | with: 11 | java-version: 1.8 12 | - name: Test 13 | env: 14 | GCP_EMAIL: ${{secrets.GCP_EMAIL}} 15 | GCP_JSON_KEYFILE: ${{secrets.GCP_JSON_KEYFILE}} 16 | GCP_PRIVATE_KEYFILE: ${{secrets.GCP_PRIVATE_KEYFILE}} 17 | GCP_BUCKET: ${{secrets.GCP_BUCKET}} 18 | GCP_BUCKET_DIRECTORY: ${{secrets.GCP_BUCKET_DIRECTORY}} 19 | run: ./gradlew --stacktrace test 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | /pkg/ 3 | /tmp/ 4 | .gradle/ 5 | /classpath/ 6 | build/ 7 | .idea 8 | *.iml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 
9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | embulk-output-gcs 2 | Copyright 2015 Kazuyuki Honda, and the Embulk project 3 | 4 | This product includes software developed by Kazuyuki Honda, and in a part of the Embulk project (https://www.embulk.org/). 5 | It was originally developed by Kazuyuki Honda (https://github.com/hakobera), and then transferred to the Embulk project. 6 | It was originally licenced under the MIT License before the transfer, and it is now licensed under the Apache Software License, Version 2.0. 7 | -------------------------------------------------------------------------------- /NOTICE_GEM: -------------------------------------------------------------------------------- 1 | embulk-output-gcs 2 | Copyright 2015 Kazuyuki Honda, and the Embulk project 3 | 4 | This product includes software developed by Kazuyuki Honda, and in a part of the Embulk project (https://www.embulk.org/). 
5 | It was originally developed by Kazuyuki Honda (https://github.com/hakobera), and then transferred to the Embulk project. 6 | It was originally licenced under the MIT License before the transfer, and it is now licensed under the Apache Software License, Version 2.0. 7 | 8 | The gem distribution of this product includes software developed in a part of the Embulk project (https://www.embulk.org/). 9 | It has been licenced under the Apache Software License, Version 2.0. 10 | 11 | The gem distribution of this product includes JARs of the Jackson project (https://github.com/FasterXML/jackson), as-is. 12 | They are licensed under the Apache Software License, Version 2.0. 13 | 14 | The gem distribution of this product includes JARs of the Jakarta Bean Validation API 1.1 (https://beanvalidation.org/1.1/), as-is. 15 | It is licensed under the Apache Software License, Version 2.0. 16 | 17 | The gem distribution of this product includes JAR of the JSR305 Annotations for Findbugs (https://code.google.com/archive/p/jsr-305/), as-is. 18 | It is licensed under the Apache Software License, Version 2.0. 19 | 20 | The gem distribution of this product includes JARs of Apache HttpClient, HttpCore, and Commons Codec, as-is. 21 | They are licensed under the Apache Software License, Version 2.0. 22 | 23 | The gem distribution of this product includes a JAR of Joda-Time (https://www.joda.org/joda-time/), as-is. 24 | It is licensed under the Apache Software License, Version 2.0. 25 | 26 | The gem distribution of this product includes a JAR of the JCL 1.2 implemented over SLF4J (http://www.slf4j.org/legacy.html) 1.7, as-is. 27 | It is licensed under the Apache Software License, Version 2.0. 28 | 29 | The gem distribution of this product includes a JAR of the ThreeTen Backport (https://www.threeten.org/threetenbp/), as-is. 30 | It is licensed under the 3-Clause "New" BSD License. 31 | 32 | The gem distribution of this product includes JAR of Guava (https://guava.dev/), as-is. 
33 | It is licensed under the Apache Software License, Version 2.0. 34 | 35 | The gem distribution of this product includes JAR of GSON (https://github.com/google/gson), as-is. 36 | It is licensed under the Apache Software License, Version 2.0. 37 | 38 | The gem distribution of this product includes JARs of Protocol Buffers (https://developers.google.com/protocol-buffers), as-is. 39 | They are licensed under the 3-Clause "New" BSD License. 40 | 41 | The gem distribution of this product includes a JAR of AutoValue Processor, as-is. 42 | It is licensed under the Apache License, Version 2.0. 43 | 44 | The gem distribution of this product includes JARs of Google Cloud Clients (https://github.com/GoogleCloudPlatform/google-cloud-java/), as-is. 45 | They are licensed under the Apache Software License, Version 2.0. 46 | 47 | The gem distribution of this product includes JARs of Google APIs/HTTP/OAuth Client Libraries, as-is. 48 | They are licensed under the Apache Software License, Version 2.0. 49 | 50 | The gem distribution of this product includes a JAR of GAX (Google Api EXtensions), as-is. 51 | It is licensed under the 3-Clause "New" BSD License. 52 | 53 | The gem distribution of this product includes JARs of Google API Common, as-is. 54 | It is licensed under the 3-Clause "New" BSD License. 55 | 56 | The gem distribution of this product includes JARs of Proto Libraries for Google APIs, as-is. 57 | They are licensed under the Apache Software License, Version 2.0. 58 | 59 | The gem distribution of this product includes a JAR of Cloud Storage JSON API V1, as-is. 60 | It is licensed under the Apache License, Version 2.0. 61 | 62 | The gem distribution of this product includes JARs of Google Auth Libraries, as-is. 63 | They are licensed under the 3-Clause "New" BSD License. 64 | 65 | The gem distribution of this product includes a JAR of JSON in Java (https://github.com/stleary/JSON-java), as-is. 66 | It is licensed under the JSON License. 
67 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/embulk/embulk-output-gcs.svg?branch=master)](https://travis-ci.org/embulk/embulk-output-gcs) 2 | 3 | # Google Cloud Storage output plugin for Embulk 4 | 5 | Google Cloud Storage output plugin for [Embulk](https://github.com/embulk/embulk). 6 | 7 | ## Overview 8 | 9 | * **Plugin type**: file output 10 | * **Load all or nothing**: no 11 | * **Resume supported**: yes 12 | * **Cleanup supported**: no 13 | 14 | - The connector does not retry when a problem occurs on the streaming channel. In that case, the job needs to be run again. 15 | 16 | ## Configuration 17 | 18 | - **bucket**: Google Cloud Storage bucket name (string, required) 19 | - **path_prefix**: Prefix of output keys (string, required) 20 | - **file_ext**: Extension of output file (string, required) 21 | - **sequence_format**: Format of the sequence number of the output files (string, default value is ".%03d.%02d") 22 | - **content_type**: content type of output file (string, optional, default value is "application/octet-stream") 23 | - **auth_method**: Authentication method `private_key`, `json_key` or `compute_engine` (string, optional, default value is "private_key") 24 | - **service_account_email**: Google Cloud Platform service account email (string, required when auth_method is private_key) 25 | - **p12_keyfile**: Private key file fullpath of Google Cloud Platform service account (string, required when auth_method is private_key) 26 | - **json_keyfile**: Full path of the JSON key file (string, required when auth_method is json_key) 27 | - **application_name**: Application name, anything you like (string, optional, default value is "embulk-output-gcs") 28 | - **max_connection_retry**: Number of connection retries to GCS (number, default value is 10) 29 | 30 | ## Example 31 | 32 | ```yaml 33 | 
out: 34 | type: gcs 35 | bucket: your-gcs-bucket-name 36 | path_prefix: logs/out 37 | file_ext: .csv 38 | auth_method: private_key # default 39 | service_account_email: 'XYZ@developer.gserviceaccount.com' 40 | p12_keyfile: '/path/to/private/key.p12' 41 | formatter: 42 | type: csv 43 | encoding: UTF-8 44 | ``` 45 | 46 | ## Authentication 47 | 48 | Three methods are supported for fetching an access token for the service account. 49 | 50 | 1. Public-Private key pair of GCP(Google Cloud Platform)'s service account 51 | 2. JSON key of GCP(Google Cloud Platform)'s service account 52 | 3. Pre-defined access token (Google Compute Engine only) 53 | 54 | ### Public-Private key pair of GCP's service account 55 | 56 | You first need to create a service account (client ID), download its private key and deploy the key with embulk. 57 | 58 | ```yaml 59 | out: 60 | type: gcs 61 | auth_method: private_key 62 | service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com 63 | p12_keyfile: /path/to/p12_keyfile.p12 64 | ``` 65 | 66 | ### JSON key of GCP's service account 67 | 68 | You first need to create a service account (client ID), download its json key and deploy the key with embulk. 69 | 70 | ```yaml 71 | out: 72 | type: gcs 73 | auth_method: json_key 74 | json_keyfile: /path/to/json_keyfile.json 75 | ``` 76 | 77 | You can also embed the contents of json_keyfile in config.yml. 78 | 79 | ```yaml 80 | out: 81 | type: gcs 82 | auth_method: json_key 83 | json_keyfile: 84 | content: | 85 | { 86 | "private_key_id": "123456789", 87 | "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF", 88 | "client_email": "..." 89 | } 90 | ``` 91 | 92 | ### Pre-defined access token(GCE only) 93 | 94 | On the other hand, you don't need to explicitly create a service account for embulk when you 95 | run embulk in Google Compute Engine. 
In this third authentication method, you need to 96 | add the API scope "https://www.googleapis.com/auth/devstorage.read_write" to the scope list of your 97 | Compute Engine VM instance, then you can configure embulk like this. 98 | 99 | [Setting the scope of service account access for instances](https://cloud.google.com/compute/docs/authentication) 100 | 101 | ```yaml 102 | out: 103 | type: gcs 104 | auth_method: compute_engine 105 | ``` 106 | 107 | ## Build 108 | 109 | ``` 110 | $ ./gradlew gem 111 | ``` 112 | 113 | ## Test 114 | 115 | ``` 116 | $ ./gradlew test # -t to watch change of files and rebuild continuously 117 | ``` 118 | 119 | To run unit tests, we need to configure the following environment variables. 120 | 121 | When these environment variables are not set, almost all test cases are skipped. 122 | 123 | ``` 124 | GCP_EMAIL 125 | GCP_P12_KEYFILE 126 | GCP_JSON_KEYFILE 127 | GCP_BUCKET 128 | GCP_BUCKET_DIRECTORY(optional, if needed) 129 | ``` 130 | 131 | If you're using Mac OS X El Capitan and GUI applications (IDE), set the environment variables as follows. 132 | ``` 133 | $ vi ~/Library/LaunchAgents/environment.plist 134 | <?xml version="1.0" encoding="UTF-8"?> 135 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> 136 | <plist version="1.0"> 137 | <dict> 138 | <key>Label</key> 139 | <string>my.startup</string> 140 | <key>ProgramArguments</key> 141 | <array> 142 | <string>sh</string> 143 | <string>-c</string> 144 | <string> 145 | launchctl setenv GCP_EMAIL ABCXYZ123ABCXYZ123.gserviceaccount.com 146 | launchctl setenv GCP_P12_KEYFILE /path/to/p12_keyfile.p12 147 | launchctl setenv GCP_JSON_KEYFILE /path/to/json_keyfile.json 148 | launchctl setenv GCP_BUCKET my-bucket 149 | launchctl setenv GCP_BUCKET_DIRECTORY unittests 150 | </string> 151 | </array> 152 | <key>RunAtLoad</key> 153 | <true/> 154 | </dict> 155 | </plist> 156 | 157 | $ launchctl load ~/Library/LaunchAgents/environment.plist 158 | $ launchctl getenv GCP_EMAIL //try to get value. 159 | 160 | Then start your applications. 
161 | ``` 162 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id "java" 3 | id "checkstyle" 4 | id "jacoco" 5 | id "signing" 6 | id "maven-publish" 7 | id "org.embulk.embulk-plugins" version "0.4.2" 8 | } 9 | 10 | repositories { 11 | mavenCentral() 12 | } 13 | 14 | sourceCompatibility = 1.8 15 | targetCompatibility = 1.8 16 | 17 | group = "org.embulk" 18 | version = "0.6.0-SNAPSHOT" 19 | description = "Dumps records to Google Cloud Storage." 20 | 21 | tasks.withType(JavaCompile) { 22 | options.compilerArgs << "-Xlint:deprecation" << "-Xlint:unchecked" 23 | options.encoding = "UTF-8" 24 | } 25 | 26 | java { 27 | withJavadocJar() 28 | withSourcesJar() 29 | } 30 | 31 | def embulkVersion = '0.10.31' 32 | dependencies { 33 | compileOnly "org.embulk:embulk-api:$embulkVersion" 34 | compileOnly "org.embulk:embulk-spi:$embulkVersion" 35 | 36 | compile('org.embulk:embulk-util-config:0.3.1') { 37 | // Conflict with Embulk Core 38 | exclude group: 'com.fasterxml.jackson.core', module: 'jackson-annotations' 39 | exclude group: 'com.fasterxml.jackson.core', module: 'jackson-core' 40 | exclude group: 'com.fasterxml.jackson.core', module: 'jackson-databind' 41 | exclude group: 'com.fasterxml.jackson.datatype', module: 'jackson-datatype-jdk8' 42 | exclude group: 'javax.validation', module: 'validation-api' 43 | } 44 | compile('org.embulk:embulk-util-retryhelper:0.8.2') 45 | 46 | compile 'com.fasterxml.jackson.core:jackson-core:2.6.7' 47 | compile 'com.fasterxml.jackson.core:jackson-annotations:2.6.7' 48 | compile 'com.fasterxml.jackson.core:jackson-databind:2.6.7' 49 | compile 'com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.6.7' 50 | 51 | compile 'javax.validation:validation-api:1.1.0.Final' 52 | compile 'com.google.guava:guava:18.0' 53 | 54 | //we use version 1.2.0 due to prevent conflict guava and jackson version with embulk 
55 | compile ("com.google.cloud:google-cloud-storage:1.2.0"){ 56 | exclude group: 'com.google.cloud', module: 'google-cloud-core' 57 | exclude group: 'com.google.cloud', module: 'google-cloud-core-http' 58 | exclude group: "commons-logging", module: "commons-logging" 59 | } 60 | compile ("com.google.cloud:google-cloud-core:1.2.0"){ 61 | exclude group: 'com.google.guava', module: 'guava' 62 | exclude group: "commons-logging", module: "commons-logging" 63 | } 64 | compile ("com.google.cloud:google-cloud-core-http:1.2.0"){ 65 | exclude group: 'com.google.guava', module: 'guava' 66 | exclude group: 'com.google.cloud', module: 'google-cloud-core' 67 | exclude group: "commons-logging", module: "commons-logging" 68 | } 69 | 70 | // Instead of "commons-logging:commons-logging:1.2" that is required from "google-cloud-storage", 71 | // we use "jcl-over-slf4j" because Embulk is based on SLF4J. 72 | compile("org.slf4j:jcl-over-slf4j:1.7.12") { 73 | // slf4j-api is in embulk-core / embulk-api. 74 | exclude group: "org.slf4j", module: "slf4j-api" 75 | } 76 | 77 | testCompile "junit:junit:4.12" 78 | testCompile "org.mockito:mockito-core:2.28.2" 79 | testCompile "org.embulk:embulk-core:$embulkVersion" 80 | testCompile "org.embulk:embulk-core:$embulkVersion:tests" 81 | testCompile "org.embulk:embulk-deps:$embulkVersion" 82 | testCompile "org.embulk:embulk-junit4:$embulkVersion" 83 | testCompile "org.embulk:embulk-formatter-csv:0.10.31" 84 | testCompile "org.embulk:embulk-input-file:0.10.31" 85 | testCompile "org.embulk:embulk-parser-csv:0.10.31" 86 | } 87 | 88 | embulkPlugin { 89 | mainClass = "org.embulk.output.gcs.GcsOutputPlugin" 90 | category = "output" 91 | type = "gcs" 92 | } 93 | 94 | javadoc { 95 | options { 96 | locale = 'en_US' 97 | encoding = 'UTF-8' 98 | } 99 | } 100 | 101 | jar { 102 | from rootProject.file("LICENSE") 103 | from rootProject.file("NOTICE") 104 | } 105 | 106 | sourcesJar { 107 | from rootProject.file("LICENSE") 108 | from rootProject.file("NOTICE") 109 | 
} 110 | 111 | javadocJar { 112 | from rootProject.file("LICENSE") 113 | from rootProject.file("NOTICE") 114 | } 115 | 116 | publishing { 117 | publications { 118 | maven(MavenPublication) { 119 | groupId = project.group 120 | artifactId = project.name 121 | 122 | from components.java 123 | 124 | pom { // https://central.sonatype.org/pages/requirements.html 125 | packaging "jar" 126 | 127 | name = project.name 128 | description = project.description 129 | url = "https://www.embulk.org/" 130 | 131 | developers { 132 | developer { 133 | name = "Kazuyuki Honda" 134 | email = "hakobera@gmail.com" 135 | } 136 | developer { 137 | name = "Satoshi Akama" 138 | email = "satoshiakama@gmail.com" 139 | } 140 | developer { 141 | name = "John Luong" 142 | email = "jluong@treasure-data.com" 143 | } 144 | } 145 | 146 | licenses { 147 | license { 148 | // http://central.sonatype.org/pages/requirements.html#license-information 149 | name = "The Apache License, Version 2.0" 150 | url = "https://www.apache.org/licenses/LICENSE-2.0.txt" 151 | } 152 | } 153 | 154 | scm { 155 | connection = "scm:git:git://github.com/embulk/embulk-output-gcs.git" 156 | developerConnection = "scm:git:git@github.com:embulk/embulk-output-gcs.git" 157 | url = "https://github.com/embulk/embulk-output-gcs" 158 | } 159 | } 160 | } 161 | } 162 | 163 | repositories { 164 | maven { // publishMavenPublicationToMavenCentralRepository 165 | name = "mavenCentral" 166 | if (project.version.endsWith("-SNAPSHOT")) { 167 | url "https://oss.sonatype.org/content/repositories/snapshots" 168 | } else { 169 | url "https://oss.sonatype.org/service/local/staging/deploy/maven2" 170 | } 171 | 172 | credentials { 173 | username = project.hasProperty("ossrhUsername") ? ossrhUsername : "" 174 | password = project.hasProperty("ossrhPassword") ? 
ossrhPassword : "" 175 | } 176 | } 177 | } 178 | } 179 | 180 | signing { 181 | sign publishing.publications.maven 182 | } 183 | 184 | gem { 185 | authors = [ "Kazuyuki Honda" ] 186 | email = [ "hakobera@gmail.com" ] 187 | summary = "Google Cloud Storage output plugin for Embulk" 188 | homepage = "https://github.com/embulk/embulk-output-gcs" 189 | licenses = [ "Apache-2.0" ] 190 | from rootProject.file("LICENSE") 191 | from rootProject.file("NOTICE_GEM") 192 | rename ("NOTICE_GEM", "NOTICE") 193 | } 194 | 195 | gemPush { 196 | host = "https://rubygems.org" 197 | } 198 | 199 | checkstyle { 200 | configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml") 201 | toolVersion = '6.14.1' 202 | } 203 | checkstyleMain { 204 | configFile = file("${project.rootDir}/config/checkstyle/default.xml") 205 | ignoreFailures = true 206 | } 207 | checkstyleTest { 208 | configFile = file("${project.rootDir}/config/checkstyle/default.xml") 209 | ignoreFailures = true 210 | } 211 | task checkstyle(type: Checkstyle) { 212 | classpath = sourceSets.main.output + sourceSets.test.output 213 | source = sourceSets.main.allJava + sourceSets.test.allJava 214 | } 215 | 216 | test { 217 | testLogging { 218 | outputs.upToDateWhen { false } 219 | exceptionFormat = org.gradle.api.tasks.testing.logging.TestExceptionFormat.FULL 220 | showCauses = true 221 | showExceptions = true 222 | showStackTraces = true 223 | showStandardStreams = true 224 | events "passed", "skipped", "failed", "standardOut", "standardError" 225 | } 226 | } -------------------------------------------------------------------------------- /config/checkstyle/checkstyle.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 
61 | 62 | 63 | 64 | 65 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /config/checkstyle/default.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /gradle/dependency-locks/embulkPluginRuntime.lockfile: -------------------------------------------------------------------------------- 1 | # This is a Gradle generated file for dependency locking. 2 | # Manual edits can break the build and are not advised. 3 | # This file is expected to be part of source control. 
4 | com.fasterxml.jackson.core:jackson-annotations:2.6.7 5 | com.fasterxml.jackson.core:jackson-core:2.6.7 6 | com.fasterxml.jackson.core:jackson-databind:2.6.7 7 | com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.6.7 8 | com.google.api-client:google-api-client:1.21.0 9 | com.google.api.grpc:proto-google-common-protos:0.1.11 10 | com.google.api.grpc:proto-google-iam-v1:0.1.11 11 | com.google.api:api-common:1.1.0 12 | com.google.api:gax:1.3.1 13 | com.google.apis:google-api-services-storage:v1-rev100-1.22.0 14 | com.google.auth:google-auth-library-credentials:0.7.0 15 | com.google.auth:google-auth-library-oauth2-http:0.7.0 16 | com.google.auto.value:auto-value:1.2 17 | com.google.cloud:google-cloud-core-http:1.2.0 18 | com.google.cloud:google-cloud-core:1.2.0 19 | com.google.cloud:google-cloud-storage:1.2.0 20 | com.google.code.findbugs:jsr305:3.0.0 21 | com.google.code.gson:gson:2.7 22 | com.google.guava:guava:18.0 23 | com.google.http-client:google-http-client-appengine:1.21.0 24 | com.google.http-client:google-http-client-jackson2:1.21.0 25 | com.google.http-client:google-http-client-jackson:1.21.0 26 | com.google.http-client:google-http-client:1.21.0 27 | com.google.oauth-client:google-oauth-client:1.21.0 28 | com.google.protobuf:protobuf-java-util:3.2.0 29 | com.google.protobuf:protobuf-java:3.2.0 30 | commons-codec:commons-codec:1.3 31 | javax.validation:validation-api:1.1.0.Final 32 | joda-time:joda-time:2.9.2 33 | org.apache.httpcomponents:httpclient:4.0.1 34 | org.apache.httpcomponents:httpcore:4.0.1 35 | org.codehaus.jackson:jackson-core-asl:1.9.11 36 | org.embulk:embulk-util-config:0.3.1 37 | org.embulk:embulk-util-retryhelper:0.8.2 38 | org.json:json:20160810 39 | org.slf4j:jcl-over-slf4j:1.7.12 40 | org.threeten:threetenbp:1.3.3 41 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/embulk/embulk-output-gcs/427f9fdc677885a7467606393f6a343ceda2c4c9/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Tue Nov 02 00:01:46 ICT 2021 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.5-bin.zip 7 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # Resolve links: $0 may be a link 11 | PRG="$0" 12 | # Need this for relative symlinks. 13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 31 | DEFAULT_JVM_OPTS="" 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 34 | MAX_FD="maximum" 35 | 36 | warn ( ) { 37 | echo "$*" 38 | } 39 | 40 | die ( ) { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 
48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! -x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? 
-ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" 
"$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save ( ) { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
17 | set DEFAULT_JVM_OPTS= 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 
78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/output/gcs/AuthMethod.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2018 The Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.embulk.output.gcs; 18 | 19 | public enum AuthMethod 20 | { 21 | private_key("private_key"), 22 | compute_engine("compute_engine"), 23 | json_key("json_key"); 24 | 25 | private final String string; 26 | 27 | AuthMethod(String string) 28 | { 29 | this.string = string; 30 | } 31 | 32 | public String getString() 33 | { 34 | return string; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/output/gcs/GcsAuthentication.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 The Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.embulk.output.gcs; 18 | 19 | import com.google.api.client.auth.oauth2.TokenResponseException; 20 | import com.google.api.client.googleapis.json.GoogleJsonResponseException; 21 | import com.google.api.client.http.HttpTransport; 22 | import com.google.api.client.http.apache.ApacheHttpTransport; 23 | import com.google.api.client.json.jackson2.JacksonFactory; 24 | import com.google.api.client.util.SecurityUtils; 25 | import com.google.api.services.storage.StorageScopes; 26 | import com.google.auth.http.HttpTransportFactory; 27 | import com.google.auth.oauth2.ComputeEngineCredentials; 28 | import com.google.auth.oauth2.GoogleCredentials; 29 | import com.google.auth.oauth2.ServiceAccountCredentials; 30 | import com.google.cloud.TransportOptions; 31 | import com.google.cloud.http.HttpTransportOptions; 32 | import com.google.cloud.storage.Storage; 33 | import com.google.cloud.storage.StorageException; 34 | import com.google.cloud.storage.StorageOptions; 35 | import org.embulk.config.ConfigException; 36 | import org.embulk.util.config.units.LocalFile; 37 | import org.embulk.util.retryhelper.RetryExecutor; 38 | import org.embulk.util.retryhelper.RetryGiveupException; 39 | import org.embulk.util.retryhelper.Retryable; 40 | import org.slf4j.Logger; 41 | import org.slf4j.LoggerFactory; 42 | 43 | import java.io.File; 44 | import java.io.FileInputStream; 45 | import java.io.IOException; 46 | import java.io.InterruptedIOException; 47 | import java.security.GeneralSecurityException; 48 | import 
java.security.PrivateKey; 49 | import java.util.Collections; 50 | import java.util.Optional; 51 | import java.util.function.Function; 52 | 53 | public class GcsAuthentication 54 | { 55 | private final Logger log = LoggerFactory.getLogger(GcsAuthentication.class); 56 | private final Optional serviceAccountEmail; 57 | private final Optional p12KeyFilePath; 58 | private final Optional jsonKeyFilePath; 59 | private final String applicationName; 60 | private final HttpTransport httpTransport; 61 | private final JacksonFactory jsonFactory; 62 | private final GoogleCredentials credentials; 63 | private PluginTask task; 64 | 65 | public GcsAuthentication(PluginTask task) throws IOException, GeneralSecurityException 66 | { 67 | this.task = task; 68 | this.serviceAccountEmail = task.getServiceAccountEmail(); 69 | this.p12KeyFilePath = task.getP12Keyfile().map(localFileToPathString()); 70 | this.jsonKeyFilePath = task.getJsonKeyfile().map(localFileToPathString()); 71 | this.applicationName = task.getApplicationName(); 72 | this.httpTransport = new ApacheHttpTransport.Builder().build(); 73 | this.jsonFactory = new JacksonFactory(); 74 | 75 | if (task.getAuthMethod() == AuthMethod.compute_engine) { 76 | this.credentials = getComputeCredential(); 77 | } 78 | else if (task.getAuthMethod() == AuthMethod.json_key) { 79 | this.credentials = getServiceAccountCredentialFromJsonFile(); 80 | } 81 | else { 82 | this.credentials = getServiceAccountCredential(); 83 | } 84 | } 85 | 86 | /** 87 | * @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#authorizingrequests 88 | */ 89 | private GoogleCredentials getServiceAccountCredential() throws IOException, GeneralSecurityException 90 | { 91 | File p12 = new File(p12KeyFilePath.get()); 92 | PrivateKey privateKey = SecurityUtils.loadPrivateKeyFromKeyStore(SecurityUtils.getPkcs12KeyStore(), 93 | new FileInputStream(p12), task.getStorePass(), "privatekey", task.getKeyPass()); 94 | HttpTransportFactory transportFactory = () -> 
httpTransport; 95 | GoogleCredentials credentials = new ServiceAccountCredentials(null, serviceAccountEmail.get(), 96 | privateKey, null, Collections.singleton(StorageScopes.DEVSTORAGE_READ_WRITE), transportFactory, null); 97 | return credentials; 98 | } 99 | 100 | private GoogleCredentials getServiceAccountCredentialFromJsonFile() throws IOException 101 | { 102 | FileInputStream stream = new FileInputStream(jsonKeyFilePath.get()); 103 | return GoogleCredentials.fromStream(stream) 104 | .createScoped(Collections.singleton(StorageScopes.DEVSTORAGE_READ_WRITE)); 105 | } 106 | 107 | /** 108 | * @see http://developers.guge.io/accounts/docs/OAuth2ServiceAccount#creatinganaccount 109 | * @see https://developers.google.com/accounts/docs/OAuth2 110 | */ 111 | private GoogleCredentials getComputeCredential() throws IOException 112 | { 113 | HttpTransportFactory transportFactory = () -> httpTransport; 114 | ComputeEngineCredentials credentials = new ComputeEngineCredentials(transportFactory); 115 | credentials.refreshAccessToken(); 116 | return credentials; 117 | } 118 | 119 | public Storage getGcsClient() throws ConfigException, IOException 120 | { 121 | try { 122 | return RetryExecutor.builder() 123 | .withRetryLimit(task.getMaxConnectionRetry()) 124 | .withInitialRetryWaitMillis(task.getInitialRetryIntervalMillis()) 125 | .withMaxRetryWaitMillis(task.getMaximumRetryIntervalMillis()) 126 | .build() 127 | .runInterruptible(new Retryable() { 128 | @Override 129 | public Storage call() throws IOException, RetryGiveupException 130 | { 131 | final TransportOptions transportOptions = HttpTransportOptions.newBuilder() 132 | .setConnectTimeout(30000) // in milliseconds 133 | .setReadTimeout(30000) // in milliseconds 134 | .build(); 135 | 136 | Storage client = StorageOptions.newBuilder() 137 | .setCredentials(credentials) 138 | .setTransportOptions(transportOptions) 139 | .build().getService(); 140 | 141 | // For throw ConfigException when authentication is fail. 
142 | client.list(task.getBucket(), Storage.BlobListOption.pageSize(1)).hasNextPage(); 143 | return client; 144 | } 145 | 146 | @Override 147 | public boolean isRetryableException(Exception exception) 148 | { 149 | if (exception instanceof GoogleJsonResponseException || exception instanceof TokenResponseException || exception instanceof StorageException) { 150 | int statusCode; 151 | if (exception instanceof GoogleJsonResponseException) { 152 | if (((GoogleJsonResponseException) exception).getDetails() == null) { 153 | String content = ""; 154 | if (((GoogleJsonResponseException) exception).getContent() != null) { 155 | content = ((GoogleJsonResponseException) exception).getContent(); 156 | } 157 | log.warn("Invalid response was returned : {}", content); 158 | return true; 159 | } 160 | statusCode = ((GoogleJsonResponseException) exception).getDetails().getCode(); 161 | } 162 | else if (exception instanceof TokenResponseException) { 163 | statusCode = ((TokenResponseException) exception).getStatusCode(); 164 | } 165 | else { 166 | statusCode = ((StorageException) exception).getCode(); 167 | } 168 | 169 | if (statusCode / 100 == 4) { 170 | return false; 171 | } 172 | } 173 | return true; 174 | } 175 | 176 | @Override 177 | public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait) 178 | throws RetryGiveupException 179 | { 180 | String message = String.format("GCS GET request failed. Retrying %d/%d after %d seconds. 
Message: %s: %s", 181 | retryCount, retryLimit, retryWait / 1000, exception.getClass(), exception.getMessage()); 182 | if (retryCount % 3 == 0) { 183 | log.warn(message, exception); 184 | } 185 | else { 186 | log.warn(message); 187 | } 188 | } 189 | 190 | @Override 191 | public void onGiveup(Exception firstException, Exception lastException) 192 | throws RetryGiveupException 193 | { 194 | } 195 | }); 196 | } 197 | catch (RetryGiveupException ex) { 198 | if (ex.getCause() instanceof GoogleJsonResponseException || ex.getCause() instanceof TokenResponseException || ex.getCause() instanceof StorageException) { 199 | int statusCode = 0; 200 | if (ex.getCause() instanceof GoogleJsonResponseException) { 201 | if (((GoogleJsonResponseException) ex.getCause()).getDetails() != null) { 202 | statusCode = ((GoogleJsonResponseException) ex.getCause()).getDetails().getCode(); 203 | } 204 | } 205 | else if (ex.getCause() instanceof TokenResponseException) { 206 | statusCode = ((TokenResponseException) ex.getCause()).getStatusCode(); 207 | } 208 | else { 209 | statusCode = ((StorageException) ex.getCause()).getCode(); 210 | } 211 | if (statusCode / 100 == 4) { 212 | throw new ConfigException(ex); 213 | } 214 | } 215 | throw new RuntimeException(ex); 216 | } 217 | catch (InterruptedException ex) { 218 | throw new InterruptedIOException(); 219 | } 220 | } 221 | 222 | private Function localFileToPathString() 223 | { 224 | return file -> file.getPath().toString(); 225 | } 226 | } 227 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/output/gcs/GcsOutputPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Kazuyuki Honda, and the Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.embulk.output.gcs; 18 | 19 | import com.google.cloud.storage.Storage; 20 | import com.google.common.annotations.VisibleForTesting; 21 | import org.embulk.config.ConfigDiff; 22 | import org.embulk.config.ConfigException; 23 | import org.embulk.config.ConfigSource; 24 | import org.embulk.config.TaskReport; 25 | import org.embulk.config.TaskSource; 26 | import org.embulk.spi.FileOutputPlugin; 27 | import org.embulk.spi.TransactionalFileOutput; 28 | import org.embulk.util.config.ConfigMapper; 29 | import org.embulk.util.config.ConfigMapperFactory; 30 | import org.embulk.util.config.TaskMapper; 31 | import org.embulk.util.config.units.LocalFile; 32 | 33 | import java.io.IOException; 34 | import java.security.GeneralSecurityException; 35 | import java.util.List; 36 | import java.util.Optional; 37 | 38 | public class GcsOutputPlugin implements FileOutputPlugin 39 | { 40 | public static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory.builder() 41 | .addDefaultModules().build(); 42 | public static final ConfigMapper CONFIG_MAPPER = CONFIG_MAPPER_FACTORY.createConfigMapper(); 43 | public static final TaskMapper TASK_MAPPER = CONFIG_MAPPER_FACTORY.createTaskMapper(); 44 | @Override 45 | public ConfigDiff transaction(ConfigSource config, 46 | int taskCount, 47 | FileOutputPlugin.Control control) 48 | { 49 | PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class); 50 | 51 | if (task.getP12KeyfilePath().isPresent()) { 52 | if (task.getP12Keyfile().isPresent()) { 53 | throw new 
ConfigException("Setting both p12_keyfile_path and p12_keyfile is invalid"); 54 | } 55 | try { 56 | task.setP12Keyfile(Optional.of(LocalFile.of(task.getP12KeyfilePath().get()))); 57 | } 58 | catch (IOException ex) { 59 | throw new RuntimeException(ex); 60 | } 61 | } 62 | 63 | if (task.getAuthMethod().getString().equals("json_key")) { 64 | if (!task.getJsonKeyfile().isPresent()) { 65 | throw new ConfigException("If auth_method is json_key, you have to set json_keyfile"); 66 | } 67 | } 68 | else if (task.getAuthMethod().getString().equals("private_key")) { 69 | if (!task.getP12Keyfile().isPresent() || !task.getServiceAccountEmail().isPresent()) { 70 | throw new ConfigException("If auth_method is private_key, you have to set both service_account_email and p12_keyfile"); 71 | } 72 | } 73 | 74 | return resume(task.toTaskSource(), taskCount, control); 75 | } 76 | 77 | @Override 78 | public ConfigDiff resume(TaskSource taskSource, 79 | int taskCount, 80 | FileOutputPlugin.Control control) 81 | { 82 | control.run(taskSource); 83 | return CONFIG_MAPPER_FACTORY.newConfigDiff(); 84 | } 85 | 86 | @Override 87 | public void cleanup(TaskSource taskSource, 88 | int taskCount, 89 | List successTaskReports) 90 | { 91 | } 92 | 93 | @Override 94 | public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex) 95 | { 96 | PluginTask task = TASK_MAPPER.map(taskSource, PluginTask.class); 97 | 98 | Storage client = createClient(task); 99 | return new GcsTransactionalFileOutput(task, client, taskIndex); 100 | } 101 | 102 | private GcsAuthentication newGcsAuth(PluginTask task) 103 | { 104 | try { 105 | return new GcsAuthentication(task); 106 | } 107 | catch (GeneralSecurityException | IOException ex) { 108 | throw new ConfigException(ex); 109 | } 110 | } 111 | 112 | @VisibleForTesting 113 | public Storage createClient(final PluginTask task) 114 | { 115 | try { 116 | GcsAuthentication auth = newGcsAuth(task); 117 | return auth.getGcsClient(); 118 | } 119 | catch 
(ConfigException | IOException ex) { 120 | throw new RuntimeException(ex); 121 | } 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/output/gcs/GcsTransactionalFileOutput.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2018 The Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.embulk.output.gcs; 18 | 19 | import com.google.cloud.WriteChannel; 20 | import com.google.cloud.storage.Blob; 21 | import com.google.cloud.storage.BlobId; 22 | import com.google.cloud.storage.BlobInfo; 23 | import com.google.cloud.storage.Storage; 24 | import com.google.common.annotations.VisibleForTesting; 25 | import org.embulk.config.TaskReport; 26 | import org.embulk.spi.Buffer; 27 | import org.embulk.spi.TransactionalFileOutput; 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | 31 | import java.io.IOException; 32 | import java.nio.ByteBuffer; 33 | import java.util.ArrayList; 34 | import java.util.List; 35 | 36 | import static org.embulk.output.gcs.GcsOutputPlugin.CONFIG_MAPPER_FACTORY; 37 | 38 | public class GcsTransactionalFileOutput implements TransactionalFileOutput 39 | { 40 | private static final Logger logger = LoggerFactory.getLogger(GcsTransactionalFileOutput.class); 41 | 42 | private final int taskIndex; 43 | private final Storage client; 
44 | private final String bucket; 45 | private final String pathPrefix; 46 | private final String pathSuffix; 47 | private final String sequenceFormat; 48 | private final String contentType; 49 | private final List storageObjects = new ArrayList<>(); 50 | private BlobId blobId = null; 51 | private int fileIndex = 0; 52 | private WriteChannel writer = null; 53 | private long byteCount = 0; 54 | private long totalByte = 0; 55 | 56 | GcsTransactionalFileOutput(PluginTask task, Storage client, int taskIndex) 57 | { 58 | this.taskIndex = taskIndex; 59 | this.client = client; 60 | this.bucket = task.getBucket(); 61 | this.pathPrefix = task.getPathPrefix(); 62 | this.pathSuffix = task.getFileNameExtension(); 63 | this.sequenceFormat = task.getSequenceFormat(); 64 | this.contentType = task.getContentType(); 65 | } 66 | 67 | public void nextFile() 68 | { 69 | closeCurrentWriter(); 70 | try { 71 | String blobName = generateRemotePath(pathPrefix, sequenceFormat, taskIndex, fileIndex, pathSuffix); 72 | blobId = BlobId.of(bucket, blobName); 73 | BlobInfo blobInfo = BlobInfo.newBuilder(blobId).setContentType(contentType).build(); 74 | writer = client.writer(blobInfo); 75 | } 76 | catch (Exception ex) { 77 | throw new RuntimeException(ex); 78 | } 79 | } 80 | 81 | @Override 82 | public void add(Buffer buffer) 83 | { 84 | try { 85 | writer.write(ByteBuffer.wrap(buffer.array(), buffer.offset(), buffer.limit())); 86 | byteCount = byteCount + buffer.limit(); 87 | //104857600 = 100MB 88 | if (byteCount >= 104857600) { 89 | totalByte = totalByte + byteCount; 90 | logger.info("Uploaded {} bytes", totalByte); 91 | byteCount = 0; 92 | } 93 | } 94 | catch (Exception ex) { 95 | //clean up file if exist 96 | try { 97 | boolean deleted = client.delete(blobId); 98 | logger.info(" Delete file: {} > deleted? 
{}", blobId.getName(), deleted); 99 | } 100 | catch (Exception e) { 101 | logger.warn("Failed to delete file: {}, error message: {}", blobId.getName(), e.getMessage()); 102 | } 103 | throw new RuntimeException(ex); 104 | } 105 | finally { 106 | buffer.release(); 107 | } 108 | } 109 | 110 | @Override 111 | public void finish() 112 | { 113 | logger.info("Uploaded total {} bytes.", totalByte + byteCount); 114 | closeCurrentWriter(); 115 | //query blob again to check 116 | Blob blob = client.get(blobId); 117 | logger.info("Upload {} successfully.", blobId.getName()); 118 | storageObjects.add(blob.getBlobId().toString()); 119 | } 120 | 121 | @Override 122 | public void close() 123 | { 124 | closeCurrentWriter(); 125 | } 126 | 127 | @Override 128 | public void abort() 129 | { 130 | } 131 | 132 | @Override 133 | public TaskReport commit() 134 | { 135 | TaskReport report = CONFIG_MAPPER_FACTORY.newTaskReport(); 136 | report.set("files", storageObjects); 137 | return report; 138 | } 139 | 140 | /** 141 | * GCS has character limitation in object names. 142 | * @see https://cloud.google.com/storage/docs/naming#objectnames 143 | * Although "." isn't listed at above pages, we can't access "./" path from GUI console. 144 | * And in many cases, user don't intend of creating "/" directory under the bucket. 
145 | * This method normalizes path when it contains "./" and "/" and its variations at the beginning 146 | */ 147 | @VisibleForTesting 148 | public static String generateRemotePath(String pathPrefix, String sequenceFormat, int taskIndex, int fileIndex, String pathSuffix) 149 | { 150 | String path = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix; 151 | return path.replaceFirst("^\\.*/*", ""); 152 | } 153 | 154 | private void closeCurrentWriter() 155 | { 156 | if (writer != null && writer.isOpen()) { 157 | try { 158 | writer.close(); 159 | } 160 | catch (IOException ex) { 161 | throw new RuntimeException(ex); 162 | } 163 | } 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/output/gcs/PluginTask.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2018 The Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.embulk.output.gcs; 18 | 19 | import org.embulk.util.config.Config; 20 | import org.embulk.util.config.ConfigDefault; 21 | import org.embulk.util.config.Task; 22 | import org.embulk.util.config.units.LocalFile; 23 | 24 | import java.util.Optional; 25 | 26 | public interface PluginTask extends Task 27 | { 28 | @Config("bucket") 29 | String getBucket(); 30 | 31 | @Config("path_prefix") 32 | String getPathPrefix(); 33 | 34 | @Config("file_ext") 35 | String getFileNameExtension(); 36 | 37 | @Config("sequence_format") 38 | @ConfigDefault("\".%03d.%02d\"") 39 | String getSequenceFormat(); 40 | 41 | @Config("content_type") 42 | @ConfigDefault("\"application/octet-stream\"") 43 | String getContentType(); 44 | 45 | @Config("auth_method") 46 | @ConfigDefault("\"private_key\"") 47 | AuthMethod getAuthMethod(); 48 | 49 | @Config("service_account_email") 50 | @ConfigDefault("null") 51 | Optional getServiceAccountEmail(); 52 | 53 | // kept for backward compatibility 54 | @Config("p12_keyfile_path") 55 | @ConfigDefault("null") 56 | Optional getP12KeyfilePath(); 57 | 58 | @Config("p12_keyfile") 59 | @ConfigDefault("null") 60 | Optional getP12Keyfile(); 61 | void setP12Keyfile(Optional p12Keyfile); 62 | 63 | @Config("json_keyfile") 64 | @ConfigDefault("null") 65 | Optional getJsonKeyfile(); 66 | 67 | @Config("application_name") 68 | @ConfigDefault("\"embulk-output-gcs\"") 69 | String getApplicationName(); 70 | 71 | @Config("max_connection_retry") 72 | @ConfigDefault("10") // 10 times retry to connect GCS server if failed. 
73 | int getMaxConnectionRetry(); 74 | 75 | @Config("initial_retry_interval_millis") 76 | @ConfigDefault("500") 77 | int getInitialRetryIntervalMillis(); 78 | 79 | @Config("maximum_retry_interval_millis") 80 | @ConfigDefault("30000") 81 | int getMaximumRetryIntervalMillis(); 82 | 83 | @Config("store_pass") 84 | @ConfigDefault("\"notasecret\"") 85 | String getStorePass(); 86 | 87 | @Config("key_pass") 88 | @ConfigDefault("\"notasecret\"") 89 | String getKeyPass(); 90 | } 91 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/output/gcs/TestGcsAuthentication.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 The Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.embulk.output.gcs; 18 | 19 | import com.google.common.base.Throwables; 20 | import org.embulk.EmbulkTestRuntime; 21 | import org.embulk.config.ConfigException; 22 | import org.embulk.config.ConfigSource; 23 | import org.embulk.util.config.units.LocalFile; 24 | import org.junit.Assert; 25 | import org.junit.BeforeClass; 26 | import org.junit.Rule; 27 | import org.junit.Test; 28 | 29 | import static org.embulk.output.gcs.GcsOutputPlugin.CONFIG_MAPPER; 30 | import static org.embulk.output.gcs.GcsOutputPlugin.CONFIG_MAPPER_FACTORY; 31 | import static org.junit.Assert.fail; 32 | import static org.junit.Assume.assumeNotNull; 33 | 34 | import java.nio.file.NoSuchFileException; 35 | import java.security.InvalidKeyException; 36 | import java.util.Base64; 37 | import java.util.Optional; 38 | 39 | public class TestGcsAuthentication 40 | { 41 | private static Optional GCP_EMAIL; 42 | private static Optional GCP_P12_KEYFILE; 43 | private static Optional GCP_JSON_KEYFILE; 44 | private static String GCP_BUCKET; 45 | private static final String GCP_APPLICATION_NAME = "embulk-output-gcs"; 46 | 47 | /* 48 | * This test case requires environment variables 49 | * GCP_EMAIL 50 | * GCP_P12_KEYFILE 51 | * GCP_JSON_KEYFILE 52 | * GCP_BUCKET 53 | */ 54 | @BeforeClass 55 | public static void initializeConstant() 56 | { 57 | GCP_EMAIL = Optional.of(System.getenv("GCP_EMAIL")); 58 | GCP_JSON_KEYFILE = Optional.of(System.getenv("GCP_JSON_KEYFILE")); 59 | GCP_P12_KEYFILE = Optional.of(System.getenv("GCP_PRIVATE_KEYFILE")); 60 | GCP_BUCKET = System.getenv("GCP_BUCKET"); 61 | // skip test cases, if environment variables are not set. 
62 | assumeNotNull(GCP_EMAIL, GCP_P12_KEYFILE, GCP_JSON_KEYFILE, GCP_BUCKET); 63 | } 64 | 65 | @Rule 66 | public EmbulkTestRuntime runtime = new EmbulkTestRuntime(); 67 | 68 | @Test 69 | public void testGetServiceAccountCredentialThrowFileNotFoundException() 70 | { 71 | Optional notFoundP12Keyfile = Optional.of("/path/to/notfound.p12"); 72 | ConfigSource configSource = config(AuthMethod.private_key); 73 | configSource.set("p12_keyfile", notFoundP12Keyfile); 74 | try { 75 | CONFIG_MAPPER.map(configSource, PluginTask.class); 76 | fail(); 77 | } 78 | catch (Exception ex) { 79 | Assert.assertTrue(Throwables.getRootCause(ex) instanceof NoSuchFileException); 80 | } 81 | } 82 | 83 | @Test 84 | public void testGetGcsClientUsingServiceAccountCredentialSuccess() throws Exception 85 | { 86 | ConfigSource configSource = config(AuthMethod.private_key); 87 | byte[] keyBytes = Base64.getDecoder().decode(GCP_P12_KEYFILE.get()); 88 | Optional p12Key = Optional.of(LocalFile.ofContent(keyBytes)); 89 | configSource.set("p12_keyfile", p12Key); 90 | PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class); 91 | GcsAuthentication auth = new GcsAuthentication(task); 92 | auth.getGcsClient(); 93 | } 94 | 95 | @Test(expected = ConfigException.class) 96 | public void testGetGcsClientUsingServiceAccountCredentialThrowConfigException() throws Exception 97 | { 98 | ConfigSource configSource = config(AuthMethod.private_key); 99 | byte[] keyBytes = Base64.getDecoder().decode(GCP_P12_KEYFILE.get()); 100 | Optional p12Key = Optional.of(LocalFile.ofContent(keyBytes)); 101 | configSource.set("p12_keyfile", p12Key); 102 | configSource.set("bucket", "non-exists-bucket"); 103 | PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class); 104 | GcsAuthentication auth = new GcsAuthentication(task); 105 | auth.getGcsClient(); 106 | fail(); 107 | } 108 | 109 | @Test 110 | public void testGetServiceAccountCredentialFromJsonThrowFileFileNotFoundException() 111 | { 112 | Optional 
notFoundJsonKeyfile = Optional.of("/path/to/notfound.json"); 113 | ConfigSource configSource = config(AuthMethod.json_key); 114 | configSource.set("json_keyfile", notFoundJsonKeyfile); 115 | try { 116 | CONFIG_MAPPER.map(configSource, PluginTask.class); 117 | fail(); 118 | } 119 | catch (Exception ex) { 120 | Assert.assertTrue(Throwables.getRootCause(ex) instanceof NoSuchFileException); 121 | } 122 | } 123 | 124 | @Test 125 | public void testGetServiceAccountCredentialFromInvalidJsonKey() 126 | { 127 | String jsonKey = "{\n" + 128 | "\"type\": \"service_account\",\n" + 129 | "\"project_id\": \"test\",\n" + 130 | "\"private_key_id\": \"private_key_id\",\n" + 131 | "\"private_key\": \"-----BEGIN PRIVATE KEY-----\\nInvalidKey\\n-----END PRIVATE KEY-----\\n\",\n" + 132 | "\"client_email\": \"test@test.iam.gserviceaccount.com\",\n" + 133 | "\"client_id\": \"433252345345\",\n" + 134 | "\"auth_uri\": \"https://accounts.google.com/o/oauth2/auth\",\n" + 135 | " \"token_uri\": \"https://oauth2.googleapis.com/token\",\n" + 136 | "\"auth_provider_x509_cert_url\": \"https://www.googleapis.com/oauth2/v1/certs\",\n" + 137 | "\"client_x509_cert_url\": \"https://www.googleapis.com/robot/v1/metadata/x509/test.iam.gserviceaccount.com\"\n" + 138 | "}"; 139 | 140 | Optional invalidJsonKeyfile = Optional.of(LocalFile.ofContent(jsonKey.getBytes())); 141 | ConfigSource configSource = config(AuthMethod.json_key); 142 | configSource.set("json_keyfile", invalidJsonKeyfile); 143 | try { 144 | PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class); 145 | GcsAuthentication auth = new GcsAuthentication(task); 146 | auth.getGcsClient(); 147 | fail(); 148 | } 149 | catch (Exception ex) { 150 | Assert.assertTrue(Throwables.getRootCause(ex) instanceof InvalidKeyException); 151 | } 152 | } 153 | 154 | @Test 155 | public void testGetServiceAccountCredentialFromJsonSuccess() throws Exception 156 | { 157 | ConfigSource configSource = config(AuthMethod.json_key); 158 | Optional jsonKeyfile = 
Optional.of(LocalFile.ofContent(GCP_JSON_KEYFILE.get().getBytes())); 159 | configSource.set("json_keyfile", jsonKeyfile); 160 | PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class); 161 | GcsAuthentication auth = new GcsAuthentication(task); 162 | auth.getGcsClient(); 163 | } 164 | 165 | @Test(expected = ConfigException.class) 166 | public void testGetServiceAccountCredentialFromJsonThrowConfigException() throws Exception 167 | { 168 | ConfigSource configSource = config(AuthMethod.json_key); 169 | Optional jsonKeyfile = Optional.of(LocalFile.ofContent(GCP_JSON_KEYFILE.get().getBytes())); 170 | configSource.set("json_keyfile", jsonKeyfile); 171 | configSource.set("bucket", "non-exists-bucket"); 172 | PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class); 173 | GcsAuthentication auth = new GcsAuthentication(task); 174 | auth.getGcsClient(); 175 | fail(); 176 | } 177 | 178 | public ConfigSource config(AuthMethod authMethod) 179 | { 180 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 181 | .set("type", "gcs") 182 | .set("bucket", GCP_BUCKET) 183 | .set("path_prefix", "") 184 | .set("last_path", "") 185 | .set("file_ext", ".csv") 186 | .set("service_account_email", GCP_EMAIL) 187 | .set("application_name", GCP_APPLICATION_NAME) 188 | .set("max_connection_retry", 3); 189 | 190 | if (authMethod == AuthMethod.private_key) { 191 | config.set("auth_method", "private_key"); 192 | } 193 | else if (authMethod == AuthMethod.json_key) { 194 | config.set("auth_method", "json_key"); 195 | } 196 | else { 197 | config.set("auth_method", "compute_engine"); 198 | } 199 | return config; 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /src/test/java/org/embulk/output/gcs/TestGcsOutputPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Kazuyuki Honda, and the Embulk project 3 | * 4 | * Licensed under the Apache License, 
Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.embulk.output.gcs;

import com.google.cloud.storage.Blob;
import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.Storage;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;

import org.embulk.EmbulkSystemProperties;
import org.embulk.EmbulkTestRuntime;
import org.embulk.config.ConfigException;
import org.embulk.config.ConfigSource;
import org.embulk.config.TaskReport;
import org.embulk.config.TaskSource;
import org.embulk.exec.PartialExecutionException;
import org.embulk.formatter.csv.CsvFormatterPlugin;
import org.embulk.input.file.LocalFileInputPlugin;
import org.embulk.parser.csv.CsvParserPlugin;
import org.embulk.spi.FileInputPlugin;
import org.embulk.spi.FileOutputPlugin;
import org.embulk.spi.FormatterPlugin;
import org.embulk.spi.ParserPlugin;
import org.embulk.test.TestingEmbulk;
import org.embulk.util.config.units.LocalFile;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.mockito.Mockito;

import static org.embulk.output.gcs.GcsOutputPlugin.CONFIG_MAPPER;
import static org.embulk.output.gcs.GcsOutputPlugin.CONFIG_MAPPER_FACTORY;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.junit.Assume.assumeNotNull;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.channels.Channels;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.security.GeneralSecurityException;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;
import java.util.Optional;
import java.util.Properties;

/**
 * Tests for {@link GcsOutputPlugin} that run against a real GCS bucket.
 *
 * <p>The whole class is skipped when the required environment variables are not set.
 */
public class TestGcsOutputPlugin
{
    private static final EmbulkSystemProperties EMBULK_SYSTEM_PROPERTIES;
    private static Optional<String> GCP_EMAIL;
    private static Optional<String> GCP_P12_KEYFILE;
    private static Optional<String> GCP_JSON_KEYFILE;
    private static String GCP_BUCKET;
    private static String GCP_BUCKET_DIRECTORY;
    private static String GCP_PATH_PREFIX;
    private static String LOCAL_PATH_PREFIX;
    private static String GCP_APPLICATION_NAME;

    static {
        final Properties properties = new Properties();
        properties.setProperty("default_guess_plugins", "gzip,bzip2,json,csv");
        EMBULK_SYSTEM_PROPERTIES = EmbulkSystemProperties.of(properties);
    }

    /*
     * This test case requires environment variables
     *   GCP_EMAIL
     *   GCP_PRIVATE_KEYFILE (Base64-encoded P12 key)
     *   GCP_JSON_KEYFILE
     *   GCP_BUCKET
     * and optionally
     *   GCP_BUCKET_DIRECTORY
     */
    @BeforeClass
    public static void initializeConstant()
    {
        final String email = System.getenv("GCP_EMAIL");
        final String p12Keyfile = System.getenv("GCP_PRIVATE_KEYFILE");
        final String jsonKeyfile = System.getenv("GCP_JSON_KEYFILE");
        final String bucket = System.getenv("GCP_BUCKET");
        // Skip test cases if environment variables are not set. The raw values must be checked
        // before wrapping: Optional.of(null) throws NullPointerException, and an Optional
        // reference is never null, so checking the wrappers could not skip anything.
        assumeNotNull(email, p12Keyfile, jsonKeyfile, bucket);

        GCP_EMAIL = Optional.of(email);
        GCP_P12_KEYFILE = Optional.of(p12Keyfile);
        GCP_JSON_KEYFILE = Optional.of(jsonKeyfile);
        GCP_BUCKET = bucket;

        // getDirectory treats a missing (null) directory like the empty one.
        GCP_BUCKET_DIRECTORY = getDirectory(System.getenv("GCP_BUCKET_DIRECTORY"));
        GCP_PATH_PREFIX = GCP_BUCKET_DIRECTORY + "output_";
        LOCAL_PATH_PREFIX = GcsOutputPlugin.class.getClassLoader().getResource("sample_01.csv").getPath();
        GCP_APPLICATION_NAME = "embulk-output-gcs";
    }

    @Rule
    public TestingEmbulk embulk = TestingEmbulk.builder()
            .setEmbulkSystemProperties(EMBULK_SYSTEM_PROPERTIES)
            .registerPlugin(FormatterPlugin.class, "csv", CsvFormatterPlugin.class)
            .registerPlugin(FileInputPlugin.class, "file", LocalFileInputPlugin.class)
            .registerPlugin(FileOutputPlugin.class, "gcs", GcsOutputPlugin.class)
            .registerPlugin(ParserPlugin.class, "csv", CsvParserPlugin.class)
            .build();

    @Rule
    public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
    private GcsOutputPlugin plugin;

    @Before
    public void createResources() throws GeneralSecurityException, NoSuchMethodException, IOException
    {
        plugin = new GcsOutputPlugin();
    }

    @Test
    public void checkDefaultValues()
    {
        ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
                .set("type", "gcs")
                .set("bucket", GCP_BUCKET)
                .set("path_prefix", "my-prefix")
                .set("file_ext", ".csv")
                .set("formatter", formatterConfig());

        PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class);
        assertEquals("private_key", task.getAuthMethod().toString());
    }

    // p12_keyfile is null when auth_method is private_key
    @Test
    public void checkDefaultValuesP12keyNull() throws IOException
    {
        ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
                .set("type", "gcs")
                .set("bucket", GCP_BUCKET)
                .set("path_prefix", "my-prefix")
                .set("file_ext", ".csv")
                .set("auth_method", "private_key")
                .set("service_account_email", GCP_EMAIL)
                .set("p12_keyfile", null)
                .set("formatter", formatterConfig());

        try {
            embulk.runOutput(config, Paths.get(LOCAL_PATH_PREFIX));
            fail("Expected Exception was not thrown.");
        }
        catch (PartialExecutionException ex) {
            assertTrue(ex.getCause() instanceof ConfigException);
            assertEquals("If auth_method is private_key, you have to set both service_account_email and p12_keyfile", ex.getCause().getMessage());
        }
    }

    // both p12_keyfile and p12_keyfile_path set
    @Test
    public void checkDefaultValuesConflictSetting() throws IOException
    {
        ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
                .set("type", "gcs")
                .set("bucket", GCP_BUCKET)
                .set("path_prefix", "my-prefix")
                .set("file_ext", ".csv")
                .set("auth_method", "private_key")
                .set("service_account_email", GCP_EMAIL)
                .set("formatter", formatterConfig());

        config.set("p12_keyfile", Optional.of(LocalFile.ofContent("dummy".getBytes(StandardCharsets.UTF_8))));
        config.set("p12_keyfile_path", Optional.of("dummy_path"));
        try {
            embulk.runOutput(config, Paths.get(LOCAL_PATH_PREFIX));
            fail("Expected Exception was not thrown.");
        }
        catch (final PartialExecutionException ex) {
            assertTrue(ex.getCause() instanceof ConfigException);
            assertEquals("Setting both p12_keyfile_path and p12_keyfile is invalid", ex.getCause().getMessage());
        }
    }

    // invalid p12keyfile when auth_method is private_key
    @Test
    public void checkDefaultValuesInvalidPrivateKey() throws IOException
    {
        ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
                .set("type", "gcs")
                .set("bucket", GCP_BUCKET)
                .set("path_prefix", "my-prefix")
                .set("file_ext", ".csv")
                .set("auth_method", "private_key")
                .set("service_account_email", GCP_EMAIL)
                .set("p12_keyfile", "invalid-key.p12")
                .set("formatter", formatterConfig());
        try {
            embulk.runOutput(config, Paths.get(LOCAL_PATH_PREFIX));
            fail("Expected Exception was not thrown.");
        }
        catch (final PartialExecutionException ex) {
            assertTrue(ex.getCause() instanceof ConfigException);
        }
    }

    // json_keyfile is null when auth_method is json_key
    @Test
    public void checkDefaultValuesJsonKeyfileNull() throws IOException
    {
        ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
                .set("type", "gcs")
                .set("bucket", GCP_BUCKET)
                .set("path_prefix", "my-prefix")
                .set("file_ext", ".csv")
                .set("auth_method", "json_key")
                .set("service_account_email", GCP_EMAIL)
                .set("json_keyfile", null)
                .set("formatter", formatterConfig());

        try {
            embulk.runOutput(config, Paths.get(LOCAL_PATH_PREFIX));
            fail("Expected Exception was not thrown.");
        }
        catch (final PartialExecutionException ex) {
            assertTrue(ex.getCause() instanceof ConfigException);
        }
    }

    @Test
    public void testGcsClientCreateSuccessfully()
    {
        ConfigSource configSource = config();
        PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class);
        plugin.transaction(configSource, 1, new FileOutputControl()); // no errors happens
        plugin.createClient(task); // no errors happens
    }

    @Test
    public void testGcsClientCreateThrowConfigException()
    {
        ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
                .set("type", "gcs")
                .set("bucket", "non-exists-bucket")
                .set("path_prefix", "my-prefix")
                .set("file_ext", ".csv")
                .set("auth_method", "json_key")
                .set("service_account_email", GCP_EMAIL)
                .set("json_keyfile", Optional.of(LocalFile.ofContent(GCP_JSON_KEYFILE.get().getBytes(StandardCharsets.UTF_8))))
                .set("formatter", formatterConfig());

        plugin.transaction(config, 1, new FileOutputControl()); // no errors happens
        PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class);
        try {
            plugin.createClient(task);
            fail("Expected Exception was not thrown.");
        }
        catch (Exception ex) {
            assertTrue(ex.getCause() instanceof ConfigException);
        }
    }

    @Test
    public void testGcsFileOutputByOpen() throws Exception
    {
        ConfigSource configSource = config();
        PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class);
        Storage client = plugin.createClient(task);
        // Let any exception propagate: JUnit then reports the full stack trace instead of
        // only the (possibly null) message.
        embulk.runOutput(configSource, Paths.get(LOCAL_PATH_PREFIX));

        String remotePath = GCP_PATH_PREFIX + String.format(task.getSequenceFormat(), 0, 0) + task.getFileNameExtension();
        assertRecords(remotePath, client);
    }

    @Test
    public void testGenerateRemotePath() throws Exception
    {
        ConfigSource configSource = config();
        PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class);
        Storage storage = Mockito.mock(Storage.class);
        GcsTransactionalFileOutput fileOutput = new GcsTransactionalFileOutput(task, storage, 0);
        assertEquals("sample.000.01.csv", fileOutput.generateRemotePath("/sample", task.getSequenceFormat(), 0, 1, ".csv"));
        assertEquals("sample.000.01.csv", fileOutput.generateRemotePath("./sample", task.getSequenceFormat(), 0, 1, ".csv"));
        assertEquals("sample.000.01.csv", fileOutput.generateRemotePath("../sample", task.getSequenceFormat(), 0, 1, ".csv"));
        assertEquals("sample.000.01.csv", fileOutput.generateRemotePath("//sample", task.getSequenceFormat(), 0, 1, ".csv"));
        assertEquals("path/to/sample.000.01.csv", fileOutput.generateRemotePath("/path/to/sample", task.getSequenceFormat(), 0, 1, ".csv"));
        assertEquals("path/to/./sample.000.01.csv", fileOutput.generateRemotePath("path/to/./sample", task.getSequenceFormat(), 0, 1, ".csv"));
        assertEquals("path/to/../sample.000.01.csv", fileOutput.generateRemotePath("path/to/../sample", task.getSequenceFormat(), 0, 1, ".csv"));
        assertEquals("sample.000.01.csv", fileOutput.generateRemotePath("....../sample", task.getSequenceFormat(), 0, 1, ".csv"));
        assertEquals("sample.000.01.csv", fileOutput.generateRemotePath("......///sample", task.getSequenceFormat(), 0, 1, ".csv"));
    }

    /**
     * Builds a complete configuration that authenticates with the JSON key.
     *
     * @return a new config source writing below {@code GCP_PATH_PREFIX}
     */
    public ConfigSource config()
    {
        byte[] keyBytes = Base64.getDecoder().decode(GCP_P12_KEYFILE.get());
        Optional<LocalFile> p12Key = Optional.of(LocalFile.ofContent(keyBytes));
        Optional<LocalFile> jsonKey = Optional.of(LocalFile.ofContent(GCP_JSON_KEYFILE.get().getBytes(StandardCharsets.UTF_8)));

        return CONFIG_MAPPER_FACTORY.newConfigSource()
                .set("type", "gcs")
                .set("bucket", GCP_BUCKET)
                .set("path_prefix", GCP_PATH_PREFIX)
                .set("last_path", "")
                .set("file_ext", ".csv")
                .set("auth_method", "json_key")
                .set("service_account_email", GCP_EMAIL)
                .set("p12_keyfile", p12Key)
                .set("json_keyfile", jsonKey)
                .set("application_name", GCP_APPLICATION_NAME)
                .set("formatter", formatterConfig());
    }

    /** Control stub that reports a single empty task report. */
    private class FileOutputControl implements FileOutputPlugin.Control
    {
        @Override
        public List<TaskReport> run(TaskSource taskSource)
        {
            return Lists.newArrayList(CONFIG_MAPPER_FACTORY.newTaskReport());
        }
    }

    private ImmutableMap<String, Object> formatterConfig()
    {
        ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
        builder.put("type", "csv");
        builder.put("header_line", "false");
        builder.put("timezone", "Asia/Tokyo");
        return builder.build();
    }

    /**
     * Asserts that the object at {@code gcsPath} holds the four records of sample_01.csv.
     */
    private void assertRecords(String gcsPath, Storage client) throws Exception
    {
        // First two columns of every expected record, in file order.
        final String[][] expected = {
            {"1", "32864"},
            {"2", "14824"},
            {"3", "27559"},
            {"4", "11270"},
        };

        ImmutableList<List<String>> records = getFileContentsFromGcs(gcsPath, client);
        assertEquals(expected.length, records.size());
        for (int i = 0; i < expected.length; i++) {
            List<String> record = records.get(i);
            assertEquals(expected[i][0], record.get(0));
            assertEquals(expected[i][1], record.get(1));
        }
    }

    /**
     * Downloads the object at {@code path} and splits every line on commas.
     *
     * @return one list of column values per line of the object
     */
    private ImmutableList<List<String>> getFileContentsFromGcs(String path, Storage client) throws Exception
    {
        // Storage#get returns null for a missing object; fail with a readable message instead of an NPE.
        Blob blob = client.get(BlobId.of(GCP_BUCKET, path));
        assertNotNull("Expected object was not found in GCS: " + path, blob);

        ImmutableList.Builder<List<String>> builder = new ImmutableList.Builder<>();
        // try-with-resources closes the reader and, through it, the underlying read channel.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(Channels.newInputStream(blob.reader()), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                builder.add(Arrays.asList(line.split(",", 0)));
            }
        }
        return builder.build();
    }

    /**
     * Normalizes a bucket directory so that it ends with "/" and does not start with "/".
     *
     * @param dir directory name; {@code null} is treated like the empty string
     * @return the normalized directory, or the empty string for the bucket root
     */
    private static String getDirectory(String dir)
    {
        String normalized = (dir == null) ? "" : dir;
        if (!normalized.endsWith("/")) {
            normalized = normalized + "/";
        }
        if (normalized.startsWith("/")) {
            // Drop only the first leading slash.
            normalized = normalized.substring(1);
        }
        return normalized;
    }

    /** Reads the stream to its end and returns everything that was read. */
    private byte[] convertInputStreamToByte(InputStream is) throws IOException
    {
        ByteArrayOutputStream bo = new ByteArrayOutputStream();
        byte [] buffer = new byte[1024];
        while (true) {
            int len = is.read(buffer);
            if (len < 0) {
                break;
            }
            bo.write(buffer, 0, len);
        }
        return bo.toByteArray();
    }
}

--------------------------------------------------------------------------------
/src/test/resources/sample_01.csv: -------------------------------------------------------------------------------- 1 | id:long,account:long 2 | 1,32864 3 | 2,14824 4 | 3,27559 5 | 4,11270 6 | --------------------------------------------------------------------------------