├── .github
└── workflows
│ └── build.yml
├── .gitignore
├── LICENSE
├── NOTICE
├── NOTICE_GEM
├── README.md
├── build.gradle
├── config
└── checkstyle
│ ├── checkstyle.xml
│ └── default.xml
├── gradle
├── dependency-locks
│ └── embulkPluginRuntime.lockfile
└── wrapper
│ ├── gradle-wrapper.jar
│ └── gradle-wrapper.properties
├── gradlew
├── gradlew.bat
└── src
├── main
└── java
│ └── org
│ └── embulk
│ └── output
│ └── gcs
│ ├── AuthMethod.java
│ ├── GcsAuthentication.java
│ ├── GcsOutputPlugin.java
│ ├── GcsTransactionalFileOutput.java
│ └── PluginTask.java
└── test
├── java
└── org
│ └── embulk
│ └── output
│ └── gcs
│ ├── TestGcsAuthentication.java
│ └── TestGcsOutputPlugin.java
└── resources
└── sample_01.csv
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: Build and test
2 | on: push # triggered by push events
3 | jobs:
4 | build:
5 | runs-on: ubuntu-latest
6 | steps:
7 | - uses: actions/checkout@v1
8 | - name: Set up JDK 1.8
9 | uses: actions/setup-java@v1
10 | with:
11 | java-version: 1.8 # same Java level as sourceCompatibility/targetCompatibility in build.gradle
12 | - name: Test
13 | env: # GCP credentials and bucket settings come from repository secrets
14 | GCP_EMAIL: ${{secrets.GCP_EMAIL}}
15 | GCP_JSON_KEYFILE: ${{secrets.GCP_JSON_KEYFILE}}
16 | GCP_PRIVATE_KEYFILE: ${{secrets.GCP_PRIVATE_KEYFILE}} # NOTE(review): README.md lists GCP_P12_KEYFILE for tests - confirm which name the tests read
17 | GCP_BUCKET: ${{secrets.GCP_BUCKET}}
18 | GCP_BUCKET_DIRECTORY: ${{secrets.GCP_BUCKET_DIRECTORY}} # described as optional in README.md
19 | run: ./gradlew --stacktrace test
20 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | /pkg/
3 | /tmp/
4 | .gradle/
5 | /classpath/
6 | build/
7 | .idea
8 | *.iml
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | embulk-output-gcs
2 | Copyright 2015 Kazuyuki Honda, and the Embulk project
3 |
4 | This product includes software developed by Kazuyuki Honda, and in a part of the Embulk project (https://www.embulk.org/).
5 | It was originally developed by Kazuyuki Honda (https://github.com/hakobera), and then transferred to the Embulk project.
6 | It was originally licensed under the MIT License before the transfer, and it is now licensed under the Apache Software License, Version 2.0.
7 |
--------------------------------------------------------------------------------
/NOTICE_GEM:
--------------------------------------------------------------------------------
1 | embulk-output-gcs
2 | Copyright 2015 Kazuyuki Honda, and the Embulk project
3 |
4 | This product includes software developed by Kazuyuki Honda, and in a part of the Embulk project (https://www.embulk.org/).
5 | It was originally developed by Kazuyuki Honda (https://github.com/hakobera), and then transferred to the Embulk project.
6 | It was originally licensed under the MIT License before the transfer, and it is now licensed under the Apache Software License, Version 2.0.
7 |
8 | The gem distribution of this product includes software developed in a part of the Embulk project (https://www.embulk.org/).
9 | It has been licensed under the Apache Software License, Version 2.0.
10 |
11 | The gem distribution of this product includes JARs of the Jackson project (https://github.com/FasterXML/jackson), as-is.
12 | They are licensed under the Apache Software License, Version 2.0.
13 |
14 | The gem distribution of this product includes JARs of the Jakarta Bean Validation API 1.1 (https://beanvalidation.org/1.1/), as-is.
15 | It is licensed under the Apache Software License, Version 2.0.
16 |
17 | The gem distribution of this product includes JAR of the JSR305 Annotations for Findbugs (https://code.google.com/archive/p/jsr-305/), as-is.
18 | It is licensed under the Apache Software License, Version 2.0.
19 |
20 | The gem distribution of this product includes JARs of Apache HttpClient, HttpCore, and Commons Codec, as-is.
21 | They are licensed under the Apache Software License, Version 2.0.
22 |
23 | The gem distribution of this product includes a JAR of Joda-Time (https://www.joda.org/joda-time/), as-is.
24 | It is licensed under the Apache Software License, Version 2.0.
25 |
26 | The gem distribution of this product includes a JAR of the JCL 1.2 implemented over SLF4J (http://www.slf4j.org/legacy.html) 1.7, as-is.
27 | It is licensed under the Apache Software License, Version 2.0.
28 |
29 | The gem distribution of this product includes a JAR of the ThreeTen Backport (https://www.threeten.org/threetenbp/), as-is.
30 | It is licensed under the 3-Clause "New" BSD License.
31 |
32 | The gem distribution of this product includes JAR of Guava (https://guava.dev/), as-is.
33 | It is licensed under the Apache Software License, Version 2.0.
34 |
35 | The gem distribution of this product includes JAR of GSON (https://github.com/google/gson), as-is.
36 | It is licensed under the Apache Software License, Version 2.0.
37 |
38 | The gem distribution of this product includes JARs of Protocol Buffers (https://developers.google.com/protocol-buffers), as-is.
39 | They are licensed under the 3-Clause "New" BSD License.
40 |
41 | The gem distribution of this product includes a JAR of AutoValue Processor, as-is.
42 | It is licensed under the Apache License, Version 2.0.
43 |
44 | The gem distribution of this product includes JARs of Google Cloud Clients (https://github.com/GoogleCloudPlatform/google-cloud-java/), as-is.
45 | They are licensed under the Apache Software License, Version 2.0.
46 |
47 | The gem distribution of this product includes JARs of Google APIs/HTTP/OAuth Client Libraries, as-is.
48 | They are licensed under the Apache Software License, Version 2.0.
49 |
50 | The gem distribution of this product includes a JAR of GAX (Google Api EXtensions), as-is.
51 | It is licensed under the 3-Clause "New" BSD License.
52 |
53 | The gem distribution of this product includes JARs of Google API Common, as-is.
54 | It is licensed under the 3-Clause "New" BSD License.
55 |
56 | The gem distribution of this product includes JARs of Proto Libraries for Google APIs, as-is.
57 | They are licensed under the Apache Software License, Version 2.0.
58 |
59 | The gem distribution of this product includes a JAR of Cloud Storage JSON API V1, as-is.
60 | It is licensed under the Apache License, Version 2.0.
61 |
62 | The gem distribution of this product includes JARs of Google Auth Libraries, as-is.
63 | They are licensed under the 3-Clause "New" BSD License.
64 |
65 | The gem distribution of this product includes a JAR of JSON in Java (https://github.com/stleary/JSON-java), as-is.
66 | It is licensed under the JSON License.
67 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![Build Status](https://travis-ci.org/embulk/embulk-output-gcs.svg?branch=master)](https://travis-ci.org/embulk/embulk-output-gcs)
2 |
3 | # Google Cloud Storage output plugin for Embulk
4 |
5 | Google Cloud Storage output plugin for [Embulk](https://github.com/embulk/embulk).
6 |
7 | ## Overview
8 |
9 | * **Plugin type**: file output
10 | * **Load all or nothing**: no
11 | * **Resume supported**: yes
12 | * **Cleanup supported**: no
13 |
14 | - The connector does not support retry when there is a problem with the streaming channel. In that case, the job needs to be run again.
15 |
16 | ## Configuration
17 |
18 | - **bucket**: Google Cloud Storage bucket name (string, required)
19 | - **path_prefix**: Prefix of output keys (string, required)
20 | - **file_ext**: Extension of output file (string, required)
21 | - **sequence_format**: Format of the sequence number of the output files (string, default value is ".%03d.%02d")
22 | - **content_type**: content type of output file (string, optional, default value is "application/octet-stream")
23 | - **auth_method**: Authentication method `private_key`, `json_key` or `compute_engine` (string, optional, default value is "private_key")
24 | - **service_account_email**: Google Cloud Platform service account email (string, required when auth_method is private_key)
25 | - **p12_keyfile**: Private key file fullpath of Google Cloud Platform service account (string, required when auth_method is private_key)
26 | - **json_keyfile**: fullpath of json_key (string, required when auth_method is json_key)
27 | - **application_name**: Application name, anything you like (string, optional, default value is "embulk-output-gcs")
28 | - **max_connection_retry**: Number of connection retries to GCS (number, default value is 10)
29 |
30 | ## Example
31 |
32 | ```yaml
33 | out:
34 | type: gcs
35 | bucket: your-gcs-bucket-name
36 | path_prefix: logs/out
37 | file_ext: .csv
38 | auth_method: private_key #default
39 | service_account_email: 'XYZ@developer.gserviceaccount.com'
40 | p12_keyfile: '/path/to/private/key.p12'
41 | formatter:
42 | type: csv
43 | encoding: UTF-8
44 | ```
45 |
46 | ## Authentication
47 |
48 | There are three methods supported to fetch access token for the service account.
49 |
50 | 1. Public-Private key pair of GCP(Google Cloud Platform)'s service account
51 | 2. JSON key of GCP(Google Cloud Platform)'s service account
52 | 3. Pre-defined access token (Google Compute Engine only)
53 |
54 | ### Public-Private key pair of GCP's service account
55 |
56 | You first need to create a service account (client ID), download its private key and deploy the key with embulk.
57 |
58 | ```yaml
59 | out:
60 | type: gcs
61 | auth_method: private_key
62 | service_account_email: ABCXYZ123ABCXYZ123.gserviceaccount.com
63 | p12_keyfile: /path/to/p12_keyfile.p12
64 | ```
65 |
66 | ### JSON key of GCP's service account
67 |
68 | You first need to create a service account (client ID), download its json key and deploy the key with embulk.
69 |
70 | ```yaml
71 | out:
72 | type: gcs
73 | auth_method: json_key
74 | json_keyfile: /path/to/json_keyfile.json
75 | ```
76 |
77 | You can also embed contents of json_keyfile at config.yml.
78 |
79 | ```yaml
80 | out:
81 | type: gcs
82 | auth_method: json_key
83 | json_keyfile:
84 | content: |
85 | {
86 | "private_key_id": "123456789",
87 | "private_key": "-----BEGIN PRIVATE KEY-----\nABCDEF",
88 | "client_email": "..."
89 | }
90 | ```
91 |
92 | ### Pre-defined access token(GCE only)
93 |
94 | On the other hand, you don't need to explicitly create a service account for embulk when you
95 | run embulk in Google Compute Engine. In this third authentication method, you need to
96 | add the API scope "https://www.googleapis.com/auth/devstorage.read_write" to the scope list of your
97 | Compute Engine VM instance, then you can configure embulk like this.
98 |
99 | [Setting the scope of service account access for instances](https://cloud.google.com/compute/docs/authentication)
100 |
101 | ```yaml
102 | out:
103 | type: gcs
104 | auth_method: compute_engine
105 | ```
106 |
107 | ## Build
108 |
109 | ```
110 | $ ./gradlew gem
111 | ```
112 |
113 | ## Test
114 |
115 | ```
116 | $ ./gradlew test # -t to watch change of files and rebuild continuously
117 | ```
118 |
119 | To run unit tests, we need to configure the following environment variables.
120 |
121 | When these environment variables are not set, almost all test cases are skipped.
122 |
123 | ```
124 | GCP_EMAIL
125 | GCP_P12_KEYFILE
126 | GCP_JSON_KEYFILE
127 | GCP_BUCKET
128 | GCP_BUCKET_DIRECTORY(optional, if needed)
129 | ```
130 |
131 | If you're using Mac OS X El Capitan and GUI applications (e.g. an IDE), set the environment variables as follows.
132 | ```
133 | $ vi ~/Library/LaunchAgents/environment.plist
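134 | <?xml version="1.0" encoding="UTF-8"?>
135 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
136 | <plist version="1.0">
137 | <dict>
138 | <key>Label</key>
139 | <string>my.startup</string>
140 | <key>ProgramArguments</key>
141 | <array>
142 | <string>sh</string>
143 | <string>-c</string>
144 | <string>
145 | launchctl setenv GCP_EMAIL ABCXYZ123ABCXYZ123.gserviceaccount.com
146 | launchctl setenv GCP_P12_KEYFILE /path/to/p12_keyfile.p12
147 | launchctl setenv GCP_JSON_KEYFILE /path/to/json_keyfile.json
148 | launchctl setenv GCP_BUCKET my-bucket
149 | launchctl setenv GCP_BUCKET_DIRECTORY unittests
150 | </string>
151 | </array>
152 | <key>RunAtLoad</key>
153 | <true/>
154 | </dict>
155 | </plist>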
156 |
157 | $ launchctl load ~/Library/LaunchAgents/environment.plist
158 | $ launchctl getenv GCP_EMAIL //try to get value.
159 |
160 | Then start your applications.
161 | ```
162 |
--------------------------------------------------------------------------------
/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 | id "java"
3 | id "checkstyle" // configured in the checkstyle section near the end of this script
4 | id "jacoco"
5 | id "signing" // used by the signing { } block
6 | id "maven-publish" // used by the publishing { } block
7 | id "org.embulk.embulk-plugins" version "0.4.2" // presumably supplies the embulkPlugin/gem/gemPush blocks used below - confirm against the plugin's documentation
8 | }
9 |
10 | repositories {
11 | mavenCentral()
12 | }
13 |
14 | sourceCompatibility = 1.8 // Java 8 source level; the CI workflow also builds with JDK 1.8
15 | targetCompatibility = 1.8
16 |
17 | group = "org.embulk" // reused as the groupId of the Maven publication
18 | version = "0.6.0-SNAPSHOT" // a "-SNAPSHOT" suffix makes the publishing block target the snapshots repository
19 | description = "Dumps records to Google Cloud Storage." // reused as the POM description
20 |
21 | tasks.withType(JavaCompile) {
22 | options.compilerArgs << "-Xlint:deprecation" << "-Xlint:unchecked" // enable deprecation and unchecked lint warnings
23 | options.encoding = "UTF-8"
24 | }
25 |
26 | java {
27 | withJavadocJar() // javadocJar is customised further below
28 | withSourcesJar() // sourcesJar is customised further below
29 | }
30 |
31 | def embulkVersion = '0.10.31' // version shared by the Embulk core and standard-plugin artifacts below
32 | dependencies {
33 | compileOnly "org.embulk:embulk-api:$embulkVersion"
34 | compileOnly "org.embulk:embulk-spi:$embulkVersion"
35 |
36 | compile('org.embulk:embulk-util-config:0.3.1') {
37 | // Excluded because they conflict with Embulk Core; pinned versions are declared explicitly below.
38 | exclude group: 'com.fasterxml.jackson.core', module: 'jackson-annotations'
39 | exclude group: 'com.fasterxml.jackson.core', module: 'jackson-core'
40 | exclude group: 'com.fasterxml.jackson.core', module: 'jackson-databind'
41 | exclude group: 'com.fasterxml.jackson.datatype', module: 'jackson-datatype-jdk8'
42 | exclude group: 'javax.validation', module: 'validation-api'
43 | }
44 | compile('org.embulk:embulk-util-retryhelper:0.8.2')
45 |
46 | compile 'com.fasterxml.jackson.core:jackson-core:2.6.7' // explicit versions of the modules excluded from embulk-util-config above
47 | compile 'com.fasterxml.jackson.core:jackson-annotations:2.6.7'
48 | compile 'com.fasterxml.jackson.core:jackson-databind:2.6.7'
49 | compile 'com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.6.7'
50 |
51 | compile 'javax.validation:validation-api:1.1.0.Final'
52 | compile 'com.google.guava:guava:18.0'
53 |
54 | // We use version 1.2.0 to prevent Guava and Jackson version conflicts with Embulk.
55 | compile ("com.google.cloud:google-cloud-storage:1.2.0"){
56 | exclude group: 'com.google.cloud', module: 'google-cloud-core'
57 | exclude group: 'com.google.cloud', module: 'google-cloud-core-http'
58 | exclude group: "commons-logging", module: "commons-logging"
59 | }
60 | compile ("com.google.cloud:google-cloud-core:1.2.0"){
61 | exclude group: 'com.google.guava', module: 'guava'
62 | exclude group: "commons-logging", module: "commons-logging"
63 | }
64 | compile ("com.google.cloud:google-cloud-core-http:1.2.0"){
65 | exclude group: 'com.google.guava', module: 'guava'
66 | exclude group: 'com.google.cloud', module: 'google-cloud-core'
67 | exclude group: "commons-logging", module: "commons-logging"
68 | }
69 |
70 | // Instead of "commons-logging:commons-logging:1.2" that is required from "google-cloud-storage",
71 | // we use "jcl-over-slf4j" because Embulk is based on SLF4J.
72 | compile("org.slf4j:jcl-over-slf4j:1.7.12") {
73 | // slf4j-api is in embulk-core / embulk-api.
74 | exclude group: "org.slf4j", module: "slf4j-api"
75 | }
76 |
77 | testCompile "junit:junit:4.12"
78 | testCompile "org.mockito:mockito-core:2.28.2"
79 | testCompile "org.embulk:embulk-core:$embulkVersion"
80 | testCompile "org.embulk:embulk-core:$embulkVersion:tests"
81 | testCompile "org.embulk:embulk-deps:$embulkVersion"
82 | testCompile "org.embulk:embulk-junit4:$embulkVersion"
83 | testCompile "org.embulk:embulk-formatter-csv:$embulkVersion" // standard plugins follow embulkVersion instead of repeating the literal
84 | testCompile "org.embulk:embulk-input-file:$embulkVersion"
85 | testCompile "org.embulk:embulk-parser-csv:$embulkVersion"
86 | }
87 |
88 | embulkPlugin {
89 | mainClass = "org.embulk.output.gcs.GcsOutputPlugin" // corresponds to GcsOutputPlugin.java under src/main/java/org/embulk/output/gcs
90 | category = "output"
91 | type = "gcs" // same value as `type: gcs` in the README configuration examples
92 | }
93 |
94 | javadoc {
95 | options {
96 | locale = 'en_US'
97 | encoding = 'UTF-8'
98 | }
99 | }
100 |
101 | jar { // adds the LICENSE and NOTICE files to the main JAR
102 | from rootProject.file("LICENSE")
103 | from rootProject.file("NOTICE")
104 | }
105 |
106 | sourcesJar { // same two files are added to the sources JAR
107 | from rootProject.file("LICENSE")
108 | from rootProject.file("NOTICE")
109 | }
110 |
111 | javadocJar { // same two files are added to the Javadoc JAR
112 | from rootProject.file("LICENSE")
113 | from rootProject.file("NOTICE")
114 | }
115 |
116 | publishing {
117 | publications {
118 | maven(MavenPublication) { // the publication that the signing block signs
119 | groupId = project.group
120 | artifactId = project.name
121 |
122 | from components.java
123 |
124 | pom { // https://central.sonatype.org/pages/requirements.html
125 | packaging "jar"
126 |
127 | name = project.name
128 | description = project.description
129 | url = "https://www.embulk.org/"
130 |
131 | developers {
132 | developer {
133 | name = "Kazuyuki Honda"
134 | email = "hakobera@gmail.com"
135 | }
136 | developer {
137 | name = "Satoshi Akama"
138 | email = "satoshiakama@gmail.com"
139 | }
140 | developer {
141 | name = "John Luong"
142 | email = "jluong@treasure-data.com"
143 | }
144 | }
145 |
146 | licenses {
147 | license {
148 | // http://central.sonatype.org/pages/requirements.html#license-information
149 | name = "The Apache License, Version 2.0"
150 | url = "https://www.apache.org/licenses/LICENSE-2.0.txt"
151 | }
152 | }
153 |
154 | scm {
155 | connection = "scm:git:git://github.com/embulk/embulk-output-gcs.git"
156 | developerConnection = "scm:git:git@github.com:embulk/embulk-output-gcs.git"
157 | url = "https://github.com/embulk/embulk-output-gcs"
158 | }
159 | }
160 | }
161 | }
162 |
163 | repositories {
164 | maven { // publishMavenPublicationToMavenCentralRepository
165 | name = "mavenCentral"
166 | if (project.version.endsWith("-SNAPSHOT")) { // snapshot versions go to the snapshots repository, all others to staging
167 | url "https://oss.sonatype.org/content/repositories/snapshots"
168 | } else {
169 | url "https://oss.sonatype.org/service/local/staging/deploy/maven2"
170 | }
171 |
172 | credentials {
173 | username = project.hasProperty("ossrhUsername") ? ossrhUsername : "" // falls back to an empty string when the property is not set
174 | password = project.hasProperty("ossrhPassword") ? ossrhPassword : "" // falls back to an empty string when the property is not set
175 | }
176 | }
177 | }
178 | }
179 |
180 | signing {
181 | sign publishing.publications.maven // signs the "maven" publication declared in the publishing block
182 | }
183 |
184 | gem {
185 | authors = [ "Kazuyuki Honda" ]
186 | email = [ "hakobera@gmail.com" ]
187 | summary = "Google Cloud Storage output plugin for Embulk"
188 | homepage = "https://github.com/embulk/embulk-output-gcs"
189 | licenses = [ "Apache-2.0" ]
190 | from rootProject.file("LICENSE")
191 | from rootProject.file("NOTICE_GEM") // the gem uses NOTICE_GEM rather than NOTICE
192 | rename ("NOTICE_GEM", "NOTICE") // stored under the name NOTICE inside the gem
193 | }
194 |
195 | gemPush {
196 | host = "https://rubygems.org"
197 | }
198 |
199 | checkstyle {
200 | configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml") // extension-level configuration file
201 | toolVersion = '6.14.1'
202 | }
203 | checkstyleMain {
204 | configFile = file("${project.rootDir}/config/checkstyle/default.xml") // this task is pointed at default.xml instead of checkstyle.xml
205 | ignoreFailures = true // violations do not fail the build
206 | }
207 | checkstyleTest {
208 | configFile = file("${project.rootDir}/config/checkstyle/default.xml") // this task is pointed at default.xml instead of checkstyle.xml
209 | ignoreFailures = true // violations do not fail the build
210 | }
211 | task checkstyle(type: Checkstyle) { // additional task over main and test sources; presumably uses the extension-level checkstyle.xml - confirm
212 | classpath = sourceSets.main.output + sourceSets.test.output
213 | source = sourceSets.main.allJava + sourceSets.test.allJava
214 | }
215 |
216 | test {
217 | testLogging {
218 | outputs.upToDateWhen { false } // never up to date; presumably resolves to the test task's outputs so tests re-run every time - confirm
219 | exceptionFormat = org.gradle.api.tasks.testing.logging.TestExceptionFormat.FULL
220 | showCauses = true
221 | showExceptions = true
222 | showStackTraces = true
223 | showStandardStreams = true
224 | events "passed", "skipped", "failed", "standardOut", "standardError" // test events that get logged
225 | }
226 | }
--------------------------------------------------------------------------------
/config/checkstyle/checkstyle.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
128 |
129 |
130 |
131 |
--------------------------------------------------------------------------------
/config/checkstyle/default.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
108 |
109 |
110 |
111 |
--------------------------------------------------------------------------------
/gradle/dependency-locks/embulkPluginRuntime.lockfile:
--------------------------------------------------------------------------------
1 | # This is a Gradle generated file for dependency locking.
2 | # Manual edits can break the build and are not advised.
3 | # This file is expected to be part of source control.
4 | com.fasterxml.jackson.core:jackson-annotations:2.6.7
5 | com.fasterxml.jackson.core:jackson-core:2.6.7
6 | com.fasterxml.jackson.core:jackson-databind:2.6.7
7 | com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.6.7
8 | com.google.api-client:google-api-client:1.21.0
9 | com.google.api.grpc:proto-google-common-protos:0.1.11
10 | com.google.api.grpc:proto-google-iam-v1:0.1.11
11 | com.google.api:api-common:1.1.0
12 | com.google.api:gax:1.3.1
13 | com.google.apis:google-api-services-storage:v1-rev100-1.22.0
14 | com.google.auth:google-auth-library-credentials:0.7.0
15 | com.google.auth:google-auth-library-oauth2-http:0.7.0
16 | com.google.auto.value:auto-value:1.2
17 | com.google.cloud:google-cloud-core-http:1.2.0
18 | com.google.cloud:google-cloud-core:1.2.0
19 | com.google.cloud:google-cloud-storage:1.2.0
20 | com.google.code.findbugs:jsr305:3.0.0
21 | com.google.code.gson:gson:2.7
22 | com.google.guava:guava:18.0
23 | com.google.http-client:google-http-client-appengine:1.21.0
24 | com.google.http-client:google-http-client-jackson2:1.21.0
25 | com.google.http-client:google-http-client-jackson:1.21.0
26 | com.google.http-client:google-http-client:1.21.0
27 | com.google.oauth-client:google-oauth-client:1.21.0
28 | com.google.protobuf:protobuf-java-util:3.2.0
29 | com.google.protobuf:protobuf-java:3.2.0
30 | commons-codec:commons-codec:1.3
31 | javax.validation:validation-api:1.1.0.Final
32 | joda-time:joda-time:2.9.2
33 | org.apache.httpcomponents:httpclient:4.0.1
34 | org.apache.httpcomponents:httpcore:4.0.1
35 | org.codehaus.jackson:jackson-core-asl:1.9.11
36 | org.embulk:embulk-util-config:0.3.1
37 | org.embulk:embulk-util-retryhelper:0.8.2
38 | org.json:json:20160810
39 | org.slf4j:jcl-over-slf4j:1.7.12
40 | org.threeten:threetenbp:1.3.3
41 |
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/embulk/embulk-output-gcs/427f9fdc677885a7467606393f6a343ceda2c4c9/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | #Tue Nov 02 00:01:46 ICT 2021
2 | distributionBase=GRADLE_USER_HOME
3 | distributionPath=wrapper/dists
4 | zipStoreBase=GRADLE_USER_HOME
5 | zipStorePath=wrapper/dists
6 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.5-bin.zip
7 |
--------------------------------------------------------------------------------
/gradlew:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 |
3 | ##############################################################################
4 | ##
5 | ## Gradle start up script for UN*X
6 | ##
7 | ##############################################################################
8 |
9 | # Attempt to set APP_HOME
10 | # Resolve links: $0 may be a link
11 | PRG="$0"
12 | # Need this for relative symlinks.
13 | while [ -h "$PRG" ] ; do
14 | ls=`ls -ld "$PRG"`
15 | link=`expr "$ls" : '.*-> \(.*\)$'`
16 | if expr "$link" : '/.*' > /dev/null; then
17 | PRG="$link"
18 | else
19 | PRG=`dirname "$PRG"`"/$link"
20 | fi
21 | done
22 | SAVED="`pwd`"
23 | cd "`dirname \"$PRG\"`/" >/dev/null
24 | APP_HOME="`pwd -P`"
25 | cd "$SAVED" >/dev/null
26 |
27 | APP_NAME="Gradle"
28 | APP_BASE_NAME=`basename "$0"`
29 |
30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
31 | DEFAULT_JVM_OPTS=""
32 |
33 | # Use the maximum available, or set MAX_FD != -1 to use that value.
34 | MAX_FD="maximum"
35 |
36 | warn ( ) {
37 | echo "$*"
38 | }
39 |
40 | die ( ) {
41 | echo
42 | echo "$*"
43 | echo
44 | exit 1
45 | }
46 |
47 | # OS specific support (must be 'true' or 'false').
48 | cygwin=false
49 | msys=false
50 | darwin=false
51 | nonstop=false
52 | case "`uname`" in
53 | CYGWIN* )
54 | cygwin=true
55 | ;;
56 | Darwin* )
57 | darwin=true
58 | ;;
59 | MINGW* )
60 | msys=true
61 | ;;
62 | NONSTOP* )
63 | nonstop=true
64 | ;;
65 | esac
66 |
67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
68 |
69 | # Determine the Java command to use to start the JVM.
70 | if [ -n "$JAVA_HOME" ] ; then
71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
72 | # IBM's JDK on AIX uses strange locations for the executables
73 | JAVACMD="$JAVA_HOME/jre/sh/java"
74 | else
75 | JAVACMD="$JAVA_HOME/bin/java"
76 | fi
77 | if [ ! -x "$JAVACMD" ] ; then
78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
79 |
80 | Please set the JAVA_HOME variable in your environment to match the
81 | location of your Java installation."
82 | fi
83 | else
84 | JAVACMD="java"
85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
86 |
87 | Please set the JAVA_HOME variable in your environment to match the
88 | location of your Java installation."
89 | fi
90 |
91 | # Increase the maximum file descriptors if we can.
92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
93 | MAX_FD_LIMIT=`ulimit -H -n`
94 | if [ $? -eq 0 ] ; then
95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
96 | MAX_FD="$MAX_FD_LIMIT"
97 | fi
98 | ulimit -n $MAX_FD
99 | if [ $? -ne 0 ] ; then
100 | warn "Could not set maximum file descriptor limit: $MAX_FD"
101 | fi
102 | else
103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
104 | fi
105 | fi
106 |
107 | # For Darwin, add options to specify how the application appears in the dock
108 | if $darwin; then
109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
110 | fi
111 |
112 | # For Cygwin, switch paths to Windows format before running java
113 | if $cygwin ; then
114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"`
115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
116 | JAVACMD=`cygpath --unix "$JAVACMD"`
117 |
118 | # We build the pattern for arguments to be converted via cygpath
119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120 | SEP=""
121 | for dir in $ROOTDIRSRAW ; do
122 | ROOTDIRS="$ROOTDIRS$SEP$dir"
123 | SEP="|"
124 | done
125 | OURCYGPATTERN="(^($ROOTDIRS))"
126 | # Add a user-defined pattern to the cygpath arguments
127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129 | fi
130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh
131 | i=0
132 | for arg in "$@" ; do
133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
135 |
136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138 | else
139 | eval `echo args$i`="\"$arg\""
140 | fi
141 | i=$((i+1))
142 | done
143 | case $i in
144 | (0) set -- ;;
145 | (1) set -- "$args0" ;;
146 | (2) set -- "$args0" "$args1" ;;
147 | (3) set -- "$args0" "$args1" "$args2" ;;
148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154 | esac
155 | fi
156 |
157 | # Escape application args
158 | save ( ) {
159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
160 | echo " "
161 | }
162 | APP_ARGS=$(save "$@")
163 |
164 | # Collect all arguments for the java command, following the shell quoting and substitution rules
165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
166 |
167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
169 | cd "$(dirname "$0")"
170 | fi
171 |
172 | exec "$JAVACMD" "$@"
173 |
--------------------------------------------------------------------------------
/gradlew.bat:
--------------------------------------------------------------------------------
1 | @if "%DEBUG%" == "" @echo off
2 | @rem ##########################################################################
3 | @rem
4 | @rem Gradle startup script for Windows
5 | @rem
6 | @rem ##########################################################################
7 |
8 | @rem Set local scope for the variables with windows NT shell
9 | if "%OS%"=="Windows_NT" setlocal
10 |
11 | set DIRNAME=%~dp0
12 | if "%DIRNAME%" == "" set DIRNAME=.
13 | set APP_BASE_NAME=%~n0
14 | set APP_HOME=%DIRNAME%
15 |
16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17 | set DEFAULT_JVM_OPTS=
18 |
19 | @rem Find java.exe
20 | if defined JAVA_HOME goto findJavaFromJavaHome
21 |
22 | set JAVA_EXE=java.exe
23 | %JAVA_EXE% -version >NUL 2>&1
24 | if "%ERRORLEVEL%" == "0" goto init
25 |
26 | echo.
27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28 | echo.
29 | echo Please set the JAVA_HOME variable in your environment to match the
30 | echo location of your Java installation.
31 |
32 | goto fail
33 |
34 | :findJavaFromJavaHome
35 | set JAVA_HOME=%JAVA_HOME:"=%
36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37 |
38 | if exist "%JAVA_EXE%" goto init
39 |
40 | echo.
41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42 | echo.
43 | echo Please set the JAVA_HOME variable in your environment to match the
44 | echo location of your Java installation.
45 |
46 | goto fail
47 |
48 | :init
49 | @rem Get command-line arguments, handling Windows variants
50 |
51 | if not "%OS%" == "Windows_NT" goto win9xME_args
52 |
53 | :win9xME_args
54 | @rem Slurp the command line arguments.
55 | set CMD_LINE_ARGS=
56 | set _SKIP=2
57 |
58 | :win9xME_args_slurp
59 | if "x%~1" == "x" goto execute
60 |
61 | set CMD_LINE_ARGS=%*
62 |
63 | :execute
64 | @rem Setup the command line
65 |
66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
67 |
68 | @rem Execute Gradle
69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
70 |
71 | :end
72 | @rem End local scope for the variables with windows NT shell
73 | if "%ERRORLEVEL%"=="0" goto mainEnd
74 |
75 | :fail
76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
77 | rem the _cmd.exe /c_ return code!
78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
79 | exit /b 1
80 |
81 | :mainEnd
82 | if "%OS%"=="Windows_NT" endlocal
83 |
84 | :omega
85 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/output/gcs/AuthMethod.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2018 The Embulk project
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package org.embulk.output.gcs;
18 |
/**
 * Authentication methods that can be selected for the GCS output plugin.
 *
 * <p>Every constant is declared with a string that is identical to its own name.
 */
public enum AuthMethod
{
    private_key("private_key"),
    compute_engine("compute_engine"),
    json_key("json_key");

    /** String form supplied when the constant is declared. */
    private final String label;

    AuthMethod(final String label)
    {
        this.label = label;
    }

    /**
     * Returns the string this constant was declared with.
     *
     * @return the string form of this authentication method
     */
    public String getString()
    {
        return this.label;
    }
}
37 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/output/gcs/GcsAuthentication.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 The Embulk project
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package org.embulk.output.gcs;
18 |
19 | import com.google.api.client.auth.oauth2.TokenResponseException;
20 | import com.google.api.client.googleapis.json.GoogleJsonResponseException;
21 | import com.google.api.client.http.HttpTransport;
22 | import com.google.api.client.http.apache.ApacheHttpTransport;
23 | import com.google.api.client.json.jackson2.JacksonFactory;
24 | import com.google.api.client.util.SecurityUtils;
25 | import com.google.api.services.storage.StorageScopes;
26 | import com.google.auth.http.HttpTransportFactory;
27 | import com.google.auth.oauth2.ComputeEngineCredentials;
28 | import com.google.auth.oauth2.GoogleCredentials;
29 | import com.google.auth.oauth2.ServiceAccountCredentials;
30 | import com.google.cloud.TransportOptions;
31 | import com.google.cloud.http.HttpTransportOptions;
32 | import com.google.cloud.storage.Storage;
33 | import com.google.cloud.storage.StorageException;
34 | import com.google.cloud.storage.StorageOptions;
35 | import org.embulk.config.ConfigException;
36 | import org.embulk.util.config.units.LocalFile;
37 | import org.embulk.util.retryhelper.RetryExecutor;
38 | import org.embulk.util.retryhelper.RetryGiveupException;
39 | import org.embulk.util.retryhelper.Retryable;
40 | import org.slf4j.Logger;
41 | import org.slf4j.LoggerFactory;
42 |
43 | import java.io.File;
44 | import java.io.FileInputStream;
45 | import java.io.IOException;
46 | import java.io.InterruptedIOException;
47 | import java.security.GeneralSecurityException;
48 | import java.security.PrivateKey;
49 | import java.util.Collections;
50 | import java.util.Optional;
51 | import java.util.function.Function;
52 |
53 | public class GcsAuthentication
54 | {
55 | private final Logger log = LoggerFactory.getLogger(GcsAuthentication.class);
56 | private final Optional serviceAccountEmail;
57 | private final Optional p12KeyFilePath;
58 | private final Optional jsonKeyFilePath;
59 | private final String applicationName;
60 | private final HttpTransport httpTransport;
61 | private final JacksonFactory jsonFactory;
62 | private final GoogleCredentials credentials;
63 | private PluginTask task;
64 |
65 | public GcsAuthentication(PluginTask task) throws IOException, GeneralSecurityException
66 | {
67 | this.task = task;
68 | this.serviceAccountEmail = task.getServiceAccountEmail();
69 | this.p12KeyFilePath = task.getP12Keyfile().map(localFileToPathString());
70 | this.jsonKeyFilePath = task.getJsonKeyfile().map(localFileToPathString());
71 | this.applicationName = task.getApplicationName();
72 | this.httpTransport = new ApacheHttpTransport.Builder().build();
73 | this.jsonFactory = new JacksonFactory();
74 |
75 | if (task.getAuthMethod() == AuthMethod.compute_engine) {
76 | this.credentials = getComputeCredential();
77 | }
78 | else if (task.getAuthMethod() == AuthMethod.json_key) {
79 | this.credentials = getServiceAccountCredentialFromJsonFile();
80 | }
81 | else {
82 | this.credentials = getServiceAccountCredential();
83 | }
84 | }
85 |
86 | /**
87 | * @see https://developers.google.com/accounts/docs/OAuth2ServiceAccount#authorizingrequests
88 | */
89 | private GoogleCredentials getServiceAccountCredential() throws IOException, GeneralSecurityException
90 | {
91 | File p12 = new File(p12KeyFilePath.get());
92 | PrivateKey privateKey = SecurityUtils.loadPrivateKeyFromKeyStore(SecurityUtils.getPkcs12KeyStore(),
93 | new FileInputStream(p12), task.getStorePass(), "privatekey", task.getKeyPass());
94 | HttpTransportFactory transportFactory = () -> httpTransport;
95 | GoogleCredentials credentials = new ServiceAccountCredentials(null, serviceAccountEmail.get(),
96 | privateKey, null, Collections.singleton(StorageScopes.DEVSTORAGE_READ_WRITE), transportFactory, null);
97 | return credentials;
98 | }
99 |
100 | private GoogleCredentials getServiceAccountCredentialFromJsonFile() throws IOException
101 | {
102 | FileInputStream stream = new FileInputStream(jsonKeyFilePath.get());
103 | return GoogleCredentials.fromStream(stream)
104 | .createScoped(Collections.singleton(StorageScopes.DEVSTORAGE_READ_WRITE));
105 | }
106 |
107 | /**
108 | * @see http://developers.guge.io/accounts/docs/OAuth2ServiceAccount#creatinganaccount
109 | * @see https://developers.google.com/accounts/docs/OAuth2
110 | */
111 | private GoogleCredentials getComputeCredential() throws IOException
112 | {
113 | HttpTransportFactory transportFactory = () -> httpTransport;
114 | ComputeEngineCredentials credentials = new ComputeEngineCredentials(transportFactory);
115 | credentials.refreshAccessToken();
116 | return credentials;
117 | }
118 |
119 | public Storage getGcsClient() throws ConfigException, IOException
120 | {
121 | try {
122 | return RetryExecutor.builder()
123 | .withRetryLimit(task.getMaxConnectionRetry())
124 | .withInitialRetryWaitMillis(task.getInitialRetryIntervalMillis())
125 | .withMaxRetryWaitMillis(task.getMaximumRetryIntervalMillis())
126 | .build()
127 | .runInterruptible(new Retryable() {
128 | @Override
129 | public Storage call() throws IOException, RetryGiveupException
130 | {
131 | final TransportOptions transportOptions = HttpTransportOptions.newBuilder()
132 | .setConnectTimeout(30000) // in milliseconds
133 | .setReadTimeout(30000) // in milliseconds
134 | .build();
135 |
136 | Storage client = StorageOptions.newBuilder()
137 | .setCredentials(credentials)
138 | .setTransportOptions(transportOptions)
139 | .build().getService();
140 |
141 | // For throw ConfigException when authentication is fail.
142 | client.list(task.getBucket(), Storage.BlobListOption.pageSize(1)).hasNextPage();
143 | return client;
144 | }
145 |
146 | @Override
147 | public boolean isRetryableException(Exception exception)
148 | {
149 | if (exception instanceof GoogleJsonResponseException || exception instanceof TokenResponseException || exception instanceof StorageException) {
150 | int statusCode;
151 | if (exception instanceof GoogleJsonResponseException) {
152 | if (((GoogleJsonResponseException) exception).getDetails() == null) {
153 | String content = "";
154 | if (((GoogleJsonResponseException) exception).getContent() != null) {
155 | content = ((GoogleJsonResponseException) exception).getContent();
156 | }
157 | log.warn("Invalid response was returned : {}", content);
158 | return true;
159 | }
160 | statusCode = ((GoogleJsonResponseException) exception).getDetails().getCode();
161 | }
162 | else if (exception instanceof TokenResponseException) {
163 | statusCode = ((TokenResponseException) exception).getStatusCode();
164 | }
165 | else {
166 | statusCode = ((StorageException) exception).getCode();
167 | }
168 |
169 | if (statusCode / 100 == 4) {
170 | return false;
171 | }
172 | }
173 | return true;
174 | }
175 |
176 | @Override
177 | public void onRetry(Exception exception, int retryCount, int retryLimit, int retryWait)
178 | throws RetryGiveupException
179 | {
180 | String message = String.format("GCS GET request failed. Retrying %d/%d after %d seconds. Message: %s: %s",
181 | retryCount, retryLimit, retryWait / 1000, exception.getClass(), exception.getMessage());
182 | if (retryCount % 3 == 0) {
183 | log.warn(message, exception);
184 | }
185 | else {
186 | log.warn(message);
187 | }
188 | }
189 |
190 | @Override
191 | public void onGiveup(Exception firstException, Exception lastException)
192 | throws RetryGiveupException
193 | {
194 | }
195 | });
196 | }
197 | catch (RetryGiveupException ex) {
198 | if (ex.getCause() instanceof GoogleJsonResponseException || ex.getCause() instanceof TokenResponseException || ex.getCause() instanceof StorageException) {
199 | int statusCode = 0;
200 | if (ex.getCause() instanceof GoogleJsonResponseException) {
201 | if (((GoogleJsonResponseException) ex.getCause()).getDetails() != null) {
202 | statusCode = ((GoogleJsonResponseException) ex.getCause()).getDetails().getCode();
203 | }
204 | }
205 | else if (ex.getCause() instanceof TokenResponseException) {
206 | statusCode = ((TokenResponseException) ex.getCause()).getStatusCode();
207 | }
208 | else {
209 | statusCode = ((StorageException) ex.getCause()).getCode();
210 | }
211 | if (statusCode / 100 == 4) {
212 | throw new ConfigException(ex);
213 | }
214 | }
215 | throw new RuntimeException(ex);
216 | }
217 | catch (InterruptedException ex) {
218 | throw new InterruptedIOException();
219 | }
220 | }
221 |
222 | private Function localFileToPathString()
223 | {
224 | return file -> file.getPath().toString();
225 | }
226 | }
227 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/output/gcs/GcsOutputPlugin.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Kazuyuki Honda, and the Embulk project
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package org.embulk.output.gcs;
18 |
19 | import com.google.cloud.storage.Storage;
20 | import com.google.common.annotations.VisibleForTesting;
21 | import org.embulk.config.ConfigDiff;
22 | import org.embulk.config.ConfigException;
23 | import org.embulk.config.ConfigSource;
24 | import org.embulk.config.TaskReport;
25 | import org.embulk.config.TaskSource;
26 | import org.embulk.spi.FileOutputPlugin;
27 | import org.embulk.spi.TransactionalFileOutput;
28 | import org.embulk.util.config.ConfigMapper;
29 | import org.embulk.util.config.ConfigMapperFactory;
30 | import org.embulk.util.config.TaskMapper;
31 | import org.embulk.util.config.units.LocalFile;
32 |
33 | import java.io.IOException;
34 | import java.security.GeneralSecurityException;
35 | import java.util.List;
36 | import java.util.Optional;
37 |
38 | public class GcsOutputPlugin implements FileOutputPlugin
39 | {
40 | public static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory.builder()
41 | .addDefaultModules().build();
42 | public static final ConfigMapper CONFIG_MAPPER = CONFIG_MAPPER_FACTORY.createConfigMapper();
43 | public static final TaskMapper TASK_MAPPER = CONFIG_MAPPER_FACTORY.createTaskMapper();
44 | @Override
45 | public ConfigDiff transaction(ConfigSource config,
46 | int taskCount,
47 | FileOutputPlugin.Control control)
48 | {
49 | PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class);
50 |
51 | if (task.getP12KeyfilePath().isPresent()) {
52 | if (task.getP12Keyfile().isPresent()) {
53 | throw new ConfigException("Setting both p12_keyfile_path and p12_keyfile is invalid");
54 | }
55 | try {
56 | task.setP12Keyfile(Optional.of(LocalFile.of(task.getP12KeyfilePath().get())));
57 | }
58 | catch (IOException ex) {
59 | throw new RuntimeException(ex);
60 | }
61 | }
62 |
63 | if (task.getAuthMethod().getString().equals("json_key")) {
64 | if (!task.getJsonKeyfile().isPresent()) {
65 | throw new ConfigException("If auth_method is json_key, you have to set json_keyfile");
66 | }
67 | }
68 | else if (task.getAuthMethod().getString().equals("private_key")) {
69 | if (!task.getP12Keyfile().isPresent() || !task.getServiceAccountEmail().isPresent()) {
70 | throw new ConfigException("If auth_method is private_key, you have to set both service_account_email and p12_keyfile");
71 | }
72 | }
73 |
74 | return resume(task.toTaskSource(), taskCount, control);
75 | }
76 |
77 | @Override
78 | public ConfigDiff resume(TaskSource taskSource,
79 | int taskCount,
80 | FileOutputPlugin.Control control)
81 | {
82 | control.run(taskSource);
83 | return CONFIG_MAPPER_FACTORY.newConfigDiff();
84 | }
85 |
86 | @Override
87 | public void cleanup(TaskSource taskSource,
88 | int taskCount,
89 | List successTaskReports)
90 | {
91 | }
92 |
93 | @Override
94 | public TransactionalFileOutput open(TaskSource taskSource, final int taskIndex)
95 | {
96 | PluginTask task = TASK_MAPPER.map(taskSource, PluginTask.class);
97 |
98 | Storage client = createClient(task);
99 | return new GcsTransactionalFileOutput(task, client, taskIndex);
100 | }
101 |
102 | private GcsAuthentication newGcsAuth(PluginTask task)
103 | {
104 | try {
105 | return new GcsAuthentication(task);
106 | }
107 | catch (GeneralSecurityException | IOException ex) {
108 | throw new ConfigException(ex);
109 | }
110 | }
111 |
112 | @VisibleForTesting
113 | public Storage createClient(final PluginTask task)
114 | {
115 | try {
116 | GcsAuthentication auth = newGcsAuth(task);
117 | return auth.getGcsClient();
118 | }
119 | catch (ConfigException | IOException ex) {
120 | throw new RuntimeException(ex);
121 | }
122 | }
123 | }
124 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/output/gcs/GcsTransactionalFileOutput.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2018 The Embulk project
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package org.embulk.output.gcs;
18 |
19 | import com.google.cloud.WriteChannel;
20 | import com.google.cloud.storage.Blob;
21 | import com.google.cloud.storage.BlobId;
22 | import com.google.cloud.storage.BlobInfo;
23 | import com.google.cloud.storage.Storage;
24 | import com.google.common.annotations.VisibleForTesting;
25 | import org.embulk.config.TaskReport;
26 | import org.embulk.spi.Buffer;
27 | import org.embulk.spi.TransactionalFileOutput;
28 | import org.slf4j.Logger;
29 | import org.slf4j.LoggerFactory;
30 |
31 | import java.io.IOException;
32 | import java.nio.ByteBuffer;
33 | import java.util.ArrayList;
34 | import java.util.List;
35 |
36 | import static org.embulk.output.gcs.GcsOutputPlugin.CONFIG_MAPPER_FACTORY;
37 |
38 | public class GcsTransactionalFileOutput implements TransactionalFileOutput
39 | {
40 | private static final Logger logger = LoggerFactory.getLogger(GcsTransactionalFileOutput.class);
41 |
42 | private final int taskIndex;
43 | private final Storage client;
44 | private final String bucket;
45 | private final String pathPrefix;
46 | private final String pathSuffix;
47 | private final String sequenceFormat;
48 | private final String contentType;
49 | private final List storageObjects = new ArrayList<>();
50 | private BlobId blobId = null;
51 | private int fileIndex = 0;
52 | private WriteChannel writer = null;
53 | private long byteCount = 0;
54 | private long totalByte = 0;
55 |
56 | GcsTransactionalFileOutput(PluginTask task, Storage client, int taskIndex)
57 | {
58 | this.taskIndex = taskIndex;
59 | this.client = client;
60 | this.bucket = task.getBucket();
61 | this.pathPrefix = task.getPathPrefix();
62 | this.pathSuffix = task.getFileNameExtension();
63 | this.sequenceFormat = task.getSequenceFormat();
64 | this.contentType = task.getContentType();
65 | }
66 |
67 | public void nextFile()
68 | {
69 | closeCurrentWriter();
70 | try {
71 | String blobName = generateRemotePath(pathPrefix, sequenceFormat, taskIndex, fileIndex, pathSuffix);
72 | blobId = BlobId.of(bucket, blobName);
73 | BlobInfo blobInfo = BlobInfo.newBuilder(blobId).setContentType(contentType).build();
74 | writer = client.writer(blobInfo);
75 | }
76 | catch (Exception ex) {
77 | throw new RuntimeException(ex);
78 | }
79 | }
80 |
81 | @Override
82 | public void add(Buffer buffer)
83 | {
84 | try {
85 | writer.write(ByteBuffer.wrap(buffer.array(), buffer.offset(), buffer.limit()));
86 | byteCount = byteCount + buffer.limit();
87 | //104857600 = 100MB
88 | if (byteCount >= 104857600) {
89 | totalByte = totalByte + byteCount;
90 | logger.info("Uploaded {} bytes", totalByte);
91 | byteCount = 0;
92 | }
93 | }
94 | catch (Exception ex) {
95 | //clean up file if exist
96 | try {
97 | boolean deleted = client.delete(blobId);
98 | logger.info(" Delete file: {} > deleted? {}", blobId.getName(), deleted);
99 | }
100 | catch (Exception e) {
101 | logger.warn("Failed to delete file: {}, error message: {}", blobId.getName(), e.getMessage());
102 | }
103 | throw new RuntimeException(ex);
104 | }
105 | finally {
106 | buffer.release();
107 | }
108 | }
109 |
110 | @Override
111 | public void finish()
112 | {
113 | logger.info("Uploaded total {} bytes.", totalByte + byteCount);
114 | closeCurrentWriter();
115 | //query blob again to check
116 | Blob blob = client.get(blobId);
117 | logger.info("Upload {} successfully.", blobId.getName());
118 | storageObjects.add(blob.getBlobId().toString());
119 | }
120 |
121 | @Override
122 | public void close()
123 | {
124 | closeCurrentWriter();
125 | }
126 |
127 | @Override
128 | public void abort()
129 | {
130 | }
131 |
132 | @Override
133 | public TaskReport commit()
134 | {
135 | TaskReport report = CONFIG_MAPPER_FACTORY.newTaskReport();
136 | report.set("files", storageObjects);
137 | return report;
138 | }
139 |
140 | /**
141 | * GCS has character limitation in object names.
142 | * @see https://cloud.google.com/storage/docs/naming#objectnames
143 | * Although "." isn't listed at above pages, we can't access "./" path from GUI console.
144 | * And in many cases, user don't intend of creating "/" directory under the bucket.
145 | * This method normalizes path when it contains "./" and "/" and its variations at the beginning
146 | */
147 | @VisibleForTesting
148 | public static String generateRemotePath(String pathPrefix, String sequenceFormat, int taskIndex, int fileIndex, String pathSuffix)
149 | {
150 | String path = pathPrefix + String.format(sequenceFormat, taskIndex, fileIndex) + pathSuffix;
151 | return path.replaceFirst("^\\.*/*", "");
152 | }
153 |
154 | private void closeCurrentWriter()
155 | {
156 | if (writer != null && writer.isOpen()) {
157 | try {
158 | writer.close();
159 | }
160 | catch (IOException ex) {
161 | throw new RuntimeException(ex);
162 | }
163 | }
164 | }
165 | }
166 |
--------------------------------------------------------------------------------
/src/main/java/org/embulk/output/gcs/PluginTask.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2018 The Embulk project
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package org.embulk.output.gcs;
18 |
19 | import org.embulk.util.config.Config;
20 | import org.embulk.util.config.ConfigDefault;
21 | import org.embulk.util.config.Task;
22 | import org.embulk.util.config.units.LocalFile;
23 |
24 | import java.util.Optional;
25 |
26 | public interface PluginTask extends Task
27 | {
28 | @Config("bucket")
29 | String getBucket();
30 |
31 | @Config("path_prefix")
32 | String getPathPrefix();
33 |
34 | @Config("file_ext")
35 | String getFileNameExtension();
36 |
37 | @Config("sequence_format")
38 | @ConfigDefault("\".%03d.%02d\"")
39 | String getSequenceFormat();
40 |
41 | @Config("content_type")
42 | @ConfigDefault("\"application/octet-stream\"")
43 | String getContentType();
44 |
45 | @Config("auth_method")
46 | @ConfigDefault("\"private_key\"")
47 | AuthMethod getAuthMethod();
48 |
49 | @Config("service_account_email")
50 | @ConfigDefault("null")
51 | Optional getServiceAccountEmail();
52 |
53 | // kept for backward compatibility
54 | @Config("p12_keyfile_path")
55 | @ConfigDefault("null")
56 | Optional getP12KeyfilePath();
57 |
58 | @Config("p12_keyfile")
59 | @ConfigDefault("null")
60 | Optional getP12Keyfile();
61 | void setP12Keyfile(Optional p12Keyfile);
62 |
63 | @Config("json_keyfile")
64 | @ConfigDefault("null")
65 | Optional getJsonKeyfile();
66 |
67 | @Config("application_name")
68 | @ConfigDefault("\"embulk-output-gcs\"")
69 | String getApplicationName();
70 |
71 | @Config("max_connection_retry")
72 | @ConfigDefault("10") // 10 times retry to connect GCS server if failed.
73 | int getMaxConnectionRetry();
74 |
75 | @Config("initial_retry_interval_millis")
76 | @ConfigDefault("500")
77 | int getInitialRetryIntervalMillis();
78 |
79 | @Config("maximum_retry_interval_millis")
80 | @ConfigDefault("30000")
81 | int getMaximumRetryIntervalMillis();
82 |
83 | @Config("store_pass")
84 | @ConfigDefault("\"notasecret\"")
85 | String getStorePass();
86 |
87 | @Config("key_pass")
88 | @ConfigDefault("\"notasecret\"")
89 | String getKeyPass();
90 | }
91 |
--------------------------------------------------------------------------------
/src/test/java/org/embulk/output/gcs/TestGcsAuthentication.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 The Embulk project
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package org.embulk.output.gcs;
18 |
19 | import com.google.common.base.Throwables;
20 | import org.embulk.EmbulkTestRuntime;
21 | import org.embulk.config.ConfigException;
22 | import org.embulk.config.ConfigSource;
23 | import org.embulk.util.config.units.LocalFile;
24 | import org.junit.Assert;
25 | import org.junit.BeforeClass;
26 | import org.junit.Rule;
27 | import org.junit.Test;
28 |
29 | import static org.embulk.output.gcs.GcsOutputPlugin.CONFIG_MAPPER;
30 | import static org.embulk.output.gcs.GcsOutputPlugin.CONFIG_MAPPER_FACTORY;
31 | import static org.junit.Assert.fail;
32 | import static org.junit.Assume.assumeNotNull;
33 |
34 | import java.nio.file.NoSuchFileException;
35 | import java.security.InvalidKeyException;
36 | import java.util.Base64;
37 | import java.util.Optional;
38 |
39 | public class TestGcsAuthentication
40 | {
41 | private static Optional GCP_EMAIL;
42 | private static Optional GCP_P12_KEYFILE;
43 | private static Optional GCP_JSON_KEYFILE;
44 | private static String GCP_BUCKET;
45 | private static final String GCP_APPLICATION_NAME = "embulk-output-gcs";
46 |
47 | /*
48 | * This test case requires environment variables
49 | * GCP_EMAIL
50 | * GCP_P12_KEYFILE
51 | * GCP_JSON_KEYFILE
52 | * GCP_BUCKET
53 | */
54 | @BeforeClass
55 | public static void initializeConstant()
56 | {
57 | GCP_EMAIL = Optional.of(System.getenv("GCP_EMAIL"));
58 | GCP_JSON_KEYFILE = Optional.of(System.getenv("GCP_JSON_KEYFILE"));
59 | GCP_P12_KEYFILE = Optional.of(System.getenv("GCP_PRIVATE_KEYFILE"));
60 | GCP_BUCKET = System.getenv("GCP_BUCKET");
61 | // skip test cases, if environment variables are not set.
62 | assumeNotNull(GCP_EMAIL, GCP_P12_KEYFILE, GCP_JSON_KEYFILE, GCP_BUCKET);
63 | }
64 |
65 | @Rule
66 | public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
67 |
68 | @Test
69 | public void testGetServiceAccountCredentialThrowFileNotFoundException()
70 | {
71 | Optional notFoundP12Keyfile = Optional.of("/path/to/notfound.p12");
72 | ConfigSource configSource = config(AuthMethod.private_key);
73 | configSource.set("p12_keyfile", notFoundP12Keyfile);
74 | try {
75 | CONFIG_MAPPER.map(configSource, PluginTask.class);
76 | fail();
77 | }
78 | catch (Exception ex) {
79 | Assert.assertTrue(Throwables.getRootCause(ex) instanceof NoSuchFileException);
80 | }
81 | }
82 |
83 | @Test
84 | public void testGetGcsClientUsingServiceAccountCredentialSuccess() throws Exception
85 | {
86 | ConfigSource configSource = config(AuthMethod.private_key);
87 | byte[] keyBytes = Base64.getDecoder().decode(GCP_P12_KEYFILE.get());
88 | Optional p12Key = Optional.of(LocalFile.ofContent(keyBytes));
89 | configSource.set("p12_keyfile", p12Key);
90 | PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class);
91 | GcsAuthentication auth = new GcsAuthentication(task);
92 | auth.getGcsClient();
93 | }
94 |
95 | @Test(expected = ConfigException.class)
96 | public void testGetGcsClientUsingServiceAccountCredentialThrowConfigException() throws Exception
97 | {
98 | ConfigSource configSource = config(AuthMethod.private_key);
99 | byte[] keyBytes = Base64.getDecoder().decode(GCP_P12_KEYFILE.get());
100 | Optional p12Key = Optional.of(LocalFile.ofContent(keyBytes));
101 | configSource.set("p12_keyfile", p12Key);
102 | configSource.set("bucket", "non-exists-bucket");
103 | PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class);
104 | GcsAuthentication auth = new GcsAuthentication(task);
105 | auth.getGcsClient();
106 | fail();
107 | }
108 |
109 | @Test
110 | public void testGetServiceAccountCredentialFromJsonThrowFileFileNotFoundException()
111 | {
112 | Optional notFoundJsonKeyfile = Optional.of("/path/to/notfound.json");
113 | ConfigSource configSource = config(AuthMethod.json_key);
114 | configSource.set("json_keyfile", notFoundJsonKeyfile);
115 | try {
116 | CONFIG_MAPPER.map(configSource, PluginTask.class);
117 | fail();
118 | }
119 | catch (Exception ex) {
120 | Assert.assertTrue(Throwables.getRootCause(ex) instanceof NoSuchFileException);
121 | }
122 | }
123 |
124 | @Test
125 | public void testGetServiceAccountCredentialFromInvalidJsonKey()
126 | {
127 | String jsonKey = "{\n" +
128 | "\"type\": \"service_account\",\n" +
129 | "\"project_id\": \"test\",\n" +
130 | "\"private_key_id\": \"private_key_id\",\n" +
131 | "\"private_key\": \"-----BEGIN PRIVATE KEY-----\\nInvalidKey\\n-----END PRIVATE KEY-----\\n\",\n" +
132 | "\"client_email\": \"test@test.iam.gserviceaccount.com\",\n" +
133 | "\"client_id\": \"433252345345\",\n" +
134 | "\"auth_uri\": \"https://accounts.google.com/o/oauth2/auth\",\n" +
135 | " \"token_uri\": \"https://oauth2.googleapis.com/token\",\n" +
136 | "\"auth_provider_x509_cert_url\": \"https://www.googleapis.com/oauth2/v1/certs\",\n" +
137 | "\"client_x509_cert_url\": \"https://www.googleapis.com/robot/v1/metadata/x509/test.iam.gserviceaccount.com\"\n" +
138 | "}";
139 |
140 | Optional invalidJsonKeyfile = Optional.of(LocalFile.ofContent(jsonKey.getBytes()));
141 | ConfigSource configSource = config(AuthMethod.json_key);
142 | configSource.set("json_keyfile", invalidJsonKeyfile);
143 | try {
144 | PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class);
145 | GcsAuthentication auth = new GcsAuthentication(task);
146 | auth.getGcsClient();
147 | fail();
148 | }
149 | catch (Exception ex) {
150 | Assert.assertTrue(Throwables.getRootCause(ex) instanceof InvalidKeyException);
151 | }
152 | }
153 |
154 | @Test
155 | public void testGetServiceAccountCredentialFromJsonSuccess() throws Exception
156 | {
157 | ConfigSource configSource = config(AuthMethod.json_key);
158 | Optional jsonKeyfile = Optional.of(LocalFile.ofContent(GCP_JSON_KEYFILE.get().getBytes()));
159 | configSource.set("json_keyfile", jsonKeyfile);
160 | PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class);
161 | GcsAuthentication auth = new GcsAuthentication(task);
162 | auth.getGcsClient();
163 | }
164 |
165 | @Test(expected = ConfigException.class)
166 | public void testGetServiceAccountCredentialFromJsonThrowConfigException() throws Exception
167 | {
168 | ConfigSource configSource = config(AuthMethod.json_key);
169 | Optional jsonKeyfile = Optional.of(LocalFile.ofContent(GCP_JSON_KEYFILE.get().getBytes()));
170 | configSource.set("json_keyfile", jsonKeyfile);
171 | configSource.set("bucket", "non-exists-bucket");
172 | PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class);
173 | GcsAuthentication auth = new GcsAuthentication(task);
174 | auth.getGcsClient();
175 | fail();
176 | }
177 |
178 | public ConfigSource config(AuthMethod authMethod)
179 | {
180 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
181 | .set("type", "gcs")
182 | .set("bucket", GCP_BUCKET)
183 | .set("path_prefix", "")
184 | .set("last_path", "")
185 | .set("file_ext", ".csv")
186 | .set("service_account_email", GCP_EMAIL)
187 | .set("application_name", GCP_APPLICATION_NAME)
188 | .set("max_connection_retry", 3);
189 |
190 | if (authMethod == AuthMethod.private_key) {
191 | config.set("auth_method", "private_key");
192 | }
193 | else if (authMethod == AuthMethod.json_key) {
194 | config.set("auth_method", "json_key");
195 | }
196 | else {
197 | config.set("auth_method", "compute_engine");
198 | }
199 | return config;
200 | }
201 | }
202 |
--------------------------------------------------------------------------------
/src/test/java/org/embulk/output/gcs/TestGcsOutputPlugin.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2015 Kazuyuki Honda, and the Embulk project
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package org.embulk.output.gcs;
18 |
19 | import com.google.cloud.storage.Blob;
20 | import com.google.cloud.storage.BlobId;
21 | import com.google.cloud.storage.Storage;
22 | import com.google.common.collect.ImmutableList;
23 | import com.google.common.collect.ImmutableMap;
24 | import com.google.common.collect.Lists;
25 |
26 | import org.embulk.EmbulkSystemProperties;
27 | import org.embulk.EmbulkTestRuntime;
28 | import org.embulk.config.ConfigException;
29 | import org.embulk.config.ConfigSource;
30 | import org.embulk.config.TaskReport;
31 | import org.embulk.config.TaskSource;
32 | import org.embulk.exec.PartialExecutionException;
33 | import org.embulk.formatter.csv.CsvFormatterPlugin;
34 | import org.embulk.input.file.LocalFileInputPlugin;
35 | import org.embulk.parser.csv.CsvParserPlugin;
36 | import org.embulk.spi.FileInputPlugin;
37 | import org.embulk.spi.FileOutputPlugin;
38 | import org.embulk.spi.FormatterPlugin;
39 | import org.embulk.spi.ParserPlugin;
40 | import org.embulk.test.TestingEmbulk;
41 | import org.embulk.util.config.units.LocalFile;
42 | import org.junit.Before;
43 | import org.junit.BeforeClass;
44 | import org.junit.Rule;
45 | import org.junit.Test;
46 | import org.mockito.Mockito;
47 |
48 | import static org.embulk.output.gcs.GcsOutputPlugin.CONFIG_MAPPER;
49 | import static org.embulk.output.gcs.GcsOutputPlugin.CONFIG_MAPPER_FACTORY;
50 | import static org.junit.Assert.assertEquals;
51 | import static org.junit.Assert.assertTrue;
52 | import static org.junit.Assert.fail;
53 | import static org.junit.Assume.assumeNotNull;
54 |
55 | import java.io.BufferedReader;
56 | import java.io.ByteArrayOutputStream;
57 | import java.io.IOException;
58 | import java.io.InputStream;
59 | import java.io.InputStreamReader;
60 | import java.nio.channels.Channels;
61 | import java.nio.file.Paths;
62 | import java.security.GeneralSecurityException;
63 | import java.util.Arrays;
64 | import java.util.Base64;
65 | import java.util.List;
66 | import java.util.Optional;
67 | import java.util.Properties;
68 |
69 | public class TestGcsOutputPlugin
70 | {
71 | private static final EmbulkSystemProperties EMBULK_SYSTEM_PROPERTIES;
72 | private static Optional GCP_EMAIL;
73 | private static Optional GCP_P12_KEYFILE;
74 | private static Optional GCP_JSON_KEYFILE;
75 | private static String GCP_BUCKET;
76 | private static String GCP_BUCKET_DIRECTORY;
77 | private static String GCP_PATH_PREFIX;
78 | private static String LOCAL_PATH_PREFIX;
79 | private static String GCP_APPLICATION_NAME;
80 |
81 | static {
82 | final Properties properties = new Properties();
83 | properties.setProperty("default_guess_plugins", "gzip,bzip2,json,csv");
84 | EMBULK_SYSTEM_PROPERTIES = EmbulkSystemProperties.of(properties);
85 | }
86 |
87 | /*
88 | * This test case requires environment variables
89 | * GCP_EMAIL
90 | * GCP_P12_KEYFILE
91 | * GCP_JSON_KEYFILE
92 | * GCP_BUCKET
93 | */
94 | @BeforeClass
95 | public static void initializeConstant()
96 | {
97 | GCP_EMAIL = Optional.of(System.getenv("GCP_EMAIL"));
98 | GCP_P12_KEYFILE = Optional.of(System.getenv("GCP_PRIVATE_KEYFILE"));
99 | GCP_JSON_KEYFILE = Optional.of(System.getenv("GCP_JSON_KEYFILE"));
100 | GCP_BUCKET = System.getenv("GCP_BUCKET");
101 | // skip test cases, if environment variables are not set.
102 | assumeNotNull(GCP_EMAIL, GCP_P12_KEYFILE, GCP_JSON_KEYFILE, GCP_BUCKET);
103 |
104 | GCP_BUCKET_DIRECTORY = System.getenv("GCP_BUCKET_DIRECTORY") != null ? getDirectory(System.getenv("GCP_BUCKET_DIRECTORY")) : getDirectory("");
105 | GCP_PATH_PREFIX = GCP_BUCKET_DIRECTORY + "output_";
106 | LOCAL_PATH_PREFIX = GcsOutputPlugin.class.getClassLoader().getResource("sample_01.csv").getPath();
107 | GCP_APPLICATION_NAME = "embulk-output-gcs";
108 | }
109 |
110 | @Rule
111 | public TestingEmbulk embulk = TestingEmbulk.builder()
112 | .setEmbulkSystemProperties(EMBULK_SYSTEM_PROPERTIES)
113 | .registerPlugin(FormatterPlugin.class, "csv", CsvFormatterPlugin.class)
114 | .registerPlugin(FileInputPlugin.class, "file", LocalFileInputPlugin.class)
115 | .registerPlugin(FileOutputPlugin.class, "gcs", GcsOutputPlugin.class)
116 | .registerPlugin(ParserPlugin.class, "csv", CsvParserPlugin.class)
117 | .build();
118 |
119 | @Rule
120 | public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
121 | private GcsOutputPlugin plugin;
122 |
123 | @Before
124 | public void createResources() throws GeneralSecurityException, NoSuchMethodException, IOException
125 | {
126 | plugin = new GcsOutputPlugin();
127 | }
128 |
129 | @Test
130 | public void checkDefaultValues()
131 | {
132 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
133 | .set("type", "gcs")
134 | .set("bucket", GCP_BUCKET)
135 | .set("path_prefix", "my-prefix")
136 | .set("file_ext", ".csv")
137 | .set("formatter", formatterConfig());
138 |
139 | PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class);
140 | assertEquals("private_key", task.getAuthMethod().toString());
141 | }
142 |
143 | // p12_keyfile is null when auth_method is private_key
144 | @Test
145 | public void checkDefaultValuesP12keyNull() throws IOException
146 | {
147 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
148 | .set("type", "gcs")
149 | .set("bucket", GCP_BUCKET)
150 | .set("path_prefix", "my-prefix")
151 | .set("file_ext", ".csv")
152 | .set("auth_method", "private_key")
153 | .set("service_account_email", GCP_EMAIL)
154 | .set("p12_keyfile", null)
155 | .set("formatter", formatterConfig());
156 |
157 | try {
158 | embulk.runOutput(config, Paths.get(LOCAL_PATH_PREFIX));
159 | fail("Expected Exception was not thrown.");
160 | }
161 | catch (PartialExecutionException ex) {
162 | assertTrue(ex.getCause() instanceof ConfigException);
163 | assertEquals("If auth_method is private_key, you have to set both service_account_email and p12_keyfile", ex.getCause().getMessage());
164 | }
165 | }
166 |
167 | // both p12_keyfile and p12_keyfile_path set
168 | @Test
169 | public void checkDefaultValuesConflictSetting() throws IOException
170 | {
171 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
172 | .set("type", "gcs")
173 | .set("bucket", GCP_BUCKET)
174 | .set("path_prefix", "my-prefix")
175 | .set("file_ext", ".csv")
176 | .set("auth_method", "private_key")
177 | .set("service_account_email", GCP_EMAIL)
178 | .set("formatter", formatterConfig());
179 |
180 | config.set("p12_keyfile", Optional.of(LocalFile.ofContent("dummy".getBytes())));
181 | config.set("p12_keyfile_path", Optional.of("dummy_path"));
182 | try {
183 | embulk.runOutput(config, Paths.get(LOCAL_PATH_PREFIX));
184 | fail("Expected Exception was not thrown.");
185 | }
186 | catch (final PartialExecutionException ex) {
187 | assertTrue(ex.getCause() instanceof ConfigException);
188 | assertEquals("Setting both p12_keyfile_path and p12_keyfile is invalid", ex.getCause().getMessage());
189 | }
190 | }
191 |
192 | // invalid p12keyfile when auth_method is private_key
193 | @Test
194 | public void checkDefaultValuesInvalidPrivateKey() throws IOException
195 | {
196 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
197 | .set("type", "gcs")
198 | .set("bucket", GCP_BUCKET)
199 | .set("path_prefix", "my-prefix")
200 | .set("file_ext", ".csv")
201 | .set("auth_method", "private_key")
202 | .set("service_account_email", GCP_EMAIL)
203 | .set("p12_keyfile", "invalid-key.p12")
204 | .set("formatter", formatterConfig());
205 | try {
206 | embulk.runOutput(config, Paths.get(LOCAL_PATH_PREFIX));
207 | fail("Expected Exception was not thrown.");
208 | }
209 | catch (final PartialExecutionException ex) {
210 | assertTrue(ex.getCause() instanceof ConfigException);
211 | }
212 | }
213 |
214 | // json_keyfile is null when auth_method is json_key
215 | @Test
216 | public void checkDefaultValuesJsonKeyfileNull() throws IOException
217 | {
218 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
219 | .set("type", "gcs")
220 | .set("bucket", GCP_BUCKET)
221 | .set("path_prefix", "my-prefix")
222 | .set("file_ext", ".csv")
223 | .set("auth_method", "json_key")
224 | .set("service_account_email", GCP_EMAIL)
225 | .set("json_keyfile", null)
226 | .set("formatter", formatterConfig());
227 |
228 | try {
229 | embulk.runOutput(config, Paths.get(LOCAL_PATH_PREFIX));
230 | fail("Expected Exception was not thrown.");
231 | }
232 | catch (final PartialExecutionException ex) {
233 | assertTrue(ex.getCause() instanceof ConfigException);
234 | }
235 | }
236 |
237 | @Test
238 | public void testGcsClientCreateSuccessfully()
239 | {
240 | ConfigSource configSource = config();
241 | PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class);
242 | plugin.transaction(configSource, 1, new FileOutputControl()); // no errors happens
243 | plugin.createClient(task); // no errors happens
244 | }
245 |
246 | @Test
247 | public void testGcsClientCreateThrowConfigException()
248 | {
249 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
250 | .set("type", "gcs")
251 | .set("bucket", "non-exists-bucket")
252 | .set("path_prefix", "my-prefix")
253 | .set("file_ext", ".csv")
254 | .set("auth_method", "json_key")
255 | .set("service_account_email", GCP_EMAIL)
256 | .set("json_keyfile", Optional.of(LocalFile.ofContent(GCP_JSON_KEYFILE.get().getBytes())))
257 | .set("formatter", formatterConfig());
258 |
259 | plugin.transaction(config, 1, new FileOutputControl()); // no errors happens
260 | PluginTask task = CONFIG_MAPPER.map(config, PluginTask.class);
261 | try {
262 | plugin.createClient(task);
263 | fail("Expected Exception was not thrown.");
264 | }
265 | catch (Exception ex) {
266 | assertTrue(ex.getCause() instanceof ConfigException);
267 | }
268 | }
269 |
270 | @Test
271 | public void testGcsFileOutputByOpen() throws Exception
272 | {
273 | ConfigSource configSource = config();
274 | PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class);
275 | Storage client = plugin.createClient(task);
276 | try {
277 | embulk.runOutput(configSource, Paths.get(LOCAL_PATH_PREFIX));
278 | }
279 | catch (Exception ex) {
280 | fail(ex.getMessage());
281 | }
282 |
283 | String remotePath = GCP_PATH_PREFIX + String.format(task.getSequenceFormat(), 0, 0) + task.getFileNameExtension();
284 | assertRecords(remotePath, client);
285 | }
286 |
287 | @Test
288 | public void testGenerateRemotePath() throws Exception
289 | {
290 | ConfigSource configSource = config();
291 | PluginTask task = CONFIG_MAPPER.map(configSource, PluginTask.class);
292 | Storage storage = Mockito.mock(Storage.class);
293 | GcsTransactionalFileOutput fileOutput = new GcsTransactionalFileOutput(task, storage, 0);
294 | assertEquals("sample.000.01.csv", fileOutput.generateRemotePath("/sample", task.getSequenceFormat(), 0, 1, ".csv"));
295 | assertEquals("sample.000.01.csv", fileOutput.generateRemotePath("./sample", task.getSequenceFormat(), 0, 1, ".csv"));
296 | assertEquals("sample.000.01.csv", fileOutput.generateRemotePath("../sample", task.getSequenceFormat(), 0, 1, ".csv"));
297 | assertEquals("sample.000.01.csv", fileOutput.generateRemotePath("//sample", task.getSequenceFormat(), 0, 1, ".csv"));
298 | assertEquals("path/to/sample.000.01.csv", fileOutput.generateRemotePath("/path/to/sample", task.getSequenceFormat(), 0, 1, ".csv"));
299 | assertEquals("path/to/./sample.000.01.csv", fileOutput.generateRemotePath("path/to/./sample", task.getSequenceFormat(), 0, 1, ".csv"));
300 | assertEquals("path/to/../sample.000.01.csv", fileOutput.generateRemotePath("path/to/../sample", task.getSequenceFormat(), 0, 1, ".csv"));
301 | assertEquals("sample.000.01.csv", fileOutput.generateRemotePath("....../sample", task.getSequenceFormat(), 0, 1, ".csv"));
302 | assertEquals("sample.000.01.csv", fileOutput.generateRemotePath("......///sample", task.getSequenceFormat(), 0, 1, ".csv"));
303 | }
304 |
305 | public ConfigSource config()
306 | {
307 | byte[] keyBytes = Base64.getDecoder().decode(GCP_P12_KEYFILE.get());
308 | Optional p12Key = Optional.of(LocalFile.ofContent(keyBytes));
309 | Optional jsonKey = Optional.of(LocalFile.ofContent(GCP_JSON_KEYFILE.get().getBytes()));
310 |
311 | return CONFIG_MAPPER_FACTORY.newConfigSource()
312 | .set("type", "gcs")
313 | .set("bucket", GCP_BUCKET)
314 | .set("path_prefix", GCP_PATH_PREFIX)
315 | .set("last_path", "")
316 | .set("file_ext", ".csv")
317 | .set("auth_method", "json_key")
318 | .set("service_account_email", GCP_EMAIL)
319 | .set("p12_keyfile", p12Key)
320 | .set("json_keyfile", jsonKey)
321 | .set("application_name", GCP_APPLICATION_NAME)
322 | .set("formatter", formatterConfig());
323 | }
324 |
325 | private class FileOutputControl implements FileOutputPlugin.Control
326 | {
327 | @Override
328 | public List run(TaskSource taskSource)
329 | {
330 | return Lists.newArrayList(CONFIG_MAPPER_FACTORY.newTaskReport());
331 | }
332 | }
333 |
334 | private ImmutableMap formatterConfig()
335 | {
336 | ImmutableMap.Builder builder = new ImmutableMap.Builder<>();
337 | builder.put("type", "csv");
338 | builder.put("header_line", "false");
339 | builder.put("timezone", "Asia/Tokyo");
340 | return builder.build();
341 | }
342 |
343 | private void assertRecords(String gcsPath, Storage client) throws Exception
344 | {
345 | ImmutableList> records = getFileContentsFromGcs(gcsPath, client);
346 | assertEquals(4, records.size());
347 | {
348 | List record = records.get(0);
349 | assertEquals("1", record.get(0));
350 | assertEquals("32864", record.get(1));
351 | }
352 |
353 | {
354 | List record = records.get(1);
355 | assertEquals("2", record.get(0));
356 | assertEquals("14824", record.get(1));
357 | }
358 |
359 | {
360 | List record = records.get(2);
361 | assertEquals("3", record.get(0));
362 | assertEquals("27559", record.get(1));
363 | }
364 |
365 | {
366 | List record = records.get(3);
367 | assertEquals("4", record.get(0));
368 | assertEquals("11270", record.get(1));
369 | }
370 | }
371 |
372 | private ImmutableList> getFileContentsFromGcs(String path, Storage client) throws Exception
373 | {
374 | ConfigSource config = config();
375 | Blob blob = client.get(BlobId.of(GCP_BUCKET, path));
376 | InputStream is = Channels.newInputStream(blob.reader());
377 | ImmutableList.Builder> builder = new ImmutableList.Builder<>();
378 |
379 | BufferedReader reader = new BufferedReader(new InputStreamReader(is));
380 | String line;
381 | while ((line = reader.readLine()) != null) {
382 | List records = Arrays.asList(line.split(",", 0));
383 | builder.add(records);
384 | }
385 | return builder.build();
386 | }
387 |
388 | private static String getDirectory(String dir)
389 | {
390 | if (dir != null && !dir.endsWith("/")) {
391 | dir = dir + "/";
392 | }
393 | if (dir.startsWith("/")) {
394 | dir = dir.replaceFirst("/", "");
395 | }
396 | return dir;
397 | }
398 |
399 | private byte[] convertInputStreamToByte(InputStream is) throws IOException
400 | {
401 | ByteArrayOutputStream bo = new ByteArrayOutputStream();
402 | byte [] buffer = new byte[1024];
403 | while (true) {
404 | int len = is.read(buffer);
405 | if (len < 0) {
406 | break;
407 | }
408 | bo.write(buffer, 0, len);
409 | }
410 | return bo.toByteArray();
411 | }
412 | }
413 |
--------------------------------------------------------------------------------
/src/test/resources/sample_01.csv:
--------------------------------------------------------------------------------
1 | id:long,account:long
2 | 1,32864
3 | 2,14824
4 | 3,27559
5 | 4,11270
6 |
--------------------------------------------------------------------------------