├── .github
│   └── workflows
│       └── test.yml
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── NOTICE
├── NOTICE_GEM
├── README.md
├── build.gradle
├── config
│   └── checkstyle
│       ├── checkstyle.xml
│       └── default.xml
├── docker-compose.yml
├── gradle
│   ├── dependency-locks
│   │   └── embulkPluginRuntime.lockfile
│   └── wrapper
│       ├── gradle-wrapper.jar
│       └── gradle-wrapper.properties
├── gradlew
├── gradlew.bat
└── src
    ├── main
    │   └── java
    │       └── org
    │           └── embulk
    │               └── input
    │                   └── mongodb
    │                       ├── AuthMethod.java
    │                       ├── HostTask.java
    │                       ├── MongodbInputPlugin.java
    │                       ├── PluginTask.java
    │                       └── ValueCodec.java
    └── test
        ├── java
        │   └── org
        │       └── embulk
        │           └── input
        │               └── mongodb
        │                   ├── Pages.java
        │                   └── TestMongodbInputPlugin.java
        └── resources
            ├── basic.yml
            ├── basic_expected.csv
            ├── full.yml
            ├── full_expected.csv
            ├── id_field_name.yml
            ├── id_field_name_expected.csv
            └── my_collection.jsonl

/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: Test
2 | on: push
3 | jobs:
4 |   test:
5 |     runs-on: ubuntu-latest
6 |     env:
7 |       MONGO_DATABASE: mydb
8 |       MONGO_COLLECTION: my_collection
9 |       MONGO_URI: mongodb://localhost:27017/mydb
10 |     steps:
11 |     - uses: actions/checkout@v2
12 |     - name: Set up JDK 8
13 |       uses: actions/setup-java@v2
14 |       with:
15 |         java-version: 8
16 |         distribution: "zulu"
17 |     - run: mkdir -p ./tmp
18 |     - run: date
19 |     - run: docker-compose up -d
20 |     - run: docker-compose ps
21 |     - name: Check
22 |       run: ./gradlew check --console rich --info
23 |     - name: Install Embulk
24 |       run: curl --create-dirs -o ~/.embulk/bin/embulk -L "https://dl.embulk.org/embulk-0.10.19.jar"
25 |     - name: Set Embulk executable
26 |       run: chmod +x ~/.embulk/bin/embulk
27 |     - run: ~/.embulk/bin/embulk --version
28 |     - name: Build a gem
29 |       run: ./gradlew gem
30 |     - name: Mongo DB
31 |       run: mongoimport --host 127.0.0.1 -u mongo_user -p dbpass --db $MONGO_DATABASE --collection $MONGO_COLLECTION --type json --drop src/test/resources/my_collection.jsonl
32 |     - run: ~/.embulk/bin/embulk run -L build/gemContents src/test/resources/basic.yml
33 |     - run: cat tmp/basic000.00.csv
34 |     - run: cmp tmp/basic000.00.csv src/test/resources/basic_expected.csv
35 |     - run: ~/.embulk/bin/embulk run -L build/gemContents src/test/resources/full.yml
36 |     - run: cat tmp/full000.00.csv
37 |     - run: cmp tmp/full000.00.csv src/test/resources/full_expected.csv
38 |     - run: ~/.embulk/bin/embulk run -L build/gemContents src/test/resources/id_field_name.yml
39 |     - run: cat tmp/id_field_name000.00.csv
40 |     - run: cmp tmp/id_field_name000.00.csv src/test/resources/id_field_name_expected.csv
41 |     - uses: actions/upload-artifact@v2
42 |       if: always()
43 |       with:
44 |         name: tests
45 |         path: ./*/build/reports/tests/test
46 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | /db/
3 | /out/
4 | /pkg/
5 | /tmp/
6 | *.gemspec
7 | .gradle/
8 | /classpath/
9 | build/
10 | .idea
11 | *.iml
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 | 
3 | ## [v0.5.0](https://github.com/hakobera/embulk-input-mongodb/tree/v0.5.0) (2016-08-23)
4 | [Full Changelog](https://github.com/hakobera/embulk-input-mongodb/compare/v0.4.0...v0.5.0)
5 | 
6 | **Implemented enhancements:**
7 | 
8 | - Support separated URI params
[\#24](https://github.com/hakobera/embulk-input-mongodb/pull/24) ([sakama](https://github.com/sakama)) 9 | 10 | **Merged pull requests:** 11 | 12 | - Add --host option to mongoimport to prevent connection failure. [\#27](https://github.com/hakobera/embulk-input-mongodb/pull/27) ([hakobera](https://github.com/hakobera)) 13 | - Add document about batch\_size config [\#25](https://github.com/hakobera/embulk-input-mongodb/pull/25) ([hakobera](https://github.com/hakobera)) 14 | 15 | ## [v0.4.0](https://github.com/hakobera/embulk-input-mongodb/tree/v0.4.0) (2016-07-08) 16 | [Full Changelog](https://github.com/hakobera/embulk-input-mongodb/compare/v0.3.2...v0.4.0) 17 | 18 | **Implemented enhancements:** 19 | 20 | - Support incremental load [\#16](https://github.com/hakobera/embulk-input-mongodb/pull/16) ([sakama](https://github.com/sakama)) 21 | 22 | **Merged pull requests:** 23 | 24 | - Bumpup version to 0.4.0 [\#23](https://github.com/hakobera/embulk-input-mongodb/pull/23) ([hakobera](https://github.com/hakobera)) 25 | 26 | ## [v0.3.2](https://github.com/hakobera/embulk-input-mongodb/tree/v0.3.2) (2016-06-23) 27 | [Full Changelog](https://github.com/hakobera/embulk-input-mongodb/compare/v0.3.1...v0.3.2) 28 | 29 | **Implemented enhancements:** 30 | 31 | - Support Symbol type [\#19](https://github.com/hakobera/embulk-input-mongodb/issues/19) 32 | 33 | **Merged pull requests:** 34 | 35 | - Bumpup version to 0.3.2 [\#22](https://github.com/hakobera/embulk-input-mongodb/pull/22) ([hakobera](https://github.com/hakobera)) 36 | - Release v0.3.1 [\#17](https://github.com/hakobera/embulk-input-mongodb/pull/17) ([hakobera](https://github.com/hakobera)) 37 | - Support Symbol type [\#21](https://github.com/hakobera/embulk-input-mongodb/pull/21) ([hakobera](https://github.com/hakobera)) 38 | 39 | ## [v0.3.1](https://github.com/hakobera/embulk-input-mongodb/tree/v0.3.1) (2016-06-21) 40 | [Full Changelog](https://github.com/hakobera/embulk-input-mongodb/compare/v0.3.0...v0.3.1) 41 | 42 | **Implemented enhancements:** 43 | 44 | - Support id\_field\_name config [\#15](https://github.com/hakobera/embulk-input-mongodb/pull/15) ([hakobera](https://github.com/hakobera)) 45 | 46 | **Closed issues:** 47 | 48 | - readName can only be called when State is NAME, not when State is VALUE. 
[\#18](https://github.com/hakobera/embulk-input-mongodb/issues/18) 49 | 50 | ## [v0.3.0](https://github.com/hakobera/embulk-input-mongodb/tree/v0.3.0) (2016-06-13) 51 | [Full Changelog](https://github.com/hakobera/embulk-input-mongodb/compare/v0.2.0...v0.3.0) 52 | 53 | **Merged pull requests:** 54 | 55 | - Add checkstyle plugin and format code [\#14](https://github.com/hakobera/embulk-input-mongodb/pull/14) ([sakama](https://github.com/sakama)) 56 | - Bumpup version to 0.3.0 [\#13](https://github.com/hakobera/embulk-input-mongodb/pull/13) ([hakobera](https://github.com/hakobera)) 57 | - Add unit test for existing code, \#9 and \#10 [\#11](https://github.com/hakobera/embulk-input-mongodb/pull/11) ([sakama](https://github.com/sakama)) 58 | - Change schema convert logic [\#10](https://github.com/hakobera/embulk-input-mongodb/pull/10) ([sakama](https://github.com/sakama)) 59 | 60 | ## [v0.2.0](https://github.com/hakobera/embulk-input-mongodb/tree/v0.2.0) (2016-05-30) 61 | [Full Changelog](https://github.com/hakobera/embulk-input-mongodb/compare/v0.1.2...v0.2.0) 62 | 63 | **Implemented enhancements:** 64 | 65 | - Support JSON type [\#6](https://github.com/hakobera/embulk-input-mongodb/issues/6) 66 | 67 | **Merged pull requests:** 68 | 69 | - Prepare release for v0.2.0 [\#12](https://github.com/hakobera/embulk-input-mongodb/pull/12) ([hakobera](https://github.com/hakobera)) 70 | - Add minimum limit to batch\_size and changed not to allow null [\#9](https://github.com/hakobera/embulk-input-mongodb/pull/9) ([sakama](https://github.com/sakama)) 71 | - Refactor some codes [\#8](https://github.com/hakobera/embulk-input-mongodb/pull/8) ([sakama](https://github.com/sakama)) 72 | - Fix example config on readme [\#5](https://github.com/hakobera/embulk-input-mongodb/pull/5) ([hakobera](https://github.com/hakobera)) 73 | - Support JSON type [\#7](https://github.com/hakobera/embulk-input-mongodb/pull/7) ([hakobera](https://github.com/hakobera)) 74 | 75 | ## [v0.1.2](https://github.com/hakobera/embulk-input-mongodb/tree/v0.1.2) (2015-09-10) 76 | [Full Changelog](https://github.com/hakobera/embulk-input-mongodb/compare/v0.1.1...v0.1.2) 77 | 78 | **Merged pull requests:** 79 | 80 | - Change build option for JDK7 [\#4](https://github.com/hakobera/embulk-input-mongodb/pull/4) ([hakobera](https://github.com/hakobera)) 81 | - Set homepage info [\#3](https://github.com/hakobera/embulk-input-mongodb/pull/3) ([hakobera](https://github.com/hakobera)) 82 | 83 | ## [v0.1.1](https://github.com/hakobera/embulk-input-mongodb/tree/v0.1.1) (2015-09-06) 84 | [Full Changelog](https://github.com/hakobera/embulk-input-mongodb/compare/v0.1.0...v0.1.1) 85 | 86 | ## [v0.1.0](https://github.com/hakobera/embulk-input-mongodb/tree/v0.1.0) (2015-09-06) 87 | **Implemented enhancements:** 88 | 89 | - Fix projection logic and better logging [\#2](https://github.com/hakobera/embulk-input-mongodb/pull/2) ([hakobera](https://github.com/hakobera)) 90 | 91 | **Merged pull requests:** 92 | 93 | - Run test on travis [\#1](https://github.com/hakobera/embulk-input-mongodb/pull/1) ([hakobera](https://github.com/hakobera)) 94 | 95 | 96 | 97 | \* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)* -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS 
AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 
180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | embulk-input-mongodb 2 | Copyright 2015 Kazuyuki Honda, and the Embulk project 3 | 4 | This product includes software developed by Kazuyuki Honda, and in a part of the Embulk project (https://www.embulk.org/). 5 | It was originally developed by Kazuyuki Honda (https://github.com/hakobera), and then transferred to the Embulk project. 6 | It was originally licenced under the MIT License before the transfer, and it is now licensed under the Apache Software License, Version 2.0. 7 | -------------------------------------------------------------------------------- /NOTICE_GEM: -------------------------------------------------------------------------------- 1 | embulk-input-mongodb 2 | Copyright 2015 Kazuyuki Honda, and the Embulk project 3 | 4 | The gem distribution of this product includes software developed by Kazuyuki Honda, and in a part of the Embulk project (https://www.embulk.org/). 5 | It was originally developed by Kazuyuki Honda (https://github.com/hakobera), and then transferred to the Embulk project. 6 | It was originally licenced under the MIT License before the transfer, and it is now licensed under the Apache Software License, Version 2.0. 7 | 8 | The gem distribution of this product includes JARs of the Jackson project (https://github.com/FasterXML/jackson), as-is. 9 | They are licensed under the Apache Software License, Version 2.0. 10 | 11 | The gem distribution of this product includes JARs of the Jakarta Bean Validation API 1.1 (https://beanvalidation.org/1.1/), as-is. 12 | It is licensed under the Apache Software License, Version 2.0. 13 | 14 | The gem distribution of this product includes a JAR of MongoDB Java Driver (https://mongodb.github.io/mongo-java-driver/), as-is. 15 | It is licensed under the Apache Software License, Version 2.0. 
16 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MongoDB input plugin for Embulk
2 | 
3 | [![Build Status](https://travis-ci.org/hakobera/embulk-input-mongodb.svg)](https://travis-ci.org/hakobera/embulk-input-mongodb)
4 | 
5 | MongoDB input plugin for Embulk loads records from MongoDB.
6 | This plugin loads documents as single-column records (column name is "record"). You can use filter plugins such as [embulk-filter-expand_json](https://github.com/civitaspo/embulk-filter-expand_json) or [embulk-filter-add_time](https://github.com/treasure-data/embulk-filter-add_time) to convert the JSON column to typed columns. [Rename filter](https://www.embulk.org/docs/built-in.html#rename-filter-plugin) is also useful to rename the typed columns.
7 | 
8 | ## Overview
9 | 
10 | This plugin works only with Embulk >= 0.8.8.
11 | 
12 | * **Plugin type**: input
13 | * **Guess supported**: no
14 | 
15 | ## Configuration
16 | 
17 | - Connection parameters
18 |   One of the following is required.
19 | 
20 |   - use MongoDB connection string URI
21 |     - **uri**: [MongoDB connection string URI](https://docs.mongodb.org/manual/reference/connection-string/) (e.g. 'mongodb://localhost:27017/mydb') (string, required)
22 |   - use separated URI parameters
23 |     - **hosts**: list of hosts. `hosts` are pairs of host (string, required) and port (integer, optional, default: 27017)
24 |     - **auth_method**: Auth method. One of `scram-sha-1`, `mongodb-cr`, or `auto` (string, optional, default: null)
25 |     - **auth_source**: Auth source. The database name where the user is defined (string, optional, default: null)
26 |     - **user**: (string, optional)
27 |     - **password**: (string, optional)
28 |     - **database**: (string, required)
29 |     - **tls**: `true` to use TLS to connect to the host (boolean, optional, default: `false`)
30 |     - **tls_insecure**: `true` to disable various certificate validations (boolean, optional, default: `false`)
31 |       - This option is similar to the corresponding option of the official `mongo` command.
32 |       - See also: https://www.mongodb.com/docs/manual/reference/connection-string/#mongodb-urioption-urioption.tlsInsecure
33 | - **collection**: source collection name (string, required)
34 | - **fields**: **(deprecated)** ~~hash records that has the following two fields (array, required)~~
35 |   ~~- name: Name of the column~~
36 |   ~~- type: Column types as follows~~
37 |     ~~- boolean~~
38 |     ~~- long~~
39 |     ~~- double~~
40 |     ~~- string~~
41 |     ~~- timestamp~~
42 | - **id_field_name**: Name of the Object ID field. Set this if you want to change the default name `_id` (string, optional, default: "_id")
43 | - **query**: A JSON document used for [querying](https://docs.mongodb.com/manual/tutorial/query-documents/) on the source collection. Documents are loaded from the collection if they match this condition. (string, optional)
44 | - **projection**: A JSON document used for [projection](https://docs.mongodb.com/manual/reference/operator/projection/positional/) on query results. Fields in a document are used only if they match this condition. (string, optional)
45 | - **sort**: Ordering of results (string, optional)
46 | - **aggregation**: Aggregation query (string, optional). See [Aggregation query](#aggregation-query) for more details.
47 | - **batch_size**: Limits the number of objects returned in one [batch](https://mongodb.github.io/mongo-java-driver/3.8/javadoc/com/mongodb/DBCursor.html#batchSize-int-) (integer, optional, default: 10000)
48 | - **incremental_field**: List of field names (list, optional; can't be used with the `sort` option)
49 | - **last_record**: Last loaded record for incremental load (hash, optional)
50 | - **stop_on_invalid_record**: Stop the bulk load transaction if a document includes an invalid record (such as an unsupported object type) (boolean, optional, default: false)
51 | - **json_column_name**: Column name used in the output (string, optional, default: "record")
52 | 
53 | ## Example
54 | 
55 | ### Authentication
56 | 
57 | #### Use separated URI parameters
58 | 
59 | ```yaml
60 | in:
61 |   type: mongodb
62 |   hosts:
63 |   - {host: localhost, port: 27017}
64 |   user: myuser
65 |   password: mypassword
66 |   database: my_database
67 |   auth_method: scram-sha-1
68 |   auth_source: auth_db
69 |   collection: "my_collection"
70 | ```
71 | 
72 | If you set `auth_method: auto`, the client will negotiate the best mechanism based on the version of the server that the client is authenticating to.
73 | 
74 | If the server version is 3.0 or higher, the driver will authenticate using the SCRAM-SHA-1 mechanism.
75 | 
76 | Otherwise, the driver will authenticate using the MONGODB_CR mechanism.
77 | 
78 | #### Use URI string
79 | 
80 | ```yaml
81 | in:
82 |   type: mongodb
83 |   uri: mongodb://myuser:mypassword@localhost:27017/my_database?authMechanism=SCRAM-SHA-1&authSource=another_database
84 | ```
85 | 
86 | ### Exporting all objects
87 | 
88 | #### Specify with a MongoDB connection string URI
89 | 
90 | ```yaml
91 | in:
92 |   type: mongodb
93 |   uri: mongodb://myuser:mypassword@localhost:27017/my_database
94 |   collection: "my_collection"
95 | ```
96 | 
97 | #### Specify with separated URI parameters
98 | 
99 | ```yaml
100 | in:
101 |   type: mongodb
102 |   hosts:
103 |   - {host: localhost, port: 27017}
104 |   - {host: example.com, port: 27017}
105 |   user: myuser
106 |   password: mypassword
107 |   database: my_database
108 |   collection: "my_collection"
109 | ```
110 | 
111 | ### Filtering documents by query and projection
112 | 
113 | ```yaml
114 | in:
115 |   type: mongodb
116 |   uri: mongodb://myuser:mypassword@localhost:27017/my_database
117 |   collection: "my_collection"
118 |   query: '{ field1: { $gte: 3 } }'
119 |   projection: '{ "_id": 1, "field1": 1, "field2": 1 }'
120 |   sort: '{ "field1": 1 }'
121 | ```
122 | 
123 | ### Incremental loading
124 | 
125 | ```yaml
126 | in:
127 |   type: mongodb
128 |   uri: mongodb://myuser:mypassword@localhost:27017/my_database
129 |   collection: "my_collection"
130 |   query: '{ field1: { $gt: 3 } }'
131 |   projection: '{ "_id": 1, "field1": 1, "field2": 1 }'
132 |   incremental_field:
133 |     - "field2"
134 |   last_record: {"field2": 13215}
135 | ```
136 | 
137 | The plugin will create a new query and sort value.
138 | You can't use the `incremental_field` option together with the `sort` option.
139 | 
140 | ```
141 | query { field1: { $gt: 3 }, field2: { $gt: 13215 } }
142 | sort {"field2", 1} # field2 ascending
143 | ```
144 | 
145 | You have to specify `last_record` in MongoDB extended JSON notation (`$oid` / `$date`) when the field type is `ObjectId` or `DateTime`, as in the following examples.
146 | 
147 | ```yaml
148 | # ObjectId field
149 | in:
150 |   type: mongodb
151 |   incremental_field:
152 |     - "_id"
153 |   last_record: {"_id": {"$oid": "5739b2261c21e58edfe39716"}}
154 | 
155 | # DateTime field
156 | in:
157 |   type: mongodb
158 |   incremental_field:
159 |     - "time_field"
160 |   last_record: {"time_field": {"$date": "2015-01-25T13:23:15.000Z"}}
161 | ```
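162 | 
163 | For illustration, with the `_id` example above the next run would use a generated condition roughly like the one below, following the same query/sort pattern shown earlier. The exact serialized form is internal to the plugin, so this rendering is an assumption, not captured output.
164 | 
165 | ```
166 | query { "_id": { $gt: {"$oid": "5739b2261c21e58edfe39716"} } }
167 | sort {"_id", 1} # _id ascending
168 | ```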
169 | 
170 | #### Run incremental load
171 | 
172 | ```
173 | $ embulk run /path/to/config.yml -c config-diff.yml
174 | ```
175 | 
176 | ### Aggregation query
177 | 
178 | This plugin supports aggregation queries. You can write a complex query like the one below.
179 | 
180 | The `aggregation` option can't be used with the `sort`, `limit`, `skip`, or `query` options. Incremental load also doesn't work with aggregation queries.
181 | 
182 | ```yaml
183 | in:
184 |   type: mongodb
185 |   aggregation: { $match: { "int32_field": { "$gte": 5 } } }
186 | ```
187 | 
188 | See also [Aggregation — MongoDB Manual](https://docs.mongodb.com/manual/aggregation/) and [Aggregation Pipeline Stages — MongoDB Manual](https://docs.mongodb.com/manual/reference/operator/aggregation-pipeline/).
189 | 
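190 | The plugin parses `aggregation` as a single pipeline stage (the parsed document is wrapped in a one-element list in `MongodbInputPlugin#run`), so exactly one stage is passed per config. As a sketch (an assumed, untested config rather than an example from the original docs), a `$sort` stage would look like this:
191 | 
192 | ```yaml
193 | in:
194 |   type: mongodb
195 |   aggregation: { $sort: { "int32_field": -1 } }
196 | ```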
197 | ### Advanced usage with filter plugins
198 | 
199 | ```yaml
200 | in:
201 |   type: mongodb
202 |   uri: mongodb://myuser:mypassword@localhost:27017/my_database
203 |   collection: "my_collection"
204 |   query: '{ "age": { $gte: 3 } }'
205 |   projection: '{ "_id": 1, "age": 1, "ts": 1, "firstName": 1, "lastName": 1 }'
206 | 
207 | filters:
208 |   # convert json column into typed columns
209 |   - type: expand_json
210 |     json_column_name: record
211 |     expanded_columns:
212 |       - {name: _id, type: long}
213 |       - {name: ts, type: string}
214 |       - {name: firstName, type: string}
215 |       - {name: lastName, type: string}
216 | 
217 |   # rename column names
218 |   - type: rename
219 |     columns:
220 |       _id: id
221 |       firstName: first_name
222 |       lastName: last_name
223 | 
224 |   # convert string "ts" column into timestamp "time" column
225 |   - type: add_time
226 |     from_column:
227 |       name: ts
228 |       timestamp_format: "%Y-%m-%dT%H:%M:%S.%N%z"
229 |     to_column:
230 |       name: time
231 |       type: timestamp
232 | ```
233 | 
234 | ## Build
235 | 
236 | ```
237 | $ ./gradlew gem
238 | ```
239 | 
240 | ## Test
241 | 
242 | First install Docker and Docker Compose, then run `docker-compose up -d` to launch a MongoDB server locally.
243 | After that, you can run the tests with `./gradlew test`.
244 | 
245 | ```sh
246 | $ docker-compose up -d
247 | Creating embulk-input-mongodb_server ... done
248 | Creating mongo-express              ... done
249 | Creating mongoClientTemp            ... done
250 | 
251 | $ docker-compose ps
252 |            Name                         Command               State                            Ports
253 | ------------------------------------------------------------------------------------------------------------------------------
254 | embulk-input-mongodb_server   docker-entrypoint.sh mongod      Up           0.0.0.0:27017->27017/tcp, 0.0.0.0:27018->27018/tcp
255 | mongo-express                 tini -- /docker-entrypoint ...   Up           0.0.0.0:8081->8081/tcp
256 | mongoClientTemp               docker-entrypoint.sh mongo ...   Restarting
257 | 
258 | $ ./gradlew test  # -t to watch change of files and rebuild continuously
259 | ```
--------------------------------------------------------------------------------
/build.gradle:
--------------------------------------------------------------------------------
1 | plugins {
2 |     id "java"
3 |     id "maven-publish"
4 |     id "signing"
5 |     id "checkstyle"
6 |     id "org.embulk.embulk-plugins" version "0.4.2"
7 | }
8 | 
9 | repositories {
10 |     mavenCentral()
11 | }
12 | 
13 | group = "org.embulk"
14 | version = "0.8.2-SNAPSHOT"
15 | description = "Loads records from Mongodb."
16 | 
17 | sourceCompatibility = 1.8
18 | targetCompatibility = 1.8
19 | 
20 | tasks.withType(JavaCompile) {
21 |     options.compilerArgs << "-Xlint:deprecation" << "-Xlint:unchecked"
22 |     options.encoding = "UTF-8"
23 | }
24 | 
25 | java {
26 |     withJavadocJar()
27 |     withSourcesJar()
28 | }
29 | 
30 | dependencies {
31 |     compileOnly "org.embulk:embulk-api:0.10.31"
32 |     compileOnly "org.embulk:embulk-spi:0.10.31"
33 |     compile "org.mongodb:mongo-java-driver:3.8.1"
34 | 
35 |     compile("org.embulk:embulk-util-config:0.3.1") {
36 |         // They conflict with embulk-core. They are once excluded here,
37 |         // and added explicitly with versions exactly the same with embulk-core:0.10.31.
38 |         exclude group: "com.fasterxml.jackson.core", module: "jackson-annotations"
39 |         exclude group: "com.fasterxml.jackson.core", module: "jackson-core"
40 |         exclude group: "com.fasterxml.jackson.core", module: "jackson-databind"
41 |         exclude group: "com.fasterxml.jackson.datatype", module: "jackson-datatype-jdk8"
42 |         exclude group: "javax.validation", module: "validation-api"
43 |     }
44 | 
45 |     // They are once excluded from transitive dependencies of other dependencies,
46 |     // and added explicitly with versions exactly the same with embulk-core:0.10.31.
47 |     compile "com.fasterxml.jackson.core:jackson-annotations:2.6.7"
48 |     compile "com.fasterxml.jackson.core:jackson-core:2.6.7"
49 |     compile "com.fasterxml.jackson.core:jackson-databind:2.6.7"
50 |     compile "com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.6.7"
51 |     compile "javax.validation:validation-api:1.1.0.Final"
52 | 
53 |     testCompile "junit:junit:4.13"
54 |     testCompile "org.embulk:embulk-core:0.10.31"
55 |     testCompile "org.embulk:embulk-core:0.10.31:tests"
56 |     testCompile "org.embulk:embulk-deps:0.10.31"
57 | }
58 | 
59 | test {
60 |     testLogging {
61 |         outputs.upToDateWhen { false }
62 |         showStandardStreams = true
63 |     }
64 | }
65 | 
66 | jar {
67 |     from rootProject.file("LICENSE")
68 |     from rootProject.file("NOTICE")
69 | }
70 | 
71 | sourcesJar {
72 |     from rootProject.file("LICENSE")
73 |     from rootProject.file("NOTICE")
74 | }
75 | 
76 | javadocJar {
77 |     from rootProject.file("LICENSE")
78 |     from rootProject.file("NOTICE")
79 | }
80 | 
81 | embulkPlugin {
82 |     mainClass = "org.embulk.input.mongodb.MongodbInputPlugin"
83 |     category = "input"
84 |     type = "mongodb"
85 | }
86 | 
87 | publishing {
88 |     publications {
89 |         maven(MavenPublication) {
90 |             groupId = project.group
91 |             artifactId = project.name
92 | 
93 |             from components.java  // Must be "components.java". The dependency modification works only for it.
94 |             // javadocJar and sourcesJar are added by java.withJavadocJar() and java.withSourcesJar() above.
95 | // See: https://docs.gradle.org/current/javadoc/org/gradle/api/plugins/JavaPluginExtension.html 96 | 97 | pom { // https://central.sonatype.org/pages/requirements.html 98 | packaging "jar" 99 | 100 | name = project.name 101 | description = project.description 102 | url = "https://www.embulk.org/" 103 | 104 | licenses { 105 | license { 106 | // http://central.sonatype.org/pages/requirements.html#license-information 107 | name = "The Apache License, Version 2.0" 108 | url = "https://www.apache.org/licenses/LICENSE-2.0.txt" 109 | } 110 | } 111 | 112 | developers { 113 | developer { 114 | name = "Kazuyuki Honda" 115 | email = "hakobera@gmail.com" 116 | } 117 | developer { 118 | name = "Satoshi Akama" 119 | email = "satoshiakama@gmail.com" 120 | } 121 | developer { 122 | name = "Shinichi Ishimura" 123 | email = "shiketaudonko41@gmail.com" 124 | } 125 | developer { 126 | name = "Dai MIKURUBE" 127 | email = "dmikurube@treasure-data.com" 128 | } 129 | } 130 | 131 | scm { 132 | connection = "scm:git:git://github.com/embulk/embulk-input-mongodb.git" 133 | developerConnection = "scm:git:git@github.com:embulk/embulk-input-mongodb.git" 134 | url = "https://github.com/embulk/embulk-input-mongodb" 135 | } 136 | } 137 | } 138 | } 139 | 140 | repositories { 141 | maven { // publishMavenPublicationToMavenCentralRepository 142 | name = "mavenCentral" 143 | if (project.version.endsWith("-SNAPSHOT")) { 144 | url "https://oss.sonatype.org/content/repositories/snapshots" 145 | } else { 146 | url "https://oss.sonatype.org/service/local/staging/deploy/maven2" 147 | } 148 | 149 | credentials { 150 | username = project.hasProperty("ossrhUsername") ? ossrhUsername : "" 151 | password = project.hasProperty("ossrhPassword") ? ossrhPassword : "" 152 | } 153 | } 154 | } 155 | } 156 | 157 | signing { 158 | sign publishing.publications.maven 159 | } 160 | 161 | gem { 162 | authors = [ "Kazuyuki Honda", "Dai MIKURUBE" ] 163 | email = [ "hakobera@gmail.com", "dmikurube@treasure-data.com" ] 164 | summary = "Mongodb input plugin for Embulk" 165 | homepage = "https://github.com/embulk/embulk-input-mongodb" 166 | licenses = [ "Apache-2.0" ] 167 | 168 | from rootProject.file("LICENSE") 169 | from rootProject.file("NOTICE_GEM") 170 | rename ("NOTICE_GEM", "NOTICE") 171 | } 172 | 173 | gemPush { 174 | host = "https://rubygems.org" 175 | outputs.upToDateWhen { false } 176 | } 177 | 178 | checkstyle { 179 | configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml") 180 | toolVersion = '6.14.1' 181 | } 182 | checkstyleMain { 183 | configFile = file("${project.rootDir}/config/checkstyle/default.xml") 184 | ignoreFailures = true 185 | } 186 | checkstyleTest { 187 | configFile = file("${project.rootDir}/config/checkstyle/default.xml") 188 | ignoreFailures = true 189 | } 190 | task checkstyle(type: Checkstyle) { 191 | classpath = sourceSets.main.output + sourceSets.test.output 192 | source = sourceSets.main.allJava + sourceSets.test.allJava 193 | } 194 | -------------------------------------------------------------------------------- /config/checkstyle/checkstyle.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 
84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /config/checkstyle/default.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.1' 2 | services: 3 | mongodb: 4 | container_name: embulk-input-mongodb_server 5 | image: mongo:3.6 6 | restart: always 7 | ports: 8 | - 27017:27017 9 | - 27018:27018 10 | environment: 11 | MONGO_INITDB_ROOT_USERNAME: admin 12 | MONGO_INITDB_ROOT_PASSWORD: tiger 13 | volumes: 14 | - mongodb-data:/data/db 15 | - mongodb-configdb:/data/configdb 16 | 17 | mongo-express: 18 | container_name: mongo-express 19 | image: mongo-express 20 | restart: always 21 | ports: 22 | - 8081:8081 23 | depends_on: 24 | - mongodb 25 | environment: 26 | ME_CONFIG_MONGODB_ADMINUSERNAME: admin 27 | ME_CONFIG_MONGODB_ADMINPASSWORD: tiger 28 | ME_CONFIG_MONGODB_SERVER: mongodb 29 | 30 | mongoClientTemp: 31 | container_name: mongoClientTemp 32 | image: mongo:3.6 33 | depends_on: 34 | - mongodb 35 | # Sleep to wait MongoDB wake up on Travis CI 36 | command: > 37 | /bin/bash -c 38 | "sleep 15 && 39 | mongo --host mongodb -u admin -p tiger admin --eval \"db.getSiblingDB('mydb').createUser({user:'mongo_user', pwd:'dbpass', roles:[{role:'readWrite',db:'mydb'}]});\"" 40 | volumes: 41 | mongodb-data: 42 | driver: local 43 | mongodb-configdb: 44 | driver: local 45 | -------------------------------------------------------------------------------- /gradle/dependency-locks/embulkPluginRuntime.lockfile: -------------------------------------------------------------------------------- 1 | # This is a Gradle generated file for dependency locking. 2 | # Manual edits can break the build and are not advised. 3 | # This file is expected to be part of source control. 
4 | com.fasterxml.jackson.core:jackson-annotations:2.6.7 5 | com.fasterxml.jackson.core:jackson-core:2.6.7 6 | com.fasterxml.jackson.core:jackson-databind:2.6.7 7 | com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.6.7 8 | javax.validation:validation-api:1.1.0.Final 9 | org.embulk:embulk-util-config:0.3.1 10 | org.mongodb:mongo-java-driver:3.8.1 11 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/embulk/embulk-input-mongodb/c54dfd9bedd0c0a22d936ca2fd2ffada807f7ffa/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.2.2-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | # 4 | # Copyright 2015 the original author or authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | ## 21 | ## Gradle start up script for UN*X 22 | ## 23 | ############################################################################## 24 | 25 | # Attempt to set APP_HOME 26 | # Resolve links: $0 may be a link 27 | PRG="$0" 28 | # Need this for relative symlinks. 29 | while [ -h "$PRG" ] ; do 30 | ls=`ls -ld "$PRG"` 31 | link=`expr "$ls" : '.*-> \(.*\)$'` 32 | if expr "$link" : '/.*' > /dev/null; then 33 | PRG="$link" 34 | else 35 | PRG=`dirname "$PRG"`"/$link" 36 | fi 37 | done 38 | SAVED="`pwd`" 39 | cd "`dirname \"$PRG\"`/" >/dev/null 40 | APP_HOME="`pwd -P`" 41 | cd "$SAVED" >/dev/null 42 | 43 | APP_NAME="Gradle" 44 | APP_BASE_NAME=`basename "$0"` 45 | 46 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 47 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 48 | 49 | # Use the maximum available, or set MAX_FD != -1 to use that value. 50 | MAX_FD="maximum" 51 | 52 | warn () { 53 | echo "$*" 54 | } 55 | 56 | die () { 57 | echo 58 | echo "$*" 59 | echo 60 | exit 1 61 | } 62 | 63 | # OS specific support (must be 'true' or 'false'). 
64 | cygwin=false 65 | msys=false 66 | darwin=false 67 | nonstop=false 68 | case "`uname`" in 69 | CYGWIN* ) 70 | cygwin=true 71 | ;; 72 | Darwin* ) 73 | darwin=true 74 | ;; 75 | MINGW* ) 76 | msys=true 77 | ;; 78 | NONSTOP* ) 79 | nonstop=true 80 | ;; 81 | esac 82 | 83 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 84 | 85 | # Determine the Java command to use to start the JVM. 86 | if [ -n "$JAVA_HOME" ] ; then 87 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 88 | # IBM's JDK on AIX uses strange locations for the executables 89 | JAVACMD="$JAVA_HOME/jre/sh/java" 90 | else 91 | JAVACMD="$JAVA_HOME/bin/java" 92 | fi 93 | if [ ! -x "$JAVACMD" ] ; then 94 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 95 | 96 | Please set the JAVA_HOME variable in your environment to match the 97 | location of your Java installation." 98 | fi 99 | else 100 | JAVACMD="java" 101 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 102 | 103 | Please set the JAVA_HOME variable in your environment to match the 104 | location of your Java installation." 105 | fi 106 | 107 | # Increase the maximum file descriptors if we can. 108 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 109 | MAX_FD_LIMIT=`ulimit -H -n` 110 | if [ $? -eq 0 ] ; then 111 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 112 | MAX_FD="$MAX_FD_LIMIT" 113 | fi 114 | ulimit -n $MAX_FD 115 | if [ $? -ne 0 ] ; then 116 | warn "Could not set maximum file descriptor limit: $MAX_FD" 117 | fi 118 | else 119 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 120 | fi 121 | fi 122 | 123 | # For Darwin, add options to specify how the application appears in the dock 124 | if $darwin; then 125 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 126 | fi 127 | 128 | # For Cygwin or MSYS, switch paths to Windows format before running java 129 | if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then 130 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 131 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 132 | JAVACMD=`cygpath --unix "$JAVACMD"` 133 | 134 | # We build the pattern for arguments to be converted via cygpath 135 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 136 | SEP="" 137 | for dir in $ROOTDIRSRAW ; do 138 | ROOTDIRS="$ROOTDIRS$SEP$dir" 139 | SEP="|" 140 | done 141 | OURCYGPATTERN="(^($ROOTDIRS))" 142 | # Add a user-defined pattern to the cygpath arguments 143 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 144 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 145 | fi 146 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 147 | i=0 148 | for arg in "$@" ; do 149 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 150 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 151 | 152 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 153 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 154 | else 155 | eval `echo args$i`="\"$arg\"" 156 | fi 157 | i=`expr $i + 1` 158 | done 159 | case $i in 160 | 0) set -- ;; 161 | 1) set -- "$args0" ;; 162 | 2) set -- "$args0" "$args1" ;; 163 | 3) set -- "$args0" "$args1" "$args2" ;; 164 | 4) set -- "$args0" "$args1" "$args2" "$args3" ;; 165 | 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 166 | 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 167 | 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" 
"$args6" ;; 168 | 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 169 | 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 170 | esac 171 | fi 172 | 173 | # Escape application args 174 | save () { 175 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 176 | echo " " 177 | } 178 | APP_ARGS=`save "$@"` 179 | 180 | # Collect all arguments for the java command, following the shell quoting and substitution rules 181 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 182 | 183 | exec "$JAVACMD" "$@" 184 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if "%ERRORLEVEL%" == "0" goto init 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto init 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 64 | 65 | goto fail 66 | 67 | :init 68 | @rem Get command-line arguments, handling Windows variants 69 | 70 | if not "%OS%" == "Windows_NT" goto win9xME_args 71 | 72 | :win9xME_args 73 | @rem Slurp the command line arguments. 
74 | set CMD_LINE_ARGS= 75 | set _SKIP=2 76 | 77 | :win9xME_args_slurp 78 | if "x%~1" == "x" goto execute 79 | 80 | set CMD_LINE_ARGS=%* 81 | 82 | :execute 83 | @rem Setup the command line 84 | 85 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 86 | 87 | @rem Execute Gradle 88 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 89 | 90 | :end 91 | @rem End local scope for the variables with windows NT shell 92 | if "%ERRORLEVEL%"=="0" goto mainEnd 93 | 94 | :fail 95 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 96 | rem the _cmd.exe /c_ return code! 97 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 98 | exit /b 1 99 | 100 | :mainEnd 101 | if "%OS%"=="Windows_NT" endlocal 102 | 103 | :omega 104 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/input/mongodb/AuthMethod.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2018 The Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.embulk.input.mongodb; 18 | 19 | import com.fasterxml.jackson.annotation.JsonCreator; 20 | import com.fasterxml.jackson.annotation.JsonValue; 21 | import org.embulk.config.ConfigException; 22 | 23 | import java.util.Locale; 24 | 25 | public enum AuthMethod 26 | { 27 | AUTO, 28 | SCRAM_SHA_1, 29 | MONGODB_CR; 30 | 31 | @JsonValue 32 | @Override 33 | public String toString() 34 | { 35 | return name().toLowerCase(Locale.ENGLISH); 36 | } 37 | 38 | @JsonCreator 39 | public static AuthMethod fromString(String value) 40 | { 41 | switch (value.replace("_", "-")) { 42 | case "scram-sha-1": 43 | return SCRAM_SHA_1; 44 | case "mongodb-cr": 45 | return MONGODB_CR; 46 | case "auto": 47 | return AUTO; 48 | default: 49 | throw new ConfigException(String.format("Unknown auth_method '%s'. Supported auth_method are scram-sha-1, mongodb-cr, auto", value)); 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/input/mongodb/HostTask.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Kazuyuki Honda, and the Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | package org.embulk.input.mongodb; 18 | 19 | import org.embulk.util.config.Config; 20 | import org.embulk.util.config.ConfigDefault; 21 | import org.embulk.util.config.Task; 22 | 23 | public interface HostTask 24 | extends Task 25 | { 26 | @Config("host") 27 | String getHost(); 28 | 29 | @Config("port") 30 | @ConfigDefault("27017") 31 | int getPort(); 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/input/mongodb/MongodbInputPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Kazuyuki Honda, and the Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.embulk.input.mongodb; 18 | 19 | import com.fasterxml.jackson.databind.ObjectMapper; 20 | import com.mongodb.BasicDBObject; 21 | import com.mongodb.MongoClient; 22 | import com.mongodb.MongoClientOptions; 23 | import com.mongodb.MongoClientURI; 24 | import com.mongodb.MongoCredential; 25 | import com.mongodb.MongoException; 26 | import com.mongodb.ServerAddress; 27 | import com.mongodb.client.MongoCollection; 28 | import com.mongodb.client.MongoCursor; 29 | import com.mongodb.client.MongoDatabase; 30 | import org.bson.Document; 31 | import org.bson.codecs.configuration.CodecRegistries; 32 | import org.bson.codecs.configuration.CodecRegistry; 33 | import org.bson.conversions.Bson; 34 | import org.bson.json.JsonParseException; 35 | import org.embulk.config.ConfigDiff; 36 | import org.embulk.config.ConfigException; 37 | import org.embulk.config.ConfigSource; 38 | import org.embulk.config.TaskReport; 39 | import org.embulk.config.TaskSource; 40 | import org.embulk.spi.BufferAllocator; 41 | import org.embulk.spi.Column; 42 | import org.embulk.spi.Exec; 43 | import org.embulk.spi.InputPlugin; 44 | import org.embulk.spi.PageBuilder; 45 | import org.embulk.spi.PageOutput; 46 | import org.embulk.spi.Schema; 47 | import org.embulk.spi.type.Types; 48 | import org.embulk.util.config.ConfigMapper; 49 | import org.embulk.util.config.ConfigMapperFactory; 50 | import org.embulk.util.config.TaskMapper; 51 | import org.msgpack.value.Value; 52 | import org.slf4j.Logger; 53 | import org.slf4j.LoggerFactory; 54 | 55 | import javax.net.ssl.SSLContext; 56 | import javax.net.ssl.TrustManager; 57 | import javax.net.ssl.X509TrustManager; 58 | import java.io.IOException; 59 | import java.net.UnknownHostException; 60 | import java.security.KeyManagementException; 61 | import java.security.NoSuchAlgorithmException; 62 | import java.security.cert.X509Certificate; 63 | import java.util.ArrayList; 64 | import java.util.Arrays; 65 | import java.util.HashMap; 66 | import java.util.LinkedHashMap; 67 | import java.util.List; 68 | import java.util.Map; 69 | import java.util.Optional; 70 | 71 | public class MongodbInputPlugin 72 | implements InputPlugin 73 | { 74 | private final Logger log = LoggerFactory.getLogger(MongodbInputPlugin.class); 75 | 76 | 
private static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory.builder().addDefaultModules().build(); 77 | 78 | @Override 79 | public ConfigDiff transaction(ConfigSource config, 80 | InputPlugin.Control control) 81 | { 82 | final ConfigMapper configMapper = CONFIG_MAPPER_FACTORY.createConfigMapper(); 83 | final PluginTask task = configMapper.map(config, PluginTask.class); 84 | 85 | if (task.getFields().isPresent()) { 86 | throw new ConfigException("The 'fields' option is deprecated; remove it from the configuration"); 87 | } 88 | if (task.getIncrementalField().isPresent() && !task.getSort().equals("{}")) { 89 | throw new ConfigException("'sort' and incremental loading can't be used together"); 90 | } 91 | if (task.getIncrementalField().isPresent() && task.getSkip().isPresent()) { 92 | throw new ConfigException("'skip' and incremental loading can't be used together"); 93 | } 94 | 95 | if (task.getAggregation().isPresent()) { 96 | if (task.getIncrementalField().isPresent()) { 97 | throw new ConfigException("'aggregation' and incremental loading can't be used together"); 98 | } 99 | if (!task.getSort().equals("{}")) { 100 | throw new ConfigException("'sort' and 'aggregation' can't be used together"); 101 | } 102 | if (task.getLimit().isPresent()) { 103 | throw new ConfigException("'limit' and 'aggregation' can't be used together"); 104 | } 105 | if (task.getSkip().isPresent()) { 106 | throw new ConfigException("'skip' and 'aggregation' can't be used together"); 107 | } 108 | if (!task.getQuery().equals("{}")) { 109 | throw new ConfigException("'query' and 'aggregation' can't be used together"); 110 | } 111 | } 112 | 113 | Map<String, String> newCondition = buildIncrementalCondition(task); 114 | task.setQuery(newCondition.get("query")); 115 | task.setSort(newCondition.get("sort")); 116 | 117 | validateJsonField("projection", task.getProjection()); 118 | validateJsonField("query", task.getQuery()); 119 | validateJsonField("sort", task.getSort()); 120 | if (task.getAggregation().isPresent()) { 121 | validateJsonField("aggregation", task.getAggregation().get()); 122 | } 123 | 124 | // Connect once to throw ConfigException at an earlier stage of execution 125 | try { 126 | connect(task); 127 | } 128 | catch (UnknownHostException | MongoException ex) { 129 | throw new ConfigException(ex); 130 | } 131 | Schema schema = Schema.builder().add(task.getJsonColumnName(), Types.JSON).build(); 132 | return resume(task.toTaskSource(), schema, 1, control); 133 | } 134 | 135 | @Override 136 | public ConfigDiff resume(TaskSource taskSource, 137 | Schema schema, int taskCount, 138 | InputPlugin.Control control) 139 | { 140 | List<TaskReport> report = control.run(taskSource, schema, taskCount); 141 | 142 | ConfigDiff configDiff = CONFIG_MAPPER_FACTORY.newConfigDiff(); 143 | if (report.size() > 0 && report.get(0).has("last_record")) { 144 | configDiff.set("last_record", report.get(0).get(Map.class, "last_record")); 145 | } 146 | 147 | return configDiff; 148 | } 149 | 150 | @Override 151 | public void cleanup(TaskSource taskSource, 152 | Schema schema, int taskCount, 153 | List<TaskReport> successTaskReports) 154 | { 155 | // do nothing 156 | } 157 | 158 | @Override 159 | public TaskReport run(TaskSource taskSource, 160 | Schema schema, int taskIndex, 161 | PageOutput output) 162 | { 163 | final TaskMapper taskMapper = CONFIG_MAPPER_FACTORY.createTaskMapper(); 164 | final PluginTask task = taskMapper.map(taskSource, PluginTask.class); 165 | BufferAllocator allocator = Exec.getBufferAllocator(); 166 | PageBuilder pageBuilder = Exec.getPageBuilder(allocator, schema, output);
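// The schema built in transaction() has exactly one column: each MongoDB document
// is emitted whole into a single JSON column named by json_column_name ("record"
// by default), so `column` below is that sole column.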
167 | final Column column = pageBuilder.getSchema().getColumns().get(0); 168 | 169 | ValueCodec valueCodec = new ValueCodec(task.getStopOnInvalidRecord(), task); 170 | MongoCollection<Value> collection; 171 | try { 172 | MongoDatabase db = connect(task); 173 | 174 | CodecRegistry registry = CodecRegistries.fromRegistries( 175 | MongoClient.getDefaultCodecRegistry(), 176 | CodecRegistries.fromCodecs(valueCodec) 177 | ); 178 | collection = db.getCollection(task.getCollection(), Value.class) 179 | .withCodecRegistry(registry); 180 | } 181 | catch (UnknownHostException | MongoException ex) { 182 | throw new ConfigException(ex); 183 | } 184 | 185 | Bson query = BasicDBObject.parse(task.getQuery()); 186 | Bson projection = BasicDBObject.parse(task.getProjection()); 187 | Bson sort = BasicDBObject.parse(task.getSort()); 188 | 189 | log.trace("query: {}", query); 190 | log.trace("projection: {}", projection); 191 | log.trace("sort: {}", sort); 192 | if (task.getLimit().isPresent()) { 193 | log.trace("limit: {}", task.getLimit()); 194 | } 195 | if (task.getSkip().isPresent()) { 196 | log.trace("skip: {}", task.getSkip()); 197 | } 198 | 199 | if (task.getAggregation().isPresent()) { 200 | Bson aggregationStage = Document.parse(task.getAggregation().get()); 201 | List<Bson> aggregation = Arrays.asList(aggregationStage); 202 | try (MongoCursor<Value> cursor = collection 203 | .aggregate(aggregation).iterator()) { 204 | while (cursor.hasNext()) { 205 | pageBuilder.setJson(column, cursor.next()); 206 | pageBuilder.addRecord(); 207 | } 208 | } catch (MongoException ex) { 209 | if (ex instanceof RuntimeException) { 210 | throw ex; 211 | } 212 | throw new RuntimeException(ex); 213 | } 214 | } 215 | else { 216 | try (MongoCursor<Value> cursor = collection 217 | .find(query) 218 | .projection(projection) 219 | .sort(sort) 220 | .batchSize(task.getBatchSize()) 221 | .limit(task.getLimit().orElse(0)) 222 | .skip(task.getSkip().orElse(0)) 223 | .iterator()) { 224 | while (cursor.hasNext()) { 225 | pageBuilder.setJson(column, cursor.next()); 226 | pageBuilder.addRecord(); 227 | } 228 | } catch (MongoException ex) { 229 | if (ex instanceof RuntimeException) { 230 | throw ex; 231 | } 232 | throw new RuntimeException(ex); 233 | } 234 | } 235 | 236 | pageBuilder.finish(); 237 | return updateTaskReport(CONFIG_MAPPER_FACTORY.newTaskReport(), valueCodec, task); 238 | } 239 | 240 | private TaskReport updateTaskReport(TaskReport report, ValueCodec valueCodec, PluginTask task) 241 | { 242 | final TaskReport lastRecord = CONFIG_MAPPER_FACTORY.newTaskReport(); 243 | if (valueCodec.getLastRecord() != null && valueCodec.getProcessedRecordCount() > 0) { 244 | for (String k : valueCodec.getLastRecord().keySet()) { 245 | String value = valueCodec.getLastRecord().get(k).toString(); 246 | Map<String, String> types = valueCodec.getLastRecordType(); 247 | HashMap<String, String> innerValue = new HashMap<>(); 248 | switch (types.get(k)) { 249 | case "OBJECT_ID": 250 | innerValue.put("$oid", value); 251 | lastRecord.set(k, innerValue); 252 | break; 253 | case "DATE_TIME": 254 | innerValue.put("$date", value); 255 | lastRecord.set(k, innerValue); 256 | break; 257 | case "INT32": 258 | lastRecord.set(k, Integer.valueOf(value)); break; 259 | case "INT64": 260 | case "TIMESTAMP": 261 | lastRecord.set(k, Long.valueOf(value)); break; // Long: a 64-bit value would overflow Integer.valueOf 262 | case "BOOLEAN": 263 | lastRecord.set(k, Boolean.valueOf(value)); 264 | break; 265 | case "DOUBLE": 266 | lastRecord.set(k, Double.valueOf(value)); 267 | break; 268 | case "DOCUMENT": 269 | case "ARRAY": 270 | throw new ConfigException(String.format("Unsupported type '%s' was given for 'last_record' [%s]", types.get(k), value)); 271 | default: 272 | lastRecord.set(k, value); 273 | } 274 | } 275 | } 276 | else if (task.getIncrementalField().isPresent() && task.getLastRecord().isPresent()) { 277 | for (String field : task.getIncrementalField().get()) { 278 | if (task.getLastRecord().get().containsKey(field)) { 279 | lastRecord.set(field, task.getLastRecord().get().get(field)); 280 | } 281 | } 282 | } 283 | report.setNested("last_record", lastRecord); 284 | return report; 285 | } 286 |
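// For reference, a hypothetical "last_record" entry produced above (values invented):
//   {"_id": {"$oid": "507f1f77bcf86cd799439011"}, "int32_field": 5}
// OBJECT_ID and DATE_TIME values are wrapped in $oid/$date so that
// buildIncrementalCondition() can splice them back into a query verbatim.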
ConfigException(String.format("Unsupported type '%s' was given for 'last_record' [%s]", types.get(k), value)); 271 | default: 272 | lastRecord.set(k, value); 273 | } 274 | } 275 | } 276 | else if (task.getIncrementalField().isPresent() && task.getLastRecord().isPresent()) { 277 | for (String field : task.getIncrementalField().get()) { 278 | if (task.getLastRecord().get().containsKey(field)) { 279 | lastRecord.set(field, task.getLastRecord().get().get(field)); 280 | } 281 | } 282 | } 283 | report.setNested("last_record", lastRecord); 284 | return report; 285 | } 286 | 287 | @Override 288 | public ConfigDiff guess(ConfigSource config) 289 | { 290 | return CONFIG_MAPPER_FACTORY.newConfigDiff(); 291 | } 292 | 293 | private MongoDatabase connect(final PluginTask task) throws UnknownHostException, MongoException 294 | { 295 | MongoClient mongoClient; 296 | String database; 297 | 298 | if (!task.getUri().isPresent() && !task.getHosts().isPresent()) { 299 | throw new ConfigException("'uri' or 'hosts' is required"); 300 | } 301 | 302 | if (task.getUri().isPresent()) { 303 | MongoClientURI uri = new MongoClientURI(task.getUri().get()); 304 | database = uri.getDatabase(); 305 | mongoClient = new MongoClient(uri); 306 | } 307 | else { 308 | mongoClient = createClientFromParams(task); 309 | database = task.getDatabase().get(); 310 | } 311 | 312 | MongoDatabase db = mongoClient.getDatabase(database); 313 | // Get collection count for throw Exception 314 | db.getCollection(task.getCollection()).count(); 315 | return db; 316 | } 317 | 318 | private MongoClient createClientFromParams(PluginTask task) 319 | { 320 | if (!task.getHosts().isPresent()) { 321 | throw new ConfigException("'hosts' option's value is required but empty"); 322 | } 323 | if (!task.getDatabase().isPresent()) { 324 | throw new ConfigException("'database' option's value is required but empty"); 325 | } 326 | 327 | List addresses = new ArrayList<>(); 328 | for (HostTask host : task.getHosts().get()) { 329 | addresses.add(new ServerAddress(host.getHost(), host.getPort())); 330 | } 331 | 332 | if (task.getUser().isPresent()) { 333 | return new MongoClient(addresses, Arrays.asList(createCredential(task)), createMongoClientOptions(task)); 334 | } 335 | else { 336 | return new MongoClient(addresses, createMongoClientOptions(task)); 337 | } 338 | } 339 | 340 | private MongoClientOptions createMongoClientOptions(PluginTask task) 341 | { 342 | MongoClientOptions.Builder builder = new MongoClientOptions.Builder(); 343 | if (task.getTls()) { 344 | builder.sslEnabled(true); 345 | if (task.getTlsInsecure()) { 346 | builder.sslInvalidHostNameAllowed(true); 347 | builder.sslContext(createSSLContextToAcceptAnyCert()); 348 | } 349 | } 350 | return builder.build(); 351 | } 352 | 353 | private SSLContext createSSLContextToAcceptAnyCert() 354 | { 355 | TrustManager[] trustAllCerts = new TrustManager[] { 356 | new X509TrustManager() 357 | { 358 | public X509Certificate[] getAcceptedIssuers() 359 | { 360 | return new X509Certificate[0]; 361 | } 362 | public void checkClientTrusted( 363 | X509Certificate[] certs, String authType) 364 | { 365 | } 366 | public void checkServerTrusted( 367 | X509Certificate[] certs, String authType) 368 | { 369 | } 370 | } 371 | }; 372 | try { 373 | SSLContext sc = SSLContext.getInstance("SSL"); 374 | sc.init(null, trustAllCerts, new java.security.SecureRandom()); 375 | return sc; 376 | } 377 | catch (NoSuchAlgorithmException | KeyManagementException e) { 378 | throw new ConfigException(e); 379 | } 380 | } 381 | 382 | // @see 
http://mongodb.github.io/mongo-java-driver/3.0/driver-async/reference/connecting/authenticating/ 383 | private MongoCredential createCredential(PluginTask task) 384 | { 385 | MongoCredential credential; 386 | String authSource = task.getAuthSource().isPresent() ? task.getAuthSource().get() : task.getDatabase().get(); 387 | AuthMethod authMethod = task.getAuthMethod().isPresent() ? task.getAuthMethod().get() : AuthMethod.AUTO; 388 | switch (authMethod) { 389 | case SCRAM_SHA_1: 390 | credential = MongoCredential.createScramSha1Credential( 391 | task.getUser().get(), 392 | authSource, 393 | task.getPassword().get().toCharArray()); 394 | break; 395 | case MONGODB_CR: 396 | credential = MongoCredential.createMongoCRCredential( 397 | task.getUser().get(), 398 | authSource, 399 | task.getPassword().get().toCharArray()); 400 | break; 401 | case AUTO: 402 | default: 403 | /* The client will negotiate the best mechanism based on the 404 | * version of the server that the client is authenticating to. 405 | * If the server version is 3.0 or higher, the driver will authenticate using the SCRAM-SHA-1 mechanism. 406 | * Otherwise, the driver will authenticate using the MONGODB_CR mechanism. 407 | */ 408 | credential = MongoCredential.createCredential( 409 | task.getUser().get(), 410 | authSource, 411 | task.getPassword().get().toCharArray() 412 | ); 413 | } 414 | return credential; 415 | } 416 | 417 | private Map<String, String> buildIncrementalCondition(PluginTask task) 418 | { 419 | Map<String, String> result = new HashMap<>(); 420 | String query = task.getQuery(); 421 | String sort = task.getSort(); 422 | result.put("query", query); 423 | result.put("sort", sort); 424 | 425 | Optional<List<String>> incrementalField = task.getIncrementalField(); 426 | Optional<Map<String, Object>> lastRecord = task.getLastRecord(); 427 | if (!incrementalField.isPresent()) { 428 | return result; 429 | } 430 | 431 | Map<String, Object> newQuery = new LinkedHashMap<>(); 432 | Map<String, Integer> newSort = new LinkedHashMap<>(); 433 | ObjectMapper mapper = new ObjectMapper(); 434 | 435 | try { 436 | @SuppressWarnings("unchecked") 437 | Map<String, Object> queryJson = mapper.readValue(query, Map.class); 438 | for (String k : queryJson.keySet()) { 439 | newQuery.put(k, queryJson.get(k)); 440 | } 441 | 442 | if (lastRecord.isPresent()) { 443 | for (String k : lastRecord.get().keySet()) { 444 | Map<String, Object> v = new HashMap<>(); 445 | Object record = lastRecord.get().get(k); 446 | 447 | if (newQuery.containsKey(k)) { 448 | throw new ConfigException("Field declaration was duplicated between 'incremental_field' and 'query' options"); 449 | } 450 | 451 | v.put("$gt", record); 452 | newQuery.put(k, v); 453 | } 454 | String newQueryString = mapper.writeValueAsString(newQuery); 455 | log.info(String.format("New query value was generated for incremental load: '%s'", newQueryString)); 456 | result.put("query", newQueryString); 457 | } 458 | 459 | for (String k : incrementalField.get()) { 460 | newSort.put(k, 1); 461 | } 462 | 463 | String newSortString = mapper.writeValueAsString(newSort); 464 | log.info(String.format("New sort value was generated for incremental load: '%s'", newSortString)); 465 | result.put("sort", newSortString); 466 | 467 | return result; 468 | } 469 | catch (JsonParseException | IOException ex) { 470 | throw new ConfigException("Could not generate a new query for incremental load."); 471 | } 472 | } 473 | 474 | private void validateJsonField(String name, String jsonString) 475 | { 476 | try { 477 | Document.parse(jsonString); 478 | } 479 | catch (JsonParseException ex) { 480 | throw new ConfigException(String.format("Invalid JSON string was given for the '%s' parameter. [%s]", name, jsonString)); 481 | } 482 | } 483 | } 484 |
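A quick sketch of what buildIncrementalCondition() above produces; the option names are real, the values are invented. Given query {}, incremental_field: [int32_field], and a previous last_record of {"int32_field": 5}, the next run behaves roughly as if:

    // Hypothetical equivalent of the generated condition (values invented)
    Map<String, Object> newQuery = new LinkedHashMap<>();
    newQuery.put("int32_field", Collections.singletonMap("$gt", 5)); // query: {"int32_field":{"$gt":5}}
    Map<String, Integer> newSort = new LinkedHashMap<>();
    newSort.put("int32_field", 1);                                   // sort:  {"int32_field":1}

The sort ascends on the incremental fields so the last row scanned is also the new high-water mark.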
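As a usage sketch (the option names come from the interface below; connection values are placeholders): the smallest useful configuration is either "uri", or "hosts" plus "database", and always "collection". Built programmatically the way the tests do it:

    // Minimal config sketch; the URI and collection name are placeholders
    ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource()
            .set("uri", "mongodb://user:pass@localhost:27017/mydb")
            .set("collection", "my_collection")
            .set("batch_size", 5000); // optional; defaults to 10000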
-------------------------------------------------------------------------------- /src/main/java/org/embulk/input/mongodb/PluginTask.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Kazuyuki Honda, and the Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.embulk.input.mongodb; 18 | 19 | import org.embulk.util.config.Config; 20 | import org.embulk.util.config.ConfigDefault; 21 | import org.embulk.util.config.Task; 22 | import org.embulk.util.config.units.SchemaConfig; 23 | 24 | import javax.validation.constraints.Min; 25 | 26 | import java.util.List; 27 | import java.util.Map; 28 | import java.util.Optional; 29 | 30 | public interface PluginTask 31 | extends Task 32 | { 33 | // MongoDB connection string URI 34 | @Config("uri") 35 | @ConfigDefault("null") 36 | Optional<String> getUri(); 37 | 38 | @Config("hosts") 39 | @ConfigDefault("null") 40 | Optional<List<HostTask>> getHosts(); 41 | 42 | @Config("tls") 43 | @ConfigDefault("false") 44 | boolean getTls(); 45 | 46 | // Behaves like the tlsInsecure URI option of the official `mongo` command. 47 | // (https://www.mongodb.com/docs/manual/reference/connection-string/#mongodb-urioption-urioption.tlsInsecure) 48 | @Config("tls_insecure") 49 | @ConfigDefault("false") 50 | boolean getTlsInsecure(); 51 | 52 | @Config("auth_method") 53 | @ConfigDefault("null") 54 | Optional<AuthMethod> getAuthMethod(); 55 | 56 | @Config("auth_source") 57 | @ConfigDefault("null") 58 | Optional<String> getAuthSource(); 59 | 60 | @Config("user") 61 | @ConfigDefault("null") 62 | Optional<String> getUser(); 63 | 64 | @Config("password") 65 | @ConfigDefault("null") 66 | Optional<String> getPassword(); 67 | 68 | @Config("database") 69 | @ConfigDefault("null") 70 | Optional<String> getDatabase(); 71 | 72 | @Config("collection") 73 | String getCollection(); 74 | 75 | @Config("fields") 76 | @ConfigDefault("null") 77 | Optional<SchemaConfig> getFields(); 78 | 79 | @Config("projection") 80 | @ConfigDefault("\"{}\"") 81 | String getProjection(); 82 | 83 | @Config("query") 84 | @ConfigDefault("\"{}\"") 85 | String getQuery(); 86 | void setQuery(String query); 87 | 88 | @Config("aggregation") 89 | @ConfigDefault("null") 90 | Optional<String> getAggregation(); 91 | 92 | @Config("sort") 93 | @ConfigDefault("\"{}\"") 94 | String getSort(); 95 | void setSort(String sort); 96 | 97 | @Config("limit") 98 | @ConfigDefault("null") 99 | Optional<Integer> getLimit(); 100 | 101 | @Config("skip") 102 | @ConfigDefault("null") 103 | Optional<Integer> getSkip(); 104 | 105 | @Config("id_field_name") 106 | @ConfigDefault("\"_id\"") 107 | String getIdFieldName(); 108 | 109 | @Config("batch_size") 110 | @ConfigDefault("10000") 111 | @Min(1) 112 | int getBatchSize(); 113 | 114 | @Config("stop_on_invalid_record") 115 | @ConfigDefault("false") 116 | boolean getStopOnInvalidRecord(); 117 | 118 | @Config("json_column_name") 119 | @ConfigDefault("\"record\"") 120 | String getJsonColumnName(); 121 | 122 | @Config("incremental_field") 123 | @ConfigDefault("null") 124 | Optional<List<String>> getIncrementalField(); 125 | 126 | @Config("last_record") 127 | @ConfigDefault("null") 128 | Optional<Map<String, Object>> getLastRecord(); 129 | } 130 | -------------------------------------------------------------------------------- /src/main/java/org/embulk/input/mongodb/ValueCodec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Kazuyuki Honda, and the Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.embulk.input.mongodb; 18 | 19 | import org.bson.BsonReader; 20 | import org.bson.BsonType; 21 | import org.bson.BsonWriter; 22 | import org.bson.codecs.Codec; 23 | import org.bson.codecs.DecoderContext; 24 | import org.bson.codecs.EncoderContext; 25 | import org.embulk.spi.DataException; 26 | import org.msgpack.value.Value; 27 | import org.slf4j.Logger; 28 | import org.slf4j.LoggerFactory; 29 | 30 | import java.text.SimpleDateFormat; 31 | import java.util.ArrayList; 32 | import java.util.Date; 33 | import java.util.HashMap; 34 | import java.util.LinkedHashMap; 35 | import java.util.List; 36 | import java.util.Map; 37 | import java.util.Optional; 38 | import java.util.TimeZone; 39 | import static org.msgpack.value.ValueFactory.newArray; 40 | import static org.msgpack.value.ValueFactory.newBinary; 41 | import static org.msgpack.value.ValueFactory.newBoolean; 42 | import static org.msgpack.value.ValueFactory.newFloat; 43 | import static org.msgpack.value.ValueFactory.newInteger; 44 | import static org.msgpack.value.ValueFactory.newMap; 45 | import static org.msgpack.value.ValueFactory.newNil; 46 | import static org.msgpack.value.ValueFactory.newString; 47 | 48 | public class ValueCodec implements Codec<Value> 49 | { 50 | private final SimpleDateFormat formatter; 51 | private final Logger log = LoggerFactory.getLogger(MongodbInputPlugin.class); 52 | private final boolean stopOnInvalidRecord; 53 | private final PluginTask task; 54 | private final Optional<List<String>> incrementalField; 55 | private Map<String, Object> lastRecord; 56 | private long processedRecordCount = 0; 57 | private Map<String, String> lastRecordType; 58 | 59 | public ValueCodec(boolean stopOnInvalidRecord, PluginTask task) 60 | { 61 | this.formatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", java.util.Locale.ENGLISH); 62 | formatter.setTimeZone(TimeZone.getTimeZone("UTC")); 63 | this.stopOnInvalidRecord = stopOnInvalidRecord; 64 | this.task = task; 65 | this.incrementalField = task.getIncrementalField(); 66 | this.lastRecord = new HashMap<>(); 67 | this.lastRecordType = new HashMap<>(); 68 | } 69 | 70 | @Override 71 | public void encode(final BsonWriter writer, final Value value, final EncoderContext encoderContext) 72 | { 73 | throw new UnsupportedOperationException(); 74 | } 75 | 76 | @Override 77 | public Value decode(final BsonReader reader, final DecoderContext decoderContext) 78 | { 79 | Map<Value, Value> kvs = new LinkedHashMap<>(); 80 |
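// Each top-level field is decoded into a msgpack Value; "_id" is renamed via
// normalize() to id_field_name, and any field listed in incremental_field has its
// value and BSON type recorded so updateTaskReport() can assemble "last_record".
// Note that processedRecordCount is incremented per field (and per nested
// document), not per top-level document; callers only ever compare it to zero.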
81 | reader.readStartDocument(); 82 | while (reader.readBsonType() != BsonType.END_OF_DOCUMENT) { 83 | String originalFieldName = reader.readName(); 84 | BsonType type = reader.getCurrentBsonType(); 85 | String fieldName = normalize(originalFieldName); 86 | 87 | Value value; 88 | try { 89 | value = readValue(reader, decoderContext); 90 | kvs.put(newString(fieldName), value); 91 | if (incrementalField.isPresent() && incrementalField.get().contains(originalFieldName)) { 92 | this.lastRecord.put(originalFieldName, value); 93 | this.lastRecordType.put(originalFieldName, type.toString()); 94 | } 95 | } 96 | catch (UnknownTypeFoundException ex) { 97 | reader.skipValue(); 98 | if (stopOnInvalidRecord) { 99 | throw ex; 100 | } 101 | log.warn(String.format("Skipped field '%s' because it contains an unsupported BSON type [%s]", 102 | fieldName, type)); 103 | } 104 | this.processedRecordCount++; 105 | } 106 | reader.readEndDocument(); 107 | 108 | return newMap(kvs); 109 | } 110 | 111 | public Value decodeArray(final BsonReader reader, final DecoderContext decoderContext) 112 | { 113 | List<Value> list = new ArrayList<>(); 114 | 115 | reader.readStartArray(); 116 | while (reader.readBsonType() != BsonType.END_OF_DOCUMENT) { 117 | list.add(readValue(reader, decoderContext)); 118 | } 119 | reader.readEndArray(); 120 | 121 | return newArray(list); 122 | } 123 | 124 | private Value readValue(BsonReader reader, DecoderContext decoderContext) 125 | { 126 | switch (reader.getCurrentBsonType()) { 127 | // https://docs.mongodb.com/manual/reference/bson-types/ 128 | // https://github.com/mongodb/mongo-java-driver/tree/master/bson/src/main/org/bson/codecs 129 | case DOUBLE: 130 | return newFloat(reader.readDouble()); 131 | case STRING: 132 | return newString(reader.readString()); 133 | case ARRAY: 134 | return decodeArray(reader, decoderContext); 135 | case BINARY: 136 | return newBinary(reader.readBinaryData().getData(), true); 137 | case OBJECT_ID: 138 | return newString(reader.readObjectId().toString()); 139 | case BOOLEAN: 140 | return newBoolean(reader.readBoolean()); 141 | case DATE_TIME: 142 | return newString(formatter.format(new Date(reader.readDateTime()))); 143 | case NULL: 144 | reader.readNull(); 145 | return newNil(); 146 | case REGULAR_EXPRESSION: 147 | return newString(reader.readRegularExpression().toString()); 148 | case JAVASCRIPT: 149 | return newString(reader.readJavaScript()); 150 | case JAVASCRIPT_WITH_SCOPE: 151 | return newString(reader.readJavaScriptWithScope()); 152 | case INT32: 153 | return newInteger(reader.readInt32()); 154 | case TIMESTAMP: 155 | return newInteger(reader.readTimestamp().getTime()); 156 | case INT64: 157 | return newInteger(reader.readInt64()); 158 | case DOCUMENT: 159 | return decode(reader, decoderContext); 160 | case SYMBOL: 161 | return newString(reader.readSymbol()); 162 | default: // e.g. MIN_KEY, MAX_KEY, DB_POINTER, UNDEFINED 163 | throw new UnknownTypeFoundException(String.format("Unsupported type %s in field '%s'. Please exclude the field with the 'projection' option", 164 | reader.getCurrentBsonType(), reader.getCurrentName())); 165 | } 166 | } 167 | 168 | @Override 169 | public Class<Value> getEncoderClass() 170 | { 171 | return Value.class; 172 | } 173 | 174 | private String normalize(String key) 175 | { 176 | if (key.equals("_id")) { 177 | return task.getIdFieldName(); 178 | } 179 | return key; 180 | } 181 | 182 | public Map<String, Object> getLastRecord() 183 | { 184 | return this.lastRecord; 185 | } 186 | 187 | public Long getProcessedRecordCount() 188 | { 189 | return this.processedRecordCount; 190 | } 191 | 192 | public Map<String, String> getLastRecordType() 193 | { 194 | return this.lastRecordType; 195 | } 196 | 197 | public static class UnknownTypeFoundException extends DataException 198 | { 199 | UnknownTypeFoundException(String message) 200 | { 201 | super(message); 202 | } 203 | } 204 | } 205 |
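To make the decode table above concrete, a hypothetical round trip (field names and values invented): a stored document of

    {"_id": ObjectId("507f1f77bcf86cd799439011"), "at": ISODate("2015-01-27T10:23:49Z"), "n": NumberLong(42)}

comes out of ValueCodec as the msgpack map

    {"_id": "507f1f77bcf86cd799439011", "at": "2015-01-27T10:23:49.000Z", "n": 42}

ObjectIds, dates, regexes, JavaScript and symbols all flatten to strings, while BSON timestamps flatten to their epoch-seconds component.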
-------------------------------------------------------------------------------- /src/test/java/org/embulk/input/mongodb/Pages.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2020 The Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.embulk.input.mongodb; 18 | 19 | import java.util.ArrayList; 20 | import java.util.Collections; 21 | import java.util.Iterator; 22 | import java.util.List; 23 | import org.embulk.spi.Column; 24 | import org.embulk.spi.ColumnVisitor; 25 | import org.embulk.spi.Page; 26 | import org.embulk.spi.PageReader; 27 | import org.embulk.spi.Schema; 28 | 29 | class Pages 30 | { 31 | private Pages() 32 | { 33 | // No instantiation.
34 | } 35 | 36 | static List<Object[]> toObjects(Schema schema, Iterable<Page> pages) 37 | { 38 | return toObjects(schema, pages, false); 39 | } 40 | 41 | private static List<Object[]> toObjects(final Schema schema, final Iterable<Page> pages, final boolean useInstant) 42 | { 43 | final ArrayList<Object[]> builder = new ArrayList<>(); 44 | Iterator<Page> ite = pages.iterator(); 45 | try (PageReader reader = new PageReader(schema)) { 46 | while (ite.hasNext()) { 47 | reader.setPage(ite.next()); 48 | while (reader.nextRecord()) { 49 | builder.add(toObjects(reader, useInstant)); 50 | } 51 | } 52 | } 53 | return Collections.unmodifiableList(builder); 54 | } 55 | 56 | private static Object[] toObjects(final PageReader record, final boolean useInstant) 57 | { 58 | final Object[] values = new Object[record.getSchema().getColumns().size()]; 59 | record.getSchema().visitColumns(new ObjectColumnVisitor(record, useInstant) { 60 | @Override 61 | public void visit(Column column, Object object) 62 | { 63 | values[column.getIndex()] = object; 64 | } 65 | }); 66 | return values; 67 | } 68 | 69 | private static Object[] toObjects(final PageReader record) 70 | { 71 | return toObjects(record, false); 72 | } 73 | 74 | private abstract static class ObjectColumnVisitor implements ColumnVisitor 75 | { 76 | private final PageReader record; 77 | private final boolean useInstant; 78 | 79 | public ObjectColumnVisitor(final PageReader record, final boolean useInstant) 80 | { 81 | this.record = record; 82 | this.useInstant = useInstant; 83 | } 84 | 85 | public ObjectColumnVisitor(PageReader record) 86 | { 87 | this(record, false); 88 | } 89 | 90 | public abstract void visit(Column column, Object obj); 91 | 92 | @Override 93 | public void booleanColumn(Column column) 94 | { 95 | if (record.isNull(column)) { 96 | visit(column, null); 97 | } 98 | else { 99 | visit(column, record.getBoolean(column)); 100 | } 101 | } 102 | 103 | @Override 104 | public void longColumn(Column column) 105 | { 106 | if (record.isNull(column)) { 107 | visit(column, null); 108 | } 109 | else { 110 | visit(column, record.getLong(column)); 111 | } 112 | } 113 | 114 | @Override 115 | public void doubleColumn(Column column) 116 | { 117 | if (record.isNull(column)) { 118 | visit(column, null); 119 | } 120 | else { 121 | visit(column, record.getDouble(column)); 122 | } 123 | } 124 | 125 | @Override 126 | public void stringColumn(Column column) 127 | { 128 | if (record.isNull(column)) { 129 | visit(column, null); 130 | } 131 | else { 132 | visit(column, record.getString(column)); 133 | } 134 | } 135 | 136 | @Override 137 | @SuppressWarnings("deprecation") // https://github.com/embulk/embulk/issues/1292 138 | public void timestampColumn(Column column) 139 | { 140 | if (record.isNull(column)) { 141 | visit(column, null); 142 | } 143 | else { 144 | if (this.useInstant) { 145 | visit(column, record.getTimestampInstant(column)); 146 | } 147 | else { 148 | visit(column, record.getTimestamp(column)); 149 | } 150 | } 151 | } 152 | 153 | @Override 154 | public void jsonColumn(Column column) 155 | { 156 | if (record.isNull(column)) { 157 | visit(column, null); 158 | } 159 | else { 160 | visit(column, record.getJson(column)); 161 | } 162 | } 163 | } 164 | 165 | private static Object getObject(PageReader record, Column column) 166 | { 167 | GetObjectColumnVisitor visitor = new GetObjectColumnVisitor(record); 168 | column.visit(visitor); 169 | return visitor.get(); 170 | } 171 | 172 | private static class GetObjectColumnVisitor extends ObjectColumnVisitor 173 | { 174 | private Object object; 175 | 176 | public GetObjectColumnVisitor(PageReader record) 177 | { 178 | super(record); 179 | } 180 | 181 | public Object get() 182 | { 183 | return object; 184 | } 185 | 186 | public void visit(Column column, Object object) 187 | { 188 | this.object = object; 189 | } 190 | } 191 | } 192 |
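Pages is a test-only helper (it appears to be a trimmed-down copy of the page-reading utility from Embulk's test support): it replays the Page objects captured by MockPageOutput through a PageReader and returns each record as an Object[] indexed by column. The tests below use it as:

    List<Object[]> records = Pages.toObjects(schema, output.pages);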
-------------------------------------------------------------------------------- /src/test/java/org/embulk/input/mongodb/TestMongodbInputPlugin.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Kazuyuki Honda, and the Embulk project 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package org.embulk.input.mongodb; 18 | 19 | import com.fasterxml.jackson.databind.JsonNode; 20 | import com.fasterxml.jackson.databind.ObjectMapper; 21 | import com.google.common.collect.ImmutableList; 22 | import com.google.common.collect.ImmutableMap; 23 | import com.google.common.collect.Lists; 24 | import com.mongodb.MongoClientOptions; 25 | import com.mongodb.MongoClientURI; 26 | import com.mongodb.MongoCredential; 27 | import com.mongodb.client.MongoCollection; 28 | import com.mongodb.client.MongoDatabase; 29 | import org.bson.BsonBinary; 30 | import org.bson.BsonInt64; 31 | import org.bson.BsonJavaScript; 32 | import org.bson.BsonMaxKey; 33 | import org.bson.BsonRegularExpression; 34 | import org.bson.BsonTimestamp; 35 | import org.bson.Document; 36 | import org.bson.types.Symbol; 37 | import org.embulk.EmbulkTestRuntime; 38 | import org.embulk.config.ConfigDiff; 39 | import org.embulk.config.ConfigException; 40 | import org.embulk.config.ConfigSource; 41 | import org.embulk.config.TaskReport; 42 | import org.embulk.config.TaskSource; 43 | import org.embulk.spi.Column; 44 | import org.embulk.spi.InputPlugin; 45 | import org.embulk.spi.Schema; 46 | import org.embulk.spi.TestPageBuilderReader.MockPageOutput; 47 | import org.embulk.spi.type.Types; 48 | import org.embulk.util.config.ConfigMapperFactory; 49 | import org.junit.Before; 50 | import org.junit.Rule; 51 | import org.junit.Test; 52 | import org.junit.rules.ExpectedException; 53 | 54 | import java.lang.reflect.InvocationTargetException; 55 | import java.lang.reflect.Method; 56 | import java.text.DateFormat; 57 | import java.text.SimpleDateFormat; 58 | import java.util.ArrayList; 59 | import java.util.Arrays; 60 | import java.util.HashMap; 61 | import java.util.List; 62 | import java.util.Map; 63 | import java.util.Optional; 64 | import java.util.TimeZone; 65 | 66 | import static org.hamcrest.CoreMatchers.is; 67 | import static org.junit.Assert.assertEquals; 68 | import static org.junit.Assert.assertThat; 69 | 70 | public class TestMongodbInputPlugin 71 | { 72 | private static final ConfigMapperFactory CONFIG_MAPPER_FACTORY = ConfigMapperFactory.builder().addDefaultModules().build(); 73 | 74 | private final String mongoUri = "mongodb://mongo_user:dbpass@localhost:27017/mydb"; 75 | private final String mongoCollection = "my_collection"; 76 | 77 | @Rule 78 | public
EmbulkTestRuntime runtime = new EmbulkTestRuntime(); 79 | 80 | @Rule 81 | public ExpectedException exception = ExpectedException.none(); 82 | 83 | private ConfigSource config; 84 | private MongodbInputPlugin plugin; 85 | private MockPageOutput output; 86 | 87 | @Before 88 | public void createResources() 89 | { 90 | config = config(); 91 | plugin = new MongodbInputPlugin(); 92 | output = new MockPageOutput(); 93 | } 94 | 95 | @Test 96 | public void checkDefaultValues() 97 | { 98 | final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 99 | .set("uri", mongoUri) 100 | .set("collection", mongoCollection); 101 | 102 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 103 | assertEquals("{}", task.getQuery()); 104 | assertEquals("{}", task.getSort()); 105 | assertEquals(Optional.empty(), task.getLimit()); 106 | assertEquals(Optional.empty(), task.getSkip()); 107 | assertEquals((long) 10000, (long) task.getBatchSize()); 108 | assertEquals("record", task.getJsonColumnName()); 109 | assertEquals(Optional.empty(), task.getIncrementalField()); 110 | assertEquals(Optional.empty(), task.getLastRecord()); 111 | } 112 | 113 | @Test(expected = ConfigException.class) 114 | public void checkDefaultValuesUriIsNull() 115 | { 116 | final ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 117 | .set("uri", null) 118 | .set("collection", mongoCollection); 119 | 120 | plugin.transaction(config, new Control()); 121 | } 122 | 123 | @Test(expected = ConfigException.class) 124 | public void checkDefaultValuesInvalidUri() 125 | { 126 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 127 | .set("uri", "mongodb://mongouser:password@non-exists.example.com:23490/test") 128 | .set("collection", mongoCollection); 129 | 130 | plugin.transaction(config, new Control()); 131 | } 132 | 133 | @Test(expected = ConfigException.class) 134 | public void checkDefaultValuesCollectionIsNull() 135 | { 136 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 137 | .set("uri", mongoUri) 138 | .set("collection", null); 139 | 140 | plugin.transaction(config, new Control()); 141 | } 142 | 143 | @Test(expected = ConfigException.class) 144 | public void checkSortCannotUseWithIncremental() 145 | { 146 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 147 | .set("uri", mongoUri) 148 | .set("collection", mongoCollection) 149 | .set("sort", "{ \"field1\": 1 }") 150 | .set("incremental_field", Optional.of(Arrays.asList("account"))); 151 | 152 | plugin.transaction(config, new Control()); 153 | } 154 | 155 | @Test(expected = ConfigException.class) 156 | public void checkSkipCannotUseWithIncremental() 157 | { 158 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 159 | .set("uri", mongoUri) 160 | .set("collection", mongoCollection) 161 | .set("skip", 10) 162 | .set("incremental_field", Optional.of(Arrays.asList("account"))); 163 | 164 | plugin.transaction(config, new Control()); 165 | } 166 | 167 | @Test(expected = ConfigException.class) 168 | public void checkInvalidQueryOption() 169 | { 170 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 171 | .set("uri", mongoUri) 172 | .set("collection", mongoCollection) 173 | .set("query", "{\"key\":invalid_value}") 174 | .set("last_record", 0) 175 | .set("incremental_field", Optional.of(Arrays.asList("account"))); 176 | 177 | plugin.transaction(config, new Control()); 178 | } 179 | 180 | @Test(expected = ConfigException.class) 181 | public void 
checkAggregationWithOtherOption() 182 | { 183 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 184 | .set("uri", mongoUri) 185 | .set("collection", mongoCollection) 186 | .set("query", "{\"key\":\"valid_value\"}") 187 | .set("aggregation", "{$match: { account: { $gt: 32864}}}") 188 | .set("incremental_field", Optional.of(Arrays.asList("account"))); 189 | 190 | plugin.transaction(config, new Control()); 191 | } 192 | 193 | @Test 194 | public void testCreateCredentialsSha1() throws Exception 195 | { 196 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map( 197 | configForAuth().deepCopy() 198 | .set("auth_method", "scram-sha-1") 199 | .set("database", "db"), 200 | PluginTask.class); 201 | 202 | Method createCredential = MongodbInputPlugin.class.getDeclaredMethod("createCredential", PluginTask.class); 203 | createCredential.setAccessible(true); 204 | MongoCredential credential = (MongoCredential) createCredential.invoke(plugin, task); 205 | assertThat("SCRAM-SHA-1", is(credential.getMechanism())); 206 | assertThat("db", is(credential.getSource())); 207 | } 208 | 209 | @Test 210 | public void testCreateCredentialsSha1WithAuthSource() throws Exception 211 | { 212 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map( 213 | configForAuth().deepCopy() 214 | .set("auth_method", "scram-sha-1") 215 | .set("database", "db") 216 | .set("auth_source", "authdb"), 217 | PluginTask.class); 218 | 219 | Method createCredential = MongodbInputPlugin.class.getDeclaredMethod("createCredential", PluginTask.class); 220 | createCredential.setAccessible(true); 221 | MongoCredential credential = (MongoCredential) createCredential.invoke(plugin, task); 222 | assertThat("SCRAM-SHA-1", is(credential.getMechanism())); 223 | assertThat("authdb", is(credential.getSource())); 224 | } 225 | 226 | @Test 227 | public void testCreateCredentialsCr() throws Exception 228 | { 229 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map( 230 | configForAuth().deepCopy() 231 | .set("auth_method", "mongodb-cr"), 232 | PluginTask.class); 233 | 234 | Method createCredential = MongodbInputPlugin.class.getDeclaredMethod("createCredential", PluginTask.class); 235 | createCredential.setAccessible(true); 236 | MongoCredential credential = (MongoCredential) createCredential.invoke(plugin, task); 237 | assertThat("MONGODB-CR", is(credential.getMechanism())); 238 | } 239 | 240 | @Test 241 | public void testCreateMongoClientOptionsTLSEnableWithInsecureEnable() throws Exception 242 | { 243 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map( 244 | configForAuth().deepCopy() 245 | .set("tls", "true") 246 | .set("tls_insecure", "true"), 247 | PluginTask.class); 248 | Method createMongoClientOptions = MongodbInputPlugin.class.getDeclaredMethod("createMongoClientOptions", PluginTask.class); 249 | createMongoClientOptions.setAccessible(true); 250 | MongoClientOptions mongoClientOptions = (MongoClientOptions) createMongoClientOptions.invoke(plugin, task); 251 | assertThat(true, is(mongoClientOptions.isSslEnabled())); 252 | assertThat(true, is(mongoClientOptions.isSslInvalidHostNameAllowed())); 253 | } 254 | 255 | @Test 256 | public void testCreateMongoClientOptionsTLSEnableWithInsecureDisable() throws Exception 257 | { 258 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map( 259 | configForAuth().deepCopy() 260 | .set("tls", "true"), 261 | PluginTask.class); 262 | Method createMongoClientOptions = 
MongodbInputPlugin.class.getDeclaredMethod("createMongoClientOptions", PluginTask.class); 263 | createMongoClientOptions.setAccessible(true); 264 | MongoClientOptions mongoClientOptions = (MongoClientOptions) createMongoClientOptions.invoke(plugin, task); 265 | assertThat(true, is(mongoClientOptions.isSslEnabled())); 266 | assertThat(false, is(mongoClientOptions.isSslInvalidHostNameAllowed())); 267 | } 268 | 269 | @Test 270 | public void testCreateMongoClientOptionsTLSDisable() throws Exception 271 | { 272 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map( 273 | configForAuth().deepCopy() 274 | .set("tls", "false"), 275 | PluginTask.class); 276 | Method createMongoClientOptions = MongodbInputPlugin.class.getDeclaredMethod("createMongoClientOptions", PluginTask.class); 277 | createMongoClientOptions.setAccessible(true); 278 | MongoClientOptions mongoClientOptions = (MongoClientOptions) createMongoClientOptions.invoke(plugin, task); 279 | assertThat(false, is(mongoClientOptions.isSslEnabled())); 280 | assertThat(false, is(mongoClientOptions.isSslInvalidHostNameAllowed())); 281 | } 282 | 283 | @Test 284 | public void testResume() 285 | { 286 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 287 | final Schema schema = getFieldSchema(); 288 | plugin.resume(task.toTaskSource(), schema, 0, new InputPlugin.Control() { 289 | @Override 290 | public List run(TaskSource taskSource, Schema schema, int taskCount) 291 | { 292 | return emptyTaskReports(taskCount); 293 | } 294 | }); 295 | // no errors happens 296 | } 297 | 298 | @Test 299 | public void testCleanup() 300 | { 301 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 302 | Schema schema = getFieldSchema(); 303 | plugin.cleanup(task.dump(), schema, 0, Lists.newArrayList()); // no errors happens 304 | } 305 | 306 | @Test 307 | public void testGuess() 308 | { 309 | plugin.guess(config); // no errors happens 310 | } 311 | 312 | @Test 313 | public void testRun() throws Exception 314 | { 315 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 316 | 317 | dropCollection(task, mongoCollection); 318 | createCollection(task, mongoCollection); 319 | insertDocument(task, createValidDocuments()); 320 | 321 | plugin.transaction(config, new Control()); 322 | assertValidRecords(getFieldSchema(), output); 323 | } 324 | 325 | @Test 326 | public void testRunWithLimit() throws Exception 327 | { 328 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 329 | .set("uri", mongoUri) 330 | .set("collection", mongoCollection) 331 | .set("limit", 1); 332 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 333 | 334 | dropCollection(task, mongoCollection); 335 | createCollection(task, mongoCollection); 336 | insertDocument(task, createValidDocuments()); 337 | 338 | plugin.transaction(config, new Control()); 339 | assertValidRecords(getFieldSchema(), output, 1, 0); 340 | } 341 | 342 | @Test 343 | public void testRunWithLimitSkip() throws Exception 344 | { 345 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 346 | .set("uri", mongoUri) 347 | .set("collection", mongoCollection) 348 | .set("limit", 3) 349 | .set("skip", 1); 350 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 351 | 352 | dropCollection(task, mongoCollection); 353 | createCollection(task, mongoCollection); 354 | 
insertDocument(task, createValidDocuments()); 355 | 356 | plugin.transaction(config, new Control()); 357 | assertValidRecords(getFieldSchema(), output, 3, 1); 358 | } 359 | 360 | @Test 361 | public void testRunWithConnectionParams() throws Exception 362 | { 363 | MongoClientURI uri = new MongoClientURI(mongoUri); 364 | String host = uri.getHosts().get(0); 365 | Integer port = host.contains(":") ? Integer.valueOf(host.split(":")[1]) : 27017; 366 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 367 | .set("hosts", Arrays.asList(ImmutableMap.of("host", host.split(":")[0], "port", port))) 368 | .set("user", uri.getUsername()) 369 | .set("password", uri.getPassword()) 370 | .set("database", uri.getDatabase()) 371 | .set("collection", mongoCollection); 372 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 373 | 374 | dropCollection(task, mongoCollection); 375 | createCollection(task, mongoCollection); 376 | insertDocument(task, createValidDocuments()); 377 | 378 | plugin.transaction(config, new Control()); 379 | assertValidRecords(getFieldSchema(), output); 380 | } 381 | 382 | @Test 383 | public void testRunWithIncrementalLoad() throws Exception 384 | { 385 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 386 | .set("uri", mongoUri) 387 | .set("collection", mongoCollection) 388 | .set("incremental_field", Optional.of(Arrays.asList("int32_field"))); 389 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 390 | 391 | dropCollection(task, mongoCollection); 392 | createCollection(task, mongoCollection); 393 | insertDocument(task, createValidDocuments()); 394 | 395 | ConfigDiff diff = plugin.transaction(config, new Control()); 396 | ConfigDiff lastRecord = diff.getNested("last_record"); 397 | 398 | assertEquals("5", lastRecord.get(String.class, "int32_field")); 399 | } 400 | 401 | @Test 402 | public void testRunWithLimitIncrementalLoad() throws Exception 403 | { 404 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 405 | .set("uri", mongoUri) 406 | .set("collection", mongoCollection) 407 | .set("id_field_name", "int32_field") 408 | .set("incremental_field", Optional.of(Arrays.asList("int32_field", "double_field", "datetime_field", "boolean_field"))) 409 | .set("limit", 1); 410 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 411 | 412 | dropCollection(task, mongoCollection); 413 | createCollection(task, mongoCollection); 414 | insertDocument(task, createValidDocuments()); 415 | 416 | ConfigDiff diff = plugin.transaction(config, new Control()); 417 | ConfigDiff lastRecord = diff.getNested("last_record"); 418 | 419 | assertEquals("1", lastRecord.get(String.class, "int32_field")); 420 | assertEquals("1.23", lastRecord.get(String.class, "double_field")); 421 | assertEquals("{$date=2015-01-27T10:23:49.000Z}", lastRecord.get(Map.class, "datetime_field").toString()); 422 | assertEquals("true", lastRecord.get(String.class, "boolean_field")); 423 | } 424 | 425 | @Test 426 | public void testRunWithLimitIncrementalLoadWithNoRecord() throws Exception 427 | { 428 | Map<String, Object> previousLastRecord = new HashMap<>(); 429 | previousLastRecord.put("int32_field", 1); 430 | previousLastRecord.put("datetime_field", "{$date=2015-01-27T10:23:49.000Z}"); 431 | previousLastRecord.put("boolean_field", true); 432 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 433 | .set("uri", mongoUri) 434 | .set("collection",
mongoCollection) 435 | .set("id_field_name", "int32_field") 436 | .set("query", "{\"double_field\":{\"$gte\": 1.23}}") 437 | .set("incremental_field", Optional.of(Arrays.asList("int32_field", "datetime_field", "boolean_field"))) 438 | .set("last_record", previousLastRecord); 439 | 440 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 441 | 442 | dropCollection(task, mongoCollection); 443 | createCollection(task, mongoCollection); 444 | insertDocument(task, createValidDocuments()); 445 | 446 | ConfigDiff diff = plugin.transaction(config, new Control()); 447 | ConfigDiff lastRecord = diff.getNested("last_record"); 448 | 449 | assertEquals("1", lastRecord.get(String.class, "int32_field")); 450 | assertEquals("{$date=2015-01-27T10:23:49.000Z}", lastRecord.get(String.class, "datetime_field")); 451 | assertEquals("true", lastRecord.get(String.class, "boolean_field")); 452 | } 453 | 454 | @Test(expected = ConfigException.class) 455 | public void testRunWithIncrementalLoadUnsupportedType() throws Exception 456 | { 457 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 458 | .set("uri", mongoUri) 459 | .set("collection", mongoCollection) 460 | .set("incremental_field", Optional.of(Arrays.asList("document_field"))); 461 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 462 | 463 | dropCollection(task, mongoCollection); 464 | createCollection(task, mongoCollection); 465 | insertDocument(task, createValidDocuments()); 466 | 467 | plugin.transaction(config, new Control()); 468 | } 469 | 470 | @Test(expected = ValueCodec.UnknownTypeFoundException.class) 471 | public void testRunWithUnsupportedType() throws Exception 472 | { 473 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 474 | .set("uri", mongoUri) 475 | .set("collection", mongoCollection) 476 | .set("stop_on_invalid_record", true); 477 | 478 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 479 | 480 | dropCollection(task, mongoCollection); 481 | createCollection(task, mongoCollection); 482 | 483 | List<Document> documents = new ArrayList<>(); 484 | documents.add( 485 | new Document("invalid_field", new BsonMaxKey()) 486 | ); 487 | insertDocument(task, documents); 488 | 489 | plugin.transaction(config, new Control()); 490 | } 491 | 492 | @Test 493 | public void testRunWithAggregation() throws Exception 494 | { 495 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 496 | .set("uri", mongoUri) 497 | .set("collection", mongoCollection) 498 | .set("id_field_name", "int32_field") 499 | .set("aggregation", "{ $match: {\"int32_field\": {\"$gte\": 5}} }"); 500 | 501 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 502 | 503 | dropCollection(task, mongoCollection); 504 | createCollection(task, mongoCollection); 505 | insertDocument(task, createValidDocuments()); 506 | 507 | plugin.transaction(config, new Control()); 508 | assertValidRecordsForAggregation(getFieldSchema(), output); 509 | } 510 | 511 | @Test 512 | public void testNormalize() throws Exception 513 | { 514 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 515 | ValueCodec codec = new ValueCodec(true, task); 516 | 517 | Method normalize = ValueCodec.class.getDeclaredMethod("normalize", String.class); 518 | normalize.setAccessible(true); 519 | assertEquals("_id", normalize.invoke(codec, "_id").toString()); 520 | assertEquals("f1", normalize.invoke(codec, "f1").toString()); 521 | } 522 |
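// The test above and the one below pin down normalize(): with the default config
// the "_id" key passes through unchanged, while setting id_field_name (as in the
// next test) renames "_id" to that value on every decoded document.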
assertEquals("f1", normalize.invoke(codec, "f1").toString()); 521 | } 522 | 523 | @Test 524 | public void testNormlizeWithIdFieldName() throws Exception 525 | { 526 | ConfigSource config = config().set("id_field_name", "object_id"); 527 | 528 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 529 | ValueCodec codec = new ValueCodec(true, task); 530 | 531 | Method normalize = ValueCodec.class.getDeclaredMethod("normalize", String.class); 532 | normalize.setAccessible(true); 533 | assertEquals("object_id", normalize.invoke(codec, "_id").toString()); 534 | assertEquals("f1", normalize.invoke(codec, "f1").toString()); 535 | } 536 | 537 | @Test 538 | public void testValidateJsonField() throws Exception 539 | { 540 | Method validate = MongodbInputPlugin.class.getDeclaredMethod("validateJsonField", String.class, String.class); 541 | validate.setAccessible(true); 542 | String invalidJsonString = "{\"name\": invalid}"; 543 | try { 544 | validate.invoke(plugin, "name", invalidJsonString); 545 | } 546 | catch (InvocationTargetException ex) { 547 | assertEquals(ConfigException.class, ex.getCause().getClass()); 548 | } 549 | } 550 | 551 | @Test 552 | @SuppressWarnings("unchecked") 553 | public void testBuildIncrementalCondition() throws Exception 554 | { 555 | PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 556 | dropCollection(task, mongoCollection); 557 | createCollection(task, mongoCollection); 558 | insertDocument(task, createValidDocuments()); 559 | 560 | Method method = MongodbInputPlugin.class.getDeclaredMethod("buildIncrementalCondition", PluginTask.class); 561 | method.setAccessible(true); 562 | 563 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 564 | .set("uri", mongoUri) 565 | .set("collection", mongoCollection) 566 | .set("incremental_field", Optional.of(Arrays.asList("account"))); 567 | task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 568 | Map actual = (Map) method.invoke(plugin, task); 569 | Map expected = new HashMap<>(); 570 | expected.put("query", "{}"); 571 | expected.put("sort", "{\"account\":1}"); 572 | assertEquals(expected, actual); 573 | 574 | Map lastRecord = new HashMap<>(); 575 | Map innerRecord = new HashMap<>(); 576 | innerRecord.put("$oid", "abc"); 577 | lastRecord.put("_id", innerRecord); 578 | lastRecord.put("int32_field", 15000); 579 | innerRecord = new HashMap<>(); 580 | innerRecord.put("$date", "2015-01-27T19:23:49Z"); 581 | lastRecord.put("datetime_field", innerRecord); 582 | config = CONFIG_MAPPER_FACTORY.newConfigSource() 583 | .set("uri", mongoUri) 584 | .set("collection", mongoCollection) 585 | .set("query", "{\"double_field\":{\"$gte\": 1.23}}") 586 | .set("incremental_field", Optional.of(Arrays.asList("_id", "int32_field", "datetime_field"))) 587 | .set("last_record", Optional.of(lastRecord)); 588 | task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 589 | actual = (Map) method.invoke(plugin, task); 590 | expected.put("query", "{\"double_field\":{\"$gte\":1.23},\"int32_field\":{\"$gt\":15000},\"_id\":{\"$gt\":{\"$oid\":\"abc\"}},\"datetime_field\":{\"$gt\":{\"$date\":\"2015-01-27T19:23:49Z\"}}}"); 591 | expected.put("sort", "{\"_id\":1,\"int32_field\":1,\"datetime_field\":1}"); 592 | assertEquals(expected, actual); 593 | } 594 | 595 | @Test 596 | public void testBuildIncrementalConditionFieldDuplicated() throws Exception 597 | { 598 | Map lastRecord = new HashMap<>(); 599 | 
lastRecord.put("double_field", "0"); 600 | 601 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 602 | .set("uri", mongoUri) 603 | .set("collection", mongoCollection) 604 | .set("query", "{\"double_field\":{\"$gte\": 1.23}}") 605 | .set("incremental_field", Optional.of(Arrays.asList("double_field"))) 606 | .set("last_record", Optional.of(lastRecord)); 607 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 608 | dropCollection(task, mongoCollection); 609 | createCollection(task, mongoCollection); 610 | insertDocument(task, createValidDocuments()); 611 | 612 | Method method = MongodbInputPlugin.class.getDeclaredMethod("buildIncrementalCondition", PluginTask.class); 613 | method.setAccessible(true); 614 | try { 615 | method.invoke(plugin, task); // field declaration was duplicated between query and incremental_field 616 | } 617 | catch (Exception ex) { 618 | assertEquals(ConfigException.class, ex.getCause().getClass()); 619 | } 620 | } 621 | 622 | @Test 623 | public void testBuildIncrementalConditionFieldRequired() throws Exception 624 | { 625 | Map lastRecord = new HashMap<>(); 626 | lastRecord.put("double_field", "0"); 627 | 628 | ConfigSource config = CONFIG_MAPPER_FACTORY.newConfigSource() 629 | .set("uri", mongoUri) 630 | .set("collection", mongoCollection) 631 | .set("incremental_field", Optional.of(Arrays.asList("invalid_field"))) 632 | .set("last_record", Optional.of(lastRecord)); 633 | final PluginTask task = CONFIG_MAPPER_FACTORY.createConfigMapper().map(config, PluginTask.class); 634 | dropCollection(task, mongoCollection); 635 | createCollection(task, mongoCollection); 636 | 637 | Method method = MongodbInputPlugin.class.getDeclaredMethod("buildIncrementalCondition", PluginTask.class); 638 | method.setAccessible(true); 639 | try { 640 | method.invoke(plugin, task); // field declaration was not set at incremental_field 641 | } 642 | catch (Exception ex) { 643 | assertEquals(ConfigException.class, ex.getCause().getClass()); 644 | } 645 | } 646 | 647 | static List emptyTaskReports(int taskCount) 648 | { 649 | ImmutableList.Builder reports = new ImmutableList.Builder<>(); 650 | for (int i = 0; i < taskCount; i++) { 651 | reports.add(CONFIG_MAPPER_FACTORY.newTaskReport()); 652 | } 653 | return reports.build(); 654 | } 655 | 656 | private class Control 657 | implements InputPlugin.Control 658 | { 659 | @Override 660 | public List run(TaskSource taskSource, Schema schema, int taskCount) 661 | { 662 | List reports = new ArrayList<>(); 663 | for (int i = 0; i < taskCount; i++) { 664 | reports.add(plugin.run(taskSource, schema, i, output)); 665 | } 666 | return reports; 667 | } 668 | } 669 | 670 | private ConfigSource config() 671 | { 672 | return CONFIG_MAPPER_FACTORY.newConfigSource() 673 | .set("uri", mongoUri) 674 | .set("collection", mongoCollection); 675 | } 676 | 677 | private ConfigSource configForAuth() 678 | { 679 | return CONFIG_MAPPER_FACTORY.newConfigSource() 680 | .set("database", "db") 681 | .set("collection", mongoCollection) 682 | .set("user", "abcde") 683 | .set("password", "passw0rd"); 684 | } 685 | 686 | private List createValidDocuments() throws Exception 687 | { 688 | DateFormat format = getUTCDateFormat(); 689 | 690 | List documents = new ArrayList<>(); 691 | documents.add( 692 | new Document("double_field", 1.23) 693 | .append("string_field", "embulk") 694 | .append("array_field", Arrays.asList(1, 2, 3)) 695 | .append("binary_field", new BsonBinary(("test").getBytes("UTF-8"))) 696 | 
.append("boolean_field", true) 697 | .append("datetime_field", format.parse("2015-01-27T10:23:49.000Z")) 698 | .append("null_field", null) 699 | .append("regex_field", new BsonRegularExpression(".+?")) 700 | .append("javascript_field", new BsonJavaScript("var s = \"javascript\";")) 701 | .append("int32_field", 1) 702 | .append("timestamp_field", new BsonTimestamp(1463991177, 4)) 703 | .append("int64_field", new BsonInt64(314159265)) 704 | .append("document_field", new Document("k", true)) 705 | .append("symbol_field", new Symbol("symbol")) 706 | ); 707 | 708 | documents.add( 709 | new Document("boolean_field", false) 710 | .append("int32_field", 2) 711 | .append("document_field", new Document("k", 1)) 712 | ); 713 | 714 | documents.add( 715 | new Document("int32_field", 3) 716 | .append("document_field", new Document("k", 1.23)) 717 | ); 718 | 719 | documents.add( 720 | new Document("int32_field", 4) 721 | .append("document_field", new Document("k", "v")) 722 | ); 723 | 724 | documents.add( 725 | new Document("int32_field", 5) 726 | .append("document_field", new Document("k", format.parse("2015-02-02T23:13:45.000Z"))) 727 | ); 728 | 729 | return documents; 730 | } 731 | 732 | private Schema getFieldSchema() 733 | { 734 | ImmutableList.Builder columns = ImmutableList.builder(); 735 | columns.add(new Column(0, "record", Types.JSON)); 736 | return new Schema(columns.build()); 737 | } 738 | 739 | private void assertValidRecords(Schema schema, MockPageOutput output) throws Exception 740 | { 741 | assertValidRecords(schema, output, 5, 0); 742 | } 743 | 744 | private void assertValidRecordsForAggregation(Schema schema, MockPageOutput output) throws Exception 745 | { 746 | assertValidRecords(schema, output, 1, 4); 747 | } 748 | 749 | private void assertValidRecords(Schema schema, MockPageOutput output, int limit, int skip) throws Exception 750 | { 751 | int maxRecordSize = 5; 752 | int actualRecordSize = Math.min(maxRecordSize - skip, limit); 753 | List records = Pages.toObjects(schema, output.pages); 754 | assertEquals(actualRecordSize, records.size()); 755 | 756 | ObjectMapper mapper = new ObjectMapper(); 757 | mapper.setDateFormat(getUTCDateFormat()); 758 | 759 | int recordIndex = 0; 760 | for (int i = skip; i < actualRecordSize; i++) { 761 | if (i == 0) { 762 | JsonNode node = mapper.readTree(records.get(recordIndex)[0].toString()); 763 | assertThat(1.23, is(node.get("double_field").asDouble())); 764 | assertEquals("embulk", node.get("string_field").asText()); 765 | assertEquals("[1,2,3]", node.get("array_field").toString()); 766 | assertEquals("test", node.get("binary_field").asText()); 767 | assertEquals(true, node.get("boolean_field").asBoolean()); 768 | assertEquals("2015-01-27T10:23:49.000Z", node.get("datetime_field").asText()); 769 | assertEquals("null", node.get("null_field").asText()); 770 | assertEquals("BsonRegularExpression{pattern='.+?', options=''}", node.get("regex_field").asText()); 771 | assertEquals("var s = \"javascript\";", node.get("javascript_field").asText()); 772 | assertEquals(1, node.get("int32_field").asLong()); 773 | assertEquals("1463991177", node.get("timestamp_field").asText()); 774 | assertEquals(314159265L, node.get("int64_field").asLong()); 775 | assertEquals("{\"k\":true}", node.get("document_field").toString()); 776 | assertEquals("symbol", node.get("symbol_field").asText()); 777 | } 778 | 779 | if (i == 1) { 780 | JsonNode node = mapper.readTree(records.get(recordIndex)[0].toString()); 781 | assertEquals(false, node.get("boolean_field").asBoolean()); 782 | 
assertEquals("{\"k\":1}", node.get("document_field").toString()); 783 | } 784 | 785 | if (i == 2) { 786 | JsonNode node = mapper.readTree(records.get(recordIndex)[0].toString()); 787 | assertEquals("{\"k\":1.23}", node.get("document_field").toString()); 788 | } 789 | 790 | if (i == 3) { 791 | JsonNode node = mapper.readTree(records.get(recordIndex)[0].toString()); 792 | assertEquals("{\"k\":\"v\"}", node.get("document_field").toString()); 793 | } 794 | 795 | if (i == 4) { 796 | JsonNode node = mapper.readTree(records.get(recordIndex)[0].toString()); 797 | assertEquals("{\"k\":\"2015-02-02T23:13:45.000Z\"}", node.get("document_field").toString()); 798 | } 799 | recordIndex++; 800 | } 801 | } 802 | 803 | private void createCollection(PluginTask task, String collectionName) throws Exception 804 | { 805 | Method method = MongodbInputPlugin.class.getDeclaredMethod("connect", PluginTask.class); 806 | method.setAccessible(true); 807 | MongoDatabase db = (MongoDatabase) method.invoke(plugin, task); 808 | db.createCollection(collectionName); 809 | } 810 | 811 | private void dropCollection(PluginTask task, String collectionName) throws Exception 812 | { 813 | Method method = MongodbInputPlugin.class.getDeclaredMethod("connect", PluginTask.class); 814 | method.setAccessible(true); 815 | MongoDatabase db = (MongoDatabase) method.invoke(plugin, task); 816 | MongoCollection collection = db.getCollection(collectionName); 817 | collection.drop(); 818 | } 819 | 820 | private void insertDocument(PluginTask task, List documents) throws Exception 821 | { 822 | Method method = MongodbInputPlugin.class.getDeclaredMethod("connect", PluginTask.class); 823 | method.setAccessible(true); 824 | MongoDatabase db = (MongoDatabase) method.invoke(plugin, task); 825 | MongoCollection collection = db.getCollection(task.getCollection()); 826 | collection.insertMany(documents); 827 | } 828 | 829 | private DateFormat getUTCDateFormat() 830 | { 831 | DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", java.util.Locale.ENGLISH); 832 | dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); 833 | return dateFormat; 834 | } 835 | } 836 | -------------------------------------------------------------------------------- /src/test/resources/basic.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: mongodb 3 | uri: mongodb://mongo_user:dbpass@localhost:27017/mydb 4 | collection: "my_collection" 5 | projection: '{ "_id": 0, "name": 1, "rank": 1 }' 6 | sort: '{ rank: 1 }' 7 | out: 8 | type: file 9 | path_prefix: ./tmp/basic 10 | file_ext: csv 11 | formatter: 12 | type: csv 13 | header_line: true 14 | charset: UTF-8 15 | newline: CRLF 16 | -------------------------------------------------------------------------------- /src/test/resources/basic_expected.csv: -------------------------------------------------------------------------------- 1 | record 2 | "{""name"":""obj1"",""rank"":1}" 3 | "{""name"":""obj2"",""rank"":2}" 4 | "{""name"":""obj3"",""rank"":3}" 5 | "{""name"":""obj4"",""rank"":4}" 6 | "{""name"":""obj5"",""rank"":5}" 7 | "{""name"":""obj6"",""rank"":6}" 8 | "{""name"":""obj7"",""rank"":7}" 9 | "{""name"":""obj8"",""rank"":8}" 10 | "{""name"":""obj9"",""rank"":9}" 11 | -------------------------------------------------------------------------------- /src/test/resources/full.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: mongodb 3 | uri: mongodb://mongo_user:dbpass@localhost:27017/mydb 4 | 
collection: "my_collection" 5 | json_column_name: "json" 6 | query: '{ rank: { $gte: 3 } }' 7 | sort: '{ rank: -1 }' 8 | batch_size: 100 9 | out: 10 | type: file 11 | path_prefix: ./tmp/full 12 | file_ext: csv 13 | formatter: 14 | type: csv 15 | header_line: true 16 | charset: UTF-8 17 | newline: CRLF -------------------------------------------------------------------------------- /src/test/resources/full_expected.csv: -------------------------------------------------------------------------------- 1 | json 2 | "{""_id"":""55eae883689a08361045d652"",""name"":""obj9"",""rank"":9,""value"":9.9,""created_at"":""2015-09-06T10:05:18.786Z"",""embeded"":{""key"":""value9""}}" 3 | "{""_id"":""55eae883689a08361045d651"",""name"":""obj8"",""rank"":8,""value"":8.8,""created_at"":""2015-09-06T10:05:28.786Z"",""embeded"":{""key"":""value8""}}" 4 | "{""_id"":""55eae883689a08361045d650"",""name"":""obj7"",""rank"":7,""value"":7.7,""created_at"":""2015-09-06T10:05:38.786Z"",""embeded"":{""key"":""value7""}}" 5 | "{""_id"":""55eae883689a08361045d64f"",""name"":""obj6"",""rank"":6,""value"":6.6,""created_at"":""2015-09-06T10:05:48.786Z"",""embeded"":{""key"":""value6""}}" 6 | "{""_id"":""55eae883689a08361045d64e"",""name"":""obj5"",""rank"":5,""value"":5.5,""created_at"":""2015-09-06T10:05:58.786Z"",""embeded"":{""key"":""value5""}}" 7 | "{""_id"":""55eae883689a08361045d64d"",""name"":""obj4"",""rank"":4,""value"":4.4,""created_at"":""2015-09-06T10:06:08.786Z"",""embeded"":{""key"":{""inner_key"":""value4""}}}" 8 | "{""_id"":""55eae883689a08361045d64c"",""name"":""obj3"",""rank"":3,""value"":3.3,""created_at"":""2015-09-06T10:06:18.786Z"",""embeded"":{""key"":[""v3-1"",""v3-2""]}}" 9 | -------------------------------------------------------------------------------- /src/test/resources/id_field_name.yml: -------------------------------------------------------------------------------- 1 | in: 2 | type: mongodb 3 | uri: mongodb://mongo_user:dbpass@localhost:27017/mydb 4 | collection: "my_collection" 5 | json_column_name: "json" 6 | query: '{ rank: { $gte: 3 } }' 7 | sort: '{ rank: -1 }' 8 | id_field_name: "object_id" 9 | batch_size: 100 10 | out: 11 | type: file 12 | path_prefix: ./tmp/id_field_name 13 | file_ext: csv 14 | formatter: 15 | type: csv 16 | header_line: true 17 | charset: UTF-8 18 | newline: CRLF 19 | -------------------------------------------------------------------------------- /src/test/resources/id_field_name_expected.csv: -------------------------------------------------------------------------------- 1 | json 2 | "{""object_id"":""55eae883689a08361045d652"",""name"":""obj9"",""rank"":9,""value"":9.9,""created_at"":""2015-09-06T10:05:18.786Z"",""embeded"":{""key"":""value9""}}" 3 | "{""object_id"":""55eae883689a08361045d651"",""name"":""obj8"",""rank"":8,""value"":8.8,""created_at"":""2015-09-06T10:05:28.786Z"",""embeded"":{""key"":""value8""}}" 4 | "{""object_id"":""55eae883689a08361045d650"",""name"":""obj7"",""rank"":7,""value"":7.7,""created_at"":""2015-09-06T10:05:38.786Z"",""embeded"":{""key"":""value7""}}" 5 | "{""object_id"":""55eae883689a08361045d64f"",""name"":""obj6"",""rank"":6,""value"":6.6,""created_at"":""2015-09-06T10:05:48.786Z"",""embeded"":{""key"":""value6""}}" 6 | "{""object_id"":""55eae883689a08361045d64e"",""name"":""obj5"",""rank"":5,""value"":5.5,""created_at"":""2015-09-06T10:05:58.786Z"",""embeded"":{""key"":""value5""}}" 7 | 
"{""object_id"":""55eae883689a08361045d64d"",""name"":""obj4"",""rank"":4,""value"":4.4,""created_at"":""2015-09-06T10:06:08.786Z"",""embeded"":{""key"":{""inner_key"":""value4""}}}" 8 | "{""object_id"":""55eae883689a08361045d64c"",""name"":""obj3"",""rank"":3,""value"":3.3,""created_at"":""2015-09-06T10:06:18.786Z"",""embeded"":{""key"":[""v3-1"",""v3-2""]}}" 9 | -------------------------------------------------------------------------------- /src/test/resources/my_collection.jsonl: -------------------------------------------------------------------------------- 1 | { "_id": "55eae883689a08361045d64a", "name": "obj1", "rank": 1, "value": 1.1, "created_at": { "$date" : 1441533998786 }, "embeded": { "key": "value1" } } 2 | { "_id": "55eae883689a08361045d64b", "name": "obj2", "rank": 2, "value": 2.2, "created_at": { "$date" : 1441533988786 }, "embeded": { "key": "value2" } } 3 | { "_id": "55eae883689a08361045d64c", "name": "obj3", "rank": 3, "value": 3.3, "created_at": { "$date" : 1441533978786 }, "embeded": { "key": ["v3-1", "v3-2"]} } 4 | { "_id": "55eae883689a08361045d64d", "name": "obj4", "rank": 4, "value": 4.4, "created_at": { "$date" : 1441533968786 }, "embeded": { "key": { "inner_key": "value4" } } } 5 | { "_id": "55eae883689a08361045d64e", "name": "obj5", "rank": 5, "value": 5.5, "created_at": { "$date" : 1441533958786 }, "embeded": { "key": "value5" } } 6 | { "_id": "55eae883689a08361045d64f", "name": "obj6", "rank": 6, "value": 6.6, "created_at": { "$date" : 1441533948786 }, "embeded": { "key": "value6" } } 7 | { "_id": "55eae883689a08361045d650", "name": "obj7", "rank": 7, "value": 7.7, "created_at": { "$date" : 1441533938786 }, "embeded": { "key": "value7" } } 8 | { "_id": "55eae883689a08361045d651", "name": "obj8", "rank": 8, "value": 8.8, "created_at": { "$date" : 1441533928786 }, "embeded": { "key": "value8" } } 9 | { "_id": "55eae883689a08361045d652", "name": "obj9", "rank": 9, "value": 9.9, "created_at": { "$date" : 1441533918786 }, "embeded": { "key": "value9" } } 10 | --------------------------------------------------------------------------------