├── .gitignore ├── ChangeLog.md ├── LICENSE.md ├── README.md ├── build.gradle ├── sample ├── cassandra-schema.cql ├── load.sh └── titanic.csv └── src ├── main ├── java │ └── com │ │ └── datastax │ │ └── loader │ │ ├── AbstractDynamicRateLimiter.java │ │ ├── CqlDelimLoad.java │ │ ├── CqlDelimLoadTask.java │ │ ├── CqlDelimParser.java │ │ ├── CqlDelimUnload.java │ │ ├── EnhancedSession.java │ │ ├── Latency999RateLimiter.java │ │ ├── LoaderRetryPolicy.java │ │ ├── RateLimitedSession.java │ │ ├── RateLimiter.java │ │ ├── futures │ │ ├── AbstractFutureManager.java │ │ ├── ActionFutureList.java │ │ ├── ActionFutureSet.java │ │ ├── FutureAction.java │ │ ├── FutureManager.java │ │ ├── JsonPrintingFutureAction.java │ │ ├── JsonPrintingFutureList.java │ │ ├── JsonPrintingFutureSet.java │ │ ├── NullFutureAction.java │ │ ├── PrintingFutureAction.java │ │ ├── PrintingFutureList.java │ │ └── PrintingFutureSet.java │ │ └── parser │ │ ├── AbstractParser.java │ │ ├── BigDecimalParser.java │ │ ├── BigIntegerParser.java │ │ ├── BooleanParser.java │ │ ├── ByteBufferParser.java │ │ ├── ByteParser.java │ │ ├── DateParser.java │ │ ├── DelimParser.java │ │ ├── DoubleParser.java │ │ ├── FloatParser.java │ │ ├── IndexedLine.java │ │ ├── InetAddressParser.java │ │ ├── IntegerParser.java │ │ ├── ListParser.java │ │ ├── LocalDateParser.java │ │ ├── LongParser.java │ │ ├── MapParser.java │ │ ├── NumberParser.java │ │ ├── Parser.java │ │ ├── SetParser.java │ │ ├── ShortParser.java │ │ ├── StringParser.java │ │ └── UUIDParser.java └── resources │ └── logback.xml └── make ├── buildit.sh ├── cassandra-loader.sh └── unloader.sh /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | .gradle/ 3 | *~ 4 | *.jar 5 | cassandra-loader 6 | *.BADINSERT 7 | *.BADPARSE 8 | *.LOG 9 | -------------------------------------------------------------------------------- /ChangeLog.md: -------------------------------------------------------------------------------- 1 
| ## 0.0.27 2 | - Added support for specifying a TTL (Issue 67) 3 | 4 | ## 0.0.26 5 | - Fixed issue with long schemas (Issue 65) 6 | 7 | ## 0.0.25 8 | - Added support for Fetch Size in cassandra-unloader 9 | 10 | ## 0.0.24 11 | - Added support for DATE (and associated -localDateFormat option) 12 | - Added support for SHORTINT and TINYINT 13 | - Fixed an issue with special characters in column names (Issue 59) 14 | 15 | ## 0.0.23 16 | - Fixed case of commas in Map keys 17 | - Fixed keystore/truststore-pw typo (was pwd) 18 | - Fixed quoting of non-collections 19 | - Added support for supplying comment character 20 | 21 | ## 0.0.22 22 | - Added support for gzipped input files 23 | - Fixed issue with quoted values (and in collections) 24 | 25 | ## 0.0.21 26 | - Added support for jsonarray (one JSON array per file) 27 | - Added support for jsonline (one JSON per line) 28 | - Changed JVM settings to 1GB heap 29 | - Added -charsPerColumn (default to 4096) to enable Univocity optimizations 30 | - Removed explicit queries to metadata tables and use driver API calls 31 | 32 | ## 0.0.20 33 | - Fixed delimiter in MapParser 34 | - Catch NULLs in Map/Set/List parsing and throw in BADPARSE 35 | 36 | ## 0.0.19 37 | - Converted to parse with Univocity CSV parser 38 | - Added -where 39 | 40 | ## 0.0.18 41 | - Support for Cassandra 3.0 42 | 43 | ## 0.0.17 44 | - Fixed null collection issue / NPE (Issue 8) 45 | 46 | ## 0.0.16 47 | - Unloader will quote collections (which the loader expects) 48 | - Fixed collection issue (Issue 14) 49 | - BLOBs are now Base64 encoded on unload, and should be Base64 to load (Issue 15) 50 | - Support for quoted keyspace, table, and column names 51 | 52 | ## 0.0.15 53 | - Better error handling for case when C* inserts are failing 54 | 55 | ## 0.0.14 56 | - Updated cassandra-unloader to add support for collections, 57 | consistency level, ssl, etc 58 | 59 | ## 0.0.13 60 | - Added configFile 61 | - added ssl options (with truststore and keystore) 62 | 63 | 
## 0.0.12 64 | - Added a rateFile to output CSV rate statistics 65 | - added -skipCols to skip input columns 66 | 67 | ## 0.0.11 68 | - Added support for quoted Keyspaces, Tables, and Columns 69 | 70 | ## 0.0.10 71 | - You want collections? You got 'em 72 | - Added progress reporting - you can specify the rate at which 73 | the rate is reported via the -progressRate option 74 | - Refactored RateLimiting - added it to a new RateLimitingSession 75 | - Laid groundwork for Dynamic rate limiting - to be worked out 76 | once we find a way to collect the right statistic 77 | 78 | ## 0.0.9 79 | - Added -successDir and -failureDir 80 | - Added return codes for the loader and unloader 81 | - Refactored BoolStyle 82 | - Cleaned up the readme a bit 83 | 84 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2016, DataStax 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # CASSANDRA-LOADER 3 | 4 | ## Introduction 5 | 6 | cassandra-loader is a general-purpose, delimited-file, bulk loader for 7 | Cassandra. It supports a number of configuration options to enable bulk 8 | loading of various types of delimited files, including 9 | * comma-separated values 10 | * tab-separated values 11 | * custom delimiter-separated values 12 | * header row 13 | * comma as decimal separator 14 | * ... 
15 | 16 | ## Getting it 17 | 18 | ### Downloading 19 | 20 | This utility has already been built, and is available at 21 | https://github.com/brianmhess/cassandra-loader/releases/download/v0.0.27/cassandra-loader 22 | 23 | Get it with wget: 24 | 25 | ``` 26 | wget https://github.com/brianmhess/cassandra-loader/releases/download/v0.0.27/cassandra-loader 27 | ``` 28 | 29 | ### Building 30 | 31 | To build this repository, simply clone this repo and run: 32 | 33 | ``` 34 | gradle loader 35 | ``` 36 | 37 | All of the dependencies are included (namely, the Java driver - currently 38 | version 3.2.0). The output will be the cassandra-loader executable 39 | in the build directory. There will also be a jar with all of the 40 | dependencies included in the build/libs/cassandra-loader-uber-<version>.jar 41 | 42 | ## Documentation 43 | 44 | To extract this README document, simply run (on the cassandra-loader 45 | executable - e.g., on build/cassandra-loader): 46 | 47 | ``` 48 | jar xf cassandra-loader README.md 49 | ``` 50 | 51 | ## Run 52 | 53 | To run cassandra-loader, simply run the cassandra-loader executable 54 | (e.g., located at build/cassandra-loader): 55 | 56 | ``` 57 | cassandra-loader 58 | ``` 59 | 60 | If you built this with gradle, you can also run: 61 | 62 | ``` 63 | gradle run 64 | ``` 65 | 66 | This will print the usage statement. 
67 | 68 | The following will load the `myFileToLoad.csv` file into the Cassandra 69 | cluster at IP address 1.2.3.4 into the `test.ltest` column family where 70 | the myFileToLoad file has the format of 4 columns - and it gets the 71 | data type information from the database - and using the default options: 72 | 73 | ``` 74 | cassandra-loader -f myFileToLoad.csv -host 1.2.3.4 -schema "test.ltest(a, b, c, d)" 75 | ``` 76 | 77 | ## Options: 78 | 79 | Switch | Option | Default | Description 80 | -----------------:|-------------------:|---------------------------:|:---------- 81 | `-configFile` | Filename | none | Filename of configuration options 82 | `-f` | Filename | <REQUIRED> | Filename to load - required. 83 | `-host` | IP Address | <REQUIRED> | Cassandra connection point - required. 84 | `-format` | Input format | delim | Format of the data. Options are "delim", "jsonline", or "jsonarray". 85 | `-schema` | CQL schema | | Schema of input data - required for delim. In the format "keySpace.table(col1,col2,...)" and in the order that the data will be in the file. 
86 | `-keyspace` | Keyspace name | | Name of keyspace (case sensitive) to load in to - required for json 87 | `-table` | Table name | | Name of table (case sensitive) to load in to - required for json 88 | `-port` | Port Number | 9042 | Cassandra native protocol port number 89 | `-user` | Username | none | Cassandra username 90 | `-pw` | Password | none | Cassandra password 91 | `-ssl-truststore-path` | Truststore Path | none | Path to SSL truststore 92 | `-ssl-truststore-pw` | Truststore Password | none | Password to SSL truststore 93 | `-ssl-keystore-path` | Keystore Path | none | Path to SSL keystore 94 | `-ssl-keystore-pw` | Keystore Password | none | Password to SSL keystore 95 | `-consistencyLevel` | Consistency Level | LOCAL_ONE | CQL Consistency Level 96 | `-numThreads` | Number of threads | Number of CPUs | Number of threads to use (one per file) 97 | `-numFutures` | Number of Futures | 1000 | Number of Java driver futures in flight. 98 | `-numRetries` | Number of retries | 1 | Number of times to retry the INSERT before declaring defeat. 99 | `-queryTimeout` | Timeout in seconds | 2 | Amount of time to wait for a query to finish before timing out. 100 | `-ttl` | Time To Live | none | TTL to use when inserting these rows 101 | `-delim` | Delimiter | , | Delimiter to use 102 | `-charsPerColumn`| Characters per column | 4096 | Maximum characters per column 103 | `-nullString` | Null String | <empty string> | String to represent NULL data 104 | `-boolStyle` | Boolean Style | TRUE_FALSE | String for boolean values. Options are "1_0", "Y_N", "T_F", "YES_NO", "TRUE_FALSE". 105 | `-decimalDelim` | Decimal delimiter | . | Delimiter for decimal values. Options are "." 
or "," 106 | `-dateFormat` | Date Format String | default for Locale.ENGLISH | Date format string as specified in the SimpleDateFormat Java class: http://docs.oracle.com/javase/7/docs/api/java/text/SimpleDateFormat.html 107 | `-skipRows` | Rows to skip | 0 | Number of rows to skip at the beginning of the file 108 | `-skipCols` | Columns to skip | <not set> | Comma-separated list of columns to skip loading (0-counted) 109 | `-maxRows` | Max rows to read | -1 | Maximum rows to read (after optional skipping of rows). -1 signifies all rows. 110 | `-maxErrors` | Max parse errors | 10 | Maximum number of rows that do not parse to allow before exiting. 111 | `-maxInsertErrors`| Max insert errors | 10 | Maximum number of rows that do not insert to allow before exiting. 112 | `-badDir` | Bad directory | current directory | Directory to write badly parsed and badly inserted rows - as well as the log file. 113 | `-rate` | Ingest rate | unlimited | Maximum rate to insert data - in rows/sec. 114 | `-progressRate` | Progress rate | 100000 | How often to report the ingest rate (number of rows) 115 | `-rateFile` | Rate Stats File | <not set> | File to contain CSV rate statistics 116 | `-successDir` | Success directory | <not set> | Location to move successfully loaded files 117 | `-failureDir` | Failure directory | <not set> | Location to move files that failed to load 118 | `-batchSize` | Batch size | 1 | Size of unlogged batches. If set to 1 then no batching. 119 | `-comment` | Comment character | <not set> | Comment character. 120 | 121 | ## Comments 122 | 123 | ### Using stdin 124 | 125 | You can send data in on stdin by specifying the filename (via the -f switch) as "stdin" (case insensitive). 126 | That way, you could pipe data in from other commands: 127 | 128 | ``` 129 | grep IMPORTANT data.csv | cassandra-loader -f stdin -host 1.2.3.4 -schema "test.itest(a, b)" 130 | ``` 131 | 132 | ### Support for collections 133 | 134 | Collections are supported. 
Their format is the CQL native one. 135 | Sets are started with '{' and ended with '}' and enclose a comma-separated list 136 | {1,2,3} or {"a","b","c"} 137 | Lists are started with '[' and ended with ']' and enclose a comma-separated list 138 | [1,2,3] or ["a","b","c"] 139 | Maps are started with '{' and ended with '}' and enclose a comma-separated list 140 | of pairs that are separated by ':' 141 | {1:1,2:2,3:3} or {"a":1, "b":2, "c":3} 142 | All collections must be enclosed in double-quotes. 143 | 144 | ### Username/Password 145 | 146 | If you specify either the username or the password, then you must specify both. 147 | 148 | ### Boolean Style 149 | 150 | boolStyle is a case-insensitive test of the True and False strings. For the 151 | different styles, the True and False strings are as follows: 152 | 153 | ``` 154 | Style | True | False 155 | ------------|------|------- 156 | 1_0 | 1 | 0 157 | Y_N | Y | N 158 | T_F | T | F 159 | YES_NO | YES | NO 160 | TRUE_FALSE | TRUE | FALSE 161 | ``` 162 | 163 | ### Configuration file 164 | 165 | configFile is a file with configuration options that are formatted just like on 166 | the command line. This allows you to not specify arguments on the command line. 167 | For example, you can specify passwords in the configFile and avoid having them on 168 | the command line. The format is one switch and option per line: 169 | 170 | ``` 171 | -pw mypassword 172 | -host 1.2.3.4 173 | ``` 174 | 175 | ### Miscellaneous 176 | 177 | numFutures is a way to control the level of parallelism, but at some point 178 | too many will actually slow down the load. The default of 1000 is a decent 179 | place to start. 180 | 181 | If you use quoted strings, you need to use double-quotes. To escape a double-quote inside a quoted string, use the backslash to escape it ("\""). To create a backslash inside a quoted string, use two backslashes in a row ("\\"). 
If you quote your string, it will not be trimmed, but if you do not quote your string it will be trimmed. 182 | 183 | Loading into counter columns is not supported. 184 | 185 | The default nullString is the empty string. If you want empty strings to be saved as empty strings, set the nullString to something else. 186 | 187 | If you do not set the successDir then files that successfully loaded will remain in their input directory. The same is true for failed files if you do not set the failureDir. You cannot set either if the input file is "stdin". 188 | 189 | When using `jsonline`, all JSON field names are case-sensitive. When using `jsonline` or `jsonarray`, the `-keyspace` and `-table` arguments are case-sensitive. 190 | 191 | ## Usage Statement: 192 | 193 | ``` 194 | version: 0.0.27 195 | Usage: -f -host [OPTIONS] 196 | OPTIONS: 197 | -schema Table schema (when using delim) 198 | -table Table name (when using json) 199 | -keyspace Keyspace name (when using json) 200 | -configFile File with configuration options 201 | -delim Delimiter to use [,] 202 | -charsPerColumn Max number of chars per column [4096] 203 | -dateFormat Date format for TIMESTAMP [default for Locale.ENGLISH] 204 | -localDateFormat Date format for DATE [yyyy-MM-dd] 205 | -nullString String that signifies NULL [none] 206 | -comment Comment symbol to use [none] 207 | -skipRows Number of rows to skip [0] 208 | -skipCols Comma-separated list of columsn to skip in the input file 209 | -maxRows Maximum number of rows to read (-1 means all) [-1] 210 | -maxErrors Maximum parse errors to endure [10] 211 | -badDir Directory for where to place badly parsed rows. 
[none] 212 | -port CQL Port Number [9042] 213 | -user Cassandra username [none] 214 | -pw Password for user [none] 215 | -ssl-truststore-path Path to SSL truststore [none] 216 | -ssl-truststore-pw Password for SSL truststore [none] 217 | -ssl-keystore-path Path to SSL keystore [none] 218 | -ssl-keystore-pw Password for SSL keystore [none] 219 | -consistencyLevel Consistency level [LOCAL_ONE] 220 | -numFutures Number of CQL futures to keep in flight [1000] 221 | -batchSize Number of INSERTs to batch together [1] 222 | -decimalDelim Decimal delimiter [.] Other option is ',' 223 | -boolStyle Style for booleans [TRUE_FALSE] 224 | -numThreads Number of concurrent threads (files) to load [num cores] 225 | -queryTimeout <# seconds> Query timeout (in seconds) [2] 226 | -numRetries Number of times to retry the INSERT [1] 227 | -maxInsertErrors <# errors> Maximum INSERT errors to endure [10] 228 | -rate Maximum insert rate [50000] 229 | -progressRate How often to report the insert rate [100000] 230 | -rateFile Where to print the rate statistics 231 | -successDir Directory where to move successfully loaded files 232 | -failureDir Directory where to move files that did not successfully load 233 | -nullsUnset [false|true] Treat nulls as unset [faslse] 234 | -format [delim|jsonline|jsonarray] Format of data: delimited or JSON [delim] 235 | -table Table name (when using JSON) 236 | -keyspace Keyspace name (when using JSON) 237 | -ttl TTL for all rows in this invocation [unset] 238 | 239 | 240 | Examples: 241 | cassandra-loader -f /path/to/file.csv -host localhost -schema "test.test3(a, b, c)" 242 | cassandra-loader -f /path/to/directory -host 1.2.3.4 -schema "test.test3(a, b, c)" -delim "\t" -numThreads 10 243 | cassandra-loader -f stdin -host localhost -schema "test.test3(a, b, c)" -user myuser -pw mypassword 244 | ``` 245 | 246 | ##Examples: 247 | 248 | Load file /path/to/file.csv into the test3 table in the test keyspace using 249 | the cluster at localhost. 
Use the default options: 250 | 251 | ``` 252 | cassandra-loader -f /path/to/file.csv -host localhost -schema "test.test3(a, b, c)" 253 | ``` 254 | 255 | Load all the files from /path/to/directory into the test3 table in the test 256 | keyspace using the cluster at 1.2.3.4. Use 10 threads and use tab as the 257 | delimiter: 258 | 259 | ``` 260 | cassandra-loader -f /path/to/directory -host 1.2.3.4 -schema "test.test3(a, b, c)" -delim "\t" -numThreads 10 261 | ``` 262 | 263 | Load the data from stdin into the test3 table in the test keyspace using the 264 | cluster at localhost. Use "myuser" as the username and "mypassword" as the 265 | password: 266 | 267 | ``` 268 | cassandra-loader -f stdin -host localhost -schema "test.test3(a, b, c)" -user myuser -pw mypassword 269 | ``` 270 | 271 | ## Sample 272 | 273 | Included here is a set of sample data. It is in the sample/ directory. 274 | You can set up the table and keyspace by running: 275 | 276 | ``` 277 | cqlsh -f sample/cassandra-schema.cql 278 | ``` 279 | 280 | To load the data, run: 281 | 282 | ``` 283 | cd sample 284 | ./load.sh 285 | ``` 286 | 287 | To check that things have succeeded, you can run: 288 | 289 | ``` 290 | wc -l titanic.csv 291 | ``` 292 | 293 | And: 294 | 295 | ``` 296 | cqlsh -e "SELECT COUNT(*) FROM titanic.survivors" 297 | ``` 298 | 299 | Both should return 891. 300 | 301 | 302 | 303 | ## cassandra-unloader 304 | cassandra-unloader is a utility to dump the contents 305 | of a Cassandra table to delimited file format. 
It uses 306 | the same sorts of options as cassandra-loader so that the 307 | output of cassandra-unloader could be piped into 308 | cassandra-loader: 309 | 310 | ``` 311 | cassandra-unloader -f stdout -host host1 -schema "ks.table(a,b,c)" | cassandra-loader -f stdin -host host2 -schema "ks2.table2(x,y,z)" 312 | ``` 313 | 314 | Get it with wget: 315 | ``` 316 | wget https://github.com/brianmhess/cassandra-loader/releases/download/v0.0.27/cassandra-unloader 317 | ``` 318 | 319 | To build, run: 320 | 321 | ``` 322 | gradle unloader 323 | ``` 324 | 325 | To run cassandra-unloader, simply run the cassandra-unloader executable 326 | (e.g., located at build/cassandra-unloader): 327 | 328 | ``` 329 | cassandra-unloader 330 | ``` 331 | 332 | ###Usage statement: 333 | 334 | ``` 335 | version: 0.0.27 336 | Usage: -f -host -schema [OPTIONS] 337 | OPTIONS: 338 | -configFile File with configuration options 339 | -format [delim|jsonline|jsonarray] Format of data: delimited or JSON [delim] 340 | -delim Delimiter to use [,] 341 | -dateFormat Date format for TIMESTAMP [default for Locale.ENGLISH] 342 | -localDateFormat Date format for DATE [yyyy-MM-dd] 343 | -nullString String that signifies NULL [none] 344 | -port CQL Port Number [9042] 345 | -user Cassandra username [none] 346 | -pw Password for user [none] 347 | -ssl-truststore-path Path to SSL truststore [none] 348 | -ssl-truststore-pw Password for SSL truststore [none] 349 | -ssl-keystore-path Path to SSL keystore [none] 350 | -ssl-keystore-pw Password for SSL keystore [none] 351 | -consistencyLevel Consistency level [LOCAL_ONE] 352 | -decimalDelim Decimal delimiter [.] 
Other option is ',' 353 | -boolStyle Style for booleans [TRUE_FALSE] 354 | -numThreads Number of concurrent threads to unload [5] 355 | -beginToken Begin token [none] 356 | -endToken End token [none] 357 | -where WHERE clause [none] 358 | -fetchSize Fetch size to use [0] 359 | ``` 360 | 361 | A few simple examples using the `-where` are as follows: 362 | 363 | ``` 364 | cassandra-unloader -host localhost -f stdout -schema "testks.testtable(pkey,ccol,x,y)" -where "pkey=5" 365 | cassandra-unloader -host localhost -f stdout -schema "testks.testtable(pkey,ccol,x,y)" -where "x = 100 ALLOW FILTERING" 366 | ``` 367 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'java' 2 | apply plugin: 'application' 3 | 4 | def versionNum = '0.0.27' 5 | 6 | allprojects { 7 | tasks.withType(JavaCompile) { 8 | options.compilerArgs << "-Xlint:unchecked" 9 | options.compilerArgs << "-Xlint:deprecation" 10 | } 11 | } 12 | 13 | task loader(type: Exec) { 14 | dependsOn << 'uberloader' 15 | commandLine './src/make/buildit.sh' 16 | } 17 | 18 | task unloader(type: Exec) { 19 | dependsOn << 'uberunloader' 20 | commandLine './src/make/unloader.sh' 21 | } 22 | 23 | jar { 24 | baseName = 'cassandra-loader' 25 | version = versionNum 26 | } 27 | 28 | repositories { 29 | mavenCentral() 30 | } 31 | 32 | dependencies { 33 | compile 'com.datastax.cassandra:cassandra-driver-core:3.2.0' 34 | compile 'org.xerial.snappy:snappy-java:1.0.5' 35 | compile 'net.jpountz.lz4:lz4:1.2.0' 36 | compile 'ch.qos.logback:logback-classic:1.1.3' 37 | compile 'com.univocity:univocity-parsers:2.2.3' 38 | compile 'org.apache.commons:commons-lang3:3.0' 39 | compile group: 'com.googlecode.json-simple', name: 'json-simple', version: '1.1.1' 40 | } 41 | 42 | task uberloader(type: Jar) { 43 | dependsOn << 'compileJava' 44 | from "src/main/resources/logback.xml" 45 | from 
files(sourceSets.main.output.classesDir) 46 | from(configurations.runtime.asFileTree.files.collect { zipTree(it) }) { 47 | exclude "META-INF/**" 48 | } 49 | manifest { 50 | attributes "Main-Class" : 'com.datastax.loader.CqlDelimLoad' 51 | } 52 | baseName = 'cassandra-loader-uber' 53 | version = versionNum 54 | } 55 | 56 | task uberunloader(type: Jar) { 57 | dependsOn << 'compileJava' 58 | from "src/main/resources/logback.xml" 59 | from files(sourceSets.main.output.classesDir) 60 | from(configurations.runtime.asFileTree.files.collect { zipTree(it) }) { 61 | exclude "META-INF/**" 62 | } 63 | manifest { 64 | attributes "Main-Class" : 'com.datastax.loader.CqlDelimUnload' 65 | } 66 | baseName = 'cassandra-unloader-uber' 67 | version = versionNum 68 | } 69 | -------------------------------------------------------------------------------- /sample/cassandra-schema.cql: -------------------------------------------------------------------------------- 1 | create keyspace titanic WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}; 2 | 3 | use titanic; 4 | 5 | CREATE TABLE survivors ( 6 | id varchar, 7 | survived boolean, 8 | passenger_class int, 9 | name varchar, 10 | sex varchar, 11 | age int, 12 | num_siblings_spouse int, 13 | num_parents_children int, 14 | ticket_id varchar, 15 | fare varchar, 16 | cabin varchar, 17 | port_of_embarkation varchar, 18 | primary key (id)); -------------------------------------------------------------------------------- /sample/load.sh: -------------------------------------------------------------------------------- 1 | ../build/cassandra-loader -f titanic.csv -host localhost -schema "titanic.survivors(id, survived, passenger_class, name, sex, age, num_siblings_spouse, num_parents_children, ticket_id, fare, cabin, port_of_embarkation)" -boolStyle 1_0 2 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/AbstractDynamicRateLimiter.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datastax.loader; 17 | 18 | public abstract class AbstractDynamicRateLimiter extends RateLimiter { 19 | protected long lastCheck = 0; 20 | protected long howOften; 21 | protected double lastStat; 22 | protected double maxStat; 23 | protected double minStat; 24 | protected double downFraction; 25 | protected double upFraction; 26 | protected boolean invertLogic; 27 | 28 | public AbstractDynamicRateLimiter(double inRate, long inHowOften, 29 | double inMaxStat, double inMinStat, 30 | double inDownFraction, 31 | double inUpFraction, 32 | boolean inInvertLogic) { 33 | this(inRate, Long.MAX_VALUE, inHowOften, inMaxStat, inMinStat, 34 | inDownFraction, inUpFraction, inInvertLogic); 35 | } 36 | 37 | public AbstractDynamicRateLimiter(double inRate, long inUpdateRate, 38 | long inHowOften, double inMaxStat, 39 | double inMinStat, double inDownFraction, 40 | double inUpFraction, 41 | boolean inInvertLogic) { 42 | super(inRate); 43 | howOften = inHowOften; 44 | maxStat = inMaxStat; 45 | minStat = inMinStat; 46 | downFraction = inDownFraction; 47 | upFraction = inUpFraction; 48 | invertLogic = inInvertLogic; 49 | } 50 | 51 | public void acquire() { 52 | this.acquire(1); 53 | } 54 | 55 | public synchronized void acquire(int permits) { 56 | long currTime 
= System.currentTimeMillis(); 57 | if (currTime - lastCheck > howOften) { 58 | adjustRate(); 59 | lastCheck = currTime; 60 | } 61 | super.acquire(permits); 62 | } 63 | 64 | protected synchronized void adjustRate() { 65 | double currStat = getCurrStat(); 66 | if (statTooHigh(currStat)) { 67 | if (invertLogic) 68 | adjustRateUp(); 69 | else 70 | adjustRateDown(); 71 | System.err.println("Adjusting rate down : " + currStat + " > " + maxStat + " " + super.getRate()); 72 | } 73 | else if (statTooLow(currStat)) { 74 | if (invertLogic) 75 | adjustRateDown(); 76 | else 77 | adjustRateUp(); 78 | System.err.println("Adjusting rate up : " + currStat + " > " + maxStat + " " + super.getRate()); 79 | } 80 | } 81 | 82 | protected synchronized boolean statTooHigh(double currStat) { 83 | return currStat > maxStat; 84 | } 85 | 86 | protected synchronized boolean statTooLow(double currStat) { 87 | return currStat < minStat; 88 | } 89 | 90 | protected synchronized void adjustRateDown() { 91 | double currRate = super.getRate(); 92 | super.setRate(currRate - (currRate * downFraction)); 93 | } 94 | 95 | protected synchronized void adjustRateUp() { 96 | double currRate = super.getRate(); 97 | super.setRate(currRate + (currRate * upFraction)); 98 | } 99 | 100 | protected abstract double getCurrStat(); 101 | } 102 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/CqlDelimLoadTask.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datastax.loader; 17 | 18 | import com.datastax.driver.core.BatchStatement; 19 | import com.datastax.driver.core.BoundStatement; 20 | import com.datastax.driver.core.ConsistencyLevel; 21 | import com.datastax.driver.core.PreparedStatement; 22 | import com.datastax.driver.core.ResultSetFuture; 23 | import com.datastax.driver.core.Session; 24 | import com.datastax.loader.futures.FutureManager; 25 | import com.datastax.loader.futures.PrintingFutureSet; 26 | import com.datastax.loader.futures.JsonPrintingFutureSet; 27 | import com.datastax.loader.parser.BooleanParser; 28 | import org.json.simple.JSONArray; 29 | import org.json.simple.JSONObject; 30 | import org.json.simple.parser.JSONParser; 31 | 32 | import java.io.BufferedOutputStream; 33 | import java.io.BufferedReader; 34 | import java.io.File; 35 | import java.io.FileOutputStream; 36 | import java.io.FileInputStream; 37 | import java.io.InputStream; 38 | import java.io.IOException; 39 | import java.io.InputStreamReader; 40 | import java.io.PrintStream; 41 | import java.nio.file.Files; 42 | import java.nio.file.Path; 43 | import java.nio.file.Paths; 44 | import java.nio.file.StandardCopyOption; 45 | import java.text.ParseException; 46 | import java.util.List; 47 | import java.util.Locale; 48 | import java.util.concurrent.Callable; 49 | import java.util.concurrent.TimeUnit; 50 | import java.util.zip.GZIPInputStream; 51 | import java.util.zip.ZipException; 52 | 53 | class CqlDelimLoadTask implements Callable { 54 | private String BADPARSE = ".BADPARSE"; 
55 | private String BADINSERT = ".BADINSERT"; 56 | private String LOG = ".LOG"; 57 | private Session session; 58 | private String insert; 59 | private PreparedStatement statement; 60 | private BatchStatement batch; 61 | private StringBuilder batchString; 62 | private FutureManager fm; 63 | private ConsistencyLevel consistencyLevel; 64 | private CqlDelimParser cdp; 65 | private long maxErrors; 66 | private long skipRows; 67 | private String skipCols = null; 68 | private long maxRows; 69 | private String badDir; 70 | private String successDir; 71 | private String failureDir; 72 | private String readerName; 73 | private PrintStream badParsePrinter = null; 74 | private PrintStream badInsertPrinter = null; 75 | private PrintStream logPrinter = null; 76 | private String logFname = ""; 77 | private BufferedReader reader; 78 | private File infile; 79 | private int numFutures; 80 | private int batchSize; 81 | private long numInserted; 82 | private int ttl = -1; 83 | 84 | private String cqlSchema; 85 | private Locale locale = null; 86 | private BooleanParser.BoolStyle boolStyle = null; 87 | private String dateFormatString = null; 88 | private String localDateFormatString = null; 89 | private String nullString = null; 90 | private String commentString = null; 91 | private String delimiter = null; 92 | private int charsPerColumn = 4096; 93 | private TimeUnit unit = TimeUnit.SECONDS; 94 | private long queryTimeout = 2; 95 | private int numRetries = 1; 96 | private long maxInsertErrors = 10; 97 | private long insertErrors = 0; 98 | private boolean nullsUnset; 99 | private String format = "delim"; 100 | private String keyspace = null; 101 | private String table = null; 102 | private JSONArray jsonArray; 103 | 104 | public CqlDelimLoadTask(String inCqlSchema, String inDelimiter, 105 | int inCharsPerColumn, 106 | String inNullString, String inCommentString, 107 | String inDateFormatString, String inLocalDateFormatString, 108 | BooleanParser.BoolStyle inBoolStyle, 109 | Locale 
inLocale, 110 | long inMaxErrors, long inSkipRows, 111 | String inSkipCols, long inMaxRows, 112 | String inBadDir, File inFile, 113 | Session inSession, ConsistencyLevel inCl, 114 | int inNumFutures, int inBatchSize, int inNumRetries, 115 | int inQueryTimeout, long inMaxInsertErrors, 116 | String inSuccessDir, String inFailureDir, 117 | boolean inNullsUnset, String inFormat, 118 | String inKeyspace, String inTable, int inTtl) { 119 | super(); 120 | cqlSchema = inCqlSchema; 121 | delimiter = inDelimiter; 122 | charsPerColumn = inCharsPerColumn; 123 | nullString = inNullString; 124 | commentString = inCommentString; 125 | dateFormatString = inDateFormatString; 126 | localDateFormatString = inLocalDateFormatString; 127 | boolStyle = inBoolStyle; 128 | locale = inLocale; 129 | maxErrors = inMaxErrors; 130 | skipRows = inSkipRows; 131 | skipCols = inSkipCols; 132 | maxRows = inMaxRows; 133 | badDir = inBadDir; 134 | infile = inFile; 135 | session = inSession; 136 | consistencyLevel = inCl; 137 | numFutures = inNumFutures; 138 | batchSize = inBatchSize; 139 | numRetries = inNumRetries; 140 | queryTimeout = inQueryTimeout; 141 | maxInsertErrors = inMaxInsertErrors; 142 | successDir = inSuccessDir; 143 | failureDir = inFailureDir; 144 | nullsUnset = inNullsUnset; 145 | format = inFormat; 146 | keyspace = inKeyspace; 147 | table = inTable; 148 | ttl = inTtl; 149 | } 150 | 151 | public Long call() throws IOException, ParseException, org.json.simple.parser.ParseException { 152 | setup(); 153 | numInserted = execute(); 154 | return numInserted; 155 | } 156 | 157 | private void setup() throws IOException, ParseException, org.json.simple.parser.ParseException { 158 | if (null == infile) { 159 | reader = new BufferedReader(new InputStreamReader(System.in)); 160 | readerName = "stdin"; 161 | } 162 | else { 163 | InputStream is = null; 164 | try { 165 | is = new GZIPInputStream(new FileInputStream(infile)); 166 | } 167 | catch (ZipException e) { 168 | is = new 
FileInputStream(infile); 169 | } 170 | reader = new BufferedReader(new InputStreamReader(is)); 171 | readerName = infile.getName(); 172 | } 173 | 174 | //setup json reader 175 | if(format.equalsIgnoreCase("jsonarray")){ 176 | JSONParser parser = new JSONParser(); 177 | jsonArray= (JSONArray) parser.parse(reader); 178 | } 179 | 180 | // Prepare Badfile 181 | if (null != badDir) { 182 | badParsePrinter = new PrintStream(new BufferedOutputStream(new FileOutputStream(badDir + "/" + readerName + BADPARSE))); 183 | badInsertPrinter = new PrintStream(new BufferedOutputStream(new FileOutputStream(badDir + "/" + readerName + BADINSERT))); 184 | logFname = badDir + "/" + readerName + LOG; 185 | logPrinter = new PrintStream(new BufferedOutputStream(new FileOutputStream(logFname))); 186 | } 187 | 188 | if (format.equalsIgnoreCase("delim")) { 189 | cdp = new CqlDelimParser(cqlSchema, delimiter, charsPerColumn, 190 | nullString, commentString, 191 | dateFormatString, localDateFormatString, 192 | boolStyle, locale, 193 | skipCols, session, true, ttl); 194 | } 195 | else if (format.equalsIgnoreCase("jsonline") 196 | || format.equalsIgnoreCase("jsonarray")) { 197 | cdp = new CqlDelimParser(keyspace, table, delimiter, charsPerColumn, 198 | nullString, commentString, 199 | dateFormatString, localDateFormatString, 200 | boolStyle, locale, 201 | skipCols, session, true, ttl); 202 | } 203 | 204 | insert = cdp.generateInsert(); 205 | statement = session.prepare(insert); 206 | statement.setRetryPolicy(new LoaderRetryPolicy(numRetries)); 207 | statement.setConsistencyLevel(consistencyLevel); 208 | batch = new BatchStatement(BatchStatement.Type.UNLOGGED); 209 | batchString = new StringBuilder(); 210 | if (format.equalsIgnoreCase("delim")) { 211 | fm = new PrintingFutureSet(numFutures, queryTimeout, 212 | maxInsertErrors, logPrinter, 213 | badInsertPrinter); 214 | } 215 | else if (format.equalsIgnoreCase("jsonline") 216 | || format.equalsIgnoreCase("jsonarray")) { 217 | fm = new 
JsonPrintingFutureSet(numFutures, queryTimeout, 218 | maxInsertErrors, logPrinter, 219 | badInsertPrinter); 220 | } 221 | } 222 | 223 | private void cleanup(boolean success) throws IOException { 224 | if (null != badParsePrinter) { 225 | if (format.equalsIgnoreCase("jsonarray")) 226 | badParsePrinter.println("]"); 227 | badParsePrinter.close(); 228 | } 229 | if (null != badInsertPrinter) { 230 | if (format.equalsIgnoreCase("jsonarray")) 231 | badInsertPrinter.println("]"); 232 | badInsertPrinter.close(); 233 | } 234 | if (null != logPrinter) 235 | logPrinter.close(); 236 | if (success) { 237 | if (null != successDir) { 238 | Path src = infile.toPath(); 239 | Path dst = Paths.get(successDir); 240 | Files.move(src, dst.resolve(src.getFileName()), 241 | StandardCopyOption.REPLACE_EXISTING); 242 | } 243 | } 244 | else { 245 | if (null != failureDir) { 246 | Path src = infile.toPath(); 247 | Path dst = Paths.get(failureDir); 248 | Files.move(src, dst.resolve(src.getFileName()), 249 | StandardCopyOption.REPLACE_EXISTING); 250 | } 251 | } 252 | } 253 | 254 | private int sendInsert(List elements, String line) { 255 | BoundStatement bind = statement.bind(elements.toArray()); 256 | ResultSetFuture resultSetFuture; 257 | int retval = 0; 258 | if (nullsUnset) { 259 | for (int i = 0; i < elements.size(); i++) 260 | if (null == elements.get(i)) 261 | bind.unset(i); 262 | } 263 | if (1 == batchSize) { 264 | resultSetFuture = session.executeAsync(bind); 265 | if (!fm.add(resultSetFuture, line)) { 266 | System.err.println("There was an error. 
Please check the log file for more information (" + logFname + ")"); 267 | //cleanup(false); 268 | return -2; 269 | } 270 | //numInserted += 1; 271 | retval = 1; 272 | } 273 | else { 274 | batch.add(bind); 275 | batchString.append("\n").append(line); 276 | if (batchSize == batch.size()) { 277 | resultSetFuture = session.executeAsync(batch); 278 | if (!fm.add(resultSetFuture, batchString.toString())) { 279 | System.err.println("There was an error. Please check the log file for more information (" + logFname + ")"); 280 | //cleanup(false); 281 | return -2; 282 | } 283 | int numInserted = batch.size(); 284 | batch.clear(); 285 | batchString.setLength(0); 286 | retval = numInserted; 287 | } 288 | } 289 | return retval; 290 | } 291 | 292 | private long execute() throws IOException { 293 | String line = null; 294 | int lineNumber = 0; 295 | long numInserted = 0; 296 | int numErrors = 0; 297 | int curBatch = 0; 298 | BoundStatement bind = null; 299 | List elements = null; 300 | 301 | System.err.println("*** Processing " + readerName); 302 | if (format.equalsIgnoreCase("delim") 303 | || format.equalsIgnoreCase("jsonline")) { 304 | while ((line = reader.readLine()) != null) { 305 | lineNumber++; 306 | if (skipRows > 0) { 307 | skipRows--; 308 | continue; 309 | } 310 | if (maxRows-- < 0) 311 | break; 312 | 313 | if (0 == line.trim().length()) 314 | continue; 315 | 316 | elements = null; 317 | if (format.equalsIgnoreCase("delim")) 318 | elements = cdp.parse(line); 319 | else if (format.equalsIgnoreCase("jsonline")) 320 | elements = cdp.parseJson(line); 321 | if (null != elements) { 322 | int ret = sendInsert(elements, line); 323 | if (-2 == ret) { 324 | cleanup(false); 325 | return -2; 326 | } 327 | numInserted += ret; 328 | } 329 | else { 330 | if (null != logPrinter) { 331 | logPrinter.println(String.format("Error parsing line %d in %s: %s", lineNumber, readerName, line)); 332 | } 333 | System.err.println(String.format("Error parsing line %d in %s: %s", lineNumber, 
readerName, line)); 334 | if (null != badParsePrinter) { 335 | badParsePrinter.println(line); 336 | } 337 | numErrors++; 338 | if (maxErrors <= numErrors) { 339 | if (null != logPrinter) { 340 | logPrinter.println(String.format("Maximum number of errors exceeded (%d) for %s", numErrors, readerName)); 341 | } 342 | System.err.println(String.format("Maximum number of errors exceeded (%d) for %s", numErrors, readerName)); 343 | cleanup(false); 344 | return -1; 345 | } 346 | } 347 | } 348 | } // if (format.equalsIgnoreCase("delim")) 349 | else if (format.equalsIgnoreCase("jsonarray")) { 350 | boolean firstBadJson = true; 351 | String badJsonDelim = "[\n"; 352 | List columnBackbone = cdp.getColumnNames(); 353 | int columnCount = columnBackbone.size(); 354 | for (Object o : jsonArray) { 355 | JSONObject jsonRow = (JSONObject) o; 356 | String[] jsonElements = new String[columnCount]; 357 | jsonElements[0] = jsonRow.get(columnBackbone.get(0)).toString(); 358 | for (int i = 1; i < columnCount; i++) { 359 | if (null != jsonRow.get(columnBackbone.get(i))) { 360 | jsonElements[i] = jsonRow.get(columnBackbone.get(i)).toString(); 361 | } else { 362 | jsonElements[i] = null; 363 | } 364 | } 365 | if (null != (elements = cdp.parse(jsonElements))) { 366 | int ret = sendInsert(elements, line); 367 | if (-2 == ret) { 368 | cleanup(false); 369 | return -2; 370 | } 371 | numInserted += ret; 372 | } else { 373 | String badString = jsonRow.toJSONString(); 374 | if (null != logPrinter) { 375 | logPrinter.println(String.format("Error parsing JSON item %d in %s: %s", lineNumber, readerName, badString)); 376 | } 377 | System.err.println(String.format("Error parsing JSON item %d in %s: %s", lineNumber, readerName, badString)); 378 | if (null != badParsePrinter) { 379 | badParsePrinter.println(badJsonDelim + badString); 380 | if (firstBadJson) { 381 | firstBadJson = false; 382 | badJsonDelim = ",\n"; 383 | } 384 | } 385 | numErrors++; 386 | if (maxErrors <= numErrors) { 387 | if (null != 
logPrinter) { 388 | logPrinter.println(String.format("Maximum number of errors exceeded (%d) for %s", numErrors, readerName)); 389 | } 390 | System.err.println(String.format("Maximum number of errors exceeded (%d) for %s", numErrors, readerName)); 391 | cleanup(false); 392 | return -1; 393 | } 394 | } 395 | } 396 | }// if (format.equalsIgnoreCase("json")) 397 | 398 | // Send last partially filled batch 399 | if ((batchSize > 1) && (batch.size() > 0)) { 400 | ResultSetFuture resultSetFuture = session.executeAsync(batch); 401 | if (!fm.add(resultSetFuture, line)) { 402 | cleanup(false); 403 | return -2; 404 | } 405 | numInserted += batch.size(); 406 | } 407 | 408 | if (!fm.cleanup()) { 409 | cleanup(false); 410 | return -1; 411 | } 412 | 413 | if (null != logPrinter) { 414 | logPrinter.println("*** DONE: " + readerName + " number of lines processed: " + lineNumber + " (" + numInserted + " inserted)"); 415 | } 416 | System.err.println("*** DONE: " + readerName + " number of lines processed: " + lineNumber + " (" + numInserted + " inserted)"); 417 | 418 | cleanup(true); 419 | return fm.getNumInserted(); 420 | } 421 | } 422 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/CqlDelimParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader; 17 | 18 | import com.datastax.driver.core.ColumnMetadata; 19 | import com.datastax.driver.core.DataType; 20 | import com.datastax.driver.core.Row; 21 | import com.datastax.driver.core.Session; 22 | import com.datastax.driver.core.TableMetadata; 23 | import com.datastax.driver.core.KeyspaceMetadata; 24 | import com.datastax.driver.core.exceptions.InvalidTypeException; 25 | import com.datastax.loader.parser.BigDecimalParser; 26 | import com.datastax.loader.parser.BigIntegerParser; 27 | import com.datastax.loader.parser.BooleanParser; 28 | import com.datastax.loader.parser.ByteBufferParser; 29 | import com.datastax.loader.parser.ByteParser; 30 | import com.datastax.loader.parser.DateParser; 31 | import com.datastax.loader.parser.DelimParser; 32 | import com.datastax.loader.parser.DoubleParser; 33 | import com.datastax.loader.parser.FloatParser; 34 | import com.datastax.loader.parser.InetAddressParser; 35 | import com.datastax.loader.parser.IntegerParser; 36 | import com.datastax.loader.parser.ListParser; 37 | import com.datastax.loader.parser.LocalDateParser; 38 | import com.datastax.loader.parser.LongParser; 39 | import com.datastax.loader.parser.MapParser; 40 | import com.datastax.loader.parser.Parser; 41 | import com.datastax.loader.parser.SetParser; 42 | import com.datastax.loader.parser.ShortParser; 43 | import com.datastax.loader.parser.StringParser; 44 | import com.datastax.loader.parser.UUIDParser; 45 | 46 | import java.text.ParseException; 47 | import java.util.ArrayList; 48 | import java.util.HashMap; 49 | import java.util.List; 50 | import java.util.Locale; 51 | import java.util.Map; 52 | import java.util.Set; 53 | import java.util.regex.Matcher; 54 | import java.util.regex.Pattern; 55 | 56 | import org.json.simple.JSONObject; 57 | import org.json.simple.parser.JSONParser; 58 | 59 | import com.univocity.parsers.csv.CsvParser; 60 | import com.univocity.parsers.csv.CsvParserSettings; 61 | 62 | public class 
CqlDelimParser { 63 | private Map pmap; 64 | private List sbl; 65 | private List columnNames; 66 | private String keyspace; 67 | private String tablename; 68 | private DelimParser delimParser; 69 | private JSONParser jsonParser; 70 | private int ttl = -1; 71 | 72 | public CqlDelimParser(String inCqlSchema, String inDelimiter, int inCharsPerColumn, 73 | String inNullString, String inCommentString, 74 | String inDateFormatString, String inLocalDateFormatString, 75 | BooleanParser.BoolStyle inBoolStyle, Locale inLocale, 76 | String skipList, Session session, boolean bLoader, int inTtl) 77 | throws ParseException { 78 | // Optionally provide things for the line parser - date format, boolean format, locale 79 | ttl = inTtl; 80 | initPmap(inDateFormatString, inLocalDateFormatString, inBoolStyle, 81 | inLocale, bLoader); 82 | processCqlSchema(inCqlSchema, session); 83 | createDelimParser(inDelimiter, inCharsPerColumn, inNullString, inCommentString, skipList); 84 | } 85 | 86 | public CqlDelimParser(String inKeyspace, String inTable, String inDelimiter, 87 | int inCharsPerColumn, 88 | String inNullString, String inCommentString, 89 | String inDateFormatString, String inLocalDateFormatString, 90 | BooleanParser.BoolStyle inBoolStyle, Locale inLocale, 91 | String skipList, Session session, boolean bLoader, int inTtl) 92 | throws ParseException { 93 | // Optionally provide things for the line parser - date format, boolean format, locale 94 | ttl = inTtl; 95 | keyspace = inKeyspace; 96 | tablename = inTable; 97 | initPmap(inDateFormatString, inLocalDateFormatString, inBoolStyle, 98 | inLocale, bLoader); 99 | processCqlSchema(session); 100 | createDelimParser(inDelimiter, inCharsPerColumn, inNullString, inCommentString, skipList); 101 | } 102 | 103 | public List getColumnNames() { 104 | return columnNames; 105 | } 106 | 107 | private void setColumnNames(List columnNames) { 108 | this.columnNames = columnNames; 109 | } 110 | 111 | // used internally to store schema information 
112 | private class SchemaBits { 113 | public String name; 114 | public DataType.Name datatype; 115 | public Parser parser; 116 | } 117 | 118 | // intialize the Parsers and the parser map 119 | private void initPmap(String dateFormatString, String localDateFormatString, 120 | BooleanParser.BoolStyle inBoolStyle, 121 | Locale inLocale, boolean bLoader) { 122 | pmap = new HashMap(); 123 | Parser byteParser = new ByteParser(inLocale, bLoader); 124 | Parser shortParser = new ShortParser(inLocale, bLoader); 125 | Parser integerParser = new IntegerParser(inLocale, bLoader); 126 | Parser longParser = new LongParser(inLocale, bLoader); 127 | Parser floatParser = new FloatParser(inLocale, bLoader); 128 | Parser doubleParser = new DoubleParser(inLocale, bLoader); 129 | Parser stringParser = new StringParser(); 130 | Parser booleanParser = new BooleanParser(inBoolStyle); 131 | Parser uuidParser = new UUIDParser(); 132 | Parser bigDecimalParser = new BigDecimalParser(); 133 | Parser bigIntegerParser = new BigIntegerParser(); 134 | Parser byteBufferParser = new ByteBufferParser(); 135 | Parser inetAddressParser = new InetAddressParser(); 136 | Parser dateParser = new DateParser(dateFormatString); 137 | Parser localDateParser = new LocalDateParser(localDateFormatString); 138 | 139 | pmap.put(DataType.Name.ASCII, stringParser); 140 | pmap.put(DataType.Name.BIGINT, longParser); 141 | pmap.put(DataType.Name.BLOB, byteBufferParser); 142 | pmap.put(DataType.Name.BOOLEAN, booleanParser); 143 | pmap.put(DataType.Name.COUNTER, longParser); 144 | pmap.put(DataType.Name.DATE , localDateParser); 145 | pmap.put(DataType.Name.DECIMAL, bigDecimalParser); 146 | pmap.put(DataType.Name.DOUBLE, doubleParser); 147 | pmap.put(DataType.Name.FLOAT, floatParser); 148 | pmap.put(DataType.Name.INET, inetAddressParser); 149 | pmap.put(DataType.Name.INT, integerParser); 150 | pmap.put(DataType.Name.SMALLINT , shortParser); 151 | pmap.put(DataType.Name.TEXT, stringParser); 152 | pmap.put(DataType.Name.TIME 
, longParser); 153 | pmap.put(DataType.Name.TIMESTAMP, dateParser); 154 | pmap.put(DataType.Name.TIMEUUID, uuidParser); 155 | pmap.put(DataType.Name.TINYINT , byteParser); 156 | pmap.put(DataType.Name.UUID, uuidParser); 157 | pmap.put(DataType.Name.VARCHAR, stringParser); 158 | pmap.put(DataType.Name.VARINT, bigIntegerParser); 159 | 160 | } 161 | 162 | // Validate the CQL schema, extract the keyspace and tablename, and process the rest of the schema 163 | private void processCqlSchema(String cqlSchema, Session session) throws ParseException { 164 | CsvParserSettings ks_settings = new CsvParserSettings(); 165 | ks_settings.getFormat().setLineSeparator("\n"); 166 | ks_settings.getFormat().setDelimiter('.'); 167 | ks_settings.getFormat().setQuote('\"'); 168 | ks_settings.getFormat().setQuoteEscape('\\'); 169 | ks_settings.getFormat().setCharToEscapeQuoteEscaping('\\'); 170 | ks_settings.setKeepQuotes(true); 171 | ks_settings.setKeepEscapeSequences(true); 172 | ks_settings.setMaxCharsPerColumn(-1); 173 | CsvParser ks_parser = new CsvParser(ks_settings); 174 | String[] ks_elements = ks_parser.parseLine(cqlSchema); 175 | keyspace = ks_elements[0]; 176 | String table_string = cqlSchema.substring(keyspace.length() + 1); 177 | 178 | CsvParserSettings table_settings = new CsvParserSettings(); 179 | table_settings.getFormat().setLineSeparator("\n"); 180 | table_settings.getFormat().setDelimiter('('); 181 | table_settings.getFormat().setQuote('\"'); 182 | table_settings.getFormat().setQuoteEscape('\\'); 183 | table_settings.getFormat().setCharToEscapeQuoteEscaping('\\'); 184 | table_settings.setKeepQuotes(true); 185 | table_settings.setKeepEscapeSequences(true); 186 | table_settings.setMaxCharsPerColumn(-1); 187 | CsvParser table_parser = new CsvParser(table_settings); 188 | String[] table_elements = table_parser.parseLine(table_string); 189 | tablename = table_elements[0]; 190 | 191 | String schemaString = table_string.substring(tablename.length() + 1, 192 | 
table_string.length() - 1); 193 | 194 | sbl = schemaBits(schemaString, session); 195 | } 196 | 197 | private void processCqlSchema(Session session) throws ParseException { 198 | sbl = schemaBits(null, session); 199 | } 200 | 201 | 202 | private List schemaBits(String in, Session session) throws ParseException { 203 | KeyspaceMetadata km = session.getCluster().getMetadata().getKeyspace(keyspace); 204 | if (null == km) { 205 | System.err.println("Keyspace " + keyspace + " not found."); 206 | System.exit(-1); 207 | } 208 | TableMetadata tm = km.getTable(tablename); 209 | if (null == tm) { 210 | System.err.println("Table " + tablename + " not found."); 211 | System.exit(-1); 212 | } 213 | List inList = new ArrayList(); 214 | if (null != in) { 215 | CsvParserSettings settings = new CsvParserSettings(); 216 | settings.getFormat().setLineSeparator("\n"); 217 | settings.getFormat().setDelimiter(','); 218 | settings.getFormat().setQuote('\"'); 219 | settings.getFormat().setQuoteEscape('\\'); 220 | settings.getFormat().setCharToEscapeQuoteEscaping('\\'); 221 | settings.setKeepQuotes(true); 222 | settings.setKeepEscapeSequences(true); 223 | settings.setMaxCharsPerColumn(-1); 224 | CsvParser parser = new CsvParser(settings); 225 | String[] tlist = parser.parseLine(in); 226 | for (int i = 0; i < tlist.length; i++) 227 | inList.add(tlist[i].trim()); 228 | } 229 | else { 230 | for (ColumnMetadata cm : tm.getColumns()) 231 | inList.add("\""+cm.getName()+"\""); 232 | } 233 | //keep the list of columns from metadata to use as column backbone for JSON 234 | setColumnNames(inList); 235 | List sbl = new ArrayList(); 236 | for (int i = 0; i < inList.size(); i++) { 237 | String col = inList.get(i); 238 | SchemaBits sb = new SchemaBits(); 239 | ColumnMetadata cm = tm.getColumn(col); 240 | if (null == cm) { 241 | System.err.println("Column " + col + " of table " + keyspace + "." 
+ tablename + " not found"); 242 | System.exit(-1); 243 | } 244 | DataType dt = cm.getType(); 245 | sb.name = col; 246 | sb.datatype = dt.getName(); 247 | if (dt.isCollection()) { 248 | if (sb.datatype == DataType.Name.LIST) { 249 | DataType.Name listType = dt.getTypeArguments().get(0).getName(); 250 | Parser listParser = pmap.get(listType); 251 | if (null == listParser) { 252 | throw new ParseException("List data type not recognized (" 253 | + listType + ")", i); 254 | } 255 | sb.parser = new ListParser(listParser, ',', '[', ']'); 256 | } 257 | else if (sb.datatype == DataType.Name.SET) { 258 | DataType.Name setType = dt.getTypeArguments().get(0).getName(); 259 | Parser setParser = pmap.get(setType); 260 | if (null == setParser) { 261 | throw new ParseException("Set data type not recognized (" 262 | + setType + ")", i); 263 | } 264 | sb.parser = new SetParser(setParser, ',', '{', '}'); 265 | } 266 | else if (sb.datatype == DataType.Name.MAP) { 267 | DataType.Name keyType = dt.getTypeArguments().get(0).getName(); 268 | Parser keyParser = pmap.get(keyType); 269 | if (null == keyParser) { 270 | throw new ParseException("Map key data type not recognized (" 271 | + keyType + ")", i); 272 | } 273 | DataType.Name valueType = dt.getTypeArguments().get(1).getName(); 274 | Parser valueParser = pmap.get(valueType); 275 | if (null == valueParser) { 276 | throw new ParseException("Map value data type not recognized (" 277 | + valueType + ")", i); 278 | } 279 | sb.parser = new MapParser(keyParser, valueParser, ',', '{', '}', ':'); 280 | } 281 | else { 282 | throw new ParseException("Collection data type not recognized (" 283 | + sb.datatype + ")", i); 284 | } 285 | } 286 | else { 287 | sb.parser = pmap.get(sb.datatype); 288 | if (null == sb.parser) { 289 | throw new ParseException("Column data type not recognized (" + sb.datatype + ")", i); 290 | } 291 | } 292 | sbl.add(sb); 293 | } 294 | return sbl; 295 | } 296 | 297 | // Creates the DelimParser that will parse the line 298 | 
private void createDelimParser(String delimiter, int charsPerColumn, 299 | String nullString, String commentString, 300 | String skipList) throws NumberFormatException { 301 | delimParser = new DelimParser(delimiter, charsPerColumn, nullString, commentString); 302 | for (int i = 0; i < sbl.size(); i++) 303 | delimParser.add(sbl.get(i).parser); 304 | if (null != skipList) { 305 | for (String s : skipList.split(",")) { 306 | delimParser.addSkip(Integer.parseInt(s.trim())); 307 | } 308 | } 309 | jsonParser = new JSONParser(); 310 | } 311 | 312 | // Convenience method to return the INSERT statement for a PreparedStatement. 313 | public String generateInsert() { 314 | String insert = "INSERT INTO " + keyspace + "." + tablename + "(" + sbl.get(0).name; 315 | String qmarks = "?"; 316 | for (int i = 1; i < sbl.size(); i++) { 317 | insert = insert + ", " + sbl.get(i).name; 318 | qmarks = qmarks + ", ?"; 319 | } 320 | insert = insert + ") VALUES (" + qmarks + ")"; 321 | if (0 < ttl) 322 | insert = insert + " USING TTL " + ttl; 323 | return insert; 324 | } 325 | 326 | public String generateSelect() { 327 | String select = "SELECT " + sbl.get(0).name; 328 | for (int i = 1; i < sbl.size(); i++) { 329 | select = select + ", " + sbl.get(i).name; 330 | } 331 | select += " FROM " + keyspace + "." + tablename; 332 | return select; 333 | } 334 | 335 | public String getKeyspace() { 336 | return keyspace; 337 | } 338 | 339 | public String getTable() { 340 | return tablename; 341 | } 342 | 343 | // Pass through to parse the line - the DelimParser we created will be used. 
344 | public List parse(String line) { 345 | return delimParser.parse(line); 346 | } 347 | 348 | public List parse(String[] row) { 349 | return delimParser.parse(row); 350 | } 351 | 352 | @SuppressWarnings("unchecked") 353 | public List parseJson(String line) { 354 | JSONObject jsonObject = null; 355 | try { 356 | jsonObject = (JSONObject)jsonParser.parse(line); 357 | } catch (org.json.simple.parser.ParseException e) { 358 | System.err.println(String.format("Invalid format in input %d: %s",line, e.getMessage())); 359 | return null; 360 | } 361 | String[] row = new String[columnNames.size()]; 362 | Set fields = (Set)jsonObject.keySet(); 363 | for (int i = 0; i < columnNames.size(); i++) { 364 | String s = columnNames.get(i); 365 | Object o = jsonObject.get(s); 366 | if (null != o) 367 | row[i] = o.toString(); 368 | else row[i] = null; 369 | fields.remove(s); 370 | } 371 | if (0 != fields.size()) { 372 | for (String f : fields) { 373 | System.err.println("Unknown JSON field " + f); 374 | } 375 | return null; 376 | } 377 | return parse(row); 378 | } 379 | 380 | public String format(Row row) throws IndexOutOfBoundsException, InvalidTypeException { 381 | return delimParser.format(row); 382 | } 383 | 384 | public String formatJson(Row row) throws IndexOutOfBoundsException, InvalidTypeException { 385 | String[] stringVals = delimParser.stringVals(row); 386 | Map pairs = new HashMap(); 387 | for (int i = 0; i < sbl.size(); i++) 388 | pairs.put(sbl.get(i).name, stringVals[i]); 389 | return JSONObject.toJSONString(pairs); 390 | } 391 | 392 | } 393 | 394 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/CqlDelimUnload.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datastax.loader; 17 | 18 | import com.datastax.loader.parser.BooleanParser; 19 | 20 | import java.util.List; 21 | import java.util.ArrayList; 22 | import java.util.Map; 23 | import java.util.HashMap; 24 | import java.util.Set; 25 | import java.util.HashSet; 26 | import java.util.Deque; 27 | import java.util.ArrayDeque; 28 | import java.util.Locale; 29 | import java.util.concurrent.ExecutorService; 30 | import java.util.concurrent.Executors; 31 | import java.util.concurrent.Callable; 32 | import java.util.concurrent.Future; 33 | import java.util.concurrent.ExecutionException; 34 | import java.util.concurrent.TimeUnit; 35 | import java.math.BigInteger; 36 | import java.io.FileOutputStream; 37 | import java.io.BufferedOutputStream; 38 | import java.io.PrintStream; 39 | import java.io.File; 40 | import java.io.BufferedReader; 41 | import java.io.FileReader; 42 | import java.io.InputStreamReader; 43 | import java.io.InputStream; 44 | import java.io.FileInputStream; 45 | import java.io.IOException; 46 | import java.io.FileNotFoundException; 47 | import java.text.ParseException; 48 | import java.security.KeyStore; 49 | import java.security.SecureRandom; 50 | import java.security.KeyStoreException; 51 | import java.security.KeyManagementException; 52 | import java.security.NoSuchAlgorithmException; 53 | import java.security.UnrecoverableKeyException; 54 | import java.security.cert.CertificateException; 55 | import javax.net.ssl.KeyManagerFactory; 56 | import javax.net.ssl.SSLContext; 57 | import 
javax.net.ssl.TrustManagerFactory; 58 | 59 | import com.datastax.driver.core.Cluster; 60 | import com.datastax.driver.core.Session; 61 | import com.datastax.driver.core.ColumnMetadata; 62 | import com.datastax.driver.core.PreparedStatement; 63 | import com.datastax.driver.core.PoolingOptions; 64 | import com.datastax.driver.core.HostDistance; 65 | import com.datastax.driver.core.BoundStatement; 66 | import com.datastax.driver.core.ResultSet; 67 | import com.datastax.driver.core.ConsistencyLevel; 68 | import com.datastax.driver.core.Row; 69 | import com.datastax.driver.core.SSLOptions; 70 | import com.datastax.driver.core.RemoteEndpointAwareJdkSSLOptions; 71 | import com.datastax.driver.core.policies.TokenAwarePolicy; 72 | import com.datastax.driver.core.policies.DCAwareRoundRobinPolicy; 73 | import com.datastax.driver.core.exceptions.QueryValidationException; 74 | 75 | 76 | public class CqlDelimUnload { 77 | private String version = "0.0.27"; 78 | private String host = null; 79 | private int port = 9042; 80 | private String username = null; 81 | private String password = null; 82 | private String truststorePath = null; 83 | private String truststorePwd = null; 84 | private String keystorePath = null; 85 | private String keystorePwd = null; 86 | private ConsistencyLevel consistencyLevel = ConsistencyLevel.LOCAL_ONE; 87 | private Cluster cluster = null; 88 | private Session session = null; 89 | private String beginToken = "-9223372036854775808"; 90 | private String endToken = "9223372036854775807"; 91 | private String where = null; 92 | 93 | private String cqlSchema = null; 94 | private String filename = null; 95 | private String format = "delim"; 96 | 97 | private Locale locale = null; 98 | private BooleanParser.BoolStyle boolStyle = null; 99 | private String dateFormatString = null; 100 | private String localDateFormatString = "yyyy-MM-dd"; 101 | private String nullString = null; 102 | private String delimiter = null; 103 | 104 | private int numThreads = 5; 105 | 
private int fetchSize = 0; 106 | 107 | private String usage() { 108 | StringBuilder usage = new StringBuilder("version: ").append(version).append("\n"); 109 | usage.append("Usage: -f -host -schema [OPTIONS]\n"); 110 | usage.append("OPTIONS:\n"); 111 | usage.append(" -configFile File with configuration options\n"); 112 | usage.append(" -format [delim|jsonline|jsonarray] Format of data: delimited or JSON [delim]\n"); 113 | usage.append(" -delim Delimiter to use [,]\n"); 114 | usage.append(" -dateFormat Date format for TIMESTAMP [default for Locale.ENGLISH]\n"); 115 | usage.append(" -localDateFormat Date format for DATE [yyyy-MM-dd]\n"); 116 | usage.append(" -nullString String that signifies NULL [none]\n"); 117 | usage.append(" -port CQL Port Number [9042]\n"); 118 | usage.append(" -user Cassandra username [none]\n"); 119 | usage.append(" -pw Password for user [none]\n"); 120 | usage.append(" -ssl-truststore-path Path to SSL truststore [none]\n"); 121 | usage.append(" -ssl-truststore-pw Password for SSL truststore [none]\n"); 122 | usage.append(" -ssl-keystore-path Path to SSL keystore [none]\n"); 123 | usage.append(" -ssl-keystore-pw Password for SSL keystore [none]\n"); 124 | usage.append(" -consistencyLevel Consistency level [LOCAL_ONE]\n"); 125 | usage.append(" -decimalDelim Decimal delimiter [.] 
Other option is ','\n"); 126 | usage.append(" -boolStyle Style for booleans [TRUE_FALSE]\n"); 127 | usage.append(" -numThreads Number of concurrent threads to unload [5]\n"); 128 | usage.append(" -beginToken Begin token [none]\n"); 129 | usage.append(" -endToken End token [none]\n"); 130 | usage.append(" -where WHERE clause [none]\n"); 131 | usage.append(" -fetchSize Fetch size to use [0]\n"); 132 | return usage.toString(); 133 | } 134 | 135 | private boolean validateArgs() { 136 | if (!format.equalsIgnoreCase("delim") 137 | && !format.equalsIgnoreCase("jsonline") 138 | && !format.equalsIgnoreCase("jsonarray")) { 139 | System.err.println("Invalid format (" + format + ")"); 140 | return false; 141 | } 142 | if (numThreads < 1) { 143 | System.err.println("Number of threads must be positive"); 144 | return false; 145 | } 146 | if (fetchSize < 0) { 147 | System.err.println("Fetch size must be non-negative"); 148 | return false; 149 | } 150 | if ((null == username) && (null != password)) { 151 | System.err.println("If you supply the password, you must supply the username"); 152 | return false; 153 | } 154 | if ((null != username) && (null == password)) { 155 | System.err.println("If you supply the username, you must supply the password"); 156 | return false; 157 | } 158 | if (filename.equalsIgnoreCase("stdout")) { 159 | numThreads = 1; 160 | } 161 | if ((null == truststorePath) && (null != truststorePwd)) { 162 | System.err.println("If you supply the ssl-truststore-pw, you must supply the ssl-truststore-path"); 163 | return false; 164 | } 165 | if ((null != truststorePath) && (null == truststorePwd)) { 166 | System.err.println("If you supply the ssl-truststore-path, you must supply the ssl-truststore-pw"); 167 | return false; 168 | } 169 | if ((null == keystorePath) && (null != keystorePwd)) { 170 | System.err.println("If you supply the ssl-keystore-pw, you must supply the ssl-keystore-path"); 171 | return false; 172 | } 173 | if ((null != keystorePath) && (null == 
keystorePwd)) { 174 | System.err.println("If you supply the ssl-keystore-path, you must supply the ssl-keystore-pw"); 175 | return false; 176 | } 177 | File tfile = null; 178 | if (null != truststorePath) { 179 | tfile = new File(truststorePath); 180 | if (!tfile.isFile()) { 181 | System.err.println("truststore file must be a file"); 182 | return false; 183 | } 184 | } 185 | if (null != keystorePath) { 186 | tfile = new File(keystorePath); 187 | if (!tfile.isFile()) { 188 | System.err.println("keystore file must be a file"); 189 | return false; 190 | } 191 | } 192 | if ((null != beginToken) && (null == endToken)) { 193 | System.err.println("If you supply the beginToken then you need to specify the endToken"); 194 | return false; 195 | } 196 | if ((null == beginToken) && (null != endToken)) { 197 | System.err.println("If you supply the endToken then you need to specify the beginToken"); 198 | return false; 199 | } 200 | 201 | return true; 202 | } 203 | 204 | private boolean processConfigFile(String fname, Map amap) 205 | throws IOException, FileNotFoundException { 206 | File cFile = new File(fname); 207 | if (!cFile.isFile()) { 208 | System.err.println("Configuration File must be a file"); 209 | return false; 210 | } 211 | 212 | BufferedReader cReader = new BufferedReader(new FileReader(cFile)); 213 | String line; 214 | while ((line = cReader.readLine()) != null) { 215 | String[] fields = line.trim().split("\\s+"); 216 | if (2 != fields.length) { 217 | System.err.println("Bad line in config file: " + line); 218 | return false; 219 | } 220 | if (null == amap.get(fields[0])) { 221 | amap.put(fields[0], fields[1]); 222 | } 223 | } 224 | return true; 225 | } 226 | 227 | private boolean parseArgs(String[] args) 228 | throws IOException, FileNotFoundException { 229 | String tkey; 230 | if (args.length == 0) { 231 | System.err.println("No arguments specified"); 232 | return false; 233 | } 234 | if (0 != args.length % 2) 235 | return false; 236 | 237 | Map amap = new 
HashMap(); 238 | for (int i = 0; i < args.length; i+=2) { 239 | amap.put(args[i], args[i+1]); 240 | } 241 | 242 | if (null != (tkey = amap.remove("-configFile"))) 243 | if (!processConfigFile(tkey, amap)) 244 | return false; 245 | 246 | host = amap.remove("-host"); 247 | if (null == host) { // host is required 248 | System.err.println("Must provide a host"); 249 | return false; 250 | } 251 | 252 | filename = amap.remove("-f"); 253 | if (null == filename) { // filename is required 254 | System.err.println("Must provide an output filename stem"); 255 | return false; 256 | } 257 | 258 | cqlSchema = amap.remove("-schema"); 259 | if (null == cqlSchema) { // schema is required 260 | System.err.println("Must provide a schema"); 261 | return false; 262 | } 263 | 264 | if (null != (tkey = amap.remove("-port"))) port = Integer.parseInt(tkey); 265 | if (null != (tkey = amap.remove("-user"))) username = tkey; 266 | if (null != (tkey = amap.remove("-pw"))) password = tkey; 267 | if (null != (tkey = amap.remove("-ssl-truststore-path"))) truststorePath = tkey; 268 | if (null != (tkey = amap.remove("-ssl-truststore-pw"))) truststorePwd = tkey; 269 | if (null != (tkey = amap.remove("-ssl-keystore-path"))) keystorePath = tkey; 270 | if (null != (tkey = amap.remove("-ssl-keystore-pw"))) keystorePwd = tkey; 271 | if (null != (tkey = amap.remove("-consistencyLevel"))) consistencyLevel = ConsistencyLevel.valueOf(tkey); 272 | if (null != (tkey = amap.remove("-dateFormat"))) dateFormatString = tkey; 273 | if (null != (tkey = amap.remove("-localDateFormat"))) localDateFormatString = tkey; 274 | if (null != (tkey = amap.remove("-nullString"))) nullString = tkey; 275 | if (null != (tkey = amap.remove("-delim"))) delimiter = tkey; 276 | if (null != (tkey = amap.remove("-decimalDelim"))) { 277 | if (tkey.equals(",")) 278 | locale = Locale.FRANCE; 279 | } 280 | if (null != (tkey = amap.remove("-boolStyle"))) { 281 | boolStyle = BooleanParser.getBoolStyle(tkey); 282 | if (null == boolStyle) { 
283 | System.err.println("Bad boolean style. Options are: " + BooleanParser.getOptions()); 284 | return false; 285 | } 286 | } 287 | if (null != (tkey = amap.remove("-numThreads"))) numThreads = Integer.parseInt(tkey); 288 | if (null != (tkey = amap.remove("-beginToken"))) beginToken = tkey; 289 | if (null != (tkey = amap.remove("-endToken"))) endToken = tkey; 290 | if (null != (tkey = amap.remove("-where"))) where = tkey; 291 | if (null != (tkey = amap.remove("-format"))) format = tkey; 292 | if (null != (tkey = amap.remove("-fetchSize"))) fetchSize = Integer.parseInt(tkey); 293 | 294 | if (!amap.isEmpty()) { 295 | for (String k : amap.keySet()) 296 | System.err.println("Unrecognized option: " + k); 297 | return false; 298 | } 299 | return validateArgs(); 300 | } 301 | 302 | private SSLOptions createSSLOptions() 303 | throws KeyStoreException, FileNotFoundException, IOException, NoSuchAlgorithmException, 304 | KeyManagementException, CertificateException, UnrecoverableKeyException { 305 | TrustManagerFactory tmf = null; 306 | KeyStore tks = KeyStore.getInstance("JKS"); 307 | tks.load((InputStream) new FileInputStream(new File(truststorePath)), 308 | truststorePwd.toCharArray()); 309 | tmf = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm()); 310 | tmf.init(tks); 311 | 312 | KeyManagerFactory kmf = null; 313 | if (null != keystorePath) { 314 | KeyStore kks = KeyStore.getInstance("JKS"); 315 | kks.load((InputStream) new FileInputStream(new File(keystorePath)), 316 | keystorePwd.toCharArray()); 317 | kmf = KeyManagerFactory.getInstance(KeyManagerFactory.getDefaultAlgorithm()); 318 | kmf.init(kks, keystorePwd.toCharArray()); 319 | } 320 | 321 | SSLContext sslContext = SSLContext.getInstance("TLS"); 322 | sslContext.init(kmf != null? kmf.getKeyManagers() : null, 323 | tmf != null ? 
tmf.getTrustManagers() : null, 324 | new SecureRandom()); 325 | 326 | return RemoteEndpointAwareJdkSSLOptions.builder().withSSLContext(sslContext).build(); 327 | } 328 | 329 | private void setup() 330 | throws IOException, KeyStoreException, NoSuchAlgorithmException, KeyManagementException, 331 | CertificateException, UnrecoverableKeyException { 332 | // Connect to Cassandra 333 | PoolingOptions pOpts = new PoolingOptions(); 334 | pOpts.setCoreConnectionsPerHost(HostDistance.LOCAL, 4); 335 | pOpts.setMaxConnectionsPerHost(HostDistance.LOCAL, 4); 336 | Cluster.Builder clusterBuilder = Cluster.builder() 337 | .addContactPoint(host) 338 | .withPort(port) 339 | .withPoolingOptions(pOpts) 340 | .withLoadBalancingPolicy(new TokenAwarePolicy( DCAwareRoundRobinPolicy.builder().build())); 341 | if (null != username) 342 | clusterBuilder = clusterBuilder.withCredentials(username, password); 343 | if (null != truststorePath) 344 | clusterBuilder = clusterBuilder.withSSL(createSSLOptions()); 345 | 346 | cluster = clusterBuilder.build(); 347 | if (null == cluster) { 348 | throw new IOException("Could not create cluster"); 349 | } 350 | session = cluster.connect(); 351 | } 352 | 353 | private void cleanup() { 354 | if (null != session) 355 | session.close(); 356 | if (null != cluster) 357 | cluster.close(); 358 | } 359 | 360 | public boolean run(String[] args) 361 | throws IOException, ParseException, InterruptedException, ExecutionException, 362 | KeyStoreException, NoSuchAlgorithmException, KeyManagementException, 363 | CertificateException, UnrecoverableKeyException { 364 | if (false == parseArgs(args)) { 365 | System.err.println("Bad arguments"); 366 | System.err.println(usage()); 367 | return false; 368 | } 369 | 370 | // Setup 371 | setup(); 372 | 373 | PrintStream pstream = null; 374 | if (1 == numThreads) { 375 | if (filename.equalsIgnoreCase("stdout")) { 376 | pstream = System.out; 377 | } 378 | else { 379 | pstream = new PrintStream(new BufferedOutputStream(new 
FileOutputStream(filename + ".0"))); 380 | } 381 | beginToken = null; 382 | endToken = null; 383 | } 384 | 385 | // Launch Threads 386 | ExecutorService executor; 387 | long total = 0; 388 | if (null != pstream) { 389 | // One file/stdin to process 390 | executor = Executors.newSingleThreadExecutor(); 391 | Callable worker = new ThreadExecute(cqlSchema, delimiter, 392 | nullString, 393 | dateFormatString, 394 | localDateFormatString, 395 | boolStyle, locale, 396 | pstream, 397 | beginToken, 398 | endToken, session, 399 | consistencyLevel, where, 400 | format, fetchSize); 401 | Future res = executor.submit(worker); 402 | total = res.get(); 403 | executor.shutdown(); 404 | } 405 | else { 406 | BigInteger begin = null; 407 | BigInteger end = null; 408 | BigInteger delta = null; 409 | List beginList = new ArrayList(); 410 | List endList = new ArrayList(); 411 | if (null != beginToken) { 412 | begin = new BigInteger(beginToken); 413 | end = new BigInteger(endToken); 414 | delta = end.subtract(begin).divide(new BigInteger(String.valueOf(numThreads))); 415 | for (int mype = 0; mype < numThreads; mype++) { 416 | if (mype < numThreads - 1) { 417 | beginList.add(begin.add(delta.multiply(new BigInteger(String.valueOf(mype)))).toString()); 418 | endList.add(begin.add(delta.multiply(new BigInteger(String.valueOf(mype+1)))).toString()); 419 | } 420 | else { 421 | beginList.add(begin.add(delta.multiply(new BigInteger(String.valueOf(numThreads-1)))).toString()); 422 | endList.add(end.toString()); 423 | } 424 | } 425 | } 426 | else { 427 | // What's the right thing here? 428 | // (1) Split into canonical token ranges - numThreads=numRanges 429 | // (2) Split into subranges of canonical token ranges 430 | // - if numThreads < numRanges, then reset numThreads=numRanges 431 | // - let K=CEIL(numThreads/numRanges) and M=MOD(numThreads/numRanges), for the first M token ranges split into K subranges, and for the remaining ones split into K-1 subranges 432 | // (?) 
Should there be an option for numThreads-per-range? 433 | // (?) Should there be an option for numThreads=numRanges 434 | } 435 | 436 | executor = Executors.newFixedThreadPool(numThreads); 437 | Set> results = new HashSet>(); 438 | for (int mype = 0; mype < numThreads; mype++) { 439 | String tBeginString = beginList.get(mype); 440 | String tEndString = endList.get(mype); 441 | pstream = new PrintStream(new BufferedOutputStream(new FileOutputStream(filename + "." + mype))); 442 | Callable worker = new ThreadExecute(cqlSchema, delimiter, 443 | nullString, 444 | dateFormatString, 445 | localDateFormatString, 446 | boolStyle, locale, 447 | pstream, 448 | tBeginString, 449 | tEndString, session, 450 | consistencyLevel, 451 | where, format, fetchSize); 452 | results.add(executor.submit(worker)); 453 | } 454 | executor.shutdown(); 455 | for (Future res : results) 456 | total += res.get(); 457 | } 458 | System.err.println("Total rows retrieved: " + total); 459 | 460 | // Cleanup 461 | cleanup(); 462 | 463 | return true; 464 | } 465 | 466 | public static void main(String[] args) 467 | throws IOException, ParseException, InterruptedException, ExecutionException, 468 | KeyStoreException, NoSuchAlgorithmException, UnrecoverableKeyException, 469 | CertificateException, KeyManagementException { 470 | CqlDelimUnload cdu = new CqlDelimUnload(); 471 | boolean success = cdu.run(args); 472 | if (success) { 473 | System.exit(0); 474 | } else { 475 | System.exit(-1); 476 | } 477 | } 478 | 479 | class ThreadExecute implements Callable { 480 | private Session session; 481 | private ConsistencyLevel consistencyLevel; 482 | private PreparedStatement statement; 483 | private CqlDelimParser cdp; 484 | 485 | private String cqlSchema; 486 | private String format = "delim"; 487 | private Locale locale = null; 488 | private BooleanParser.BoolStyle boolStyle = null; 489 | private String nullString = null; 490 | private String delimiter = null; 491 | 492 | private PrintStream writer = null; 493 | 
private String beginToken = null; 494 | private String endToken = null; 495 | private String partitionKey = null; 496 | private long numRead = 0; 497 | private String where = null; 498 | private String dateFormatString = null; 499 | private String localDateFormatString = null; 500 | private int fetchSize = 0; 501 | 502 | public ThreadExecute(String inCqlSchema, String inDelimiter, 503 | String inNullString, 504 | String inDateFormatString, 505 | String inLocalDateFormatString, 506 | BooleanParser.BoolStyle inBoolStyle, 507 | Locale inLocale, 508 | PrintStream inWriter, 509 | String inBeginToken, String inEndToken, 510 | Session inSession, ConsistencyLevel inConsistencyLevel, 511 | String inWhere, String inFormat, int inFetchSize) { 512 | super(); 513 | cqlSchema = inCqlSchema; 514 | delimiter = inDelimiter; 515 | nullString = inNullString; 516 | dateFormatString = inDateFormatString; 517 | localDateFormatString = inLocalDateFormatString; 518 | boolStyle = inBoolStyle; 519 | locale = inLocale; 520 | beginToken = inBeginToken; 521 | endToken = inEndToken; 522 | session = inSession; 523 | writer = inWriter; 524 | consistencyLevel = inConsistencyLevel; 525 | where = inWhere; 526 | format = inFormat; 527 | fetchSize = inFetchSize; 528 | } 529 | 530 | public Long call() throws IOException, ParseException { 531 | if (false == setup()) { 532 | return 0L; 533 | } 534 | numRead = execute(); 535 | cleanup(); 536 | return numRead; 537 | } 538 | 539 | private String getPartitionKey(CqlDelimParser cdp, Session session) { 540 | String keyspace = cdp.getKeyspace(); 541 | String table = cdp.getTable(); 542 | if (keyspace.startsWith("\"") && keyspace.endsWith("\"")) 543 | keyspace = keyspace.replaceAll("\"", ""); 544 | else 545 | keyspace = keyspace.toLowerCase(); 546 | if (table.startsWith("\"") && table.endsWith("\"")) 547 | table = table.replaceAll("\"", ""); 548 | else 549 | table = table.toLowerCase(); 550 | 551 | List lcm = session.getCluster().getMetadata() 552 | 
.getKeyspace(keyspace).getTable(table).getPartitionKey(); 553 | String partitionKey = lcm.get(0).getName(); 554 | for (int i = 1; i < lcm.size(); i++) { 555 | partitionKey = partitionKey + "," + lcm.get(i).getName(); 556 | } 557 | return partitionKey; 558 | } 559 | 560 | private boolean setup() throws IOException, ParseException { 561 | cdp = new CqlDelimParser(cqlSchema, delimiter, 4096, nullString, 562 | null, dateFormatString, localDateFormatString, 563 | boolStyle, locale, null, session, false, -1); 564 | String select = cdp.generateSelect(); 565 | String partitionKey = getPartitionKey(cdp, session); 566 | if (null != beginToken) { 567 | select = select + " WHERE Token(" + partitionKey + ") > " 568 | + beginToken + " AND Token(" + partitionKey + ") <= " 569 | + endToken; 570 | if (null != where) 571 | select = select + " AND " + where; 572 | } 573 | else { 574 | if (null != where) 575 | select = select + " WHERE " + where; 576 | } 577 | try { 578 | statement = session.prepare(select); 579 | } 580 | catch (QueryValidationException iqe) { 581 | System.err.println("Error creating statement: " + iqe.getMessage()); 582 | System.err.println("CQL Query: " + select); 583 | if (null != where) 584 | System.err.println("Check your syntax for -where: " + where); 585 | return false; 586 | } 587 | statement.setConsistencyLevel(consistencyLevel); 588 | return true; 589 | } 590 | 591 | private void cleanup() throws IOException { 592 | writer.flush(); 593 | writer.close(); 594 | } 595 | 596 | private long execute() throws IOException { 597 | BoundStatement bound = statement.bind(); 598 | bound.setFetchSize(fetchSize); 599 | ResultSet rs = session.execute(bound); 600 | numRead = 0; 601 | String s = null; 602 | String jsonArrayChar = "[\n"; 603 | for (Row row : rs) { 604 | if (format.equalsIgnoreCase("jsonarray")) { 605 | writer.print(jsonArrayChar); 606 | jsonArrayChar = ","; 607 | } 608 | if (format.equalsIgnoreCase("delim")) { 609 | s = cdp.format(row); 610 | } 611 | else if 
(format.equalsIgnoreCase("jsonline") 612 | || format.equalsIgnoreCase("jsonarray")) { 613 | s = cdp.formatJson(row); 614 | } 615 | writer.println(s); 616 | numRead++; 617 | } 618 | if (format.equalsIgnoreCase("jsonarray")) 619 | writer.println("]"); 620 | return numRead; 621 | } 622 | } 623 | } 624 | 625 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/EnhancedSession.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader; 17 | 18 | import java.util.Map; 19 | 20 | import com.datastax.driver.core.Session; 21 | import com.datastax.driver.core.Cluster; 22 | import com.datastax.driver.core.CloseFuture; 23 | import com.datastax.driver.core.ResultSet; 24 | import com.datastax.driver.core.ResultSetFuture; 25 | import com.datastax.driver.core.Statement; 26 | import com.datastax.driver.core.RegularStatement; 27 | import com.datastax.driver.core.PreparedStatement; 28 | 29 | import com.google.common.util.concurrent.ListenableFuture; 30 | 31 | public class EnhancedSession implements Session { 32 | private Session session; 33 | public EnhancedSession(Session inSession) { 34 | session = inSession; 35 | } 36 | 37 | public void close() { 38 | session.close(); 39 | } 40 | 41 | public CloseFuture closeAsync() { 42 | return session.closeAsync(); 43 | } 44 | 45 | public ResultSet execute(Statement statement) { 46 | return session.execute(statement); 47 | } 48 | 49 | public ResultSet execute(String query) { 50 | return session.execute(query); 51 | } 52 | 53 | public ResultSet execute(String query, Object... values) { 54 | return session.execute(query, values); 55 | } 56 | 57 | public ResultSet execute(String query, Map values) { 58 | return session.execute(query, values); 59 | } 60 | 61 | public ResultSetFuture executeAsync(Statement statement) { 62 | return session.executeAsync(statement); 63 | } 64 | 65 | public ResultSetFuture executeAsync(String query) { 66 | return session.executeAsync(query); 67 | } 68 | 69 | public ResultSetFuture executeAsync(String query, Object... 
values) { 70 | return session.executeAsync(query, values); 71 | } 72 | 73 | public ResultSetFuture executeAsync(String query, Map values) { 74 | return session.executeAsync(query, values); 75 | } 76 | 77 | public Cluster getCluster() { 78 | return session.getCluster(); 79 | } 80 | 81 | public String getLoggedKeyspace() { 82 | return session.getLoggedKeyspace(); 83 | } 84 | 85 | public Session.State getState() { 86 | return session.getState(); 87 | } 88 | 89 | public EnhancedSession init() { 90 | session.init(); 91 | return this; 92 | } 93 | 94 | public com.google.common.util.concurrent.ListenableFuture initAsync() { 95 | return session.initAsync(); 96 | } 97 | 98 | public boolean isClosed() { 99 | return session.isClosed(); 100 | } 101 | 102 | public PreparedStatement prepare(RegularStatement statement) { 103 | return session.prepare(statement); 104 | } 105 | 106 | public PreparedStatement prepare(String query) { 107 | return session.prepare(query); 108 | } 109 | 110 | public com.google.common.util.concurrent.ListenableFuture 111 | prepareAsync(RegularStatement statement) { 112 | return session.prepareAsync(statement); 113 | } 114 | 115 | public com.google.common.util.concurrent.ListenableFuture 116 | prepareAsync(String query) { 117 | return session.prepareAsync(query); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/Latency999RateLimiter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datastax.loader; 17 | 18 | import com.datastax.driver.core.Cluster; 19 | 20 | public class Latency999RateLimiter extends AbstractDynamicRateLimiter { 21 | private Cluster cluster; 22 | 23 | public Latency999RateLimiter(double inRate, long inHowOften, 24 | double inMaxStat, double inMinStat, 25 | double inDownFraction, 26 | double inUpFraction, Cluster inCluster, 27 | boolean inInvertLogic) { 28 | this(inRate, Long.MAX_VALUE, inHowOften, inMaxStat, inMinStat, 29 | inDownFraction, inUpFraction, inCluster, inInvertLogic); 30 | } 31 | 32 | public Latency999RateLimiter(double inRate, long inUpdateRate, 33 | long inHowOften, double inMaxStat, 34 | double inMinStat, double inDownFraction, 35 | double inUpFraction, Cluster inCluster, 36 | boolean inInvertLogic) { 37 | super(inRate, inUpdateRate, inHowOften, inMaxStat, inMinStat, 38 | inDownFraction, inUpFraction, inInvertLogic); 39 | cluster = inCluster; 40 | } 41 | 42 | protected synchronized double getCurrStat() { 43 | return cluster.getMetrics().getRequestsTimer().getSnapshot().get999thPercentile(); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/LoaderRetryPolicy.java: -------------------------------------------------------------------------------- 1 | package com.datastax.loader; 2 | 3 | import java.io.File; 4 | import java.io.BufferedWriter; 5 | import java.io.FileWriter; 6 | 7 | import com.datastax.driver.core.policies.RetryPolicy; 8 | import 
com.datastax.driver.core.ConsistencyLevel; 9 | import com.datastax.driver.core.Statement; 10 | import com.datastax.driver.core.Cluster; 11 | import com.datastax.driver.core.WriteType; 12 | import com.datastax.driver.core.exceptions.DriverException; 13 | 14 | class LoaderRetryPolicy implements RetryPolicy { 15 | private int numRetries; 16 | 17 | public LoaderRetryPolicy(int inNumRetries) { 18 | numRetries = inNumRetries; 19 | } 20 | 21 | // Taken from DefaultRetryPolicy 22 | public RetryDecision onReadTimeout(Statement statement, ConsistencyLevel cl, 23 | int requiredResponses, 24 | int receivedResponses, 25 | boolean dataRetrieved, int nbRetry) { 26 | if (nbRetry != 0) 27 | return RetryDecision.rethrow(); 28 | 29 | return receivedResponses >= requiredResponses && !dataRetrieved 30 | ? RetryDecision.retry(cl) 31 | : RetryDecision.rethrow(); 32 | } 33 | 34 | // Taken from DefaultRetryPolicy 35 | public RetryDecision onUnavailable(Statement statement, ConsistencyLevel cl, 36 | int requiredReplica, int aliveReplica, 37 | int nbRetry) { 38 | return RetryDecision.rethrow(); 39 | } 40 | 41 | public RetryDecision onWriteTimeout(Statement statement, 42 | ConsistencyLevel cl, 43 | WriteType writeType, int requiredAcks, 44 | int receivedAcks, int nbRetry) { 45 | if (nbRetry >= numRetries) 46 | return RetryDecision.rethrow(); 47 | 48 | return RetryDecision.retry(cl); 49 | } 50 | 51 | public RetryPolicy.RetryDecision onRequestError(Statement statement, 52 | ConsistencyLevel cl, 53 | DriverException e, 54 | int nbRetry) { 55 | return RetryDecision.tryNextHost(cl); 56 | } 57 | 58 | public void close() { 59 | } 60 | 61 | public void init(Cluster cluster) { 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/RateLimitedSession.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, 
Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datastax.loader; 17 | 18 | import com.datastax.driver.core.Session; 19 | import com.datastax.driver.core.ResultSet; 20 | import com.datastax.driver.core.ResultSetFuture; 21 | import com.datastax.driver.core.Statement; 22 | import com.datastax.driver.core.BatchStatement; 23 | 24 | public class RateLimitedSession extends EnhancedSession { 25 | RateLimiter rateLimiter; 26 | private Class batchClass; 27 | public RateLimitedSession(Session inSession, RateLimiter inRateLimiter) { 28 | super(inSession); 29 | rateLimiter = inRateLimiter; 30 | BatchStatement batch = new BatchStatement(); 31 | batchClass = batch.getClass(); 32 | } 33 | 34 | public long numAcquires() { 35 | return rateLimiter.numAcquires(); 36 | } 37 | 38 | public double getRate() { 39 | return rateLimiter.getRate(); 40 | } 41 | 42 | public ResultSet execute(Statement statement) { 43 | rateLimiter.acquire(); 44 | return super.execute(statement); 45 | } 46 | 47 | public ResultSet execute(String query) { 48 | rateLimiter.acquire(); 49 | return super.execute(query); 50 | } 51 | 52 | public ResultSet execute(String query, Object... 
values) { 53 | rateLimiter.acquire(); 54 | return super.execute(query, values); 55 | } 56 | 57 | public ResultSetFuture executeAsync(Statement statement) { 58 | if (statement.getClass() == batchClass) 59 | rateLimiter.acquire(((BatchStatement)statement).size()); 60 | else 61 | rateLimiter.acquire(); 62 | return super.executeAsync(statement); 63 | } 64 | 65 | public ResultSetFuture executeAsync(String query) { 66 | rateLimiter.acquire(); 67 | return super.executeAsync(query); 68 | } 69 | 70 | public ResultSetFuture executeAsync(String query, Object... values) { 71 | rateLimiter.acquire(); 72 | return super.executeAsync(query, values); 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/RateLimiter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader; 17 | 18 | import java.io.PrintStream; 19 | import java.util.concurrent.TimeUnit; 20 | import java.util.concurrent.atomic.AtomicLong; 21 | import com.codahale.metrics.Timer; 22 | import com.codahale.metrics.Snapshot; 23 | 24 | public class RateLimiter { 25 | private com.google.common.util.concurrent.RateLimiter rateLimiter; 26 | private AtomicLong numAcquires; 27 | private static long updateRate = 100000; 28 | private Timer timer; 29 | private PrintStream stream; 30 | private long lastVal; 31 | private long firstTime; 32 | private long lastTime; 33 | 34 | public RateLimiter(double inRate) { 35 | this(inRate, Long.MAX_VALUE); 36 | } 37 | 38 | public RateLimiter(double inRate, long inUpdateRate) { 39 | this(inRate, inUpdateRate, null, null); 40 | } 41 | 42 | public RateLimiter(double inRate, long inUpdateRate, 43 | Timer inTimer, PrintStream inStream) { 44 | rateLimiter = com.google.common.util.concurrent.RateLimiter.create(inRate); 45 | updateRate = inUpdateRate; 46 | timer = inTimer; 47 | stream = inStream; 48 | if ((null != stream) && (null != timer)) { 49 | printHeader(); 50 | } 51 | numAcquires = new AtomicLong(0); 52 | lastTime = System.currentTimeMillis(); 53 | firstTime = lastTime; 54 | lastVal = 0; 55 | } 56 | 57 | protected void printHeader() { 58 | stream.println("Count,Min,Max,Mean,StdDev,50th,75th,95th,98th,99th,999th,MeanRate,1MinuteRate,5MinuteRate,15MinuteRate"); 59 | } 60 | 61 | protected void printStats() { 62 | Snapshot snapshot = timer.getSnapshot(); 63 | stream.println(String.format("%d,%d,%d,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f", 64 | timer.getCount(), 65 | snapshot.getMin(), 66 | snapshot.getMax(), 67 | snapshot.getMean(), 68 | snapshot.getStdDev(), 69 | snapshot.getMedian(), 70 | snapshot.get75thPercentile(), 71 | snapshot.get95thPercentile(), 72 | snapshot.get98thPercentile(), 73 | snapshot.get99thPercentile(), 74 | snapshot.get999thPercentile(), 75 | timer.getMeanRate(), 76 | 
timer.getOneMinuteRate(), 77 | timer.getFiveMinuteRate(), 78 | timer.getFifteenMinuteRate()) 79 | ); 80 | } 81 | 82 | public void report(Long currentVal, Long currentTime) { 83 | if ((null != stream) && (null != timer)) { 84 | printStats(); 85 | return; 86 | } 87 | if (null == currentTime) 88 | currentTime = System.currentTimeMillis(); 89 | long etime = (currentTime - firstTime)/1000; 90 | double rateFromBeginning; 91 | if (null == currentVal) { 92 | currentVal = numAcquires.get() - 1; 93 | rateFromBeginning = (etime > 0) ? (currentVal + 0.0) / etime : 0; 94 | System.err.println("Lines Processed: \t" + currentVal 95 | + " Rate: \t" + rateFromBeginning); 96 | } 97 | else { 98 | long ltime = (currentTime - lastTime)/1000; 99 | double rateFromLast = (ltime > 0) ? (currentVal - lastVal + 0.0) / ltime : 0; 100 | rateFromBeginning = (etime > 0) ? (currentVal + 0.0) / etime : 0; 101 | System.err.println("Lines Processed: \t" + currentVal 102 | + " Rate: \t" + rateFromBeginning 103 | + " (" + rateFromLast 104 | + ")" 105 | ); 106 | } 107 | } 108 | 109 | protected synchronized void incrementAndReport(int permits) { 110 | long currentVal = numAcquires.addAndGet(permits); 111 | long currentTime = System.currentTimeMillis(); 112 | if (permits > currentVal % updateRate) { 113 | report(currentVal, currentTime); 114 | lastTime = currentTime; 115 | lastVal = currentVal; 116 | } 117 | } 118 | 119 | public void acquire() { 120 | rateLimiter.acquire(); 121 | incrementAndReport(1); 122 | } 123 | 124 | public void acquire(int permits) { 125 | rateLimiter.acquire(permits); 126 | incrementAndReport(permits); 127 | } 128 | 129 | public double getRate() { 130 | return rateLimiter.getRate(); 131 | } 132 | 133 | public synchronized void setRate(double permitsPerSecond) { 134 | rateLimiter.setRate(permitsPerSecond); 135 | } 136 | 137 | public String toString() { 138 | return rateLimiter.toString(); 139 | } 140 | 141 | public boolean tryAcquire() { 142 | if (rateLimiter.tryAcquire()) { 143 | 
incrementAndReport(1); 144 | return true; 145 | } 146 | return false; 147 | } 148 | 149 | public boolean tryAcquire(int permits) { 150 | if (rateLimiter.tryAcquire(permits)) { 151 | incrementAndReport(permits); 152 | return true; 153 | } 154 | return false; 155 | } 156 | 157 | public boolean tryAcquire(int permits, long timeout, TimeUnit unit) { 158 | if (rateLimiter.tryAcquire(permits, timeout, unit)) { 159 | incrementAndReport(permits); 160 | return true; 161 | } 162 | return false; 163 | } 164 | 165 | public boolean tryAcquire(long timeout, TimeUnit unit) { 166 | if (rateLimiter.tryAcquire(timeout, unit)) { 167 | incrementAndReport(1); 168 | return true; 169 | } 170 | return false; 171 | } 172 | 173 | public long numAcquires() { 174 | return numAcquires.get(); 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/futures/AbstractFutureManager.java: -------------------------------------------------------------------------------- 1 | package com.datastax.loader.futures; 2 | 3 | import java.util.concurrent.TimeUnit; 4 | import com.datastax.driver.core.ResultSetFuture; 5 | 6 | public abstract class AbstractFutureManager implements FutureManager { 7 | protected int size; 8 | protected long queryTimeout; 9 | protected long maxInsertErrors; 10 | protected TimeUnit unit = TimeUnit.SECONDS; 11 | 12 | public AbstractFutureManager(int inSize, long inQueryTimeout, long inMaxInsertErrors) { 13 | size = inSize; 14 | queryTimeout = inQueryTimeout; 15 | maxInsertErrors = inMaxInsertErrors; 16 | } 17 | 18 | public abstract boolean add(ResultSetFuture future, String line); 19 | 20 | public abstract boolean cleanup(); 21 | 22 | public abstract long getNumInserted(); 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/futures/ActionFutureList.java: 
-------------------------------------------------------------------------------- 1 | package com.datastax.loader.futures; 2 | 3 | import java.util.List; 4 | import java.util.ArrayList; 5 | 6 | import com.datastax.driver.core.ResultSetFuture; 7 | import com.datastax.driver.core.ResultSet; 8 | 9 | public class ActionFutureList extends AbstractFutureManager { 10 | protected List futures; 11 | protected List strings; 12 | protected long insertErrors; 13 | protected long numInserted; 14 | protected FutureAction futureAction = null; 15 | 16 | public ActionFutureList(int inSize, long inQueryTimeout, long inMaxInsertErrors, FutureAction inFutureAction) { 17 | super(inSize, inQueryTimeout, inMaxInsertErrors); 18 | futureAction = inFutureAction; 19 | futures = new ArrayList(size); 20 | strings = new ArrayList(size); 21 | insertErrors = 0; 22 | numInserted = 0; 23 | } 24 | 25 | public boolean add(ResultSetFuture future, String line) { 26 | if (futures.size() >= size) { 27 | if (!purgeFutures()) 28 | return false; 29 | } 30 | futures.add(future); 31 | strings.add(line); 32 | numInserted++; 33 | return true; 34 | } 35 | 36 | protected boolean purgeFutures() { 37 | if (0 == futures.size()) 38 | return true; 39 | for (int i = 0; i < futures.size(); i++) { 40 | ResultSetFuture future = futures.get(i); 41 | String line = strings.get(i); 42 | try { 43 | //long beginTime = System.currentTimeMillis(); 44 | ResultSet rs = future.getUninterruptibly(queryTimeout, unit); 45 | //long duration = System.currentTimeMillis() - beginTime; 46 | //if (2000 < duration) { 47 | //System.err.println("Query took " + duration + " ms"); 48 | //} 49 | futureAction.onSuccess(rs, line); 50 | } 51 | catch (Exception e) { 52 | insertErrors++; 53 | futureAction.onFailure(e, line); 54 | if (maxInsertErrors <= insertErrors) { 55 | futureAction.onTooManyFailures(); 56 | return false; 57 | } 58 | } 59 | } 60 | futures.clear(); 61 | return true; 62 | } 63 | 64 | public boolean cleanup() { 65 | return 
purgeFutures(); 66 | } 67 | 68 | public long getNumInserted() { 69 | return numInserted; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/futures/ActionFutureSet.java: -------------------------------------------------------------------------------- 1 | package com.datastax.loader.futures; 2 | 3 | import java.util.List; 4 | import java.util.ArrayList; 5 | import java.util.concurrent.TimeUnit; 6 | import java.util.concurrent.Semaphore; 7 | import java.util.concurrent.atomic.AtomicLong; 8 | 9 | import com.datastax.driver.core.ResultSetFuture; 10 | import com.datastax.driver.core.ResultSet; 11 | 12 | import com.google.common.util.concurrent.Futures; 13 | import com.google.common.util.concurrent.FutureCallback; 14 | 15 | public class ActionFutureSet extends AbstractFutureManager { 16 | protected FutureAction futureAction = null; 17 | protected Semaphore available; 18 | protected AtomicLong insertErrors; 19 | protected AtomicLong numInserted; 20 | 21 | public ActionFutureSet(int inSize, long inQueryTimeout, 22 | long inMaxInsertErrors, 23 | FutureAction inFutureAction) { 24 | super(inSize, inQueryTimeout, inMaxInsertErrors); 25 | futureAction = inFutureAction; 26 | available = new Semaphore(size, true); 27 | insertErrors = new AtomicLong(0); 28 | numInserted = new AtomicLong(0); 29 | } 30 | 31 | public boolean add(ResultSetFuture future, final String line) { 32 | if (maxInsertErrors <= insertErrors.get()) 33 | return false; 34 | try { 35 | available.acquire(); 36 | } 37 | catch (InterruptedException e) { 38 | return false; 39 | } 40 | Futures.addCallback(future, new FutureCallback() { 41 | @Override 42 | public void onSuccess(ResultSet rs) { 43 | available.release(); 44 | numInserted.incrementAndGet(); 45 | futureAction.onSuccess(rs, line); 46 | } 47 | @Override 48 | public void onFailure(Throwable t) { 49 | available.release(); 50 | long numErrors = insertErrors.incrementAndGet(); 51 | 
futureAction.onFailure(t, line); 52 | if (maxInsertErrors <= numErrors) { 53 | futureAction.onTooManyFailures(); 54 | } 55 | } 56 | }); 57 | return true; 58 | } 59 | 60 | public boolean cleanup() { 61 | try { 62 | available.acquire(this.size); 63 | } catch (InterruptedException e) { 64 | return false; 65 | } 66 | return true; 67 | } 68 | 69 | public long getNumInserted() { 70 | return numInserted.get(); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/futures/FutureAction.java: -------------------------------------------------------------------------------- 1 | package com.datastax.loader.futures; 2 | 3 | import com.datastax.driver.core.ResultSet; 4 | 5 | public interface FutureAction { 6 | public void onSuccess(ResultSet rs, String line); 7 | public void onFailure(Throwable t, String line); 8 | public void onTooManyFailures(); 9 | } 10 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/futures/FutureManager.java: -------------------------------------------------------------------------------- 1 | package com.datastax.loader.futures; 2 | 3 | import com.datastax.driver.core.ResultSetFuture; 4 | 5 | public interface FutureManager { 6 | public boolean add(ResultSetFuture future, String line); 7 | 8 | public boolean cleanup(); 9 | 10 | public long getNumInserted(); 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/futures/JsonPrintingFutureAction.java: -------------------------------------------------------------------------------- 1 | package com.datastax.loader.futures; 2 | 3 | import java.io.PrintStream; 4 | import java.util.concurrent.atomic.AtomicLong; 5 | import com.datastax.driver.core.ResultSet; 6 | 7 | public class JsonPrintingFutureAction extends PrintingFutureAction { 8 | private boolean firstBad = true; 9 | private String badDelim 
= "[\n"; 10 | public JsonPrintingFutureAction(PrintStream inLogPrinter, 11 | PrintStream inBadInsertPrinter) { 12 | super(inLogPrinter, inBadInsertPrinter); 13 | } 14 | 15 | public void onFailure(Throwable t, String line) { 16 | if (logPrinter != null) { 17 | logPrinter.println("Error inserting: " + t.getMessage()); 18 | t.printStackTrace(logPrinter); 19 | } 20 | if (badInsertPrinter != null) { 21 | badInsertPrinter.println(badDelim + line); 22 | if (firstBad) { 23 | firstBad = false; 24 | badDelim = ",\n"; 25 | } 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/futures/JsonPrintingFutureList.java: -------------------------------------------------------------------------------- 1 | package com.datastax.loader.futures; 2 | 3 | import java.io.PrintStream; 4 | 5 | import com.datastax.driver.core.ResultSetFuture; 6 | 7 | public class JsonPrintingFutureList extends ActionFutureList { 8 | public JsonPrintingFutureList() { 9 | this(500, 2, 10); 10 | } 11 | 12 | public JsonPrintingFutureList(int inSize, long inQueryTimeout, 13 | long inMaxInsertErrors) { 14 | this(inSize, inQueryTimeout, inMaxInsertErrors, System.err, System.err); 15 | } 16 | 17 | public JsonPrintingFutureList(int inSize, long inQueryTimeout, 18 | long inMaxInsertErrors, 19 | PrintStream inLogPrinter, 20 | PrintStream inBadInsertPrinter) { 21 | super(inSize, inQueryTimeout, inMaxInsertErrors, 22 | new JsonPrintingFutureAction(inLogPrinter, inBadInsertPrinter)); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/futures/JsonPrintingFutureSet.java: -------------------------------------------------------------------------------- 1 | package com.datastax.loader.futures; 2 | 3 | import java.io.PrintStream; 4 | 5 | public class JsonPrintingFutureSet extends ActionFutureSet { 6 | public JsonPrintingFutureSet() { 7 | this(500, 2, 10); 8 | } 
9 | 10 | public JsonPrintingFutureSet(int inSize, long inQueryTimeout, 11 | long inMaxInsertErrors) { 12 | this(inSize, inQueryTimeout, inMaxInsertErrors, System.err, System.err); 13 | } 14 | 15 | public JsonPrintingFutureSet(int inSize, long inQueryTimeout, 16 | long inMaxInsertErrors, 17 | PrintStream inLogPrinter, 18 | PrintStream inBadInsertPrinter) { 19 | super(inSize, inQueryTimeout, inMaxInsertErrors, 20 | new JsonPrintingFutureAction(inLogPrinter, inBadInsertPrinter)); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/futures/NullFutureAction.java: -------------------------------------------------------------------------------- 1 | package com.datastax.loader.futures; 2 | 3 | import com.datastax.driver.core.ResultSet; 4 | 5 | public class NullFutureAction implements FutureAction { 6 | public void onSuccess(ResultSet rs, String line) { } 7 | public void onFailure(Throwable t, String line) { } 8 | public void onTooManyFailures() { } 9 | } 10 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/futures/PrintingFutureAction.java: -------------------------------------------------------------------------------- 1 | package com.datastax.loader.futures; 2 | 3 | import java.io.PrintStream; 4 | import java.util.concurrent.atomic.AtomicLong; 5 | import com.datastax.driver.core.ResultSet; 6 | 7 | public class PrintingFutureAction implements FutureAction { 8 | protected PrintStream logPrinter = null; 9 | protected PrintStream badInsertPrinter = null; 10 | protected AtomicLong numInserted; 11 | protected final long period = 100000; 12 | 13 | public PrintingFutureAction(PrintStream inLogPrinter, 14 | PrintStream inBadInsertPrinter) { 15 | logPrinter = inLogPrinter; 16 | badInsertPrinter = inBadInsertPrinter; 17 | numInserted = new AtomicLong(0); 18 | } 19 | 20 | public void onSuccess(ResultSet rs, String line) { 21 | if 
(logPrinter != null) { 22 | long cur = numInserted.incrementAndGet(); 23 | if (0 == (cur % period)) { 24 | logPrinter.println("Progress: " + cur); 25 | } 26 | } 27 | } 28 | 29 | public void onFailure(Throwable t, String line) { 30 | if (logPrinter != null) { 31 | logPrinter.println("Error inserting: " + t.getMessage()); 32 | t.printStackTrace(logPrinter); 33 | } 34 | if (badInsertPrinter != null) { 35 | badInsertPrinter.println(line); 36 | } 37 | } 38 | 39 | public void onTooManyFailures() { 40 | if (logPrinter != null) { 41 | logPrinter.println("Too many INSERT errors ... Stopping"); 42 | } 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/futures/PrintingFutureList.java: -------------------------------------------------------------------------------- 1 | package com.datastax.loader.futures; 2 | 3 | import java.io.PrintStream; 4 | 5 | import com.datastax.driver.core.ResultSetFuture; 6 | 7 | public class PrintingFutureList extends ActionFutureList { 8 | public PrintingFutureList() { 9 | this(500, 2, 10); 10 | } 11 | 12 | public PrintingFutureList(int inSize, long inQueryTimeout, long inMaxInsertErrors) { 13 | this(inSize, inQueryTimeout, inMaxInsertErrors, System.err, System.err); 14 | } 15 | 16 | public PrintingFutureList(int inSize, long inQueryTimeout, long inMaxInsertErrors, 17 | PrintStream inLogPrinter, PrintStream inBadInsertPrinter) { 18 | super(inSize, inQueryTimeout, inMaxInsertErrors, new PrintingFutureAction(inLogPrinter, inBadInsertPrinter)); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/futures/PrintingFutureSet.java: -------------------------------------------------------------------------------- 1 | package com.datastax.loader.futures; 2 | 3 | import java.io.PrintStream; 4 | 5 | public class PrintingFutureSet extends ActionFutureSet { 6 | public PrintingFutureSet() { 7 | 
this(500, 2, 10); 8 | } 9 | 10 | public PrintingFutureSet(int inSize, long inQueryTimeout, 11 | long inMaxInsertErrors) { 12 | this(inSize, inQueryTimeout, inMaxInsertErrors, System.err, System.err); 13 | } 14 | 15 | public PrintingFutureSet(int inSize, long inQueryTimeout, 16 | long inMaxInsertErrors, 17 | PrintStream inLogPrinter, 18 | PrintStream inBadInsertPrinter) { 19 | super(inSize, inQueryTimeout, inMaxInsertErrors, 20 | new PrintingFutureAction(inLogPrinter, inBadInsertPrinter)); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/AbstractParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.io.StringReader; 19 | import java.io.IOException; 20 | import java.text.ParseException; 21 | import com.datastax.driver.core.Row; 22 | import com.datastax.driver.core.exceptions.InvalidTypeException; 23 | 24 | import org.apache.commons.lang3.StringEscapeUtils; 25 | 26 | public abstract class AbstractParser implements Parser { 27 | public abstract Object parseIt(String toparse) throws ParseException; 28 | public String format(Row row, int index) throws IndexOutOfBoundsException, InvalidTypeException { 29 | if (row.isNull(index)) 30 | return null; 31 | return format(row.getObject(index)); 32 | } 33 | public abstract String format(Object o); 34 | 35 | public Object parse(String toparse) throws ParseException { 36 | String toparseit = unquote(toparse); 37 | return parseIt(toparseit); 38 | } 39 | 40 | public Object parse(IndexedLine il, String nullString, Character delim, 41 | Character escape, Character quote, boolean last) 42 | throws IOException, ParseException { 43 | //if (last) 44 | // return parse(prepareToParse(il.remaining(), nullString, quote)); 45 | //return parse(getQuotedOrUnquoted(il, nullString, delim, escape, quote)); 46 | return parse(getQuotedOrUnquoted(il, nullString, delim, escape, quote)); 47 | } 48 | 49 | public String prepareToParse(String retstring, String nullString, Character quote) { 50 | if (retstring.startsWith(quote.toString()) 51 | && retstring.endsWith(quote.toString())) 52 | //if ((quote == retstring.charAt(0)) 53 | //&& (quote == retstring.charAt(retstring.length() - 1))) 54 | retstring = retstring.substring(1, retstring.length() - 1); 55 | else 56 | retstring = retstring.trim(); 57 | if (null != nullString) 58 | if (nullString.equalsIgnoreCase(retstring)) 59 | return null; 60 | return retstring; 61 | } 62 | 63 | public String getQuotedOrUnquoted(IndexedLine il, String nullString, 64 | Character delim, Character escape, 65 | Character quote) 66 | throws IOException, 
ParseException { 67 | String retstring; 68 | if (null == delim) { 69 | return null; 70 | } 71 | if (!il.hasNext()) 72 | return ""; 73 | char c = il.getNext(); 74 | if (c == delim) { 75 | retstring = ""; 76 | } 77 | else { 78 | StringBuilder sb = new StringBuilder(10240).append(c); 79 | String s = extractUntil(il, delim, escape, quote, (c == quote)); 80 | if (null == s) { 81 | return null; 82 | } 83 | retstring = sb.append(s).toString(); 84 | } 85 | return prepareToParse(retstring, nullString, quote); 86 | } 87 | 88 | public String extractUntil(IndexedLine il, Character delim, 89 | Character escape, Character quote, 90 | boolean inquote) 91 | throws IOException, ParseException { 92 | if (null == delim) { 93 | return null; 94 | } 95 | StringBuilder sb = new StringBuilder(10240); 96 | char c; 97 | while (il.hasNext()) { 98 | c = il.getNext(); 99 | if ((c == delim) && (!inquote)) { 100 | break; 101 | } 102 | sb.append(c); 103 | if (null != quote) { 104 | if (c == quote) { 105 | inquote = !inquote; 106 | } 107 | } 108 | if (null != escape) { 109 | if (c == escape) { 110 | c = il.getNext(); 111 | sb.append(Character.toChars(c)); 112 | } 113 | } 114 | } 115 | return sb.toString(); 116 | } 117 | 118 | public static String quote(String instr) { 119 | if (null == instr) 120 | return null; 121 | return "\"" + escape(instr) + "\""; 122 | } 123 | 124 | public static String unquote(String instr) { 125 | if (null == instr) 126 | return null; 127 | if ((instr.startsWith("\"")) && (instr.endsWith("\""))) 128 | return unescape(instr.substring(1, instr.length() - 1)); 129 | return instr; 130 | } 131 | 132 | public static String unstripQuote(String instr) { 133 | if (null == instr) 134 | return null; 135 | return "\"" + instr + "\""; 136 | } 137 | 138 | public static String stripQuote(String instr) { 139 | if (null == instr) 140 | return null; 141 | if ((instr.startsWith("\"")) && (instr.endsWith("\""))) 142 | return instr.substring(1, instr.length() - 1); 143 | return instr; 144 | } 
145 | 146 | public static String escape(String instr) { 147 | if (null == instr) 148 | return null; 149 | return StringEscapeUtils.escapeJava(instr); 150 | } 151 | 152 | public static String unescape(String instr) { 153 | if (null == instr) 154 | return null; 155 | return StringEscapeUtils.unescapeJava(instr); 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/BigDecimalParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.math.BigDecimal; 19 | import com.datastax.driver.core.Row; 20 | import com.datastax.driver.core.exceptions.InvalidTypeException; 21 | 22 | // BigDecimal parser 23 | public class BigDecimalParser extends AbstractParser { 24 | public BigDecimal parseIt(String toparse) throws NumberFormatException { 25 | if (null == toparse) 26 | return null; 27 | return new BigDecimal(toparse); 28 | } 29 | 30 | public String format(Object o) { 31 | BigDecimal v = (BigDecimal)o; 32 | return v.toString(); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/BigIntegerParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.math.BigInteger; 19 | import com.datastax.driver.core.Row; 20 | import com.datastax.driver.core.exceptions.InvalidTypeException; 21 | 22 | // BigInteger parser 23 | public class BigIntegerParser extends AbstractParser { 24 | public BigInteger parseIt(String toparse) throws NumberFormatException { 25 | if (null == toparse) 26 | return null; 27 | return new BigInteger(toparse); 28 | } 29 | 30 | public String format(Object o) { 31 | BigInteger v = (BigInteger)o; 32 | return v.toString(); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/BooleanParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.text.ParseException; 19 | import com.datastax.driver.core.Row; 20 | import com.datastax.driver.core.exceptions.InvalidTypeException; 21 | 22 | // Boolean parser - handles any way that Booleans can be expressed in Java 23 | public class BooleanParser extends AbstractParser { 24 | public static enum BoolStyle { 25 | BoolStyle_TrueFalse("TRUE_FALSE", "TRUE", "FALSE"), 26 | BoolStyle_10("1_0", "1", "0"), 27 | BoolStyle_TF("T_F", "T", "F"), 28 | BoolStyle_YN("Y_N", "Y", "N"), 29 | BoolStyle_YesNo("YES_NO", "YES", "NO"); 30 | 31 | private String styleStr; 32 | private String trueStr; 33 | private String falseStr; 34 | 35 | BoolStyle(String inStyleStr, String inTrueStr, String inFalseStr) { 36 | styleStr = inStyleStr; 37 | trueStr = inTrueStr; 38 | falseStr = inFalseStr; 39 | } 40 | 41 | public String getStyle() { 42 | return styleStr; 43 | } 44 | 45 | public String getTrueStr() { 46 | return trueStr; 47 | } 48 | 49 | public String getFalseStr() { 50 | return falseStr; 51 | } 52 | } 53 | 54 | private String boolTrue; 55 | private String boolFalse; 56 | private static String BOOLSTYLE_1_0 = "1_0"; 57 | private static String BOOLSTYLE_T_F = "T_F"; 58 | private static String BOOLSTYLE_Y_N = "Y_N"; 59 | private static String BOOLSTYLE_YES_NO = "YES_NO"; 60 | private static String BOOLSTYLE_TRUE_FALSE = "TRUE_FALSE"; 61 | 62 | public BooleanParser() { 63 | this(BoolStyle.BoolStyle_TrueFalse); 64 | } 65 | 66 | public BooleanParser(BoolStyle inBoolStyle) { 67 | if (null == inBoolStyle) 68 | inBoolStyle = BoolStyle.BoolStyle_TrueFalse; 69 | boolTrue = inBoolStyle.getTrueStr(); 70 | boolFalse = inBoolStyle.getFalseStr(); 71 | } 72 | 73 | public BooleanParser(String inBoolTrue, String inBoolFalse) { 74 | boolTrue = inBoolTrue; 75 | boolFalse = inBoolFalse; 76 | } 77 | 78 | public static BoolStyle getBoolStyle(String instr) { 79 | for (BoolStyle bs : BoolStyle.values()) { 80 | if 
(bs.getStyle().equalsIgnoreCase(instr)) { 81 | return bs; 82 | } 83 | } 84 | return null; 85 | } 86 | 87 | public static String getOptions() { 88 | String ret = "'" + BOOLSTYLE_1_0 + "'"; 89 | ret = ret + ", '" + BOOLSTYLE_T_F + "'"; 90 | ret = ret + ", '" + BOOLSTYLE_Y_N + "'"; 91 | ret = ret + ", '" + BOOLSTYLE_TRUE_FALSE + "'"; 92 | ret = ret + ", '" + BOOLSTYLE_YES_NO + "'"; 93 | return ret; 94 | }; 95 | 96 | public Boolean parseIt(String toparse) throws ParseException { 97 | if (null == toparse) 98 | return null; 99 | if (boolTrue.equalsIgnoreCase(toparse)) 100 | return new Boolean("TRUE"); 101 | if (boolFalse.equalsIgnoreCase(toparse)) 102 | return new Boolean("FALSE"); 103 | throw new ParseException("Boolean was not TRUE (" + boolTrue + ") or FALSE (" + boolFalse + ")", 0); 104 | } 105 | 106 | public String format(Object o) { 107 | Boolean v = (Boolean)o; 108 | if (v) 109 | return boolTrue; 110 | return boolFalse; 111 | } 112 | } 113 | 114 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/ByteBufferParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.nio.ByteBuffer; 19 | import javax.xml.bind.DatatypeConverter; 20 | import com.datastax.driver.core.Row; 21 | import com.datastax.driver.core.exceptions.InvalidTypeException; 22 | 23 | // ByteBuffer parser 24 | public class ByteBufferParser extends AbstractParser { 25 | public ByteBuffer parseIt(String toparse) { 26 | if (null == toparse) 27 | return null; 28 | byte[] barry = DatatypeConverter.parseBase64Binary(toparse); 29 | return ByteBuffer.wrap(barry); 30 | } 31 | 32 | public String format(Object o) { 33 | ByteBuffer v = (ByteBuffer)o; 34 | return DatatypeConverter.printBase64Binary(v.array()); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/ByteParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.util.Locale; 19 | import java.text.ParseException; 20 | 21 | import com.datastax.driver.core.Row; 22 | import com.datastax.driver.core.exceptions.InvalidTypeException; 23 | 24 | // Byte parser - use the Number parser 25 | public class ByteParser extends NumberParser { 26 | public ByteParser() { 27 | super(); 28 | } 29 | 30 | public ByteParser(Locale inLocale) { 31 | super(inLocale); 32 | } 33 | 34 | public ByteParser(Locale inLocale, Boolean grouping) { 35 | super(inLocale, grouping); 36 | } 37 | 38 | public Byte parseIt(String toparse) throws ParseException { 39 | Number val = super.parseIt(toparse); 40 | return (null == val) ? null : val.byteValue(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/DateParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.util.Date; 19 | import java.util.Locale; 20 | import java.text.DateFormat; 21 | import java.text.SimpleDateFormat; 22 | import java.text.ParseException; 23 | 24 | import com.datastax.driver.core.Row; 25 | import com.datastax.driver.core.exceptions.InvalidTypeException; 26 | 27 | // Date parser - takes a format string 28 | public class DateParser extends AbstractParser { 29 | private DateFormat format; 30 | public DateParser(String inFormatString) { 31 | if (null == inFormatString) 32 | format = new SimpleDateFormat(); 33 | else 34 | format = new SimpleDateFormat(inFormatString, Locale.ENGLISH); 35 | } 36 | 37 | public Date parseIt(String toparse) throws ParseException { 38 | if (null == toparse) 39 | return null; 40 | return format.parse(toparse); 41 | } 42 | 43 | public String format(Object o) { 44 | Date v = (Date)o; 45 | if (v == null) 46 | return null; 47 | return format.format(v); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/DelimParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import com.datastax.driver.core.Row; 19 | import com.datastax.driver.core.exceptions.InvalidTypeException; 20 | import com.univocity.parsers.csv.CsvParser; 21 | import com.univocity.parsers.csv.CsvParserSettings; 22 | 23 | import java.io.IOException; 24 | import java.text.ParseException; 25 | import java.util.ArrayList; 26 | import java.util.List; 27 | 28 | public class DelimParser { 29 | private List parsers; 30 | private int parsersSize; 31 | private List elements; 32 | private String delimiter; 33 | private int charsPerColumn; 34 | private String nullString; 35 | private char delim; 36 | private char quote; 37 | private char escape; 38 | private char comment; 39 | private List skip; 40 | 41 | private CsvParser csvp = null; 42 | 43 | public static String DEFAULT_DELIMITER = ","; 44 | public static String DEFAULT_NULLSTRING = ""; 45 | public static String DEFAULT_COMMENT_STRING = "\0"; 46 | public static int DEFAULT_CHARSPERCOLUMN = 4096; 47 | 48 | public DelimParser() { 49 | this(DEFAULT_DELIMITER); 50 | } 51 | 52 | public DelimParser(String inDelimiter) { 53 | this(inDelimiter, DEFAULT_CHARSPERCOLUMN, DEFAULT_NULLSTRING, DEFAULT_COMMENT_STRING); 54 | } 55 | 56 | public DelimParser(String inDelimiter, int inCharsPerColumn, 57 | String inNullString, String inComment) { 58 | parsers = new ArrayList(); 59 | elements = new ArrayList(); 60 | skip = new ArrayList(); 61 | parsersSize = parsers.size(); 62 | if (null == inDelimiter) 63 | delimiter = DEFAULT_DELIMITER; 64 | else 65 | delimiter = inDelimiter; 66 | if (null == inNullString) 67 | nullString = DEFAULT_NULLSTRING; 68 | else 69 | nullString = inNullString; 70 | if (null == inComment) 71 | comment = DEFAULT_COMMENT_STRING.charAt(0); 72 | else 73 | comment = inComment.charAt(0); 74 | charsPerColumn = inCharsPerColumn; 75 | delim = ("\\t".equals(delimiter)) ? 
'\t' : delimiter.charAt(0); 76 | quote = '\"'; 77 | escape = '\\'; 78 | 79 | CsvParserSettings settings = new CsvParserSettings(); 80 | settings.getFormat().setLineSeparator("\n"); 81 | settings.getFormat().setDelimiter(delim); 82 | settings.setMaxCharsPerColumn(charsPerColumn); 83 | settings.getFormat().setQuote(quote); 84 | settings.getFormat().setQuoteEscape(escape); 85 | settings.getFormat().setCharToEscapeQuoteEscaping(escape); 86 | settings.setKeepQuotes(true); 87 | settings.setKeepEscapeSequences(true); 88 | settings.getFormat().setComment(comment); 89 | 90 | csvp = new CsvParser(settings); 91 | } 92 | 93 | // Adds a parser to the list 94 | public void add(Parser p) { 95 | parsers.add(p); 96 | skip.add(false); 97 | parsersSize = parsers.size(); 98 | } 99 | 100 | public void addSkip(int idx) { 101 | parsers.add(idx, new StringParser()); 102 | skip.add(idx, true); 103 | parsersSize = parsers.size(); 104 | } 105 | 106 | // This is where we apply rules like quoting, NULL, etc 107 | private String prepareToParse(String toparse) { 108 | String trimmedToParse = toparse.trim(); 109 | if (trimmedToParse.startsWith("\"") && trimmedToParse.endsWith("\"")) 110 | trimmedToParse = trimmedToParse.substring(1, trimmedToParse.length() - 1); 111 | if (trimmedToParse.equals(nullString)) 112 | return null; 113 | return trimmedToParse; 114 | } 115 | 116 | public List parse(String line) { 117 | //return parseComplex(line); 118 | return parseWithUnivocity(line); 119 | } 120 | 121 | public List parseWithUnivocity(String line) { 122 | String[] row = csvp.parseLine(line); 123 | return parse(row); 124 | } 125 | 126 | public List parse(String[] row) { 127 | if (row.length != parsersSize) { 128 | System.err.println("Row has different number of fields (" + row.length + ") than expected (" + parsersSize + ")"); 129 | return null; 130 | } 131 | elements.clear(); 132 | Object toAdd; 133 | for (int i = 0; i < parsersSize; i++) { 134 | try { 135 | String toparse = row[i]; 136 | if ((null == 
toparse) || 137 | ((null != nullString) && 138 | (nullString.equalsIgnoreCase(AbstractParser.unquote(toparse))))) 139 | toAdd = null; 140 | else 141 | toAdd = parsers.get(i).parse(toparse); 142 | 143 | if (!skip.get(i)) 144 | elements.add(toAdd); 145 | } 146 | catch (NumberFormatException e) { 147 | System.err.println(String.format("Invalid number in input number %d: %s", i, e.getMessage())); 148 | return null; 149 | } 150 | catch (ParseException pe) { 151 | System.err.println(String.format("Invalid format in input %d: %s", i, pe.getMessage())); 152 | return null; 153 | } 154 | } 155 | 156 | return elements; 157 | } 158 | 159 | public List parseComplex(String line) { 160 | elements.clear(); 161 | IndexedLine sr = new IndexedLine(line); 162 | for (int i = 0; i < parsersSize; i++) { 163 | try { 164 | Object toAdd = parsers.get(i).parse(sr, nullString, delim, 165 | escape, quote, 166 | (parsersSize-1 == i)); 167 | if (!skip.get(i)) 168 | elements.add(toAdd); 169 | } 170 | catch (NumberFormatException e) { 171 | System.err.println(String.format("Invalid number in input number %d: %s", i, e.getMessage())); 172 | return null; 173 | } 174 | catch (ParseException pe) { 175 | System.err.println(String.format("Invalid format in input %d: %s", i, pe.getMessage())); 176 | return null; 177 | } 178 | catch (IOException e) { 179 | System.err.println(String.format("Invalid number of fields - ran out of string: %s", i, e.getMessage())); 180 | return null; 181 | } 182 | } 183 | return elements; 184 | } 185 | 186 | // returns an array of Objects - to be used in PreparedStatement.bind() 187 | public Object[] getElements() { 188 | return elements.toArray(); 189 | } 190 | 191 | public String format(Row row) throws IndexOutOfBoundsException, InvalidTypeException { 192 | StringBuilder retVal = new StringBuilder(); 193 | String[] stringVals = stringVals(row); 194 | retVal.append(stringVals[0]); 195 | for (int i = 1; i < parsersSize; i++) { 196 | 
retVal.append(delimiter).append(stringVals[i]); 197 | } 198 | return retVal.toString(); 199 | } 200 | 201 | public String[] stringVals(Row row) throws IndexOutOfBoundsException, InvalidTypeException { 202 | String[] stringVals = new String[parsers.size()]; 203 | for (int i = 0; i < parsersSize; i++) { 204 | stringVals[i] = parsers.get(i).format(row, i); 205 | if (null == stringVals[i]) 206 | stringVals[i] = nullString; 207 | } 208 | return stringVals; 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/DoubleParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.util.Locale; 19 | import java.text.ParseException; 20 | 21 | import com.datastax.driver.core.Row; 22 | import com.datastax.driver.core.exceptions.InvalidTypeException; 23 | 24 | // Double parser - use the Number parser 25 | public class DoubleParser extends NumberParser { 26 | public DoubleParser() { 27 | super(); 28 | } 29 | 30 | public DoubleParser(Locale inLocale) { 31 | super(inLocale); 32 | } 33 | 34 | public DoubleParser(Locale inLocale, Boolean grouping) { 35 | super(inLocale, grouping); 36 | } 37 | 38 | public Double parseIt(String toparse) throws ParseException { 39 | Number val = super.parseIt(toparse); 40 | return (null == val) ? null : val.doubleValue(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/FloatParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.util.Locale; 19 | import java.text.ParseException; 20 | 21 | import com.datastax.driver.core.Row; 22 | import com.datastax.driver.core.exceptions.InvalidTypeException; 23 | 24 | // Float parser - use the Number parser 25 | public class FloatParser extends NumberParser { 26 | public FloatParser() { 27 | super(); 28 | } 29 | 30 | public FloatParser(Locale inLocale) { 31 | super(inLocale); 32 | } 33 | 34 | public FloatParser(Locale inLocale, Boolean grouping) { 35 | super(inLocale, grouping); 36 | } 37 | 38 | public Float parseIt(String toparse) throws ParseException { 39 | Number val = super.parseIt(toparse); 40 | return (null == val) ? null : val.floatValue(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/IndexedLine.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
/**
 * A character buffer with a movable read cursor, used to walk through one
 * input line a character at a time.
 */
public class IndexedLine {
    private int index;
    private char[] buffer;

    /**
     * Wraps the given characters (the array is used directly, not copied)
     * with the cursor at the start.
     */
    public IndexedLine(char[] inBuffer) {
        buffer = inBuffer;
        index = 0;
    }

    /** Wraps the characters of the given string with the cursor at the start. */
    public IndexedLine(String instr) {
        this(instr.toCharArray());
    }

    /**
     * Returns the character under the cursor and advances the cursor.
     *
     * @throws ParseException if the cursor is already past the last character
     */
    public char getNext() throws ParseException {
        if (index >= buffer.length)
            throw new ParseException("ran out of buffer (" + index + " / " + buffer.length + ")", 0);
        char c = buffer[index];
        index++;
        return c;
    }

    /** @return true if at least one character remains to be read */
    public boolean hasNext() {
        return buffer.length > index;
    }

    /**
     * Moves the cursor to an absolute position.
     *
     * @param idx the new position; must address an existing character
     * @return always true
     * @throws ParseException if {@code idx} is negative or past the last
     *         character
     */
    public boolean setIndex(int idx) throws ParseException {
        // Reject negative positions too: accepting one would leave the cursor
        // corrupt and make the next getNext() fail with an unchecked exception.
        if (idx < 0 || buffer.length - 1 < idx)
            throw new ParseException("index out of range", 0);
        index = idx;
        return true;
    }

    /**
     * Returns the character at an absolute position without moving the cursor.
     *
     * @throws ParseException if {@code idx} is negative or past the last
     *         character
     */
    public char get(int idx) throws ParseException {
        if (idx < 0 || buffer.length - 1 < idx)
            throw new ParseException("index out of range", 0);
        return buffer[idx];
    }

    /**
     * Moves the cursor back by one character.
     *
     * @return false if the cursor was already at the start, true otherwise
     */
    public boolean backup() {
        if (0 == index)
            return false;
        index--;
        return true;
    }

    /** @return the total number of characters in the buffer */
    public int length() {
        return buffer.length;
    }

    /** @return the number of characters from the cursor to the end */
    public int numRemaining() {
        return buffer.length - index;
    }

    /** @return the characters from the cursor to the end, as a string */
    public String remaining() {
        return new String(buffer, index, numRemaining());
    }
}
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.net.InetAddress; 19 | import java.text.ParseException; 20 | import java.net.UnknownHostException; 21 | 22 | import com.datastax.driver.core.Row; 23 | import com.datastax.driver.core.exceptions.InvalidTypeException; 24 | 25 | // InetAddress parser 26 | public class InetAddressParser extends AbstractParser { 27 | public InetAddress parseIt(String toparse) throws ParseException { 28 | if (null == toparse) 29 | return null; 30 | InetAddress ret; 31 | try { 32 | ret = InetAddress.getByName(toparse); 33 | } 34 | catch (UnknownHostException uhe) { 35 | throw new ParseException("Error parsing Inet: " + uhe.getMessage(), 0); 36 | } 37 | return ret; 38 | } 39 | 40 | public String format(Object o) { 41 | InetAddress v = (InetAddress)o; 42 | return v.getHostAddress(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/IntegerParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.util.Locale; 19 | import java.text.ParseException; 20 | 21 | import com.datastax.driver.core.Row; 22 | import com.datastax.driver.core.exceptions.InvalidTypeException; 23 | 24 | // Integer parser - use the Number parser 25 | public class IntegerParser extends NumberParser { 26 | public IntegerParser() { 27 | super(); 28 | } 29 | 30 | public IntegerParser(Locale inLocale) { 31 | super(inLocale); 32 | } 33 | 34 | public IntegerParser(Locale inLocale, Boolean grouping) { 35 | super(inLocale, grouping); 36 | } 37 | 38 | public Integer parseIt(String toparse) throws ParseException { 39 | Number val = super.parseIt(toparse); 40 | return (null == val) ? null : val.intValue(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/ListParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.util.List; 19 | import java.util.ArrayList; 20 | import java.io.StringReader; 21 | import java.io.IOException; 22 | import java.text.ParseException; 23 | 24 | import com.datastax.driver.core.Row; 25 | import com.datastax.driver.core.exceptions.InvalidTypeException; 26 | 27 | import com.univocity.parsers.csv.CsvParser; 28 | import com.univocity.parsers.csv.CsvParserSettings; 29 | 30 | public class ListParser extends AbstractParser { 31 | private Parser parser; 32 | private char collectionDelim; 33 | private char collectionBegin; 34 | private char collectionEnd; 35 | private char collectionQuote = '\"'; 36 | private char collectionEscape = '\\'; 37 | private String collectionNullString = "null"; 38 | private List elements; 39 | 40 | private CsvParser csvp = null; 41 | 42 | public ListParser(Parser inParser, char inCollectionDelim, 43 | char inCollectionBegin, char inCollectionEnd) { 44 | parser = inParser; 45 | collectionDelim = inCollectionDelim; 46 | collectionBegin = inCollectionBegin; 47 | collectionEnd = inCollectionEnd; 48 | elements = new ArrayList(); 49 | 50 | CsvParserSettings settings = new CsvParserSettings(); 51 | settings.getFormat().setLineSeparator("\n"); 52 | settings.getFormat().setDelimiter(collectionDelim); 53 | settings.getFormat().setQuote(collectionQuote); 54 | settings.getFormat().setQuoteEscape(collectionEscape); 55 | settings.getFormat().setCharToEscapeQuoteEscaping(collectionEscape); 56 | settings.setKeepQuotes(true); 57 | settings.setKeepEscapeSequences(true); 58 | 59 | csvp = new CsvParser(settings); 60 | } 61 | 62 | public Object parseIt(String toparse) throws ParseException { 63 | if (null == toparse) 64 | return null; 65 | if (!toparse.startsWith(Character.toString(collectionBegin))) 66 | throw new ParseException("Must begin with " + collectionBegin 67 | + 
"\n", 0); 68 | if (!toparse.endsWith(Character.toString(collectionEnd))) 69 | throw new ParseException("Must end with " + collectionEnd 70 | + "\n", 0); 71 | toparse = toparse.substring(1, toparse.length() - 1); 72 | String[] row = csvp.parseLine(toparse); 73 | elements.clear(); 74 | try { 75 | for (int i = 0; i < row.length; i++) 76 | elements.add(parser.parse(row[i])); 77 | } 78 | catch (Exception e) { 79 | System.err.println("Trouble parsing : " + e.getMessage()); 80 | return null; 81 | } 82 | return elements; 83 | } 84 | 85 | //public String format(Row row, int index) { 86 | // if (row.isNull(index)) 87 | // return null; 88 | // List list = row.getList(index, Object.class); 89 | @SuppressWarnings("unchecked") 90 | public String format(Object o) { 91 | List list = (List)o; 92 | StringBuilder sb = new StringBuilder(); 93 | sb.append(collectionBegin); 94 | if (list.size() > 0) { 95 | for (int i = 0; i < list.size() - 1; i++) { 96 | sb.append(parser.format(list.get(i))); 97 | sb.append(collectionDelim); 98 | } 99 | sb.append(parser.format(list.get(list.size() - 1))); 100 | } 101 | sb.append(collectionEnd); 102 | return quote(sb.toString()); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/LocalDateParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.util.Date; 19 | import java.text.ParseException; 20 | 21 | import com.datastax.driver.core.LocalDate; 22 | 23 | public class LocalDateParser extends AbstractParser { 24 | private DateParser dateParser; 25 | 26 | public LocalDateParser(String inFormatString) { 27 | dateParser = new DateParser(inFormatString); 28 | } 29 | 30 | public LocalDate parseIt(String toparse) throws ParseException { 31 | if (null == toparse) 32 | return null; 33 | Date d = dateParser.parseIt(toparse); 34 | LocalDate ret = LocalDate.fromMillisSinceEpoch(d.getTime()); 35 | return ret; 36 | } 37 | 38 | public String format(Object o) { 39 | LocalDate v = (LocalDate)o; 40 | if (v == null) 41 | return null; 42 | Date d = new Date(v.getMillisSinceEpoch()); 43 | return dateParser.format(d); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/LongParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.util.Locale; 19 | import java.text.ParseException; 20 | 21 | import com.datastax.driver.core.Row; 22 | import com.datastax.driver.core.exceptions.InvalidTypeException; 23 | 24 | // Long parser - use the Number parser 25 | public class LongParser extends NumberParser { 26 | public LongParser() { 27 | super(); 28 | } 29 | 30 | public LongParser(Locale inLocale) { 31 | super(inLocale); 32 | } 33 | 34 | public LongParser(Locale inLocale, Boolean grouping) { 35 | super(inLocale, grouping); 36 | } 37 | 38 | public Long parseIt(String toparse) throws ParseException { 39 | Number val = super.parseIt(toparse); 40 | return (null == val) ? null : val.longValue(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/MapParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.util.Map; 19 | import java.util.Set; 20 | import java.util.HashMap; 21 | import java.util.Iterator; 22 | import java.io.StringReader; 23 | import java.io.IOException; 24 | import java.io.StringReader; 25 | import java.text.ParseException; 26 | 27 | import com.datastax.driver.core.Row; 28 | import com.datastax.driver.core.exceptions.InvalidTypeException; 29 | 30 | import com.univocity.parsers.csv.CsvParser; 31 | import com.univocity.parsers.csv.CsvParserSettings; 32 | 33 | public class MapParser extends AbstractParser { 34 | private Parser keyParser; 35 | private Parser valueParser; 36 | private char collectionDelim; 37 | private char collectionBegin; 38 | private char collectionEnd; 39 | private char collectionQuote = '\"'; 40 | private char collectionEscape = '\\'; 41 | private char mapDelim; 42 | private String collectionNullString = null; 43 | private Map elements; 44 | 45 | private CsvParser csvp = null; 46 | 47 | public MapParser(Parser inKeyParser, Parser inValueParser, 48 | char inCollectionDelim, char inCollectionBegin, 49 | char inCollectionEnd, char inMapDelim) { 50 | keyParser = inKeyParser; 51 | valueParser = inValueParser; 52 | collectionDelim = inCollectionDelim; 53 | collectionBegin = inCollectionBegin; 54 | collectionEnd = inCollectionEnd; 55 | mapDelim = inMapDelim; 56 | elements = new HashMap(); 57 | 58 | CsvParserSettings settings = new CsvParserSettings(); 59 | settings.getFormat().setLineSeparator("" + collectionDelim); 60 | settings.getFormat().setNormalizedNewline(collectionDelim); 61 | settings.getFormat().setDelimiter(mapDelim); 62 | settings.getFormat().setQuote(collectionQuote); 63 | settings.getFormat().setQuoteEscape(collectionEscape); 64 | settings.getFormat().setCharToEscapeQuoteEscaping(collectionEscape); 65 | settings.setKeepQuotes(true); 66 | settings.setKeepEscapeSequences(true); 67 | 68 | csvp = new CsvParser(settings); 69 | } 70 | public Object parseIt(String 
toparse) throws ParseException { 71 | if (null == toparse) 72 | return null; 73 | if (!toparse.startsWith(Character.toString(collectionBegin))) 74 | throw new ParseException("Must begin with " + collectionBegin 75 | + "\n", 0); 76 | if (!toparse.endsWith(Character.toString(collectionEnd))) 77 | throw new ParseException("Must end with " + collectionEnd 78 | + "\n", 0); 79 | toparse = toparse.substring(1, toparse.length() - 1); 80 | elements.clear(); 81 | StringReader sr = new StringReader(toparse); 82 | csvp.beginParsing(sr); 83 | try { 84 | String[] row; 85 | while ((row = csvp.parseNext()) != null) { 86 | Object key = keyParser.parse(row[0]); 87 | Object value = valueParser.parse(row[1]); 88 | elements.put(key, value); 89 | } 90 | } 91 | catch (Exception e) { 92 | System.err.println("Trouble parsing : " + e.getMessage()); 93 | e.printStackTrace(); 94 | return null; 95 | } 96 | return elements; 97 | } 98 | 99 | @SuppressWarnings("unchecked") 100 | public String format(Object o) { 101 | Map map = (Map)o; 102 | Iterator > iter = map.entrySet().iterator(); 103 | Map.Entry me; 104 | StringBuilder sb = new StringBuilder(); 105 | sb.append(collectionBegin); 106 | if (iter.hasNext()) { 107 | me = iter.next(); 108 | sb.append(keyParser.format(me.getKey())); 109 | sb.append(mapDelim); 110 | sb.append(valueParser.format(me.getValue())); 111 | } 112 | while (iter.hasNext()) { 113 | sb.append(collectionDelim); 114 | me = iter.next(); 115 | sb.append(keyParser.format(me.getKey())); 116 | sb.append(mapDelim); 117 | sb.append(valueParser.format(me.getValue())); 118 | } 119 | sb.append(collectionEnd); 120 | 121 | return quote(sb.toString()); 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/NumberParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 
(the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.util.Locale; 19 | import java.text.NumberFormat; 20 | import java.text.DecimalFormat; 21 | import java.text.ParseException; 22 | 23 | import com.datastax.driver.core.Row; 24 | import com.datastax.driver.core.exceptions.InvalidTypeException; 25 | 26 | // General number parser 27 | // This is useful as it can take care of Locales for us 28 | // That means comma as a decimal separator, etc. 29 | public class NumberParser extends AbstractParser { 30 | protected NumberFormat nf; 31 | public NumberParser() { 32 | this(null); 33 | } 34 | 35 | public NumberParser(Locale locale) { 36 | this(locale, true); 37 | } 38 | 39 | public NumberParser(Locale locale, Boolean grouping) { 40 | if (null == locale) 41 | locale = Locale.ENGLISH; 42 | nf = NumberFormat.getInstance(locale); 43 | if (nf instanceof DecimalFormat) { 44 | ((DecimalFormat) nf).setGroupingUsed(grouping); 45 | } 46 | } 47 | 48 | // Need this method for the subclasses 49 | public Number parseIt(String toparse) throws ParseException { 50 | if ((null == toparse) || (0 == toparse.length())) 51 | return null; 52 | return nf.parse(toparse); 53 | } 54 | 55 | public String format(Object o) { 56 | return nf.format(o); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/Parser.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.text.ParseException; 19 | import java.io.StringReader; 20 | import java.io.IOException; 21 | 22 | import com.datastax.driver.core.Row; 23 | import com.datastax.driver.core.exceptions.InvalidTypeException; 24 | 25 | // Parsing Interface - one method parse(String) 26 | public interface Parser { 27 | public Object parse(String toparse) throws ParseException; 28 | public Object parse(IndexedLine il, String nullString, Character delim, 29 | Character escape, Character quote, boolean last) 30 | throws IOException, ParseException; 31 | public String format(Row row, int index) throws IndexOutOfBoundsException, InvalidTypeException; 32 | public String format(Object o); 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/SetParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.util.Set; 19 | import java.util.HashSet; 20 | import java.util.Iterator; 21 | import java.io.StringReader; 22 | import java.io.IOException; 23 | import java.text.ParseException; 24 | 25 | import com.datastax.driver.core.Row; 26 | import com.datastax.driver.core.exceptions.InvalidTypeException; 27 | 28 | import com.univocity.parsers.csv.CsvParser; 29 | import com.univocity.parsers.csv.CsvParserSettings; 30 | 31 | public class SetParser extends AbstractParser { 32 | private Parser parser; 33 | private char collectionDelim; 34 | private char collectionBegin; 35 | private char collectionEnd; 36 | private char collectionQuote = '\"'; 37 | private char collectionEscape = '\\'; 38 | private String collectionNullString = "null"; 39 | private Set elements; 40 | 41 | private CsvParser csvp = null; 42 | 43 | public SetParser(Parser inParser, char inCollectionDelim, 44 | char inCollectionBegin, char inCollectionEnd) { 45 | parser = inParser; 46 | collectionDelim = inCollectionDelim; 47 | collectionBegin = inCollectionBegin; 48 | collectionEnd = inCollectionEnd; 49 | elements = new HashSet(); 50 | 51 | CsvParserSettings settings = new CsvParserSettings(); 52 | settings.getFormat().setLineSeparator("\n"); 53 | settings.getFormat().setDelimiter(collectionDelim); 54 | settings.getFormat().setQuote(collectionQuote); 55 | settings.getFormat().setQuoteEscape(collectionEscape); 56 | settings.getFormat().setCharToEscapeQuoteEscaping(collectionEscape); 57 | 
settings.setKeepQuotes(true); 58 | settings.setKeepEscapeSequences(true); 59 | 60 | csvp = new CsvParser(settings); 61 | } 62 | public Object parseIt(String toparse) throws ParseException { 63 | if (null == toparse) 64 | return null; 65 | if (!toparse.startsWith(Character.toString(collectionBegin))) 66 | throw new ParseException("Must begin with " + collectionBegin 67 | + "\n", 0); 68 | if (!toparse.endsWith(Character.toString(collectionEnd))) 69 | throw new ParseException("Must end with " + collectionEnd 70 | + "\n", 0); 71 | toparse = toparse.substring(1, toparse.length() - 1); 72 | String[] row = csvp.parseLine(toparse); 73 | elements.clear(); 74 | try { 75 | for (int i = 0; i < row.length; i++) 76 | elements.add(parser.parse(row[i])); 77 | } 78 | catch (Exception e) { 79 | System.err.println("Trouble parsing : " + e.getMessage()); 80 | e.printStackTrace(); 81 | return null; 82 | } 83 | return elements; 84 | } 85 | 86 | //public String format(Row row, int index) { 87 | // if (row.isNull(index)) 88 | // return null; 89 | // Set set = row.getSet(index, Object.class); 90 | @SuppressWarnings("unchecked") 91 | public String format(Object o) { 92 | Set set = (Set)o; 93 | Iterator iter = set.iterator(); 94 | StringBuilder sb = new StringBuilder(); 95 | sb.append(collectionBegin); 96 | if (iter.hasNext()) 97 | sb.append(parser.format(iter.next())); 98 | while (iter.hasNext()) { 99 | sb.append(collectionDelim); 100 | sb.append(parser.format(iter.next())); 101 | } 102 | sb.append(collectionEnd); 103 | 104 | return quote(sb.toString()); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/ShortParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.util.Locale; 19 | import java.text.ParseException; 20 | 21 | import com.datastax.driver.core.Row; 22 | import com.datastax.driver.core.exceptions.InvalidTypeException; 23 | 24 | // Short parser - use the Number parser 25 | public class ShortParser extends NumberParser { 26 | public ShortParser() { 27 | super(); 28 | } 29 | 30 | public ShortParser(Locale inLocale) { 31 | super(inLocale); 32 | } 33 | 34 | public ShortParser(Locale inLocale, Boolean grouping) { 35 | super(inLocale, grouping); 36 | } 37 | 38 | public Short parseIt(String toparse) throws ParseException { 39 | Number val = super.parseIt(toparse); 40 | return (null == val) ? null : val.shortValue(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/StringParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.io.StringReader; 19 | import java.io.IOException; 20 | 21 | import com.datastax.driver.core.Row; 22 | import com.datastax.driver.core.exceptions.InvalidTypeException; 23 | 24 | // String parser - simple 25 | public class StringParser extends AbstractParser { 26 | public String parseIt(String toparse) { 27 | return toparse; 28 | } 29 | 30 | public String format(Object o) { 31 | String iv = (String)o; 32 | return quote(iv); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/com/datastax/loader/parser/UUIDParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Brian Hess 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datastax.loader.parser; 17 | 18 | import java.util.UUID; 19 | 20 | import com.datastax.driver.core.Row; 21 | import com.datastax.driver.core.exceptions.InvalidTypeException; 22 | 23 | // UUID parser 24 | public class UUIDParser extends AbstractParser { 25 | public UUID parseIt(String toparse) throws IllegalArgumentException { 26 | if (null == toparse) 27 | return null; 28 | return UUID.fromString(toparse); 29 | } 30 | public String format(Object o) { 31 | UUID v = (UUID)o; 32 | return v.toString(); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |    5 |    %-5p %msg%n 6 | 7 |    8 | 9 |    10 |    11 | 12 | -------------------------------------------------------------------------------- /src/make/buildit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cat src/make/cassandra-loader.sh build/libs/cassandra-loader-uber*.jar > build/cassandra-loader && chmod 755 build/cassandra-loader 4 | -------------------------------------------------------------------------------- /src/make/cassandra-loader.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | MYSELF=`which "$0" 2>/dev/null` 3 | [ $? 
-gt 0 -a -f "$0" ] && MYSELF="./$0" 4 | java=java 5 | if test -n "$JAVA_HOME"; then 6 | java="$JAVA_HOME/bin/java" 7 | fi 8 | exec "$java" -XX:+UseG1GC -Xmx1G -Xms1G -XX:+UseTLAB -XX:+ResizeTLAB $java_args -jar $MYSELF "$@" 9 | exit 1 10 | -------------------------------------------------------------------------------- /src/make/unloader.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cat src/make/cassandra-loader.sh build/libs/cassandra-unloader-uber*.jar > build/cassandra-unloader && chmod 755 build/cassandra-unloader 4 | --------------------------------------------------------------------------------