├── .build.yml
├── .gitignore
├── .settings
│   ├── .gitignore
│   └── org.eclipse.core.resources.prefs
├── README.md
├── build
├── doc
│   ├── configformat.md
│   ├── datafileformat.md
│   ├── examples.md
│   ├── options.md
│   └── releasenotes.md
├── example
│   ├── alldatatype.dsv
│   ├── alldatatype.json
│   ├── secondarymapping.dsv
│   └── secondarymapping.json
├── pom.xml
├── pom.xml.orig
├── run_loader
└── src
    ├── main
    │   ├── java
    │   │   └── com
    │   │       └── aerospike
    │   │           └── load
    │   │               ├── AerospikeLoad.java
    │   │               ├── AsWriterTask.java
    │   │               ├── BinDefinition.java
    │   │               ├── ColumnDefinition.java
    │   │               ├── Constants.java
    │   │               ├── Counter.java
    │   │               ├── MappingDefinition.java
    │   │               ├── MetaDefinition.java
    │   │               ├── Parameters.java
    │   │               ├── Parser.java
    │   │               ├── PrintStat.java
    │   │               └── Utils.java
    │   └── resources
    │       ├── log4j2.properties
    │       └── project.properties
    └── test
        ├── java
        │   └── com
        │       └── aerospike
        │           └── load
        │               └── DataTypeTest.java
        └── resources
            ├── configAllDataType.json
            ├── configBlob.json
            ├── configDate.json
            ├── configDynamicBinName.json
            ├── configInt.json
            ├── configJson.json
            ├── configList.json
            ├── configMap.json
            ├── configStaticBinName.json
            ├── configString.json
            ├── configUtf8.json
            ├── data.dsv
            ├── dataAllTypes.dsv
            ├── dataBlob.dsv
            ├── dataDate.dsv
            ├── dataDynamicBin.csv
            ├── dataInt.dsv
            ├── dataJson.dsv
            ├── dataList.dsv
            ├── dataMap.dsv
            ├── dataStaticBin.csv
            ├── dataString.dsv
            ├── dataUtf8.dsv
            └── testSchema.json
--------------------------------------------------------------------------------
/.build.yml:
--------------------------------------------------------------------------------
name: aerospike-loader

container:
  - base:
      - docker.qe.aerospike.com/build/aerospike-client-java:openjdk-8

build:
  - name: build
    script:
      - mvn package -f pom.xml -DskipTests
    artifact:
      - target/aerospike-load-*-jar-with-dependencies.jar
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.log
target
bin
.vscode
--------------------------------------------------------------------------------
/.settings/.gitignore:
--------------------------------------------------------------------------------
/org.eclipse.jdt.core.prefs
/org.eclipse.m2e.core.prefs
--------------------------------------------------------------------------------
/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding//src/test/java=UTF-8
encoding//src/test/resources=UTF-8
encoding/<project>=UTF-8
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Aerospike Loader
> The Aerospike Data Loader helps migrate data from other databases into
> Aerospike. Dump the data from the source database in .DSV format and use this
> tool to parse and load it into an Aerospike server. You provide the .DSV data
> files to load and an Aerospike schema file in JSON format; the loader parses
> the .DSV files and loads the data into the Aerospike server according to the
> given schema.

- [Prerequisites](#prerequisites)
- [Installation](#installation)
- [Dependencies](#dependencies)
- [Usage](#usage)
- [Options](doc/options.md)
- [Config file format](doc/configformat.md)
- [Data file format](doc/datafileformat.md)
- [Examples](doc/examples.md)
- [Demo example](#demo-example)
- [Detailed examples](doc/examples.md)
- [Release Notes](doc/releasenotes.md)


## Prerequisites
* Java 1.8 or greater
* Maven 3.0 or greater


## Installation
* Source code is available on GitHub:

      $ git clone https://github.com/aerospike/aerospike-loader.git

* Then build the utility by running the following:

      $ cd aerospike-loader
      $ ./build


## Dependencies
The following dependencies are downloaded automatically:
* Aerospike Java client 7.2.2 or greater
* Apache commons cli 1.7.0
* Log4j 2.22.1
* Junit 4.13.1
* Json-simple 1.1.1


## Usage

If you downloaded the jar from [the releases page](https://github.com/aerospike/aerospike-loader/releases), use:

    $ java -cp aerospike-load-*-jar-with-dependencies.jar com.aerospike.load.AerospikeLoad

If you downloaded the source, use the **run_loader** script along with options and data files:

    $ ./run_loader

The data file name(s)/directory argument can be either space-delimited file names or a directory name containing data files. See the "Data Files" section for more details.

For the available options and their descriptions, run with asloader's --usage option:

    $ java -cp aerospike-load-*-jar-with-dependencies.jar com.aerospike.load.AerospikeLoad --usage
    $ ./run_loader --usage

For more details, refer to [Options](https://aerospike.com/docs/tools/asloader/options).

### Some extra info about the internal working:

* There are 2 types of threads:
    * reader threads, which read the data files (number of reader threads = number of CPUs or number of files in the directory, whichever is lower)
    * writer threads, which write to the cluster (number of writer threads = number of CPUs * 5, where 5 is the scale factor)

### Sample usage of common options:

    $ ./run_loader -h nodex -p 3000 -n test -T 3000 -e 2592000 -ec 100 -tz PST -wa update -c ~/pathto/config.json datafiles/

Where:

```
Server IP: nodex (-h)
Port: 3000 (-p)
Namespace: test (-n)
Write Operation Timeout (in milliseconds): 3000 (-T)
Write Error Threshold: 100 (-ec)
Record Expiration: 2592000 (-e)
Timezone: PST (-tz)
Write Action: update (-wa)
Data Mapping: ~/pathto/config.json (-c)
Data Files: datafiles/
```


## Demo example
The example directory contains sample data and config files, including alldatatype.json and alldatatype.dsv. Run the following command to load the data from alldatatype.dsv:

    ./run_loader -h localhost -c example/alldatatype.json example/alldatatype.dsv

For more examples, see [doc/examples.md](doc/examples.md).
--------------------------------------------------------------------------------
/build:
--------------------------------------------------------------------------------
#! /usr/bin/env bash
# Build Java projects with Maven.
# Maven and Java must be installed prior to running this script.
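# "mvn clean" removes previous build output; "mvn install -DskipTests" compiles
# the sources, packages target/aerospike-load-*-jar-with-dependencies.jar (via
# the assembly plugin configured in pom.xml), and installs the artifact into
# the local Maven repository.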

mvn clean
mvn install -DskipTests
--------------------------------------------------------------------------------
/doc/configformat.md:
--------------------------------------------------------------------------------
# Configuration file
> The Aerospike-loader configuration file specifies the schema mapping of the source data set to the Aerospike database, as well as attributes that help parse the source data files. The format of this file is JSON. To write a configuration file, you need some preliminary information from the data file, such as the column headers and the types of data.

- [Sample Config](#sample-configuration-file)
- [Keywords Supported](#keywords-supported-in-config-file)
- [dsv_config attributes](#dsv_config-attributes)
- [mappings attributes](#mappings-attributes)
- [key/set attributes](#keyset-attributes)
- [bin_list attributes](#bin_list-attributes)


## Sample configuration file:
The following config file maps a data file that has five columns. If the first line of the data file contains column names, those names can be used for the mapping; alternatively, column_position can be given, and the data position is then used while processing the data file. This config file defines dsv_config and mappings. dsv_config holds the delimiter, n_columns_datafile, and header_exist info. There are two kinds of mapping: a primary mapping, and a secondary_mapping used to reverse-index any column back to the key. Each mapping has three fields (key, set, bin_list). For example, the primary mapping here has a key (from the column named 'key'), a set (from the column named 'set'), and a bin_list defining three bins.
``` c
{
  "version" : "2.0",
  "dsv_config": {
    "delimiter": ",",
    "n_columns_datafile": 5,
    "header_exist": true
  },

  "mappings": [
    {
      "key": {
        "column_name": "key",
        "type": "string"
      },
      "set": {
        "column_name": "set",
        "type": "string"
      },
      "bin_list": [
        {
          "name": "dob",
          "value": {
            "column_position": 3,
            "type": "timestamp",
            "dst_type": "integer",
            "encoding" : "MM/dd/yyyy"
          }
        },
        {
          "name": "lstblob",
          "value": {
            "column_name": "lstblob",
            "type": "blob",
            "dst_type": "blob",
            "encoding" : "hex"
          }
        },
        {
          "name": "age",
          "value": {
            "column_name": "name",
            "type": "string"
          }
        }
      ]
    },
    {
      "secondary_mapping": "true",
      "key": {
        "column_name": "name",
        "type": "String"
      },
      "set": "name_map",
      "bin_list": [
        {
          "name": "name_key",
          "value": {
            "column_name": "key",
            "type": "integer"
          }
        }
      ]
    }
  ]
}
```


## Keywords Supported in Config file:

| Keywords | Description | Required/ Optional | Value | Attributes |
|------------|------------|---------|---------|------------|
| version | Version of the Aerospike loader config format. The current version is 2.0. | Required | "2.0" | No attributes |
| dsv_config | dsv_config specifies the parsing configuration. | Required | Map of attribute values | delimiter, n_columns_datafile, header_exist |
| mappings | List of primary and secondary mappings. (A secondary mapping creates a reverse mapping from a secondary key; use it when there are columns other than the primary key on which the user wants to create an index.)
 | Required | List of mappingDef maps | No direct attributes. Each map in the list has four attributes: secondary_mapping (optional boolean marking a secondary mapping), key (as below), set (as below), bin_list (as below). |
| key (mappingDef attribute) | Key mapping from the data file. | Required | Map of attribute values | choice(column_position/column_name), type |
| set (mappingDef attribute) | Set name mapping from the data file. The set name can be provided as a static value or dynamically (defined by a mapping) in the config file. The set name is always of string type. | Required | Map of attribute values | choice(column_position/column_name), type |
| bin_list (mappingDef attribute) | List of bin mappings from the data file. | Required | List of binDef maps | No direct attributes. Each map in the list has two attributes: name, value (column_position/column_name, type, dst_type, encoding). |


### dsv_config Attributes:

| Keywords | Description | Required/ Optional | Value |
|-------------------|-------------|---------------------------|---------|
| delimiter | The string that separates values in each row of the data file. If the file contains any JSON data, do not use a JSON-specific character ('}', ']', ',', ':', ...) as the delimiter. Only data inside double quotes (" ") is exempt from the delimiter search. Since the input is DSV, any unambiguous delimiter can be used. | Optional (default is ',') | Any string. The data itself should not contain the delimiter. |
| n_columns_datafile | Number of columns in the data file. | Required | Integer |
| header_exist | Used to skip the first line of the data file, where the header information is present. | Required | "true"/"false" |


### mappings Attributes:
mappings is a list of primary or secondary mappingDefs. Each mapping has four attributes:
- secondary_mapping (boolean, optional)
- key (map)
- set (string or map)
- bin_list (list of binDefs)

__Note__: Definitions for all attributes are given below.


### key/set Attributes:
The key is unique and always picked from the data file.

| Keywords | Description | Required/ Optional | Value |
|------------------------------|-------------|---------------------|-----------------|
| column_position/ column_name | Column position number in the data file, or column name in the header of the data file. | One of column_position/column_name is required. | integer/ string |
| type | Type of the key/set. The set name should be a string. | Required | string |


### Bin_list Attributes:
"bin_list" contains an array of bin definitions, so it has no direct attributes. Each entry in bin_list has two attributes: "name" (the name mapping for the bin) and "value" (the value mapping for the bin). The following table describes the sub-attributes of "name"/"value". The "name" attribute does not take the dst_type and encoding sub-attributes, and its type is always string. "name"/"value" can hold static/fixed values, or the name/value can be picked from the data file. Each bin name must be at most 14 characters long.

| Keywords | Description | Required/ Optional | Value |
|------------------------------|-------------|---------------------|------------------|
| column_position/ column_name | Column name in the header of the data file, or column position. | One of column_position/column_name is required. | integer / string |
| type | Data type of the source data. Supported data types are: integer, float, string, blob, json (list/map), geojson, timestamp. | Required | string |
| dst_type | Destination data type in Aerospike. Supported data types are: integer, float, string, blob, list, map, geojson. For the integer, float, string, geojson, and json source types, the implicit dst_type is integer, float, string, geojson, and list or map, respectively. For timestamp, dst_type can be integer/string. | Required if a source-to-destination type conversion is needed, and in the timestamp and blob cases. | string |
| encoding | Encoding format for the data conversion from the source to the destination type. Blob data should be hex encoded. Timestamp data can, for example, be encoded as "MM/dd/yyyy" when dst_type is integer. | Required if dst_type is given | string |

__Note:__ Specify column_name:"system_time" in the config file to insert an extra bin into each record, holding the system time at the moment of writing.
--------------------------------------------------------------------------------
/doc/datafileformat.md:
--------------------------------------------------------------------------------
# Data file format
> The user can provide a list of file names or a directory name containing the data dump. Currently, aerospike-loader supports data dumps in DSV format.

## Sample data file:
``` c
user_location##user_id##last_visited##community
India##1##08/16/2011##facebook
India##2##08/17/2011##Twitter
USA##3##08/16/2011##Twitter
```

In the sample data file above, the first row is the header information for each column. These column names can be used for column mapping in the configuration file. The next three rows contain data separated by the delimiter ('##').

## Supported Data Types:

- Integer : Integer (numeric) data. E.g. 123456
- Float : Floating-point data, stored natively as a float (in earlier versions it was stored as an 8-byte byte array). E.g. 0.345
- String : String data. E.g. "Aerospike"
- Blob : Binary fields, hex encoded, are stored as blobs. E.g. "abc" hex encoded as 616263.
- Timestamp : Timestamp data, stored as string or integer. E.g. "1-1-1970" stored as a string stays "1-1-1970", but stored as an integer becomes -19800 seconds (negative because it is calculated relative to the UTC timezone).
- Json : Any standard JSON doc (lists and maps also go in as JSON). E.g. List: ["a", "b", ["c", "d"]], Map: {"a": "b", "c": {"d": "e"}}.
- GeoJson : Aerospike supports the GeoJSON datatype natively, so it can be stored in its standard format. E.g. {"type": "Point", "coordinates": [123.4, -456.7]}

> **Note**: If the data file contains any JSON data, do not use a JSON-specific character ('}', ']', ',', ':', ...) as the delimiter. Only data inside double quotes (" ") is exempt from the delimiter search. Since the input is DSV, any unambiguous delimiter can be used (see the example below).
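For instance, with `##` as the delimiter, a JSON column travels through intact because `##` never appears inside the JSON, whereas a `,` delimiter would split the JSON value itself. An illustrative row (same shape as example/alldatatype.dsv):

```
key1##11##["a","b",["c","d"]]##{"a": {"b": "c"}}
```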

> **Note**: Timestamp data should follow a consistent format and always be enclosed in double quotes. For best practices in timestamp formatting, refer to [SimpleDateFormat](http://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html).
--------------------------------------------------------------------------------
/doc/examples.md:
--------------------------------------------------------------------------------
# Example

Each example below covers a different use case. To try one quickly, copy the data file content to data.dsv and the config file content to config.json, then run the loader with the following command:

    ./run_loader -h localhost -c config.json data.dsv

- [With header in data file](#1-with-header-in-data-file)
- [Without header in data file](#2-without-header-in-data-file)
- [Static value](#3-static-value)
- [System time](#4-system-time)
- [Options usage](#5-options-usage)


## __1. With header in data file__:
The example below shows how to write a config file by referring to the data file, and demonstrates all supported data type formats. A data file may or may not contain column names as header information. If header information exists, it must be in the first line of the data file, and this header can be used for column mapping. A data file without a header is covered in the next example.

### Data file content:
```DSV
user_location##user_id##last_visited##set_name##age##user_name##user_name_blob##user_rating
IND## userid1## 04/1/2014## facebook## 20## X20## 583230## 8.1
USA## userid2## 03/18/2014## twitter## 27## X2## 5832## 6.4
UK## userid3## 01/9/2014## twitter## 21## X3## 5833## 4.3
UK## userid4## 01/2/2014## facebook## 16## X9## 5839## 5.9
IND## userid5## 08/20/2014## twitter## 37## X10## 583130## 9.3
```

### Config file content:
```JSON
{
  "version" : "2.0",
  "dsv_config": { "delimiter": "##", "n_columns_datafile": 8, "header_exist": true},

  "mappings": [
    {
      "key": {"column_name": "user_id", "type": "string"},

      "set": {"column_name": "set_name", "type": "string"},

      "bin_list": [
        {"name": "age",
         "value": {"column_name": "age", "type": "integer"}
        },
        {"name": "location",
         "value": {"column_name": "user_location", "type": "string"}
        },
        {"name": "name",
         "value": {"column_name": "user_name", "type": "string"}
        },
        {"name": "name_blob",
         "value": {"column_name": "user_name_blob", "type": "blob", "dst_type": "blob", "encoding": "hex"}
        },
        {"name": "recent_visit",
         "value": {"column_name": "last_visited", "type": "timestamp", "encoding": "MM/dd/yy", "dst_type": "integer"}
        },
        {"name": "rating",
         "value": {"column_name": "user_rating", "type": "float"}
        }
      ]
    }
  ]
}
```
### Explanation:
- Specify the delimiter (the string separating two values), n_columns_datafile (the actual column count in the data file), and header_exist (true if the first line of the data file holds the column names, false otherwise). Here the data file uses '##' as the delimiter.
- bin_list contains an array of bin mappings. Each bin mapping has two entries: name, the bin name used in Aerospike, and value, the bin content mapping. If a column's mapping is absent from the config file, that column is skipped while loading.
- Instead of column_name, column_position can be used.
- The native data types integer and string are stored as-is.
    - Add the following line to bin_list in the config file for __integer__ type data:
        - {"name": "age", "value": { "column_name": "age", "type" : "integer"}}
    - Add the following line to bin_list in the config file for __string__ type data:
        - {"name": "location", "value": { "column_name": "user_location", "type" : "string"}}
- Data types other than the native types:
    - Add the following line to bin_list in the config file for __blob__ type data. The data in the data file should be in hex format:
        - {"name": "name_blob", "value": {"column_name": "user_name_blob", "type" : "blob", "dst_type" : "blob", "encoding":"hex"}}
    - Add the following line to bin_list in the config file for __float__ type data:
        - {"name": "rating", "value": {"column_name": "user_rating", "type" : "float"}}
    - Add the following line to bin_list in the config file for __timestamp__ type data stored as an integer:
        - {"name": "recent_visit", "value": {"column_name": "last_visited", "type" : "timestamp", "encoding":"MM/dd/yy", "dst_type": "integer"}}
    - Add the following line to bin_list in the config file for __timestamp__ type data stored as a string:
        - {"name": "recent_visit", "value": {"column_name": "last_visited", "type" : "timestamp", "encoding":"MM/dd/yy", "dst_type": "string"}}
- Specify a static set name in the config file as follows:
    - "set": "setnameforall"


## __2. Without header in data file__:
The example below uses a data file with no header information in its first line. When header information is not present in the data file, always use column_position for the column mapping.

### Data file content:
```DSV
IND## userid1## 04/1/2014## facebook## 20## X20## 583230## 8.1
USA## userid2## 03/18/2014## twitter## 27## X2## 5832## 6.4
UK## userid3## 01/9/2014## twitter## 21## X3## 5833## 4.3
UK## userid4## 01/2/2014## facebook## 16## X9## 5839## 5.9
IND## userid5## 08/20/2014## twitter## 37## X10## 583130## 9.3
```
### Config file content:
```JSON
{
  "version" : "2.0",
  "dsv_config": { "delimiter": "##", "n_columns_datafile": 8, "header_exist": false},

  "mappings": [
    {
      "key": {"column_position": 2, "type": "string"},

      "set": {"column_position": 4, "type": "string"},

      "bin_list": [
        {"name": "age",
         "value": {"column_position": 5, "type": "integer"}
        },
        {"name": "location",
         "value": {"column_position": 1, "type": "string"}
        },
        {"name": "name",
         "value": {"column_position": 6, "type": "string"}
        },
        {"name": "name_blob",
         "value": {"column_position": 7, "type": "blob", "dst_type": "blob", "encoding": "hex"}
        },
        {"name": "recent_visit",
         "value": {"column_position": 3, "type": "timestamp", "encoding": "MM/dd/yy", "dst_type": "integer"}
        },
        {"name": "rating",
         "value": {"column_position": 8, "type": "float"}
        }
      ]
    }
  ]
}
```
### Explanation:

- Since there is no header information in the data file, "header_exist" must be false.
- Each column mapping is specified by column_position only.
- See [example 1](#1-with-header-in-data-file) for other details.


## __3. Static value__:
Apart from loading data from the file, the user can add extra information to each record.
The example below stores the user name and, as extra information, the database the data was taken from.

### Data file content:
```DSV
IND## userid1## 04/1/2014## facebook## 20## X20## 583230## 8.1
USA## userid2## 03/18/2014## twitter## 27## X2## 5832## 6.4
UK## userid3## 01/9/2014## twitter## 21## X3## 5833## 4.3
UK## userid4## 01/2/2014## facebook## 16## X9## 5839## 5.9
IND## userid5## 08/20/2014## twitter## 37## X10## 583130## 9.3
```
### Config file content:
```JSON
{
  "version" : "2.0",
  "dsv_config": { "delimiter": "##", "n_columns_datafile": 8, "header_exist": false},

  "mappings": [
    {
      "key": {"column_position": 2, "type": "string"},

      "set": {"column_position": 4, "type": "string"},

      "bin_list": [
        {
          "name": "name",
          "value": {"column_position": 6, "type": "string"}
        },
        {
          "name": "load_from",
          "value": "xyz database"
        }
      ]
    }
  ]
}
```
### Explanation:
- The extra information load_from = "xyz database" is added to each record. This is called static bin mapping.


## __4. System time__:
Apart from loading data from the file, the user can add the system time of writing to each record as extra information. The example below stores the user name along with the system time of writing in milliseconds.

### Data file content:
```DSV
IND## userid1## 04/1/2014## facebook## 20## X20## 583230## 8.1
USA## userid2## 03/18/2014## twitter## 27## X2## 5832## 6.4
UK## userid3## 01/9/2014## twitter## 21## X3## 5833## 4.3
UK## userid4## 01/2/2014## facebook## 16## X9## 5839## 5.9
IND## userid5## 08/20/2014## twitter## 37## X10## 583130## 9.3
```
### Config file content:
```JSON
{
  "version" : "2.0",
  "dsv_config": { "delimiter": "##", "n_columns_datafile": 8, "header_exist": false},

  "mappings": [
    {
      "key": {"column_position": 2, "type": "string"},

      "set": {"column_position": 4, "type": "string"},

      "bin_list": [
        {
          "name": "name",
          "value": {"column_position": 6, "type": "string"}
        },
        {"name": "write_time",
         "value": {"column_name": "system_time", "type": "timestamp", "encoding": "MM/dd/yy HH:mm:ss.SSS", "dst_type": "integer"}
        }
      ]
    }
  ]
}
```
### Explanation:
- The extra information write_time is added to each record. If the encoding contains SSS, write_time is in milliseconds; otherwise it is in seconds.


## __5. Options usage__:


* With all default values, run the Aerospike loader as follows:

```bash
./run_loader -c ~/pathto/config.json ~/pathto/data.dsv
```

* Use a list of data files for loading:

```bash
./run_loader -c ~/pathto/config.json data1.dsv data2.dsv data3.dsv
```

* Use a directory name containing the data files for loading:

```bash
./run_loader -c ~/pathto/config.json data/
```
* Specify the time zone of the location where the data dump was taken. It is optional if the source and destination time zones are the same.

```bash
./run_loader -c ~/pathto/config.json -tz PST data/
```

* Specify the write action for existing records:

```bash
./run_loader -c ~/pathto/config.json -wa update data/
```
--------------------------------------------------------------------------------
/doc/options.md:
--------------------------------------------------------------------------------
# Options


| Options | Description | Default |
|---------------------------|-------------|---------------------|
| -h | List of seed hosts where Aerospike servers are running. | 127.0.0.1 |
| -p | Port to use with the hosts specified in the -h option. | 3000 |
| -U | User name. | |
| -P | Password. | |
| -n | Namespace to load data into. | test |
| -c | JSON formatted configuration file specifying parsing attributes and the schema mapping. | |
| -g | Maximum target transactions per second for the loader. | 0 (no throttling) |
| -T | Timeout (in milliseconds) for a transaction during a write operation. | 0 (no timeout) |
| -e | Expiration time of records in seconds. Other valid values: -1 for records to never expire, 0 to use the server default. | -1 |
| -tz | Time zone of the data backup source. This value is used when loading data of the timestamp datatype. For example, if the backup location's timezone is X and the data is to be loaded into a server located in timezone Y, specify X. Valid values are standard three-letter codes such as PST, EST, etc. | local timezone |
| -ec | Error threshold determining when the loader should stop loading data. Set it to 0 to ignore the threshold. | 0 |
| -wa | Write action. Possible values: 1) UPDATE - Create or update records; merge incoming bin values with existing ones. 2) UPDATE_ONLY - Update existing records, failing if the record does not exist; merge incoming bin values with existing ones. 3) REPLACE - Create or replace existing records. 4) REPLACE_ONLY - Replace existing records, failing if the record does not exist. 5) CREATE_ONLY - Create new records, failing if the record already exists. | UPDATE |
| -tls | Use TLS/SSL sockets. | false |
| -tp | Allowed TLS protocols. Values: TLSv1, TLSv1.1, TLSv1.2, separated by commas. | TLSv1.2 |
| -tlsCiphers | Allowed TLS cipher suites. Values: cipher names defined by the JVM, separated by commas. | null (default cipher list provided by JVM) |
| -tr | Revoke certificates identified by their serial number. Values: serial numbers separated by commas. | null (do not revoke certificates) |
| -uk | Send the user-defined key, in addition to the hash digest, to store on the server. | userKey is not sent, to reduce metadata overhead |
| -v | Verbose mode. If this option is specified, additional information is displayed on the console. | DISABLED |
| -u | Display command usage.
 | |
| -V | Print version. | |
--------------------------------------------------------------------------------
/doc/releasenotes.md:
--------------------------------------------------------------------------------
# Release notes:

- 1.0: Initial implementation.
- 1.1: Insert system time along with the record.

## Aerospike Loader [2.0] Release Date [26 Apr 2017]
### New features:
- Added JSON datatype support (lists and maps, which can also be nested).
- Added TLS security support.
- Added the mappings section. The user can specify any number of mappings in one file, and can add a secondary mapping (used to map any column back to the primary key) by adding the attribute secondary_mapping: 'true'.
- Added -g (max-throughput) to limit the maximum average throughput of the loader.

### Fix/Changes:
- Removed the specific List/Map datatype options as supported types. The JSON datatype covers them.
- Removed CSV-specific options from the config file. There is only DSV support (which covers CSV as well).
- Changed parameter options to be consistent with Aerospike Java client parameters.
- Added simple examples explaining all the datatypes and config options.
- Deprecated the -s (--set) command-line parameter. The set is passed only via the config file.
- Deprecated -wt (write-threads) and -rt (read-threads), and introduced the new option -g defining the max throughput for the loader.
- Changed the schema for mapping definitions.

## Aerospike Loader [2.1] Release Date [27 Jun 2017]
### New features:
- None

### Fix/Changes:
- Fixed the config name binlist in the docs. Changed to bin_list.

## Aerospike Loader [2.2] Release Date [12 Feb 2018]
### New features:
- Added native float support. Earlier, float was stored as a blob.

### Fix/Changes:
- Fixed loading for JSON objects. The JSON object format in the data file is now standard JSON. Limitation: JSON-specific special chars ('[', '{', ',', ':') can't be used as the delimiter.
- Fixed tests.
- Fixed docs related to the JSON spec.
- Fixed examples for JSON objects.

## Aerospike Loader [2.3] Release Date [12 Mar 2018]
### New features:
- Added GeoJSON support.

### Fix/Changes:
- Fixed data upload counters.
- Fixed docs related to the JSON/GeoJSON spec.
- Fixed examples for GeoJSON objects.

## Aerospike Loader [2.4] Release Date [19 Oct 2021]
### New features:
- Added a shebang to the run_loader script to allow execution from non-bash shells.

## Aerospike Loader [2.4.1] Release Date [13 Dec 2021]
### Fix/Changes:
- Updated log4j to version 2.15.0.

## Aerospike Loader [2.4.2] Release Date [16 Dec 2021]
### Fix/Changes:
- Updated log4j to version 2.16.0.

## Aerospike Loader [2.4.3] Release Date [4 Jan 2022]
### Fix/Changes:
- Updated log4j to version 2.17.1.

## Aerospike Loader [3.0.0] Release Date [27 Jan 2023]
### Breaking Changes:
- TOOLS-2346 Upgrade Java client to 6.1.6.
  - Aerospike Java client 6.1.6 is only compatible with Aerospike server versions 4.9 or newer.
- TOOLS-2322 Write key-ordered maps by default.
### New features:
- TOOLS-2347 Add -um, --unorderedMaps flags.
  - Forces all maps to be written as unordered maps. This was standard before the 3.0.0 asloader release.

## Aerospike Loader [4.0.0] Release Date [12 Apr 2023]
### Breaking Changes:
* TOOLS-2469 \(ASLOADER\) Float data type loses precision; changed to double.
  * Asloader 4.0.0 parses data specified as "float" in the JSON spec as Java doubles. This means you may see changes in the precision of floating point values compared to previous versions of asloader. Parsing as double matches the precision of the [Aerospike double data type](https://docs.aerospike.com/server/guide/data-types/scalar-data-types#double), which is what all floats are stored as in the Aerospike database.

### Security:
* TOOLS-1669 Handle CVE-2020-9488 in asloader.
* TOOLS-1670 Handle CVE-2020-15250 in asloader.

### Bug Fixes:
* TOOLS-2469 \(ASLOADER\) Float data type loses precision; changed to double.

### Updates:
* [Snyk] Upgrade com.aerospike:aerospike-client from 6.1.6 to 6.1.7 by @snyk-bot
* [DOCS-1320] [Snyk] Upgrade org.apache.logging.log4j:log4j-core from 2.17.1 to 2.19.0 by @snyk-bot
* [DOCS-1320] [Snyk] Upgrade commons-cli:commons-cli from 1.2 to 1.5.0 by @snyk-bot
* [TOOLS-1670] [TOOLS-1690] Bump junit from 4.11 to 4.13.1 by @dependabot

## Aerospike Loader [4.0.1] Release Date [7 Aug 2023]
### Security:
* [Snyk] Security upgrade com.aerospike:aerospike-client from 6.1.7 to 7.0.0 by @arrowplum in https://github.com/aerospike/aerospike-loader/pull/40
* TOOLS-2640 fix [CVE-2023](https://aerospike.atlassian.net/browse/TOOLS-2640)

## Aerospike Loader [4.0.2] Release Date [15 Jan 2024]
### Security:
* [Snyk] Upgrade org.apache.logging.log4j:log4j-api from 2.20.0 to 2.21.0
* [Snyk] Upgrade org.apache.logging.log4j:log4j-core from 2.20.0 to 2.21.0
* [Snyk] Upgrade commons-cli:commons-cli from 1.5.0 to 1.6.0

### Bug Fixes:
* TOOLS-2826 \(ASLOADER\) Fixed an issue where LDAP users failed read-write role validation. Asloader no longer checks whether the Aerospike user has read-write permissions before beginning writes.

## Aerospike Loader [4.0.3] Release Date [7 Aug 2024]
### Security:
* [Snyk] fix: upgrade org.apache.logging.log4j:log4j-core from 2.21.0 to 2.22.1
* [Snyk] fix: upgrade org.apache.logging.log4j:log4j-api from 2.21.0 to 2.22.1

### Bug Fixes:
* [TOOLS-2690] \(ASLOADER\) Set the client policy maxConnsPerNode to the maximum number of worker threads to prevent out-of-connection errors.
-------------------------------------------------------------------------------- /example/alldatatype.dsv: -------------------------------------------------------------------------------- 1 | set##key##intData##floatData##stringData##listData##mapData##dateData##blobData##geoData 2 | set1##key1##11##1.9##string1##["a","b", ["c", "d"]]##{"a": {"b": "c"}}##01/16/2011##6c6f63626c6f623830##{"type": "Point", "coordinates": [123.4, -45.7]} 3 | set2##key2##12##4.5##string2##["d","e", ["c", "d"]]##{"c": {"b": "c"}}##02/16/2011##6c6f63626c6f623830##{"type": "Point", "coordinates": [123.5, -45.7]} 4 | set3##key3##13##3.6##string3##["a","b", ["c", "d"]]##{"a": {"b": "c"}}##03/16/2011##6c6f63626c6f623830##{"type": "Point", "coordinates": [123.6, -45.7]} 5 | set4##key4##14##5.6##string4##["d","e", ["c", "d"]]##{"c": {"b": "c"}}##04/16/2011##6c6f63626c6f623830##{"type": "Point", "coordinates": [123.7, -43.7]} 6 | set5##key5##15##8.3##string5##["a","b", ["c", "d"]]##{"a": {"b": "c"}}##05/16/2011##6c6f63626c6f623830##{"type": "Point", "coordinates": [123.8, -55.7]} 7 | set6##key6##16##3.0##string6##["d","e", ["c", "d"]]##{"c": {"b": "c"}}##06/16/2011##6c6f63626c6f623830##{"type": "Point", "coordinates": [123.4, -47.7]} 8 | set7##key7##17##6.9##string7##["a","b", ["c", "d"]]##{"a": {"b": "c"}}##07/16/2011##6c6f63626c6f623830##{"type": "Point", "coordinates": [133.4, -25.7]} 9 | set8##key8##18##0.2##string8##["d","b", ["c", "d"]]##{"c": {"b": "c"}}##08/16/2011##6c6f63626c6f623830##{"type": "Point", "coordinates": [143.4, -75.7]} 10 | -------------------------------------------------------------------------------- /example/alldatatype.json: -------------------------------------------------------------------------------- 1 | { 2 | "version" : "2.0", 3 | "input_type" : "dsv", 4 | "dsv_config": { "delimiter": "##" , "n_columns_datafile": 10, "header_exist": true}, 5 | 6 | "mappings": [ 7 | { 8 | "key": {"column_name":"key", "type": "string"}, 9 | 10 | "set": { "column_name":"set" , "type": "string"}, 11 | 12 | "bin_list": [ 13 | { 14 | "name": "intDataBin", 15 | "value": {"column_name": "intData", "type" : "integer"} 16 | }, 17 | { 18 | "name": "floatDataBin", 19 | "value": {"column_name": "floatData", "type" : "float"} 20 | }, 21 | { 22 | "name": "stringDataBin", 23 | "value": {"column_name": "stringData", "type" : "string"} 24 | }, 25 | { 26 | "name": "listDataBin", 27 | "value": {"column_name": "listData", "type" : "json"} 28 | }, 29 | { 30 | "name": "mapDataBin", 31 | "value": {"column_name": "mapData", "type" : "json"} 32 | }, 33 | { 34 | "name": "dateDataBin", 35 | "value": {"column_name": "dateData", "type" : "timestamp", "dst_type": "integer", "encoding":"MM/dd/yy"} 36 | }, 37 | { 38 | "name": "blobDataBin", 39 | "value": {"column_name": "blobData", "type" : "blob", "dst_type": "blob", "encoding":"hex"} 40 | }, 41 | { 42 | "name": "geoDataBin", 43 | "value": {"column_name": "geoData", "type" : "geojson"} 44 | }, 45 | { 46 | "name": "timestamp", 47 | "value": {"column_name": "system_time", "type" : "timestamp", "dst_type": "integer", "encoding":"MM/dd/yy"} 48 | } 49 | ] 50 | } 51 | ] 52 | 53 | } 54 | -------------------------------------------------------------------------------- /example/secondarymapping.dsv: -------------------------------------------------------------------------------- 1 | set#~#age#~#dob#~#name#~#locblob#~#key 2 | set1#~#80#~#8/20/80#~#name80#~#6c6f63626c6f623830#~#1 3 | set2#~#93#~#9/3/93#~#name93#~#6c6f63626c6f623933#~#2 4 | set3#~#90#~#6/0/90#~#name90#~#6c6f63626c6f623930#~#3 5 | 
set4#~#46#~#10/16/46#~#name46#~#6c6f63626c6f623436#~#4 6 | set0#~#56#~#8/26/56#~#name56#~#6c6f63626c6f623536#~#5 7 | set1#~#97#~#1/7/97#~#name97#~#6c6f63626c6f623937#~#6 8 | set2#~#88#~#4/28/88#~#name88#~#6c6f63626c6f623838#~#7 9 | set3#~#81#~#9/21/81#~#name81#~#6c6f63626c6f623831#~#8 10 | set4#~#14#~#2/14/14#~#name14#~#6c6f63626c6f623134#~#9 11 | set0#~#23#~#11/23/23#~#name23#~#6c6f63626c6f623233#~#10 12 | set1#~#99#~#3/9/99#~#name99#~#6c6f63626c6f623939#~#11 13 | set2#~#91#~#7/1/91#~#name91#~#6c6f63626c6f623931#~#12 14 | set3#~#8#~#8/8/8#~#name8#~#6c6f63626c6f6238#~#13 15 | set4#~#95#~#11/5/95#~#name95#~#6c6f63626c6f623935#~#14 16 | set0#~#80#~#8/20/80#~#name80#~#6c6f63626c6f623830#~#15 17 | set1#~#86#~#2/26/86#~#name86#~#6c6f63626c6f623836#~#16 18 | set2#~#53#~#5/23/53#~#name53#~#6c6f63626c6f623533#~#17 19 | set3#~#73#~#1/13/73#~#name73#~#6c6f63626c6f623733#~#18 20 | set4#~#38#~#2/8/38#~#name38#~#6c6f63626c6f623338#~#19 -------------------------------------------------------------------------------- /example/secondarymapping.json: -------------------------------------------------------------------------------- 1 | { 2 | "version" : "2.0", 3 | "dsv_config": { "delimiter": "#~#" , "n_columns_datafile": 6, "header_exist": true}, 4 | "mappings": [ 5 | { 6 | "key": {"column_name":"key", "type": "integer"}, 7 | "set": { "column_name":"set" , "type": "string"}, 8 | "bin_list": [ 9 | {"name": "age", 10 | "value": {"column_name": "age", "type" : "integer"} 11 | }, 12 | {"name": "name", 13 | "value": {"column_name": "name", "type" : "String"} 14 | }, 15 | {"name": "locblob", 16 | "value": {"column_name": "locblob", "type" : "blob", "dst_type" : "blob" , "encoding":"hex"} 17 | }, 18 | {"name": "dob", 19 | "value": {"column_name": "dob", "type" : "timestamp", "encoding":"MM/dd/yy", "dst_type": "integer"} 20 | } 21 | ] 22 | }, 23 | { 24 | "secondary_mapping" : "true", 25 | "key": {"column_name":"name", "type": "String"}, 26 | "set": "name_map", 27 | "bin_list": [ 28 | {"name": "key1", 29 | "value": {"column_name": "key", "type" : "integer"} 30 | } 31 | ] 32 | }, 33 | { 34 | "secondary_mapping" : "true", 35 | "key": {"column_name":"age", "type": "integer"}, 36 | "set": "age_map", 37 | "bin_list": [ 38 | {"name": "key2", 39 | "value": {"column_name": "key", "type" : "integer"} 40 | } 41 | ] 42 | } 43 | ] 44 | 45 | } 46 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.aerospike 5 | aerospike-load 6 | 4.0.3 7 | Aerospike DSV Loader 8 | jar 9 | 10 | 11 | Aerospike Inc. 12 | http://www.aerospike.com 13 | 14 | 15 | 16 | 17 | peter 18 | Peter Milne 19 | peter@aerospike.com 20 | http://www.aerospike.com 21 | Aerospike Inc. 
22 | http://www.aerospike.com 23 | 24 | developer 25 | 26 | -6 27 | 28 | 29 | 30 | 31 | 32 | 33 | com.aerospike 34 | aerospike-client 35 | 7.2.2 36 | 37 | 38 | 39 | commons-cli 40 | commons-cli 41 | 1.7.0 42 | 43 | 44 | 45 | org.apache.logging.log4j 46 | log4j-api 47 | 2.22.1 48 | 49 | 50 | org.apache.logging.log4j 51 | log4j-core 52 | 2.22.1 53 | 54 | 55 | 56 | junit 57 | junit 58 | 4.13.1 59 | test 60 | 61 | 62 | 63 | com.googlecode.json-simple 64 | json-simple 65 | 1.1.1 66 | 67 | 68 | 69 | 70 | 71 | 72 | maven-compiler-plugin 73 | 2.3.2 74 | 75 | 1.8 76 | 1.8 77 | 78 | 79 | 80 | maven-assembly-plugin 81 | 82 | 83 | jar-with-dependencies 84 | 85 | 86 | 87 | com.aerospike.load.AerospikeLoad 88 | 89 | 90 | 91 | 92 | 93 | make-my-jar-with-dependencies 94 | package 95 | 96 | single 97 | 98 | 99 | 100 | 101 | 102 | org.apache.maven.plugins 103 | maven-surefire-plugin 104 | 2.16 105 | 106 | -Xmx512m 107 | 108 | 109 | 110 | org.apache.maven.plugins 111 | maven-javadoc-plugin 112 | 2.9.1 113 | 114 | private 115 | 116 | 117 | 118 | 119 | 120 | 121 | src/main/resources 122 | true 123 | 124 | 125 | 126 | 127 | 128 | UTF-8 129 | UTF-8 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /pom.xml.orig: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.aerospike 5 | aerospike-load 6 | 2.4.1 7 | Aerospike DSV Loader 8 | jar 9 | 10 | 11 | Aerospike Inc. 12 | http://www.aerospike.com 13 | 14 | 15 | 16 | 17 | peter 18 | Peter Milne 19 | peter@aerospike.com 20 | http://www.aerospike.com 21 | Aerospike Inc. 22 | http://www.aerospike.com 23 | 24 | developer 25 | 26 | -6 27 | 28 | 29 | 30 | 31 | 32 | 33 | com.aerospike 34 | aerospike-client 35 | 4.4.11 36 | 37 | 38 | 39 | commons-cli 40 | commons-cli 41 | 1.2 42 | 43 | 44 | 45 | org.apache.logging.log4j 46 | log4j-api 47 | 2.11.1 48 | 49 | 50 | org.apache.logging.log4j 51 | log4j-core 52 | 2.11.1 53 | 54 | 55 | 56 | junit 57 | junit 58 | 4.11 59 | test 60 | 61 | 62 | 63 | com.googlecode.json-simple 64 | json-simple 65 | 1.1.1 66 | 67 | 68 | 69 | 70 | 71 | 72 | maven-compiler-plugin 73 | 2.3.2 74 | 75 | 1.6 76 | 1.6 77 | 78 | 79 | 80 | maven-assembly-plugin 81 | 82 | 83 | jar-with-dependencies 84 | 85 | 86 | 87 | com.aerospike.load.AerospikeLoad 88 | 89 | 90 | 91 | 92 | 93 | make-my-jar-with-dependencies 94 | package 95 | 96 | single 97 | 98 | 99 | 100 | 101 | 102 | org.apache.maven.plugins 103 | maven-surefire-plugin 104 | 2.16 105 | 106 | -Xmx512m 107 | 108 | 109 | 110 | org.apache.maven.plugins 111 | maven-javadoc-plugin 112 | 2.9.1 113 | 114 | private 115 | 116 | 117 | 118 | 119 | 120 | 121 | src/main/resources 122 | true 123 | 124 | 125 | 126 | 127 | 128 | UTF-8 129 | UTF-8 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /run_loader: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Java Aerospike-import Script. 3 | # 4 | # Build with maven before running this script. 
5 | # mvn install 6 | 7 | java -Dlog4j2.contextSelector=org.apache.logging.log4j.core.selector.BasicContextSelector -cp target/aerospike-load-*-jar-with-dependencies.jar com.aerospike.load.AerospikeLoad $* 8 | -------------------------------------------------------------------------------- /src/main/java/com/aerospike/load/AerospikeLoad.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2022 by Aerospike. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to 6 | * deal in the Software without restriction, including without limitation the 7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | * sell copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | * IN THE SOFTWARE. 21 | ******************************************************************************/ 22 | 23 | package com.aerospike.load; 24 | 25 | import java.io.BufferedReader; 26 | import java.io.File; 27 | import java.io.FileInputStream; 28 | import java.io.IOException; 29 | import java.io.InputStreamReader; 30 | import java.io.PrintWriter; 31 | import java.io.StringWriter; 32 | import java.util.ArrayList; 33 | import java.util.Arrays; 34 | import java.util.HashMap; 35 | import java.util.List; 36 | import java.util.Properties; 37 | import java.util.concurrent.ExecutorService; 38 | import java.util.concurrent.Executors; 39 | 40 | import org.apache.commons.cli.CommandLine; 41 | import org.apache.commons.cli.CommandLineParser; 42 | import org.apache.commons.cli.HelpFormatter; 43 | import org.apache.commons.cli.Options; 44 | import org.apache.commons.cli.PosixParser; 45 | import org.apache.logging.log4j.Level; 46 | import org.apache.logging.log4j.LogManager; 47 | import org.apache.logging.log4j.Logger; 48 | import org.apache.logging.log4j.core.config.Configurator; 49 | 50 | import com.aerospike.client.AerospikeClient; 51 | import com.aerospike.client.admin.Role; 52 | import com.aerospike.client.AerospikeException; 53 | import com.aerospike.client.policy.AuthMode; 54 | import com.aerospike.client.policy.ClientPolicy; 55 | import com.aerospike.client.policy.TlsPolicy; 56 | import com.aerospike.client.util.Util; 57 | 58 | /** 59 | * This is the main class for the Aerospike import. 
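 * Reader threads parse the data files (at most one thread per file, capped at
 * the CPU count), while writer threads (CPU count x 5) push the parsed records
 * to the cluster.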
60 | * 61 | * It will import multiple Data Dump files concurrently 62 | * 63 | * To run: java -jar aerospike-import- * 64 | * @author Aerospike 65 | * 66 | */ 67 | public class AerospikeLoad implements Runnable { 68 | 69 | private AerospikeClient client; 70 | private String fileName; 71 | 72 | // Config related variable 73 | private static ExecutorService writerPool; 74 | private static int nWriterThreads; 75 | private static int nReaderThreads; 76 | private static int maxConnsPerNode; 77 | 78 | private static final int scaleFactor = 5; 79 | private static String DEFAULT_DELIMITER = ","; 80 | private static String DEFAULT_HEADER_EXIST = "false"; 81 | 82 | // Other variables. 83 | private static Parameters params; 84 | private static Counter counters; 85 | private static Thread statPrinter; 86 | 87 | // Data definition related variable 88 | private static HashMap dsvConfigs; 89 | private static List mappingDefs; 90 | 91 | private static Logger log = LogManager.getLogger(AerospikeLoad.class); 92 | 93 | 94 | private static void printVersion() 95 | { 96 | final Properties properties = new Properties(); 97 | try { 98 | properties.load(AerospikeLoad.class.getClassLoader().getResourceAsStream("project.properties")); 99 | } catch (Exception e) { 100 | System.out.println("None"); 101 | } finally { 102 | System.out.println(properties.getProperty("name")); 103 | System.out.println("Version " + properties.getProperty("version")); 104 | } 105 | } 106 | 107 | public static void main(String[] args) throws IOException { 108 | long processStart = System.currentTimeMillis(); 109 | 110 | AerospikeClient client = null; 111 | counters = new Counter(); 112 | CommandLine cl; 113 | 114 | try { 115 | Options options = new Options(); 116 | options.addOption("h", "hosts", true, 117 | "List of seed hosts in format:\n" + 118 | "hostname1[:tlsname][:port1],...\n" + 119 | "The tlsname is only used when connecting with a secure TLS enabled server. " + 120 | "If the port is not specified, the default port is used.\n" + 121 | "IPv6 addresses must be enclosed in square brackets.\n" + 122 | "Default: localhost\n" + 123 | "Examples:\n" + 124 | "host1\n" + 125 | "host1:3000,host2:3000\n" + 126 | "192.168.1.10:cert1:3000,[2001::1111]:cert2:3000\n" 127 | ); 128 | options.addOption("V", "version", false, "Aerospike Loader Version"); 129 | options.addOption("p", "port", true, "Server port (default: 3000)"); 130 | options.addOption("U", "user", true, "User name"); 131 | options.addOption("P", "password", true, "Password"); 132 | options.addOption("n", "namespace", true, "Namespace (default: test)"); 133 | options.addOption("c", "config", true, "Column definition file name"); 134 | options.addOption("g", "max-throughput", true, "It limit numer of writes/sec in aerospike."); 135 | options.addOption("T", "transaction-timeout", true, "write transaction timeout in milliseconds(default: No timeout)"); 136 | options.addOption("e", "expirationTime", true, 137 | "Set expiration time of each record in seconds." 
 +
                " -1: Never expire, " +
                " 0: Default to namespace," +
                " >0: Actual given expiration time"
            );
            options.addOption("tz", "timezone", true, "Timezone of source where data dump is taken (default: local timezone)");
            options.addOption("ec", "abort-error-count", true, "Error count to abort (default: 0)");
            options.addOption("wa", "write-action", true, "Write action if key already exists (default: update)");
            options.addOption("sa", "services_alternate", false, "Enable alternate services.");
            options.addOption("tls", "tls-enable", false, "Use TLS/SSL sockets");
            options.addOption("tp", "tls-protocols", true,
                "Allow TLS protocols\n" +
                "Values: TLSv1,TLSv1.1,TLSv1.2 separated by comma\n" +
                "Default: TLSv1.2"
            );
            options.addOption("tlsCiphers", "tls-cipher-suite", true,
                "Allow TLS cipher suites\n" +
                "Values: cipher names defined by JVM separated by comma\n" +
                "Default: null (default cipher list provided by JVM)"
            );
            options.addOption("tr", "tlsRevoke", true,
                "Revoke certificates identified by their serial number\n" +
                "Values: serial numbers separated by comma\n" +
                "Default: null (Do not revoke certificates)"
            );

            options.addOption("tlsLoginOnly", false, "Use TLS/SSL sockets on node login only");
            options.addOption("auth", true, "Authentication mode. Values: " + Arrays.toString(AuthMode.values()));

            options.addOption("uk", "send-user-key", false,
                "Send user defined key in addition to hash digest to store on the server. (default: userKey is not sent to reduce meta-data overhead)"
            );
            options.addOption("v", "verbose", false, "Logging all");
            options.addOption("um", "unorderdMaps", false, "Write all maps as unordered maps");
            options.addOption("u", "usage", false, "Print usage.");

            CommandLineParser parser = new PosixParser();
            cl = parser.parse(options, args, false);

            if (args.length == 0 || cl.hasOption("u")) {
                printUsage(options);
                return;
            }

            if (cl.hasOption("V")) {
                printVersion();
                return;
            }
        } catch (Exception e) {
            log.error(e);
            if (log.isDebugEnabled()) {
                e.printStackTrace();
            }
            return;
        }

        try {
            statPrinter = new Thread(new PrintStat(counters));
            // Create abstract derived params from the provided command-line params.
            params = Utils.parseParameters(cl);
            if (params.verbose) {
                Configurator.setAllLevels(LogManager.getRootLogger().getName(), Level.DEBUG);
            }

            initReadWriteThreadCnt(cl);

            // Get and validate user roles for the client.
            client = getAerospikeClient(cl);
            if (client == null) {
                return;
            }

            List<String> dataFileNames = new ArrayList<String>();
            initDataFileNameList(cl, dataFileNames);
            if (dataFileNames.size() == 0) {
                return;
            }

            // Remove the column definition file from the list, in case the directory containing it was passed.
            String columnDefinitionFileName = cl.getOptionValue("c", "");
            dataFileNames.remove(columnDefinitionFileName);
            log.info("Number of data files:" + dataFileNames.size());

            initBytesToRead(dataFileNames);

            log.info("Aerospike loader started");
            // Perform the main read/write job.
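            // runLoader parses the column-definition (config) file, validates the DSV
            // configs and mapping definitions, then drives the reader/writer thread
            // pools and prints the final load statistics (see the method body below).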
            runLoader(client, columnDefinitionFileName, dataFileNames);

        } catch (Exception e) {
            log.error(e);
            if (log.isDebugEnabled()) {
                e.printStackTrace();
            }
        } finally {
            // Stop the statistics printer thread.
            statPrinter.interrupt();
            log.info("Aerospike loader completed");
            if (client != null) {
                client.close();
            }
        }

        long processStop = System.currentTimeMillis();
        log.info(String.format("Loader completed in %.3fsec", (float) (processStop - processStart) / 1000));
    }

    private static AerospikeClient getAerospikeClient(CommandLine cl) {
        ClientPolicy clientPolicy = new ClientPolicy();

        initClientPolicy(cl, clientPolicy);

        AerospikeClient client = new AerospikeClient(clientPolicy, params.hosts);

        if (!client.isConnected()) {
            log.error("Client is not able to connect:" + params.hosts);
            return null;
        }
        return client;
    }

    private static void initClientPolicy(CommandLine cl, ClientPolicy clientPolicy) {

        if (cl.hasOption("auth")) {
            clientPolicy.authMode = AuthMode.valueOf(cl.getOptionValue("auth", "").toUpperCase());
        }
        // Set user and password in the client policy.
        clientPolicy.user = cl.getOptionValue("user");
        clientPolicy.password = cl.getOptionValue("password");

        if (clientPolicy.user != null && clientPolicy.password == null) {
            java.io.Console console = System.console();
            try {
                if (console != null) {
                    char[] pass = console.readPassword("Enter password:");

                    if (pass != null) {
                        clientPolicy.password = new String(pass);
                    }
                }
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }

        // Set TLS in the ClientPolicy.
        if (cl.hasOption("tls")) {
            clientPolicy.tlsPolicy = new TlsPolicy();

            if (cl.hasOption("tp")) {
                String s = cl.getOptionValue("tp", "");
                clientPolicy.tlsPolicy.protocols = s.split(",");
            }

            if (cl.hasOption("tlsCiphers")) {
                String s = cl.getOptionValue("tlsCiphers", "");
                clientPolicy.tlsPolicy.ciphers = s.split(",");
            }

            if (cl.hasOption("tr")) {
                String s = cl.getOptionValue("tr", "");
                clientPolicy.tlsPolicy.revokeCertificates = Util.toBigIntegerArray(s);
            }
        }

        if (cl.hasOption("sa")) {
            clientPolicy.setUseServicesAlternate(true);
        }

        clientPolicy.maxConnsPerNode = maxConnsPerNode;
    }

    private static void initReadWriteThreadCnt(CommandLine cl) {
        // Get available processors to calculate the default number of threads.
        int cpus = Runtime.getRuntime().availableProcessors();
        nWriterThreads = cpus * scaleFactor;
        nReaderThreads = cpus;

        nWriterThreads = (nWriterThreads > 0
                ? (nWriterThreads > Constants.MAX_THREADS ? Constants.MAX_THREADS : nWriterThreads) : 1);
        log.debug("Using writer Threads: " + nWriterThreads);

        nReaderThreads = (nReaderThreads > 0
                ? (nReaderThreads > Constants.MAX_THREADS ?
Constants.MAX_THREADS : nReaderThreads) : 1); 321 | log.debug("Using reader Threads: " + nReaderThreads); 322 | 323 | // Add 1 for the cluster tend thread. 324 | maxConnsPerNode = nWriterThreads + nReaderThreads + 1; 325 | log.debug("Max connections per node: " + maxConnsPerNode); 326 | } 327 | 328 | private static void initDataFileNameList(CommandLine cl, List<String> dataFileNames) { 329 | // Get data file list. 330 | String[] dataFiles = cl.getArgs(); 331 | 332 | // Get data filename list. 333 | if (dataFiles.length == 0) { 334 | log.error("No data file specified: add data files or a directory at the end of the command"); 335 | return; 336 | } 337 | 338 | dataFileNames.addAll(Utils.getFileNames(dataFiles)); 339 | 340 | if (dataFileNames.size() == 0) { 341 | log.error("Given data files/directory do not exist: " + Arrays.toString(dataFiles)); 342 | return; 343 | } 344 | } 345 | 346 | private static void initBytesToRead(List<String> dataFileNames) { 347 | for (int i = 0; i < dataFileNames.size(); i++) { 348 | log.debug("File name: " + Utils.getFileName(dataFileNames.get(i))); 349 | File file = new File(dataFileNames.get(i)); 350 | counters.write.bytesTotal = counters.write.bytesTotal + file.length(); 351 | } 352 | } 353 | 354 | private static void runLoader(AerospikeClient client, String columnDefinitionFileName, List<String> dataFileNames) throws Exception { 355 | /* 356 | * Process the column definition file to get 357 | * dsv_configs (delimiter, header_exist, n_column) and the 358 | * mapping definitions (key, set, bin_list). 359 | */ 360 | File columnDefinitionFile = getColumnDefinitionFile(columnDefinitionFileName); 361 | 362 | dsvConfigs = new HashMap<String, String>(); 363 | mappingDefs = new ArrayList<MappingDefinition>(); 364 | 365 | if (Parser.parseJSONColumnDefinitions(columnDefinitionFile, dsvConfigs, mappingDefs)) { 366 | log.info("Config file processed."); 367 | } else { 368 | throw new Exception("Config file parsing error"); 369 | } 370 | 371 | // Validate and set default DsvConfigs (from config/definition file). 372 | validateAndSetDefaultDsvConfig(); 373 | 374 | // Parse datafile header line to get column names. 375 | List<String> columnNames = parseColumnNames(columnDefinitionFileName, dataFileNames); 376 | 377 | updateColumnInfoForMappingDefs(columnNames); 378 | 379 | validateMappingDefs(); 380 | 381 | statPrinter.start(); 382 | 383 | // writerPool is global as it will be used outside. 384 | writerPool = Executors.newFixedThreadPool(nWriterThreads); 385 | 386 | ExecutorService readerPool = Executors.newFixedThreadPool(nReaderThreads > dataFileNames.size() 387 | ? dataFileNames.size() : nReaderThreads); 388 | log.info("Reader pool size: " + Math.min(nReaderThreads, dataFileNames.size())); 389 | 390 | // Read/write processing starts from this point. 391 | counters.write.writeStartTime = System.currentTimeMillis(); 392 | 393 | // Submit all tasks to reader thread pool. 394 | for (String aFile : dataFileNames) { 395 | log.debug("Submitting task for: " + aFile); 396 | readerPool.submit(new AerospikeLoad(aFile, client)); 397 | } 398 | 399 | // Wait for reader pool to complete. 400 | readerPool.shutdown(); 401 | log.info("Shutdown reader thread pool"); 402 | 403 | readerPool.awaitTermination(Long.MAX_VALUE, java.util.concurrent.TimeUnit.DAYS); 404 | log.info("Reader thread pool terminated"); 405 | 406 | // Wait for writer pool to complete after getting all tasks from reader pool. 407 | writerPool.shutdown(); 408 | log.info("Shutdown writer thread pool"); 409 | 410 | writerPool.awaitTermination(Long.MAX_VALUE, java.util.concurrent.TimeUnit.DAYS); 411 | log.info("Writer thread pool terminated"); 412 | 413 | // Print final statistics of aerospike-loader.
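// For example (illustrative numbers): if writeCount is 1000 and
// mappingWriteCount is 200, the summary below reports 800 successful
// primary writes and 200 successful mapping (CDT list append) writes.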
414 | log.info("Final Statistics of importer: (" 415 | + "Records Read = " + counters.write.readCount.get() + ", " 416 | + "Successful Writes = " + counters.write.writeCount.get() + ", " 417 | + "Successful Primary Writes = " + (counters.write.writeCount.get() - counters.write.mappingWriteCount.get()) + ", " 418 | + "Successful Mapping Writes = " + counters.write.mappingWriteCount.get() + ", " 419 | + "Errors = " + (counters.write.writeErrors.get() + counters.write.readErrors.get() + counters.write.processingErrors.get()) 420 | + "(" + (counters.write.writeErrors.get()) + "-Write," + counters.write.readErrors.get() + "-Read," 421 | + counters.write.processingErrors.get() + "-Processing), " + "Skipped = " 422 | + (counters.write.keyNullSkipped.get() + counters.write.noBinsSkipped.get()) + "(" 423 | + (counters.write.keyNullSkipped.get()) + "-NullKey," + counters.write.noBinsSkipped.get() 424 | + "-NoBins)"); 425 | } 426 | 427 | private static File getColumnDefinitionFile(String columnDefinitionFileName) throws Exception { 428 | log.debug("Column definition files/directory: " + columnDefinitionFileName); 429 | 430 | if (columnDefinitionFileName == null) { 431 | throw new Exception("Column definition files/directory not specified. use -c "); 432 | } 433 | 434 | File columnDefinitionFile = new File(columnDefinitionFileName); 435 | if (!columnDefinitionFile.exists()) { 436 | throw new Exception("Column definition files/directory does not exist: " 437 | + Utils.getFileName(columnDefinitionFileName)); 438 | } 439 | return columnDefinitionFile; 440 | } 441 | 442 | private static void validateAndSetDefaultDsvConfig() throws Exception { 443 | // Version check. 444 | String version = dsvConfigs.get(Constants.VERSION); 445 | if (!isVersionSupported(version)) { 446 | throw new Exception("\"" + Constants.VERSION + ":" + version + "\" is not Supported"); 447 | } 448 | 449 | // Set default delimiter. 450 | String delimiter = dsvConfigs.get(Constants.DELIMITER); 451 | if (delimiter == null) { 452 | log.warn("\"" + Constants.DELIMITER 453 | + "\" is not properly specified in config file. Default is ','"); 454 | dsvConfigs.put(Constants.DELIMITER, DEFAULT_DELIMITER); 455 | } 456 | 457 | // Set Default header exist. 458 | String isHeaderExist = dsvConfigs.get(Constants.HEADER_EXIST); 459 | if (isHeaderExist == null) { 460 | log.warn("\"" + Constants.HEADER_EXIST 461 | + "\" is not properly specified in config file. Default is false"); 462 | dsvConfigs.put(Constants.HEADER_EXIST, DEFAULT_HEADER_EXIST); 463 | } 464 | } 465 | 466 | private static boolean isVersionSupported(String version) { 467 | // Version check 468 | String[] vNumber = version.split("\\."); 469 | int v1 = Integer.parseInt(vNumber[0]); 470 | int v2 = Integer.parseInt(vNumber[1]); 471 | if ((v1 <= Constants.MajorV) && (v2 <= Constants.MinorV)) { 472 | log.debug("Config version used:" + version); 473 | return true; 474 | } else { 475 | return false; 476 | } 477 | } 478 | 479 | private static List parseColumnNames(String columnDefinitionFileName, List dataFileNames) throws Exception { 480 | 481 | // Parse header line from data file to get Bin names. 
482 | List<String> columnNames = null; 483 | 484 | if (dsvHasHeader()) { 485 | String dataFileName = dataFileNames.get(0); 486 | String line; 487 | BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(dataFileName), "UTF8")); 488 | if ((line = br.readLine()) != null) { 489 | columnNames = Parser.getDSVRawColumns(line, dsvConfigs.get(Constants.DELIMITER)); 490 | br.close(); 491 | } 492 | else { 493 | br.close(); 494 | throw new Exception("Header line not found. Datafile: " + Utils.getFileName(dataFileName)); 495 | } 496 | } 497 | 498 | // Throw an exception if n_column defined in the column-definition file doesn't match the number of columns in the data file. 499 | if (columnNames != null && (columnNames.size() != Integer.parseInt(dsvConfigs.get(Constants.N_COLUMN)))) { 500 | throw new Exception("Number of columns in config file and data file do not match. Datafile: " 501 | + Utils.getFileName(dataFileNames.get(0)) + " columns: " + columnNames.toString() + " Configfile: " 502 | + Utils.getFileName(columnDefinitionFileName) + " n_columns: " + Integer.parseInt(dsvConfigs.get(Constants.N_COLUMN))); 503 | } 504 | return columnNames; 505 | } 506 | 507 | private static void updateColumnInfo(ColumnDefinition columnDef, List<String> columnNames) throws Exception { 508 | 509 | if (columnDef.columnPos < 0) { 510 | if (columnDef.columnName == null) { 511 | if (columnDef.jsonPath == null) { 512 | throw new Exception("Improper column definition. Specify column position, column name, or JSON path."); 513 | } else { 514 | throw new Exception("JSON path is not a supported feature."); 515 | } 516 | } else if (dsvHasHeader()) { 517 | columnDef.columnPos = columnNames.indexOf(columnDef.columnName); 518 | 519 | if (columnDef.columnPos == -1) { 520 | throw new Exception("Missing column name " + columnDef.columnName + " in data file header."); 521 | } 522 | } 523 | } 524 | 525 | if (dsvHasHeader()) { 526 | columnDef.columnName = columnNames.get(columnDef.columnPos); 527 | } 528 | } 529 | 530 | private static void updateColumnInfoForMappingDefs(List<String> columnNames) throws Exception { 531 | 532 | for (MappingDefinition mappingDef : mappingDefs) { 533 | 534 | // KEY 535 | if (mappingDef.keyColumnDef == null) { 536 | throw new Exception("Mapping definition without key mapping"); 537 | } 538 | updateColumnInfo(mappingDef.keyColumnDef.nameDef, columnNames); 539 | 540 | // SET 541 | if (mappingDef.setColumnDef.staticName == null) { 542 | if (mappingDef.setColumnDef.nameDef != null) { 543 | updateColumnInfo(mappingDef.setColumnDef.nameDef, columnNames); 544 | } else { 545 | throw new Exception("Set not defined in mapping definition: 
null set not allowed !!"); 546 | } 547 | } 548 | 549 | // BINS 550 | for (BinDefinition binColumnDef : mappingDef.binColumnDefs) { 551 | 552 | // BIN NAME 553 | if (binColumnDef.staticName == null) { 554 | if (binColumnDef.nameDef != null) { 555 | updateColumnInfo(binColumnDef.nameDef, columnNames); 556 | } else { 557 | throw new Exception("Mapping Definition with missing bin Name definition"); 558 | } 559 | } 560 | 561 | // BIN VALUE 562 | if (binColumnDef.staticValue == null) { 563 | if (binColumnDef.valueDef != null) { 564 | 565 | // SYSTEM_TIME is reserved column value 566 | if (binColumnDef.valueDef.columnName != null && binColumnDef.valueDef.columnName.toLowerCase().equals(Constants.SYSTEM_TIME)) { 567 | continue; 568 | } 569 | updateColumnInfo(binColumnDef.valueDef, columnNames); 570 | } else { 571 | throw new Exception("Mapping Definition with missing bin value definition"); 572 | } 573 | } 574 | } 575 | log.debug("MappingDef:" + mappingDef.toString()); 576 | } 577 | } 578 | 579 | private static void validateMappingDefs() throws Exception { 580 | 581 | for (MappingDefinition mappingDef : mappingDefs) { 582 | validateSetNameInfo(mappingDef.setColumnDef); 583 | 584 | validateKeyNameInfo(mappingDef.keyColumnDef); 585 | 586 | for (BinDefinition binColumnDef : mappingDef.binColumnDefs) { 587 | 588 | validateBinColumnsNameInfo(binColumnDef); 589 | validateBinColumnsValueInfo(binColumnDef); 590 | 591 | // TODO - arguably not needed. Pretty arbitrary 592 | if ((binColumnDef.nameDef.columnPos == binColumnDef.valueDef.columnPos) 593 | && (binColumnDef.nameDef.columnPos != -1)) { 594 | throw new Exception("Dynamic Bin Name column info same as Dynamic bin Value" + binColumnDef); 595 | } 596 | } 597 | } 598 | } 599 | 600 | private static void validateSetNameInfo(MetaDefinition metadataColDef) throws Exception { 601 | 602 | if ((metadataColDef.staticName != null && metadataColDef.staticName.length() > Constants.SET_NAME_LENGTH)) { 603 | throw new Exception("Set name len exceed Allowd limit. SET_NAME_LEN_MAX: " + Constants.SET_NAME_LENGTH + "Given SetName: " + metadataColDef.staticName); 604 | } 605 | 606 | if (metadataColDef.staticName != null) { 607 | return; 608 | } 609 | 610 | if (metadataColDef.nameDef.columnPos < 0 && (metadataColDef.nameDef.columnName == null)) { 611 | throw new Exception("Information missing(columnName, columnPos) in config file: " + metadataColDef); 612 | } 613 | 614 | if (metadataColDef.nameDef.srcType == null 615 | || !metadataColDef.nameDef.srcType.equals(SrcColumnType.STRING)) { 616 | throw new Exception("Set name should be string type: " + metadataColDef); 617 | } 618 | 619 | } 620 | 621 | private static void validateKeyNameInfo(MetaDefinition metadataColDef) throws Exception { 622 | 623 | if (metadataColDef.nameDef.columnPos < 0 && (metadataColDef.nameDef.columnName == null)) { 624 | throw new Exception("Information missing(columnName, columnPos) in config file: " + metadataColDef); 625 | } 626 | 627 | if (metadataColDef.nameDef.srcType == null) { 628 | throw new Exception("Source data type is not properly mentioned: " + metadataColDef); 629 | } 630 | } 631 | 632 | private static void validateBinColumnsNameInfo(BinDefinition binColumnDef) throws Exception { 633 | if ((binColumnDef.staticName != null && binColumnDef.staticName.length() > Constants.BIN_NAME_LENGTH)) { 634 | throw new Exception("Bin name len exceed Allowd limit. 
BIN_NAME_LEN_MAX: " + Constants.BIN_NAME_LENGTH + "Given BinName: " + binColumnDef.staticName); 635 | } 636 | 637 | if (binColumnDef.staticName != null) { 638 | return; 639 | } 640 | 641 | if ((binColumnDef.nameDef.columnPos < 0) && binColumnDef.nameDef.columnName == null) { 642 | throw new Exception("Information missing(columnName, columnPos) in config file: " + binColumnDef); 643 | } 644 | } 645 | 646 | private static void validateBinColumnsValueInfo(BinDefinition binColumnDef) throws Exception { 647 | 648 | if (binColumnDef.staticValue != null) { 649 | return; 650 | } 651 | 652 | // check for missing entries in config file 653 | if (binColumnDef.valueDef.columnPos < 0 && binColumnDef.valueDef.columnName == null) { 654 | throw new Exception( 655 | "Information missing(columnName, columnPos) in config file: " + binColumnDef); 656 | } 657 | 658 | // src_type is mandatory. 659 | if (binColumnDef.valueDef.srcType == null) { 660 | throw new Exception("Source data type is not properly mentioned:" + binColumnDef); 661 | } 662 | 663 | // TIMESTAMP, BLOB src_type needs a dst_type. 664 | if ((binColumnDef.valueDef.srcType.equals(SrcColumnType.TIMESTAMP) 665 | || binColumnDef.valueDef.srcType.equals(SrcColumnType.BLOB)) 666 | && binColumnDef.valueDef.dstType == null) { 667 | throw new Exception("Destination type is not mentioned: " + binColumnDef); 668 | } 669 | 670 | // Encoding should be given if dst_type is given(not needed in dst_type CDT_LIST) 671 | if (binColumnDef.valueDef.dstType != null 672 | && binColumnDef.valueDef.encoding == null) { 673 | throw new Exception("Encoding is not given for src-dst type conversion:" + binColumnDef); 674 | } 675 | 676 | // BLOB dst_type needs HEX_ENCODING. 677 | if (binColumnDef.valueDef.srcType.equals(SrcColumnType.BLOB) 678 | && !binColumnDef.valueDef.encoding.equals(Constants.HEX_ENCODING)) { 679 | throw new Exception("Wrong encoding for blob data:" + binColumnDef); 680 | } 681 | } 682 | 683 | /* 684 | * Write help/usage to console. 
685 | */ 686 | private static void printUsage(Options options) { 687 | HelpFormatter formatter = new HelpFormatter(); 688 | StringWriter sw = new StringWriter(); 689 | PrintWriter pw = new PrintWriter(sw); 690 | String syntax = AerospikeLoad.class.getName() + " []"; 691 | formatter.printHelp(pw, 100, syntax, "options:", options, 0, 2, null); 692 | log.info(sw.toString()); 693 | } 694 | 695 | private static boolean dsvHasHeader() { 696 | return Boolean.valueOf(dsvConfigs.get(Constants.HEADER_EXIST)); 697 | } 698 | 699 | /* 700 | * Constructor 701 | */ 702 | public AerospikeLoad(String fileName, AerospikeClient client) throws AerospikeException { 703 | this.client = client; 704 | this.fileName = fileName; 705 | } 706 | 707 | /* 708 | * Process a single file 709 | */ 710 | private void processFile() { 711 | int lineNumber = 0; 712 | 713 | log.trace("Hosts: " + this.client.getNodeNames()); 714 | long start = System.currentTimeMillis(); 715 | 716 | try { 717 | 718 | BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(this.fileName), "UTF8")); 719 | log.debug("Reading file: " + Utils.getFileName(fileName)); 720 | boolean hasHeader = dsvHasHeader(); 721 | 722 | while (br.ready()) { 723 | String line; 724 | 725 | // Skip reading 1st line of data file 726 | if (hasHeader) { 727 | lineNumber++; 728 | String header = br.readLine(); 729 | counters.write.bytesTotal = counters.write.bytesTotal - header.length(); 730 | } 731 | 732 | while ((line = br.readLine()) != null) { 733 | lineNumber++; 734 | log.trace("Read line " + lineNumber + " from file: " + Utils.getFileName(fileName)); 735 | 736 | // Throttle if read to write difference goes beyond given number. 737 | while ((counters.write.processingQueued.get() 738 | - counters.write.processingCount.get()) > Constants.RW_THROTTLE_LIMIT) { 739 | Thread.sleep(20); 740 | } 741 | 742 | List columns = Parser.getDSVRawColumns(line, dsvConfigs.get(Constants.DELIMITER)); 743 | 744 | if (columns == null) { 745 | continue; 746 | } 747 | 748 | for (MappingDefinition mappingDef : mappingDefs) { 749 | counters.write.processingQueued.getAndIncrement(); 750 | writerPool.submit(new AsWriterTask(fileName, lineNumber, (line.length() + 1), this.client, columns, 751 | dsvConfigs, mappingDef, params, counters)); 752 | } 753 | 754 | log.trace("Submitting line " + lineNumber + " in file: " + Utils.getFileName(fileName)); 755 | counters.write.readCount.incrementAndGet(); 756 | } 757 | } 758 | 759 | br.close(); 760 | 761 | } catch (Exception e) { 762 | log.error("Error processing file: " + Utils.getFileName(fileName) + ": Line: " + lineNumber); 763 | if (log.isDebugEnabled()) { 764 | e.printStackTrace(); 765 | } 766 | counters.write.readErrors.getAndIncrement(); 767 | } 768 | 769 | long stop = System.currentTimeMillis(); 770 | log.info(String.format("Reader completed %d-lines in %.3fsec, From file: %s", lineNumber, 771 | (float) (stop - start) / 1000, Utils.getFileName(fileName))); 772 | } 773 | 774 | public void run() { 775 | log.info("Processing: " + Utils.getFileName(fileName)); 776 | 777 | try { 778 | log.trace("Processing File: " + Utils.getFileName(fileName)); 779 | processFile(); 780 | } catch (Exception e) { 781 | log.error("Cannot process file: " + Utils.getFileName(fileName)); 782 | if (log.isDebugEnabled()) { 783 | e.printStackTrace(); 784 | } 785 | } 786 | } 787 | } 788 | -------------------------------------------------------------------------------- /src/main/java/com/aerospike/load/AsWriterTask.java: 
-------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2022 by Aerospike. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to 6 | * deal in the Software without restriction, including without limitation the 7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | * sell copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | * IN THE SOFTWARE. 21 | ******************************************************************************/ 22 | 23 | package com.aerospike.load; 24 | 25 | import java.text.DateFormat; 26 | import java.text.SimpleDateFormat; 27 | import java.util.ArrayList; 28 | import java.util.Date; 29 | import java.util.HashMap; 30 | import java.util.List; 31 | import java.util.TreeMap; 32 | import java.util.concurrent.Callable; 33 | 34 | import org.apache.logging.log4j.LogManager; 35 | import org.apache.logging.log4j.Logger; 36 | import org.json.simple.JSONArray; 37 | import org.json.simple.JSONObject; 38 | import org.json.simple.parser.JSONParser; 39 | import org.json.simple.parser.ParseException; 40 | 41 | import com.aerospike.client.AerospikeClient; 42 | import com.aerospike.client.AerospikeException; 43 | import com.aerospike.client.Bin; 44 | import com.aerospike.client.Key; 45 | import com.aerospike.client.Value; 46 | import com.aerospike.client.ResultCode; 47 | 48 | /** 49 | * 50 | * @author Aerospike 51 | * 52 | * Main writer class that writes data to Aerospike. 53 | * 54 | */ 55 | public class AsWriterTask implements Callable<Integer> { 56 | 57 | // File and line info variables. 58 | private String fileName; 59 | private int lineNumber; 60 | private int lineSize; 61 | 62 | // Aerospike-related variables. 63 | private AerospikeClient client; 64 | 65 | // Data-definition-related variables. 66 | private HashMap<String, String> dsvConfigs; 67 | private MappingDefinition mappingDef; 68 | private List<String> columns; 69 | 70 | private Parameters params; 71 | private Counter counters; 72 | private JSONParser jsonParser; 73 | 74 | private static Logger log = LogManager.getLogger(AsWriterTask.class); 75 | 76 | /** 77 | * AsWriterTask processes the data columns of one record and creates its Set, Key, and Bins. 78 | * It writes these Bins to the created Key. If it is a secondary mapping, it does a CDT append 79 | * over all created Bins.
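 * For example (an illustrative mapping, not one of the shipped configs):
 * with delimiter ',' and a mapping whose key comes from column 1 and whose
 * bin "name" takes column 2, the data line "1,John" becomes a put of key 1
 * with bin {name=John} into the namespace and set resolved from the config.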
80 | * 81 | * @param fileName Name of the data file 82 | * @param lineNumber Line number in the file fileName 83 | * @param lineSize Size of the line, used to track bytes processed 84 | * @param client AerospikeClient object 85 | * @param columns List of delimiter-separated entries from this line 86 | * @param dsvConfigs Map of DSV configurations 87 | * @param mappingDef MappingDefinition of a mapping from the config file 88 | * @param params User-given parameters 89 | * @param counters Counter for stats 90 | * 91 | */ 92 | public AsWriterTask(String fileName, int lineNumber, int lineSize, AerospikeClient client, List<String> columns, 93 | HashMap<String, String> dsvConfigs, MappingDefinition mappingDef, Parameters params, Counter counters) { 94 | 95 | // Used to log errors with file name and line number, and to update bytes processed. 96 | this.fileName = fileName; 97 | this.lineNumber = lineNumber; 98 | this.lineSize = lineSize; 99 | 100 | this.client = client; 101 | 102 | this.dsvConfigs = dsvConfigs; 103 | this.mappingDef = mappingDef; 104 | this.columns = columns; 105 | 106 | this.params = params; 107 | this.counters = counters; 108 | 109 | } 110 | 111 | /* 112 | * Writes a record to the Aerospike cluster. 113 | */ 114 | private void writeToAs(Key key, List<Bin> bins) { 115 | 116 | try { 117 | // The connection could be broken at actual write time. 118 | if (this.client == null) { 119 | throw new Exception("Null Aerospike client!"); 120 | } 121 | 122 | 123 | if (bins.isEmpty()) { 124 | counters.write.noBinsSkipped.getAndIncrement(); 125 | log.trace("No bins to insert"); 126 | return; 127 | } 128 | // For a secondary mapping, every bin becomes a CDT list-append operation. 129 | if (this.mappingDef.secondaryMapping) { 130 | for (Bin b : bins) { 131 | client.operate(this.params.writePolicy, key, 132 | com.aerospike.client.cdt.ListOperation.append(b.name, b.value)); 133 | } 134 | counters.write.mappingWriteCount.getAndIncrement(); 135 | } else { 136 | this.client.put(this.params.writePolicy, key, bins.toArray(new Bin[bins.size()])); 137 | counters.write.bytesProcessed.addAndGet(this.lineSize); 138 | } 139 | counters.write.writeCount.getAndIncrement(); 140 | 141 | log.trace("Wrote line " + lineNumber + " Key: " + key.userKey + " to Aerospike."); 142 | 143 | } catch (AerospikeException ae) { 144 | 145 | handleAerospikeWriteError(ae); 146 | checkAndAbort(); 147 | 148 | } catch (Exception e) { 149 | 150 | handleWriteError(e); 151 | checkAndAbort(); 152 | 153 | } 154 | } 155 | 156 | private void handleAerospikeWriteError(AerospikeException ae) { 157 | 158 | log.error("File: " + Utils.getFileName(this.fileName) + " Line: " + lineNumber + " Aerospike Write Error: " 159 | + ae.getResultCode()); 160 | 161 | if (log.isDebugEnabled()) { 162 | ae.printStackTrace(); 163 | } 164 | 165 | switch (ae.getResultCode()) { 166 | 167 | case ResultCode.TIMEOUT: 168 | counters.write.writeTimeouts.getAndIncrement(); 169 | break; 170 | case ResultCode.KEY_EXISTS_ERROR: 171 | counters.write.writeKeyExists.getAndIncrement(); 172 | break; 173 | default: 174 | // Other result codes are counted only as generic write errors below.
175 | } 176 | 177 | if (!this.mappingDef.secondaryMapping) { 178 | counters.write.bytesProcessed.addAndGet(this.lineSize); 179 | } 180 | 181 | counters.write.writeErrors.getAndIncrement(); 182 | } 183 | 184 | private void handleWriteError(Exception e) { 185 | log.error("File: " + Utils.getFileName(this.fileName) + " Line: " + lineNumber + " Write Error: " + e); 186 | if (log.isDebugEnabled()) { 187 | e.printStackTrace(); 188 | } 189 | if (!this.mappingDef.secondaryMapping) { 190 | counters.write.bytesProcessed.addAndGet(this.lineSize); 191 | } 192 | counters.write.writeErrors.getAndIncrement(); 193 | } 194 | 195 | private void checkAndAbort() { 196 | long errorTotal; 197 | errorTotal = (counters.write.readErrors.get() + counters.write.writeErrors.get() 198 | + counters.write.processingErrors.get()); 199 | if (this.params.abortErrorCount != 0 && this.params.abortErrorCount < errorTotal) { 200 | System.exit(-1); 201 | } 202 | } 203 | 204 | /* 205 | * Create the Set and Key from the provided data for the given mappingDef. 206 | * Create a Bin for each entry of binList in mappingDef. 207 | */ 208 | private Key getKeyAndBinsFromDataline(List<Bin> bins) { 209 | log.debug("Processing file: " + Utils.getFileName(fileName) + " line: " + this.lineNumber); 210 | Key key = null; 211 | 212 | try { 213 | 214 | validateNColumnInDataline(); 215 | 216 | // Set cannot be null here; it was validated earlier. 217 | String set = getSetName(); 218 | 219 | key = createRecordKey(this.params.namespace, set); 220 | 221 | populateAsBinFromColumnDef(bins); 222 | 223 | log.trace("Formed key and bins for line: " + lineNumber + " Key: " + key.userKey + " Bins: " 224 | + bins.toString()); 225 | 226 | } catch (AerospikeException ae) { 227 | 228 | log.error("File: " + Utils.getFileName(this.fileName) + " Line: " + lineNumber 229 | + " Aerospike Bin processing Error: " + ae.getResultCode()); 230 | handleProcessLineError(ae); 231 | 232 | } catch (ParseException pe) { 233 | 234 | log.error("File: " + Utils.getFileName(this.fileName) + " Line: " + lineNumber + " Parsing Error: " + pe); 235 | handleProcessLineError(pe); 236 | 237 | } catch (Exception e) { 238 | 239 | log.error("File: " + Utils.getFileName(this.fileName) + " Line: " + lineNumber + " Unknown Error: " + e); 240 | handleProcessLineError(e); 241 | 242 | } 243 | return key; 244 | } 245 | 246 | /* 247 | * Validate that the number of columns in the data line matches the number given in the config file. 248 | * Throw an exception if more columns are present than configured. 249 | */ 250 | private void validateNColumnInDataline() throws ParseException { 251 | 252 | // Throw exception if n_columns(datafile) are more than n_columns(configfile).
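// For example (hypothetical values): with n_columns_datafile=3, the line
// "a,b" is processed with only a warning, while "a,b,c,d" raises a
// ParseException for this line number.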
253 | int n_column = Integer.parseInt(dsvConfigs.get(Constants.N_COLUMN)); 254 | if (columns.size() == n_column) { 255 | return; 256 | } 257 | if (columns.size() < n_column) { 258 | log.warn("File: " + Utils.getFileName(fileName) + " Line: " + lineNumber 259 | + " Number of column mismatch:Columns in data file is less than number of column in config file."); 260 | } else { 261 | throw new ParseException(lineNumber); 262 | } 263 | } 264 | 265 | private void handleProcessLineError(Exception e) { 266 | if (log.isDebugEnabled()) { 267 | e.printStackTrace(); 268 | } 269 | counters.write.processingErrors.getAndIncrement(); 270 | counters.write.bytesProcessed.addAndGet(this.lineSize); 271 | checkAndAbort(); 272 | } 273 | 274 | 275 | private String getSetName() { 276 | MetaDefinition setColumn = this.mappingDef.setColumnDef; 277 | 278 | if (setColumn.staticName != null) { 279 | return setColumn.staticName; 280 | } 281 | 282 | String set = null; 283 | String setRawText = this.columns.get(setColumn.nameDef.columnPos); 284 | if (setColumn.nameDef.removePrefix != null) { 285 | if (setRawText != null && setRawText.startsWith(setColumn.nameDef.removePrefix)) { 286 | set = setRawText.substring(setColumn.nameDef.removePrefix.length()); 287 | } 288 | } else { 289 | set = setRawText; 290 | } 291 | return set; 292 | } 293 | 294 | private Key createRecordKey(String namespace, String set) throws Exception { 295 | // Use 'SET' name to create key. 296 | Key key = null; 297 | 298 | MetaDefinition keyColumn = this.mappingDef.keyColumnDef; 299 | 300 | String keyRawText = this.columns.get(keyColumn.nameDef.columnPos); 301 | 302 | if (keyRawText == null || keyRawText.trim().length() == 0) { 303 | counters.write.keyNullSkipped.getAndIncrement(); 304 | throw new Exception("Key value is null in datafile."); 305 | } 306 | 307 | if ((keyColumn.nameDef.removePrefix != null) 308 | && (keyRawText.startsWith(keyColumn.nameDef.removePrefix))) { 309 | keyRawText = keyRawText.substring(keyColumn.nameDef.removePrefix.length()); 310 | } 311 | 312 | if (keyColumn.nameDef.srcType == SrcColumnType.INTEGER) { 313 | Long integer = Long.parseLong(keyRawText); 314 | key = new Key(namespace, set, integer); 315 | } else { 316 | key = new Key(namespace, set, keyRawText); 317 | } 318 | 319 | return key; 320 | } 321 | 322 | private void populateAsBinFromColumnDef(List bins) { 323 | for (BinDefinition binColumn : this.mappingDef.binColumnDefs) { 324 | Bin bin = null; 325 | String binName = null; 326 | String binRawValue = null; 327 | 328 | // Get binName. 329 | if (binColumn.staticName != null) { 330 | binName = binColumn.staticName; 331 | } else if (binColumn.nameDef != null) { 332 | binName = this.columns.get(binColumn.nameDef.columnPos); 333 | } 334 | 335 | // Get BinValue. 336 | if (binColumn.staticValue != null) { 337 | 338 | binRawValue = binColumn.staticValue; 339 | bin = new Bin(binName, binRawValue); 340 | 341 | } else if (binColumn.valueDef != null) { 342 | 343 | binRawValue = getbinRawValue(binColumn); 344 | if (binRawValue == null || binRawValue.equals("")) { 345 | continue; 346 | } 347 | 348 | switch (binColumn.valueDef.srcType) { 349 | 350 | case INTEGER: 351 | bin = createBinForInteger(binName, binRawValue); 352 | break; 353 | case FLOAT: 354 | bin = createBinForFloat(binName, binRawValue); 355 | break; 356 | case STRING: 357 | bin = createBinForString(binName, binRawValue); 358 | break; 359 | case JSON: 360 | /* 361 | * JSON could take any valid JSON. 
There are two type of JSON: 362 | * JsonArray: this can be used to insert List (Any generic JSON list) 363 | * JsonObj: this can be used to insert Map (Any generic JSON object) 364 | */ 365 | bin = createBinForJson(binName, binRawValue); 366 | break; 367 | case GEOJSON: 368 | bin = createBinForGeoJson(binName, binRawValue); 369 | break; 370 | case BLOB: 371 | bin = createBinForBlob(binColumn, binName, binRawValue); 372 | break; 373 | case TIMESTAMP: 374 | bin = createBinForTimestamp(binColumn, binName, binRawValue); 375 | break; 376 | default: 377 | //.... 378 | } 379 | } 380 | 381 | if (bin != null) { 382 | bins.add(bin); 383 | } 384 | } 385 | } 386 | 387 | private String getbinRawValue(BinDefinition binColumn) { 388 | /* 389 | * User may want to store the time when record is written in Aerospike. 390 | * Assign system_time to binvalue. This bin will be written as part of 391 | * record. 392 | */ 393 | if (binColumn.valueDef.columnName != null 394 | && binColumn.valueDef.columnName.toLowerCase().equals(Constants.SYSTEM_TIME)) { 395 | 396 | SimpleDateFormat sdf = 397 | new SimpleDateFormat(binColumn.valueDef.encoding); // dd/MM/yyyy 398 | Date now = new Date(); 399 | return sdf.format(now); 400 | } 401 | 402 | 403 | String binRawValue = this.columns.get(binColumn.valueDef.columnPos); 404 | 405 | if ((binColumn.valueDef.removePrefix != null) 406 | && (binRawValue != null && binRawValue.startsWith(binColumn.valueDef.removePrefix))) { 407 | binRawValue = 408 | binRawValue.substring(binColumn.valueDef.removePrefix.length()); 409 | } 410 | return binRawValue; 411 | } 412 | 413 | private Bin createBinForInteger(String binName, String binRawValue) { 414 | 415 | try { 416 | // Server stores all integer type data in 64bit so use long. 417 | Long integer = Long.parseLong(binRawValue); 418 | 419 | return new Bin(binName, integer); 420 | 421 | } catch (Exception pi) { 422 | 423 | log.error("File: " + Utils.getFileName(this.fileName) + " Line: " + lineNumber 424 | + " Integer/Long Parse Error: " + pi); 425 | return null; 426 | 427 | } 428 | } 429 | 430 | private Bin createBinForFloat(String binName, String binRawValue) { 431 | 432 | try { 433 | // parse as a double to get greater precision 434 | double binfloat = Double.parseDouble(binRawValue); 435 | 436 | return new Bin(binName, binfloat); 437 | 438 | } catch (Exception e) { 439 | log.error("File: " + Utils.getFileName(this.fileName) + " Line: " + lineNumber 440 | + " Floating number Parse Error: " + e); 441 | return null; 442 | } 443 | 444 | } 445 | 446 | private Bin createBinForString(String binName, String binRawValue) { 447 | return new Bin(binName, binRawValue); 448 | } 449 | 450 | private Bin createBinForJson(String binName, String binRawValue) { 451 | try { 452 | log.debug(binRawValue); 453 | 454 | if (jsonParser == null) { 455 | jsonParser = new JSONParser(); 456 | } 457 | 458 | Object obj = jsonParser.parse(binRawValue); 459 | 460 | if (obj instanceof JSONArray) { 461 | JSONArray jsonArray = (JSONArray) obj; 462 | return new Bin(binName, jsonArray); 463 | } else { 464 | JSONObject jsonObj = (JSONObject) obj; 465 | 466 | if (this.params.unorderdMaps) { 467 | return new Bin(binName, jsonObj); 468 | } 469 | 470 | TreeMap sortedMap = new TreeMap<>(); 471 | sortedMap.putAll(jsonObj); 472 | return new Bin(binName, sortedMap); 473 | } 474 | 475 | } catch (ParseException e) { 476 | log.error("Failed to parse JSON: " + e); 477 | return null; 478 | } 479 | 480 | } 481 | 482 | private Bin createBinForGeoJson(String binName, String binRawValue) { 483 | 
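// A valid raw value for this bin might look like the following GeoJSON
// point (illustrative coordinates):
//   {"type": "Point", "coordinates": [-122.0, 37.5]}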
try { 484 | return new Bin(binName, Value.getAsGeoJSON(binRawValue)); 485 | } catch (Exception e) { 486 | log.error("File: " + Utils.getFileName(this.fileName) + " Line: " + lineNumber 487 | + " GeoJson Parse Error: " + e); 488 | return null; 489 | } 490 | } 491 | 492 | private Bin createBinForBlob(BinDefinition binColumn, String binName, String binRawValue) { 493 | try { 494 | if ((binColumn.valueDef.dstType.equals(DstColumnType.BLOB)) 495 | && (binColumn.valueDef.encoding.equalsIgnoreCase(Constants.HEX_ENCODING))) { 496 | return new Bin(binName, this.toByteArray(binRawValue)); 497 | } 498 | } catch (Exception e) { 499 | log.error("File: " + Utils.getFileName(this.fileName) + " Line: " + lineNumber 500 | + " Blob Parse Error: " + e); 501 | return null; 502 | } 503 | 504 | return null; 505 | } 506 | 507 | private Bin createBinForTimestamp(BinDefinition binColumn, String binName, String binRawValue) { 508 | 509 | if (! binColumn.valueDef.dstType.equals(DstColumnType.INTEGER)) { 510 | return new Bin(binName, binRawValue); 511 | } 512 | 513 | DateFormat format = new SimpleDateFormat(binColumn.valueDef.encoding); 514 | 515 | try { 516 | 517 | Date formatDate = format.parse(binRawValue); 518 | long milliSecondForDate = formatDate.getTime() - this.params.timeZoneOffset; 519 | 520 | if (!(binColumn.valueDef.encoding.contains(".SSS") 521 | && binName.equals(Constants.SYSTEM_TIME))) { 522 | // We need time in milliseconds so no need to change it to milliseconds. 523 | milliSecondForDate = milliSecondForDate / 1000; 524 | } 525 | 526 | log.trace("Date format: " + binRawValue + " in seconds: " + milliSecondForDate); 527 | 528 | return new Bin(binName, milliSecondForDate); 529 | 530 | } catch (java.text.ParseException e) { 531 | e.printStackTrace(); 532 | return null; 533 | } 534 | 535 | } 536 | 537 | private byte[] toByteArray(String s) { 538 | 539 | if ((s.length() % 2) != 0) { 540 | log.error("blob exception: " + s); 541 | throw new IllegalArgumentException("Input hex formated string must contain an even number of characters."); 542 | } 543 | 544 | int len = s.length(); 545 | byte[] data = new byte[len / 2]; 546 | 547 | try { 548 | for (int i = 0; i < len; i += 2) { 549 | data[i / 2] = (byte) ((Character.digit(s.charAt(i), 16) << 4) + Character.digit(s.charAt(i + 1), 16)); 550 | } 551 | } catch (Exception e) { 552 | log.error("blob exception: " + e); 553 | } 554 | return data; 555 | } 556 | 557 | private boolean exceedingThroughput() { 558 | long transactions; 559 | long timeLapse; 560 | long throughput; 561 | 562 | transactions = counters.write.writeCount.get() 563 | + counters.write.mappingWriteCount.get() 564 | + counters.write.writeErrors.get(); 565 | 566 | 567 | timeLapse = (System.currentTimeMillis() - counters.write.writeStartTime) / 1000L; 568 | 569 | if (timeLapse > 0) { 570 | throughput = transactions / timeLapse; 571 | 572 | if (throughput > params.maxThroughput) { 573 | return true; 574 | } 575 | } 576 | return false; 577 | } 578 | public Integer call() throws Exception { 579 | 580 | List bins = new ArrayList(); 581 | 582 | 583 | try { 584 | 585 | counters.write.processingCount.getAndIncrement(); 586 | 587 | Key key = getKeyAndBinsFromDataline(bins); 588 | 589 | if (key != null) { 590 | writeToAs(key, bins); 591 | bins.clear(); 592 | 593 | if (params.maxThroughput == 0) { 594 | return 0; 595 | } 596 | 597 | while(exceedingThroughput()) { 598 | Thread.sleep(20); 599 | } 600 | 601 | return 0; 602 | } 603 | 604 | } catch (Exception e) { 605 | 606 | log.error("File: " + 
Utils.getFileName(this.fileName) + " Line: " + lineNumber + " Parsing Error: " + e); 607 | log.debug(e); 608 | } 609 | 610 | return 0; 611 | 612 | } 613 | } 614 | -------------------------------------------------------------------------------- /src/main/java/com/aerospike/load/BinDefinition.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2022 by Aerospike. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to 6 | * deal in the Software without restriction, including without limitation the 7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | * sell copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | * IN THE SOFTWARE. 21 | ******************************************************************************/ 22 | package com.aerospike.load; 23 | 24 | /** 25 | * 26 | * @author Aerospike 27 | * 28 | * BinDefinition (BinName/Def, BinValu/Def) populated by parsing config file. 29 | * 30 | */ 31 | public class BinDefinition { 32 | 33 | String staticName; 34 | String staticValue; 35 | ColumnDefinition nameDef; 36 | ColumnDefinition valueDef; 37 | 38 | public BinDefinition( 39 | String staticName, 40 | String staticValue, 41 | ColumnDefinition nameDef, 42 | ColumnDefinition valueDef 43 | 44 | ) { 45 | this.staticName = staticName; 46 | this.staticValue = staticValue; 47 | this.nameDef = nameDef; 48 | this.valueDef = valueDef; 49 | } 50 | 51 | public String getBinStaticName() { 52 | return this.staticName; 53 | } 54 | 55 | public String getBinStaticValue() { 56 | return this.staticValue; 57 | } 58 | 59 | public ColumnDefinition getBinNameDef() { 60 | return this.nameDef; 61 | } 62 | 63 | public ColumnDefinition getValueDef() { 64 | return this.valueDef; 65 | } 66 | 67 | @Override 68 | public String toString() { 69 | return "ColumnDefinition [staticName=" + this.staticName + ", staticValue=" + this.staticValue 70 | + ", nameDef=" + this.nameDef + ", valueDef=" + this.valueDef + "]"; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/main/java/com/aerospike/load/ColumnDefinition.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2022 by Aerospike. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to 6 | * deal in the Software without restriction, including without limitation the 7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | * sell copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | * IN THE SOFTWARE. 21 | ******************************************************************************/ 22 | package com.aerospike.load; 23 | 24 | /** 25 | * List of source data types 26 | * 27 | */ 28 | enum SrcColumnType { 29 | INTEGER, STRING, BLOB, GEOJSON, JSON, TIMESTAMP, FLOAT; 30 | } 31 | /** 32 | * List of datatypes supported by Aerospike 33 | * 34 | */ 35 | enum DstColumnType { 36 | INTEGER, STRING, BLOB, LIST, MAP, GEOJSON; 37 | } 38 | 39 | /** 40 | * Column Definition class for data file. 41 | * It has same params as column_def(key_def, set_def, Bin.name_def, Bin.Value_def) 42 | * section in config file. 43 | * 44 | */ 45 | public class ColumnDefinition { 46 | 47 | int columnPos; 48 | String columnName; 49 | SrcColumnType srcType; 50 | DstColumnType dstType; 51 | String encoding; 52 | String removePrefix; 53 | String jsonPath; 54 | 55 | public ColumnDefinition( 56 | int columnPos, 57 | String columnName, 58 | String srcType, 59 | String dstType, 60 | String encoding, 61 | String jsonPath, 62 | String removePrefix 63 | ) { 64 | 65 | this.columnPos = columnPos; 66 | this.columnName = columnName; 67 | this.jsonPath = jsonPath; 68 | this.removePrefix = removePrefix; 69 | this.encoding = encoding; 70 | 71 | setSrcType(srcType); 72 | setDstType(dstType); 73 | } 74 | 75 | public void setSrcType(String type) { 76 | if ("string".equalsIgnoreCase(type)) { 77 | this.srcType = SrcColumnType.STRING; 78 | } else if ("integer".equalsIgnoreCase(type)) { 79 | this.srcType = SrcColumnType.INTEGER; 80 | } else if ("blob".equalsIgnoreCase(type)) { 81 | this.srcType = SrcColumnType.BLOB; 82 | } else if ("geojson".equalsIgnoreCase(type)) { 83 | this.srcType = SrcColumnType.GEOJSON; 84 | } else if ("json".equalsIgnoreCase(type)) { 85 | this.srcType = SrcColumnType.JSON; 86 | } else if ("timestamp".equalsIgnoreCase(type)) { 87 | this.srcType = SrcColumnType.TIMESTAMP; 88 | } else if ("float".equalsIgnoreCase(type)) { 89 | this.srcType = SrcColumnType.FLOAT; 90 | } 91 | } 92 | 93 | public void setDstType(String type) { 94 | if ("string".equalsIgnoreCase(type)) { 95 | this.dstType = DstColumnType.STRING; 96 | } else if ("integer".equalsIgnoreCase(type)) { 97 | this.dstType = DstColumnType.INTEGER; 98 | } else if ("blob".equalsIgnoreCase(type)) { 99 | this.dstType = DstColumnType.BLOB; 100 | } else if ("list".equalsIgnoreCase(type)) { 101 | this.dstType = DstColumnType.LIST; 102 | } else if 
("map".equalsIgnoreCase(type)) { 103 | this.dstType = DstColumnType.MAP; 104 | } else if ("geojson".equalsIgnoreCase(type)) { 105 | this.dstType = DstColumnType.GEOJSON; 106 | } 107 | } 108 | @Override 109 | 110 | public String toString() { 111 | return "ColumnDefinition [columnPos=" + columnPos + ", columnName=" + columnName 112 | + ", srcType=" + srcType + ", dstType=" + dstType + ", encoding=" + encoding + ", removePrefix=" + removePrefix 113 | + ", jsonPath=" + jsonPath + "]"; 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/main/java/com/aerospike/load/Constants.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2022 by Aerospike. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to 6 | * deal in the Software without restriction, including without limitation the 7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | * sell copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | * IN THE SOFTWARE. 21 | ******************************************************************************/ 22 | package com.aerospike.load; 23 | 24 | /** 25 | * Constants used for this tool and keywords for Json config file. 
26 | * @author Aerospike 27 | * 28 | */ 29 | public class Constants { 30 | 31 | // Config keywords 32 | public static final String VERSION = "version"; 33 | public static final String DSV_CONFIG = "dsv_config"; 34 | public static final String DELIMITER = "delimiter"; 35 | public static final String N_COLUMN = "n_columns_datafile"; 36 | public static final String HEADER_EXIST = "header_exist"; 37 | 38 | public static final String KEY = "key"; 39 | public static final String SET = "set"; 40 | public static final String BIN = "bin"; 41 | public static final String MAPPINGS = "mappings"; 42 | public static final String SECONDARY_MAPPING = "secondary_mapping"; 43 | 44 | public static final String BINLIST = "bin_list"; 45 | public static final String NAME = "name"; 46 | public static final String VALUE = "value"; 47 | public static final String COLUMN_POSITION = "column_position"; 48 | public static final String COLUMN_NAME = "column_name"; 49 | public static final String TYPE = "type"; 50 | public static final String DST_TYPE = "dst_type"; 51 | public static final String ENCODING = "encoding"; 52 | public static final String HEX_ENCODING = "hex"; 53 | public static final String REMOVE_PREFIX = "remove_prefix"; 54 | public static final String JSON_PATH = "json_path"; 55 | 56 | // Constants 57 | public static final int BIN_NAME_LENGTH = 14; 58 | public static final int SET_NAME_LENGTH = 63; 59 | 60 | public static final int MAX_THREADS = 128; 61 | 62 | public static final int RW_THROTTLE_LIMIT = 10000; 63 | public static final char DOUBLE_QUOTE_DELEMITER = '"'; 64 | 65 | // keywords to insert extra information specified by user. Reserved 66 | public static final String SYSTEM_TIME = "system_time"; 67 | 68 | public static final String ABORT_AT_ERROR = "abort_at_error"; 69 | public static final int MajorV = 2; 70 | public static final int MinorV = 0; 71 | 72 | 73 | } 74 | -------------------------------------------------------------------------------- /src/main/java/com/aerospike/load/Counter.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2022 by Aerospike. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to 6 | * deal in the Software without restriction, including without limitation the 7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | * sell copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | * IN THE SOFTWARE. 
21 | ******************************************************************************/ 22 | package com.aerospike.load; 23 | 24 | import java.util.concurrent.atomic.AtomicInteger; 25 | import java.util.concurrent.atomic.AtomicLong; 26 | 27 | /** 28 | * Counter class to keep track of statistic of Loader. 29 | * 30 | * @author Aerospike 31 | * 32 | */ 33 | public class Counter { 34 | 35 | public static class Current { 36 | // Read from File 37 | AtomicInteger readCount = new AtomicInteger(); 38 | AtomicInteger readErrors = new AtomicInteger(); 39 | 40 | // Queued for processing 41 | AtomicInteger processingQueued = new AtomicInteger(); 42 | AtomicInteger processingCount = new AtomicInteger(); 43 | AtomicInteger processingErrors = new AtomicInteger(); 44 | 45 | // Write to Aerospike 46 | AtomicInteger writeCount = new AtomicInteger(); 47 | AtomicInteger mappingWriteCount = new AtomicInteger(); 48 | AtomicInteger writeErrors = new AtomicInteger(); 49 | AtomicInteger writeTimeouts = new AtomicInteger(); 50 | AtomicInteger writeKeyExists = new AtomicInteger(); 51 | AtomicInteger keyNullSkipped = new AtomicInteger(); 52 | AtomicInteger noBinsSkipped = new AtomicInteger(); 53 | 54 | // Write start time tracker 55 | long writeStartTime; 56 | 57 | // Progress tracker 58 | long bytesTotal; 59 | AtomicLong bytesProcessed = new AtomicLong(); 60 | } 61 | 62 | Current write = new Current(); 63 | static int numCol; 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/com/aerospike/load/MappingDefinition.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2022 by Aerospike. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to 6 | * deal in the Software without restriction, including without limitation the 7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | * sell copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | * IN THE SOFTWARE. 21 | ******************************************************************************/ 22 | 23 | package com.aerospike.load; 24 | 25 | import java.util.List; 26 | 27 | /** 28 | * 29 | * @author Aerospike 30 | * 31 | * MappinfDefinition (Key, Set, Bin definition) populated by parsing config file. 
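 * When secondaryMapping is true, AsWriterTask.writeToAs appends each bin
 * value to a CDT list under the same key instead of issuing a plain put.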
32 | * 33 | */ 34 | public class MappingDefinition { 35 | public boolean secondaryMapping = false; 36 | public MetaDefinition keyColumnDef; 37 | public MetaDefinition setColumnDef; 38 | public List binColumnDefs; 39 | 40 | public MappingDefinition(boolean secondary_mapping, MetaDefinition keyColumnDef, MetaDefinition setColumnDef, List binColumnDefs) { 41 | super(); 42 | this.secondaryMapping = secondary_mapping; 43 | this.keyColumnDef = keyColumnDef; 44 | this.setColumnDef = setColumnDef; 45 | this.binColumnDefs = binColumnDefs; 46 | } 47 | 48 | @Override 49 | public String toString() { 50 | return "MappingDefinition [secondary_mapping=" + this.secondaryMapping + " keyColumnDef=" + this.keyColumnDef 51 | + " setColumnDef=" + this.setColumnDef + "binColumnDefs=" + this.binColumnDefs + "]"; 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/com/aerospike/load/MetaDefinition.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2022 by Aerospike. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to 6 | * deal in the Software without restriction, including without limitation the 7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | * sell copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | * IN THE SOFTWARE. 21 | ******************************************************************************/ 22 | package com.aerospike.load; 23 | 24 | /** 25 | * 26 | * @author Aerospike 27 | * 28 | * MetaDefinition (KeyDefinition/SetDefinition) populated by parsing config file. 29 | * 30 | */ 31 | public class MetaDefinition { 32 | String staticName; 33 | ColumnDefinition nameDef; 34 | 35 | public MetaDefinition(String staticName, ColumnDefinition nameDef) { 36 | this.staticName = staticName; 37 | this.nameDef = nameDef; 38 | } 39 | 40 | public String getBinStaticName() { 41 | return this.staticName; 42 | } 43 | 44 | public ColumnDefinition getBinNameDef() { 45 | return this.nameDef; 46 | } 47 | 48 | @Override 49 | public String toString() { 50 | return "ColumnDefinition [staticName=" + this.staticName + ", nameDef=" + this.nameDef + "]"; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/com/aerospike/load/Parameters.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2022 by Aerospike. 
3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to 6 | * deal in the Software without restriction, including without limitation the 7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | * sell copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | * IN THE SOFTWARE. 21 | ******************************************************************************/ 22 | package com.aerospike.load; 23 | 24 | import com.aerospike.client.policy.WritePolicy; 25 | import com.aerospike.client.Host; 26 | 27 | /** 28 | * Configuration data. 29 | */ 30 | public class Parameters { 31 | Host[] hosts; 32 | String namespace; 33 | WritePolicy writePolicy; 34 | long maxThroughput; 35 | long timeZoneOffset; 36 | int abortErrorCount; 37 | boolean verbose; 38 | boolean unorderdMaps; 39 | 40 | /** 41 | * Set parameters from commandline argument. 42 | * @param hosts 43 | * @param namespace 44 | * @param writePolicy 45 | * @param maxThroughput 46 | * @param timeZoneOffset 47 | * @param abortAtError 48 | * @param verbose 49 | * @param unorderdMaps 50 | */ 51 | protected Parameters( 52 | Host[] hosts, 53 | String namespace, 54 | WritePolicy writePolicy, 55 | long maxThroughput, 56 | long timeZoneOffset, 57 | int abortErrorCount, 58 | boolean verbose, 59 | boolean unorderdMaps 60 | ) { 61 | this.hosts = hosts; 62 | this.namespace = namespace; 63 | this.writePolicy = writePolicy; 64 | this.maxThroughput = maxThroughput; 65 | this.timeZoneOffset = timeZoneOffset; 66 | this.abortErrorCount = abortErrorCount; 67 | this.verbose = verbose; 68 | this.unorderdMaps = unorderdMaps; 69 | } 70 | 71 | @Override 72 | public String toString() { 73 | return "Parameters:[ hosts=" + this.hosts + 74 | ", ns=" + this.namespace + 75 | ", maxThroughput=" + this.maxThroughput + 76 | ", write-action=" + this.writePolicy.recordExistsAction.toString() + 77 | ", timeZoneOffset=" + this.timeZoneOffset + 78 | ", abortErrorCount=" + this.abortErrorCount + 79 | ", verbose=" + this.verbose + 80 | ", unorderdMaps=" + this.unorderdMaps + "]"; 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/main/java/com/aerospike/load/Parser.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2022 by Aerospike. 
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 ******************************************************************************/
package com.aerospike.load;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;

/**
 * Parser class to parse the schema/data-definition config file and DSV data lines.
 *
 */
public class Parser {
    private static Logger log = LogManager.getLogger(Parser.class);

    /**
     * Process column definitions in a JSON formatted config file and populate
     * the DSV configuration map and the list of mapping definitions.
     *
     * @param configFile  Config/schema/definition file name
     * @param dsvConfigs  DSV configuration for the loader to use, as given in the config file (version, n_columns, delimiter...)
     * @param mappingDefs List of schema/definitions for all mappings (primary + secondary)
     * @return true on success, false if the config file is empty, missing required keys, or cannot be parsed
     */
    public static boolean parseJSONColumnDefinitions(File configFile, HashMap<String, String> dsvConfigs, List<MappingDefinition> mappingDefs) {
        FileReader fr = null;
        try {
            JSONParser jsonParser = new JSONParser();

            fr = new FileReader(configFile);
            Object obj = jsonParser.parse(fr);

            JSONObject jobj;
            if (obj == null) {
                log.error("Empty config file.");
                return false;
            } else {
                jobj = (JSONObject) obj;
                log.debug("Config file contents: " + jobj.toJSONString());
            }

            /*
             * Get Metadata, DSV_CONFIG parameters: version, (n_columns, delimiter, header_exist..).
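             * For illustration, a typical config (cf. src/test/resources/configString.json) looks like:
             *   { "version": "2.0",
             *     "dsv_config": { "delimiter": "##", "n_columns_datafile": 3, "header_exist": true },
             *     "mappings": [ ... ] }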
             * Get mapping definition.
             */
            if (getUpdateDsvConfig(jobj, dsvConfigs) == false
                    || getUpdateMappingColumnDefs(jobj, mappingDefs) == false) {
                return false;
            }

        } catch (IOException ie) {
            log.error("File: " + Utils.getFileName(configFile.getName()) + " Config i/o Error: " + ie.toString());
            if (log.isDebugEnabled()) {
                ie.printStackTrace();
            }
            return false;
        } catch (ParseException pe) {
            log.error("File: " + Utils.getFileName(configFile.getName()) + " Config parsing Error: " + pe.toString());
            if (log.isDebugEnabled()) {
                pe.printStackTrace();
            }
            return false;
        } catch (Exception e) {
            log.error("File: " + Utils.getFileName(configFile.getName()) + " Config unknown Error: " + e.toString());
            if (log.isDebugEnabled()) {
                e.printStackTrace();
            }
            return false;
        } finally {
            // Close the reader here (and only here) so an early return or an
            // exception can no longer be turned into a spurious success.
            if (fr != null) {
                try {
                    fr.close();
                } catch (IOException fce) {
                    log.error("File: " + Utils.getFileName(configFile.getName()) + " File reader closing error: "
                            + fce.toString());
                    if (log.isDebugEnabled()) {
                        fce.printStackTrace();
                    }
                }
            }
        }
        return true;
    }

    /*
     * Parse dsv_config parameters from the config file.
     */
    private static boolean getUpdateDsvConfig(JSONObject jobj, HashMap<String, String> dsvConfigs) {
        Object obj = null;

        // Get metadata of the loader (update dsvConfigs).
        if ((obj = getFromJsonObject(jobj, Constants.VERSION)) == null) {
            log.error("\"" + Constants.VERSION + "\" Key is missing in config file.");
            return false;
        }
        dsvConfigs.put(Constants.VERSION, obj.toString());

        // Get DSV_CONFIG parameters (n_columns, delimiter, header_exist..).
        if ((obj = getFromJsonObject(jobj, Constants.DSV_CONFIG)) == null) {
            log.error("\"" + Constants.DSV_CONFIG + "\" Key is missing in config file.");
            return false;
        }
        JSONObject dsvConfigObj = (JSONObject) obj;

        if ((obj = getFromJsonObject(dsvConfigObj, Constants.N_COLUMN)) == null) {
            log.error("\"" + Constants.N_COLUMN + "\" Key is missing in config file.");
            return false;
        }
        dsvConfigs.put(Constants.N_COLUMN, obj.toString());

        // Delimiter and header_exist configs are optional.
        if ((obj = getFromJsonObject(dsvConfigObj, Constants.DELIMITER)) != null)
            dsvConfigs.put(Constants.DELIMITER, obj.toString());

        if ((obj = getFromJsonObject(dsvConfigObj, Constants.HEADER_EXIST)) != null)
            dsvConfigs.put(Constants.HEADER_EXIST, obj.toString());

        return true;
    }

    /*
     * Parse all mappings given in the config file.
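     * For illustration, configInt.json in src/test/resources defines a secondary mapping:
     *   { "secondary_mapping": "true",
     *     "key": {"column_name": "intData", "type": "string"},
     *     "set": "newset",
     *     "bin_list": [ {"name": "revIntBin", "value": {"column_name": "key", "type": "string"}} ] }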
     */
    private static boolean getUpdateMappingColumnDefs(JSONObject jobj, List<MappingDefinition> mappingDefs) throws Exception {
        Object obj = null;
        JSONArray mappings;
        if ((obj = getFromJsonObject(jobj, Constants.MAPPINGS)) != null) {
            mappings = (JSONArray) obj;
            Iterator it = mappings.iterator();
            while (it.hasNext()) {
                JSONObject mappingObj = (JSONObject) it.next();
                MappingDefinition md = getMappingDef(mappingObj);
                if (md != null) {
                    mappingDefs.add(md);
                } else {
                    log.error("Error in parsing mapping definition: " + mappingObj.toString());
                    return false;
                }
            }
        }
        return true;
    }

    private static Object getFromJsonObject(JSONObject jobj, String key) {
        return jobj.get(key);
    }

    /*
     * Parse a mapping definition from the config file to get a MappingDefinition.
     * This holds (secondary_mapping, keyDefinition, setDefinition, binDefinitions).
     */
    private static MappingDefinition getMappingDef(JSONObject mappingObj) throws Exception {

        boolean secondaryMapping = false;
        MetaDefinition keyColumnDef = null;
        MetaDefinition setColumnDef = null;
        List<BinDefinition> binColumnDefs = new ArrayList<BinDefinition>();

        Object obj;

        // Get secondary_mapping info. parseBoolean(obj.toString()) accepts both
        // a JSON string "true" and a JSON boolean true.
        obj = getFromJsonObject(mappingObj, Constants.SECONDARY_MAPPING);
        if (obj != null) {
            secondaryMapping = Boolean.parseBoolean(obj.toString());
        }

        if ((obj = getFromJsonObject(mappingObj, Constants.KEY)) != null) {
            keyColumnDef = getMetaDefs((JSONObject) obj, Constants.KEY);
        } else {
            log.error("\"" + Constants.KEY + "\" Key is missing in mapping. Mapping: " + mappingObj.toString());
            return null;
        }

        if ((obj = getFromJsonObject(mappingObj, Constants.SET)) == null) {
            log.error("\"" + Constants.SET + "\" Key is missing in mapping. Mapping: " + mappingObj.toString());
            return null;
        } else if (obj instanceof String) {
            setColumnDef = new MetaDefinition(obj.toString(), null);
        } else {
            setColumnDef = getMetaDefs((JSONObject) obj, Constants.SET);
        }

        if ((obj = getFromJsonObject(mappingObj, Constants.BINLIST)) != null) {
            JSONArray binObjList = (JSONArray) obj;
            Iterator it = binObjList.iterator();
            while (it.hasNext()) {
                JSONObject binObj = (JSONObject) it.next();
                BinDefinition binDef = getBinDefs(binObj);
                if (binDef != null) {
                    binColumnDefs.add(binDef);
                } else {
                    log.error("Error in parsing binDef: " + binObj.toString());
                    return null;
                }
            }
        } else {
            log.error("\"" + Constants.BINLIST + "\" Key is missing in mapping. Mapping: " + mappingObj.toString());
            return null;
        }

        return new MappingDefinition(secondaryMapping, keyColumnDef, setColumnDef, binColumnDefs);
    }

    /*
     * Parse the meta definition (for Set or Key) from the config file and populate a MetaDefinition object.
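     * For illustration, a key/set entry is either positional or named, e.g.
     *   "key": {"column_position": 2, "type": "string"}    (configStaticBinName.json)
     *   "set": {"column_name": "set", "type": "string"}    (configString.json)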
245 | */ 246 | private static MetaDefinition getMetaDefs(JSONObject jobj, String jobjName) { 247 | // Parsing Key, Set definition 248 | ColumnDefinition valueDef = new ColumnDefinition(-1, null, null, null, null, null, null); 249 | 250 | if ((jobj.get(Constants.COLUMN_POSITION)) != null) { 251 | 252 | valueDef.columnPos = (Integer.parseInt(jobj.get(Constants.COLUMN_POSITION).toString()) - 1); 253 | 254 | } else if ((jobj.get(Constants.COLUMN_NAME)) != null) { 255 | 256 | valueDef.columnName = (String) (jobj.get(Constants.COLUMN_NAME)); 257 | 258 | } else { 259 | log.error("Column_name or pos info is missing. Specify proper key/set mapping in config file for: " + jobjName + ":" 260 | + jobj.toString()); 261 | } 262 | 263 | valueDef.setSrcType((String) jobj.get(Constants.TYPE)); 264 | 265 | // Default set type is 'string'. what is default key type? 266 | if (Constants.SET.equalsIgnoreCase(jobjName) && valueDef.srcType == null) { 267 | valueDef.setSrcType("string"); 268 | } 269 | 270 | // Get prefix to remove. Prefix will be removed from data 271 | if ((jobj.get(Constants.REMOVE_PREFIX)) != null) { 272 | valueDef.removePrefix = (String) jobj.get(Constants.REMOVE_PREFIX); 273 | } 274 | 275 | return new MetaDefinition(null, valueDef); 276 | } 277 | 278 | /* 279 | * Parsing Bin definition from config file and populate BinDef object 280 | */ 281 | private static BinDefinition getBinDefs(JSONObject jobj) { 282 | /* 283 | * Sample Bin object 284 | * {"name": "age", "value": {"column_name": "age", "type" : "integer"} } 285 | */ 286 | 287 | Object obj; 288 | 289 | // Parsing Bin name 290 | ColumnDefinition nameDef = new ColumnDefinition(-1, null, null, null, null, null, null); 291 | String staticBinName = null; 292 | 293 | if ((obj = jobj.get(Constants.NAME)) == null) { 294 | log.error(Constants.NAME + " key is missing object: " + jobj.toString()); 295 | return null; 296 | } else if (!(obj instanceof JSONObject)) { 297 | staticBinName = (String) obj; 298 | } else { 299 | JSONObject nameObj = (JSONObject) obj; 300 | if ((nameObj.get(Constants.COLUMN_POSITION)) != null) { 301 | 302 | nameDef.columnPos = (Integer.parseInt(nameObj.get(Constants.COLUMN_POSITION).toString()) - 1); 303 | 304 | } else if ((nameObj.get(Constants.COLUMN_NAME)) != null) { 305 | 306 | nameDef.columnName = (String) (nameObj.get(Constants.COLUMN_NAME)); 307 | 308 | } else { 309 | log.error("Column_name or pos info is missing. Specify proper bin name mapping in config file for: " + jobj.toString()); 310 | } 311 | } 312 | 313 | // Parsing Bin value 314 | ColumnDefinition valueDef = new ColumnDefinition(-1, null, null, null, null, null, null); 315 | String staticBinValue = null; 316 | 317 | if ((obj = jobj.get(Constants.VALUE)) == null) { 318 | log.error(Constants.VALUE + " key is missing in bin object:" + jobj.toString()); 319 | return null; 320 | } else if (!(obj instanceof JSONObject)) { 321 | staticBinValue = (String) obj; 322 | } else { 323 | JSONObject valueObj = (JSONObject) obj; 324 | 325 | if ((valueObj.get(Constants.COLUMN_POSITION)) != null) { 326 | 327 | valueDef.columnPos = (Integer.parseInt(valueObj.get(Constants.COLUMN_POSITION).toString()) - 1); 328 | 329 | } else if ((valueObj.get(Constants.COLUMN_NAME)) != null) { 330 | 331 | valueDef.columnName = (String) (valueObj.get(Constants.COLUMN_NAME)); 332 | 333 | } else { 334 | log.error("Column_name or pos info is missing. 
Specify proper bin value mapping in config file for: " + jobj.toString()); 335 | } 336 | 337 | valueDef.setSrcType((String) (valueObj.get(Constants.TYPE))); 338 | if (valueDef.srcType == null) { 339 | log.error(Constants.TYPE + " key is missing in bin object: " + jobj.toString()); 340 | } 341 | valueDef.setDstType((String) (valueObj.get(Constants.DST_TYPE))); 342 | valueDef.encoding = (String) (valueObj.get(Constants.ENCODING)); 343 | valueDef.removePrefix = ((String) (valueObj.get(Constants.REMOVE_PREFIX))); 344 | } 345 | 346 | return new BinDefinition(staticBinName, staticBinValue, nameDef, valueDef); 347 | } 348 | 349 | /** 350 | * Parses the line into list of raw column string values 351 | * Format of data should follow one rule: Data should not contain delimiter as part of data. 352 | * 353 | * @param line: line from the data file with DSV formated 354 | * @return List: List of column entries from line 355 | */ 356 | public static List getDSVRawColumns(String line, String delimiter){ 357 | if(line==null || line.trim().length()==0){ 358 | return null; 359 | } 360 | List store = new ArrayList(); 361 | StringBuilder curVal = new StringBuilder(); 362 | boolean inquotes = false; 363 | boolean prevDelimiter = false; 364 | char[] delimiterChars = delimiter.toCharArray(); 365 | int delimiterIndex = 0; 366 | for (int i=0; i 0){ 395 | // Appending partial delimiters read till now 396 | curVal.append(Arrays.copyOfRange(delimiterChars, 0, delimiterIndex)); 397 | delimiterIndex = 0; 398 | 399 | if (ch == delimiterChars[delimiterIndex]) { 400 | if (delimiterIndex == delimiterChars.length-1){ 401 | prevDelimiter = true; 402 | // Trim will remove all whitespace character from end of string or 403 | // after ending double quotes 404 | store.add(curVal.toString().trim()); 405 | curVal = new StringBuilder(); 406 | delimiterIndex = 0; 407 | inquotes = false; 408 | } 409 | else{ 410 | delimiterIndex++; 411 | } 412 | continue; 413 | } 414 | } 415 | if (inquotes) { 416 | if (ch== Constants.DOUBLE_QUOTE_DELEMITER) { 417 | inquotes = false; 418 | } 419 | else { 420 | curVal.append(ch); 421 | } 422 | } 423 | else { 424 | 425 | if (ch == Constants.DOUBLE_QUOTE_DELEMITER) { 426 | inquotes = true; 427 | if (curVal.length()>0) { 428 | inquotes = false; 429 | //If this is the second quote in a value, add a quote 430 | //this is for the double quote in the middle of a value 431 | curVal.append('\"'); 432 | } 433 | } 434 | else{ 435 | curVal.append(ch); 436 | } 437 | } 438 | } 439 | store.add(curVal.toString().trim()); 440 | return store; 441 | } 442 | 443 | } 444 | -------------------------------------------------------------------------------- /src/main/java/com/aerospike/load/PrintStat.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2022 by Aerospike. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to 6 | * deal in the Software without restriction, including without limitation the 7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | * sell copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 ******************************************************************************/
package com.aerospike.load;

import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/**
 * Prints progress of Aerospike-Loader.
 *
 */
public class PrintStat implements Runnable {
    private static Logger log = LogManager.getLogger(PrintStat.class);
    private static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    private Counter counters;

    PrintStat(Counter counters) {
        this.counters = counters;
    }

    /**
     * Print write_count, TPS, errors (timeouts, keyExists, otherWrites, readErrors, processing),
     * skipped, noBins and the loading progress.
     */
    @Override
    public void run() {

        int tps = 0;
        int rtps = 0;
        int nWrites = 0;
        int nErrors = 0;
        int nReader = 0;
        int progress = 0;
        while (!Thread.currentThread().isInterrupted()) {

            if (log.isDebugEnabled()) {
                Runtime runtime = Runtime.getRuntime();
                int mb = 1024 * 1024;
                log.debug("Used Memory: " + (runtime.totalMemory() - runtime.freeMemory()) / mb + " Free Memory: "
                        + runtime.freeMemory() / mb + " Total Memory: " + runtime.totalMemory() / mb + " Max Memory: "
                        + runtime.maxMemory() / mb);
            }
            // Get current time
            long time = System.currentTimeMillis();
            String date = dateFormat.format(new Date(time));

            // Calculate progress
            if (counters.write.bytesTotal != 0)
                progress = (int) ((counters.write.bytesProcessed.get() * 100) / counters.write.bytesTotal);

            // Calculate transactions per second since the previous iteration
            tps = (counters.write.writeCount.get() +
                    counters.write.writeErrors.get() +
                    counters.write.readErrors.get() +
                    counters.write.processingErrors.get()) -
                    (nWrites + nErrors);

            nWrites = counters.write.writeCount.get();
            nErrors = (counters.write.writeErrors.get() +
                    counters.write.readErrors.get() +
                    counters.write.processingErrors.get());

            rtps = counters.write.readCount.get() - nReader;
            nReader = counters.write.readCount.get();

            log.debug(date + ": Read/process tps:" + rtps);
            // Print stats
            log.info(date + " load(Write count=" + counters.write.writeCount.get() +
                    " tps=" + tps +
                    " Errors=" + nErrors +
                    " (Timeout:" + counters.write.writeTimeouts.get() + " KeyExists:" + counters.write.writeKeyExists.get() +
                    " otherWrites:" + (counters.write.writeErrors.get() - counters.write.writeTimeouts.get() - counters.write.writeKeyExists.get()) +
                    " ReadErrors:" + counters.write.readErrors.get() +
                    " Processing:" + counters.write.processingErrors.get() + ")" +
                    " Skipped (NullKey:" + counters.write.keyNullSkipped.get() + " NoBins:" + counters.write.noBinsSkipped + ")" +
                    "
Progress:" + progress + "%"); 102 | 103 | // Wait for 1 second 104 | try { 105 | Thread.sleep(1000); 106 | } catch (InterruptedException e) { 107 | Thread.currentThread().interrupt(); 108 | } 109 | } 110 | } 111 | 112 | } 113 | -------------------------------------------------------------------------------- /src/main/java/com/aerospike/load/Utils.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2022 by Aerospike. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to 6 | * deal in the Software without restriction, including without limitation the 7 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 | * sell copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | * IN THE SOFTWARE. 21 | ******************************************************************************/ 22 | package com.aerospike.load; 23 | 24 | import java.io.File; 25 | import java.io.FileFilter; 26 | import java.util.ArrayList; 27 | import java.util.List; 28 | import java.util.TimeZone; 29 | 30 | import org.apache.commons.cli.CommandLine; 31 | import org.apache.logging.log4j.LogManager; 32 | import org.apache.logging.log4j.Logger; 33 | 34 | import com.aerospike.client.Host; 35 | import com.aerospike.client.policy.RecordExistsAction; 36 | import com.aerospike.client.policy.WritePolicy; 37 | 38 | public class Utils { 39 | private static Logger log = LogManager.getLogger(Utils.class); 40 | 41 | /** 42 | * Get list of data file names from given files(filenames, directories). 43 | * 44 | * @param files This includes filenames and directory name. 45 | * @return Return list of absolute filenames. 46 | */ 47 | protected static List getFileNames(String[] files) { 48 | List dataFileNames = new ArrayList(); 49 | // Expand directories 50 | for (String fileName : files) { 51 | File file = new File(fileName); 52 | if (!file.exists()) { 53 | log.error("File: " + fileName + " does not exist."); 54 | continue; 55 | } 56 | if (file.isDirectory()) { 57 | File[] subFiles = file.listFiles(new FileFilter() { 58 | public boolean accept(File file) { 59 | return !file.getName().endsWith("."); 60 | } 61 | }); 62 | for (File subFile : subFiles) { 63 | dataFileNames.add(subFile.getAbsolutePath()); 64 | } 65 | } else if (!file.getName().endsWith(".")) { 66 | dataFileNames.add(file.getAbsolutePath()); 67 | } 68 | } 69 | 70 | return dataFileNames; 71 | } 72 | 73 | /** 74 | * Get absolute file name from file path. 
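     * For example, getFileName("/tmp/datafiles/dataInt.dsv") returns "dataInt.dsv".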
     */
    protected static String getFileName(String filePath) {
        return (new File(filePath).getName());
    }

    /**
     * Parse command line parameters.
     *
     * @param cl Commandline arguments
     * @return Parameters parsed from the command line
     * @throws Exception
     */
    protected static Parameters parseParameters(CommandLine cl) throws Exception {

        String portString = cl.getOptionValue("p", "3000");
        int port = Integer.parseInt(portString);

        Host[] hosts;
        if (cl.hasOption("hosts")) {
            hosts = Host.parseHosts(cl.getOptionValue("hosts"), port);
        } else {
            hosts = new Host[1];
            hosts[0] = new Host("127.0.0.1", port);
        }

        String namespace = cl.getOptionValue("n", "test");

        long maxThroughput = Long.parseLong(cl.getOptionValue("g", "0"));

        if (cl.hasOption("tz")) {
            if (!Utils.checkTimeZoneID(cl.getOptionValue("tz")))
                log.error("TimeZone given is not a valid ID");
        }
        // Option lookup is case-sensitive; the option is registered as "tz".
        String timeZone = cl.getOptionValue("tz", TimeZone.getDefault().getID());
        TimeZone source = TimeZone.getTimeZone(timeZone);
        TimeZone local = TimeZone.getDefault();
        long timeZoneOffset = local.getRawOffset() - source.getRawOffset();

        String errorCount = cl.getOptionValue("ec", "0");
        int abortErrorCount = Integer.parseInt(errorCount);

        String timeToLive = cl.getOptionValue("e", "-1");
        int ttl = Integer.parseInt(timeToLive);

        String timeout = cl.getOptionValue("T", "0");
        int timeoutMs = Integer.parseInt(timeout);

        boolean sendKey = false;
        if (cl.hasOption("uk")) {
            sendKey = true;
        }

        String writeAction = cl.getOptionValue("wa", "UPDATE");

        WritePolicy writePolicy = new WritePolicy();
        writePolicy.recordExistsAction = RecordExistsAction.valueOf(writeAction.toUpperCase());
        writePolicy.setTimeout(timeoutMs);
        writePolicy.expiration = ttl;
        writePolicy.sendKey = sendKey;

        boolean verbose = false;
        if (cl.hasOption("v")) {
            verbose = true;
        }

        boolean unorderdMaps = false;
        if (cl.hasOption("um")) {
            unorderdMaps = true;
        }

        return new Parameters(hosts, namespace, writePolicy, maxThroughput, timeZoneOffset, abortErrorCount, verbose, unorderdMaps);
    }

    /**
     * Check existence of user provided timezone.
     *
     * @param timeZone timezone_id.
     * @return true if timezone_id is a valid id else false.
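     * For example, checkTimeZoneID("PST") returns true (cf. the "-tz PST" sample in the README).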
154 | */ 155 | protected static boolean checkTimeZoneID(String timeZone) { 156 | boolean sourceTZ = false; 157 | for (String timezone : TimeZone.getAvailableIDs()) { 158 | if (timezone.equalsIgnoreCase(timeZone)) { 159 | sourceTZ = true; 160 | } 161 | } 162 | return sourceTZ; 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- 1 | appenders = console, file 2 | 3 | appender.console.type=Console 4 | appender.console.name=STDOUT 5 | appender.console.layout.type=PatternLayout 6 | appender.console.layout.pattern=%-5p %-17c{1}:%L - %m%n 7 | 8 | appender.file.type=File 9 | appender.file.name=LOGFILE 10 | appender.file.fileName=aerospike-load.log 11 | appender.file.append=false 12 | appender.file.layout.type=PatternLayout 13 | appender.file.layout.pattern=%-5p %-17c{1}:%L - %m%n 14 | 15 | loggers=loadlogger 16 | logger.loadlogger.name=com.aerospike.load 17 | logger.loadlogger.level=info 18 | logger.loadlogger.appenderRefs=file 19 | logger.loadlogger.appenderRef.file.ref=LOGFILE 20 | 21 | rootLogger.appenderRefs=stdout 22 | rootLogger.level=info 23 | rootLogger.appenderRef.stdout.ref=STDOUT -------------------------------------------------------------------------------- /src/main/resources/project.properties: -------------------------------------------------------------------------------- 1 | name=${project.name} 2 | version=${project.version} 3 | -------------------------------------------------------------------------------- /src/test/java/com/aerospike/load/DataTypeTest.java: -------------------------------------------------------------------------------- 1 | package com.aerospike.load; 2 | 3 | import static org.junit.Assert.assertTrue; 4 | 5 | import java.io.BufferedReader; 6 | import java.io.BufferedWriter; 7 | import java.io.File; 8 | import java.io.FileNotFoundException; 9 | import java.io.FileOutputStream; 10 | import java.io.FileReader; 11 | import java.io.IOException; 12 | import java.io.OutputStreamWriter; 13 | import java.io.Writer; 14 | import java.text.DateFormat; 15 | import java.text.SimpleDateFormat; 16 | import java.util.ArrayList; 17 | import java.util.Date; 18 | import java.util.HashMap; 19 | import java.util.SortedMap; 20 | import java.util.Iterator; 21 | import java.util.List; 22 | import java.util.Map; 23 | import java.util.Map.Entry; 24 | import java.util.Random; 25 | 26 | import org.json.simple.JSONObject; 27 | import org.json.simple.parser.JSONParser; 28 | import org.json.simple.parser.ParseException; 29 | import org.junit.After; 30 | import org.junit.Before; 31 | import org.junit.Test; 32 | 33 | import com.aerospike.client.AerospikeClient; 34 | import com.aerospike.client.AerospikeException; 35 | import com.aerospike.client.Bin; 36 | import com.aerospike.client.Key; 37 | import com.aerospike.client.Record; 38 | import com.aerospike.client.policy.Policy; 39 | import com.aerospike.client.cdt.MapOrder; 40 | 41 | enum BinType { 42 | INTEGER, STRING, BLOB, LIST, MAP, JSON, TIMESTAMP; 43 | } 44 | 45 | 46 | /** 47 | * @author jyoti 48 | * 49 | */ 50 | public class DataTypeTest { 51 | 52 | String host = "127.0.0.1"; 53 | String port = "3100"; 54 | String ns = "test"; 55 | String set = null; 56 | MapOrder expectedMapOrder = MapOrder.KEY_ORDERED; 57 | //String config = "src/test/resources/allDatatypeCsv.json"; 58 | String error_count = "0"; 59 | String write_action = "update"; 60 | String timeout = "10"; 61 | 
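    // NOTE: these tests assume a running Aerospike server reachable at host:port above
    // (127.0.0.1:3100). Each test writes a .dsv data file, invokes AerospikeLoad.main()
    // with a matching config, and then reads the records back to validate them.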
String rootDir = "src/test/resources/";
    //String configFile = "";
    String dataFile = "";
    String testSchemaFile = "src/test/resources/testSchema.json";
    // String dataFile = "src/test/resources/data.csv";
    String log = "aerospike-load.log";
    JSONObject testSchema = null;
    AerospikeClient client;

    @Before
    public void setUp() {

        try {
            client = new AerospikeClient(host, Integer.parseInt(port));
        } catch (NumberFormatException e) {
            e.printStackTrace();
        } catch (AerospikeException e) {
            e.printStackTrace();
        }
        testSchema = parseConfigFile(testSchemaFile);
    }

    @After
    public void tearDown() {
        client.close();
    }

    public List<List<String>> parseDataFile(String dataFile) {
        BufferedReader br = null;
        String delimiter = ",";
        List<List<String>> recordDataList = new ArrayList<List<String>>();
        try {
            String curLine;
            br = new BufferedReader(new FileReader(dataFile));
            List<String> binDataList;
            while ((curLine = br.readLine()) != null) {
                binDataList = Parser.getDSVRawColumns(curLine, delimiter);
                if (binDataList != null) {
                    // Collect every parsed line, not just the last one.
                    recordDataList.add(binDataList);
                }
            }
            br.close();
        } catch (IOException e) {
            // Print error
        }
        return recordDataList;
    }

    public JSONObject parseConfigFile(String configFile) {
        JSONParser parser = new JSONParser();
        JSONObject jsonObject = null;
        try {
            Object obj = parser.parse(new FileReader(configFile));
            jsonObject = (JSONObject) obj;
        } catch (IOException e) {
            // Print error/abort/skip
        } catch (ParseException e) {
            // throw error/abort test/skip test
        }
        return jsonObject;
    }

    // String type data validation
    //@Test
    public void testValidateString() throws Exception {
        System.out.println("TestValidateString: start");
        if (!client.isConnected()) {
            System.out.println("Client is not able to connect:" + host + ":" + port);
            return;
        }

        // Create datafile
        HashMap<String, String> binMap = (HashMap<String, String>) testSchema.get("test_string");

        int setMod = 5, range = 100, seed = 10, nrecords = 10;
        dataFile = rootDir + "dataString.dsv";
        writeDataMap(dataFile, nrecords, setMod, range, seed, binMap);

        // Run Aerospike loader
        AerospikeLoad.main(new String[]{"-h", host, "-p", port, "-n", ns, "-ec", error_count, "-wa", write_action, "-c", "src/test/resources/configString.json", dataFile});

        // Validate loaded data
        String dstType = null;
        boolean dataValid = validateMap(client, dataFile, nrecords, setMod, range, seed, binMap, dstType);
        boolean error = getError(log);

        assertTrue(dataValid);
        assertTrue(!error);

        System.out.println("TestValidateString: Complete");
    }

    // Integer type data validation
    @Test
    public void testValidateInteger() throws Exception {
        System.out.println("TestValidateInteger: start");
        if (!client.isConnected()) {
            System.out.println("Client is not able to connect:" + host + ":" + port);
            return;
        }

        // Create datafile
        HashMap<String, String> binMap = (HashMap<String, String>) testSchema.get("test_integer");

        int setMod = 5, range = 100, seed = 10, nrecords = 10;
        dataFile = rootDir + "dataInt.dsv";
        writeDataMap(dataFile, nrecords, setMod, range, seed, binMap);

        // Run Aerospike loader
        AerospikeLoad.main(new String[]{"-h", host, "-p",
port,"-n", ns, "-ec", error_count,"-wa", write_action,"-c", "src/test/resources/configInt.json", dataFile}); 173 | 174 | // Validate loaded data 175 | String dstType = null; 176 | boolean dataValid = validateMap(client, dataFile, nrecords, setMod, range, seed, binMap, dstType); 177 | boolean error = getError(log); 178 | 179 | assertTrue(dataValid); 180 | assertTrue(!error); 181 | 182 | System.out.println("TestValidateInteger: Complete"); 183 | } 184 | 185 | //Utf8 string type data validation 186 | //@Test 187 | public void testValidateStringUtf8() throws Exception { 188 | System.out.println("TestValidateStringUtf8: start"); 189 | if(!client.isConnected()) { 190 | System.out.println("Client is not able to connect:" + host + ":" + port); 191 | return; 192 | } 193 | 194 | // Create datafile 195 | 196 | HashMap binMap = (HashMap) testSchema.get("test_utf8"); 197 | 198 | 199 | int setMod = 5, range = 100, seed = 10, nrecords = 10; 200 | dataFile = rootDir + "dataUtf8.dsv"; 201 | writeDataMap(dataFile, nrecords, setMod, range, seed, binMap); 202 | 203 | // Run Aerospike loader 204 | AerospikeLoad.main(new String[]{"-h", host,"-p", port,"-n", ns, "-ec", error_count,"-wa", write_action,"-c", "src/test/resources/configUtf8.json", dataFile}); 205 | 206 | // Validate loaded data 207 | String dstType = null; 208 | boolean dataValid = validateMap(client, dataFile, nrecords, setMod, range, seed, binMap, dstType); 209 | boolean error = getError(log); 210 | 211 | assertTrue(dataValid); 212 | assertTrue(!error); 213 | 214 | System.out.println("TestValidateStringutf8: Complete"); 215 | } 216 | 217 | //timestamp type data validation 218 | //@Test 219 | public void testValidateTimestampInteger() throws Exception { 220 | System.out.println("TestValidateTimestampInteger: start"); 221 | if(!client.isConnected()) { 222 | System.out.println("Client is not able to connect:" + host + ":" + port); 223 | return; 224 | } 225 | // Create datafile 226 | 227 | HashMap binMap = (HashMap) testSchema.get("test_date"); 228 | 229 | 230 | int setMod = 5, range = 100, seed = 10, nrecords = 10; 231 | dataFile = rootDir + "dataDate.dsv"; 232 | writeDataMap(dataFile, nrecords, setMod, range, seed, binMap); 233 | 234 | // Run Aerospike loader 235 | AerospikeLoad.main(new String[]{"-h", host,"-p", port,"-n", ns, "-ec", error_count,"-wa", write_action,"-c", "src/test/resources/configDate.json", dataFile}); 236 | 237 | // Validate loaded data 238 | String dst_type = "integer"; 239 | boolean dataValid = validateMap(client, dataFile, nrecords, setMod, range, seed, binMap, dst_type); 240 | boolean error = getError(log); 241 | 242 | assertTrue(dataValid); 243 | assertTrue(!error); 244 | 245 | System.out.println("TestValidateTimestampInteger: Complete"); 246 | } 247 | 248 | //Blob type data validation 249 | //@Test 250 | public void testValidateBlob() throws Exception { 251 | System.out.println("TestValidateBlob: start"); 252 | if(!client.isConnected()) { 253 | System.out.println("Client is not able to connect:" + host + ":" + port); 254 | return; 255 | } 256 | // Create datafile 257 | 258 | HashMap binMap = (HashMap) testSchema.get("test_blob"); 259 | 260 | 261 | int setMod = 5, range = 100, seed = 10, nrecords = 10; 262 | dataFile = rootDir + "dataBlob.dsv"; 263 | writeDataMap(dataFile, nrecords, setMod, range, seed, binMap); 264 | 265 | // Run Aerospike loader 266 | AerospikeLoad.main(new String[]{"-h", host,"-p", port,"-n", ns, "-ec", error_count,"-wa", write_action,"-c", "src/test/resources/configBlob.json", dataFile}); 267 | 268 | // 
Validate loaded data 269 | String dstType = "blob"; 270 | boolean dataValid = validateMap(client, dataFile, nrecords, setMod, range, seed, binMap, dstType); 271 | boolean error = getError(log); 272 | 273 | assertTrue(dataValid); 274 | assertTrue(!error); 275 | 276 | System.out.println("TestValidateBlob: Complete"); 277 | } 278 | 279 | //List type data validation 280 | @Test 281 | public void testValidateList() throws Exception { 282 | System.out.println("TestValidateList: start"); 283 | if(!client.isConnected()) { 284 | System.out.println("Client is not able to connect:" + host + ":" + port); 285 | return; 286 | } 287 | // Create datafile 288 | 289 | HashMap binMap = (HashMap) testSchema.get("test_list"); 290 | 291 | 292 | int setMod = 5, range = 100, seed = 10, nrecords = 10; 293 | dataFile = rootDir + "dataList.dsv"; 294 | writeDataMap(dataFile, nrecords, setMod, range, seed, binMap); 295 | 296 | // Run Aerospike loader 297 | AerospikeLoad.main(new String[]{"-h", host,"-p", port,"-n", ns, "-ec", error_count,"-wa", write_action,"-c", "src/test/resources/configList.json", dataFile}); 298 | 299 | // Validate loaded data 300 | String dstType = "list"; 301 | boolean dataValid = validateMap(client, dataFile, nrecords, setMod, range, seed, binMap, dstType); 302 | boolean error = getError(log); 303 | 304 | assertTrue(dataValid); 305 | assertTrue(!error); 306 | 307 | System.out.println("TestValidateList: Complete"); 308 | } 309 | 310 | //Map type data validation 311 | @Test 312 | public void testValidateMap() throws Exception { 313 | System.out.println("TestValidateMap: start"); 314 | if(!client.isConnected()) { 315 | System.out.println("Client is not able to connect:" + host + ":" + port); 316 | return; 317 | } 318 | 319 | // Create datafile 320 | 321 | HashMap binMap = (HashMap) testSchema.get("test_map"); 322 | 323 | 324 | int setMod = 5, range = 100, seed = 10, nrecords = 10; 325 | dataFile = rootDir + "dataMap.dsv"; 326 | writeDataMap(dataFile, nrecords, setMod, range, seed, binMap); 327 | 328 | // Run Aerospike loader 329 | AerospikeLoad.main(new String[]{"-h", host,"-p", port,"-n", ns, "-ec", error_count,"-wa", write_action,"-c", "src/test/resources/configMap.json", dataFile}); 330 | 331 | // Validate loaded data 332 | String dstType = "map"; 333 | boolean dataValid = validateMap(client, dataFile, nrecords, setMod, range, seed, binMap, dstType); 334 | boolean error = getError(log); 335 | 336 | assertTrue(dataValid); 337 | assertTrue(!error); 338 | 339 | System.out.println("TestValidateMap: Complete"); 340 | } 341 | 342 | //JSON type data validation 343 | @Test 344 | public void testValidateJSON() throws Exception { 345 | System.out.println("TestValidateJSON: start"); 346 | if(!client.isConnected()) { 347 | System.out.println("Client is not able to connect:" + host + ":" + port); 348 | return; 349 | } 350 | 351 | // Create datafile 352 | 353 | HashMap binMap = (HashMap) testSchema.get("test_json"); 354 | 355 | 356 | int setMod = 5, range = 100, seed = 10, nrecords = 10; 357 | dataFile = rootDir + "dataJson.dsv"; 358 | writeDataMap(dataFile, nrecords, setMod, range, seed, binMap); 359 | 360 | // Run Aerospike loader 361 | AerospikeLoad.main(new String[]{"-h", host,"-p", port,"-n", ns, "-v", "-ec", error_count,"-wa", write_action,"-c", "src/test/resources/configJson.json", dataFile}); 362 | 363 | // Validate loaded data 364 | String dstType = "json"; 365 | boolean dataValid = validateMap(client, dataFile, nrecords, setMod, range, seed, binMap, dstType); 366 | boolean error = getError(log); 
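        // getError() re-scans aerospike-load.log for lines logged at ERROR level during the load.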
367 | 368 | assertTrue(dataValid); 369 | assertTrue(!error); 370 | 371 | System.out.println("TestValidateJSON: Complete"); 372 | } 373 | 374 | //Multiple data type insert 375 | //@Test 376 | public void testAllDatatype() throws Exception { 377 | System.out.println("TestAllDatatype: start"); 378 | if(!client.isConnected()) { 379 | System.out.println("Client is not able to connect:" + host + ":" + port); 380 | return; 381 | } 382 | 383 | // Create datafile 384 | 385 | HashMap binMap = (HashMap) testSchema.get("test_alltype"); 386 | 387 | 388 | int setMod = 5, range = 100, seed = 10, nrecords = 10; 389 | dataFile = rootDir + "configAllDataType.dsv"; 390 | writeDataMap(dataFile, nrecords, setMod, range, seed, binMap); 391 | 392 | // Run Aerospike loader 393 | AerospikeLoad.main(new String[]{"-h", host,"-p", port,"-n", ns, "-ec", error_count,"-wa", write_action,"-c", "src/test/resources/configAllDataType.json", dataFile}); 394 | 395 | boolean error = getError(log); 396 | 397 | assertTrue(!error); 398 | 399 | System.out.println("TestAllDatatype: Complete"); 400 | } 401 | 402 | //Dynamic bin name 403 | //@Test 404 | public void testDynamicBinName() throws Exception { 405 | System.out.println("Test Dynamic BinName: start"); 406 | if(!client.isConnected()) { 407 | System.out.println("Client is not able to connect:" + host + ":" + port); 408 | return; 409 | } 410 | 411 | AerospikeLoad.main(new String[]{"-h", host,"-p", port,"-n", ns, "-ec", error_count,"-wa", write_action,"-c", "src/test/resources/configDynamicBinName.json", "src/test/resources/dataDynamicBin.csv"}); 412 | 413 | boolean error = getError(log); 414 | 415 | assertTrue(!error); 416 | 417 | System.out.println("Test Dynamic BinName: Complete"); 418 | } 419 | 420 | //Static binName 421 | //@Test 422 | public void testStaticBinName() throws Exception { 423 | System.out.println("Test static BinName: start"); 424 | if(!client.isConnected()) { 425 | System.out.println("Client is not able to connect:" + host + ":" + port); 426 | return; 427 | } 428 | 429 | AerospikeLoad.main(new String[]{"-h", host,"-p", port,"-n", ns, "-ec", error_count,"-wa", write_action,"-c", "src/test/resources/configStaticBinName.json", "src/test/resources/dataStaticBin.csv"}); 430 | 431 | boolean error = getError(log); 432 | 433 | assertTrue(!error); 434 | 435 | System.out.println("Test static BinName: Complete"); 436 | } 437 | 438 | //Validate map sort order 439 | @Test 440 | public void testValidateMapOrder() throws Exception { 441 | System.out.println("TestValidateMapOrder: start"); 442 | if(!client.isConnected()) { 443 | System.out.println("Client is not able to connect:" + host + ":" + port); 444 | return; 445 | } 446 | 447 | // Create datafile 448 | 449 | HashMap binMap = (HashMap) testSchema.get("test_map"); 450 | 451 | 452 | int setMod = 5, range = 100, seed = 10, nrecords = 10; 453 | dataFile = rootDir + "dataMap.dsv"; 454 | writeDataMap(dataFile, nrecords, setMod, range, seed, binMap); 455 | 456 | // Run Aerospike loader 457 | this.expectedMapOrder = MapOrder.UNORDERED; 458 | AerospikeLoad.main(new String[]{"-h", host,"-p", port,"-n", ns, "-ec", error_count,"-wa", write_action, "-um", "-c", "src/test/resources/configMap.json", dataFile}); 459 | 460 | // Validate loaded data 461 | String dstType = "map"; 462 | boolean dataValid = validateMap(client, dataFile, nrecords, setMod, range, seed, binMap, dstType); 463 | boolean error = getError(log); 464 | 465 | assertTrue(dataValid); 466 | assertTrue(!error); 467 | this.expectedMapOrder = MapOrder.KEY_ORDERED; 468 | 
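        // expectedMapOrder is restored above so that later map validations again expect KEY_ORDERED.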
        System.out.println("TestValidateMapOrder: Complete");
    }

    // Helper functions
    public void writeDataMap(String fileName, int nrecords, int setMod, int range, int seed,
            HashMap<String, String> binMap) {
        String delimiter = (String) testSchema.get("delimiter");
        File file = new File(fileName);

        try {
            // If the file doesn't exist, create it.
            if (!file.exists()) {
                file.createNewFile();
            }
            Writer bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF8"));
            Random r = new Random(seed);
            int rint;
            for (int i = 0; i <= nrecords; i++) {
                int comma = 0;

                rint = r.nextInt(range);

                Iterator<Entry<String, String>> iterator = binMap.entrySet().iterator();
                String binName;
                String binType;
                Map.Entry<String, String> mapEntry;
                while (iterator.hasNext()) {
                    mapEntry = iterator.next();
                    binName = mapEntry.getKey();
                    binType = mapEntry.getValue();
                    if (binName != null) {
                        if (i == 0) {
                            // First row is the header line.
                            bw.write(binName);
                        } else {
                            if (binName.equalsIgnoreCase("key")) {
                                bw.write(getValue(binName, binType, i));
                            } else if (binName.equalsIgnoreCase("set")) {
                                bw.write(getValue(binName, "string", i % setMod));
                            } else {
                                //bw.write(getValue(binName, binType, rint));
                                // TODO this int shouldn't be random.
                                bw.write(getValue(binName, binType, i));
                            }
                        }
                    }
                    if (binMap.size() > ++comma) {
                        bw.write(delimiter);
                    }
                }
                bw.write("\n");
            }
            bw.flush();
            bw.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public boolean validateMap(AerospikeClient client, String filename, int nrecords, int setMod, int range, int seed,
            HashMap<String, String> binMap, String dstType) {
        boolean valid = false;
        Random r = new Random(seed);
        int rint;
        String as_binname_suffix = (String) testSchema.get("as_binname_suffix");

        for (int i = 1; i <= nrecords; i++) {

            String key = null;
            String set = null;
            Key key1 = null;
            Bin bin1 = null;
            String bin1Type = null;
            Record record = null;

            rint = r.nextInt(range);

            Iterator<Entry<String, String>> iterator = binMap.entrySet().iterator();
            String binName;
            String binType = null;
            Map.Entry<String, String> mapEntry;
            while (iterator.hasNext()) {
                mapEntry = iterator.next();
                if ((binName = mapEntry.getKey()) == null) {
                    continue;
                }
                /*
                if (i == 0) {
                    // skip 1st row data
                    continue;
                }
                */
                binType = mapEntry.getValue();

                if (binName.equalsIgnoreCase("key")) {
                    key = (String.format(getValue(binName, binType, i)));
                } else if (binName.equalsIgnoreCase("set")) {
                    set = String.format(getValue(binName, "string", i % setMod));
                } else {
                    // TODO this int shouldn't be random.
                    //String value = (String.format(getValue(binName, binType, rint)));
                    String value = (String.format(getValue(binName, binType, i)));

                    // We are writing the bin name in Aerospike as (column name + as_binname_suffix),
                    // just to make the naming more flexible.
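                    // For illustration: if as_binname_suffix were "Bin", data column "intData"
                    // would be validated against Aerospike bin "intDataBin" (cf. configInt.json).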
                    binName = binName + as_binname_suffix;
                    bin1 = new Bin(binName, value);
                    // bin1Type = value;
                    bin1Type = binType;
                }
            }
            /*
            if (i != 0) {
                continue;
            }
            */
            try {
                key1 = new Key(ns, set, key);
                record = client.get(new Policy(), key1);
            } catch (AerospikeException e) {
                e.printStackTrace();
            }

            if (record == null) {
                // The record was not loaded at all.
                return false;
            }
            if (!validateBin(key1, bin1, bin1Type, dstType, record)) {
                // Every record written by the loader must validate.
                return false;
            }
            valid = true;
        }

        return valid;
    }

    /**
     * @param key key to validate
     * @param bin bin to validate
     * @param binType type of bin
     * @param dstType dst bin type
     * @param record received record
     * @return true if the received bin value matches the expected value
     */
    private boolean validateBin(Key key, Bin bin, String binType, String dstType, Record record) {
        boolean valid = false;
        String expected = null;

        Object received = record.bins.get(bin.name);
        if (binType != null && binType.equalsIgnoreCase("timestamp")
                && dstType != null && dstType.equalsIgnoreCase("integer")) {

            DateFormat format = new SimpleDateFormat("MM/dd/yy");
            try {
                Date formatDate = format.parse(bin.value.toString());
                // getTime() is in milliseconds; the loader stores epoch seconds.
                long epochSeconds = formatDate.getTime() / 1000;
                expected = String.format("%d", epochSeconds);
            } catch (java.text.ParseException e) {
                e.printStackTrace();
            }

        } else if (dstType != null && dstType.equalsIgnoreCase("blob")) {
            expected = convertHexToString(bin.value.toString());
            received = new String((byte[]) received);
        } else if (dstType != null && (dstType.equalsIgnoreCase("list"))) {
            received = received.toString().replace("=", ":");
            expected = bin.value.toString().replace("'", "");
            expected = expected.replace("\"", "");

        } else if (dstType != null && (dstType.equalsIgnoreCase("json"))) {
            System.out.println(String.format("Currently json cannot be matched."));

            //received = received.toString().replace("=", ":");
            //expected = bin.value.toString().replace("'", "");
            //expected = expected.replace("\"", "");

            return true;
        } else if (dstType != null && (dstType.equalsIgnoreCase("map"))) {
            System.out.println(String.format("Currently only map order is checked."));

            //received = received.toString().replace("=", ":");
            //expected = bin.value.toString().replace("'", "");
            //expected = expected.replace("\"", "");

            MapOrder mapOrder = (received instanceof SortedMap) ? MapOrder.KEY_ORDERED : MapOrder.UNORDERED;

            return (mapOrder == this.expectedMapOrder);
        } else {
            expected = bin.value.toString();
        }

        if (received != null && received.toString().equals(expected)) {
            System.out.println(String.format(
                    "Bin matched: namespace=%s set=%s key=%s bin=%s value=%s generation=%d expiration=%d",
                    key.namespace, key.setName, key.userKey, bin.name, received, record.generation, record.expiration));
            valid = true;
        } else {
            System.out.println(String.format("Put/Get mismatch: Expected %s. Received %s.", expected, received));
        }

        return valid;
    }

    /**
     * @param log log file name
     * @return true if the log contains any ERROR line
     */
    public boolean getError(String log) {
        boolean error = false;

        BufferedReader br = null;
        try {
            String line;
            br = new BufferedReader(new FileReader(log));
            try {
                while ((line = br.readLine()) != null) {
                    // The log4j pattern ("%-5p ...") puts the level first, so check the
                    // line prefix instead of substring(0, 5), which throws on short lines.
                    if (line.startsWith("ERROR")) {
                        error = true;
                    }
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }

        try {
            if (br != null) {
                br.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return error;
    }

    /**
     * @param binName binName prefix
     * @param binType type of binValue
     * @param i index of insert
     * @return generated column value for the given bin type
     */
    public String getValue(String binName, String binType, int i) {

        String value = null;

        switch (getBinType(binType.toLowerCase())) {
        case BLOB:
            value = convertStringToHex(String.format("%s%d", binName, i));
            break;
        case INTEGER:
            value = String.format("%d", i);
            break;
        case JSON:
            value = "{\"k1\": \"v1\", \"k2\": [\"lv1\", \"lv2\"], \"k3\": {\"mk1\": \"mv1\"}}";
            break;
        case LIST:
            value = "[\"a\", \"b\", \"c\", [\"d\", \"e\"]]";
            break;
        case MAP:
            value = "{\"a\":\"b\", \"c\":{\"e\":\"d\"}, \"b\":\"c\"}";
            // sorted value should be "{\"a\":\"b\", \"b\":\"c\", \"c\":{\"d\":\"e\"}}";
            break;
        case STRING:
            if (binName.equalsIgnoreCase("utf8")) {
                value = String.format("%s%d", "Ûtf8", i);
            } else
                value = String.format("%s%d", binName, i);
            break;
        case TIMESTAMP:
            value = String.format("%d/%d/%d", i % 12, i % 30, i % 100);
            break;
        default:
            break;
        }

        return value;
    }

    public BinType getBinType(String type) {
        if ("string".equalsIgnoreCase(type)) {
            return BinType.STRING;
        } else if ("integer".equalsIgnoreCase(type)) {
            return BinType.INTEGER;
        } else if ("blob".equalsIgnoreCase(type)) {
            return BinType.BLOB;
        } else if ("list".equalsIgnoreCase(type)) {
            return BinType.LIST;
        } else if ("map".equalsIgnoreCase(type)) {
            return BinType.MAP;
        } else if ("json".equalsIgnoreCase(type)) {
            return BinType.JSON;
        } else if ("timestamp".equalsIgnoreCase(type)) {
            return BinType.TIMESTAMP;
        }
        return null;
    }

    public String convertStringToHex(String str) {

        char[] chars = str.toCharArray();
        StringBuffer hex = new StringBuffer();
        for (int i = 0; i < chars.length; i++) {
            hex.append(Integer.toHexString((int) chars[i]));
        }
        return hex.toString();
    }

    public String convertHexToString(String hex) {

        StringBuilder sb = new StringBuilder();
        StringBuilder temp = new StringBuilder();

        for (int i = 0; i < hex.length() - 1; i += 2) {
            // get the hex in pairs
            String pair = hex.substring(i, (i + 2));
            // convert hex to decimal
            int numpair = Integer.parseInt(pair, 16);
            // convert the decimal to character
            sb.append((char) numpair);

            temp.append(numpair);
        }
        return sb.toString();
    }
}
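// Illustrative round trip for the two helpers above: convertStringToHex("loc8") yields
// "6c6f6338", and convertHexToString("6c6f6338") restores "loc8"; the blob columns in
// src/test/resources/data.dsv use the same encoding (e.g. "locblob8" <-> "6c6f63626c6f6238").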
796 | -------------------------------------------------------------------------------- /src/test/resources/configAllDataType.json: -------------------------------------------------------------------------------- 1 | { 2 | "version" : "2.0", 3 | "input_type" : "dsv", 4 | "dsv_config": { "delimiter": "##" , "n_columns_datafile": 8, "header_exist": true}, 5 | 6 | "mappings": [ 7 | { 8 | "key": {"column_name":"key", "type": "string"}, 9 | 10 | "set": { "column_name":"set" , "type": "string"}, 11 | 12 | "bin_list": [ 13 | { 14 | "name": "intDataBin", 15 | "value": {"column_name": "intData", "type" : "integer"} 16 | }, 17 | { 18 | "name": "floatDataBin", 19 | "value": {"column_name": "floatData", "type" : "float"} 20 | }, 21 | { 22 | "name": "stringDataBin", 23 | "value": {"column_name": "stringData", "type" : "string"} 24 | }, 25 | { 26 | "name": "listDataBin", 27 | "value": {"column_name": "listData", "type" : "json"} 28 | }, 29 | { 30 | "name": "mapDataBin", 31 | "value": {"column_name": "mapData", "type" : "json"} 32 | }, 33 | { 34 | "name": "dateDataBin", 35 | "value": {"column_name": "dateData", "type" : "timestamp", "dst_type": "integer", "encoding":"MM/dd/yy"} 36 | }, 37 | { 38 | "name": "blobDataBin", 39 | "value": {"column_name": "blobData", "type" : "blob", "dst_type": "blob", "encoding":"hex"} 40 | } 41 | ] 42 | } 43 | ] 44 | } 45 | -------------------------------------------------------------------------------- /src/test/resources/configBlob.json: -------------------------------------------------------------------------------- 1 | { 2 | "version" : "2.0", 3 | "dsv_config": { "delimiter": "##" , "n_columns_datafile": 8, "header_exist": true}, 4 | 5 | "mappings": [ 6 | { 7 | "key": {"column_name":"key", "type": "string"}, 8 | "set": { "column_name":"set" , "type": "string"}, 9 | "bin_list": [ 10 | { 11 | "name": "blobDataBin", 12 | "value": {"column_name": "blobData", "type" : "blob", "dst_type": "blob", "encoding":"hex"} 13 | } 14 | 15 | ] 16 | } 17 | ] 18 | 19 | } 20 | -------------------------------------------------------------------------------- /src/test/resources/configDate.json: -------------------------------------------------------------------------------- 1 | { 2 | "version" : "2.0", 3 | "dsv_config": { "delimiter": "##" , "n_columns_datafile": 3, "header_exist": true}, 4 | 5 | "mappings": [ 6 | { 7 | "key": {"column_name":"key", "type": "string"}, 8 | 9 | "set": { "column_name":"set" , "type": "string"}, 10 | 11 | "bin_list": [ 12 | 13 | { 14 | "name": "dateDataBin", 15 | "value": {"column_name": "dateData", "type" : "timestamp", "dst_type": "integer", "encoding":"MM/dd/yy"} 16 | }, 17 | { 18 | "name": "timestamp", 19 | "value": {"column_name": "system_time", "type" : "timestamp", "dst_type": "integer", "encoding":"MM/dd/yy"} 20 | } 21 | ] 22 | } 23 | 24 | ] 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/test/resources/configDynamicBinName.json: -------------------------------------------------------------------------------- 1 | { 2 | "version" : "2.0", 3 | "dsv_config": { 4 | "delimiter": ",", 5 | "n_columns_datafile": 8, 6 | "header_exist": true 7 | }, 8 | "mappings": [ 9 | { 10 | "key": { 11 | "column_name": "col4", 12 | "type": "string" 13 | }, 14 | "set": { 15 | "column_name": "col8" 16 | }, 17 | "bin_list": [ 18 | { 19 | "name": {"column_name": "col1"}, 20 | "value": { 21 | "column_name": "col2", 22 | "type": "string" 23 | } 24 | }, 25 | { 26 | "name":{"column_name": "col5"}, 27 | "value": { 28 | "column_name": 
"col6", 29 | "type": "timestamp", 30 | "dst_type": "integer", 31 | "encoding" : "MM/dd/yyyy" 32 | } 33 | } 34 | ] 35 | } 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /src/test/resources/configInt.json: -------------------------------------------------------------------------------- 1 | { 2 | "version" : "2.0", 3 | "dsv_config": { "delimiter": "##" , "n_columns_datafile": 3, "header_exist": true}, 4 | 5 | "mappings": [ 6 | { 7 | "key": {"column_name":"key", "type": "string"}, 8 | 9 | "set": { "column_name":"set" , "type": "string"}, 10 | 11 | 12 | "bin_list": [ 13 | { 14 | "name": "intDataBin", 15 | "value": {"column_name": "intData", "type" : "integer"} 16 | } 17 | 18 | ] 19 | }, 20 | { 21 | "secondary_mapping": "true", 22 | "key": {"column_name":"intData", "type": "string"}, 23 | 24 | "set": "newset", 25 | 26 | 27 | "bin_list": [ 28 | { 29 | "name": "revIntBin", 30 | "value": {"column_name": "key", "type" : "string"} 31 | } 32 | 33 | ] 34 | } 35 | ] 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/test/resources/configJson.json: -------------------------------------------------------------------------------- 1 | { 2 | "version" : "2.0", 3 | "dsv_config": { "delimiter": "##" , "n_columns_datafile": 3, "header_exist": true}, 4 | 5 | "mappings": [ 6 | { 7 | "key": {"column_name":"key", "type": "string"}, 8 | 9 | "set": { "column_name":"set" , "type": "string"}, 10 | 11 | "bin_list": [ 12 | 13 | { 14 | "name": "jsonDataBin", 15 | "value": {"column_name": "jsonData", "type" : "json"} 16 | } 17 | 18 | ] 19 | } 20 | ] 21 | 22 | } 23 | -------------------------------------------------------------------------------- /src/test/resources/configList.json: -------------------------------------------------------------------------------- 1 | { 2 | "version" : "2.0", 3 | "dsv_config": { "delimiter": "##" , "n_columns_datafile": 3, "header_exist": true}, 4 | 5 | "mappings": [ 6 | { 7 | "key": {"column_name":"key", "type": "string"}, 8 | 9 | "set": { "column_name":"set" , "type": "string"}, 10 | 11 | 12 | "bin_list": [ 13 | 14 | { 15 | "name": "listDataBin", 16 | "value": {"column_name": "listData", "type" : "json"} 17 | } 18 | 19 | ] 20 | } 21 | ] 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/configMap.json: -------------------------------------------------------------------------------- 1 | { 2 | "version" : "2.0", 3 | "dsv_config": { "delimiter": "##" , "n_columns_datafile": 3, "header_exist": true}, 4 | 5 | "mappings": [ 6 | { 7 | "key": {"column_name":"key", "type": "string"}, 8 | 9 | "set": { "column_name":"set" , "type": "string"}, 10 | 11 | 12 | "bin_list": [ 13 | { 14 | "name": "mapDataBin", 15 | "value": {"column_name": "mapData", "type" : "json"} 16 | } 17 | 18 | ] 19 | } 20 | ] 21 | 22 | } 23 | -------------------------------------------------------------------------------- /src/test/resources/configStaticBinName.json: -------------------------------------------------------------------------------- 1 | { 2 | "version" : "2.0", 3 | "dsv_config": { 4 | "delimiter": ",", 5 | "n_columns_datafile": 4, 6 | "header_exist": true 7 | }, 8 | "mappings": [ 9 | { 10 | "key": { 11 | "column_position": 2, 12 | "type": "string" 13 | }, 14 | "set": { 15 | "column_position": 4 16 | }, 17 | "bin_list": [ 18 | { 19 | "name": "Inserted_from", 20 | "value": "abc_db" 21 | } 22 | ] 23 | } 24 | ] 25 | } 
-------------------------------------------------------------------------------- /src/test/resources/configString.json: -------------------------------------------------------------------------------- 1 | { 2 | "version" : "2.0", 3 | "dsv_config": { "delimiter": "##" , "n_columns_datafile": 3, "header_exist": true}, 4 | 5 | "mappings": [ 6 | { 7 | "key": {"column_name":"key", "type": "string"}, 8 | 9 | "set": { "column_name":"set" , "type": "string"}, 10 | 11 | 12 | "bin_list": [ 13 | 14 | { 15 | "name": "stringDataBin", 16 | "value": {"column_name": "stringData", "type" : "string"} 17 | } 18 | 19 | ] 20 | } 21 | ] 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/configUtf8.json: -------------------------------------------------------------------------------- 1 | { 2 | "version" : "2.0", 3 | "dsv_config": { "delimiter": "##" , "n_columns_datafile": 3, "header_exist": true}, 4 | 5 | "mappings": [ 6 | { 7 | "key": {"column_name":"key", "type": "string"}, 8 | 9 | "set": { "column_name":"set" , "type": "string"}, 10 | 11 | 12 | "bin_list": [ 13 | 14 | { 15 | "name": "utf8DataBin", 16 | "value": {"column_name": "utf8Data", "type" : "string"} 17 | } 18 | 19 | ] 20 | } 21 | ] 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/data.dsv: -------------------------------------------------------------------------------- 1 | loc#~#locblob#~#set#~#dob#~#key#~#age 2 | loc80#~#6c6f63626c6f623830#~#set1#~#8/20/80#~#1#~#80 3 | loc93#~#6c6f63626c6f623933#~#set2#~#9/3/93#~#2#~#93 4 | loc90#~#6c6f63626c6f623930#~#set3#~#6/0/90#~#3#~#90 5 | loc46#~#6c6f63626c6f623436#~#set4#~#10/16/46#~#4#~#46 6 | loc56#~#6c6f63626c6f623536#~#set0#~#8/26/56#~#5#~#56 7 | loc97#~#6c6f63626c6f623937#~#set1#~#1/7/97#~#6#~#97 8 | loc88#~#6c6f63626c6f623838#~#set2#~#4/28/88#~#7#~#88 9 | loc81#~#6c6f63626c6f623831#~#set3#~#9/21/81#~#8#~#81 10 | loc14#~#6c6f63626c6f623134#~#set4#~#2/14/14#~#9#~#14 11 | loc23#~#6c6f63626c6f623233#~#set0#~#11/23/23#~#10#~#23 12 | loc99#~#6c6f63626c6f623939#~#set1#~#3/9/99#~#11#~#99 13 | loc91#~#6c6f63626c6f623931#~#set2#~#7/1/91#~#12#~#91 14 | loc8#~#6c6f63626c6f6238#~#set3#~#8/8/8#~#13#~#8 15 | loc95#~#6c6f63626c6f623935#~#set4#~#11/5/95#~#14#~#95 16 | loc80#~#6c6f63626c6f623830#~#set0#~#8/20/80#~#15#~#80 17 | loc86#~#6c6f63626c6f623836#~#set1#~#2/26/86#~#16#~#86 18 | loc53#~#6c6f63626c6f623533#~#set2#~#5/23/53#~#17#~#53 19 | loc73#~#6c6f63626c6f623733#~#set3#~#1/13/73#~#18#~#73 20 | loc38#~#6c6f63626c6f623338#~#set4#~#2/8/38#~#19#~#38 21 | loc93#~#6c6f63626c6f623933#~#set0#~#9/3/93#~#20#~#93 22 | loc9#~#6c6f63626c6f6239#~#set1#~#9/9/9#~#21#~#9 23 | loc95#~#6c6f63626c6f623935#~#set2#~#11/5/95#~#22#~#95 24 | loc8#~#6c6f63626c6f6238#~#set3#~#8/8/8#~#23#~#8 25 | loc35#~#6c6f63626c6f623335#~#set4#~#11/5/35#~#24#~#35 26 | loc49#~#6c6f63626c6f623439#~#set0#~#1/19/49#~#25#~#49 27 | loc74#~#6c6f63626c6f623734#~#set1#~#2/14/74#~#26#~#74 28 | loc70#~#6c6f63626c6f623730#~#set2#~#10/10/70#~#27#~#70 29 | loc8#~#6c6f63626c6f6238#~#set3#~#8/8/8#~#28#~#8 30 | loc50#~#6c6f63626c6f623530#~#set4#~#2/20/50#~#29#~#50 31 | loc48#~#6c6f63626c6f623438#~#set0#~#0/18/48#~#30#~#48 32 | loc2#~#6c6f63626c6f6232#~#set1#~#2/2/2#~#31#~#2 33 | loc72#~#6c6f63626c6f623732#~#set2#~#0/12/72#~#32#~#72 34 | loc96#~#6c6f63626c6f623936#~#set3#~#0/6/96#~#33#~#96 35 | loc17#~#6c6f63626c6f623137#~#set4#~#5/17/17#~#34#~#17 36 | loc75#~#6c6f63626c6f623735#~#set0#~#3/15/75#~#35#~#75 37 | 
loc10#~#6c6f63626c6f623130#~#set1#~#10/10/10#~#36#~#10 38 | loc10#~#6c6f63626c6f623130#~#set2#~#10/10/10#~#37#~#10 39 | loc55#~#6c6f63626c6f623535#~#set3#~#7/25/55#~#38#~#55 40 | loc33#~#6c6f63626c6f623333#~#set4#~#9/3/33#~#39#~#33 41 | loc39#~#6c6f63626c6f623339#~#set0#~#3/9/39#~#40#~#39 42 | loc37#~#6c6f63626c6f623337#~#set1#~#1/7/37#~#41#~#37 43 | loc92#~#6c6f63626c6f623932#~#set2#~#8/2/92#~#42#~#92 44 | loc12#~#6c6f63626c6f623132#~#set3#~#0/12/12#~#43#~#12 45 | loc68#~#6c6f63626c6f623638#~#set4#~#8/8/68#~#44#~#68 46 | loc99#~#6c6f63626c6f623939#~#set0#~#3/9/99#~#45#~#99 47 | loc98#~#6c6f63626c6f623938#~#set1#~#2/8/98#~#46#~#98 48 | loc94#~#6c6f63626c6f623934#~#set2#~#10/4/94#~#47#~#94 49 | loc41#~#6c6f63626c6f623431#~#set3#~#5/11/41#~#48#~#41 50 | loc43#~#6c6f63626c6f623433#~#set4#~#7/13/43#~#49#~#43 51 | loc5#~#6c6f63626c6f6235#~#set0#~#5/5/5#~#50#~#5 52 | loc61#~#6c6f63626c6f623631#~#set1#~#1/1/61#~#51#~#61 53 | loc71#~#6c6f63626c6f623731#~#set2#~#11/11/71#~#52#~#71 54 | loc95#~#6c6f63626c6f623935#~#set3#~#11/5/95#~#53#~#95 55 | loc81#~#6c6f63626c6f623831#~#set4#~#9/21/81#~#54#~#81 56 | loc89#~#6c6f63626c6f623839#~#set0#~#5/29/89#~#55#~#89 57 | loc8#~#6c6f63626c6f6238#~#set1#~#8/8/8#~#56#~#8 58 | loc92#~#6c6f63626c6f623932#~#set2#~#8/2/92#~#57#~#92 59 | loc8#~#6c6f63626c6f6238#~#set3#~#8/8/8#~#58#~#8 60 | loc68#~#6c6f63626c6f623638#~#set4#~#8/8/68#~#59#~#68 61 | loc60#~#6c6f63626c6f623630#~#set0#~#0/0/60#~#60#~#60 62 | loc14#~#6c6f63626c6f623134#~#set1#~#2/14/14#~#61#~#14 63 | loc36#~#6c6f63626c6f623336#~#set2#~#0/6/36#~#62#~#36 64 | loc99#~#6c6f63626c6f623939#~#set3#~#3/9/99#~#63#~#99 65 | loc36#~#6c6f63626c6f623336#~#set4#~#0/6/36#~#64#~#36 66 | loc44#~#6c6f63626c6f623434#~#set0#~#8/14/44#~#65#~#44 67 | loc90#~#6c6f63626c6f623930#~#set1#~#6/0/90#~#66#~#90 68 | loc45#~#6c6f63626c6f623435#~#set2#~#9/15/45#~#67#~#45 69 | loc99#~#6c6f63626c6f623939#~#set3#~#3/9/99#~#68#~#99 70 | loc28#~#6c6f63626c6f623238#~#set4#~#4/28/28#~#69#~#28 71 | loc22#~#6c6f63626c6f623232#~#set0#~#10/22/22#~#70#~#22 72 | loc80#~#6c6f63626c6f623830#~#set1#~#8/20/80#~#71#~#80 73 | loc68#~#6c6f63626c6f623638#~#set2#~#8/8/68#~#72#~#68 74 | loc37#~#6c6f63626c6f623337#~#set3#~#1/7/37#~#73#~#37 75 | loc51#~#6c6f63626c6f623531#~#set4#~#3/21/51#~#74#~#51 76 | loc63#~#6c6f63626c6f623633#~#set0#~#3/3/63#~#75#~#63 77 | loc63#~#6c6f63626c6f623633#~#set1#~#3/3/63#~#76#~#63 78 | loc81#~#6c6f63626c6f623831#~#set2#~#9/21/81#~#77#~#81 79 | loc79#~#6c6f63626c6f623739#~#set3#~#7/19/79#~#78#~#79 80 | loc24#~#6c6f63626c6f623234#~#set4#~#0/24/24#~#79#~#24 81 | loc33#~#6c6f63626c6f623333#~#set0#~#9/3/33#~#80#~#33 82 | loc62#~#6c6f63626c6f623632#~#set1#~#2/2/62#~#81#~#62 83 | loc8#~#6c6f63626c6f6238#~#set2#~#8/8/8#~#82#~#8 84 | loc2#~#6c6f63626c6f6232#~#set3#~#2/2/2#~#83#~#2 85 | loc88#~#6c6f63626c6f623838#~#set4#~#4/28/88#~#84#~#88 86 | loc61#~#6c6f63626c6f623631#~#set0#~#1/1/61#~#85#~#61 87 | loc79#~#6c6f63626c6f623739#~#set1#~#7/19/79#~#86#~#79 88 | loc59#~#6c6f63626c6f623539#~#set2#~#11/29/59#~#87#~#59 89 | loc48#~#6c6f63626c6f623438#~#set3#~#0/18/48#~#88#~#48 90 | loc7#~#6c6f63626c6f6237#~#set4#~#7/7/7#~#89#~#7 91 | loc37#~#6c6f63626c6f623337#~#set0#~#1/7/37#~#90#~#37 92 | loc57#~#6c6f63626c6f623537#~#set1#~#9/27/57#~#91#~#57 93 | loc7#~#6c6f63626c6f6237#~#set2#~#7/7/7#~#92#~#7 94 | loc7#~#6c6f63626c6f6237#~#set3#~#7/7/7#~#93#~#7 95 | loc36#~#6c6f63626c6f623336#~#set4#~#0/6/36#~#94#~#36 96 | loc39#~#6c6f63626c6f623339#~#set0#~#3/9/39#~#95#~#39 97 | loc23#~#6c6f63626c6f623233#~#set1#~#11/23/23#~#96#~#23 98 | 
loc48#~#6c6f63626c6f623438#~#set2#~#0/18/48#~#97#~#48 99 | loc32#~#6c6f63626c6f623332#~#set3#~#8/2/32#~#98#~#32 100 | loc39#~#6c6f63626c6f623339#~#set4#~#3/9/39#~#99#~#39 101 | loc13#~#6c6f63626c6f623133#~#set0#~#1/13/13#~#100#~#13 102 | -------------------------------------------------------------------------------- /src/test/resources/dataAllTypes.dsv: -------------------------------------------------------------------------------- 1 | set##key##intData##doubleData##stringData##listData##mapData##dateData##blobData 2 | set1##key1##11##1.1##string1##["a","b","e"]##{"a":"c"}##01/16/2011##6c6f63626c6f623830 3 | set2##key2##12##1.2##string2##["d","e","c"]##{"c":"b"}##02/16/2011##6c6f63626c6f623830 4 | set3##key3##13##1.3##string3##["a","b","e"]##{"a":"c"}##03/16/2011##6c6f63626c6f623830 5 | set4##key4##14##1.4##string4##["d","e","c"]##{"c":"b"}##04/16/2011##6c6f63626c6f623830 6 | set5##key5##15##1.5##string5##["a","b","e"]##{"a":"c"}##05/16/2011##6c6f63626c6f623830 7 | set6##key6##16##1.6##string6##["d","e","c"]##{"c":"b"}##06/16/2011##6c6f63626c6f623830 8 | set7##key7##17##1.7##string7##["a","b","e"]##{"a":"c"}##07/16/2011##6c6f63626c6f623830 9 | set8##key8##18##1.8##string8##["d","b","c"]##{"c":"b"}##08/16/2011##6c6f63626c6f623830 -------------------------------------------------------------------------------- /src/test/resources/dataBlob.dsv: -------------------------------------------------------------------------------- 1 | set##blobData##key 2 | set1##626c6f62446174613830##key1 3 | set2##626c6f62446174613933##key2 4 | set3##626c6f62446174613930##key3 5 | set4##626c6f62446174613436##key4 6 | set0##626c6f62446174613536##key5 7 | set1##626c6f62446174613937##key6 8 | set2##626c6f62446174613838##key7 9 | set3##626c6f62446174613831##key8 10 | set4##626c6f62446174613134##key9 11 | set0##626c6f62446174613233##key10 12 | -------------------------------------------------------------------------------- /src/test/resources/dataDate.dsv: -------------------------------------------------------------------------------- 1 | set##dateData##key 2 | set1##8/20/80##key1 3 | set2##9/3/93##key2 4 | set3##6/0/90##key3 5 | set4##10/16/46##key4 6 | set0##8/26/56##key5 7 | set1##1/7/97##key6 8 | set2##4/28/88##key7 9 | set3##9/21/81##key8 10 | set4##2/14/14##key9 11 | set0##11/23/23##key10 12 | -------------------------------------------------------------------------------- /src/test/resources/dataDynamicBin.csv: -------------------------------------------------------------------------------- 1 | col1, col2, col3, col4, col5, col6, col7, col8 2 | user_location, India, key, userid1, last_visited, 08/16/2011, set, facebook 3 | user_location, India, key, userid1, last_visited, 08/17/2011, set, Tweeter 4 | user_location, USA, key, userid2, last_visited, 08/16/2011, set, Tweeter -------------------------------------------------------------------------------- /src/test/resources/dataInt.dsv: -------------------------------------------------------------------------------- 1 | set##key##intData 2 | set1##key1##1 3 | set2##key2##2 4 | set3##key3##3 5 | set4##key4##4 6 | set0##key5##5 7 | set1##key6##6 8 | set2##key7##7 9 | set3##key8##8 10 | set4##key9##9 11 | set0##key10##10 12 | -------------------------------------------------------------------------------- /src/test/resources/dataJson.dsv: -------------------------------------------------------------------------------- 1 | set##jsonData##key 2 | set1##{"k1": "v1", "k2": ["lv1", "lv2"], "k3": {"mk1": "mv1"}}##key1 3 | set2##{"k1": "v1", "k2": ["lv1", "lv2"], "k3": 
{"mk1": "mv1"}}##key2 4 | set3##{"k1": "v1", "k2": ["lv1", "lv2"], "k3": {"mk1": "mv1"}}##key3 5 | set4##{"k1": "v1", "k2": ["lv1", "lv2"], "k3": {"mk1": "mv1"}}##key4 6 | set0##{"k1": "v1", "k2": ["lv1", "lv2"], "k3": {"mk1": "mv1"}}##key5 7 | set1##{"k1": "v1", "k2": ["lv1", "lv2"], "k3": {"mk1": "mv1"}}##key6 8 | set2##{"k1": "v1", "k2": ["lv1", "lv2"], "k3": {"mk1": "mv1"}}##key7 9 | set3##{"k1": "v1", "k2": ["lv1", "lv2"], "k3": {"mk1": "mv1"}}##key8 10 | set4##{"k1": "v1", "k2": ["lv1", "lv2"], "k3": {"mk1": "mv1"}}##key9 11 | set0##{"k1": "v1", "k2": ["lv1", "lv2"], "k3": {"mk1": "mv1"}}##key10 12 | -------------------------------------------------------------------------------- /src/test/resources/dataList.dsv: -------------------------------------------------------------------------------- 1 | set##listData##key 2 | set1##["a", "b", "c", ["d", "e"]]##key1 3 | set2##["a", "b", "c", ["d", "e"]]##key2 4 | set3##["a", "b", "c", ["d", "e"]]##key3 5 | set4##["a", "b", "c", ["d", "e"]]##key4 6 | set0##["a", "b", "c", ["d", "e"]]##key5 7 | set1##["a", "b", "c", ["d", "e"]]##key6 8 | set2##["a", "b", "c", ["d", "e"]]##key7 9 | set3##["a", "b", "c", ["d", "e"]]##key8 10 | set4##["a", "b", "c", ["d", "e"]]##key9 11 | set0##["a", "b", "c", ["d", "e"]]##key10 12 | -------------------------------------------------------------------------------- /src/test/resources/dataMap.dsv: -------------------------------------------------------------------------------- 1 | mapData##set##key 2 | {"a":"b", "c":{"e":"d"}, "b":"c"}##set1##key1 3 | {"a":"b", "c":{"e":"d"}, "b":"c"}##set2##key2 4 | {"a":"b", "c":{"e":"d"}, "b":"c"}##set3##key3 5 | {"a":"b", "c":{"e":"d"}, "b":"c"}##set4##key4 6 | {"a":"b", "c":{"e":"d"}, "b":"c"}##set0##key5 7 | {"a":"b", "c":{"e":"d"}, "b":"c"}##set1##key6 8 | {"a":"b", "c":{"e":"d"}, "b":"c"}##set2##key7 9 | {"a":"b", "c":{"e":"d"}, "b":"c"}##set3##key8 10 | {"a":"b", "c":{"e":"d"}, "b":"c"}##set4##key9 11 | {"a":"b", "c":{"e":"d"}, "b":"c"}##set0##key10 12 | -------------------------------------------------------------------------------- /src/test/resources/dataStaticBin.csv: -------------------------------------------------------------------------------- 1 | user_location,key,last_visited, set 2 | India, userid1, 08/16/2011, facebook 3 | India, userid2, 08/17/2011, Tweeter 4 | USA, userid3, 08/16/2011, Tweeter -------------------------------------------------------------------------------- /src/test/resources/dataString.dsv: -------------------------------------------------------------------------------- 1 | set,key,strData 2 | set1,key1,str1 3 | set2,key2,str2 4 | set3,key3,str3 5 | set4,key4,str4 6 | set0,key5,str5 7 | set1,key6,str6 8 | set2,key7,str7 9 | set3,key8,str8 10 | set4,key9,str9 -------------------------------------------------------------------------------- /src/test/resources/dataUtf8.dsv: -------------------------------------------------------------------------------- 1 | set##utf8Data##key 2 | set1##utf8Data80##key1 3 | set2##utf8Data93##key2 4 | set3##utf8Data90##key3 5 | set4##utf8Data46##key4 6 | set0##utf8Data56##key5 7 | set1##utf8Data97##key6 8 | set2##utf8Data88##key7 9 | set3##utf8Data81##key8 10 | set4##utf8Data14##key9 11 | set0##utf8Data23##key10 12 | -------------------------------------------------------------------------------- /src/test/resources/testSchema.json: -------------------------------------------------------------------------------- 1 | { 2 | "delimiter" : "##", 3 | "as_binname_suffix" : "Bin", 4 | "test_integer" : { 5 | "key" : 
"string", 6 | "set" : "string", 7 | "intData" : "integer" 8 | }, 9 | "test_float" : { 10 | "key" : "string", 11 | "set" : "string", 12 | "floatData" : "float" 13 | }, 14 | "test_string" : { 15 | "key" : "string", 16 | "set" : "string", 17 | "stringData" : "string" 18 | }, 19 | "test_utf8" : { 20 | "key" : "string", 21 | "set" : "string", 22 | "utf8Data" : "string" 23 | }, 24 | "test_blob" : { 25 | "key" : "string", 26 | "set" : "string", 27 | "blobData" : "blob" 28 | }, 29 | "test_date" : { 30 | "key" : "string", 31 | "set" : "string", 32 | "dateData" : "timestamp" 33 | }, 34 | "test_list" : { 35 | "key" : "string", 36 | "set" : "string", 37 | "listData" : "list" 38 | }, 39 | "test_map" : { 40 | "key" : "string", 41 | "set" : "string", 42 | "mapData" : "map" 43 | }, 44 | "test_json" : { 45 | "key" : "string", 46 | "set" : "string", 47 | "jsonData" : "json" 48 | }, 49 | "test_alltype" : { 50 | "key" : "string", 51 | "set" : "string", 52 | "intData" : "integer", 53 | "floatData" : "float", 54 | "stringData" : "string", 55 | "blobData" : "blob", 56 | "dateData" : "timestamp", 57 | "listdata" : "list", 58 | "mapData" : "list" 59 | } 60 | } --------------------------------------------------------------------------------