├── .gitignore ├── src ├── main │ ├── resources │ │ ├── comments.csv │ │ ├── comment_split_1.json │ │ ├── config-jdbc.json │ │ ├── post.json │ │ ├── config.json │ │ ├── comment_split_2.json │ │ ├── comment.json │ │ └── config-dbpedia.json │ └── java │ │ └── com │ │ └── orientechnologies │ │ └── orient │ │ └── etl │ │ ├── OETLPipelineComponent.java │ │ ├── source │ │ ├── OAbstractSource.java │ │ ├── OSource.java │ │ ├── OSourceException.java │ │ ├── OInputSource.java │ │ ├── OContentSource.java │ │ ├── OHttpSource.java │ │ └── OFileSource.java │ │ ├── loader │ │ ├── OLoaderException.java │ │ ├── OLoader.java │ │ ├── OAbstractLoader.java │ │ └── OOutputLoader.java │ │ ├── transformer │ │ ├── OTransformer.java │ │ ├── OTransformException.java │ │ ├── OJSONTransformer.java │ │ ├── OAbstractTransformer.java │ │ ├── OFlowTransformer.java │ │ ├── OBlockTransformer.java │ │ ├── OLogTransformer.java │ │ ├── OCommandTransformer.java │ │ ├── OCodeTransformer.java │ │ ├── OVertexTransformer.java │ │ ├── OMergeTransformer.java │ │ ├── OAbstractLookupTransformer.java │ │ ├── OFieldTransformer.java │ │ ├── OLinkTransformer.java │ │ ├── OEdgeTransformer.java │ │ └── OCSVTransformer.java │ │ ├── block │ │ ├── OBlock.java │ │ ├── OAbstractBlock.java │ │ ├── OLetBlock.java │ │ ├── OCodeBlock.java │ │ └── OConsoleBlock.java │ │ ├── OETLProcessHaltedException.java │ │ ├── extractor │ │ ├── OExtractorException.java │ │ ├── OExtractor.java │ │ ├── OAbstractSourceExtractor.java │ │ ├── OAbstractExtractor.java │ │ ├── OJsonExtractor.java │ │ ├── ORowExtractor.java │ │ └── OJDBCExtractor.java │ │ ├── OAbstractETLPipelineComponent.java │ │ ├── OExtractedItem.java │ │ ├── OETLComponent.java │ │ ├── listener │ │ ├── OImporterListener.java │ │ ├── ODefaultImporterListener.java │ │ └── OScriptImporterListener.java │ │ ├── OETLPipeline.java │ │ ├── OAbstractETLComponent.java │ │ └── OETLComponentFactory.java └── test │ └── java │ └── com │ └── orientechnologies │ └── orient │ └── etl │ ├── 
OETLProcessorTest.java │ ├── loader │ └── OOrientDBLoaderTest.java │ ├── TestLoader.java │ ├── transformer │ ├── OLogTransformerTest.java │ ├── OFlowTransformerTest.java │ ├── OFieldTransformerTest.java │ ├── OVertexTransformerTest.java │ └── OEdgeTransformerTest.java │ ├── ETLBaseTest.java │ ├── RandomExtractor.java │ └── extractor │ ├── OJsonRandomExtractorTest.java │ └── OJsonExtractorTest.java ├── .travis.yml ├── script ├── oetl.sh └── oetl.bat ├── CONTRIBUTING.md ├── README.md └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | pom.xml.tag 3 | pom.xml.releaseBackup 4 | pom.xml.next 5 | release.properties 6 | 7 | *.iml 8 | .idea 9 | -------------------------------------------------------------------------------- /src/main/resources/comments.csv: -------------------------------------------------------------------------------- 1 | id;comment 2 | 1;text of comment 1 3 | 2;text of comment 2 4 | 3;text of comment 3 5 | 4;text of comment 4 -------------------------------------------------------------------------------- /src/main/resources/comment_split_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "source": { 3 | "file": { 4 | "path": "./src/main/resources/comments.csv" 5 | } 6 | } 7 | 8 | } 9 | 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: java 3 | branches: 4 | only: 5 | - develop 6 | #jdk: 7 | # - oraclejdk8 8 | before_install: 9 | - sed -i.bak -e 's|https://nexus.codehaus.org/snapshots/|https://oss.sonatype.org/content/repositories/codehaus-snapshots/|g' ~/.m2/settings.xml 10 | -------------------------------------------------------------------------------- /src/main/resources/config-jdbc.json: -------------------------------------------------------------------------------- 1 | { 
2 | "config": { 3 | "verbose": true 4 | }, 5 | "extractor": { 6 | "jdbc": { 7 | "driver": "com.mysql.jdbc.Driver", 8 | "url": "jdbc:mysql://localhost/test", 9 | "userName": "root", 10 | "userPassword": "", 11 | "query": "select * from Client" 12 | } 13 | }, 14 | "transformers": [ 15 | { 16 | "vertex": { 17 | "class": "Client" 18 | } 19 | } 20 | ], 21 | "loader": { 22 | "orientdb": { 23 | "dbURL": "plocal:/temp/databases/jdbctest", 24 | "dbAutoCreate": true 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/resources/post.json: -------------------------------------------------------------------------------- 1 | { 2 | "source": { "file": { "path": "/temp/datasets/posts.csv" } }, 3 | "extractor": { "row": {} }, 4 | "transformers": [ 5 | { "csv": {} }, 6 | { "vertex": { "class": "Post" } } 7 | ], 8 | "loader": { 9 | "orientdb": { 10 | "dbURL": "plocal:/temp/databases/blog", 11 | "dbType": "graph", 12 | "classes": [ 13 | {"name": "Post", "extends": "V"}, 14 | {"name": "Comment", "extends": "V"}, 15 | {"name": "HasComments", "extends": "E"} 16 | ], "indexes": [ 17 | {"class":"Post", "fields":["id:integer"], "type":"UNIQUE" } 18 | ] 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/resources/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "extractor": { 3 | "line": { 4 | "path": "/temp/test.csv", 5 | "lock": true 6 | } 7 | }, 8 | "transformers": [ 9 | { 10 | "csv": { 11 | "separator": ";", 12 | "columnsOnFirstLine": false, 13 | "columns": [ 14 | "id", 15 | "beginDate", 16 | "aams", 17 | "wow", 18 | "serial", 19 | "dontknow", 20 | "endDate" 21 | ] 22 | } 23 | }, 24 | { 25 | "field": { 26 | "fieldName": "dontknow", 27 | "expression": "dontknow.trim()" 28 | } 29 | }, 30 | { 31 | "field": { 32 | "fieldName": "time", 33 | "expression": "sysdate()" 34 | } 35 | }, 36 | { 37 | "skip": { 38 | 
"expression": "dontknow is null" 39 | } 40 | } 41 | ], 42 | "loader": { 43 | "orientdb_doc": { 44 | "class": "Client" 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/OETLPipelineComponent.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl; 20 | 21 | /** 22 | * ETL basic component. 
23 | */ 24 | public interface OETLPipelineComponent extends OETLComponent { 25 | void setPipeline(OETLPipeline iPipeline); 26 | } 27 | -------------------------------------------------------------------------------- /src/main/resources/comment_split_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "extractor": { 3 | "row": {} 4 | }, 5 | "transformers": [ 6 | { 7 | "csv": {} 8 | }, 9 | { 10 | "vertex": { 11 | "class": "Comment" 12 | } 13 | }, 14 | { 15 | "edge": { 16 | "class": "HasComments", 17 | "joinFieldName": "postId", 18 | "lookup": "Post.id" 19 | } 20 | } 21 | ], 22 | "loader": { 23 | "orientdb": { 24 | "dbURL": "plocal:./target/databases/blog", 25 | "dbType": "graph", 26 | "classes": [ 27 | { 28 | "name": "Post", 29 | "extends": "V" 30 | }, 31 | { 32 | "name": "Comment", 33 | "extends": "V" 34 | }, 35 | { 36 | "name": "HasComments", 37 | "extends": "E" 38 | } 39 | ], 40 | "indexes": [ 41 | { 42 | "class": "Post", 43 | "fields": [ 44 | "id:integer" 45 | ], 46 | "type": "UNIQUE" 47 | } 48 | ] 49 | } 50 | } 51 | } 52 | 53 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/source/OAbstractSource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.source; 20 | 21 | import com.orientechnologies.orient.etl.OAbstractETLComponent; 22 | 23 | /** 24 | * ETL Abstract Source component. 25 | */ 26 | public abstract class OAbstractSource extends OAbstractETLComponent implements OSource { 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/loader/OLoaderException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 
/**
 * Unchecked exception raised by ETL loaders.
 * <p>
 * Offers the same three construction forms as the other ETL exceptions in this module (cause only, message only, message plus
 * cause) so that callers never have to choose between keeping the message and keeping the original cause.
 */
public class OLoaderException extends RuntimeException {

  /**
   * Wraps an underlying failure.
   *
   * @param e
   *          the original exception, kept as the cause
   */
  public OLoaderException(final Exception e) {
    super(e);
  }

  /**
   * Creates an exception carrying only a description.
   *
   * @param s
   *          the detail message
   */
  public OLoaderException(String s) {
    super(s);
  }

  /**
   * Creates an exception carrying both a description and the underlying failure.
   *
   * @param s
   *          the detail message
   * @param e
   *          the original exception, kept as the cause
   */
  public OLoaderException(String s, Exception e) {
    super(s, e);
  }
}
25 | */ 26 | public interface OTransformer extends OETLPipelineComponent { 27 | Object transform(final Object input); 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/source/OSource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.source; 20 | 21 | import com.orientechnologies.orient.etl.OETLComponent; 22 | 23 | import java.io.Reader; 24 | 25 | /** 26 | * ETL Source interface. 27 | */ 28 | public interface OSource extends OETLComponent { 29 | String getUnit(); 30 | 31 | Reader read(); 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/transformer/OTransformException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
/**
 * Unchecked exception raised by ETL transformers.
 * <p>
 * Offers the same three construction forms as the other ETL exceptions in this module (cause only, message only, message plus
 * cause) so that callers never have to choose between keeping the message and keeping the original cause.
 */
public class OTransformException extends RuntimeException {

  /**
   * Wraps an underlying failure.
   *
   * @param e
   *          the original exception, kept as the cause
   */
  public OTransformException(final Exception e) {
    super(e);
  }

  /**
   * Creates an exception carrying only a description.
   *
   * @param s
   *          the detail message
   */
  public OTransformException(String s) {
    super(s);
  }

  /**
   * Creates an exception carrying both a description and the underlying failure.
   *
   * @param s
   *          the detail message
   * @param e
   *          the original exception, kept as the cause
   */
  public OTransformException(String s, Exception e) {
    super(s, e);
  }
}
26 | */ 27 | public interface OBlock extends OETLComponent { 28 | Object execute(); 29 | 30 | void setContext(OCommandContext context); 31 | } 32 | -------------------------------------------------------------------------------- /src/main/resources/comment.json: -------------------------------------------------------------------------------- 1 | { 2 | "source": { 3 | "file": { 4 | "path": "./src/main/resources/comments.csv" 5 | } 6 | }, 7 | "extractor": { 8 | "row": {} 9 | }, 10 | "transformers": [ 11 | { 12 | "csv": {} 13 | }, 14 | { 15 | "vertex": { 16 | "class": "Comment" 17 | } 18 | }, 19 | { 20 | "edge": { 21 | "class": "HasComments", 22 | "joinFieldName": "postId", 23 | "lookup": "Post.id" 24 | } 25 | } 26 | ], 27 | "loader": { 28 | "orientdb": { 29 | "dbURL": "plocal:./target/databases/blog", 30 | "dbType": "graph", 31 | "classes": [ 32 | { 33 | "name": "Post", 34 | "extends": "V" 35 | }, 36 | { 37 | "name": "Comment", 38 | "extends": "V" 39 | }, 40 | { 41 | "name": "HasComments", 42 | "extends": "E" 43 | } 44 | ], 45 | "indexes": [ 46 | { 47 | "class": "Post", 48 | "fields": [ 49 | "id:integer" 50 | ], 51 | "type": "UNIQUE" 52 | } 53 | ] 54 | } 55 | } 56 | } 57 | 58 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/OETLProcessHaltedException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl; 20 | 21 | import com.orientechnologies.common.exception.OException; 22 | 23 | public class OETLProcessHaltedException extends OException { 24 | public OETLProcessHaltedException(Throwable iNested) { 25 | super(iNested); 26 | } 27 | 28 | public OETLProcessHaltedException(final String s) { 29 | super(s); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/source/OSourceException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 
/**
 * Unchecked exception raised by ETL sources.
 * <p>
 * Can be built from a message, from a cause, or from both.
 */
public class OSourceException extends RuntimeException {

  /**
   * @param s
   *          the detail message
   * @param e
   *          the original exception, kept as the cause
   */
  public OSourceException(String s, Exception e) {
    super(s, e);
  }

  /**
   * @param s
   *          the detail message
   */
  public OSourceException(String s) {
    super(s);
  }

  /**
   * @param e
   *          the original exception, kept as the cause
   */
  public OSourceException(final Exception e) {
    super(e);
  }
}
/**
 * Unchecked exception raised by ETL extractors.
 * <p>
 * Can be built from a message, from a cause, or from both.
 */
public class OExtractorException extends RuntimeException {

  /**
   * @param s
   *          the detail message
   * @param e
   *          the original exception, kept as the cause
   */
  public OExtractorException(String s, Exception e) {
    super(s, e);
  }

  /**
   * @param s
   *          the detail message
   */
  public OExtractorException(String s) {
    super(s);
  }

  /**
   * @param e
   *          the original exception, kept as the cause
   */
  public OExtractorException(final Exception e) {
    super(e);
  }
}
23 | */ 24 | public abstract class OAbstractETLPipelineComponent extends OAbstractETLComponent implements OETLPipelineComponent { 25 | protected OETLPipeline pipeline; 26 | 27 | @Override 28 | public void setPipeline(final OETLPipeline iPipeline) { 29 | pipeline = iPipeline; 30 | context = iPipeline.getContext(); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/OExtractedItem.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * * 17 | * * For more information: http://www.orientechnologies.com 18 | * 19 | */ 20 | 21 | package com.orientechnologies.orient.etl; 22 | 23 | /** 24 | * Immutable Object representing extracted item. 
/**
 * Value holder pairing an extracted payload with a number.
 * <p>
 * Both fields are {@code final} and are assigned once in the constructor. The payload reference is stored as given, without
 * copying.
 */
public class OExtractedItem {

  /** Number given at construction time (presumably the item's position in the extraction; confirm against callers). */
  public final long   num;

  /** The extracted object itself; may be {@code null} if the caller passed {@code null}. */
  public final Object payload;

  /**
   * @param iCurrent
   *          value stored in {@link #num}
   * @param iPayload
   *          value stored in {@link #payload}
   */
  public OExtractedItem(final long iCurrent, final Object iPayload) {
    this.num = iCurrent;
    this.payload = iPayload;
  }
}
26 | */ 27 | public interface OLoader extends OETLPipelineComponent { 28 | void load(final Object input, OCommandContext context); 29 | 30 | long getProgress(); 31 | 32 | String getUnit(); 33 | 34 | void rollback(); 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/OETLComponent.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.record.impl.ODocument; 23 | 24 | /** 25 | * ETL basic component. 
26 | */ 27 | public interface OETLComponent { 28 | ODocument getConfiguration(); 29 | 30 | void configure(OETLProcessor iProcessor, ODocument iConfiguration, OCommandContext iSettings); 31 | 32 | void begin(); 33 | 34 | void end(); 35 | 36 | String getName(); 37 | } 38 | -------------------------------------------------------------------------------- /src/test/java/com/orientechnologies/orient/etl/OETLProcessorTest.java: -------------------------------------------------------------------------------- 1 | package com.orientechnologies.orient.etl; 2 | 3 | import com.orientechnologies.orient.etl.transformer.OCSVTransformer; 4 | import org.junit.Test; 5 | 6 | import static org.assertj.core.api.Assertions.assertThat; 7 | 8 | 9 | /** 10 | * Created by frank on 9/18/15. 11 | */ 12 | public class OETLProcessorTest { 13 | 14 | @Test 15 | public void testMain() throws Exception { 16 | 17 | final OETLProcessor processor = OETLProcessor.parseConfigAndParameters(new String[] { "-dburl=local:/tmp/db", 18 | "./src/main/resources/comment.json" }); 19 | 20 | assertThat(processor.getContext().getVariable("dburl")).isEqualTo("local:/tmp/db"); 21 | 22 | } 23 | 24 | @Test 25 | public void shouldParseSplittedConfiguration() throws Exception { 26 | 27 | final OETLProcessor processor = OETLProcessor.parseConfigAndParameters(new String[] { "-dburl=local:/tmp/db", 28 | "./src/main/resources/comment_split_1.json", "./src/main/resources/comment_split_2.json" }); 29 | 30 | assertThat(processor.getContext().getVariable("dburl")).isEqualTo("local:/tmp/db"); 31 | assertThat(processor.getTransformers().get(0)).isInstanceOf(OCSVTransformer.class); 32 | assertThat(processor.getExtractor().getName()).isEqualTo("row"); 33 | } 34 | 35 | } -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/extractor/OExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 
2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.extractor; 20 | 21 | import java.io.Reader; 22 | import java.util.Iterator; 23 | 24 | import com.orientechnologies.orient.etl.OETLComponent; 25 | import com.orientechnologies.orient.etl.OExtractedItem; 26 | 27 | /** 28 | * ETL Extractor. 29 | */ 30 | public interface OExtractor extends OETLComponent, Iterator { 31 | void extract(final Reader iReader); 32 | 33 | long getProgress(); 34 | 35 | long getTotal(); 36 | 37 | String getUnit(); 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/loader/OAbstractLoader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.loader; 20 | 21 | import com.orientechnologies.orient.etl.OAbstractETLPipelineComponent; 22 | 23 | import java.util.concurrent.atomic.AtomicLong; 24 | 25 | /** 26 | * ETL Abstract Loader component. 27 | */ 28 | public abstract class OAbstractLoader extends OAbstractETLPipelineComponent implements OLoader { 29 | protected AtomicLong progress = new AtomicLong(0); 30 | 31 | @Override 32 | public long getProgress() { 33 | return progress.get(); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/block/OAbstractBlock.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 
16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.block; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.etl.OAbstractETLComponent; 23 | 24 | /** 25 | * Abstract Block. 26 | */ 27 | public abstract class OAbstractBlock extends OAbstractETLComponent implements OBlock { 28 | @Override 29 | public Object execute() { 30 | if (!skip(null)) 31 | return executeBlock(); 32 | return null; 33 | } 34 | 35 | @Override 36 | public void setContext(final OCommandContext iContext) { 37 | context = iContext; 38 | } 39 | 40 | protected abstract Object executeBlock(); 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/transformer/OJSONTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 
16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.orient.core.record.impl.ODocument; 22 | 23 | public class OJSONTransformer extends OAbstractTransformer { 24 | @Override 25 | public String getName() { 26 | return "json"; 27 | } 28 | 29 | @Override 30 | public Object executeTransform(final Object input) { 31 | if (input instanceof ODocument) 32 | return input; 33 | else if (input instanceof String) 34 | return new ODocument((String) input); 35 | else 36 | throw new OTransformException(getName() + ": unknown input '" + input + "' of class '" + input.getClass() + "'"); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /script/oetl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # Copyright (c) 2014 Luca Garulli 4 | # 5 | 6 | #set current working directory 7 | cd `dirname $0` 8 | 9 | # resolve links - $0 may be a softlink 10 | PRG="$0" 11 | 12 | while [ -h "$PRG" ]; do 13 | ls=`ls -ld "$PRG"` 14 | link=`expr "$ls" : '.*-> \(.*\)$'` 15 | if expr "$link" : '/.*' > /dev/null; then 16 | PRG="$link" 17 | else 18 | PRG=`dirname "$PRG"`/"$link" 19 | fi 20 | done 21 | 22 | # Get standard environment variables 23 | PRGDIR=`dirname "$PRG"` 24 | 25 | # Only set ORIENTDB_HOME if not already set 26 | [ -f "$ORIENTDB_HOME"/lib/orientdb-etl-@VERSION@.jar ] || ORIENTDB_HOME=`cd "$PRGDIR/.." 
; pwd`
export ORIENTDB_HOME

# Use the JVM under JAVA_HOME when it exists, otherwise rely on "java" from the PATH
if [ -f "${JAVA_HOME}/bin/java" ]; then
   JAVA="${JAVA_HOME}/bin/java"
else
   JAVA=java
fi
export JAVA

# Same value as before, written as one quoted string instead of three concatenated pieces
ORIENTDB_SETTINGS="-Djava.util.logging.config.file=$ORIENTDB_HOME/config/orientdb-client-log.properties -Djava.awt.headless=true"
JAVA_OPTS=-Xmx512m
KEYSTORE=$ORIENTDB_HOME/config/cert/orientdb-console.ks
KEYSTORE_PASS=password
TRUSTSTORE=$ORIENTDB_HOME/config/cert/orientdb-console.ts
TRUSTSTORE_PASS=password
SSL_OPTS="-Dclient.ssl.enabled=false -Djavax.net.ssl.keyStore=$KEYSTORE -Djavax.net.ssl.keyStorePassword=$KEYSTORE_PASS -Djavax.net.ssl.trustStore=$TRUSTSTORE -Djavax.net.ssl.trustStorePassword=$TRUSTSTORE_PASS"

# The option variables are left unquoted on purpose: each holds several JVM options that must be split into words.
# "$@" (instead of $*) forwards every script argument unchanged, even when it contains spaces or wildcards.
"$JAVA" -server $JAVA_OPTS $ORIENTDB_SETTINGS $SSL_OPTS -Dfile.encoding=utf-8 -Dorientdb.build.number="@BUILD@" -cp "$ORIENTDB_HOME/lib/*" com.orientechnologies.orient.etl.OETLProcessor "$@"
-------------------------------------------------------------------------------- /script/oetl.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | rem 3 | rem Copyright (c) 2014 Luca Garulli @www.orientechnologies.com 4 | rem 5 | rem Guess ORIENTDB_HOME if not defined 6 | set CURRENT_DIR=%cd% 7 | 8 | if exist "%JAVA_HOME%\bin\java.exe" goto setJavaHome 9 | set JAVA="java" 10 | goto okJava 11 | 12 | :setJavaHome 13 | set JAVA="%JAVA_HOME%\bin\java" 14 | 15 | :okJava 16 | if not "%ORIENTDB_HOME%" == "" goto gotHome 17 | set ORIENTDB_HOME=%CURRENT_DIR% 18 | if exist "%ORIENTDB_HOME%\bin\oetl.bat" goto okHome 19 | cd ..
rem Second guess: the parent of the starting directory; then go back to where the script was started
set ORIENTDB_HOME=%cd%
cd %CURRENT_DIR%

:gotHome
rem ORIENTDB_HOME is accepted only when it contains bin\oetl.bat
if exist "%ORIENTDB_HOME%\bin\oetl.bat" goto okHome
echo The ORIENTDB_HOME environment variable is not defined correctly
echo This environment variable is needed to run this program
goto end

:okHome
rem Get the command line arguments and save them in the CMD_LINE_ARGS variable
set CMD_LINE_ARGS=%*

set KEYSTORE=%ORIENTDB_HOME%\config\cert\orientdb-console.ks
set KEYSTORE_PASS=password
set TRUSTSTORE=%ORIENTDB_HOME%\config\cert\orientdb-console.ts
set TRUSTSTORE_PASS=password
rem NOTE(review): the surrounding quotes become part of the value, so %SSL_OPTS% presumably reaches java as ONE argument - confirm
set SSL_OPTS="-Dclient.ssl.enabled=false -Djavax.net.ssl.keyStore=%KEYSTORE% -Djavax.net.ssl.keyStorePassword=%KEYSTORE_PASS% -Djavax.net.ssl.trustStore=%TRUSTSTORE% -Djavax.net.ssl.trustStorePassword=%TRUSTSTORE_PASS%"

set ORIENTDB_SETTINGS=-Xmx512m -Djava.util.logging.config.file="%ORIENTDB_HOME%\config\orientdb-client-log.properties" -Djava.awt.headless=true
rem Launch the ETL processor with every jar found under lib on the classpath
call %JAVA% -server %SSL_OPTS% %ORIENTDB_SETTINGS% -Dfile.encoding=utf-8 -Dorientdb.build.number="@BUILD@" -cp "%ORIENTDB_HOME%\lib\*;" com.orientechnologies.orient.etl.OETLProcessor %CMD_LINE_ARGS%

:end
-------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/source/OInputSource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License.
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.source; 20 | 21 | import com.orientechnologies.orient.core.record.impl.ODocument; 22 | 23 | import java.io.BufferedReader; 24 | import java.io.InputStreamReader; 25 | import java.io.Reader; 26 | 27 | /** 28 | * ETL Source that reads from System.in 29 | */ 30 | public class OInputSource extends OAbstractSource { 31 | protected final BufferedReader reader = new BufferedReader(new InputStreamReader(System.in)); 32 | 33 | @Override 34 | public ODocument getConfiguration() { 35 | return new ODocument().fromJSON("{}"); 36 | } 37 | 38 | @Override 39 | public String getUnit() { 40 | return "bytes"; 41 | } 42 | 43 | @Override 44 | public String getName() { 45 | return "input"; 46 | } 47 | 48 | @Override 49 | public Reader read() { 50 | return reader; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/loader/OOutputLoader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.loader; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.record.impl.ODocument; 23 | 24 | /** 25 | * ETL Loader that saves record into OrientDB database. 26 | */ 27 | public class OOutputLoader extends OAbstractLoader { 28 | @Override 29 | public void load(final Object input, final OCommandContext context) { 30 | progress.incrementAndGet(); 31 | System.out.println(input); 32 | } 33 | 34 | @Override 35 | public String getUnit() { 36 | return "bytes"; 37 | } 38 | 39 | @Override 40 | public void rollback() { 41 | } 42 | 43 | @Override 44 | public ODocument getConfiguration() { 45 | return new ODocument(); 46 | } 47 | 48 | @Override 49 | public String getName() { 50 | return "output"; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/extractor/OAbstractSourceExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.extractor; 20 | 21 | import java.io.IOException; 22 | import java.io.Reader; 23 | 24 | /** 25 | * ETL abstract extractor. 26 | */ 27 | public abstract class OAbstractSourceExtractor extends OAbstractExtractor { 28 | protected Reader reader; 29 | 30 | @Override 31 | public void extract(final Reader iReader) { 32 | reader = iReader; 33 | } 34 | 35 | @Override 36 | public boolean hasNext() { 37 | if (reader == null) 38 | return false; 39 | 40 | try { 41 | return reader.ready(); 42 | } catch (IOException e) { 43 | throw new OExtractorException(e); 44 | } 45 | } 46 | 47 | @Override 48 | public void end() { 49 | if (reader != null) 50 | try { 51 | reader.close(); 52 | } catch (IOException e) { 53 | } 54 | 55 | super.end(); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/extractor/OAbstractExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.extractor; 20 | 21 | import com.orientechnologies.orient.core.record.impl.ODocument; 22 | import com.orientechnologies.orient.etl.OAbstractETLComponent; 23 |
/**
 * Base class for ETL extractors: holds the progress counters and rejects element removal.
 */
public abstract class OAbstractExtractor extends OAbstractETLComponent implements OExtractor {
  /** Progress counter returned by {@link #getProgress()}. */
  protected long current = 0;
  /** Total returned by {@link #getTotal()}; starts at -1. */
  protected long total = -1;

  /**
   * @return the default configuration: no parameters, output declared as 'String'
   */
  @Override
  public ODocument getConfiguration() {
    final ODocument configuration = new ODocument();
    return configuration.fromJSON("{parameters:[],output:'String'}");
  }

  @Override
  public long getTotal() {
    return total;
  }

  @Override
  public long getProgress() {
    return current;
  }

  /**
   * Removal is not supported by extractors.
   *
   * @throws UnsupportedOperationException
   *           always
   */
  @Override
  public void remove() {
    throw new UnsupportedOperationException("remove()");
  }

}
-------------------------------------------------------------------------------- /src/main/resources/config-dbpedia.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "log": "info", 4 | "fileDirectory": "/temp/datasets/dbpedia_csv/", 5 | "fileName": "Person.csv.gz", 6 | "parallel": false 7 | }, 8 | "begin": [ 9 | { 10 | "let": { 11 | "name": "$filePath", 12 | "expression": "$fileDirectory.append( $fileName )" 13 | } 14 | }, 15 | { 16 | let: { 17 | "name": "$className", 18 | "expression": "$fileName.substring( 0, $fileName.indexOf('.') )" 19 | } 20 | } 21 | ], 22 | "source": { 23 | "file": { 24 |
"path": "$filePath", 25 | "lock": true 26 | } 27 | }, 28 | "extractor": { 29 | "row": {} 30 | }, 31 | "transformers": [ 32 | { 33 | "csv": { 34 | "separator": ",", 35 | "nullValue": "NULL", 36 | "skipFrom": 1, 37 | "skipTo": 3 38 | } 39 | }, 40 | { 41 | "merge": { 42 | "joinFieldName": "URI", 43 | "lookup": "V.URI" 44 | } 45 | }, 46 | { 47 | "vertex": { 48 | "class": "$className" 49 | } 50 | } 51 | ], 52 | "loader": { 53 | "orientdb": { 54 | "dbURL": "plocal:/temp/databases/dbpedia", 55 | "dbUser": "admin", 56 | "dbPassword": "admin", 57 | "dbAutoCreateProperties": false, 58 | "dbAutoDropIfExists": true, 59 | "dbAutoCreate": true, 60 | "tx": false, 61 | "wal": false, 62 | "batchCommit": 1000, 63 | "dbType": "graph", 64 | "indexes": [ 65 | { 66 | "class": "V", 67 | "fields": [ 68 | "URI:string" 69 | ], 70 | "type": "UNIQUE" 71 | } 72 | ] 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/listener/OImporterListener.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 
16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.listener; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx; 23 | import com.orientechnologies.orient.core.index.OIndex; 24 | import com.orientechnologies.orient.core.record.impl.ODocument; 25 |
/**
 * Callbacks invoked during an import. Every callback receives the database and the command context in use.
 * NOTE(review): the moment each callback fires is inferred from its name only - confirm against the callers.
 */
public interface OImporterListener {
  /** Callback presumably fired before a file is processed. */
  void onBeforeFile(ODatabaseDocumentTx db, OCommandContext iContext);

  /** Callback presumably fired after a file has been processed. */
  void onAfterFile(ODatabaseDocumentTx db, OCommandContext iContext);

  /**
   * Callback presumably fired before a line is processed.
   *
   * @return a flag for the caller (the default listener returns {@code true}); its meaning is not visible here - TODO confirm
   */
  boolean onBeforeLine(ODatabaseDocumentTx db, OCommandContext iContext);

  /** Callback presumably fired after a line has been processed. */
  void onAfterLine(ODatabaseDocumentTx db, OCommandContext iContext);

  /** Callback presumably fired when progress information is dumped. */
  void onDump(ODatabaseDocumentTx db, OCommandContext iContext);

  /**
   * Reports that a join key was not found.
   *
   * @param iIndex
   *          index that was searched
   * @param iKey
   *          key that was not found
   */
  void onJoinNotFound(ODatabaseDocumentTx db, OCommandContext iContext, OIndex iIndex, Object iKey);

  /**
   * Validation hook for a record.
   *
   * @param iRecord
   *          record to validate
   */
  void validate(ODatabaseDocumentTx db, OCommandContext iContext, ODocument iRecord);
}
-------------------------------------------------------------------------------- /src/test/java/com/orientechnologies/orient/etl/loader/OOrientDBLoaderTest.java: -------------------------------------------------------------------------------- 1 | package com.orientechnologies.orient.etl.loader; 2 | 3 | import com.orientechnologies.orient.core.index.OIndexManagerProxy; 4 | import com.orientechnologies.orient.core.record.impl.ODocument; 5 | import com.orientechnologies.orient.etl.ETLBaseTest; 6 | import org.junit.Test; 7 | 8 | import static org.assertj.core.api.Assertions.assertThat; 9 | 10 | 11 | /** 12 | * Created by frank on 9/14/15.
13 | */ 14 | public class OOrientDBLoaderTest extends ETLBaseTest { 15 | 16 | @Test 17 | public void testAddMetadataToIndex() { 18 | 19 | process("{source: { content: { value: 'name,surname\nJay,Miner' } }, extractor : { row: {} }, transformers: [{ csv: {} }], loader: { orientdb: {\n" 20 | + " dbURL: \"memory:ETLBaseTest\",\n" 21 | + " dbUser: \"admin\",\n" 22 | + " dbPassword: \"admin\",\n" 23 | + " dbAutoCreate: true,\n" 24 | + " tx: false,\n" 25 | + " batchCommit: 1000,\n" 26 | + " wal : false,\n" 27 | + " dbType: \"graph\",\n" 28 | + " classes: [\n" 29 | + " {name:\"Person\", extends: \"V\" },\n" 30 | + " ],\n" 31 | + " indexes: [{class:\"V\" , fields:[\"surname:String\"], \"type\":\"NOTUNIQUE\", \"metadata\": { \"ignoreNullValues\" : \"false\"}} ] } } }"); 32 | 33 | final OIndexManagerProxy indexManager = graph.getRawGraph().getMetadata().getIndexManager(); 34 | 35 | assertThat(indexManager.existsIndex("V.surname")).isTrue(); 36 | 37 | final ODocument indexMetadata = indexManager.getIndex("V.surname").getMetadata(); 38 | assertThat(indexMetadata.containsField("ignoreNullValues")).isTrue(); 39 | assertThat(indexMetadata.field("ignoreNullValues")).isEqualTo("false"); 40 | 41 | } 42 | 43 | } -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/transformer/OAbstractTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.orient.etl.OAbstractETLPipelineComponent; 22 | import com.orientechnologies.orient.etl.OETLProcessor; 23 | 24 | /** 25 | * Abstract Transformer. 26 | */ 27 | public abstract class OAbstractTransformer extends OAbstractETLPipelineComponent implements OTransformer { 28 | @Override 29 | public Object transform(final Object input) { 30 | log(OETLProcessor.LOG_LEVELS.DEBUG, "Transformer input: %s", input); 31 | 32 | if (input == null) 33 | return null; 34 | 35 | if (!skip(input)) { 36 | context.setVariable("input", input); 37 | final Object result = executeTransform(input); 38 | if (output == null) { 39 | log(OETLProcessor.LOG_LEVELS.DEBUG, "Transformer output: %s", result); 40 | return result; 41 | } 42 | context.setVariable(output, result); 43 | } 44 | log(OETLProcessor.LOG_LEVELS.DEBUG, "Transformer output (same as input): %s", input); 45 | return input; 46 | } 47 | 48 | protected abstract Object executeTransform(final Object input); 49 | } 50 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contribute to OrientDB 2 | 3 | In order to contribute issues and pull requests, please sign OrientDB's [Contributor License Agreement](https://www.clahub.com/agreements/orientechnologies/orientdb). 
The purpose of this agreement is to protect users of this codebase by ensuring that all code is free to use under the stipulations of the [Apache2 license](http://www.apache.org/licenses/LICENSE-2.0.html). 4 | 5 | ## Pushing into main repository 6 | If you'd like to contribute to OrientDB with a patch, follow these steps: 7 | * fork the repository that your change applies to. The main one is https://github.com/orientechnologies/orientdb, but plugins, drivers and other components reside in other projects under the [Orient Technologies](https://github.com/orientechnologies/) umbrella. 8 | * select the "develop" branch if present 9 | * apply your changes, 10 | * test that the Test Suite hasn't been broken by running: 11 | * `mvn clean test` 12 | * if all the tests pass, then do a **Pull Request** (PR) against the **"develop"** branch on the GitHub repository and write a comment about the change. Please don't send PRs to "master" because we use that branch only for releasing 13 | * if you want the fix to be backported to a previous version, please write it in your comments and, if the OrientDB team agrees, they will do that as soon as the PR is merged 14 | 15 | ## Documentation 16 | 17 | If you want to contribute to the OrientDB documentation, the right repository is: https://github.com/orientechnologies/orientdb-docs. Every 24-48 hours all the contributions are reviewed and published on the public [documentation](http://orientdb.com/docs/last/). 18 | 19 | ## Code formatting 20 | You can find the Eclipse Java formatter config file here: [_base/ide/eclipse-formatter.xml](https://github.com/orientechnologies/orientdb/blob/master/_base/ide/eclipse-formatter.xml). 21 | 22 | If you use IntelliJ IDEA you can install [this](http://plugins.jetbrains.com/plugin/?id=6546) plugin and use the formatter profile mentioned above.
23 | 24 | -------------------------------------------------------------------------------- /src/test/java/com/orientechnologies/orient/etl/TestLoader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * * 17 | * * For more information: http://www.orientechnologies.com 18 | * 19 | */ 20 | 21 | package com.orientechnologies.orient.etl; 22 | 23 | import com.orientechnologies.orient.core.command.OCommandContext; 24 | import com.orientechnologies.orient.core.record.impl.ODocument; 25 | import com.orientechnologies.orient.etl.loader.OAbstractLoader; 26 | 27 | import java.util.ArrayList; 28 | import java.util.List; 29 | 30 | /** 31 | * ETL Mock loader to check the result in tests. 32 | * 33 | * @author Luca Garulli on 27/11/14. 
34 | */
public class TestLoader extends OAbstractLoader {
  /** Documents received so far, in arrival order; every access in this class is guarded by the list's own monitor. */
  public final List<ODocument> loadedRecords = new ArrayList<ODocument>();

  public TestLoader() {
  }

  /**
   * Records the received document.
   *
   * @param input
   *          item to record; must be an {@link ODocument}
   * @param context
   *          not used by this loader
   */
  @Override
  public void load(Object input, OCommandContext context) {
    synchronized (loadedRecords) {
      loadedRecords.add((ODocument) input);
    }
  }

  /**
   * @return the number of documents recorded so far
   */
  @Override
  public long getProgress() {
    // Same lock as load(): the backing ArrayList is not safe to read while another thread appends to it.
    synchronized (loadedRecords) {
      return loadedRecords.size();
    }
  }

  @Override
  public String getUnit() {
    return "document";
  }

  /** Nothing to undo: recorded documents are kept as they are. */
  @Override
  public void rollback() {
  }

  @Override
  public String getName() {
    return "test";
  }
}
-------------------------------------------------------------------------------- /src/test/java/com/orientechnologies/orient/etl/transformer/OLogTransformerTest.java: -------------------------------------------------------------------------------- 1 | package com.orientechnologies.orient.etl.transformer; 2 | 3 | import com.orientechnologies.orient.core.record.impl.ODocument; 4 | import com.orientechnologies.orient.etl.ETLBaseTest; 5 | import org.junit.Test; 6 | 7 | import java.io.ByteArrayOutputStream; 8 | import java.io.PrintStream; 9 | import java.util.List; 10 | 11 | import static org.junit.Assert.assertEquals; 12 | 13 | public class OLogTransformerTest extends ETLBaseTest { 14 | 15 | private final PrintStream OUT = System.out; 16 | 17 | @Test 18 | public void testPrefix() throws Exception { 19 | ByteArrayOutputStream output = getByteArrayOutputStream(); 20 | String cfgJson = "{source: { content: { value: 'id,text\n1,Hello\n2,Bye'} }, extractor : { row : {} }, transformers : [{ csv : {} },{ log : {prefix:'-> '}}], loader : { test: {} } }"; 21 | process(cfgJson); 22 | List res = getResult(); 23 | ODocument doc = res.get(0); 24 | String[] stringList = output.toString().split("\n"); 25 | assertEquals("[1:log] INFO -> {id:1,text:Hello}", stringList[1]); 26 | assertEquals("[2:log] INFO -> {id:2,text:Bye}", stringList[2]);
27 | } 28 | 29 | @Test 30 | public void testPostfix() throws Exception { 31 | ByteArrayOutputStream output = getByteArrayOutputStream(); 32 | String cfgJson = "{source: { content: { value: 'id,text\n1,Hello\n2,Bye'} }, extractor : { row : {} }, transformers : [{ csv : {} },{ log : {postfix:'-> '}}], loader : { test: {} } }"; 33 | process(cfgJson); 34 | List res = getResult(); 35 | ODocument doc = res.get(0); 36 | String[] stringList = output.toString().split("\n"); 37 | 38 | assertEquals("[1:log] INFO {id:1,text:Hello}-> ", stringList[1]); 39 | assertEquals("[2:log] INFO {id:2,text:Bye}-> ", stringList[2]); 40 | } 41 | 42 | private ByteArrayOutputStream getByteArrayOutputStream() { 43 | ByteArrayOutputStream output = new ByteArrayOutputStream(); 44 | System.setOut(new PrintStream(output, true)); 45 | return output; 46 | } 47 | 48 | } -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/listener/ODefaultImporterListener.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 
16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.listener; 20 | 21 | import com.orientechnologies.common.log.OLogManager; 22 | import com.orientechnologies.orient.core.command.OCommandContext; 23 | import com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx; 24 | import com.orientechnologies.orient.core.index.OIndex; 25 | import com.orientechnologies.orient.core.record.impl.ODocument; 26 | 27 | public class ODefaultImporterListener implements OImporterListener { 28 | 29 | @Override 30 | public void onBeforeFile(final ODatabaseDocumentTx db, final OCommandContext iContext) { 31 | } 32 | 33 | @Override 34 | public void onAfterFile(final ODatabaseDocumentTx db, final OCommandContext iContext) { 35 | } 36 | 37 | @Override 38 | public boolean onBeforeLine(final ODatabaseDocumentTx db, final OCommandContext iContext) { 39 | return true; 40 | } 41 | 42 | @Override 43 | public void onAfterLine(final ODatabaseDocumentTx db, final OCommandContext iContext) { 44 | } 45 | 46 | @Override 47 | public void onDump(final ODatabaseDocumentTx db, final OCommandContext iContext) { 48 | } 49 | 50 | @Override 51 | public void onJoinNotFound(final ODatabaseDocumentTx db, final OCommandContext iContext, final OIndex iIndex, final Object iKey) { 52 | iContext.setVariable("joinNotFound", ((Integer) iContext.getVariable("joinNotFound", 0)) + 1); 53 | OLogManager.instance().warn(this, " + %d line: join record not found in index '%s' for key='%s'", 54 | iContext.getVariable("currentLine"), iIndex, iKey); 55 | } 56 | 57 | @Override 58 | public void validate(ODatabaseDocumentTx db, OCommandContext iContext, ODocument iRecord) { 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/test/java/com/orientechnologies/orient/etl/ETLBaseTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 
4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl; 20 | 21 | import com.orientechnologies.orient.core.command.OBasicCommandContext; 22 | import com.orientechnologies.orient.core.command.OCommandContext; 23 | import com.orientechnologies.orient.core.record.impl.ODocument; 24 | import com.tinkerpop.blueprints.impls.orient.OrientGraph; 25 | import org.junit.After; 26 | import org.junit.Before; 27 | 28 | import java.util.List; 29 | 30 | /** 31 | * Tests ETL JSON Extractor. 
32 | * 33 | * @author Luca Garulli 34 | */ 35 | public abstract class ETLBaseTest { 36 | protected String[] names = new String[] { "Jay", "Luca", "Bill", "Steve", "Jill", "Luigi", "Enrico", "Emanuele" }; 37 | protected String[] surnames = new String[] { "Miner", "Ferguson", "Cancelli", "Lavori", "Raggio", "Eagles", "Smiles", "Ironcutter" }; 38 | 39 | protected OrientGraph graph; 40 | protected OETLProcessor proc; 41 | 42 | @Before 43 | public void setUp() { 44 | graph = new OrientGraph("memory:ETLBaseTest"); 45 | graph.setUseLightweightEdges(false); 46 | proc = new OETLProcessor(); 47 | proc.getFactory().registerLoader(TestLoader.class); 48 | } 49 | 50 | @After 51 | public void tearDown() { 52 | graph.drop(); 53 | } 54 | 55 | protected List getResult() { 56 | return ((TestLoader) proc.getLoader()).loadedRecords; 57 | } 58 | 59 | protected void process(final String cfgJson) { 60 | ODocument cfg = new ODocument().fromJSON(cfgJson, "noMap"); 61 | proc.parse(cfg, null); 62 | proc.execute(); 63 | } 64 | 65 | protected void process(final String cfgJson, final OCommandContext iContext) { 66 | ODocument cfg = new ODocument().fromJSON(cfgJson, "noMap"); 67 | proc.parse(cfg, iContext); 68 | proc.execute(); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/block/OLetBlock.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.block; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.record.impl.ODocument; 23 | import com.orientechnologies.orient.core.sql.filter.OSQLFilter; 24 | import com.orientechnologies.orient.etl.OETLProcessor; 25 | 26 | public class OLetBlock extends OAbstractBlock { 27 | protected String name; 28 | protected OSQLFilter expression; 29 | protected Object value; 30 | 31 | @Override 32 | public ODocument getConfiguration() { 33 | return new ODocument().fromJSON("{parameters:[{name:{optional:false,description:'Variable name'}}," 34 | + "{value:{optional:true,description:'Variable value'}}" 35 | + "{expression:{optional:true,description:'Expression to evaluate'}}" + "]}"); 36 | } 37 | 38 | @Override 39 | public void configure(OETLProcessor iProcessor, final ODocument iConfiguration, final OCommandContext iContext) { 40 | super.configure(iProcessor, iConfiguration, iContext); 41 | 42 | name = iConfiguration.field("name"); 43 | if (iConfiguration.containsField("value")) { 44 | value = iConfiguration.field("value"); 45 | } else 46 | expression = new OSQLFilter((String) iConfiguration.field("expression"), iContext, null); 47 | 48 | if (value == null && expression == null) 49 | throw new IllegalArgumentException("'value' or 'expression' parameter are mandatory in Let Transformer"); 50 | } 51 | 52 | @Override 53 | public String getName() { 54 | return "let"; 55 | } 56 | 57 | 
@Override 58 | public Object executeBlock() { 59 | final Object v = expression != null ? expression.evaluate(null, null, context) : resolve(value); 60 | context.setVariable(name, v); 61 | return v; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/transformer/OFlowTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 
16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.exception.OConfigurationException; 23 | import com.orientechnologies.orient.core.record.impl.ODocument; 24 | import com.orientechnologies.orient.etl.OETLProcessHaltedException; 25 | import com.orientechnologies.orient.etl.OETLProcessor; 26 | 27 | public class OFlowTransformer extends OAbstractTransformer { 28 | private String operation; 29 | 30 | @Override 31 | public ODocument getConfiguration() { 32 | return new ODocument().fromJSON("{parameters:[" + getCommonConfigurationParameters() + "," 33 | + "{operation:{optional:false,description:'Flow operation between: skip and halt'}}]," 34 | + "input:['Object'],output:'Object'}"); 35 | } 36 | 37 | @Override 38 | public void configure(OETLProcessor iProcessor, final ODocument iConfiguration, OCommandContext iContext) { 39 | super.configure(iProcessor, iConfiguration, iContext); 40 | operation = iConfiguration.field("operation"); 41 | if (operation == null) 42 | throw new OConfigurationException("Flow transformer has not mandatory 'operation' field"); 43 | if (!operation.equalsIgnoreCase("halt") && !operation.equalsIgnoreCase("skip")) 44 | throw new OConfigurationException("Flow transformer has invalid 'operation' field='" + operation 45 | + "', while supported are: 'skip' and 'halt'"); 46 | } 47 | 48 | @Override 49 | public String getName() { 50 | return "flow"; 51 | } 52 | 53 | @Override 54 | public Object executeTransform(final Object input) { 55 | if (operation.equalsIgnoreCase("skip")) 56 | return null; 57 | 58 | throw new OETLProcessHaltedException("Process stopped because this condition: " + ifExpression); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/transformer/OBlockTransformer.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.exception.OConfigurationException; 23 | import com.orientechnologies.orient.core.record.impl.ODocument; 24 | import com.orientechnologies.orient.etl.OETLPipeline; 25 | import com.orientechnologies.orient.etl.OETLProcessor; 26 | import com.orientechnologies.orient.etl.block.OBlock; 27 | 28 | /** 29 | * Pass-through Transformer that execute a block. 
30 | */ 31 | public class OBlockTransformer extends OAbstractTransformer { 32 | private OBlock block; 33 | 34 | @Override 35 | public ODocument getConfiguration() { 36 | return new ODocument().fromJSON("{parameters:[" + getCommonConfigurationParameters() + "," 37 | + "{block:{optional:false,description:'Block to execute'}}]}"); 38 | } 39 | 40 | @Override 41 | public void configure(OETLProcessor iProcessor, final ODocument iConfiguration, OCommandContext iContext) { 42 | super.configure(iProcessor, iConfiguration, iContext); 43 | final String[] fieldNames = iConfiguration.fieldNames(); 44 | 45 | try { 46 | block = processor.getFactory().getBlock(fieldNames[0]); 47 | block.configure(processor, (ODocument) iConfiguration.field(fieldNames[0]), context); 48 | } catch (Exception e) { 49 | throw new OConfigurationException("[Block transformer] Error on configuring inner block", e); 50 | } 51 | } 52 | 53 | @Override 54 | public String getName() { 55 | return "block"; 56 | } 57 | 58 | @Override 59 | public void setPipeline(OETLPipeline iPipeline) { 60 | super.setPipeline(iPipeline); 61 | block.setContext( context ); 62 | } 63 | 64 | @Override 65 | protected Object executeTransform(final Object input) { 66 | context.setVariable("input", input); 67 | block.execute(); 68 | return input; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/transformer/OLogTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.record.impl.ODocument; 23 | import com.orientechnologies.orient.etl.OETLProcessor; 24 | 25 | import java.io.PrintStream; 26 | 27 | /** 28 | * ETL Transformer that logs the input. 29 | */ 30 | public class OLogTransformer extends OAbstractTransformer { 31 | private final PrintStream out = System.out; 32 | private String prefix = ""; 33 | private String postfix = ""; 34 | 35 | @Override 36 | public ODocument getConfiguration() { 37 | return new ODocument().fromJSON("{parameters:[" + getCommonConfigurationParameters() + "," 38 | + "{prefix:{optional:true,description:'Custom prefix to prepend to the message'}}," 39 | + "{postfix:{optional:true,description:'Custom postfix to append to the message'}}" + "]}"); 40 | } 41 | 42 | @Override 43 | public void configure(OETLProcessor iProcessor, final ODocument iConfiguration, OCommandContext iContext) { 44 | super.configure(iProcessor, iConfiguration, iContext); 45 | if (iConfiguration.containsField("prefix")) 46 | prefix = iConfiguration.field("prefix"); 47 | if (iConfiguration.containsField("postfix")) 48 | postfix = iConfiguration.field("postfix"); 49 | } 50 | 51 | @Override 52 | public String getName() { 53 | return "log"; 54 | } 55 | 56 | @Override 57 | public Object executeTransform(final Object input) { 58 | final StringBuilder buffer = new StringBuilder(); 59 | 60 
| if (prefix != null && !prefix.isEmpty()) 61 | buffer.append(resolve(prefix)); 62 | 63 | if (input != null) 64 | buffer.append(input); 65 | 66 | if (postfix != null && !postfix.isEmpty()) 67 | buffer.append(resolve(postfix)); 68 | 69 | log(OETLProcessor.LOG_LEVELS.INFO, buffer.toString()); 70 | 71 | return input; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/block/OCodeBlock.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.block; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.command.script.OCommandExecutorScript; 23 | import com.orientechnologies.orient.core.command.script.OCommandScript; 24 | import com.orientechnologies.orient.core.record.impl.ODocument; 25 | import com.orientechnologies.orient.etl.OETLProcessor; 26 | 27 | import java.util.HashMap; 28 | import java.util.Map; 29 | 30 | /** 31 | * Executes arbitrary code in any supported language by JVM. 
32 | */ 33 | public class OCodeBlock extends OAbstractBlock { 34 | protected String language = "javascript"; 35 | protected String code; 36 | protected OCommandExecutorScript cmd; 37 | protected Map params = new HashMap(); 38 | 39 | @Override 40 | public ODocument getConfiguration() { 41 | return new ODocument().fromJSON("{parameters:[{language:{optional:true,description:'Code language, default is Javascript'}}," 42 | + "{code:{optional:false,description:'Code to execute'}}]," + "input:['Object'],output:'Object'}"); 43 | } 44 | 45 | @Override 46 | public void configure(OETLProcessor iProcessor, final ODocument iConfiguration, OCommandContext iContext) { 47 | super.configure(iProcessor, iConfiguration, iContext); 48 | if (iConfiguration.containsField("language")) 49 | language = iConfiguration.field("language"); 50 | 51 | if (iConfiguration.containsField("code")) 52 | code = iConfiguration.field("code"); 53 | else 54 | throw new IllegalArgumentException("'code' parameter is mandatory in Code Transformer"); 55 | 56 | cmd = new OCommandExecutorScript().parse(new OCommandScript(language, code)); 57 | } 58 | 59 | @Override 60 | public String getName() { 61 | return "code"; 62 | } 63 | 64 | @Override 65 | public Object executeBlock() { 66 | return cmd.executeInContext(context, params); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/test/java/com/orientechnologies/orient/etl/transformer/OFlowTransformerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.orient.etl.ETLBaseTest; 22 | import com.tinkerpop.blueprints.Vertex; 23 | import org.junit.Test; 24 | 25 | import java.util.Iterator; 26 | 27 | import static org.junit.Assert.assertEquals; 28 | 29 | /** 30 | * Tests ETL Flow Transformer. 31 | * 32 | * @author Luca Garulli 33 | */ 34 | public class OFlowTransformerTest extends ETLBaseTest { 35 | @Test 36 | public void testSkip() { 37 | process("{source: { content: { value: 'name,surname\nJay,Miner\nJay,Test' } }, extractor : { row: {} }," 38 | + " transformers: [{csv: {}}, {vertex: {class:'V'}}, {flow:{operation:'skip',if: 'name <> \'Jay\''}},{field:{fieldName:'name', value:'3'}}" 39 | + "], loader: { orientdb: { dbURL: 'memory:ETLBaseTest', dbType:'graph' } } }"); 40 | 41 | assertEquals(2, graph.countVertices("V")); 42 | 43 | Iterator it = graph.getVertices().iterator(); 44 | 45 | Vertex v1 = it.next(); 46 | Object value1 = v1.getProperty("name"); 47 | assertEquals("3", value1); 48 | 49 | Vertex v2 = it.next(); 50 | Object value2 = v2.getProperty("name"); 51 | assertEquals("3", value2); 52 | } 53 | 54 | @Test 55 | public void testSkipNever() { 56 | process("{source: { content: { value: 'name,surname\nJay,Miner\nTest,Test' } }, extractor : { row: {} }," 57 | + " transformers: [{csv: {}}, {vertex: {class:'V'}}, {flow:{operation:'skip',if: 'name = \'Jay\''}},{field:{fieldName:'name', value:'3'}}" 58 | + "], loader: { orientdb: { dbURL: 
'memory:ETLBaseTest', dbType:'graph'} } }"); 59 | 60 | assertEquals(2, graph.countVertices("V")); 61 | 62 | Iterator it = graph.getVertices().iterator(); 63 | 64 | Vertex v1 = it.next(); 65 | Object value1 = v1.getProperty("name"); 66 | assertEquals("Jay", value1); 67 | 68 | Vertex v2 = it.next(); 69 | Object value2 = v2.getProperty("name"); 70 | assertEquals("3", value2); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/block/OConsoleBlock.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.block; 20 | 21 | import com.orientechnologies.orient.console.OConsoleDatabaseApp; 22 | import com.orientechnologies.orient.core.command.OCommandContext; 23 | import com.orientechnologies.orient.core.exception.OConfigurationException; 24 | import com.orientechnologies.orient.core.record.impl.ODocument; 25 | import com.orientechnologies.orient.etl.OETLProcessor; 26 | 27 | import java.util.List; 28 | 29 | /** 30 | * Executes the OrientDB console. Useful to execute batches. 
31 | */ 32 | public class OConsoleBlock extends OAbstractBlock { 33 | protected String file; 34 | protected List commands; 35 | protected OConsoleDatabaseApp console; 36 | 37 | @Override 38 | public ODocument getConfiguration() { 39 | return new ODocument().fromJSON("{parameters:[" + getCommonConfigurationParameters() 40 | + "{file:{optional:true,description:'Input filename with commands to execute'}}" 41 | + "{commands:{optional:true,description:'Commands to execute in sequence as an array of strings'}}" + "]}"); 42 | } 43 | 44 | @Override 45 | public void configure(OETLProcessor iProcessor, final ODocument iConfiguration, OCommandContext iContext) { 46 | super.configure(iProcessor, iConfiguration, iContext); 47 | if (iConfiguration.containsField("file")) 48 | file = iConfiguration.field("file"); 49 | 50 | if (iConfiguration.containsField("commands")) 51 | commands = iConfiguration.field("commands"); 52 | 53 | if (file == null && commands == null) 54 | throw new OConfigurationException("file or commands are mandatory"); 55 | 56 | if (file != null) 57 | console = new OConsoleDatabaseApp(new String[] { file }); 58 | else 59 | console = new OConsoleDatabaseApp(commands.toArray(new String[commands.size()])); 60 | } 61 | 62 | @Override 63 | public String getName() { 64 | return "console"; 65 | } 66 | 67 | @Override 68 | public Object executeBlock() { 69 | return console.run(); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/test/java/com/orientechnologies/orient/etl/RandomExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * * 17 | * * For more information: http://www.orientechnologies.com 18 | * 19 | */ 20 | 21 | package com.orientechnologies.orient.etl; 22 | 23 | import com.orientechnologies.orient.core.command.OCommandContext; 24 | import com.orientechnologies.orient.core.record.impl.ODocument; 25 | import com.orientechnologies.orient.etl.extractor.OAbstractExtractor; 26 | 27 | import java.io.Reader; 28 | import java.util.Random; 29 | 30 | /** 31 | * ETL Mock loader to check the result in tests. 32 | * 33 | * @author Luca Garulli on 27/11/14. 
34 | */ 35 | public class RandomExtractor extends OAbstractExtractor { 36 | private long current = 0; 37 | private int fields; 38 | private long items; 39 | private int delay = 0; 40 | 41 | @Override 42 | public void configure(OETLProcessor iProcessor, ODocument iConfiguration, OCommandContext iContext) { 43 | super.configure(iProcessor, iConfiguration, iContext); 44 | 45 | if (iConfiguration.containsField("items")) 46 | items = ((Number) iConfiguration.field("items")).longValue(); 47 | if (iConfiguration.containsField("fields")) 48 | fields = iConfiguration.field("fields"); 49 | if (iConfiguration.containsField("delay")) 50 | delay = iConfiguration.field("delay"); 51 | } 52 | 53 | @Override 54 | public void extract(final Reader iReader) { 55 | } 56 | 57 | @Override 58 | public String getUnit() { 59 | return "row"; 60 | } 61 | 62 | @Override 63 | public boolean hasNext() { 64 | return current < items; 65 | } 66 | 67 | @Override 68 | public OExtractedItem next() { 69 | final ODocument doc = new ODocument(); 70 | 71 | for (int i = 0; i < fields; ++i) { 72 | doc.field("field" + i, "value_" + new Random().nextInt(30)); 73 | } 74 | 75 | if (delay > 0) 76 | // SIMULATE A SLOW DOWN 77 | try { 78 | Thread.sleep(delay); 79 | } catch (InterruptedException e) { 80 | } 81 | 82 | return new OExtractedItem(current++, doc); 83 | } 84 | 85 | @Override 86 | public String getName() { 87 | return "random"; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/test/java/com/orientechnologies/orient/etl/extractor/OJsonRandomExtractorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.extractor; 20 | 21 | import org.junit.Ignore; 22 | import org.junit.Test; 23 | 24 | import com.orientechnologies.orient.core.command.OBasicCommandContext; 25 | import com.orientechnologies.orient.core.record.impl.ODocument; 26 | import com.orientechnologies.orient.etl.ETLBaseTest; 27 | import com.orientechnologies.orient.etl.RandomExtractor; 28 | 29 | import static org.junit.Assert.assertEquals; 30 | 31 | /** 32 | * Tests ETL JSON Extractor. 33 | * 34 | * @author Luca Garulli 35 | */ 36 | public class OJsonRandomExtractorTest extends ETLBaseTest { 37 | 38 | private final static int TOTAL = 1000000; 39 | 40 | @Ignore 41 | public void testNonParallel() { 42 | proc.getFactory().registerExtractor(RandomExtractor.class); 43 | 44 | process("{extractor : { random: {items: " + TOTAL + ", fields: 10} }, " 45 | + "loader: { orientdb: { dbURL: 'memory:ETLBaseTest', dbType:'graph', class: 'Person', useLightweightEdges:false, " 46 | + "classes: [{name: 'Person', extends: 'V'}] } } }"); 47 | 48 | assertEquals(TOTAL, graph.countVertices("Person")); 49 | 50 | int i = 0; 51 | for (ODocument doc : graph.getRawGraph().browseClass("Person")) { 52 | assertEquals(10, doc.fields()); 53 | i++; 54 | } 55 | } 56 | 57 | @Test 58 | public void testParallel() { 59 | proc.getFactory().registerExtractor(RandomExtractor.class); 60 | 61 | process("{extractor : { random: {items: " + TOTAL + ", fields: 10, delay: 0} }, " 62 | + "loader: { orientdb: { dbURL: 
'memory:ETLBaseTest', dbType:'graph', class: 'Person', useLightweightEdges:false, " 63 | + "classes: [{name: 'Person', extends: 'V', clusters: 8 }] } } }", new OBasicCommandContext() 64 | .setVariable("parallel", Boolean.TRUE).setVariable("dumpEveryMs", 1000)); 65 | 66 | assertEquals(TOTAL, graph.countVertices("Person")); 67 | 68 | int i = 0; 69 | for (ODocument doc : graph.getRawGraph().browseClass("Person")) { 70 | assertEquals(10, doc.fields()); 71 | i++; 72 | } 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/source/OContentSource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.source; 20 | 21 | import com.orientechnologies.common.collection.OMultiValue; 22 | import com.orientechnologies.orient.core.command.OCommandContext; 23 | import com.orientechnologies.orient.core.record.impl.ODocument; 24 | import com.orientechnologies.orient.etl.OETLProcessor; 25 | 26 | import java.io.BufferedReader; 27 | import java.io.Reader; 28 | import java.io.StringReader; 29 | 30 | /** 31 | * ETL Source created with a string content. 
32 | */ 33 | public class OContentSource extends OAbstractSource { 34 | protected BufferedReader reader; 35 | 36 | @Override 37 | public void configure(OETLProcessor iProcessor, ODocument iConfiguration, OCommandContext iContext) { 38 | final Object value = iConfiguration.field("value"); 39 | if (value != null) { 40 | String stringContent; 41 | if (value instanceof ODocument) 42 | stringContent = ((ODocument) value).toJSON(null); 43 | else if (OMultiValue.isMultiValue(value)) { 44 | stringContent = "["; 45 | int i = 0; 46 | for (Object o : OMultiValue.getMultiValueIterable(value)) { 47 | if (o != null) { 48 | if (i > 0) 49 | stringContent += ","; 50 | 51 | if (o instanceof ODocument) 52 | stringContent += ((ODocument) o).toJSON(null); 53 | else 54 | stringContent += o.toString(); 55 | ++i; 56 | } 57 | } 58 | stringContent += "]"; 59 | } else 60 | stringContent = value.toString(); 61 | 62 | this.reader = new BufferedReader(new StringReader(stringContent)); 63 | } else 64 | throw new IllegalArgumentException(getName() + " Source has no 'value' set"); 65 | } 66 | 67 | @Override 68 | public ODocument getConfiguration() { 69 | return new ODocument().fromJSON("{}"); 70 | } 71 | 72 | @Override 73 | public String getUnit() { 74 | return "bytes"; 75 | } 76 | 77 | @Override 78 | public String getName() { 79 | return "content"; 80 | } 81 | 82 | @Override 83 | public Reader read() { 84 | return reader; 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/test/java/com/orientechnologies/orient/etl/extractor/OJsonExtractorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.extractor; 20 | 21 | import org.junit.Test; 22 | 23 | import com.orientechnologies.orient.core.record.impl.ODocument; 24 | import com.orientechnologies.orient.etl.ETLBaseTest; 25 | 26 | import static org.junit.Assert.assertEquals; 27 | 28 | /** 29 | * Tests ETL JSON Extractor. 30 | * 31 | * @author Luca Garulli 32 | */ 33 | public class OJsonExtractorTest extends ETLBaseTest { 34 | 35 | @Test 36 | public void testEmptyCollection() { 37 | process("{source: { content: { value: [] } }, extractor : { json: {} }, loader: { test: {} } }"); 38 | assertEquals(0, getResult().size()); 39 | } 40 | 41 | @Test 42 | public void testEmptyObject() { 43 | process("{source: { content: { value: {} } }, extractor : { json: {} }, loader: { test: {} } }"); 44 | assertEquals(1, getResult().size()); 45 | ODocument doc = getResult().get(0); 46 | assertEquals(0, doc.fields()); 47 | } 48 | 49 | @Test 50 | public void testOneObject() { 51 | process("{source: { content: { value: { name: 'Jay', surname: 'Miner' } } }, extractor : { json: {} }, loader: { test: {} } }"); 52 | assertEquals(1, getResult().size()); 53 | ODocument doc = getResult().get(0); 54 | assertEquals(2, doc.fields()); 55 | assertEquals("Jay", doc.field("name")); 56 | assertEquals("Miner", doc.field("surname")); 57 | } 58 | 59 | @Test 60 | public void testSmallSet() { 61 | String content = ""; 62 | for (int i = 0; i < names.length; ++i) { 63 | if (i > 0) 64 | content += ","; 65 | content += "{name:'" + 
names[i] + "',surname:'" + surnames[i] + "',id:" + i + "}"; 66 | } 67 | 68 | process("{source: { content: { value: [" + content + "] } }, extractor : { json: {} }, loader: { test: {} } }"); 69 | 70 | assertEquals(getResult().size(), names.length); 71 | 72 | int i = 0; 73 | for (ODocument doc : getResult()) { 74 | assertEquals(3, doc.fields()); 75 | assertEquals(names[i], doc.field("name")); 76 | assertEquals(surnames[i], doc.field("surname")); 77 | assertEquals(i, doc.field("id")); 78 | i++; 79 | } 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/test/java/com/orientechnologies/orient/etl/transformer/OFieldTransformerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.orient.core.record.impl.ODocument; 22 | import com.orientechnologies.orient.etl.ETLBaseTest; 23 | import org.junit.Test; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | /** 28 | * Tests ETL Field Transformer. 
29 | * 30 | * @author Luca Garulli 31 | */ 32 | public class OFieldTransformerTest extends ETLBaseTest { 33 | 34 | @Test 35 | public void testValue() { 36 | process("{source: { content: { value: 'name,surname\nJay,Miner' } }, extractor : { row: {} }, transformers: [{ csv: {} }, {field: {fieldName:'test', value: 33}}], loader: { test: {} } }"); 37 | assertEquals(1, getResult().size()); 38 | 39 | ODocument doc = getResult().get(0); 40 | assertEquals(3, doc.fields()); 41 | assertEquals("Jay", doc.field("name")); 42 | assertEquals("Miner", doc.field("surname")); 43 | assertEquals(33, doc.field("test")); 44 | } 45 | 46 | @Test 47 | public void testExpression() { 48 | process("{source: { content: { value: 'name,surname\nJay,Miner' } }, extractor : { row: {} }, transformers: [{ csv: {} }, {field: {fieldName:'test', expression: 'surname'}}], loader: { test: {} } }"); 49 | assertEquals(1, getResult().size()); 50 | 51 | ODocument doc = getResult().get(0); 52 | assertEquals(3, doc.fields()); 53 | assertEquals("Jay", doc.field("name")); 54 | assertEquals("Miner", doc.field("surname")); 55 | assertEquals("Miner", doc.field("test")); 56 | } 57 | 58 | @Test 59 | public void testRemove() { 60 | process("{source: { content: { value: 'name,surname\nJay,Miner' } }, extractor : { row: {} }, transformers: [{ csv: {} }, {field: {fieldName:'surname', operation: 'remove'}}], loader: { test: {} } }"); 61 | assertEquals(1, getResult().size()); 62 | 63 | ODocument doc = getResult().get(0); 64 | assertEquals(1, doc.fields()); 65 | assertEquals("Jay", doc.field("name")); 66 | } 67 | 68 | @Test 69 | public void testSave() { 70 | process("{source: { content: { value: 'name,surname\nJay,Miner' } }, extractor : { row: {} }, transformers: [{ csv: {} }, {field:{fieldName:'@class', value:'Test'}}, {field:{ fieldName:'test', value: 33, save: true}}], loader: { orientdb: { dbURL: 'memory:ETLBaseTest' } } }"); 71 | assertEquals(1, graph.countVertices("Test")); 72 | } 73 | } 74 | 
-------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/transformer/OCommandTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.command.OCommandRequest; 23 | import com.orientechnologies.orient.core.command.script.OCommandScript; 24 | import com.orientechnologies.orient.core.record.impl.ODocument; 25 | import com.orientechnologies.orient.core.sql.OCommandSQL; 26 | import com.orientechnologies.orient.etl.OETLProcessor; 27 | import com.orientechnologies.orient.graph.gremlin.OCommandGremlin; 28 | 29 | /** 30 | * Executes a command. 
31 | */ 32 | public class OCommandTransformer extends OAbstractTransformer { 33 | private String language = "sql"; 34 | private String command; 35 | 36 | @Override 37 | public ODocument getConfiguration() { 38 | return new ODocument().fromJSON("{parameters:[" + getCommonConfigurationParameters() + "," 39 | + "{language:{optional:true,description:'Command language, SQL by default'}}," 40 | + "{command:{optional:false,description:'Command to execute'}}]," + "input:['ODocument'],output:'ODocument'}"); 41 | } 42 | 43 | @Override 44 | public void configure(final OETLProcessor iProcessor, final ODocument iConfiguration, final OCommandContext iContext) { 45 | super.configure(iProcessor, iConfiguration, iContext); 46 | 47 | if (iConfiguration.containsField("language")) 48 | language = ((String) iConfiguration.field("language")).toLowerCase(); 49 | command = (String) iConfiguration.field("command"); 50 | } 51 | 52 | @Override 53 | public String getName() { 54 | return "command"; 55 | } 56 | 57 | @Override 58 | public Object executeTransform(final Object input) { 59 | String runtimeCommand = (String) resolve(command); 60 | final OCommandRequest cmd; 61 | if (language.equals("sql")) { 62 | cmd = new OCommandSQL(runtimeCommand); 63 | log(OETLProcessor.LOG_LEVELS.DEBUG, "executing command=%s...", runtimeCommand); 64 | } else if (language.equals("gremlin")) { 65 | cmd = new OCommandGremlin(runtimeCommand); 66 | } else { 67 | cmd = new OCommandScript(language, runtimeCommand); 68 | } 69 | cmd.setContext(context); 70 | Object result = pipeline.getDocumentDatabase().command(cmd).execute(); 71 | log(OETLProcessor.LOG_LEVELS.DEBUG, "executed command=%s, result=%s", cmd, result); 72 | return result; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Project MOVED 2 | 3 | OrientDB ETL has been moved to OrientDB core modules: 
https://github.com/orientechnologies/orientdb/tree/develop/etl 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | # ETL 13 | 14 | The OrientDB-ETL module is an amazing tool to move data from and to OrientDB by executing an [ETL process](http://en.wikipedia.org/wiki/Extract,_transform,_load). It's super easy to use. OrientDB ETL is based on the following principles: 15 | - one [configuration file](http://www.orientechnologies.com/docs/last/Configuration-File.html) in [JSON](http://en.wikipedia.org/wiki/JSON) format 16 | - one [Extractor](http://www.orientechnologies.com/docs/last/Extractor.html) is allowed to extract data from a source 17 | - one [Loader](http://www.orientechnologies.com/docs/last/Loader.html) is allowed to load data to a destination 18 | - multiple [Transformers](http://www.orientechnologies.com/docs/last/Transformer.html) that transform data in a pipeline. Each one receives something as input, processes it, and returns an output that is passed as input to the next component 19 | 20 | ## How ETL works 21 | ``` 22 | EXTRACTOR => TRANSFORMERS[] => LOADER 23 | ``` 24 | Example of a process that extracts data from a CSV file, applies some changes, looks up whether the record has already been created and then stores the record as a document in an OrientDB database: 25 | 26 | ``` 27 | +-----------+-----------------------+-----------+ 28 | | | PIPELINE | 29 | + EXTRACTOR +-----------------------+-----------+ 30 | | | TRANSFORMERS | LOADER | 31 | +-----------+-----------------------+-----------+ 32 | | FILE ==> CSV->FIELD->MERGE ==> OrientDB | 33 | +-----------+-----------------------+-----------+ 34 | ``` 35 | 36 | The pipeline, made of transformation and loading phases, can run in parallel by setting the configuration ```{"parallel":true}```. 37 | 38 | ## Installation 39 | Starting from OrientDB v2.0 the ETL module will be distributed bundled with the official release. 
If you want to use it, then follow these steps: 40 | - Clone the repository on your computer, by executing: 41 | - ```git clone https://github.com/orientechnologies/orientdb-etl.git``` 42 | - Compile the module, by executing: 43 | - ```mvn clean install``` 44 | - Copy ```script/oetl.sh``` (or .bat under Windows) to $ORIENTDB_HOME/bin 45 | - Copy ```target/orientdb-etl-2.0-SNAPSHOT.jar``` to $ORIENTDB_HOME/lib 46 | 47 | ## Usage 48 | 49 | ``` 50 | $ cd $ORIENTDB_HOME/bin 51 | $ ./oetl.sh config-dbpedia.json 52 | ``` 53 | 54 | ## Available Components 55 | - [Blocks](http://www.orientechnologies.com/docs/last/Block.html) 56 | - [Sources](http://www.orientechnologies.com/docs/last/Source.html) 57 | - [Extractors](http://www.orientechnologies.com/docs/last/Extractor.html) 58 | - [Transformers](http://www.orientechnologies.com/docs/last/Transformer.html) 59 | - [Loaders](http://www.orientechnologies.com/docs/last/Loader.html) 60 | 61 | Examples: 62 | - [Import DBPedia](http://www.orientechnologies.com/docs/last/Import-from-DBPedia.html) 63 | - [Import from a DBMS](http://www.orientechnologies.com/docs/last/Import-from-DBMS.html) 64 | 65 | 66 | See the [Documentation](http://www.orientechnologies.com/docs/last/Introduction.html) for more information. 67 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/transformer/OCodeTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.command.script.OCommandExecutorScript; 23 | import com.orientechnologies.orient.core.command.script.OCommandScript; 24 | import com.orientechnologies.orient.core.db.record.OIdentifiable; 25 | import com.orientechnologies.orient.core.record.impl.ODocument; 26 | import com.orientechnologies.orient.etl.OETLProcessor; 27 | 28 | import java.util.HashMap; 29 | import java.util.Map; 30 | 31 | /** 32 | * Executes arbitrary code in any supported language by JVM. 
33 | */ 34 | public class OCodeTransformer extends OAbstractTransformer { 35 | private String language = "javascript"; 36 | private OCommandExecutorScript cmd; 37 | private final Map params = new HashMap(); 38 | 39 | @Override 40 | public ODocument getConfiguration() { 41 | return new ODocument().fromJSON("{parameters:[" + getCommonConfigurationParameters() + "," 42 | + "{language:{optional:true,description:'Code language, default is Javascript'}}," 43 | + "{code:{optional:false,description:'Code to execute'}}" + "]}"); 44 | } 45 | 46 | @Override 47 | public void configure(OETLProcessor iProcessor, final ODocument iConfiguration, OCommandContext iContext) { 48 | super.configure(iProcessor, iConfiguration, iContext); 49 | if (iConfiguration.containsField("language")) 50 | language = iConfiguration.field("language"); 51 | 52 | String code; 53 | if (iConfiguration.containsField("code")) 54 | code = iConfiguration.field("code"); 55 | else 56 | throw new IllegalArgumentException("'code' parameter is mandatory in Code Transformer"); 57 | 58 | cmd = new OCommandExecutorScript().parse(new OCommandScript(language, code)); 59 | } 60 | 61 | @Override 62 | public String getName() { 63 | return "code"; 64 | } 65 | 66 | @Override 67 | public Object executeTransform(final Object input) { 68 | if (input == null) 69 | return null; 70 | 71 | params.put("input", input); 72 | if (input instanceof OIdentifiable) 73 | params.put("record", ((OIdentifiable) input).getRecord()); 74 | 75 | Object result = cmd.executeInContext(context, params); 76 | 77 | log(OETLProcessor.LOG_LEVELS.DEBUG, "executed code=%s, result=%s", cmd, result); 78 | 79 | return result; 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/transformer/OVertexTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD 
(info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.metadata.schema.OClass; 23 | import com.orientechnologies.orient.core.record.impl.ODocument; 24 | import com.orientechnologies.orient.core.storage.ORecordDuplicatedException; 25 | import com.orientechnologies.orient.etl.OETLProcessor; 26 | import com.tinkerpop.blueprints.impls.orient.OrientVertex; 27 | import com.tinkerpop.blueprints.impls.orient.OrientVertexType; 28 | 29 | public class OVertexTransformer extends OAbstractTransformer { 30 | private String vertexClass; 31 | private boolean skipDuplicates = false; 32 | 33 | @Override 34 | public ODocument getConfiguration() { 35 | return new ODocument().fromJSON("{parameters:[" + getCommonConfigurationParameters() + "," 36 | + "{class:{optional:true,description:'Vertex class name to assign. 
Default is " + OrientVertexType.CLASS_NAME + "'}}" 37 | + ",skipDuplicates:{optional:true,description:'Vertices with duplicate keys are skipped', default:false}" + "]" 38 | + ",input:['OrientVertex','ODocument'],output:'OrientVertex'}"); 39 | } 40 | 41 | @Override 42 | public void configure(final OETLProcessor iProcessor, final ODocument iConfiguration, final OCommandContext iContext) { 43 | super.configure(iProcessor, iConfiguration, iContext); 44 | if (iConfiguration.containsField("class")) 45 | vertexClass = (String) resolve(iConfiguration.field("class")); 46 | if (iConfiguration.containsField("skipDuplicates")) 47 | skipDuplicates = (Boolean) resolve(iConfiguration.field("skipDuplicates")); 48 | } 49 | 50 | @Override 51 | public String getName() { 52 | return "vertex"; 53 | } 54 | 55 | @Override 56 | public Object executeTransform(final Object input) { 57 | vertexClass = (String) resolve(vertexClass); 58 | if (vertexClass != null) { 59 | final OClass cls = pipeline.getGraphDatabase().getVertexType(vertexClass); 60 | if (cls == null) 61 | pipeline.getGraphDatabase().createVertexType(vertexClass); 62 | } 63 | 64 | final OrientVertex v = pipeline.getGraphDatabase().getVertex(input); 65 | if (v == null) 66 | return null; 67 | 68 | if (vertexClass != null && !vertexClass.equals(v.getRecord().getClassName())) 69 | try { 70 | v.setProperty("@class", vertexClass); 71 | } catch (ORecordDuplicatedException e) { 72 | if (skipDuplicates) { 73 | return null; 74 | } else { 75 | throw e; 76 | } 77 | } 78 | return v; 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/transformer/OMergeTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use 
this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.common.collection.OMultiValue; 22 | import com.orientechnologies.orient.core.db.record.OIdentifiable; 23 | import com.orientechnologies.orient.core.record.impl.ODocument; 24 | import com.orientechnologies.orient.etl.OETLProcessHaltedException; 25 | import com.orientechnologies.orient.etl.OETLProcessor; 26 | 27 | /** 28 | * Merges two records. Useful when a record needs to be updated rather than created. 29 | */ 30 | public class OMergeTransformer extends OAbstractLookupTransformer { 31 | @Override 32 | public ODocument getConfiguration() { 33 | return new ODocument().fromJSON("{parameters:[" + getCommonConfigurationParameters() + "," 34 | + "{joinFieldName:{optional:false,description:'field name containing the value to join'}}," 35 | + "{lookup:{optional:false,description:'. 
or Query to execute'}}," 36 | + "{unresolvedLinkAction:{optional:true,description:'action when a unresolved link is found',values:" 37 | + stringArray2Json(ACTION.values()) + "}}]," + "input:['ODocument'],output:'ODocument'}"); 38 | } 39 | 40 | @Override 41 | public String getName() { 42 | return "merge"; 43 | } 44 | 45 | @Override 46 | public Object executeTransform(final Object input) { 47 | Object joinValue = ((ODocument) ((OIdentifiable) input).getRecord()).field(joinFieldName); 48 | final Object result = lookup(joinValue, false); 49 | 50 | log(OETLProcessor.LOG_LEVELS.DEBUG, "joinValue=%s, lookupResult=%s", joinValue, result); 51 | 52 | if (result == null || OMultiValue.getSize(result) == 0) { 53 | // APPLY THE STRATEGY DEFINED IN unresolvedLinkAction 54 | switch (unresolvedLinkAction) { 55 | case NOTHING: 56 | break; 57 | case ERROR: 58 | processor.getStats().incrementErrors(); 59 | log(OETLProcessor.LOG_LEVELS.ERROR, "%s: ERROR Cannot resolve join for value '%s'", getName(), joinValue); 60 | break; 61 | case WARNING: 62 | processor.getStats().incrementWarnings(); 63 | log(OETLProcessor.LOG_LEVELS.INFO, "%s: WARN Cannot resolve join for value '%s'", getName(), joinValue); 64 | break; 65 | case SKIP: 66 | return null; 67 | case HALT: 68 | throw new OETLProcessHaltedException("[Merge transformer] Cannot resolve join for value '" + joinValue + "'"); 69 | } 70 | } else if (OMultiValue.getSize(result) > 1) 71 | throw new OETLProcessHaltedException("[Merge transformer] Multiple results returned from join for value '" + joinValue + "'"); 72 | else { 73 | final Object o = OMultiValue.getFirstValue(result); 74 | ((ODocument) o).merge((ODocument) ((OIdentifiable) input).getRecord(), true, false); 75 | log(OETLProcessor.LOG_LEVELS.DEBUG, "merged record %s with found record=%s", result, input); 76 | return o; 77 | } 78 | 79 | return input; 80 | } 81 | } 82 | -------------------------------------------------------------------------------- 
/src/test/java/com/orientechnologies/orient/etl/transformer/OVertexTransformerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | package com.orientechnologies.orient.etl.transformer; 19 | 20 | import com.orientechnologies.orient.core.config.OGlobalConfiguration; 21 | import com.orientechnologies.orient.etl.ETLBaseTest; 22 | import com.orientechnologies.orient.etl.OETLProcessHaltedException; 23 | import com.tinkerpop.blueprints.Parameter; 24 | import com.tinkerpop.blueprints.Vertex; 25 | import junit.framework.Assert; 26 | import org.junit.Before; 27 | import org.junit.Test; 28 | 29 | import static org.junit.Assert.assertEquals; 30 | 31 | /** 32 | * Tests ETL Vertex Transformer. 
33 | * 34 | * @author Gregor Frey 35 | */ 36 | public class OVertexTransformerTest extends ETLBaseTest { 37 | 38 | @Before 39 | public void setUp() { 40 | super.setUp(); 41 | OGlobalConfiguration.USE_WAL.setValue(true); 42 | 43 | graph.createVertexType("Person"); 44 | graph.createKeyIndex("name", Vertex.class, new Parameter("type", "UNIQUE"), new Parameter( 45 | "class", "Person")); 46 | graph.commit(); 47 | } 48 | 49 | @Test 50 | public void testCreateVertex() { 51 | process("{source: { content: { value: 'name,\nGregor' } }, extractor : { row: {} }," 52 | + " transformers: [{csv: {}}, {vertex: {class:'Person', skipDuplicates:false}}," 53 | + "], loader: { orientdb: { dbURL: 'memory:ETLBaseTest', dbType:'graph', useLightweightEdges:false } } }"); 54 | assertEquals(1, graph.countVertices("Person")); 55 | } 56 | 57 | @Test 58 | public void testCreateTargetVertexIfNotExists() { 59 | process("{source: { content: { value: 'name,idf,parent\nParent,1,\nChild,2,1' } }, extractor : { row: {} }," 60 | + " transformers: [{csv: {}}, {merge: { joinFieldName:'idf', lookup:'V.idf'}}, {vertex: {class:'V'}}," 61 | + "{edge:{ class: 'E', joinFieldName: 'parent', lookup: 'V.idf', unresolvedLinkAction: 'CREATE' }, if: '$input.parent IS NOT NULL'}" 62 | + "], loader: { orientdb: { dbURL: 'memory:ETLBaseTest', dbType:'graph', useLightweightEdges:false } } }"); 63 | 64 | assertEquals(2, graph.countVertices("V")); 65 | } 66 | 67 | @Test 68 | public void testErrorOnDuplicateVertex() { 69 | try { 70 | process("{source: { content: { value: 'name,\nGregor\nGregor\nHans' } }, extractor : { row: {} }," 71 | + " transformers: [{csv: {}}, {vertex: {class:'Person', skipDuplicates:false}}," 72 | + "], loader: { orientdb: { dbURL: 'memory:ETLBaseTest', dbType:'graph', useLightweightEdges:false } } }"); 73 | Assert.fail(); 74 | 75 | } catch (OETLProcessHaltedException e) { 76 | Assert.assertTrue(true); 77 | } 78 | 79 | } 80 | 81 | @Test 82 | public void testSkipDuplicateVertex() { 83 | 
process("{source: { content: { value: 'name,\nGregor\nGregor\nHans' } }, extractor : { row: {} }," 84 | + " transformers: [{csv: {}}, {vertex: {class:'Person', skipDuplicates:true}}," 85 | + "], loader: { orientdb: { dbURL: 'memory:ETLBaseTest', dbType:'graph', useLightweightEdges:false } } }"); 86 | assertEquals(2, graph.countVertices("Person")); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/listener/OScriptImporterListener.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 
16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.listener; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.command.script.OCommandScript; 23 | import com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx; 24 | import com.orientechnologies.orient.core.index.OIndex; 25 | import com.orientechnologies.orient.core.record.impl.ODocument; 26 | 27 | import java.util.HashMap; 28 | import java.util.Map; 29 | 30 | public class OScriptImporterListener implements OImporterListener { 31 | private final Map events; 32 | private Map scripts = new HashMap(); 33 | 34 | public OScriptImporterListener() { 35 | events = new HashMap(); 36 | } 37 | 38 | public OScriptImporterListener(final Map iEvents) { 39 | events = iEvents; 40 | } 41 | 42 | @Override 43 | public void onBeforeFile(final ODatabaseDocumentTx db, final OCommandContext iContext) { 44 | executeEvent(db, "onBeforeFile", iContext); 45 | } 46 | 47 | @Override 48 | public void onAfterFile(final ODatabaseDocumentTx db, final OCommandContext iContext) { 49 | executeEvent(db, "onAfterFile", iContext); 50 | } 51 | 52 | @Override 53 | public boolean onBeforeLine(final ODatabaseDocumentTx db, final OCommandContext iContext) { 54 | final Object ret = executeEvent(db, "onBeforeLine", iContext); 55 | if (ret != null && ret instanceof Boolean) 56 | return (Boolean) ret; 57 | return true; 58 | } 59 | 60 | @Override 61 | public void onAfterLine(final ODatabaseDocumentTx db, final OCommandContext iContext) { 62 | executeEvent(db, "onAfterLine", iContext); 63 | } 64 | 65 | @Override 66 | public void onDump(final ODatabaseDocumentTx db, final OCommandContext iContext) { 67 | executeEvent(db, "onDump", iContext); 68 | } 69 | 70 | @Override 71 | public void onJoinNotFound(ODatabaseDocumentTx db, OCommandContext iContext, OIndex iIndex, Object iKey) { 72 | executeEvent(db, "onJoinNotFound", iContext); 73 | } 74 | 75 | @Override 76 | public void 
validate(ODatabaseDocumentTx db, OCommandContext iContext, ODocument iRecord) { 77 | } 78 | 79 | private Object executeEvent(final ODatabaseDocumentTx db, final String iEventName, final OCommandContext iContext) { 80 | if (events == null) 81 | return null; 82 | 83 | OCommandScript script = scripts.get(iEventName); 84 | 85 | if (script == null) { 86 | final String code = events.get(iEventName); 87 | if (code != null) { 88 | // CACHE IT 89 | script = new OCommandScript(code).setLanguage("Javascript"); 90 | scripts.put(iEventName, script); 91 | } 92 | } 93 | 94 | if (script != null) { 95 | final Map pars = new HashMap(); 96 | pars.put("task", iContext); 97 | pars.put("importer", this); 98 | 99 | return db.command(script).execute(pars); 100 | } 101 | return null; 102 | } 103 | 104 | } 105 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/source/OHttpSource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 
16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.source; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.exception.OConfigurationException; 23 | import com.orientechnologies.orient.core.record.impl.ODocument; 24 | import com.orientechnologies.orient.etl.OETLProcessor; 25 | 26 | import java.io.BufferedReader; 27 | import java.io.IOException; 28 | import java.io.InputStreamReader; 29 | import java.io.Reader; 30 | import java.net.HttpURLConnection; 31 | import java.net.URL; 32 | 33 | /** 34 | * Extracts data from HTTP endpoint. 35 | */ 36 | public class OHttpSource extends OAbstractSource { 37 | protected BufferedReader reader; 38 | protected String url; 39 | protected String method = "GET"; 40 | protected HttpURLConnection conn; 41 | protected ODocument headers; 42 | 43 | @Override 44 | public ODocument getConfiguration() { 45 | return new ODocument().fromJSON("{parameters:[{url:{optional:false,description:'HTTP URL to fetch'}}," 46 | + "{httpMethod:{optional:true,description:'HTTP method to use between GET (default), POST, PUT, DELETE, HEAD'}}]," 47 | + "output:'String'}"); 48 | } 49 | 50 | @Override 51 | public void configure(OETLProcessor iProcessor, ODocument iConfiguration, OCommandContext iContext) { 52 | super.configure(iProcessor, iConfiguration, iContext); 53 | url = iConfiguration.field("url"); 54 | if (url == null || url.isEmpty()) 55 | throw new OConfigurationException("HTTP Source missing URL"); 56 | if (iConfiguration.containsField("method")) 57 | method = iConfiguration.field("method"); 58 | 59 | if (iConfiguration.containsField("headers")) 60 | headers = iConfiguration.field("headers"); 61 | } 62 | 63 | @Override 64 | public String getUnit() { 65 | return "bytes"; 66 | } 67 | 68 | @Override 69 | public String getName() { 70 | return "http"; 71 | } 72 | 73 | @Override 74 | public void begin() { 75 | try { 76 | final URL obj = new URL(url); 77 | conn = 
(HttpURLConnection) obj.openConnection(); 78 | conn.setRequestMethod(method); 79 | 80 | if (headers != null) 81 | for (String k : headers.fieldNames()) 82 | conn.setRequestProperty(k, (String) headers.field(k)); 83 | 84 | log(OETLProcessor.LOG_LEVELS.DEBUG, "Connecting to %s (method=%s)", url, method); 85 | 86 | final int responseCode = conn.getResponseCode(); 87 | 88 | log(OETLProcessor.LOG_LEVELS.DEBUG, "Connected: response code %d", responseCode); 89 | 90 | } catch (Exception e) { 91 | throw new OSourceException("[HTTP source] error on opening connection in " + method + " to URL: " + url, e); 92 | } 93 | } 94 | 95 | @Override 96 | public void end() { 97 | if (reader != null) 98 | try { 99 | reader.close(); 100 | } catch (IOException e) { 101 | e.printStackTrace(); 102 | } 103 | 104 | if (conn != null) 105 | conn.disconnect(); 106 | } 107 | 108 | @Override 109 | public Reader read() { 110 | try { 111 | reader = new BufferedReader(new InputStreamReader(conn.getInputStream())); 112 | return reader; 113 | } catch (Exception e) { 114 | throw new OSourceException("[HTTP source] Error on reading by using " + method + " from URL: " + url, e); 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/extractor/OJsonExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.extractor; 20 | 21 | import com.orientechnologies.orient.core.record.impl.ODocument; 22 | import com.orientechnologies.orient.core.serialization.serializer.OJSONReader; 23 | import com.orientechnologies.orient.etl.OExtractedItem; 24 | 25 | import java.io.IOException; 26 | import java.io.Reader; 27 | import java.text.ParseException; 28 | import java.util.NoSuchElementException; 29 | 30 | public class OJsonExtractor extends OAbstractSourceExtractor { 31 | protected OJSONReader jsonReader; 32 | protected Character first = null; 33 | protected OExtractedItem next; 34 | 35 | @Override 36 | public String getName() { 37 | return "json"; 38 | } 39 | 40 | @Override 41 | public boolean hasNext() { 42 | if (next != null) 43 | return true; 44 | 45 | if (jsonReader == null) 46 | return false; 47 | 48 | try { 49 | next = fetchNext(); 50 | return next != null; 51 | } catch (Exception e) { 52 | throw new OExtractorException("[JSON extractor] error on extract json", e); 53 | } 54 | } 55 | 56 | @Override 57 | public OExtractedItem next() { 58 | if (next != null) { 59 | final OExtractedItem ret = next; 60 | next = null; 61 | return ret; 62 | } 63 | 64 | if (!hasNext()) 65 | throw new NoSuchElementException("EOF"); 66 | 67 | try { 68 | return fetchNext(); 69 | 70 | } catch (Exception e) { 71 | throw new OExtractorException("[JSON extractor] error on extract json", e); 72 | } 73 | } 74 | 75 | @Override 76 | public void extract(final Reader iReader) { 77 | 
super.extract(iReader); 78 | try { 79 | final int read = reader.read(); 80 | if (read == -1) 81 | return; 82 | 83 | first = (char) read; 84 | if (first == '[') 85 | first = null; 86 | else if (first == '{') 87 | total = 1; 88 | else 89 | throw new OExtractorException("[JSON extractor] found unexpected character '" + first + "' at the beginning of input"); 90 | 91 | jsonReader = new OJSONReader(reader); 92 | 93 | } catch (Exception e) { 94 | throw new OExtractorException(e); 95 | } 96 | } 97 | 98 | @Override 99 | public ODocument getConfiguration() { 100 | return new ODocument().fromJSON("{parameters:[],output:'ODocument'}"); 101 | } 102 | 103 | @Override 104 | public String getUnit() { 105 | return "entries"; 106 | } 107 | 108 | protected OExtractedItem fetchNext() throws IOException, ParseException { 109 | if (!jsonReader.hasNext()) 110 | return null; 111 | 112 | String value = jsonReader.readString(new char[] { '}', ']' }, true); 113 | if (first != null) { 114 | // USE THE FIRST CHAR READ 115 | value = first + value; 116 | first = null; 117 | } 118 | 119 | if (total == 1 && jsonReader.lastChar() == '}') { 120 | jsonReader = null; 121 | } else if (total != 1 && jsonReader.lastChar() == ']') { 122 | if (!value.isEmpty()) 123 | value = value.substring(0, value.length() - 1); 124 | jsonReader = null; 125 | } else { 126 | jsonReader.readNext(OJSONReader.NEXT_IN_ARRAY); 127 | if (jsonReader.lastChar() == ']') 128 | jsonReader = null; 129 | } 130 | 131 | value = value.trim(); 132 | 133 | if (value.isEmpty()) 134 | return null; 135 | 136 | return new OExtractedItem(current++, new ODocument().fromJSON(value)); 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/extractor/ORowExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 
* * Licensed under the Apache License, Version 2.0 (the "License");
* * you may not use this file except in compliance with the License.
* * You may obtain a copy of the License at
* *
* * http://www.apache.org/licenses/LICENSE-2.0
* *
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS,
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* * See the License for the specific language governing permissions and
* * limitations under the License.
*
*/

package com.orientechnologies.orient.etl.extractor;

import com.orientechnologies.orient.core.command.OCommandContext;
import com.orientechnologies.orient.core.record.impl.ODocument;
import com.orientechnologies.orient.etl.OETLProcessor;
import com.orientechnologies.orient.etl.OExtractedItem;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.NoSuchElementException;

/**
 * Extractor that splits the character input into rows and emits every row as an {@link OExtractedItem} whose payload is the row
 * text.
 * <p>
 * Settings read by {@link #configure}:
 * <ul>
 * <li><b>multiLine</b> (default {@code true}): a row continues on the following physical line while a double quote is still
 * open</li>
 * <li><b>lineFeed</b> (default {@code "\r\n"}): text inserted between the physical lines that are joined into one row</li>
 * </ul>
 */
public class ORowExtractor extends OAbstractSourceExtractor {
  protected BufferedReader bReader;
  // ITEM READ AHEAD BY hasNext() AND NOT YET RETURNED BY next()
  protected OExtractedItem next;
  protected boolean        multiLine = true;
  protected String         lineFeed  = "\r\n";

  @Override
  public String getName() {
    return "row";
  }

  @Override
  public void configure(final OETLProcessor iProcessor, final ODocument iConfiguration, final OCommandContext iContext) {
    super.configure(iProcessor, iConfiguration, iContext);

    if (iConfiguration.containsField("multiLine"))
      multiLine = (Boolean) iConfiguration.field("multiLine");

    if (iConfiguration.containsField("lineFeed"))
      lineFeed = (String) iConfiguration.field("lineFeed");
  }

  /**
   * Reads one row ahead (if none is buffered yet) and tells whether it exists.
   *
   * @return true if a row is buffered or could be read, false if no reader is set or no further row is available
   * @throws OExtractorException
   *           if the underlying reader fails
   */
  @Override
  public boolean hasNext() {
    if (next != null)
      return true;

    if (bReader == null)
      return false;

    try {
      next = fetchNext();
      return next != null;
    } catch (IOException e) {
      throw new OExtractorException(e);
    }
  }

  /**
   * Returns the next row.
   * <p>
   * The row is always taken from the read-ahead buffer filled by {@link #hasNext()}. Reading a second row here, as a previous
   * version did, returned the rows out of order and could return null for the last one.
   *
   * @return the next extracted item, never null
   * @throws NoSuchElementException
   *           if no further row is available
   * @throws OExtractorException
   *           if the underlying reader fails
   */
  @Override
  public OExtractedItem next() {
    if (!hasNext())
      throw new NoSuchElementException("EOF");

    final OExtractedItem ret = next;
    next = null;
    return ret;
  }

  @Override
  public void extract(final Reader iReader) {
    super.extract(iReader);
    bReader = new BufferedReader(reader);
  }

  @Override
  public void end() {
    if (bReader != null) {
      try {
        bReader.close();
      } catch (IOException ignored) {
        // BEST EFFORT: THE EXTRACTION IS OVER, A FAILURE ON CLOSE CANNOT BE RECOVERED
      }
    }

    super.end();
  }

  @Override
  public String getUnit() {
    return "rows";
  }

  /**
   * Reads the next row from the reader.
   *
   * @return the new item, or null when the reader has nothing ready, the input is over or the row read is empty
   */
  protected OExtractedItem fetchNext() throws IOException {
    if (!bReader.ready())
      return null;

    final String line = readLine();

    if (line == null || line.isEmpty())
      return null;

    return new OExtractedItem(current++, line);
  }

  /**
   * Reads one logical row. With {@code multiLine} enabled, physical lines are joined with {@code lineFeed} until the number of
   * double quotes read so far is even, so quoted values may contain line breaks.
   *
   * @return the row text; in multi-line mode an empty string at end of input, otherwise null at end of input
   */
  protected String readLine() throws IOException {
    if (!multiLine)
      return bReader.readLine();

    // CONSIDER MULTIPLE LINES
    final StringBuilder sbLine = new StringBuilder();
    boolean isOpenQuote = false;
    do {
      if (isOpenQuote)
        // PREVIOUS LINE ENDED INSIDE A QUOTED VALUE: RESTORE THE LINE BREAK
        sbLine.append(lineFeed);

      final String l = bReader.readLine();
      if (l == null)
        break;

      sbLine.append(l);

      // CHECK FOR OPEN QUOTE
      for (char c : l.toCharArray())
        if ('"' == c)
          isOpenQuote = !isOpenQuote;

    } while (isOpenQuote);

    return sbLine.toString();
  }
}

--------------------------------------------------------------------------------
/src/main/java/com/orientechnologies/orient/etl/transformer/OAbstractLookupTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.db.record.OIdentifiable; 23 | import com.orientechnologies.orient.core.id.ORID; 24 | import com.orientechnologies.orient.core.index.OIndex; 25 | import com.orientechnologies.orient.core.metadata.schema.OType; 26 | import com.orientechnologies.orient.core.record.impl.ODocument; 27 | import com.orientechnologies.orient.core.sql.query.OSQLQuery; 28 | import com.orientechnologies.orient.core.sql.query.OSQLSynchQuery; 29 | import com.orientechnologies.orient.etl.OETLProcessor; 30 | 31 | import java.util.ArrayList; 32 | import java.util.Collection; 33 | import java.util.List; 34 | 35 | /** 36 | * Merges two records. Useful when a record needs to be updated rather than created. 
37 | */ 38 | public abstract class OAbstractLookupTransformer extends OAbstractTransformer { 39 | protected String joinFieldName; 40 | protected Object joinValue; 41 | protected String lookup; 42 | protected ACTION unresolvedLinkAction = ACTION.NOTHING; 43 | private OSQLQuery sqlQuery; 44 | private OIndex index; 45 | 46 | protected enum ACTION { 47 | NOTHING, WARNING, ERROR, HALT, SKIP, CREATE 48 | } 49 | 50 | @Override 51 | public void configure(final OETLProcessor iProcessor, final ODocument iConfiguration, OCommandContext iContext) { 52 | super.configure(iProcessor, iConfiguration, iContext); 53 | 54 | joinFieldName = iConfiguration.field("joinFieldName"); 55 | 56 | if (iConfiguration.containsField("joinValue")) 57 | joinValue = iConfiguration.field("joinValue"); 58 | 59 | if (iConfiguration.containsField("lookup")) 60 | lookup = iConfiguration.field("lookup"); 61 | 62 | if (iConfiguration.containsField("unresolvedLinkAction")) 63 | unresolvedLinkAction = ACTION.valueOf(iConfiguration.field("unresolvedLinkAction").toString().toUpperCase()); 64 | } 65 | 66 | protected Object lookup(Object joinValue, final boolean iReturnRIDS) { 67 | Object result = null; 68 | 69 | if (joinValue != null) { 70 | if (sqlQuery == null && index == null) { 71 | // ONLY THE FIRST TIME 72 | if (lookup.toUpperCase().startsWith("SELECT")) 73 | sqlQuery = new OSQLSynchQuery(lookup); 74 | else { 75 | index = pipeline.getDocumentDatabase().getMetadata().getIndexManager().getIndex(lookup); 76 | if (index == null) { 77 | log(OETLProcessor.LOG_LEVELS.DEBUG, "WARNING: index %s not found. 
Lookups could be really slow", lookup); 78 | final String[] parts = lookup.split("\\."); 79 | sqlQuery = new OSQLSynchQuery("SELECT FROM " + parts[0] + " WHERE " + parts[1] + " = ?"); 80 | } 81 | } 82 | } 83 | 84 | if (index != null) { 85 | final OType idxFieldType = index.getDefinition().getTypes()[0]; 86 | joinValue = OType.convert(joinValue, idxFieldType.getDefaultJavaType()); 87 | result = index.get(joinValue); 88 | } else { 89 | if (sqlQuery instanceof OSQLSynchQuery) 90 | ((OSQLSynchQuery) sqlQuery).resetPagination(); 91 | 92 | result = pipeline.getDocumentDatabase().query(sqlQuery, joinValue); 93 | } 94 | 95 | if (result != null && result instanceof Collection) { 96 | final Collection coll = (Collection) result; 97 | 98 | if (!coll.isEmpty()) { 99 | if (iReturnRIDS) { 100 | // CONVERT COLLECTION OF RECORDS IN RIDS 101 | final List resultRIDs = new ArrayList(coll.size()); 102 | for (Object o : coll) { 103 | if (o instanceof OIdentifiable) 104 | resultRIDs.add(((OIdentifiable) o).getIdentity()); 105 | } 106 | result = resultRIDs; 107 | } 108 | } else 109 | result = null; 110 | } else if (result instanceof OIdentifiable) { 111 | if (iReturnRIDS) 112 | result = ((OIdentifiable) result).getIdentity(); 113 | else 114 | result = ((OIdentifiable) result).getRecord(); 115 | } 116 | } 117 | 118 | return result; 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/source/OFileSource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 
7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.source; 20 | 21 | import com.orientechnologies.common.log.OLogManager; 22 | import com.orientechnologies.orient.core.command.OCommandContext; 23 | import com.orientechnologies.orient.core.record.impl.ODocument; 24 | import com.orientechnologies.orient.etl.OETLProcessor; 25 | 26 | import java.io.*; 27 | import java.nio.channels.FileChannel; 28 | import java.nio.channels.FileLock; 29 | import java.nio.charset.Charset; 30 | import java.util.zip.GZIPInputStream; 31 | 32 | public class OFileSource extends OAbstractSource { 33 | protected String fileName; 34 | protected String path; 35 | protected boolean lockFile = false; 36 | protected long byteParsed = 0; 37 | protected long byteToParse = -1; 38 | protected long skipFirst = 0; 39 | protected long skipLast = 0; 40 | 41 | protected RandomAccessFile raf = null; 42 | protected FileChannel channel = null; 43 | protected InputStreamReader fileReader = null; 44 | protected FileInputStream fis = null; 45 | protected FileLock lock = null; 46 | private Charset encoding = Charset.forName("UTF-8"); 47 | private File input; 48 | 49 | @Override 50 | public String getUnit() { 51 | return "bytes"; 52 | } 53 | 54 | @Override 55 | public ODocument getConfiguration() { 56 | return null; 57 | } 58 | 59 | @Override 60 | public void configure(OETLProcessor iProcessor, ODocument iConfiguration, OCommandContext iContext) { 61 | super.configure(iProcessor, iConfiguration, iContext); 62 | 63 | if 
(iConfiguration.containsField("lock")) 64 | lockFile = iConfiguration. field("lock"); 65 | 66 | if (iConfiguration.containsField("skipFirst")) 67 | skipFirst = Long.parseLong(iConfiguration. field("skipFirst")); 68 | 69 | if (iConfiguration.containsField("skipLast")) 70 | skipLast = Long.parseLong(iConfiguration. field("skipLast")); 71 | 72 | if (iConfiguration.containsField("encoding")) 73 | encoding = Charset.forName(iConfiguration. field("encoding")); 74 | 75 | path = (String) resolve(iConfiguration.field("path")); 76 | 77 | input = new File((String) path); 78 | 79 | if (!input.exists()) 80 | throw new OSourceException("[File source] path '" + path + "' not exists"); 81 | fileName = input.getName(); 82 | 83 | } 84 | 85 | @Override 86 | public void end() { 87 | if (lock != null) 88 | try { 89 | lock.release(); 90 | } catch (IOException e) { 91 | } 92 | 93 | if (fis != null) 94 | try { 95 | fis.close(); 96 | } catch (IOException e) { 97 | } 98 | 99 | if (fileReader != null) 100 | try { 101 | fileReader.close(); 102 | } catch (IOException e) { 103 | } 104 | 105 | if (channel != null) 106 | try { 107 | channel.close(); 108 | } catch (IOException e) { 109 | } 110 | 111 | if (raf != null) 112 | try { 113 | raf.close(); 114 | } catch (IOException e) { 115 | } 116 | } 117 | 118 | @Override 119 | public String getName() { 120 | return "file"; 121 | } 122 | 123 | @Override 124 | public void begin() { 125 | 126 | try { 127 | final String fileMode = lockFile ? 
"rw" : "r"; 128 | raf = new RandomAccessFile(input, fileMode); 129 | channel = raf.getChannel(); 130 | fis = new FileInputStream(input); 131 | if (fileName.endsWith(".gz")) 132 | fileReader = new InputStreamReader(new GZIPInputStream(fis),encoding); 133 | else { 134 | fileReader = new InputStreamReader(new FileInputStream(input),encoding); 135 | byteToParse = input.length(); 136 | } 137 | 138 | } catch (Exception e) { 139 | end(); 140 | } 141 | 142 | byteParsed = 0; 143 | 144 | if (lockFile) 145 | try { 146 | lock = channel.lock(); 147 | } catch (IOException e) { 148 | OLogManager.instance().error(this, "Error on locking file: %s", e, fileName); 149 | } 150 | 151 | log(OETLProcessor.LOG_LEVELS.INFO, "Reading from file " + path + " with encoding " + encoding.displayName()); 152 | } 153 | 154 | public boolean isClosed() { 155 | return fileReader != null; 156 | } 157 | 158 | public Reader getFileReader() { 159 | return fileReader; 160 | } 161 | 162 | @Override 163 | public Reader read() { 164 | return fileReader; 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/transformer/OFieldTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx; 23 | import com.orientechnologies.orient.core.db.record.OIdentifiable; 24 | import com.orientechnologies.orient.core.record.ORecord; 25 | import com.orientechnologies.orient.core.record.impl.ODocument; 26 | import com.orientechnologies.orient.core.sql.filter.OSQLFilter; 27 | import com.orientechnologies.orient.etl.OETLProcessor; 28 | 29 | import java.util.List; 30 | 31 | public class OFieldTransformer extends OAbstractTransformer { 32 | private String fieldName; 33 | private List fieldNames; 34 | private String expression; 35 | private Object value; 36 | private boolean setOperation = true; 37 | private OSQLFilter sqlFilter; 38 | private boolean save = false; 39 | 40 | @Override 41 | public ODocument getConfiguration() { 42 | return new ODocument().fromJSON("{parameters:[" + getCommonConfigurationParameters() + "," 43 | + "{fieldName:{optional:true,description:'field name to apply the result'}}," 44 | + "{fieldNames:{optional:true,description:'field names to apply the result'}}," 45 | + "{expression:{optional:true,description:'expression to evaluate. Mandatory with operation=set (default)'}}" 46 | + "{value:{optional:true,description:'value to set'}}" 47 | + "{operation:{optional:false,description:'operation to execute against the field: set, remove. 
Default is set'}}" 48 | + "{save:{optional:true,description:'save the vertex/edge/document right after the setting of the field'}}" + "]," 49 | + "input:['ODocument'],output:'ODocument'}"); 50 | } 51 | 52 | @Override 53 | public void configure(OETLProcessor iProcessor, final ODocument iConfiguration, OCommandContext iContext) { 54 | super.configure(iProcessor, iConfiguration, iContext); 55 | fieldName = (String) resolve(iConfiguration.field("fieldName")); 56 | fieldNames = (List) resolve(iConfiguration.field("fieldNames")); 57 | 58 | if (fieldNames == null && fieldName == null) 59 | throw new IllegalArgumentException("Field transformer must specify 'fieldName' or 'fieldNames'"); 60 | 61 | expression = iConfiguration.field("expression"); 62 | value = iConfiguration.field("value"); 63 | 64 | if (expression != null && value != null) 65 | throw new IllegalArgumentException("Field transformer cannot specify both 'expression' and 'value'"); 66 | 67 | if (iConfiguration.containsField("save")) 68 | save = (Boolean) iConfiguration.field("save"); 69 | 70 | if (iConfiguration.containsField("operation")) 71 | setOperation = "set".equalsIgnoreCase((String) iConfiguration.field("operation")); 72 | } 73 | 74 | @Override 75 | public String getName() { 76 | return "field"; 77 | } 78 | 79 | @Override 80 | public Object executeTransform(final Object input) { 81 | if (input instanceof OIdentifiable) { 82 | final ORecord rec = ((OIdentifiable) input).getRecord(); 83 | 84 | if (rec instanceof ODocument) { 85 | final ODocument doc = (ODocument) rec; 86 | 87 | if (setOperation) { 88 | final Object newValue; 89 | if (expression != null) { 90 | if (sqlFilter == null) 91 | // ONLY THE FIRST TIME 92 | sqlFilter = new OSQLFilter(expression, context, null); 93 | 94 | newValue = sqlFilter.evaluate(doc, null, context); 95 | } else 96 | newValue = value; 97 | 98 | // SET THE TRANSFORMED FIELD BACK 99 | doc.field(fieldName, newValue); 100 | 101 | log(OETLProcessor.LOG_LEVELS.DEBUG, "set %s=%s in 
document=%s", fieldName, newValue, doc); 102 | } else { 103 | if (fieldName != null) { 104 | final Object prev = doc.removeField(fieldName); 105 | log(OETLProcessor.LOG_LEVELS.DEBUG, "removed %s (value=%s) from document=%s", fieldName, prev, doc); 106 | } else { 107 | for (String f : fieldNames) { 108 | final Object prev = doc.removeField(f); 109 | log(OETLProcessor.LOG_LEVELS.DEBUG, "removed %s (value=%s) from document=%s", f, prev, doc); 110 | } 111 | } 112 | } 113 | 114 | if (save) { 115 | log(OETLProcessor.LOG_LEVELS.DEBUG, "saving record %s", doc); 116 | final ODatabaseDocumentTx db = super.pipeline.getDocumentDatabase(); 117 | db.save(doc); 118 | } 119 | } 120 | } 121 | 122 | return input; 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/OETLPipeline.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 
16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl; 20 | 21 | import com.orientechnologies.common.concur.ONeedRetryException; 22 | import com.orientechnologies.common.log.OLogManager; 23 | import com.orientechnologies.orient.core.command.OBasicCommandContext; 24 | import com.orientechnologies.orient.core.command.OCommandContext; 25 | import com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx; 26 | import com.orientechnologies.orient.etl.loader.OLoader; 27 | import com.orientechnologies.orient.etl.transformer.OTransformer; 28 | import com.tinkerpop.blueprints.impls.orient.OrientBaseGraph; 29 | 30 | import java.util.List; 31 | 32 | /** 33 | * ETL processor class. 34 | * 35 | * @author Luca Garulli (l.garulli-at-orientechnologies.com) 36 | */ 37 | public class OETLPipeline { 38 | protected final OETLProcessor processor; 39 | protected final List transformers; 40 | protected final OLoader loader; 41 | protected final OCommandContext context; 42 | protected final OETLProcessor.LOG_LEVELS logLevel; 43 | protected boolean haltOnError = true; 44 | protected final int maxRetries; 45 | protected ODatabaseDocumentTx db; 46 | protected OrientBaseGraph graph; 47 | 48 | public OETLPipeline(final OETLProcessor iProcessor, final List iTransformers, final OLoader iLoader, 49 | final OETLProcessor.LOG_LEVELS iLogLevel, final int iMaxRetries, final boolean iHaltOnError) { 50 | logLevel = iLogLevel; 51 | haltOnError = iHaltOnError; 52 | processor = iProcessor; 53 | context = new OBasicCommandContext(); 54 | 55 | transformers = iTransformers; 56 | loader = iLoader; 57 | 58 | for (OTransformer t : transformers) 59 | t.setPipeline(this); 60 | loader.setPipeline(this); 61 | 62 | maxRetries = iMaxRetries; 63 | } 64 | 65 | public void begin() { 66 | loader.begin(); 67 | for (OTransformer t : transformers) 68 | t.begin(); 69 | } 70 | 71 | public OLoader getLoader() { 72 | return loader; 73 | } 74 | 75 | public List getTransformers() { 76 | return transformers; 77 
| } 78 | 79 | public ODatabaseDocumentTx getDocumentDatabase() { 80 | if (db != null) 81 | db.activateOnCurrentThread(); 82 | return db; 83 | } 84 | 85 | public OETLPipeline setDocumentDatabase(final ODatabaseDocumentTx iDb) { 86 | db = iDb; 87 | return this; 88 | } 89 | 90 | public OrientBaseGraph getGraphDatabase() { 91 | if (graph != null) 92 | graph.makeActive(); 93 | return graph; 94 | } 95 | 96 | public OETLPipeline setGraphDatabase(final OrientBaseGraph iGraph) { 97 | graph = iGraph; 98 | return this; 99 | } 100 | 101 | public OCommandContext getContext() { 102 | return context; 103 | } 104 | 105 | protected Object execute(final OExtractedItem source) { 106 | int retry = 0; 107 | do { 108 | try { 109 | Object current = source.payload; 110 | 111 | context.setVariable("extractedNum", source.num); 112 | context.setVariable("extractedPayload", source.payload); 113 | 114 | for (OTransformer t : transformers) { 115 | current = t.transform(current); 116 | if (current == null) { 117 | if (logLevel == OETLProcessor.LOG_LEVELS.DEBUG) 118 | OLogManager.instance().warn(this, "Transformer [%s] returned null, skip rest of pipeline execution", t); 119 | break; 120 | } 121 | } 122 | 123 | if (current != null) 124 | // LOAD 125 | loader.load(current, context); 126 | 127 | return current; 128 | } catch (ONeedRetryException e) { 129 | loader.rollback(); 130 | retry++; 131 | processor.out(OETLProcessor.LOG_LEVELS.INFO, "Error in pipeline execution, retry = %d/%d (exception=%s)", retry, 132 | maxRetries, e); 133 | } catch (OETLProcessHaltedException e) { 134 | processor.out(OETLProcessor.LOG_LEVELS.ERROR, "Pipeline execution halted"); 135 | processor.getStats().incrementErrors(); 136 | 137 | loader.rollback(); 138 | throw e; 139 | 140 | } catch (Exception e) { 141 | processor.out(OETLProcessor.LOG_LEVELS.ERROR, "Error in Pipeline execution: %s", e); 142 | processor.getStats().incrementErrors(); 143 | 144 | if (!haltOnError) 145 | return null; 146 | 147 | e.printStackTrace(); 148 
| loader.rollback(); 149 | throw new OETLProcessHaltedException(e); 150 | 151 | } 152 | } while (retry < maxRetries); 153 | 154 | return this; 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/OAbstractETLComponent.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl; 20 | 21 | import com.orientechnologies.common.parser.OSystemVariableResolver; 22 | import com.orientechnologies.common.parser.OVariableParser; 23 | import com.orientechnologies.common.parser.OVariableParserListener; 24 | import com.orientechnologies.orient.core.command.OCommandContext; 25 | import com.orientechnologies.orient.core.db.record.OIdentifiable; 26 | import com.orientechnologies.orient.core.exception.OConfigurationException; 27 | import com.orientechnologies.orient.core.record.impl.ODocument; 28 | import com.orientechnologies.orient.core.sql.filter.OSQLFilter; 29 | import com.orientechnologies.orient.core.sql.filter.OSQLPredicate; 30 | 31 | /** 32 | * ETL abstract component. 
33 | */ 34 | public abstract class OAbstractETLComponent implements OETLComponent { 35 | protected OETLProcessor processor; 36 | protected OCommandContext context; 37 | protected OETLProcessor.LOG_LEVELS logLevel; 38 | protected String output = null; 39 | protected String ifExpression; 40 | 41 | @Override 42 | public void configure(final OETLProcessor iProcessor, final ODocument iConfiguration, final OCommandContext iContext) { 43 | processor = iProcessor; 44 | context = iContext; 45 | 46 | ifExpression = iConfiguration.field("if"); 47 | 48 | if (iConfiguration.containsField("log")) 49 | logLevel = OETLProcessor.LOG_LEVELS.valueOf(iConfiguration.field("log").toString().toUpperCase()); 50 | else 51 | logLevel = iProcessor.getLogLevel(); 52 | 53 | if (iConfiguration.containsField("output")) 54 | output = (String) iConfiguration.field("output"); 55 | } 56 | 57 | @Override 58 | public void begin() { 59 | } 60 | 61 | @Override 62 | public void end() { 63 | } 64 | 65 | @Override 66 | public ODocument getConfiguration() { 67 | return new ODocument().fromJSON("{parameters:[" + getCommonConfigurationParameters() + "]"); 68 | } 69 | 70 | @Override 71 | public String toString() { 72 | return getName(); 73 | } 74 | 75 | protected boolean skip(final Object input) { 76 | final OSQLFilter ifFilter = getIfFilter(); 77 | if (ifFilter != null) { 78 | final ODocument doc = input instanceof OIdentifiable ? 
(ODocument) ((OIdentifiable) input).getRecord() : null; 79 | 80 | log(OETLProcessor.LOG_LEVELS.DEBUG, "Evaluating conditional expression if=%s...", ifFilter); 81 | 82 | final Object result = ifFilter.evaluate(doc, null, context); 83 | if (!(result instanceof Boolean)) 84 | throw new OConfigurationException("'if' expression in Transformer " + getName() + " returned '" + result 85 | + "' instead of boolean"); 86 | 87 | return !(Boolean) result; 88 | } 89 | return false; 90 | } 91 | 92 | protected OSQLFilter getIfFilter() { 93 | if (ifExpression != null) 94 | return new OSQLFilter(ifExpression, context, null); 95 | return null; 96 | } 97 | 98 | protected String getCommonConfigurationParameters() { 99 | return "{log:{optional:true,description:'Can be any of [NONE, ERROR, INFO, DEBUG]. Default is INFO'}}," 100 | + "{if:{optional:true,description:'Conditional expression. If true, the block is executed, otherwise is skipped'}}," 101 | + "{output:{optional:true,description:'Variable name to store the transformer output. 
If null, the output will be passed to the pipeline as input for the next component.'}}"; 102 | 103 | } 104 | 105 | protected String stringArray2Json(final Object[] iObject) { 106 | final StringBuilder buffer = new StringBuilder(256); 107 | buffer.append('['); 108 | for (int i = 0; i < iObject.length; ++i) { 109 | if (i > 0) 110 | buffer.append(','); 111 | 112 | final Object value = iObject[i]; 113 | if (value != null) { 114 | buffer.append("'"); 115 | buffer.append(value.toString()); 116 | buffer.append("'"); 117 | } 118 | } 119 | buffer.append(']'); 120 | return buffer.toString(); 121 | } 122 | 123 | protected Object resolve(final Object iContent) { 124 | if (context == null || iContent == null) 125 | return iContent; 126 | 127 | Object value; 128 | if (iContent instanceof String) { 129 | if (((String) iContent).startsWith("$") && !((String) iContent).startsWith(OSystemVariableResolver.VAR_BEGIN)) 130 | value = context.getVariable(iContent.toString()); 131 | else 132 | value = OVariableParser.resolveVariables((String) iContent, OSystemVariableResolver.VAR_BEGIN, 133 | OSystemVariableResolver.VAR_END, new OVariableParserListener() { 134 | @Override 135 | public Object resolve(final String iVariable) { 136 | return context.getVariable(iVariable); 137 | } 138 | }); 139 | } else 140 | value = iContent; 141 | 142 | if (value instanceof String) 143 | value = OVariableParser.resolveVariables((String) value, "={", "}", new OVariableParserListener() { 144 | 145 | @Override 146 | public Object resolve(final String iVariable) { 147 | return new OSQLPredicate(iVariable).evaluate(context); 148 | } 149 | 150 | }); 151 | return value; 152 | } 153 | 154 | protected void log(final OETLProcessor.LOG_LEVELS iLevel, String iText, final Object... 
iArgs) { 155 | if (logLevel.ordinal() >= iLevel.ordinal()) { 156 | final Long extractedNum = (Long) context.getVariable("extractedNum"); 157 | if (extractedNum != null) 158 | System.out.println("[" + extractedNum + ":" + getName() + "] " + iLevel + " " + String.format(iText, iArgs)); 159 | else 160 | System.out.println("[" + getName() + "] " + iLevel + " " + String.format(iText, iArgs)); 161 | } 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/test/java/com/orientechnologies/orient/etl/transformer/OEdgeTransformerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 
16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.orient.core.config.OGlobalConfiguration; 22 | import com.orientechnologies.orient.core.metadata.schema.OClass; 23 | import com.orientechnologies.orient.core.metadata.schema.OType; 24 | import com.orientechnologies.orient.etl.ETLBaseTest; 25 | import com.orientechnologies.orient.etl.OETLProcessHaltedException; 26 | import com.tinkerpop.blueprints.Direction; 27 | import com.tinkerpop.blueprints.Edge; 28 | import com.tinkerpop.blueprints.Vertex; 29 | import com.tinkerpop.blueprints.impls.orient.OrientEdgeType; 30 | import com.tinkerpop.blueprints.impls.orient.OrientVertexType; 31 | import org.junit.Test; 32 | 33 | import java.util.Iterator; 34 | import java.util.Set; 35 | 36 | import static org.junit.Assert.*; 37 | 38 | /** 39 | * Tests ETL Field Transformer. 40 | * 41 | * @author Luca Garulli 42 | */ 43 | public class OEdgeTransformerTest extends ETLBaseTest { 44 | 45 | @Override 46 | public void setUp() { 47 | OGlobalConfiguration.USE_WAL.setValue(true); 48 | 49 | super.setUp(); 50 | final OrientVertexType v1 = graph.createVertexType("V1"); 51 | final OrientVertexType v2 = graph.createVertexType("V2"); 52 | 53 | final OrientEdgeType edgeType = graph.createEdgeType("Friend"); 54 | edgeType.createProperty("in", OType.LINK, v2); 55 | edgeType.createProperty("out", OType.LINK, v1); 56 | 57 | // ASSURE NOT DUPLICATES 58 | edgeType.createIndex("out_in", OClass.INDEX_TYPE.UNIQUE, "in", "out"); 59 | 60 | graph.addVertex("class:V2").setProperty("name", "Luca"); 61 | graph.commit(); 62 | } 63 | 64 | @Test 65 | public void testNotLightweightEdge() { 66 | process("{source: { content: { value: 'name,surname,friend\nJay,Miner,Luca' } }, extractor : { row: {} }," 67 | + " transformers: [{csv: {}}, {vertex: {class:'V1'}}, {edge:{class:'Friend',joinFieldName:'friend',lookup:'V2.name'}}," 68 | + "], loader: { orientdb: { dbURL: 'memory:ETLBaseTest', dbType:'graph', 
useLightweightEdges:false } } }"); 69 | 70 | assertEquals(1, graph.countVertices("V1")); 71 | assertEquals(1, graph.countVertices("V2")); 72 | assertEquals(1, graph.countEdges("Friend")); 73 | } 74 | 75 | @Test 76 | public void testLookupMultipleValues() { 77 | graph.addVertex("class:V2").setProperty("name", "Luca"); 78 | graph.commit(); 79 | 80 | process("{source: { content: { value: 'name,surname,friend\nJay,Miner,Luca' } }, extractor : { row: {} }," 81 | + " transformers: [{csv: {}}, {vertex: {class:'V1'}}, {edge:{class:'Friend',joinFieldName:'friend',lookup:'V2.name'}}," 82 | + "], loader: { orientdb: { dbURL: 'memory:ETLBaseTest', dbType:'graph', useLightweightEdges:false } } }"); 83 | 84 | assertEquals(1, graph.countVertices("V1")); 85 | assertEquals(2, graph.countVertices("V2")); 86 | assertEquals(2, graph.countEdges("Friend")); 87 | } 88 | 89 | @Test 90 | public void testEdgeWithProperties() { 91 | process("{source: { content: { value: 'id,name,surname,friendSince,friendId,friendName,friendSurname\n0,Jay,Miner,1996,1,Luca,Garulli' } }, extractor : { row: {} }," 92 | + " transformers: [{csv: {}}, {vertex: {class:'V1'}}, " 93 | + "{edge:{unresolvedLinkAction:'CREATE',class:'Friend',joinFieldName:'friendId',lookup:'V2.fid',targetVertexFields:{name:'${input.friendName}',surname:'${input.friendSurname}'},edgeFields:{since:'${input.friendSince}'}}}," 94 | + "{field:{fieldNames:['friendSince','friendId','friendName','friendSurname'],operation:'remove'}}" 95 | + "], loader: { orientdb: { dbURL: 'memory:ETLBaseTest', dbType:'graph', useLightweightEdges:false } } }"); 96 | 97 | assertEquals(1, graph.countVertices("V1")); 98 | assertEquals(2, graph.countVertices("V2")); 99 | assertEquals(1, graph.countEdges("Friend")); 100 | 101 | final Iterator v = graph.getVerticesOfClass("V2").iterator(); 102 | assertTrue(v.hasNext()); 103 | assertNotNull(v.next()); 104 | assertTrue(v.hasNext()); 105 | final Vertex v1 = v.next(); 106 | assertNotNull(v1); 107 | 108 | final Set 
v1Props = v1.getPropertyKeys(); 109 | 110 | assertEquals(3, v1Props.size()); 111 | assertEquals(v1.getProperty("name"), "Luca"); 112 | assertEquals(v1.getProperty("surname"), "Garulli"); 113 | assertEquals(v1.getProperty("fid"), 1); 114 | 115 | final Iterator edge = v1.getEdges(Direction.IN).iterator(); 116 | assertTrue(edge.hasNext()); 117 | 118 | final Edge e = edge.next(); 119 | assertNotNull(e); 120 | final Set eProps = e.getPropertyKeys(); 121 | assertEquals(1, eProps.size()); 122 | assertEquals(e.getProperty("since"), 1996); 123 | 124 | final Vertex v0 = e.getVertex(Direction.OUT); 125 | assertNotNull(v0); 126 | 127 | final Set v0Props = v0.getPropertyKeys(); 128 | 129 | assertEquals(3, v0Props.size()); 130 | assertEquals(v0.getProperty("name"), "Jay"); 131 | assertEquals(v0.getProperty("surname"), "Miner"); 132 | assertEquals(v0.getProperty("id"), 0); 133 | } 134 | 135 | @Test(expected = OETLProcessHaltedException.class) 136 | public void testErrorOnDuplicateVertex() { 137 | process("{source: { content: { value: 'name,surname,friend\nJay,Miner,Luca\nJay,Miner,Luca' } }, extractor : { row: {} }," 138 | + " transformers: [{csv: {}}, {merge: {joinFieldName:'name',lookup:'V1.name'}}, {vertex: {class:'V1'}}, {edge:{class:'Friend',joinFieldName:'friend',lookup:'V2.name'}}," 139 | + "], loader: { orientdb: { dbURL: 'memory:ETLBaseTest', dbType:'graph', useLightweightEdges:false } } }"); 140 | 141 | } 142 | 143 | @Test 144 | public void testSkipDuplicateVertex() { 145 | process("{source: { content: { value: 'name,surname,friend\nJay,Miner,Luca\nJay,Miner,Luca' } }, extractor : { row: {} }," 146 | + " transformers: [{csv: {}}, {merge: {joinFieldName:'name',lookup:'V1.name'}}, {vertex: {class:'V1'}}, {edge:{class:'Friend',skipDuplicates:true, joinFieldName:'friend',lookup:'V2.name'}}," 147 | + "], loader: { orientdb: { dbURL: 'memory:ETLBaseTest', dbType:'graph', useLightweightEdges:false } } }"); 148 | 149 | assertEquals(1, graph.countVertices("V1")); 150 | 
assertEquals(1, graph.countVertices("V2")); 151 | assertEquals(1, graph.countEdges("Friend")); 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/transformer/OLinkTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 
16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.common.collection.OMultiValue; 22 | import com.orientechnologies.orient.core.command.OCommandContext; 23 | import com.orientechnologies.orient.core.db.record.OIdentifiable; 24 | import com.orientechnologies.orient.core.exception.OConfigurationException; 25 | import com.orientechnologies.orient.core.metadata.schema.OType; 26 | import com.orientechnologies.orient.core.record.impl.ODocument; 27 | import com.orientechnologies.orient.etl.OETLProcessHaltedException; 28 | import com.orientechnologies.orient.etl.OETLProcessor; 29 | 30 | import java.util.*; 31 | 32 | /** 33 | * Converts a JOIN in LINK 34 | */ 35 | public class OLinkTransformer extends OAbstractLookupTransformer { 36 | private String joinValue; 37 | private String linkFieldName; 38 | private OType linkFieldType; 39 | 40 | @Override 41 | public ODocument getConfiguration() { 42 | return new ODocument() 43 | .fromJSON("{parameters:[" 44 | + getCommonConfigurationParameters() 45 | + "," 46 | + "{joinFieldName:{optional:true,description:'field name containing the value to join'}}," 47 | + "{joinValue:{optional:true,description:'value to use in lookup query'}}," 48 | + "{linkFieldName:{optional:false,description:'field name containing the link to set'}}," 49 | + "{linkFieldType:{optional:true,description:'field type containing the link to set. Use LINK for single link and LINKSET or LINKLIST for many'}}," 50 | + "{lookup:{optional:false,description:'. 
or Query to execute'}}," 51 | + "{unresolvedLinkAction:{optional:true,description:'action when a unresolved link is found',values:" 52 | + stringArray2Json(ACTION.values()) + "}}]," + "input:['ODocument'],output:'ODocument'}"); 53 | } 54 | 55 | @Override 56 | public void configure(OETLProcessor iProcessor, final ODocument iConfiguration, OCommandContext iContext) { 57 | super.configure(iProcessor, iConfiguration, iContext); 58 | 59 | joinValue = iConfiguration.field("joinValue"); 60 | linkFieldName = iConfiguration.field("linkFieldName"); 61 | if (iConfiguration.containsField("linkFieldType")) 62 | linkFieldType = OType.valueOf((String) iConfiguration.field("linkFieldType")); 63 | } 64 | 65 | @Override 66 | public String getName() { 67 | return "link"; 68 | } 69 | 70 | @Override 71 | public Object executeTransform(final Object input) { 72 | if (!(input instanceof OIdentifiable)) { 73 | log(OETLProcessor.LOG_LEVELS.DEBUG, "skip because input value is not a record, but rather an instance of class: %s", input.getClass()); 74 | return null; 75 | } 76 | 77 | final ODocument doc = ((OIdentifiable) input).getRecord(); 78 | final Object joinRuntimeValue; 79 | if (joinFieldName != null) 80 | joinRuntimeValue = doc.field(joinFieldName); 81 | else if (joinValue != null) 82 | joinRuntimeValue = resolve(joinValue); 83 | else 84 | joinRuntimeValue = null; 85 | 86 | Object result; 87 | if (OMultiValue.isMultiValue(joinRuntimeValue)) { 88 | // RESOLVE SINGLE JOINS 89 | final Collection singleJoinsResult = new ArrayList(); 90 | for (Object o : OMultiValue.getMultiValueIterable(joinRuntimeValue)) { 91 | singleJoinsResult.add(lookup(o, true)); 92 | } 93 | result = singleJoinsResult; 94 | } else 95 | result = lookup(joinRuntimeValue, true); 96 | 97 | log(OETLProcessor.LOG_LEVELS.DEBUG, "joinRuntimeValue=%s, lookupResult=%s", joinRuntimeValue, result); 98 | 99 | if (result != null) { 100 | if (linkFieldType != null) { 101 | // CONVERT IT 102 | if (linkFieldType == OType.LINK) { 103 | 
if (result instanceof Collection) { 104 | if (!((Collection) result).isEmpty()) 105 | result = ((Collection) result).iterator().next(); 106 | else 107 | result = null; 108 | } 109 | } else if (linkFieldType == OType.LINKSET) { 110 | if (!(result instanceof Collection)) { 111 | final Set res = new HashSet(); 112 | res.add((OIdentifiable) result); 113 | result = res; 114 | } 115 | } else if (linkFieldType == OType.LINKLIST) { 116 | if (!(result instanceof Collection)) { 117 | final List res = new ArrayList(); 118 | res.add((OIdentifiable) result); 119 | result = res; 120 | } 121 | } 122 | } 123 | 124 | if (result == null) { 125 | // APPLY THE STRATEGY DEFINED IN unresolvedLinkAction 126 | switch (unresolvedLinkAction) { 127 | case CREATE: 128 | if (lookup != null) { 129 | final String[] lookupParts = lookup.split("\\."); 130 | final ODocument linkedDoc = new ODocument(lookupParts[0]); 131 | linkedDoc.field(lookupParts[1], joinRuntimeValue); 132 | linkedDoc.save(); 133 | 134 | log(OETLProcessor.LOG_LEVELS.DEBUG, "created new document=%s", linkedDoc.getRecord()); 135 | 136 | result = linkedDoc; 137 | } else 138 | throw new OConfigurationException("Cannot create linked document because target class is unknown. 
Use 'lookup' field"); 139 | break; 140 | case ERROR: 141 | processor.getStats().incrementErrors(); 142 | log(OETLProcessor.LOG_LEVELS.ERROR, "%s: ERROR Cannot resolve join for value '%s'", getName(), joinRuntimeValue); 143 | break; 144 | case WARNING: 145 | processor.getStats().incrementWarnings(); 146 | log(OETLProcessor.LOG_LEVELS.INFO, "%s: WARN Cannot resolve join for value '%s'", getName(), joinRuntimeValue); 147 | break; 148 | case SKIP: 149 | return null; 150 | case HALT: 151 | throw new OETLProcessHaltedException("[Link transformer] Cannot resolve join for value '" + joinRuntimeValue + "'"); 152 | } 153 | } 154 | } 155 | 156 | // SET THE TRANSFORMED FIELD BACK 157 | doc.field(linkFieldName, result); 158 | 159 | log(OETLProcessor.LOG_LEVELS.DEBUG, "set %s=%s in document=%s", linkFieldName, result, input); 160 | 161 | return input; 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/OETLComponentFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 
16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl; 20 | 21 | import com.orientechnologies.common.log.OLogManager; 22 | import com.orientechnologies.orient.etl.block.OBlock; 23 | import com.orientechnologies.orient.etl.block.OCodeBlock; 24 | import com.orientechnologies.orient.etl.block.OConsoleBlock; 25 | import com.orientechnologies.orient.etl.block.OLetBlock; 26 | import com.orientechnologies.orient.etl.extractor.OExtractor; 27 | import com.orientechnologies.orient.etl.extractor.OJDBCExtractor; 28 | import com.orientechnologies.orient.etl.extractor.OJsonExtractor; 29 | import com.orientechnologies.orient.etl.extractor.ORowExtractor; 30 | import com.orientechnologies.orient.etl.loader.OLoader; 31 | import com.orientechnologies.orient.etl.loader.OOrientDBLoader; 32 | import com.orientechnologies.orient.etl.loader.OOutputLoader; 33 | import com.orientechnologies.orient.etl.source.OContentSource; 34 | import com.orientechnologies.orient.etl.source.OFileSource; 35 | import com.orientechnologies.orient.etl.source.OHttpSource; 36 | import com.orientechnologies.orient.etl.source.OInputSource; 37 | import com.orientechnologies.orient.etl.source.OSource; 38 | import com.orientechnologies.orient.etl.transformer.*; 39 | 40 | import java.util.HashMap; 41 | import java.util.Map; 42 | 43 | /** 44 | * ETL component factory. Registers all the ETL components. 
45 | * 46 | * @author Luca Garulli (l.garulli-at-orientechnologies.com) 47 | */ 48 | public class OETLComponentFactory { 49 | protected final Map> sources = new HashMap>(); 50 | protected final Map> blocks = new HashMap>(); 51 | protected final Map> extractors = new HashMap>(); 52 | protected final Map> transformers = new HashMap>(); 53 | protected final Map> loaders = new HashMap>(); 54 | 55 | public OETLComponentFactory() { 56 | registerSource(OFileSource.class); 57 | registerSource(OHttpSource.class); 58 | registerSource(OInputSource.class); 59 | registerSource(OContentSource.class); 60 | 61 | registerBlock(OCodeBlock.class); 62 | registerBlock(OLetBlock.class); 63 | registerBlock(OConsoleBlock.class); 64 | 65 | registerExtractor(OJDBCExtractor.class); 66 | registerExtractor(ORowExtractor.class); 67 | registerExtractor(OJsonExtractor.class); 68 | 69 | registerTransformer(OBlockTransformer.class); 70 | registerTransformer(OCodeTransformer.class); 71 | registerTransformer(OCSVTransformer.class); 72 | registerTransformer(OCommandTransformer.class); 73 | registerTransformer(OEdgeTransformer.class); 74 | registerTransformer(OFieldTransformer.class); 75 | registerTransformer(OJSONTransformer.class); 76 | registerTransformer(OLinkTransformer.class); 77 | registerTransformer(OLogTransformer.class); 78 | registerTransformer(OMergeTransformer.class); 79 | registerTransformer(OFlowTransformer.class); 80 | registerTransformer(OVertexTransformer.class); 81 | 82 | registerLoader(OOrientDBLoader.class); 83 | registerLoader(OOutputLoader.class); 84 | } 85 | 86 | public OETLComponentFactory registerSource(final Class iComponent) { 87 | try { 88 | sources.put(iComponent.newInstance().getName(), iComponent); 89 | } catch (Exception e) { 90 | OLogManager.instance().error(this, "Error on registering source: %s", iComponent.getName()); 91 | } 92 | return this; 93 | } 94 | 95 | public OETLComponentFactory registerBlock(final Class iComponent) { 96 | try { 97 | 
blocks.put(iComponent.newInstance().getName(), iComponent); 98 | } catch (Exception e) { 99 | OLogManager.instance().error(this, "Error on registering block: %s", iComponent.getName()); 100 | } 101 | return this; 102 | } 103 | 104 | public OETLComponentFactory registerExtractor(final Class iComponent) { 105 | try { 106 | extractors.put(iComponent.newInstance().getName(), iComponent); 107 | } catch (Exception e) { 108 | OLogManager.instance().error(this, "Error on registering extractor: %s", iComponent.getName()); 109 | } 110 | return this; 111 | } 112 | 113 | public OETLComponentFactory registerTransformer(final Class iComponent) { 114 | try { 115 | transformers.put(iComponent.newInstance().getName(), iComponent); 116 | } catch (Exception e) { 117 | OLogManager.instance().error(this, "Error on registering transformer: %s", iComponent.getName()); 118 | } 119 | return this; 120 | } 121 | 122 | public OETLComponentFactory registerLoader(final Class iComponent) { 123 | try { 124 | loaders.put(iComponent.newInstance().getName(), iComponent); 125 | } catch (Exception e) { 126 | OLogManager.instance().error(this, "Error on registering loader: %s", iComponent.getName()); 127 | } 128 | return this; 129 | } 130 | 131 | public OExtractor getExtractor(final String iName) throws IllegalAccessException, InstantiationException { 132 | final Class cls = extractors.get(iName); 133 | if (cls == null) 134 | throw new IllegalArgumentException("Extractor '" + iName + "' not found"); 135 | return cls.newInstance(); 136 | } 137 | 138 | public OTransformer getTransformer(final String iName) throws IllegalAccessException, InstantiationException { 139 | final Class cls = transformers.get(iName); 140 | if (cls == null) 141 | throw new IllegalArgumentException("Transformer '" + iName + "' not found"); 142 | return cls.newInstance(); 143 | } 144 | 145 | public OBlock getBlock(final String iName) throws IllegalAccessException, InstantiationException { 146 | final Class cls = blocks.get(iName); 
147 | if (cls == null) 148 | throw new IllegalArgumentException("Block '" + iName + "' not found"); 149 | return cls.newInstance(); 150 | } 151 | 152 | public OLoader getLoader(final String iName) throws IllegalAccessException, InstantiationException { 153 | final Class cls = loaders.get(iName); 154 | if (cls == null) 155 | throw new IllegalArgumentException("Loader '" + iName + "' not found"); 156 | return cls.newInstance(); 157 | } 158 | 159 | public OSource getSource(final String iName) throws IllegalAccessException, InstantiationException { 160 | final Class cls = sources.get(iName); 161 | if (cls == null) 162 | throw new IllegalArgumentException("Source '" + iName + "' not found"); 163 | return cls.newInstance(); 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/extractor/OJDBCExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 
16 | * 17 | */

package com.orientechnologies.orient.etl.extractor;

import com.orientechnologies.orient.core.command.OCommandContext;
import com.orientechnologies.orient.core.exception.OConfigurationException;
import com.orientechnologies.orient.core.metadata.schema.OType;
import com.orientechnologies.orient.core.record.impl.ODocument;
import com.orientechnologies.orient.etl.OETLProcessor;
import com.orientechnologies.orient.etl.OExtractedItem;

import java.io.Reader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;

/**
 * Extractor that runs a SQL query through JDBC and emits one {@link ODocument} per row, with one field per column.
 * <p>
 * The connection is opened in {@link #configure}, the query is executed in {@link #begin()} and every JDBC resource is
 * released in {@link #end()}.
 */
public class OJDBCExtractor extends OAbstractExtractor {
  protected String       url;
  protected String       userName;
  protected String       userPassword;
  protected String       query;
  protected String       queryCount;

  protected String       driverClass;
  protected Connection   conn;
  protected Statement    stm;
  protected ResultSet    rs;
  // true when hasNext() has already advanced the cursor and next() has not consumed that row yet
  protected boolean      didNext     = false;
  protected boolean      hasNext     = false;
  protected int          rsColumns;
  protected List<String> columnNames = null;
  protected List<OType>  columnTypes = null;
  protected int          fetchSize   = 10000;

  /**
   * Reads the connection settings, loads the JDBC driver and opens the connection.
   *
   * @throws OConfigurationException
   *           if the driver class cannot be loaded or the connection cannot be established
   */
  @Override
  public void configure(OETLProcessor iProcessor, ODocument iConfiguration, OCommandContext iContext) {
    super.configure(iProcessor, iConfiguration, iContext);

    driverClass = (String) resolve(iConfiguration.field("driver"));
    url = (String) resolve(iConfiguration.field("url"));
    userName = (String) resolve(iConfiguration.field("userName"));
    userPassword = (String) resolve(iConfiguration.field("userPassword"));
    query = (String) resolve(iConfiguration.field("query"));
    queryCount = (String) resolve(iConfiguration.field("queryCount"));
    if (iConfiguration.containsField("fetchSize")) {
      // accept any numeric type, or a numeric string; a null value keeps the default
      final Object configuredFetchSize = resolve(iConfiguration.field("fetchSize"));
      if (configuredFetchSize instanceof Number)
        fetchSize = ((Number) configuredFetchSize).intValue();
      else if (configuredFetchSize != null)
        fetchSize = Integer.parseInt(configuredFetchSize.toString().trim());
    }

    try {
      Class.forName(driverClass).newInstance();
    } catch (Exception e) {
      throw new OConfigurationException("[JDBC extractor] JDBC Driver " + driverClass + " not found", e);
    }

    try {
      conn = DriverManager.getConnection(url, userName, userPassword);
    } catch (Exception e) {
      throw new OConfigurationException("[JDBC extractor] error on connecting to JDBC url '" + url + "' using user '" + userName
          + "' and the password provided", e);
    }
  }

  /**
   * Runs the optional count query to fill {@code total}, then executes the main query and records the name and mapped type of
   * every result column.
   *
   * @throws OExtractorException
   *           if any statement fails
   */
  @Override
  public void begin() {
    try {
      stm = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
      stm.setFetchSize(fetchSize);
      if (queryCount != null) {
        // GET THE TOTAL COUNTER: this must run the count query, not the extraction query
        final ResultSet countRs = stm.executeQuery(queryCount);
        try {
          if (countRs != null && countRs.next())
            total = countRs.getInt(1);
        } finally {
          if (countRs != null)
            try {
              countRs.close();
            } catch (SQLException ignored) {
              // best effort: the counter has already been read
            }
        }
      }

      rs = stm.executeQuery(query);
      rsColumns = rs.getMetaData().getColumnCount();
      columnNames = new ArrayList<String>(rsColumns);
      columnTypes = new ArrayList<OType>(rsColumns);

      // JDBC columns are 1-based
      for (int i = 1; i <= rsColumns; ++i) {
        columnNames.add(rs.getMetaData().getColumnName(i));
        columnTypes.add(mapSqlType(rs.getMetaData().getColumnType(i)));
      }

    } catch (SQLException e) {
      throw new OExtractorException("[JDBC extractor] error on executing query '" + query + "'", e);
    }
  }

  /**
   * Maps a {@link Types} constant to the corresponding {@link OType}; unknown SQL types map to {@link OType#ANY}.
   */
  private static OType mapSqlType(final int sqlType) {
    switch (sqlType) {
    case Types.BOOLEAN:
      return OType.BOOLEAN;
    case Types.SMALLINT:
      return OType.SHORT;
    case Types.INTEGER:
      return OType.INTEGER;
    case Types.FLOAT:
      return OType.FLOAT;
    case Types.DOUBLE:
      return OType.DOUBLE;
    case Types.BIGINT:
      return OType.LONG;
    case Types.DECIMAL:
      return OType.DECIMAL;
    case Types.DATE:
      return OType.DATE;
    case Types.TIMESTAMP:
      return OType.DATETIME;
    case Types.VARCHAR:
    case Types.LONGNVARCHAR:
    case Types.LONGVARCHAR:
      return OType.STRING;
    case Types.BINARY:
    case Types.BLOB:
      return OType.BINARY;
    case Types.CHAR:
    case Types.TINYINT:
      // NOTE(review): CHAR is mapped to BYTE like TINYINT, as in the original code - confirm this is intended
      return OType.BYTE;
    default:
      return OType.ANY;
    }
  }

  /**
   * Closes result set, statement and connection, in this order. Each close is best effort so that a failure on one resource
   * does not prevent the release of the others.
   */
  @Override
  public void end() {
    if (rs != null)
      try {
        rs.close();
      } catch (SQLException ignored) {
        // best effort on shutdown
      }
    if (stm != null)
      try {
        stm.close();
      } catch (SQLException ignored) {
        // best effort on shutdown
      }
    if (conn != null)
      try {
        conn.close();
      } catch (SQLException ignored) {
        // best effort on shutdown
      }
  }

  /**
   * No-op: this extractor reads from the JDBC connection, not from a reader.
   */
  @Override
  public void extract(final Reader iReader) {
  }

  @Override
  public String getUnit() {
    return "records";
  }

  /**
   * Advances the cursor at most once per row: the outcome is cached until {@link #next()} consumes it. The position counter
   * is not touched here, it is advanced only by {@link #next()}, once per extracted row.
   *
   * @throws OExtractorException
   *           if the cursor cannot be moved
   */
  @Override
  public boolean hasNext() {
    try {
      if (!didNext) {
        hasNext = rs.next();
        didNext = true;
      }
      return hasNext;
    } catch (SQLException e) {
      throw new OExtractorException("[JDBC extractor] error on moving forward in resultset of query '" + query
          + "'. Previous position was " + current, e);
    }
  }

  /**
   * Returns the current row as a document whose fields are named after the result columns. Values are stored as returned by
   * the driver; the column types collected in {@link #begin()} are not applied here.
   *
   * @throws NoSuchElementException
   *           if the result set is exhausted
   * @throws OExtractorException
   *           if the row cannot be read
   */
  @Override
  public OExtractedItem next() {
    try {
      if (!didNext)
        // next() called without a previous hasNext(): move the cursor now
        hasNext = rs.next();
      didNext = false;

      if (!hasNext)
        throw new NoSuchElementException("[JDBC extractor] previous position was " + current);

      final ODocument doc = new ODocument();
      for (int i = 0; i < rsColumns; i++) {
        // JDBC columns are 1-based
        final Object fieldValue = rs.getObject(i + 1);
        doc.field(columnNames.get(i), fieldValue);
      }

      return new OExtractedItem(current++, doc);

    } catch (SQLException e) {
      throw new OExtractorException("[JDBC extractor] error on moving forward in resultset of query '" + query
          + "'. Previous position was " + current, e);
    }
  }

  /**
   * Describes the parameters accepted by this extractor.
   */
  @Override
  public ODocument getConfiguration() {
    return new ODocument().fromJSON("{parameters:[{driver:{optional:false,description:'JDBC Driver class'}},"
        + "{url:{optional:false,description:'Connection URL'}}," + "{userName:{optional:false,description:'User name'}},"
        + "{userPassword:{optional:false,description:'User password'}},"
        + "{fetchSize:{optional:true,description:'JDBC cursor fetch size. Default is 10000'}},"
        + "{query:{optional:false,description:'Query that extract records'}},"
        + "{queryCount:{optional:true,description:'Query that returns the count to have a correct progress status'}}],"
        + "output:'ODocument'}");
  }

  @Override
  public String getName() {
    return "jdbc";
  }
}

-------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 18 | 19 | 21 | 4.0.0 22 | 23 | com.orientechnologies 24 | orientdb-etl 25 | 2.1.3 26 | jar 27 | 28 | 29 | ${project.version} 30 | 2.6.0 31 | 1.6 32 | ${jdkVersion} 33 | ${jdkVersion} 34 | UTF-8 35 | 36 | 37 | orientdb-etl 38 | ETL tools for OrientDB NoSQL document graph dbms 39 | http://maven.apache.org 40 | 2014 41 | 42 | Orient Technologies 43 | http://www.orientechnologies.com 44 | 45 | 46 | 47 | Apache 2 48 | http://www.apache.org/licenses/LICENSE-2.0.txt 49 | 50 | 51 | 52 | 53 | l.garulli 54 | Luca Garulli 55 | l.garulli@orientechnologies.com 56 | Orient Technologies 57 | http://www.orientechnologies.com 58 | 59 | architect 60 | developer 61 | founder 62 | 63 | +1 64 | 65 | 66 | r.franchini 67 | Roberto Franchini 68 | r.franchini@orientechnologies.com 69 | Orient Technologies 70 | http://www.orientechnologies.com 71 | 72 | architect 73 | developer 74 | 75 | +1 76 | 77 | 78 | 79 | 80 | scm:git:https://github.com/orientechnologies/orientdb-etl.git 81 | scm:git:git@github.com:orientechnologies/orientdb-etl.git 82 | https://github.com/orientechnologies/orientdb-etl 83 | HEAD 84 | 85 | 86 | GitHub Issues 87 | https://github.com/orientechnologies/orientdb-etl/issues 88 | 89 | 90 | jenkins 91 | http://helios.orientechnologies.com/ 92 | 93 | 94 | 95 | sonatype-nexus-staging 96 | OrientDB Maven2 Repository 97 | https://oss.sonatype.org/service/local/staging/deploy/maven2 98 | 99 | 100 | sonatype-nexus-snapshots 101 | OrientDB Maven2 Snapshot
Repository 102 | https://oss.sonatype.org/content/repositories/snapshots 103 | 104 | 105 | 106 | 107 | 108 | sonatype-nexus-snapshots 109 | Sonatype Nexus Snapshots 110 | https://oss.sonatype.org/content/repositories/snapshots 111 | 112 | false 113 | 114 | 115 | true 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | org.apache.maven.plugins 124 | maven-compiler-plugin 125 | 3.1 126 | 127 | true 128 | ${maven.compile.sourceLevel} 129 | ${maven.compile.targetLevel} 130 | ${project.build.sourceEncoding} 131 | 132 | 133 | 134 | 135 | org.apache.maven.plugins 136 | maven-release-plugin 137 | 2.5.1 138 | 139 | forked-path 140 | false 141 | true 142 | false 143 | true 144 | 145 | release 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | release-sign-artifacts 154 | 155 | 156 | performRelease 157 | true 158 | 159 | 160 | 161 | 162 | 163 | org.apache.maven.plugins 164 | maven-gpg-plugin 165 | 166 | 167 | sign-artifacts 168 | verify 169 | 170 | sign 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | junit 182 | junit 183 | 4.12 184 | test 185 | 186 | 187 | org.hamcrest 188 | hamcrest-all 189 | 1.3 190 | test 191 | 192 | 193 | 194 | org.assertj 195 | assertj-core 196 | 2.2.0 197 | test 198 | 199 | 200 | 201 | com.orientechnologies 202 | orientdb-core 203 | ${orientdb.version} 204 | compile 205 | 206 | 207 | com.orientechnologies 208 | orientdb-client 209 | ${orientdb.version} 210 | compile 211 | 212 | 213 | com.orientechnologies 214 | orientdb-enterprise 215 | ${orientdb.version} 216 | compile 217 | 218 | 219 | com.orientechnologies 220 | orientdb-graphdb 221 | ${orientdb.version} 222 | compile 223 | 224 | 225 | com.tinkerpop.blueprints 226 | blueprints-core 227 | ${blueprints.version} 228 | 229 | 230 | 231 | 232 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/transformer/OEdgeTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 
| * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.common.collection.OMultiValue; 22 | import com.orientechnologies.orient.core.command.OCommandContext; 23 | import com.orientechnologies.orient.core.db.record.OIdentifiable; 24 | import com.orientechnologies.orient.core.exception.OConfigurationException; 25 | import com.orientechnologies.orient.core.metadata.schema.OClass; 26 | import com.orientechnologies.orient.core.record.impl.ODocument; 27 | import com.orientechnologies.orient.core.storage.ORecordDuplicatedException; 28 | import com.orientechnologies.orient.etl.OETLProcessHaltedException; 29 | import com.orientechnologies.orient.etl.OETLProcessor; 30 | import com.tinkerpop.blueprints.impls.orient.OrientEdge; 31 | import com.tinkerpop.blueprints.impls.orient.OrientEdgeType; 32 | import com.tinkerpop.blueprints.impls.orient.OrientVertex; 33 | 34 | import java.util.ArrayList; 35 | import java.util.List; 36 | 37 | public class OEdgeTransformer extends OAbstractLookupTransformer { 38 | private String edgeClass = OrientEdgeType.CLASS_NAME; 39 | private boolean directionOut = true; 40 | private ODocument targetVertexFields; 41 | private ODocument edgeFields; 42 | private boolean skipDuplicates = false; 43 | 
44 | @Override 45 | public ODocument getConfiguration() { 46 | return new ODocument() 47 | .fromJSON("{parameters:[" 48 | + getCommonConfigurationParameters() 49 | + "," 50 | + "{joinValue:{optional:true,description:'value to use for join'}}," 51 | + "{joinFieldName:{optional:true,description:'field name containing the value to join'}}," 52 | + "{lookup:{optional:false,description:'. or Query to execute'}}," 53 | + "{direction:{optional:true,description:'Direction between \'in\' and \'out\'. Default is \'out\''}}," 54 | + "{class:{optional:true,description:'Edge class name. Default is \'E\''}}," 55 | + "{targetVertexFields:{optional:true,description:'Map of fields to set in target vertex. Use ${$input.} to get input field values'}}," 56 | + "{edgeFields:{optional:true,description:'Map of fields to set in edge. Use ${$input.} to get input field values'}}," 57 | + "{skipDuplicates:{optional:true,description:'Duplicated edges (with a composite index built on both out and in properties) are skipped', default:false}}," 58 | + "{unresolvedVertexAction:{optional:true,description:'action when a unresolved vertices is found',values:" 59 | + stringArray2Json(ACTION.values()) + "}}]," + "input:['ODocument','OrientVertex'],output:'OrientVertex'}"); 60 | } 61 | 62 | @Override 63 | public void configure(OETLProcessor iProcessor, final ODocument iConfiguration, final OCommandContext iContext) { 64 | super.configure(iProcessor, iConfiguration, iContext); 65 | edgeClass = iConfiguration.field("class"); 66 | if (iConfiguration.containsField("direction")) { 67 | final String direction = iConfiguration.field("direction"); 68 | if ("out".equalsIgnoreCase(direction)) 69 | directionOut = true; 70 | else if ("in".equalsIgnoreCase(direction)) 71 | directionOut = false; 72 | else 73 | throw new OConfigurationException("Direction can be 'in' or 'out', but found: " + direction); 74 | } 75 | 76 | if (iConfiguration.containsField("targetVertexFields")) 77 | targetVertexFields = (ODocument) 
iConfiguration.field("targetVertexFields"); 78 | if (iConfiguration.containsField("edgeFields")) 79 | edgeFields = (ODocument) iConfiguration.field("edgeFields"); 80 | if (iConfiguration.containsField("skipDuplicates")) 81 | skipDuplicates = (Boolean) resolve(iConfiguration.field("skipDuplicates")); 82 | } 83 | 84 | @Override 85 | public String getName() { 86 | return "edge"; 87 | } 88 | 89 | @Override 90 | public void begin() { 91 | final OClass cls = pipeline.getGraphDatabase().getEdgeType(edgeClass); 92 | if (cls == null) 93 | pipeline.getGraphDatabase().createEdgeType(edgeClass); 94 | super.begin(); 95 | } 96 | 97 | @Override 98 | public Object executeTransform(final Object input) { 99 | for (Object o : OMultiValue.getMultiValueIterable(input)) { 100 | // GET JOIN VALUE 101 | final OrientVertex vertex; 102 | if (o instanceof OrientVertex) 103 | vertex = (OrientVertex) o; 104 | else if (o instanceof OIdentifiable) 105 | vertex = pipeline.getGraphDatabase().getVertex(o); 106 | else 107 | throw new OTransformException(getName() + ": input type '" + o + "' is not supported"); 108 | 109 | final Object joinCurrentValue = joinValue != null ? 
joinValue : vertex.getProperty(joinFieldName); 110 | 111 | if (OMultiValue.isMultiValue(joinCurrentValue)) { 112 | // RESOLVE SINGLE JOINS 113 | for (Object ob : OMultiValue.getMultiValueIterable(joinCurrentValue)) { 114 | final Object r = lookup(ob, true); 115 | if (createEdge(vertex, ob, r) == null) { 116 | if (unresolvedLinkAction == ACTION.SKIP) 117 | // RETURN NULL ONLY IN CASE SKIP ACTION IS REQUESTED 118 | return null; 119 | } 120 | } 121 | } else { 122 | final Object result = lookup(joinCurrentValue, true); 123 | if (createEdge(vertex, joinCurrentValue, result) == null) { 124 | if (unresolvedLinkAction == ACTION.SKIP) 125 | // RETURN NULL ONLY IN CASE SKIP ACTION IS REQUESTED 126 | return null; 127 | } 128 | } 129 | } 130 | 131 | return input; 132 | } 133 | 134 | private List createEdge(final OrientVertex vertex, final Object joinCurrentValue, Object result) { 135 | log(OETLProcessor.LOG_LEVELS.DEBUG, "joinCurrentValue=%s, lookupResult=%s", joinCurrentValue, result); 136 | 137 | if (result == null) { 138 | // APPLY THE STRATEGY DEFINED IN unresolvedLinkAction 139 | switch (unresolvedLinkAction) { 140 | case CREATE: 141 | // Don't try to create a Vertex with a null value 142 | if (joinCurrentValue != null) { 143 | if (lookup != null) { 144 | final String[] lookupParts = lookup.split("\\."); 145 | final OrientVertex linkedV = pipeline.getGraphDatabase().addTemporaryVertex(lookupParts[0]); 146 | linkedV.setProperty(lookupParts[1], joinCurrentValue); 147 | 148 | if (targetVertexFields != null) { 149 | for (String f : targetVertexFields.fieldNames()) 150 | linkedV.setProperty(f, resolve(targetVertexFields.field(f))); 151 | } 152 | 153 | linkedV.save(); 154 | 155 | log(OETLProcessor.LOG_LEVELS.DEBUG, "created new vertex=%s", linkedV.getRecord()); 156 | 157 | result = linkedV.getIdentity(); 158 | } else { 159 | throw new OConfigurationException("Cannot create linked document because target class is unknown. 
Use 'lookup' field"); 160 | } 161 | } 162 | break; 163 | case ERROR: 164 | processor.getStats().incrementErrors(); 165 | log(OETLProcessor.LOG_LEVELS.ERROR, "%s: ERROR Cannot resolve join for value '%s'", getName(), joinCurrentValue); 166 | break; 167 | case WARNING: 168 | processor.getStats().incrementWarnings(); 169 | log(OETLProcessor.LOG_LEVELS.INFO, "%s: WARN Cannot resolve join for value '%s'", getName(), joinCurrentValue); 170 | break; 171 | case SKIP: 172 | return null; 173 | case HALT: 174 | throw new OETLProcessHaltedException("Cannot resolve join for value '" + joinCurrentValue + "'"); 175 | case NOTHING: 176 | default: 177 | return null; 178 | } 179 | } 180 | 181 | if (result != null) { 182 | final List edges; 183 | if (OMultiValue.isMultiValue(result)) { 184 | final int size = OMultiValue.getSize(result); 185 | if (size == 0) 186 | // NO EDGES 187 | return null; 188 | 189 | edges = new ArrayList(size); 190 | } else 191 | edges = new ArrayList(1); 192 | 193 | for (Object o : OMultiValue.getMultiValueIterable(result)) { 194 | OIdentifiable oid = (OIdentifiable) o; 195 | final OrientVertex targetVertex = pipeline.getGraphDatabase().getVertex(oid); 196 | 197 | try { 198 | // CREATE THE EDGE 199 | final OrientEdge edge; 200 | if (directionOut) 201 | edge = (OrientEdge) vertex.addEdge(edgeClass, targetVertex); 202 | else 203 | edge = (OrientEdge) targetVertex.addEdge(edgeClass, vertex); 204 | 205 | if (edgeFields != null) { 206 | for (String f : edgeFields.fieldNames()) 207 | edge.setProperty(f, resolve(edgeFields.field(f))); 208 | } 209 | 210 | edges.add(edge); 211 | log(OETLProcessor.LOG_LEVELS.DEBUG, "created new edge=%s", edge); 212 | } catch (ORecordDuplicatedException e) { 213 | if (skipDuplicates) { 214 | log(OETLProcessor.LOG_LEVELS.DEBUG, "skipped creation of new edge because already exists"); 215 | continue; 216 | } else { 217 | log(OETLProcessor.LOG_LEVELS.ERROR, "error on creation of new edge because it already exists (skipDuplicates=false)"); 
218 | throw e; 219 | } 220 | } 221 | } 222 | 223 | return edges; 224 | } 225 | 226 | // NO EDGES 227 | return null; 228 | } 229 | } 230 | -------------------------------------------------------------------------------- /src/main/java/com/orientechnologies/orient/etl/transformer/OCSVTransformer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * * Copyright 2010-2014 Orient Technologies LTD (info(at)orientechnologies.com) 4 | * * 5 | * * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * * you may not use this file except in compliance with the License. 7 | * * You may obtain a copy of the License at 8 | * * 9 | * * http://www.apache.org/licenses/LICENSE-2.0 10 | * * 11 | * * Unless required by applicable law or agreed to in writing, software 12 | * * distributed under the License is distributed on an "AS IS" BASIS, 13 | * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * * See the License for the specific language governing permissions and 15 | * * limitations under the License. 
16 | * 17 | */ 18 | 19 | package com.orientechnologies.orient.etl.transformer; 20 | 21 | import com.orientechnologies.orient.core.command.OCommandContext; 22 | import com.orientechnologies.orient.core.metadata.schema.OType; 23 | import com.orientechnologies.orient.core.record.impl.ODocument; 24 | import com.orientechnologies.orient.core.serialization.serializer.OStringSerializerHelper; 25 | import com.orientechnologies.orient.etl.OETLProcessor; 26 | import sun.misc.FloatConsts; 27 | 28 | import java.text.DateFormat; 29 | import java.text.ParseException; 30 | import java.text.SimpleDateFormat; 31 | import java.util.ArrayList; 32 | import java.util.List; 33 | 34 | public class OCSVTransformer extends OAbstractTransformer { 35 | private char separator = ','; 36 | private boolean columnsOnFirstLine = true; 37 | private List columnNames = null; 38 | private List columnTypes = null; 39 | private long skipFrom = -1; 40 | private long skipTo = -1; 41 | private long line = -1; 42 | private String nullValue; 43 | private Character stringCharacter = '"'; 44 | private boolean unicode = true; 45 | 46 | @Override 47 | public ODocument getConfiguration() { 48 | return new ODocument().fromJSON("{parameters:[" + getCommonConfigurationParameters() 49 | + ",{separator:{optional:true,description:'Column separator'}}," 50 | + "{columnsOnFirstLine:{optional:true,description:'Columns are described in the first line'}}," 51 | + "{columns:{optional:true,description:'Columns array containing names, and optionally type after :'}}," 52 | + "{nullValue:{optional:true,description:'Value to consider as NULL. Default is not declared'}}," 53 | + "{unicode:{optional:true,description:'Support unicode values as \\u'}}," 54 | + "{stringCharacter:{optional:true,description:'String character delimiter. 
Use \"\" to do not use any delimitator'}}," 55 | + "{skipFrom:{optional:true,description:'Line number where start to skip',type:'int'}}," 56 | + "{skipTo:{optional:true,description:'Line number where skip ends',type:'int'}}" 57 | + "],input:['String'],output:'ODocument'}"); 58 | } 59 | 60 | @Override 61 | public void configure(final OETLProcessor iProcessor, final ODocument iConfiguration, final OCommandContext iContext) { 62 | super.configure(iProcessor, iConfiguration, iContext); 63 | 64 | if (iConfiguration.containsField("separator")) 65 | separator = iConfiguration.field("separator").toString().charAt(0); 66 | if (iConfiguration.containsField("columnsOnFirstLine")) 67 | columnsOnFirstLine = (Boolean) iConfiguration.field("columnsOnFirstLine"); 68 | if (iConfiguration.containsField("columns")) { 69 | final List columns = iConfiguration.field("columns"); 70 | columnNames = new ArrayList(columns.size()); 71 | columnTypes = new ArrayList(columns.size()); 72 | for (String c : columns) { 73 | final String[] parts = c.split(":"); 74 | 75 | columnNames.add(parts[0]); 76 | if (parts.length > 1) 77 | columnTypes.add(OType.valueOf(parts[1].toUpperCase())); 78 | else 79 | columnTypes.add(OType.ANY); 80 | } 81 | } 82 | if (iConfiguration.containsField("skipFrom")) 83 | skipFrom = ((Number) iConfiguration.field("skipFrom")).longValue(); 84 | if (iConfiguration.containsField("skipTo")) 85 | skipTo = ((Number) iConfiguration.field("skipTo")).longValue(); 86 | if (iConfiguration.containsField("nullValue")) 87 | nullValue = iConfiguration.field("nullValue"); 88 | if (iConfiguration.containsField("unicode")) 89 | unicode = iConfiguration.field("unicode"); 90 | if (iConfiguration.containsField("stringCharacter")) { 91 | final String value = iConfiguration.field("stringCharacter").toString(); 92 | if (value.isEmpty()) 93 | stringCharacter = null; 94 | else 95 | stringCharacter = value.charAt(0); 96 | } 97 | } 98 | 99 | @Override 100 | public String getName() { 101 | return "csv"; 
102 | } 103 | 104 | @Override 105 | public Object executeTransform(final Object input) { 106 | if (skipTransform()) 107 | return null; 108 | 109 | log(OETLProcessor.LOG_LEVELS.DEBUG, "parsing=%s", input); 110 | 111 | final List fields = OStringSerializerHelper.smartSplit(input.toString(), new char[] { separator }, 0, -1, false, false, 112 | false, false, unicode); 113 | 114 | if (!isColumnNamesCorrect(fields)) 115 | return null; 116 | 117 | final ODocument doc = new ODocument(); 118 | for (int i = 0; i < columnNames.size() && i < fields.size(); ++i) { 119 | final String fieldName = columnNames.get(i); 120 | Object fieldValue = null; 121 | try { 122 | final String fieldStringValue = getCellContent(fields.get(i)); 123 | final OType fieldType = columnTypes != null ? columnTypes.get(i) : null; 124 | 125 | if (fieldType != null && fieldType != OType.ANY) { 126 | // DEFINED TYPE 127 | fieldValue = processKnownType(doc, i, fieldName, fieldStringValue, fieldType); 128 | } else { 129 | // DETERMINE THE TYPE 130 | if (fieldStringValue != null) 131 | fieldValue = determineTheType(fieldStringValue); 132 | } 133 | doc.field(fieldName, fieldValue); 134 | 135 | } catch (Exception e) { 136 | processor.getStats().incrementErrors(); 137 | log(OETLProcessor.LOG_LEVELS.ERROR, "Error on setting document field %s=%s (cause=%s)", fieldName, fieldValue, e.toString()); 138 | } 139 | } 140 | 141 | log(OETLProcessor.LOG_LEVELS.DEBUG, "document=%s", doc); 142 | return doc; 143 | } 144 | 145 | public static boolean isFinite(final float value) { 146 | return Math.abs(value) <= FloatConsts.MAX_VALUE; 147 | } 148 | 149 | private Object processKnownType(ODocument doc, int i, String fieldName, String fieldStringValue, OType fieldType) { 150 | Object fieldValue; 151 | fieldValue = getCellContent(fieldStringValue); 152 | try { 153 | fieldValue = OType.convert(fieldValue, fieldType.getDefaultJavaType()); 154 | doc.field(fieldName, fieldValue); 155 | } catch (Exception e) { 156 | 
processor.getStats().incrementErrors(); 157 | log(OETLProcessor.LOG_LEVELS.ERROR, "Error on converting row %d field '%s' (%d), value '%s' (class:%s) to type: %s", 158 | processor.getExtractor().getProgress(), fieldName, i, fieldValue, fieldValue.getClass().getName(), fieldType); 159 | } 160 | return fieldValue; 161 | } 162 | 163 | private Object determineTheType(String fieldStringValue) { 164 | Object fieldValue; 165 | if ((fieldValue = transformToDate(fieldStringValue)) == null)// try maybe Date type 166 | if ((fieldValue = transformToNumeric(fieldStringValue)) == null)// try maybe Numeric type 167 | fieldValue = fieldStringValue; // type String 168 | return fieldValue; 169 | } 170 | 171 | private Object transformToDate(String fieldStringValue) { 172 | // DATE 173 | DateFormat df = new SimpleDateFormat("yyyy-MM-dd"); 174 | df.setLenient(true); 175 | Object fieldValue; 176 | try { 177 | fieldValue = df.parse(fieldStringValue); 178 | } catch (ParseException pe) { 179 | fieldValue = null; 180 | } 181 | return fieldValue; 182 | } 183 | 184 | private Object transformToNumeric(final String fieldStringValue) { 185 | if (fieldStringValue.isEmpty()) 186 | return fieldStringValue; 187 | 188 | final char c = fieldStringValue.charAt(0); 189 | if (c != '-' && !Character.isDigit(c)) 190 | // NOT A NUMBER FOR SURE 191 | return fieldStringValue; 192 | 193 | Object fieldValue; 194 | try { 195 | if (fieldStringValue.contains(".") || fieldStringValue.contains(",")) { 196 | String numberAsString = fieldStringValue.replaceAll(",", "."); 197 | fieldValue = new Float(numberAsString); 198 | if (!isFinite((Float) fieldValue)) { 199 | fieldValue = new Double(numberAsString); 200 | } 201 | } else 202 | try { 203 | fieldValue = new Integer(fieldStringValue); 204 | } catch (Exception e) { 205 | fieldValue = new Long(fieldStringValue); 206 | } 207 | } catch (NumberFormatException nf) { 208 | fieldValue = fieldStringValue; 209 | } 210 | return fieldValue; 211 | } 212 | 213 | private boolean 
isColumnNamesCorrect(List fields) { 214 | if (columnNames == null) { 215 | if (!columnsOnFirstLine) 216 | throw new OTransformException(getName() + ": columnsOnFirstLine=false and no columns declared"); 217 | columnNames = fields; 218 | 219 | // REMOVE ANY STRING CHARACTERS IF ANY 220 | for (int i = 0; i < columnNames.size(); ++i) 221 | columnNames.set(i, getCellContent(columnNames.get(i))); 222 | 223 | return false; 224 | } 225 | 226 | if (columnsOnFirstLine && line == 0) 227 | // JUST SKIP FIRST LINE 228 | return false; 229 | 230 | return true; 231 | } 232 | 233 | private boolean skipTransform() { 234 | line++; 235 | 236 | if (skipFrom > -1) { 237 | if (skipTo > -1) { 238 | if (line >= skipFrom && line <= skipTo) 239 | return true; 240 | } else if (line >= skipFrom) 241 | // SKIP IT 242 | return true; 243 | } 244 | return false; 245 | } 246 | 247 | /** 248 | * Backport copy of Float.isFinite() method that was introduced since Java 1.8 but we must support 1.6. TODO replace after 249 | * choosing Java 1.8 as minimal supported 250 | **/ 251 | protected boolean isFinite(Float f) { 252 | return Math.abs(f) <= FloatConsts.MAX_VALUE; 253 | } 254 | 255 | // TODO Test, and double doubleqoutes case 256 | public String getCellContent(String iValue) { 257 | if (iValue == null || iValue.isEmpty() || "NULL".equals(iValue)) 258 | return null; 259 | 260 | if (stringCharacter != null && iValue.length() > 1 261 | && (iValue.charAt(0) == stringCharacter && iValue.charAt(iValue.length() - 1) == stringCharacter)) 262 | return iValue.substring(1, iValue.length() - 1); 263 | 264 | return iValue; 265 | } 266 | } 267 | --------------------------------------------------------------------------------