├── VERSION ├── neptune-gremlin-js ├── .gitignore ├── .npmignore ├── cdk-test-app │ ├── .npmignore │ ├── jest.config.js │ ├── .gitignore │ ├── README.md │ ├── package.json │ ├── cdk.json │ ├── lambda │ │ └── package.json │ ├── bin │ │ └── cdk-test-app.js │ └── lib │ │ └── cdk-test-app-stack.js ├── CHANGELOG.md ├── .eslintrc.js ├── test │ └── aws-neptune-gremlin.test.js ├── package.json └── README.md ├── .gitignore ├── csv-to-neptune-bulk-format ├── .gitignore ├── .images │ └── Solution-CSVConverter.png ├── data-config-spotify-no-node.json ├── notebooks │ ├── Spotify-Data-Query.ipynb │ └── Prepare-Data-Spotify.ipynb ├── csv_converter.py └── data-config-spotify.json ├── neptune-streams-utils ├── examples │ ├── java8 │ │ ├── build.sh │ │ ├── readme.md │ │ ├── install-dependencies.sh │ │ ├── src │ │ │ └── main │ │ │ │ └── java │ │ │ │ ├── utils │ │ │ │ └── EnvironmentVariablesUtils.java │ │ │ │ └── stream_handler │ │ │ │ ├── AbstractStreamHandler.java │ │ │ │ └── StreamHandler.java │ │ └── pom.xml │ ├── python3.8 │ │ ├── build.sh │ │ └── stream_handler.py │ └── streams-to-firehose │ │ ├── build.sh │ │ ├── readme.md │ │ └── stream_handler.py ├── readme.md └── provisioning │ └── readme.md ├── NOTICE ├── csv-gremlin └── test-files │ ├── bad-edges-with-sets.csv │ ├── doubles.csv │ ├── header-with-spaces-edge.csv │ ├── header-with-spaces.csv │ ├── edges.csv │ ├── edges-with-bad-header.csv │ ├── edges-with-repeat-ids.csv │ ├── vertices-with-sets.csv │ ├── bad-edges.csv │ ├── bad-vertices.csv │ ├── vertices-with-bools.csv │ ├── vertices-with-repeat-ids.csv │ ├── dates.csv │ ├── vertices-quotes.csv │ ├── vertices.csv │ └── vertices-with-bad-header.csv ├── export-neptune-to-elasticsearch ├── lambda │ ├── .Python │ ├── requirements.txt │ ├── build.sh │ ├── README.md │ ├── export_neptune_to_kinesis.py │ └── kinesis_to_elasticsearch.py ├── NOTICE └── export-neptune-to-elasticsearch.png ├── neo4j-to-neptune ├── bin │ └── neo4j-to-neptune.sh ├── docs │ ├── example-bulk-load-config.yaml │ ├── example-conversion-config.yaml │ └── bulk-load-config.md └── src │ ├── main │ └── java │ │ └── com │ │ └── amazonaws │ │ └── services │ │ └── neptune │ │ ├── metadata │ │ ├── Header.java │ │ ├── PropertyValueParserPolicy.java │ │ ├── DateTimeUtils.java │ │ ├── Headers.java │ │ ├── PropertyValue.java │ │ ├── Property.java │ │ ├── Token.java │ │ ├── MultiValuedRelationshipPropertyPolicy.java │ │ ├── MultiValuedNodePropertyPolicy.java │ │ ├── PropertyValueParser.java │ │ ├── DataType.java │ │ └── ConversionConfig.java │ │ ├── util │ │ ├── Timer.java │ │ ├── Utils.java │ │ └── CSVUtils.java │ │ ├── Neo4jToNeptuneCli.java │ │ └── io │ │ ├── RawCsvPrinter.java │ │ ├── Directories.java │ │ └── OutputFile.java │ └── test │ └── java │ └── com │ └── amazonaws │ └── services │ └── neptune │ ├── metadata │ ├── MultiValuedRelationshipPropertyPolicyTest.java │ ├── PropertyValueParserTest.java │ └── MultiValuedNodePropertyPolicyTest.java │ └── util │ └── UtilsTest.java ├── neptune-serverless-evaluator ├── requirements.txt ├── LICENSE └── README.md ├── dynamic-custom-endpoints └── lambda │ ├── build.sh │ └── dynamic-custom-endpoints │ └── build.sh ├── .github ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── neo4j-to-neptune-ci.yml ├── CODE_OF_CONDUCT.md ├── neptune-export └── readme.md ├── neptune-python-utils ├── setup.py ├── neptune_python_utils │ ├── __init__.py │ ├── glue_gremlin_csv_transforms.py │ ├── glue_neptune_connection_info.py │ ├── glue_gremlin_client.py │ └── bulkload.py ├── build.sh └── build-lambda-layer.sh ├── 
glue-neptune ├── build.sh ├── glue_neptune │ ├── __init__.py │ ├── NeptuneConnectionInfo.py │ ├── GremlinCsvTransforms.py │ └── NeptuneGremlinClient.py └── readme.md ├── neptune-gremlin-client └── readme.md ├── opencypher-compatability-checker ├── input.json ├── output.json └── README.md ├── graphml2csv └── README.md ├── README.md ├── release.sh └── CONTRIBUTING.md /VERSION: -------------------------------------------------------------------------------- 1 | 12 2 | -------------------------------------------------------------------------------- /neptune-gremlin-js/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | artifacts/ 3 | **/target/ 4 | pom.xml.versionsBackup 5 | -------------------------------------------------------------------------------- /csv-to-neptune-bulk-format/.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | __pycache__ 3 | .data 4 | .vscode 5 | -------------------------------------------------------------------------------- /neptune-gremlin-js/.npmignore: -------------------------------------------------------------------------------- 1 | cdk-test-app 2 | test 3 | .eslintrc.js 4 | .gitignore 5 | 6 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/java8/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | mvn clean compile install -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Amazon Neptune Tools 2 | Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/.npmignore: -------------------------------------------------------------------------------- 1 | # CDK asset staging directory 2 | .cdk.staging 3 | cdk.out 4 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | testEnvironment: "node", 3 | } 4 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/bad-edges-with-sets.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,~from,~to,my-set:Int[] 2 | ms-001,SetTest,p1,p2,1;2;3;4;5;6 3 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/doubles.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,value:double,set:double[],optional 2 | abc-1,person,23.6,12.34;56.789, 3 | -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/lambda/.Python: -------------------------------------------------------------------------------- 1 | /usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/Python -------------------------------------------------------------------------------- /csv-gremlin/test-files/header-with-spaces-edge.csv: -------------------------------------------------------------------------------- 1 | ~id, ~label, ~from, ~to 2 | e1, likes, a1, a2 3 | e2, " likes ", a1, a2 4 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/header-with-spaces.csv: -------------------------------------------------------------------------------- 1 | ~id, ~label, type 2 | a1, animal, cat 3 | a2, " animal ", cat 4 | a3, animal , cat 5 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/edges.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,~from,~to,confidence:Double 2 | e-1,Knows,p-1,p-2, 3 | e-2,Knows,p-2,p-3,0.5 4 | e-3,Knows,p-2,p-4,5 5 | -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/NOTICE: -------------------------------------------------------------------------------- 1 | export-neptune-to-elasticsearch 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. -------------------------------------------------------------------------------- /neo4j-to-neptune/bin/neo4j-to-neptune.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | jar=$(find . 
-name neo4j-to-neptune.jar -print -quit) 4 | java -jar ${jar} "$@" 5 | -------------------------------------------------------------------------------- /neptune-serverless-evaluator/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.11.1 2 | boto3==1.22.8 3 | python_dateutil==2.8.2 4 | numpy==1.22.0 5 | requests==2.32.2 -------------------------------------------------------------------------------- /csv-gremlin/test-files/edges-with-bad-header.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,~from,target,confidence:Double 2 | e-1,Knows,p-1,p-2, 3 | e-2,Knows,p-2,p-3,0.5 4 | e-3,Knows,p-2,p-4,5 5 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | 3 | # CDK asset staging directory 4 | .cdk.staging 5 | cdk.out 6 | lambda/neptune-gremlin.js 7 | invoke-test.sh -------------------------------------------------------------------------------- /csv-gremlin/test-files/edges-with-repeat-ids.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,~from,~to 2 | e1,Knows,p1,p2 3 | e2,Knows,p2,p3 4 | e3,Knows,p3,p4 5 | e2,Knows,p5,p6 6 | e1,Knows,p6,p7 7 | -------------------------------------------------------------------------------- /csv-to-neptune-bulk-format/.images/Solution-CSVConverter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-neptune-tools/HEAD/csv-to-neptune-bulk-format/.images/Solution-CSVConverter.png -------------------------------------------------------------------------------- /dynamic-custom-endpoints/lambda/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | rm -rf target 4 | mkdir target 5 | 6 | pushd dynamic-custom-endpoints 7 | sh build.sh 8 | popd 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/export-neptune-to-elasticsearch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-neptune-tools/HEAD/export-neptune-to-elasticsearch/export-neptune-to-elasticsearch.png -------------------------------------------------------------------------------- /csv-gremlin/test-files/vertices-with-sets.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,simple,single:String,my-set:Int[],dates:Date[],oneInt:Int,oneFloat:Double,flag:Bool 2 | ms-001,SetTest,Hello,World,1;2;3;4;5;6,2020-11-17;2020-11-18,25,50,true 3 | -------------------------------------------------------------------------------- /neptune-gremlin-js/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.0.5 (2022-02-09) 2 | 3 | Added a license header to `neptune-gremlin.js` and a link to a sample app in the Readme. 4 | Added dev dependencies for `retire.js`. No functional changes. 
5 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | *Issue #, if available:* 2 | 3 | *Description of changes:* 4 | 5 | 6 | By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 7 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/bad-edges.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,~from,~to,since:Date,weight:Float 2 | 1,test,p1,p3,2020-11-25,5, 3 | 1,test,p2,p4,3.2 4 | 1,test,p2,p4 5 | 1,test,p2,p4 6 | 1,test,,p4 7 | 1,test,p2, 8 | 1,test,p2,p4 9 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/bad-vertices.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,value:Int,date:Date,score:Double,str 2 | p1,Person,1,2020-13-25,X 3 | p2,Person,A,, 4 | ,Person,2,, 5 | ,Person,2, 6 | p5,Person,2,X,1.0 7 | p6,Person,1,2020-02-30,5,Hello 8 | -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/lambda/requirements.txt: -------------------------------------------------------------------------------- 1 | protobuf==3.18.3 2 | six==1.16.0 3 | elasticsearch==6.4.0 4 | rdflib==6.2.0 5 | retrying 6 | requests 7 | requests_aws4auth 8 | cachetools 9 | certifi 10 | aws-kinesis-agg==1.1.7 11 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/vertices-with-bools.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,b1:Bool,b2:Bool,b3:Bool,b4:Bool,b5:Bool,b6:Boolean,b7:Bool 2 | pid-001,Person,true,True,TRUE,false,abc,123,tRuE 3 | pid-002,Person,true,True,TRUE,false,abc,456,TRue 4 | pid-003,Person,1,0,1,0,1,0,1 5 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/java8/readme.md: -------------------------------------------------------------------------------- 1 | # Java handler dependencies 2 | 3 | This Java handler depends on two libraries that are not currently available in Maven. Before building the Java handler, run the _install-dependencies.sh_ script to install the libraries in your local Maven repository. 4 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/vertices-with-repeat-ids.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,score:Int[],extra 2 | p1,Player,55,XYZ 3 | p1,Player,58,DEF 4 | p1,Player,43, 5 | p2,Player,43, 6 | p3,Player,67, 7 | p1,Player,88,HIJ 8 | p1,Player,31, 9 | p1,Player,90;12, 10 | p2,Player,55, 11 | p3,Player,18, 12 | p4,Player,81,ABC 13 | p5,Player,6, 14 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /neptune-export/readme.md: -------------------------------------------------------------------------------- 1 | # Deprecation Notice 2 | 3 | Neptune Export has been migrated to a [new standalone repository](https://github.com/aws/neptune-export). 4 | Ongoing development and releases will take place in the new repository, and this module here will no longer be maintained. 5 | Any export tool related issues should be reported in the Issues section under the new repository. 6 | -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/lambda/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | pip install virtualenv 4 | rm -rf target 5 | rm -rf temp 6 | mkdir target 7 | virtualenv temp --python=python3.8 8 | source temp/bin/activate 9 | pip install -r requirements.txt 10 | cd temp/lib/python3.8/site-packages 11 | cp -r ../../../../*.py . 12 | zip -r ../../../../target/export-neptune-to-elasticsearch.zip ./* 13 | deactivate 14 | cd ../../../../ 15 | rm -rf temp -------------------------------------------------------------------------------- /neptune-python-utils/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='neptune_python_utils', 5 | version='1.0', 6 | description='Python 3 library that simplifies using Gremlin-Python to connect to Amazon Neptune', 7 | author='Ian Robinson', 8 | author_email='ianrob@amazon.co.uk', 9 | packages=['neptune_python_utils'], 10 | install_requires=['gremlinpython', 'requests', 'backoff', 'cchardet', 'aiodns', 'idna-ssl'], 11 | ) -------------------------------------------------------------------------------- /csv-gremlin/test-files/dates.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,date:Date 2 | P1,person,0000-11-21T00:00:00.000Z 3 | P1,person,0000-11-21T000:00:00.000Z 4 | P2,person,2022-12-19T25:00:00.000Z 5 | P3,person,2023-01-04T00:00:00.000Z 6 | P4,person,2023-01-05T12:00:30.000 7 | P5,person,1965-01-24 8 | P6,person,1981-08-31T11:60:00.000 9 | P7,person,2001-06-14T11:12:61.000 10 | P8,person,2001-06-14T11:12:xy.000 11 | P9,person,2011-xy-14T11:12:00.000 12 | P10,person,1901-01-01T11:12:00.123 13 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/vertices-quotes.csv: -------------------------------------------------------------------------------- 1 | ~id,text,moreText:String 2 | No quotes,No quotes,No quotes 3 | 'Single quotes','Single quotes','Single quotes' 4 | "Double quotes","Double quotes","Double quotes" 5 | Apostrophe',Apostrophe',Apostrophe' 6 | "Apostrophe2'","Apostrophe2'","Apostrophe2'" 7 | Dollar$,Dollar$,Dollar$ 8 | "Dollar2$","Dollar$","Dollar$" 9 | Escaped,The \"near\" future,The \"near\" future 10 | Escaped2,"The \"near\" future","The \"near\" future" 11 | -------------------------------------------------------------------------------- /glue-neptune/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | pushd . 
4 | pip install virtualenv 5 | rm -rf target 6 | rm -rf temp 7 | mkdir target 8 | virtualenv temp --python=python2.7 9 | source temp/bin/activate 10 | cd temp 11 | pip install gremlinpython 12 | cd lib/python2.7/site-packages 13 | rm -rf certifi 14 | rm -rf certifi-* 15 | cp -r ../../../../glue_neptune . 16 | zip -r glue_neptune.zip * 17 | mv glue_neptune.zip ../../../../target/glue_neptune.zip 18 | deactivate 19 | popd 20 | rm -rf temp -------------------------------------------------------------------------------- /csv-gremlin/test-files/vertices.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,firstName,lastName,dob:Date,grade:Double,position:Int 2 | p-1,Person,Tommy,Frazier,1949-07-03,86.5,1 3 | p-2,Person,Angelyn,Crooks,1978-11-16,77.4,2 4 | p-3,Person,Travis,Tucker,2006-01-14,66.9,3 5 | p-4,Person,Nigel,Smith,2006-01-14,66.9,3 6 | p-5,Person,Ian,York,2006-01-14,,3 7 | p-6,Person,Jon,Wilson,2006-01-14,, 8 | p-7,Person,Jane,,2006-01-14,, 9 | p-8,Person,Frank,Jennings,2001-08-17,, 10 | p-9,Person,Rod,Arthurs,1965-02-01T09:01:30Z,88,1 11 | p-10,Person,Albert,Newyear,1999-12-31T23:59:59-0500,1,1 12 | -------------------------------------------------------------------------------- /dynamic-custom-endpoints/lambda/dynamic-custom-endpoints/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | pip install virtualenv 4 | rm -rf target 5 | rm -rf temp 6 | mkdir target 7 | virtualenv temp --python=python3.9 8 | source temp/bin/activate 9 | pushd temp 10 | cd lib/python3.9/site-packages 11 | rm -rf certifi-* 12 | rm -rf easy_install.py 13 | cp -r ../../../../*.py . 14 | zip -r dynamic_custom_endpoints.zip *.py -x "_virtualenv.py" 15 | mv dynamic_custom_endpoints.zip ../../../../../target/dynamic_custom_endpoints.zip 16 | deactivate 17 | popd 18 | rm -rf temp 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/vertices-with-bad-header.csv: -------------------------------------------------------------------------------- 1 | id,~label,firstName,lastName,dob:Date,grade:Double,position:Int 2 | p-1,Person,Tommy,Frazier,1949-07-03,86.5,1 3 | p-2,Person,Angelyn,Crooks,1978-11-16,77.4,2 4 | p-3,Person,Travis,Tucker,2006-01-14,66.9,3 5 | p-4,Person,Nigel,Smith,2006-01-14,66.9,3 6 | p-5,Person,Ian,York,2006-01-14,,3 7 | p-6,Person,Jon,Wilson,2006-01-14,, 8 | p-7,Person,Jane,,2006-01-14,, 9 | p-8,Person,Frank,Jennings,2001-08-17,, 10 | p-9,Person,Rod,Arthurs,1965-02-01T09:01:30Z,88,1 11 | p-10,Person,Albert,Newyear,1999-12-31T23:59:59-0500,1,1 12 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/README.md: -------------------------------------------------------------------------------- 1 | # Welcome to your CDK JavaScript project! 2 | 3 | This is a blank project for JavaScript development with CDK. 4 | 5 | The `cdk.json` file tells the CDK Toolkit how to execute your app. The build step is not required when using JavaScript. 
6 | 7 | ## Useful commands 8 | 9 | * `npm run test` perform the jest unit tests 10 | * `cdk deploy` deploy this stack to your default AWS account/region 11 | * `cdk diff` compare deployed stack with current state 12 | * `cdk synth` emits the synthesized CloudFormation template 13 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cdk-test-app", 3 | "version": "0.2.0", 4 | "bin": { 5 | "cdk-test-app": "bin/cdk-test-app.js" 6 | }, 7 | "scripts": { 8 | "build": "npm i && npm run synth && cd lambda && npm i && cd ..", 9 | "cdk": "cdk", 10 | "synth": "cdk synth", 11 | "test": "jest" 12 | }, 13 | "devDependencies": { 14 | "aws-cdk": "^2.1005.0", 15 | "jest": "^29.7.0" 16 | }, 17 | "dependencies": { 18 | "@aws-cdk/aws-neptune-alpha": "^2.185.0-alpha.0", 19 | "aws-cdk-lib": "2.189.1", 20 | "constructs": "^10.0.0" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /glue-neptune/glue_neptune/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. 2 | # All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). 5 | # You may not use this file except in compliance with the License. 6 | # A copy of the License is located at 7 | # 8 | # http://aws.amazon.com/apache2.0/ 9 | # 10 | # or in the "license" file accompanying this file. 11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions 13 | # and limitations under the License. 14 | 15 | __author__ = 'Ian Robinson (ianrob@amazon.com)' 16 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "node bin/cdk-test-app.js", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "jest.config.js", 11 | "package*.json", 12 | "yarn.lock", 13 | "node_modules", 14 | "test" 15 | ] 16 | }, 17 | "context": { 18 | "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true, 19 | "@aws-cdk/core:stackRelativeExports": true, 20 | "@aws-cdk/aws-rds:lowercaseDbIdentifier": true, 21 | "@aws-cdk/aws-lambda:recognizeVersionProps": true, 22 | "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /neptune-python-utils/neptune_python_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. 2 | # All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). 5 | # You may not use this file except in compliance with the License. 6 | # A copy of the License is located at 7 | # 8 | # http://aws.amazon.com/apache2.0/ 9 | # 10 | # or in the "license" file accompanying this file. 11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions 13 | # and limitations under the License. 
14 | 15 | __author__ = 'Ian Robinson (ianrob@amazon.com)' -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/lambda/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "lambda", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "integration-test.js", 6 | "scripts": { 7 | "test": "jest" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "dependencies": { 12 | "@aws-crypto/sha256-js": "^2.0.1", 13 | "@aws-sdk/signature-v4": "^3.110.0", 14 | "async": "^3.2.1", 15 | "aws4": "^1.11.0", 16 | "axios": "^1.8.2", 17 | "gremlin": "^3.5.1", 18 | "jsonwebtoken": "^9.0.0", 19 | "jwk-to-pem": "^2.0.5", 20 | "qs": "^6.10.1", 21 | "util": "^0.12.4", 22 | "uuid": "^8.3.2" 23 | }, 24 | "devDependencies": { 25 | "aws-sdk": "^2.1025.0", 26 | "jest": "^29.7.0" 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/python3.8/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | pushd . 4 | pip install virtualenv 5 | rm -rf target 6 | rm -rf temp 7 | mkdir target 8 | virtualenv temp 9 | source temp/bin/activate 10 | cd temp 11 | #pip install requests 12 | cd lib/python3.8/site-packages 13 | aws s3 cp s3://aws-neptune-customer-samples-us-east-1/neptune-sagemaker/bin/neptune-python-utils/neptune_python_utils.zip . 14 | unzip neptune_python_utils.zip 15 | rm -rf certifi-* 16 | rm -rf easy_install.py 17 | rm -rf six.py 18 | cp -r ../../../../*.py . 19 | zip -r stream_handler.zip ./* -x "*pycache*" -x "*.so" -x "*dist-info*" -x "*.virtualenv" -x "pip*" -x "pkg_resources*" -x "setuptools*" -x "wheel*" -x "certifi*" 20 | mv stream_handler.zip ../../../../target/stream_handler.zip 21 | deactivate 22 | popd 23 | rm -rf temp -------------------------------------------------------------------------------- /neptune-python-utils/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | pushd . 4 | sudo pip install virtualenv 5 | rm -rf target 6 | rm -rf temp 7 | mkdir target 8 | virtualenv temp --python=python3.8 9 | source temp/bin/activate 10 | cd temp 11 | pip install gremlinpython==3.5.1 12 | pip install requests 13 | pip install backoff 14 | pip install cchardet 15 | pip install aiodns 16 | pip install idna-ssl 17 | cd lib/python3.8/site-packages 18 | rm -rf certifi-* 19 | rm -rf easy_install.py 20 | rm -rf six.py 21 | cp -r ../../../../neptune_python_utils . 
22 | zip -r neptune_python_utils.zip ./* -x "*pycache*" -x "*.so" -x "*dist-info*" -x "*.virtualenv" -x "pip*" -x "pkg_resources*" -x "setuptools*" -x "wheel*" -x "certifi*" 23 | mv neptune_python_utils.zip ../../../../target/neptune_python_utils.zip 24 | deactivate 25 | popd 26 | rm -rf temp -------------------------------------------------------------------------------- /neo4j-to-neptune/docs/example-bulk-load-config.yaml: -------------------------------------------------------------------------------- 1 | # Example bulk load configuration for Neptune 2 | # This file demonstrates the configuration options for automated bulk loading 3 | # of converted CSV data into Amazon Neptune 4 | 5 | # Required S3 Configuration 6 | bucketName: my-neptune-data-bucket 7 | 8 | # Required Neptune Configuration 9 | neptuneEndpoint: my-cluster.cluster-abc123.us-east-1.neptune.amazonaws.com 10 | 11 | # IAM Configuration 12 | iamRoleArn: arn:aws:iam::123456789012:role/NeptuneLoadFromS3Role 13 | 14 | # Optional S3 Configuration 15 | s3Prefix: neptune 16 | 17 | # Optional Load Performance Configuration 18 | parallelism: OVERSUBSCRIBE # Options: LOW, MEDIUM, HIGH, OVERSUBSCRIBE 19 | 20 | # Optional Monitoring Configuration 21 | monitor: true # Set to false if you want to manually monitor load progress 22 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/java8/install-dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | pushd . 4 | rm -rf neptune-streams-layer.zip 5 | rm -rf lib 6 | aws s3 cp s3://aws-neptune-customer-samples/neptune-stream/lambda/java8/neptune-streams-layer.zip . 7 | unzip neptune-streams-layer.zip 8 | mv java/lib/ . 9 | rm -rf java 10 | rm -rf neptune-streams-layer.zip 11 | mvn install:install-file -Dfile=lib/amazon-neptune-streams-replicator-core-1.0.0.jar \ 12 | -DgroupId=com.amazonaws \ 13 | -DartifactId=amazon-neptune-streams-replicator-core \ 14 | -Dversion=1.0.0 \ 15 | -Dpackaging=jar 16 | mvn install:install-file -Dfile=lib/amazon-neptune-streams-replicator-lambda-1.0.0.jar \ 17 | -DgroupId=com.amazonaws \ 18 | -DartifactId=amazon-neptune-streams-replicator-lambda \ 19 | -Dversion=1.0.0 \ 20 | -Dpackaging=jar 21 | popd 22 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/streams-to-firehose/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | pushd . 4 | pip install virtualenv 5 | rm -rf target 6 | rm -rf temp 7 | mkdir target 8 | virtualenv temp 9 | source temp/bin/activate 10 | cd temp 11 | #pip install requests 12 | cd lib/python3.8/site-packages 13 | aws s3 cp s3://aws-neptune-customer-samples-us-east-1/neptune-sagemaker/bin/neptune-python-utils/neptune_python_utils.zip . 14 | unzip neptune_python_utils.zip 15 | rm -rf certifi-* 16 | rm -rf easy_install.py 17 | rm -rf six.py 18 | cp -r ../../../../*.py . 
19 | zip -r neptune_firehose_handler.zip ./* -x "*pycache*" -x "*.so" -x "*dist-info*" -x "*.virtualenv" -x "pip*" -x "pkg_resources*" -x "setuptools*" -x "wheel*" -x "certifi*" 20 | mv neptune_firehose_handler.zip ../../../../target/neptune_firehose_handler.zip 21 | deactivate 22 | popd 23 | rm -rf temp -------------------------------------------------------------------------------- /neptune-gremlin-client/readme.md: -------------------------------------------------------------------------------- 1 | # Deprecation Notice 2 | 3 | The Gremlin Client for Amazon Neptune has been migrated to a [new standalone repository](https://github.com/aws/neptune-gremlin-client). Ongoing development and releases will take place in the new repository, and this module here will no longer be maintained. 4 | 5 | Any Neptune Gremlin Client related issues should be reported in the Issues section under the new repository. 6 | 7 | Version 1.1.0 of the client is the last release of the client from this repository. The new repository is accompanied by a release of version 2.0.0 of the Neptune Gremlin Client. 8 | 9 | See [Migrating from version 1 of the Neptune Gremlin Client](https://github.com/aws/neptune-gremlin-client#migrating-from-version-1-of-the-neptune-gremlin-client) if you are migrating an application from version 1.x.x of the Neptune Gremlin Client to version 2.x.x. 10 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/Header.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | public interface Header { 16 | 17 | void updateDataType(DataType newDataType); 18 | 19 | void setIsMultiValued(boolean isMultiValued); 20 | 21 | String value(); 22 | } 23 | -------------------------------------------------------------------------------- /neptune-serverless-evaluator/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /neptune-python-utils/build-lambda-layer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | pushd . 4 | pip install virtualenv 5 | rm -rf target 6 | rm -rf temp 7 | mkdir target 8 | virtualenv temp --python=python3.8 9 | source temp/bin/activate 10 | cd temp 11 | pip install gremlinpython==3.5.1 12 | pip install requests 13 | pip install backoff 14 | pip install cchardet 15 | pip install aiodns 16 | pip install idna-ssl 17 | pushd lib/python3.8/site-packages 18 | #rm -rf certifi-* 19 | rm -rf easy_install.py 20 | rm -rf six.py 21 | cp -r ../../../../neptune_python_utils . 22 | popd 23 | mkdir python 24 | mv lib python/lib 25 | zip -r neptune_python_utils_lambda_layer.zip python \ 26 | -x "*pycache*" \ 27 | -x "*.so" \ 28 | -x "*dist-info*" \ 29 | -x "*.virtualenv" \ 30 | -x "*/pip*" \ 31 | -x "*/pkg_resources*" \ 32 | -x "*/setuptools*" \ 33 | -x "*/wheel*" \ 34 | -x "*distutils*" \ 35 | -x "*/_virtualenv.*" \ 36 | #-x "*/certifi*" 37 | deactivate 38 | popd 39 | mv temp/neptune_python_utils_lambda_layer.zip target/neptune_python_utils_lambda_layer.zip 40 | rm -rf temp -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/PropertyValueParserPolicy.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import com.fasterxml.jackson.databind.node.ArrayNode; 16 | 17 | public interface PropertyValueParserPolicy { 18 | PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser); 19 | void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser); 20 | } 21 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/bin/cdk-test-app.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | const cdk = require("aws-cdk-lib") 4 | const { CdkTestAppStack } = require("../lib/cdk-test-app-stack") 5 | 6 | const app = new cdk.App() 7 | new CdkTestAppStack(app, "neptune-gremlin-test", { 8 | /* If you don't specify 'env', this stack will be environment-agnostic. 9 | * Account/Region-dependent features and context lookups will not work, 10 | * but a single synthesized template can be deployed anywhere. */ 11 | 12 | /* Uncomment the next line to specialize this stack for the AWS Account 13 | * and Region that are implied by the current CLI configuration. 
*/ 14 | // env: { account: process.env.CDK_DEFAULT_ACCOUNT, region: process.env.CDK_DEFAULT_REGION }, 15 | 16 | /* Uncomment the next line if you know exactly what Account and Region you 17 | * want to deploy the stack to. */ 18 | // env: { account: '123456789012', region: 'us-east-1' }, 19 | 20 | /* For more information, see https://docs.aws.amazon.com/cdk/latest/guide/environments.html */ 21 | }) 22 | -------------------------------------------------------------------------------- /neptune-gremlin-js/.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "env": { 3 | "browser": true, 4 | "commonjs": true, 5 | "es2021": true, 6 | "jest": true, 7 | "node": true, 8 | }, 9 | "extends": ["eslint:recommended"], 10 | "parserOptions": { 11 | "ecmaVersion": 12, 12 | }, 13 | "rules": { 14 | "indent": [ 15 | "error", 16 | 4, 17 | { "SwitchCase": 1 }, 18 | ], 19 | "linebreak-style": [ 20 | "error", 21 | "unix", 22 | ], 23 | "quotes": [ 24 | "error", 25 | "double", 26 | ], 27 | "semi": [ 28 | "error", 29 | "never", 30 | ], 31 | "no-new": ["off"], 32 | "comma-dangle": ["error", "always-multiline"], 33 | "padded-blocks": ["off"], 34 | }, 35 | ignorePatterns: [ 36 | "node_modules/", 37 | "cdk.out/", 38 | "vendor/", 39 | "build/", 40 | "dist/", 41 | "plugins.js", 42 | ], 43 | } 44 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/util/Timer.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 
11 | */ 12 | 13 | package com.amazonaws.services.neptune.util; 14 | 15 | public class Timer implements AutoCloseable { 16 | 17 | private final long start = System.currentTimeMillis(); 18 | 19 | @Override 20 | public void close() throws Exception { 21 | System.err.println(); 22 | System.err.println(String.format("Completed in %s second(s)", (System.currentTimeMillis() - start) / 1000)); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /opencypher-compatability-checker/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "targetSystem": "NA", 3 | "queries": [ 4 | { 5 | "id": 1, 6 | "query": "MATCH (n:Person) RETURN n LIMIT 10" 7 | }, 8 | { 9 | "id": 2, 10 | "query": "MATCH (team:Team {name: 'Team C'}) CALL { WITH team MATCH (p:Player)-[:PLAYS_FOR]->(team) WITH collect(p) AS players FOREACH (player IN players | SET player.retired = false) RETURN size(players) AS updatedPlayers } RETURN updatedPlayers" 11 | }, 12 | { 13 | "id": 3, 14 | "query": "MATCH (p:Product)-[:BELONGS_TO]->(c:Category) RETURN p.name, c.name" 15 | }, 16 | { 17 | "id": 4, 18 | "query": "RETURN apoc.coll.intersection([1,2,3,4,5], [3,4,5]) AS output" 19 | }, 20 | { 21 | "id": 5, 22 | "query": "RETURN reduce(product = 1, n IN [1, 2, 3] | product / n)" 23 | }, 24 | { 25 | "id": 6, 26 | "query": "CALL apoc.json.validate('{\"foo\": [{\"baz\": 18446744062065078016838}],\"baz\": 18446744062065078016838}', '$')" 27 | }, 28 | { 29 | "id": 7, 30 | "query": "gggg" 31 | } 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /neptune-gremlin-js/test/aws-neptune-gremlin.test.js: -------------------------------------------------------------------------------- 1 | const {getHeaders} = require("../neptune-gremlin.js") 2 | 3 | test("getHeaders", async () => { 4 | 5 | const expected = { 6 | Host: "myneptunecluster.us-east-1.neptune.amazonaws.com:8182", 7 | "X-Amz-Security-Token": "................", 8 | "X-Amz-Date": "20211123T191311Z", 9 | Authorization: "AWS4-HMAC-SHA256 Credential=.../20211123/us-east-1/neptune-db/aws4_request, SignedHeaders=host;x-amz-date;x-amz-security-token, Signature=...", 10 | } 11 | 12 | const headers = await getHeaders( 13 | "myneptunecluster.us-east-1.neptune.amazonaws.com", 14 | 8182, 15 | { 16 | accessKey: "...", 17 | secretKey: "...", 18 | sessionToken: "AAAAAA1111111", 19 | region: "us-east-1", 20 | }, 21 | "/gremlin") 22 | 23 | console.log(headers) 24 | 25 | expect(headers.host).toEqual(expected.Host) 26 | expect(headers["x-amz-security-token"]).toBeTruthy() // ? 27 | expect(headers["x-amz-date"].length).toEqual(16) 28 | expect(headers.authorization.indexOf("AWS4-HMAC-SHA256 Credential=")).toEqual(0) 29 | 30 | }) 31 | 32 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/DateTimeUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. 
See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import org.joda.time.format.DateTimeFormatter; 16 | import org.joda.time.format.ISODateTimeFormat; 17 | 18 | import java.util.Date; 19 | 20 | public class DateTimeUtils { 21 | 22 | private static final DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.dateTimeParser().withZoneUTC(); 23 | 24 | public static Date parseISODate(final String dateStr) { 25 | return dateTimeFormatter.parseDateTime(dateStr).toDate(); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/util/Utils.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.util; 14 | 15 | public class Utils { 16 | private Utils() {} 17 | /** 18 | * Format file size for display 19 | */ 20 | public static String formatFileSize(long bytes) { 21 | if (bytes < 1024) return bytes + " B"; 22 | if (bytes < 1024 * 1024) return String.format("%.1f KB", bytes / 1024.0); 23 | if (bytes < 1024 * 1024 * 1024) return String.format("%.1f MB", bytes / (1024.0 * 1024.0)); 24 | return String.format("%.1f GB", bytes / (1024.0 * 1024.0 * 1024.0)); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /.github/workflows/neo4j-to-neptune-ci.yml: -------------------------------------------------------------------------------- 1 | name: Neo4j-to-Neptune CI 2 | 3 | on: 4 | workflow_dispatch: 5 | pull_request: 6 | paths: 7 | - 'neo4j-to-neptune/**' 8 | branches: 9 | - master 10 | push: 11 | paths: 12 | - 'neo4j-to-neptune/**' 13 | branches: 14 | - master 15 | 16 | jobs: 17 | build-and-test: 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - name: Checkout code 22 | uses: actions/checkout@v4 23 | 24 | - name: Set up JDK 17 25 | uses: actions/setup-java@v4 26 | with: 27 | java-version: '17' 28 | distribution: 'corretto' 29 | 30 | - name: Cache Maven dependencies 31 | uses: actions/cache@v4 32 | with: 33 | path: ~/.m2 34 | key: ${{ runner.os }}-m2-${{ hashFiles('neo4j-to-neptune/pom.xml') }} 35 | restore-keys: ${{ runner.os }}-m2 36 | 37 | - name: Maven install and tests 38 | working-directory: ./neo4j-to-neptune 39 | run: mvn clean install --batch-mode --fail-at-end 40 | 41 | - name: Upload test results 42 | uses: actions/upload-artifact@v4 43 | if: always() 44 | with: 45 | name: test-results 46 | path: neo4j-to-neptune/target/surefire-reports/ 47 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/Headers.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 Amazon.com, Inc. or its affiliates. 
All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import java.util.ArrayList; 16 | import java.util.List; 17 | import java.util.stream.Collectors; 18 | 19 | class Headers { 20 | 21 | private final List<Header>
headers = new ArrayList<>(); 22 | 23 | void add(Header header) { 24 | headers.add(header); 25 | } 26 | 27 | Header get(int index){ 28 | return headers.get(index); 29 | } 30 | 31 | List<String> values(){ 32 | return headers.stream().map(Header::value).collect(Collectors.toList()); 33 | } 34 | 35 | public int count() { 36 | return headers.size(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/PropertyValue.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | class PropertyValue { 16 | 17 | private final String value; 18 | private final boolean isMultiValued; 19 | private final DataType dataType; 20 | 21 | PropertyValue(String value, boolean isMultiValued, DataType dataType) { 22 | this.value = value; 23 | this.isMultiValued = isMultiValued; 24 | this.dataType = dataType; 25 | } 26 | 27 | String value() { 28 | return value; 29 | } 30 | 31 | boolean isMultiValued() { 32 | return isMultiValued; 33 | } 34 | 35 | public DataType dataType() { 36 | return dataType; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/java8/src/main/java/utils/EnvironmentVariablesUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 
11 | */ 12 | 13 | package utils; 14 | 15 | public class EnvironmentVariablesUtils { 16 | public static String getMandatoryEnv(String name) { 17 | 18 | if (isNullOrEmpty(System.getenv(name))) { 19 | 20 | throw new IllegalStateException(String.format("Missing environment variable: %s", name)); 21 | } 22 | return System.getenv(name); 23 | } 24 | 25 | public static String getOptionalEnv(String name, String defaultValue) { 26 | if (isNullOrEmpty(System.getenv(name))) { 27 | return defaultValue; 28 | } 29 | return System.getenv(name); 30 | } 31 | 32 | private static boolean isNullOrEmpty(String value) { 33 | return value == null || value.isEmpty(); 34 | } 35 | } -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/lambda/README.md: -------------------------------------------------------------------------------- 1 | # Export Neptune to Elasticsearch Lambda Functions 2 | 3 | This stack deploys two lambda functions: 4 | - export-neptune-to-kinesis-\<stack-id\> 5 | - kinesis-to-elasticsearch-\<stack-id\> 6 | 7 | Both lambda functions share the same code package but use different handler configurations. The following instructions detail how to build the unified code package for both of these lambda functions. This uses an included `build.sh` script to create a target ZIP file that can be directly pushed to AWS Lambda. 8 | 9 | NOTE: Each lambda function also has dependencies on other Lambda Layers. The Lambda Layers are not part of this code repository and are deployed as part of the base [Neptune Streams Poller stack](https://docs.aws.amazon.com/neptune/latest/userguide/full-text-search-cfn-create.html). 10 | 11 | ## Build 12 | 13 | The entire package can be built using the following: 14 | 15 | `sh build.sh` 16 | 17 | This will create a new `target` directory with the ZIP file package used in both lambda functions. 18 | 19 | To update either lambda function, you can use: 20 | 21 | `aws lambda update-function-code --function-name export-neptune-to-kinesis-<stack-id> --zip-file fileb://./target/export-neptune-to-elasticsearch.zip` 22 | 23 | or 24 | 25 | `aws lambda update-function-code --function-name kinesis-to-elasticsearch-<stack-id> --zip-file fileb://./target/export-neptune-to-elasticsearch.zip` 26 | 27 | -------------------------------------------------------------------------------- /glue-neptune/readme.md: -------------------------------------------------------------------------------- 1 | # glue-neptune 2 | 3 | __Updated Feb 2020: This library is now deprecated in favour of [_neptune-python-utils_](https://github.com/awslabs/amazon-neptune-tools/tree/master/neptune-python-utils)__ 4 | 5 | _glue-neptune_ is a Python library for AWS Glue that helps you write data to Amazon Neptune from Glue jobs. 6 | 7 | With _glue-neptune_ you can: 8 | 9 | - Get Neptune connection information from the Glue Data Catalog 10 | - Create label and node and edge ID columns in DynamicFrames, named in accordance with the Neptune CSV bulk load format for property graphs 11 | - Write from DynamicFrames directly to Neptune 12 | 13 | ## Build 14 | 15 | `sh build.sh` 16 | 17 | This creates a zip file: `target/glue_neptune.zip`. Copy this zip file to an S3 bucket. 18 | 19 | You can then refer to this library from your Glue Development Endpoint or Glue job. See [Using Python Libraries with AWS Glue](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-libraries.html). 
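To make the column conventions concrete, here is a minimal sketch using the plain PySpark DataFrame API (not the _glue-neptune_ API itself; the table and column names are hypothetical) that adds the `~id` and `~label` columns the Neptune CSV bulk load format expects for vertices. _glue-neptune_ applies the equivalent transformation to Glue DynamicFrames.

```python
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, concat, lit

spark = SparkSession.builder.appName("neptune-bulk-format-sketch").getOrCreate()

# Hypothetical relational rows to be written as 'person' vertices
people = spark.createDataFrame(
    [(1, "alice"), (2, "bob")],
    ["person_id", "name"],
)

# The Neptune bulk load format for property graphs expects '~id' and '~label'
# columns on vertex data
vertices = (
    people
    .withColumn("~id", concat(lit("person-"), col("person_id").cast("string")))
    .withColumn("~label", lit("person"))
    .drop("person_id")
)

vertices.show()  # columns: name, ~id, ~label
```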
20 | 21 | ## Examples 22 | 23 | See [Migrating from MySQL to Amazon Neptune using AWS Glue](https://github.com/aws-samples/amazon-neptune-samples/tree/master/gremlin/glue-neptune). 24 | 25 | ## Cross Account/Region Datasources 26 | If you have a datasource in a different region and/or different account from Glue and your Neptune database, you can follow the instructions in this [blog](https://aws.amazon.com/blogs/big-data/create-cross-account-and-cross-region-aws-glue-connections/) to allow access. 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /neptune-gremlin-js/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "neptune-gremlin", 3 | "version": "0.0.7", 4 | "description": "An SDK for querying an Amazon Neptune graph database using gremlin", 5 | "main": "neptune-gremlin.js", 6 | "homepage": "https://github.com/awslabs/amazon-neptune-tools/neptune-gremlin-js", 7 | "bugs": { 8 | "url": "https://github.com/awslabs/amazon-neptune-tools/issues" 9 | }, 10 | "repository": { 11 | "type": "git", 12 | "url": "https://github.com/awslabs/amazon-neptune-tools" 13 | }, 14 | "scripts": { 15 | "test": "jest", 16 | "build": "npm i && npm run lint && cp neptune-gremlin.js ./cdk-test-app/lambda && cd cdk-test-app && npm run build && cd .. && npm run test", 17 | "lint": "eslint . --fix" 18 | }, 19 | "keywords": [ 20 | "aws", 21 | "amazon", 22 | "neptune", 23 | "gremlin", 24 | "tinkerpop", 25 | "graph" 26 | ], 27 | "author": "Eric Z. Beard", 28 | "license": "Apache-2.0", 29 | "dependencies": { 30 | "@aws-crypto/sha256-js": "^5.2.0", 31 | "@smithy/signature-v4": "^5.0.1", 32 | "async": "^3.2.2", 33 | "gremlin": "^3.7.2" 34 | }, 35 | "devDependencies": { 36 | "@swc/core": "^1.2.137", 37 | "@swc/wasm": "^1.2.137", 38 | "bufferutil": "^4.0.6", 39 | "canvas": "^2.9.0", 40 | "encoding": "^0.1.13", 41 | "eslint": "^8.4.1", 42 | "jest": "^27.4.3", 43 | "node-notifier": "^10.0.1", 44 | "retire": "^4.2.1", 45 | "ts-node": "^10.5.0", 46 | "utf-8-validate": "^5.0.8" 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/java8/src/main/java/stream_handler/AbstractStreamHandler.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 
11 | */ 12 | 13 | package stream_handler; 14 | 15 | import com.amazonaws.neptune.StreamRecordsHandler; 16 | import com.amazonaws.neptune.config.CredentialsConfig; 17 | 18 | import java.util.Map; 19 | 20 | public abstract class AbstractStreamHandler implements StreamRecordsHandler { 21 | 22 | protected final String neptuneEndpoint; 23 | protected final Integer neptunePort; 24 | protected final CredentialsConfig credentialsConfig; 25 | protected final Map additionalParams; 26 | 27 | public AbstractStreamHandler(String neptuneEndpoint, Integer neptunePort, CredentialsConfig credentialsConfig, Map additionalParams) { 28 | this.neptuneEndpoint = neptuneEndpoint; 29 | this.neptunePort = neptunePort; 30 | this.credentialsConfig = credentialsConfig; 31 | this.additionalParams = additionalParams; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/Property.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | public class Property implements Header { 16 | 17 | private final String name; 18 | private boolean isMultiValued = false; 19 | private DataType dataType = DataType.None; 20 | 21 | Property(String name) { 22 | this.name = name; 23 | } 24 | 25 | @Override 26 | public void updateDataType(DataType newDataType) { 27 | this.dataType = DataType.getBroadestType(dataType, newDataType); 28 | } 29 | 30 | @Override 31 | public void setIsMultiValued(boolean isMultiValued) { 32 | this.isMultiValued = isMultiValued; 33 | } 34 | 35 | @Override 36 | public String value() { 37 | return isMultiValued ? 38 | String.format("%s%s[]", name, dataType.typeDescription()) : 39 | String.format("%s%s", name, dataType.typeDescription()); 40 | } 41 | 42 | public String getName() { 43 | return name; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/Neo4jToNeptuneCli.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 
11 | */
12 | 
13 | package com.amazonaws.services.neptune;
14 | 
15 | import com.github.rvesse.airline.annotations.Cli;
16 | import com.github.rvesse.airline.help.Help;
17 | 
18 | @Cli(name = "neo4j-to-neptune.sh",
19 |         description = "Export data from Neo4j to Neptune",
20 |         defaultCommand = Help.class,
21 |         commands = {
22 |                 ConvertCsv.class,
23 |                 Help.class
24 |         })
25 | public class Neo4jToNeptuneCli {
26 |     public static void main(String[] args) {
27 | 
28 |         com.github.rvesse.airline.Cli<Runnable> cli = new com.github.rvesse.airline.Cli<>(Neo4jToNeptuneCli.class);
29 | 
30 |         try {
31 |             Runnable cmd = cli.parse(args);
32 |             cmd.run();
33 |         } catch (Exception e) {
34 | 
35 |             System.err.println(e.getMessage());
36 |             System.err.println();
37 | 
38 |             Runnable cmd = cli.parse("help", args[0]);
39 |             cmd.run();
40 | 
41 |             System.exit(-1);
42 |         }
43 |     }
44 | }
45 | --------------------------------------------------------------------------------
/neptune-streams-utils/examples/streams-to-firehose/readme.md:
--------------------------------------------------------------------------------
1 | # streams-to-firehose
2 | 
3 | This Neptune Streams handler publishes Neptune Streams records to an Amazon Kinesis Data Firehose delivery stream.
4 | 
5 | ## Installing
6 | 
7 | 1. Build the handler using the _build.sh_ file.
8 | 2. Upload the _neptune_firehose_handler.zip_ file to an S3 bucket.
9 | 3. Create a Kinesis Data Firehose delivery stream called 'neptune-firehose'.
10 | 4. Create an IAM policy called 'neptune-firehose-handler-policy' using the snippet below, replacing the `<region>` and `<account-id>` placeholders.
11 | 5. Provision the handler using the _provision_neptune_streams_handler.py_ script in the _provisioning_ folder. Ensure you supply the correct delivery stream name and IAM policy ARN.
12 | 
13 | 
14 | ### Example IAM policy
15 | 
16 | ```
17 | {
18 |     "Version": "2012-10-17",
19 |     "Statement": [
20 |         {
21 |             "Effect": "Allow",
22 |             "Action": "firehose:PutRecordBatch",
23 |             "Resource": "arn:aws:firehose:<region>:<account-id>:deliverystream/neptune-firehose"
24 |         }
25 |     ]
26 | }
27 | ```
28 | 
29 | ### Example provisioning command
30 | 
31 | Here's an example of using the script to install the handler (after building the handler and uploading it to S3):
32 | 
33 | ```
34 | python provision_neptune_streams_handler.py \
35 |     --cluster_id=neptunedbcluster-abcdefghijkl \
36 |     --handler_s3_bucket=my-bucket \
37 |     --handler_s3_key=neptune_firehose_handler.zip \
38 |     --additional_params='{"delivery_stream_name": "neptune-firehose"}' \
39 |     --managed_policy_arns='["arn:aws:iam::123456789:policy/neptune-firehose-handler-policy"]' \
40 |     --region=us-east-1
41 | ```
42 | --------------------------------------------------------------------------------
/neo4j-to-neptune/docs/example-conversion-config.yaml:
--------------------------------------------------------------------------------
1 | # Example label mapping and filtering configuration for Neo4j to Neptune conversion
2 | # This file demonstrates how to map vertex and edge labels and skip certain records during conversion
3 | 
4 | # Vertex id transformation configuration
5 | vertexIdTransformation:
6 |   ~id: "{_labels}_{born}_{name}_{releases}_{tagline}_{title}_{_id}"
7 | 
8 | # Edge id transformation configuration
9 | # You can use {_from} or {~from} to reference the transformed source vertex ID
10 | # You can use {_to} or {~to} to reference the transformed target vertex ID
11 | edgeIdTransformation:
12 |   ~id: "{_start}!{_end}!{_type}!{~from}!{~to}!{~label}!{_id}"
13 | 
14 | # Vertex label mappings
15 | # Format: OldLabel: NewLabel
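# For example, with the mappings below a node exported from Neo4j with the label
# 'Person' is written to the Neptune bulk load CSV with the vertex label 'Individual'.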
16 | vertexLabels: 17 | Person: Individual 18 | Company: Organization 19 | Product: Item 20 | Location: Place 21 | User: Customer 22 | 23 | # Edge label mappings 24 | # Format: OLD_RELATIONSHIP_TYPE: NEW_RELATIONSHIP_TYPE 25 | edgeLabels: 26 | WORKS_FOR: EMPLOYED_BY 27 | LIVES_IN: RESIDES_IN 28 | OWNS: POSSESSES 29 | KNOWS: CONNECTED_TO 30 | PURCHASED: BOUGHT 31 | MANAGES: SUPERVISES 32 | 33 | # Skip vertices configuration 34 | skipVertices: 35 | # Skip vertices by their specific IDs 36 | byId: 37 | - "vertex_123" 38 | - "vertex_456" 39 | - "user_999" 40 | 41 | # Skip vertices by their labels (any vertex with these labels will be skipped) 42 | byLabel: 43 | - "TestData" 44 | - "Deprecated" 45 | - "TempNode" 46 | 47 | # Skip edges configuration 48 | skipEdges: 49 | # Skip edges by their relationship types 50 | byLabel: 51 | - "TEMP_RELATIONSHIP" 52 | - "DEBUG_LINK" 53 | - "OLD_CONNECTION" 54 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/Token.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | public class Token implements Header { 16 | 17 | static final Token NEO4J_ID = new Token("_id"); 18 | static final Token NEO4J_LABELS = new Token("_labels"); 19 | static final Token NEO4J_START = new Token("_start"); 20 | static final Token NEO4J_END = new Token("_end"); 21 | static final Token NEO4J_TYPE = new Token("_type"); 22 | 23 | static final Token GREMLIN_ID = new Token("~id"); 24 | static final Token GREMLIN_LABEL = new Token("~label"); 25 | static final Token GREMLIN_FROM = new Token("~from"); 26 | static final Token GREMLIN_TO = new Token("~to"); 27 | 28 | private final String name; 29 | 30 | private Token(String name) { 31 | this.name = name; 32 | } 33 | 34 | @Override 35 | public void updateDataType(DataType newDataType) { 36 | // Do nothing 37 | } 38 | 39 | @Override 40 | public void setIsMultiValued(boolean isMultiValued) { 41 | // Do nothing 42 | } 43 | 44 | @Override 45 | public String value() { 46 | return name; 47 | } 48 | 49 | public static String valueWithCurlyBraces(Token token) { 50 | return "{" + token.value() + "}"; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /graphml2csv/README.md: -------------------------------------------------------------------------------- 1 | # GraphML 2 Neptune CSV 2 | 3 | This Python script provides a utility to convert GraphML files into the CSV format that is used by Amazon Neptune for [Bulk Loading](https://docs.aws.amazon.com/neptune/latest/userguide/bulk-load-tutorial-format-gremlin.html). This script is compatible with Python2 and Python3. 4 | 5 | ## Usage 6 | 7 | ``` 8 | Usage: graphml2csv.py [options] 9 | 10 | Copyright 2018 Amazon.com, Inc. or its affiliates. 
11 | Licensed under the Apache License 2.0 http://aws.amazon.com/apache2.0/
12 | 
13 | Options:
14 |   --version             show program's version number and exit
15 |   -h, --help            show this help message and exit
16 |   -i FILE, --in=FILE    set input path [default: none]
17 |   -d DELIMITER, --delimiter=DELIMITER
18 |                         Set the output file delimiter [default: ,]
19 |   -e ENCODING, --encoding=ENCODING
20 |                         Set the input file encoding [default: utf-8]
21 | 
22 | A utility python script to convert GraphML files into the Amazon Neptune CSV
23 | format for bulk ingestion. See
24 | https://docs.aws.amazon.com/neptune/latest/userguide/bulk-load-tutorial-format-gremlin.html.
25 | ```
26 | 
27 | ## Example: Using the TinkerPop modern graph
28 | 
29 | Download the tinkerpop-modern.xml GraphML file.
30 | 
31 | ```
32 | $ curl https://raw.githubusercontent.com/apache/tinkerpop/master/data/tinkerpop-modern.xml -o tinkerpop-modern.xml
33 | ```
34 | 
35 | Run the Python script to produce two CSV files: one for nodes and one for edges.
36 | 
37 | ```
38 | $ ./graphml2csv.py -i tinkerpop-modern.xml
39 | infile = tinkerpop-modern.xml
40 | Processing tinkerpop-modern.xml
41 | Wrote 6 nodes and 18 attributes to tinkerpop-modern-nodes.csv.
42 | Wrote 6 edges and 12 attributes to tinkerpop-modern-edges.csv.
43 | ```
44 | 
45 | Upload the CSV files to your S3 bucket and [bulk load](https://docs.aws.amazon.com/neptune/latest/userguide/bulk-load.html) them into Neptune.
46 | --------------------------------------------------------------------------------
/neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/util/CSVUtils.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | Licensed under the Apache License, Version 2.0 (the "License").
4 | You may not use this file except in compliance with the License.
5 | A copy of the License is located at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | or in the "license" file accompanying this file. This file is distributed
8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
9 | express or implied. See the License for the specific language governing
10 | permissions and limitations under the License.
11 | */
12 | 
13 | package com.amazonaws.services.neptune.util;
14 | 
15 | import org.apache.commons.csv.CSVFormat;
16 | import org.apache.commons.csv.CSVParser;
17 | import org.apache.commons.csv.CSVRecord;
18 | 
19 | import java.io.File;
20 | import java.io.IOException;
21 | import java.nio.charset.StandardCharsets;
22 | import java.nio.file.Path;
23 | import java.util.List;
24 | 
25 | public class CSVUtils {
26 |     private CSVUtils() {}
27 | 
28 |     public static CSVParser newParser(File file) throws IOException {
29 |         return newParser(file.toPath());
30 |     }
31 | 
32 |     public static CSVParser newParser(Path filePath) throws IOException {
33 |         return CSVParser.parse(filePath, StandardCharsets.UTF_8, CSVFormat.DEFAULT);
34 |     }
35 | 
36 |     public static CSVRecord firstRecord(String s) {
37 |         try {
38 |             CSVParser parser = CSVParser.parse(s, CSVFormat.DEFAULT);
39 |             List<CSVRecord> records = parser.getRecords();
40 |             if (records.isEmpty()) {
41 |                 throw new IllegalArgumentException("Unable to find first record: " + s);
42 |             }
43 |             return records.get(0);
44 |         } catch (IOException e) {
45 |             throw new IllegalArgumentException("Unable to find first record: " + s);
46 |         }
47 |     }
48 | }
49 | --------------------------------------------------------------------------------
/neo4j-to-neptune/src/test/java/com/amazonaws/services/neptune/metadata/MultiValuedRelationshipPropertyPolicyTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | Licensed under the Apache License, Version 2.0 (the "License").
4 | You may not use this file except in compliance with the License.
5 | A copy of the License is located at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | or in the "license" file accompanying this file. This file is distributed
8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
9 | express or implied. See the License for the specific language governing
10 | permissions and limitations under the License.
11 | */
12 | 
13 | package com.amazonaws.services.neptune.metadata;
14 | 
15 | import org.junit.Test;
16 | 
17 | import static org.junit.Assert.*;
18 | 
19 | public class MultiValuedRelationshipPropertyPolicyTest {
20 | 
21 |     @Test
22 |     public void shouldReturnStringPropertyValueIfPolicyIsLeaveAsString() {
23 | 
24 |         String value = "[\"toy\",\"electronics\",\"gifts\"]";
25 | 
26 |         PropertyValueParser parser = new PropertyValueParser(MultiValuedRelationshipPropertyPolicy.LeaveAsString, "", false);
27 |         PropertyValue propertyValue = parser.parse(value);
28 | 
29 |         assertEquals("\"[\"\"toy\"\",\"\"electronics\"\",\"\"gifts\"\"]\"", propertyValue.value());
30 |         assertFalse(propertyValue.isMultiValued());
31 |     }
32 | 
33 |     @Test
34 |     public void shouldThrowExceptionIfPolicyIsHalt() {
35 |         String value = "[\"toy\",\"electronics\",\"gifts\"]";
36 | 
37 |         PropertyValueParser parser = new PropertyValueParser(MultiValuedRelationshipPropertyPolicy.Halt, "", false);
38 | 
39 |         try {
40 |             parser.parse(value);
41 |             fail();
42 |         } catch (RuntimeException e) {
43 |             assertEquals("Halt: found multivalued relationship property value", e.getMessage());
44 | 
45 |         }
46 |     }
47 | }
48 | --------------------------------------------------------------------------------
/neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/MultiValuedRelationshipPropertyPolicy.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Amazon.com, Inc.
or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import com.fasterxml.jackson.databind.node.ArrayNode; 16 | 17 | public enum MultiValuedRelationshipPropertyPolicy implements PropertyValueParserPolicy { 18 | 19 | LeaveAsString { 20 | @Override 21 | public PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser) { 22 | return parser.stringValue(s); 23 | } 24 | 25 | @Override 26 | public void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser) { 27 | // Do nothing 28 | } 29 | }, 30 | Halt { 31 | @Override 32 | public PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser) { 33 | throw new RuntimeException("Halt: found multivalued relationship property value"); 34 | } 35 | 36 | @Override 37 | public void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser) { 38 | // Do nothing 39 | } 40 | }; 41 | 42 | @Override 43 | public abstract PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser); 44 | 45 | @Override 46 | public abstract void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser); 47 | } 48 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/io/RawCsvPrinter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 
11 | */
12 | 
13 | package com.amazonaws.services.neptune.io;
14 | 
15 | import java.io.FileWriter;
16 | import java.io.Flushable;
17 | import java.io.IOException;
18 | import java.io.PrintWriter;
19 | import java.nio.file.Path;
20 | 
21 | public class RawCsvPrinter implements Flushable, AutoCloseable {
22 | 
23 |     static RawCsvPrinter newPrinter(Path filePath, boolean append) throws IOException {
24 |         return new RawCsvPrinter(filePath, append);
25 |     }
26 | 
27 |     static RawCsvPrinter newPrinter(Path filePath) throws IOException {
28 |         return newPrinter(filePath, false);
29 |     }
30 | 
31 |     private final PrintWriter printer;
32 | 
33 |     private RawCsvPrinter(Path filePath, boolean append) throws IOException {
34 |         this.printer = new PrintWriter(new FileWriter(filePath.toFile(), append));
35 |     }
36 | 
37 |     void printRecord(Iterable<String> values) {
38 |         printer.write(String.join(",", values));
39 |         printer.write(System.lineSeparator());
40 |     }
41 | 
42 |     void printRecord(String value) {
43 |         printer.write(value);
44 |         printer.write(System.lineSeparator());
45 |     }
46 | 
47 |     @Override
48 |     public void flush() throws IOException {
49 |         printer.flush();
50 |     }
51 | 
52 |     @Override
53 |     public void close() throws IOException {
54 |         printer.close();
55 |     }
56 | }
57 | --------------------------------------------------------------------------------
/neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/io/Directories.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | Licensed under the Apache License, Version 2.0 (the "License").
4 | You may not use this file except in compliance with the License.
5 | A copy of the License is located at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | or in the "license" file accompanying this file. This file is distributed
8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
9 | express or implied. See the License for the specific language governing
10 | permissions and limitations under the License.
11 | */
12 | 
13 | package com.amazonaws.services.neptune.io;
14 | 
15 | import java.io.File;
16 | import java.io.IOException;
17 | import java.nio.file.Files;
18 | import java.nio.file.Path;
19 | 
20 | public class Directories {
21 | 
22 |     public static Directories createFor(File root) throws IOException {
23 |         if (root == null) {
24 |             throw new IllegalArgumentException("You must supply a directory");
25 |         }
26 | 
27 |         String directoryName = String.valueOf(System.currentTimeMillis());
28 |         Path rootDirectory = root.toPath();
29 | 
30 |         Path directory = rootDirectory.resolve(directoryName);
31 |         Files.createDirectories(directory);
32 | 
33 |         return new Directories(directory);
34 |     }
35 | 
36 |     private final Path directory;
37 | 
38 |     private Directories(Path directory) {
39 |         this.directory = directory;
40 |     }
41 | 
42 |     public Path outputDirectory() {
43 |         return directory;
44 |     }
45 | 
46 |     Path createFilePath(String name) {
47 |         return createFilePath(name, null);
48 |     }
49 | 
50 |     Path createFilePath(String name, Object index) {
51 | 
52 |         String filename = index == null ?
53 |                 String.format("%s.csv", name) :
54 |                 String.format("%s-%s.csv", name, index);
55 | 
56 |         return directory.resolve(filename);
57 |     }
58 | }
59 | 
60 | --------------------------------------------------------------------------------
/neptune-serverless-evaluator/README.md:
--------------------------------------------------------------------------------
1 | ## Neptune Serverless Cost Evaluator
2 | Neptune offers on-demand provisioned instances and serverless instances to accommodate a variety of scale-up and scale-down needs. Choosing between the two modes is often a cost decision and requires an understanding of your access patterns. This README walks through some of the decision factors for new workloads and shows how you can use CloudWatch data to check whether a workload currently running on a Neptune serverless instance would be cheaper on Neptune provisioned instances.
3 | 
4 | ### Minimum IAM policies required
5 | * AmazonRDSReadOnlyAccess
6 | * AWSPriceListServiceFullAccess
7 | 
8 | 
9 | ### How to run the script
10 | * [Configure the AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html)
11 | * Install Python 3.7 or above
12 | * Install pip3
13 | * Clone this repository to a local directory
14 | * Install the required libraries:
15 | 
16 | 
17 | ```
18 | pip3 install -r requirements.txt
19 | ```
20 | 
21 | 
22 | Parameter names:
23 | | Parameter | Details | Default |
24 | | ------------- |:-------------:| -----:|
25 | | -n, --name | Neptune instance name | |
26 | | -r, --region | Region name for the instance | |
27 | | -p, --period | Number of days of data points collected from CloudWatch | 14 |
28 | 
29 | 
30 | Example of checking whether on-demand provisioned instances would be cheaper than running Neptune serverless:
31 | 
32 | ```
33 | python pricing.py -n database-2-instance-1 -p 1 -r us-east-1
34 | 
35 | Region : us-east-1
36 | Instance name : database-2-instance-1
37 | Instance type : db.serverless
38 | Data collection period : 1 days
39 | Total cost of running serverless for last 1 days : $4.1
40 | Minimum NCU utilization : 1.0 ,Equivalent OnDemand Instance Costs: (db.r6g.large) $6.903
41 | Maximum NCU utilization : 5.0 ,Equivalent OnDemand Instance Costs: (db.r6g.xlarge) $13.805
42 | 90th Percentile NCU Utilization : 1.5 ,Equivalent OnDemand Instance Costs: (db.r6g.large) $6.903
43 | Average NCU utilization : 1 ,Equivalent OnDemand Instance Costs: (db.r6g.large) $6.903
44 | Total data points : 1208
45 | ```
46 | 
47 | --------------------------------------------------------------------------------
/opencypher-compatability-checker/output.json:
--------------------------------------------------------------------------------
1 | {
2 |   "results" : [ {
3 |     "id" : 1,
4 |     "supported" : true,
5 |     "errorDefinitions" : [ ]
6 |   }, {
7 |     "id" : 2,
8 |     "supported" : false,
9 |     "errorDefinitions" : [ {
10 |       "position" : "line 1, column 43 (offset: 42)",
11 |       "name" : "CALL",
12 |       "replacement" : "",
13 |       "description" : "Update clauses like MERGE, CREATE, SET, REMOVE, DELETE, FOREACH not supported in call subquery"
14 |     }, {
15 |       "position" : "line 1, column 118 (offset: 117)",
16 |       "name" : "FOREACH",
17 |       "replacement" : "",
18 |       "description" : "FOREACH is not supported in this release"
19 |     } ]
20 |   }, {
21 |     "id" : 3,
22 |     "supported" : true,
23 |     "errorDefinitions" : [ ]
24 |   }, {
25 |     "id" : 4,
26 |     "supported" : false,
27 |     "errorDefinitions" : [ {
28 |       "position" : "line 1, column 8 (offset: 7)",
29 |       "name" : "apoc.coll.intersection",
30 |       "replacement" : "collintersection",
31 |       "description" : "apoc.coll.intersection is
not supported in this release but try replacing with collintersection" 32 | } ] 33 | }, { 34 | "id" : 5, 35 | "supported" : false, 36 | "errorDefinitions" : [ { 37 | "position" : "line 1, column 8 (offset: 7)", 38 | "name" : "reduce", 39 | "replacement" : "", 40 | "description" : "reduce only supported with add or multiply expressions" 41 | } ] 42 | }, { 43 | "id" : 6, 44 | "supported" : false, 45 | "errorDefinitions" : [ { 46 | "position" : "line 1, column 1 (offset: 0)", 47 | "name" : "apoc.json.validate", 48 | "replacement" : "", 49 | "description" : "apoc.json.validate is not supported in this release" 50 | } ] 51 | }, { 52 | "id" : 7, 53 | "supported" : false, 54 | "errorDefinitions" : [ { 55 | "position" : "", 56 | "name" : "", 57 | "replacement" : "", 58 | "description" : "Invalid Open Cypher Query :org.opencypher.v9_0.util.SyntaxException: Invalid input 'g': expected (line 1, column 1 (offset: 0))" 59 | } ] 60 | } ] 61 | } -------------------------------------------------------------------------------- /glue-neptune/glue_neptune/NeptuneConnectionInfo.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. 2 | # All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). 5 | # You may not use this file except in compliance with the License. 6 | # A copy of the License is located at 7 | # 8 | # http://aws.amazon.com/apache2.0/ 9 | # 10 | # or in the "license" file accompanying this file. 11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions 13 | # and limitations under the License. 14 | 15 | import sys, boto3, os 16 | 17 | from awsglue.utils import getResolvedOptions 18 | from awsglue.context import GlueContext 19 | 20 | class NeptuneConnectionInfo: 21 | 22 | def __init__(self, glue_context): 23 | self.glue_context = glue_context 24 | 25 | def __neptune_connection(self, connection_name): 26 | proxy_url = self.glue_context._jvm.AWSConnectionUtils.getGlueProxyUrl() 27 | glue_endpoint = self.glue_context._jvm.AWSConnectionUtils.getGlueEndpoint() 28 | region = self.glue_context._jvm.AWSConnectionUtils.getRegion() 29 | if not proxy_url[8:].startswith('null'): 30 | os.environ['https_proxy'] = proxy_url 31 | glue = boto3.client('glue', endpoint_url=glue_endpoint, region_name=region) 32 | connection = glue.get_connection(Name=connection_name) 33 | del os.environ['https_proxy'] 34 | return connection['Connection']['ConnectionProperties']['JDBC_CONNECTION_URL'] 35 | 36 | def neptune_endpoint(self, connection_name): 37 | """Gets Neptune endpoint information from the Glue Data Catalog. 38 | 39 | You can store Neptune endpoint information as JDBC connections in the Glue Data Catalog. 40 | JDBC connection strings must begin 'jdbc:'. 
To store a Neptune endpoint, use the following format:
41 | 
42 |         'jdbc:<protocol>://<neptune-endpoint>:<port>/<path>'
43 | 
44 |         For example, if you store:
45 | 
46 |         'jdbc:ws://my-neptune-cluster.us-east-1.neptune.amazonaws.com:8182/gremlin'
47 | 
48 |         – this method will return:
49 | 
50 |         'ws://my-neptune-cluster.us-east-1.neptune.amazonaws.com:8182/gremlin'
51 | 
52 |         Example:
53 |         >>> gremlin_endpoint = NeptuneConnectionInfo(glueContext).neptune_endpoint('neptune')
54 |         """
55 |         return self.__neptune_connection(connection_name)[5:]
--------------------------------------------------------------------------------
/neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/io/OutputFile.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | Licensed under the Apache License, Version 2.0 (the "License").
4 | You may not use this file except in compliance with the License.
5 | A copy of the License is located at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | or in the "license" file accompanying this file. This file is distributed
8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
9 | express or implied. See the License for the specific language governing
10 | permissions and limitations under the License.
11 | */
12 | 
13 | package com.amazonaws.services.neptune.io;
14 | 
15 | import java.io.IOException;
16 | import java.nio.file.Files;
17 | import java.nio.file.Path;
18 | import java.util.stream.Stream;
19 | 
20 | public class OutputFile implements AutoCloseable {
21 | 
22 |     private RawCsvPrinter printer;
23 | 
24 |     private final Directories directories;
25 |     private final String filename;
26 | 
27 |     public OutputFile(Directories directories, String filename) throws IOException {
28 | 
29 |         this.directories = directories;
30 |         this.filename = filename;
31 | 
32 |         this.printer = RawCsvPrinter.newPrinter(directories.createFilePath(filename));
33 |     }
34 | 
35 |     public void printRecord(Iterable<String> values) throws IOException {
36 |         printer.printRecord(values);
37 |     }
38 | 
39 |     public void printHeaders(Iterable<String> headers) throws IOException {
40 | 
41 |         Path originalFilePath = directories.createFilePath(filename);
42 |         Path tempFilePath = directories.createFilePath(filename, "temp");
43 | 
44 |         printer.flush();
45 |         printer.close();
46 | 
47 |         try (Stream<String> stream = Files.lines(originalFilePath);
48 |              RawCsvPrinter tempFilePrinter = RawCsvPrinter.newPrinter(tempFilePath)) {
49 | 
50 |             tempFilePrinter.printRecord(headers);
51 |             stream.forEach(tempFilePrinter::printRecord);
52 | 
53 |             tempFilePrinter.flush();
54 |         }
55 | 
56 |         Files.deleteIfExists(originalFilePath);
57 |         if (!tempFilePath.toFile().renameTo(originalFilePath.toFile())) {
58 |             throw new RuntimeException("Unable to rename temp file: " + tempFilePath);
59 |         }
60 | 
61 |         printer = RawCsvPrinter.newPrinter(originalFilePath, true);
62 |     }
63 | 
64 |     @Override
65 |     public void close() throws Exception {
66 |         printer.flush();
67 |         printer.close();
68 |     }
69 | }
70 | --------------------------------------------------------------------------------
/neptune-python-utils/neptune_python_utils/glue_gremlin_csv_transforms.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Amazon.com, Inc. or its affiliates.
2 | # All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License").
5 | # You may not use this file except in compliance with the License.
6 | # A copy of the License is located at
7 | #
8 | #   http://aws.amazon.com/apache2.0/
9 | #
10 | # or in the "license" file accompanying this file.
11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions
13 | # and limitations under the License.
14 | 
15 | from awsglue.dynamicframe import DynamicFrame
16 | from pyspark.sql.functions import lit
17 | from pyspark.sql.functions import format_string
18 | 
19 | class GlueGremlinCsvTransforms:
20 | 
21 |     @classmethod
22 |     def create_prefixed_columns(cls, datasource, mappings):
23 |         """Creates columns in a DynamicFrame whose values are based on prefixed values from another column in the DynamicFrame.
24 | 
25 |         Example:
26 |         >>> df = GlueGremlinCsvTransforms.create_prefixed_columns(df, [('~id', 'productId', 'p'),('~to', 'supplierId', 's')])
27 |         """
28 |         dataframe = datasource.toDF()
29 |         for (column_name, source_column, prefix) in mappings:
30 |             dataframe = dataframe.withColumn(column_name, format_string(prefix + "-%s", dataframe[source_column]))
31 |         return DynamicFrame.fromDF(dataframe, datasource.glue_ctx, 'create_vertex_id_columns')
32 | 
33 |     @classmethod
34 |     def create_edge_id_column(cls, datasource, from_column, to_column):
35 |         """Creates an '~id' column in a DynamicFrame whose values are based on the specified from and to columns.
36 | 
37 |         Example:
38 |         >>> df = GlueGremlinCsvTransforms.create_edge_id_column(df, 'supplierId', 'productId')
39 |         """
40 |         dataframe = datasource.toDF()
41 |         dataframe = dataframe.withColumn('~id', format_string("%s-%s", dataframe[from_column], dataframe[to_column]))
42 |         return DynamicFrame.fromDF(dataframe, datasource.glue_ctx, 'create_edge_id_column')
43 | 
44 |     @classmethod
45 |     def addLabel(cls, datasource, label):
46 |         """Adds a '~label' column to a DynamicFrame.
47 | 
48 |         Example:
49 |         >>> df = GlueGremlinCsvTransforms.addLabel(df, 'Product')
50 |         """
51 |         dataframe = datasource.toDF()
52 |         dataframe = dataframe.withColumn("~label", lit(label))
53 |         return DynamicFrame.fromDF(dataframe, datasource.glue_ctx, label)
--------------------------------------------------------------------------------
/neptune-streams-utils/readme.md:
--------------------------------------------------------------------------------
1 | # neptune-streams-utils
2 | 
3 | This project includes example [Neptune Streams](https://docs.aws.amazon.com/neptune/latest/userguide/streams.html) handlers and build scripts, and a command-line tool that installs a handler in the Neptune Streams polling framework.
4 | 
5 | ## Example handlers
6 | 
7 | The _examples_ directory contains sample handlers written in Python and Java, and scripts for building them. You can use these as the basis of your own handlers. Run _build.sh_, and then copy the zip file in the _target_ directory to your S3 bucket.
8 | 
9 | ### Java handler dependencies
10 | 
11 | The Java handler depends on two libraries that are not currently available in Maven. Before building the Java handler, run the _install-dependencies.sh_ script to install the libraries in your local Maven repository.
12 | 
13 | ## Provisioning script
14 | 
15 | The _provisioning_ folder contains a command-line tool that installs a handler in the Neptune Streams polling framework.
The handler and polling framework are created using a [CloudFormation template](https://s3.amazonaws.com/aws-neptune-customer-samples/neptune-stream/neptune_stream_poller_nested_full_stack.json) provided by Neptune. This CloudFormation template has over 25 input parameters. The script here simplifies running the CloudFormation template. The script queries the AWS Management APIs to get details of the Neptune cluster, VPC, subnets, security groups, etc., and then populates and invokes the CloudFormation template.
16 | 
17 | Here's an example of using the script to install the example Python handler (after building the handler and putting it in S3):
18 | 
19 | ```
20 | python provision_neptune_streams_handler.py \
21 |     --cluster_id=neptunedbcluster-abcdefghijkl \
22 |     --handler_s3_bucket=my-bucket \
23 |     --handler_s3_key=streams/stream_handler.zip \
24 |     --region=us-east-1
25 | ```
26 | 
27 | Here's an example of using the script to install the example Java handler (after building the handler and putting it in S3):
28 | 
29 | ```
30 | python provision_neptune_streams_handler.py \
31 |     --cluster_id=neptunedbcluster-abcdefghijkl \
32 |     --handler_s3_bucket=my-bucket \
33 |     --handler_s3_key=streams/stream_handler.jar \
34 |     --lambda_runtime=java8 \
35 |     --region=us-east-1
36 | ```
37 | 
38 | If you supply an additional `--dry_run=true` parameter, the tool will simply create all the CloudFormation parameters, but not actually run the template.
39 | 
40 | ## Additional resources
41 | 
42 | More details on creating your own custom handlers can be found in the blog post [Capture graph changes using Neptune Streams](https://aws.amazon.com/blogs/database/capture-graph-changes-using-neptune-streams/).
43 | 
44 | --------------------------------------------------------------------------------
/neo4j-to-neptune/src/test/java/com/amazonaws/services/neptune/metadata/PropertyValueParserTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | Licensed under the Apache License, Version 2.0 (the "License").
4 | You may not use this file except in compliance with the License.
5 | A copy of the License is located at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | or in the "license" file accompanying this file. This file is distributed
8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
9 | express or implied. See the License for the specific language governing
10 | permissions and limitations under the License.
11 | */
12 | 
13 | package com.amazonaws.services.neptune.metadata;
14 | 
15 | import org.junit.Test;
16 | 
17 | import static org.junit.Assert.*;
18 | 
19 | public class PropertyValueParserTest {
20 | 
21 |     @Test
22 |     public void shouldDoubleUpDoubleQuotesAndSurroundWithDoubleQuotesAStringValueContainingDoubleQuotes() {
23 |         String originalValue = "First \"second\" third fourth";
24 |         PropertyValue propertyValue = new PropertyValueParser(MultiValuedNodePropertyPolicy.LeaveAsString, "", false).parse(originalValue);
25 | 
26 |         assertEquals("\"First \"\"second\"\" third fourth\"", propertyValue.value());
27 |         assertFalse(propertyValue.isMultiValued());
28 |     }
29 | 
30 |     @Test
31 |     public void shouldSurroundWithDoubleQuotesAStringValueContainingComma() {
32 |         String originalValue = "one, two, three";
33 |         PropertyValue propertyValue = new PropertyValueParser(MultiValuedNodePropertyPolicy.LeaveAsString, "", false).parse(originalValue);
34 | 
35 |         assertEquals("\"one, two, three\"", propertyValue.value());
36 |         assertFalse(propertyValue.isMultiValued());
37 |     }
38 | 
39 |     @Test
40 |     public void shouldSurroundWithDoubleQuotesAStringValueContainingNewLine() {
41 |         String originalValue = "one" + System.lineSeparator() + "two";
42 |         PropertyValue propertyValue = new PropertyValueParser(MultiValuedNodePropertyPolicy.LeaveAsString, "", false).parse(originalValue);
43 | 
44 |         assertEquals("\"one" + System.lineSeparator() + "two\"", propertyValue.value());
45 |         assertFalse(propertyValue.isMultiValued());
46 |     }
47 | 
48 |     @Test
49 |     public void shouldReplaceSemicolonInMultiValuePropertiesWithReplacementString() {
50 |         String originalValue = "[\"one\",\"two;three\"]";
51 |         PropertyValue propertyValue = new PropertyValueParser(MultiValuedNodePropertyPolicy.PutInSetIgnoringDuplicates, "SEMICOLON", false).parse(originalValue);
52 | 
53 |         assertEquals("one;twoSEMICOLONthree", propertyValue.value());
54 |         assertTrue(propertyValue.isMultiValued());
55 |     }
56 | 
57 | }
--------------------------------------------------------------------------------
/neptune-python-utils/neptune_python_utils/glue_neptune_connection_info.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Amazon.com, Inc. or its affiliates.
2 | # All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License").
5 | # You may not use this file except in compliance with the License.
6 | # A copy of the License is located at
7 | #
8 | #   http://aws.amazon.com/apache2.0/
9 | #
10 | # or in the "license" file accompanying this file.
11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions
13 | # and limitations under the License.
14 | 
15 | import sys
16 | import boto3
17 | import requests
18 | from neptune_python_utils.endpoints import Endpoints
19 | 
20 | class GlueNeptuneConnectionInfo:
21 | 
22 |     def __init__(self, region, role_arn):
23 |         self.region = region
24 |         self.role_arn = role_arn
25 | 
26 |     def neptune_endpoints(self, connection_name):
27 |         """Gets Neptune endpoint information from the AWS Glue Data Catalog.
28 | 
29 |         You may need to install a Glue VPC Endpoint in your VPC for this method to work.
30 | 
31 |         You can create a Glue connection of type 'JDBC' or 'NETWORK'.
32 | 
33 |         When you use a Glue connection of type 'JDBC', store the Amazon Neptune endpoint in the 'JDBC_CONNECTION_URL' field, e.g.
'jdbc:wss://my-neptune-cluster.us-east-1.neptune.amazonaws.com:8182/gremlin'.
34 | 
35 |         When you use a Glue connection of type 'NETWORK', store the Amazon Neptune endpoint in the 'Description' field, e.g. 'wss://my-neptune-cluster.us-east-1.neptune.amazonaws.com:8182/gremlin'.
36 | 
37 |         When you invoke the method, it returns the Neptune endpoint, e.g. 'wss://my-neptune-cluster.us-east-1.neptune.amazonaws.com:8182/gremlin'
38 | 
39 |         Example:
40 |         >>> endpoints = GlueNeptuneConnectionInfo(region, role_arn).neptune_endpoints('neptune')
41 |         """
42 |         glue = boto3.client('glue', region_name=self.region)
43 |         connection = glue.get_connection(Name=connection_name)['Connection']
44 | 
45 |         if connection['ConnectionType'] == "JDBC":
46 |             neptune_uri = connection['ConnectionProperties']['JDBC_CONNECTION_URL'][5:]
47 | 
48 |         if connection['ConnectionType'] == "NETWORK":
49 |             neptune_uri = connection['Description']
50 | 
51 |         parse_result = requests.utils.urlparse(neptune_uri)
52 |         netloc_parts = parse_result.netloc.split(':')
53 |         host = netloc_parts[0]
54 |         port = netloc_parts[1]
55 | 
56 |         return Endpoints(neptune_endpoint=host, neptune_port=port, region_name=self.region, role_arn=self.role_arn)
--------------------------------------------------------------------------------
/neptune-streams-utils/provisioning/readme.md:
--------------------------------------------------------------------------------
1 | # provision-neptune-streams-handler
2 | 
3 | Provisions a Neptune Streams handler.
4 | 
5 | ### Prerequisites
6 | 
7 | ```
8 | pip install boto3
9 | pip install fire
10 | pip install tabulate
11 | ```
12 | 
13 | Before provisioning a handler using this script, ensure the following conditions are met:
14 | 
15 | - You have an existing Neptune cluster
16 | - Neptune Streams is [enabled](https://docs.aws.amazon.com/neptune/latest/userguide/streams-using.html#streams-using-enabling)
17 | 
18 | ### Usage
19 | 
20 | ```
21 | NAME
22 |     provision_neptune_streams_handler.py
23 | 
24 | SYNOPSIS
25 |     provision_neptune_streams_handler.py CLUSTER_ID HANDLER_S3_BUCKET HANDLER_S3_KEY
26 | 
27 | POSITIONAL ARGUMENTS
28 |     CLUSTER_ID
29 |     HANDLER_S3_BUCKET
30 |     HANDLER_S3_KEY
31 | 
32 | FLAGS
33 |     --handler_name=HANDLER_NAME
34 |     --additional_params=ADDITIONAL_PARAMS
35 |     --query_engine=QUERY_ENGINE
36 |     --region=REGION
37 |     --lambda_memory_size_mb=LAMBDA_MEMORY_SIZE_MB
38 |     --lambda_runtime=LAMBDA_RUNTIME
39 |     --lambda_logging_level=LAMBDA_LOGGING_LEVEL
40 |     --managed_policy_arns=MANAGED_POLICY_ARNS
41 |     --batch_size=BATCH_SIZE
42 |     --max_polling_wait_time_seconds=MAX_POLLING_WAIT_TIME_SECONDS
43 |     --max_polling_interval_seconds=MAX_POLLING_INTERVAL_SECONDS
44 |     --step_function_fallback_period=STEP_FUNCTION_FALLBACK_PERIOD
45 |     --step_function_fallback_period_unit=STEP_FUNCTION_FALLBACK_PERIOD_UNIT
46 |     --notification_email=NOTIFICATION_EMAIL
47 |     --create_cloudwatch_alarm=CREATE_CLOUDWATCH_ALARM
48 |     --application_name=APPLICATION_NAME
49 |     --dry_run=DRY_RUN
50 | 
51 | NOTES
52 |     You can also use flags syntax for POSITIONAL ARGUMENTS
53 | ```
54 | 
55 | ### Examples
56 | 
57 | Here's an example that provisions a handler with the default handler name (`stream_handler.StreamHandler`), and which has been uploaded to _s3://my-bucket/handlers/example_handler.zip_:
58 | 
59 | ```
60 | python provision_neptune_streams_handler.py --cluster_id=neptunedbcluster-xyz0a0a0abc \
61 |     --handler_s3_bucket=my-bucket \
62 |     --handler_s3_key=handlers/example_handler.zip \
63 |     --region=us-east-1
64 | ```
65 | 
66 | Here's an example of using the script to install
a handler with an additional parameter (`delivery_stream_name`, which will be supplied to the handler via an environment variable when it is invoked), and a managed policy that allows the handler to invoke an Amazon Kinesis Data Firehose API: 67 | 68 | ``` 69 | python provision_neptune_streams_handler.py \ 70 | --cluster_id=neptunedbcluster-abcdefghijkl \ 71 | --handler_s3_bucket=my-bucket \ 72 | --handler_s3_key=neptune_firehose_handler.zip \ 73 | --additional_params='{"delivery_stream_name": "neptune-firehose"}' \ 74 | --managed_policy_arns='["arn:aws:iam::123456789:policy/neptune-firehose-handler-policy"]' \ 75 | --region=us-east-1 76 | ``` -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/MultiValuedNodePropertyPolicy.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import com.fasterxml.jackson.databind.node.ArrayNode; 16 | 17 | public enum MultiValuedNodePropertyPolicy implements PropertyValueParserPolicy { 18 | LeaveAsString { 19 | @Override 20 | public PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser) { 21 | return parser.stringValue(s); 22 | } 23 | 24 | @Override 25 | public void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser) { 26 | // Do nothing 27 | } 28 | }, 29 | Halt { 30 | @Override 31 | public PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser) { 32 | throw new RuntimeException("Halt: found multivalued node property value"); 33 | } 34 | 35 | @Override 36 | public void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser) { 37 | // Do nothing 38 | } 39 | }, 40 | PutInSetIgnoringDuplicates { 41 | @Override 42 | public PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser) { 43 | return parser.parseArrayValue(s, arrayNode); 44 | } 45 | 46 | @Override 47 | public void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser) { 48 | // Do nothing 49 | } 50 | }, 51 | PutInSetButHaltIfDuplicates { 52 | @Override 53 | public PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser) { 54 | return parser.parseArrayValue(s, arrayNode); 55 | } 56 | 57 | @Override 58 | public void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser) { 59 | throw new RuntimeException("Halt: found multivalued node property value with duplicate values"); 60 | } 61 | }; 62 | 63 | @Override 64 | public abstract PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser); 65 | 66 | @Override 67 | public abstract void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser); 68 | } 69 | 
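// For example: given the multi-valued Neo4j export value ["toy","electronics","gifts"],
// LeaveAsString emits the single CSV string value "[""toy"",""electronics"",""gifts""]",
// while Halt throws "Halt: found multivalued relationship property value"
// (see MultiValuedRelationshipPropertyPolicyTest for both behaviours).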
-------------------------------------------------------------------------------- /csv-to-neptune-bulk-format/data-config-spotify-no-node.json: -------------------------------------------------------------------------------- 1 | { 2 | "source_folder": ".source", 3 | "data_folder": ".data", 4 | "fileNames": [ 5 | "spotify_songs.csv" 6 | ], 7 | "nodes": [], 8 | "edges": [ 9 | { 10 | "csvFileName": "Track_Album_Edges.csv", 11 | "select": "'track_name' in row and 'track_artist' in row and row['track_name'] != '' and row['track_artist'] != '' and 'track_album_id' in row and row['track_album_id'] != ''", 12 | "id": "uuid()", 13 | "label": "'IN_ALBUM'", 14 | "from": "row['track_artist'] + '-' + row['track_name']", 15 | "to": "row['track_album_id']", 16 | "fromLabel": "'Track'", 17 | "toLabel": "'Album'", 18 | "properties": [] 19 | }, 20 | { 21 | "csvFileName": "Track_Artist_Edges.csv", 22 | "select": "'track_name' in row and 'track_artist' in row and row['track_name'] != '' and row['track_artist'] != ''", 23 | "id": "uuid()", 24 | "label": "'BY_ARTIST'", 25 | "from": "row['track_artist'] + '-' + row['track_name']", 26 | "to": "row['track_artist']", 27 | "fromLabel": "'Track'", 28 | "toLabel": "'Artist'", 29 | "properties": [] 30 | }, 31 | { 32 | "csvFileName": "Track_Playlist_Edges.csv", 33 | "select": "'track_name' in row and 'track_artist' in row and row['track_name'] != '' and row['track_artist'] != '' and 'playlist_id' in row and row['playlist_id'] != ''", 34 | "id": "uuid()", 35 | "label": "'IN_PLAYLIST'", 36 | "from": "row['track_artist'] + '-' + row['track_name']", 37 | "to": "row['playlist_id']", 38 | "fromLabel": "'Track'", 39 | "toLabel": "'Playlist'", 40 | "properties": [] 41 | }, 42 | { 43 | "csvFileName": "Playlist_Genre_Edges.csv", 44 | "select": "'playlist_id' in row and row['playlist_id'] != ''and 'playlist_genre' in row and row['playlist_genre'] != ''", 45 | "id": "uuid()", 46 | "label": "'HAS_GENRE'", 47 | "from": "row['playlist_id']", 48 | "to": "row['playlist_genre']", 49 | "fromLabel": "'Playlist'", 50 | "toLabel": "'Genre'", 51 | "properties": [] 52 | }, 53 | { 54 | "csvFileName": "Genre_SubGenre_Edges.csv", 55 | "select": "'playlist_genre' in row and row['playlist_genre'] != '' and 'playlist_subgenre' in row and row['playlist_subgenre'] != ''", 56 | "id": "uuid()", 57 | "label": "'HAS_SUBGENRE'", 58 | "from": "row['playlist_genre']", 59 | "to": "row['playlist_subgenre']", 60 | "fromLabel": "'Genre'", 61 | "toLabel": "'SubGenre'", 62 | "properties": [] 63 | } 64 | ] 65 | } -------------------------------------------------------------------------------- /glue-neptune/glue_neptune/GremlinCsvTransforms.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. 2 | # All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). 5 | # You may not use this file except in compliance with the License. 6 | # A copy of the License is located at 7 | # 8 | # http://aws.amazon.com/apache2.0/ 9 | # 10 | # or in the "license" file accompanying this file. 11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions 13 | # and limitations under the License. 
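# Note: GremlinCsvTransforms provides classmethod helpers that shape Glue DynamicFrames
# into the Neptune bulk load CSV layout by deriving '~id', '~label', '~from' and '~to'
# style columns; see the docstrings on each method below for usage examples.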
14 | 
15 | import sys, os
16 | 
17 | from awsglue.utils import getResolvedOptions
18 | from pyspark.context import SparkContext
19 | from awsglue.context import GlueContext
20 | from awsglue.job import Job
21 | from awsglue.transforms import ApplyMapping
22 | from awsglue.transforms import RenameField
23 | from awsglue.transforms import SelectFields
24 | from awsglue.dynamicframe import DynamicFrame
25 | from pyspark.sql.functions import lit
26 | from pyspark.sql.functions import format_string
27 | 
28 | class GremlinCsvTransforms:
29 | 
30 |     @classmethod
31 |     def create_prefixed_columns(cls, datasource, mappings):
32 |         """Creates columns in a DynamicFrame whose values are based on prefixed values from another column in the DynamicFrame.
33 | 
34 |         Example:
35 |         >>> df = GremlinCsvTransforms.create_prefixed_columns(df, [('~id', 'productId', 'p'),('~to', 'supplierId', 's')])
36 |         """
37 |         dataframe = datasource.toDF()
38 |         for (column_name, source_column, prefix) in mappings:
39 |             dataframe = dataframe.withColumn(column_name, format_string(prefix + "-%s", dataframe[source_column]))
40 |         return DynamicFrame.fromDF(dataframe, datasource.glue_ctx, 'create_vertex_id_columns')
41 | 
42 |     @classmethod
43 |     def create_edge_id_column(cls, datasource, from_column, to_column):
44 |         """Creates an '~id' column in a DynamicFrame whose values are based on the specified from and to columns.
45 | 
46 |         Example:
47 |         >>> df = GremlinCsvTransforms.create_edge_id_column(df, 'supplierId', 'productId')
48 |         """
49 |         dataframe = datasource.toDF()
50 |         dataframe = dataframe.withColumn('~id', format_string("%s-%s", dataframe[from_column], dataframe[to_column]))
51 |         return DynamicFrame.fromDF(dataframe, datasource.glue_ctx, 'create_edge_id_column')
52 | 
53 |     @classmethod
54 |     def addLabel(cls, datasource, label):
55 |         """Adds a '~label' column to a DynamicFrame whose values comprise the supplied label.
56 | 
57 |         Example:
58 |         >>> df = GremlinCsvTransforms.addLabel(df, 'Product')
59 |         """
60 |         dataframe = datasource.toDF()
61 |         dataframe = dataframe.withColumn("~label", lit(label))
62 |         return DynamicFrame.fromDF(dataframe, datasource.glue_ctx, label)
--------------------------------------------------------------------------------
/neo4j-to-neptune/src/test/java/com/amazonaws/services/neptune/metadata/MultiValuedNodePropertyPolicyTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | Licensed under the Apache License, Version 2.0 (the "License").
4 | You may not use this file except in compliance with the License.
5 | A copy of the License is located at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | or in the "license" file accompanying this file. This file is distributed
8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
9 | express or implied. See the License for the specific language governing
10 | permissions and limitations under the License.
11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import org.junit.Test; 16 | 17 | import static org.junit.Assert.*; 18 | 19 | public class MultiValuedNodePropertyPolicyTest { 20 | 21 | @Test 22 | public void shouldReturnStringPropertyValueIfPolicyIsLeaveAsString() { 23 | 24 | String value = "[\"toy\",\"electronics\",\"gifts\"]"; 25 | 26 | PropertyValueParser parser = new PropertyValueParser(MultiValuedNodePropertyPolicy.LeaveAsString, "", false); 27 | PropertyValue propertyValue = parser.parse(value); 28 | 29 | assertEquals("\"[\"\"toy\"\",\"\"electronics\"\",\"\"gifts\"\"]\"", propertyValue.value()); 30 | assertFalse(propertyValue.isMultiValued()); 31 | } 32 | 33 | @Test 34 | public void shouldThrowExceptionIfPolicyIsHalt() { 35 | String value = "[\"toy\",\"electronics\",\"gifts\"]"; 36 | 37 | PropertyValueParser parser = new PropertyValueParser(MultiValuedNodePropertyPolicy.Halt, "", false); 38 | 39 | try { 40 | parser.parse(value); 41 | fail(); 42 | } catch (RuntimeException e) { 43 | assertEquals("Halt: found multivalued node property value", e.getMessage()); 44 | } 45 | } 46 | 47 | @Test 48 | public void shouldReturnSetFormattedPropertyValueIfPolicyIsPutInSetIgnoringDuplicates() { 49 | 50 | String value = "[\"toy\",\"electronics\",\"gifts\"]"; 51 | 52 | PropertyValueParser parser = new PropertyValueParser(MultiValuedNodePropertyPolicy.PutInSetIgnoringDuplicates, "", false); 53 | PropertyValue propertyValue = parser.parse(value); 54 | 55 | assertEquals("electronics;toy;gifts", propertyValue.value()); 56 | assertTrue(propertyValue.isMultiValued()); 57 | } 58 | 59 | @Test 60 | public void shouldThrowExceptionIfPolicyIsPutInSetButHaltIfDuplicates() { 61 | String value = "[\"toy\",\"electronics\",\"gifts\",\"gifts\"]"; 62 | 63 | PropertyValueParser parser = new PropertyValueParser(MultiValuedNodePropertyPolicy.PutInSetButHaltIfDuplicates, "", false); 64 | 65 | try { 66 | parser.parse(value); 67 | fail(); 68 | } catch (RuntimeException e) { 69 | assertEquals("Halt: found multivalued node property value with duplicate values", e.getMessage()); 70 | } 71 | } 72 | 73 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Amazon Neptune Tools 2 | 3 | Utilities to enable loading data and building graph applications with Amazon Neptune. 4 | 5 | ### Examples 6 | 7 | You may also be interested in the [Neptune Samples github repository](https://github.com/aws-samples/amazon-neptune-samples), which includes samples and example code. 8 | 9 | ### GraphML 2 CSV 10 | This is a [utility](graphml2csv/README.md) to convert graphml files into the Neptune CSV format. 11 | 12 | ### Export Neptune to Elasticsearch 13 | Backfills Elasticsearch with data from an existing Amazon Neptune database. 14 | 15 | The [Neptune Full-text Search](https://docs.aws.amazon.com/neptune/latest/userguide/full-text-search-cfn-create.html) CloudFormation templates provide a mechanism for indexing all _new_ data that is added to an Amazon Neptune database in Elasticsearch. However, there are situations in which you may want to index _existing_ data in a Neptune database prior to enabling the full-text search integration. 16 | 17 | You can use this [export Neptune to Elasticsearch solution](export-neptune-to-elasticsearch/) to index existing data in an Amazon Neptune database in Elasticsearch. 
18 | 
19 | ### Neo4j to Neptune
20 | A [command-line utility](neo4j-to-neptune/readme.md) for migrating data to Neptune from Neo4j.
21 | 
22 | ### Glue Neptune
23 | 
24 | [glue-neptune](glue-neptune/) is a Python library for AWS Glue that helps you write data to Amazon Neptune from Glue jobs. With glue-neptune you can:
25 | * Get Neptune connection information from the Glue Data Catalog
26 | * Create label and node and edge ID columns in DynamicFrames, named in accordance with the Neptune CSV bulk load format for property graphs
27 | * Write from DynamicFrames directly to Neptune
28 | 
29 | ### Neptune CSV to RDF
30 | 
31 | If you're interested in converting Neptune's CSV format to RDF, see [amazon-neptune-csv-to-rdf-converter](https://github.com/aws/amazon-neptune-csv-to-rdf-converter).
32 | 
33 | ### Neptune CSV to Gremlin
34 | 
35 | [csv-gremlin](csv-gremlin/README.md) is a tool that can turn Amazon Neptune format CSV files into Gremlin steps, allowing them to be loaded into different Apache TinkerPop compliant stores (including Amazon Neptune) using Gremlin queries. The tool also tries to validate that the CSV files do not contain errors, and it can be used to inspect CSV files prior to starting a bulk load.
36 | 
37 | ### CSV to Neptune Bulk Format CSV
38 | 
39 | [csv-to-neptune-bulk-format](csv-to-neptune-bulk-format/README.md) is a utility that identifies nodes and edges in the source CSV data file(s) and generates Amazon Neptune Gremlin load data format files. A configuration file (JSON) defines the source and target files, the node/edge definitions, and the selection logic. The script interprets one or more configuration files and generates Amazon Neptune Gremlin load data format files. The generated files can then be loaded into the Neptune database.
40 | 
41 | ### neptune-gremlin-js
42 | 
43 | A JavaScript SDK for querying Neptune with Gremlin.
44 | 
45 | ## License
46 | 
47 | This library is licensed under the Apache 2.0 License.
48 | --------------------------------------------------------------------------------
/neptune-streams-utils/examples/streams-to-firehose/stream_handler.py:
--------------------------------------------------------------------------------
1 | # Copyright Amazon.com, Inc. or its affiliates.
2 | # All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License").
5 | # You may not use this file except in compliance with the License.
6 | # A copy of the License is located at
7 | #
8 | #   http://aws.amazon.com/apache2.0/
9 | #
10 | # or in the "license" file accompanying this file.
11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions
13 | # and limitations under the License.
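# This handler forwards Neptune Streams records to the Kinesis Data Firehose delivery
# stream named in the 'delivery_stream_name' additional parameter, flushing with
# put_record_batch in batches of up to 500 records (the PutRecordBatch per-call limit).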
14 | 15 | import json 16 | import logging 17 | import os 18 | import boto3 19 | import lambda_function 20 | from commons import * 21 | from handler import AbstractHandler, HandlerResponse 22 | 23 | logger = logging.getLogger('StreamHandler') 24 | logger.setLevel(logging.INFO) 25 | 26 | class StreamHandler(AbstractHandler): 27 | 28 | def handle_records(self, stream_log): 29 | 30 | params = json.loads(os.environ['AdditionalParams']) 31 | delivery_stream_name = params['delivery_stream_name'] 32 | 33 | client = boto3.client('firehose') 34 | 35 | records = stream_log[RECORDS_STR] 36 | 37 | last_op_num = None 38 | last_commit_num = None 39 | count = 0 40 | 41 | firehose_records = [] 42 | 43 | try: 44 | for record in records: 45 | 46 | # Process record 47 | firehose_record = { 48 | "Data": '{}\n'.format(json.dumps(record)) 49 | } 50 | firehose_records.append(firehose_record) 51 | 52 | # Flush once we reach the put_record_batch limit of 500 records 53 | if len(firehose_records) == 500: 54 | response = client.put_record_batch( 55 | DeliveryStreamName=delivery_stream_name, 56 | Records=firehose_records 57 | ) 58 | logger.info(response) 59 | logger.info(len(firehose_records)) 60 | firehose_records.clear() 61 | 62 | # Update local checkpoint info 63 | last_op_num = record[EVENT_ID_STR][OP_NUM_STR] 64 | last_commit_num = record[EVENT_ID_STR][COMMIT_NUM_STR] 65 | count += 1 66 | 67 | # Send any remaining buffered records 68 | if len(firehose_records) > 0: 69 | logger.info(len(firehose_records)) 70 | response = client.put_record_batch( 71 | DeliveryStreamName=delivery_stream_name, 72 | Records=firehose_records 73 | ) 74 | logger.info(response) 75 | 76 | except Exception as e: 77 | logger.error('Error occurred - {}'.format(str(e))) 78 | raise e 79 | finally: 80 | try: 81 | yield HandlerResponse(last_op_num, last_commit_num, count) 82 | except Exception as e: 83 | logger.error('Error occurred - {}'.format(str(e))) 84 | raise e 85 | -------------------------------------------------------------------------------- /release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2020 Amazon.com, Inc. or its affiliates. 4 | # All Rights Reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"). 7 | # You may not use this file except in compliance with the License. 8 | # A copy of the License is located at 9 | # 10 | # http://aws.amazon.com/apache2.0/ 11 | # 12 | # or in the "license" file accompanying this file. 13 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 14 | # either express or implied. See the License for the specific language governing permissions 15 | # and limitations under the License. 16 | VERSION_FILE="VERSION" 17 | GIT_CMD=`which git` 18 | 19 | GIT_BRANCH=`${GIT_CMD} branch | grep \* | cut -d' ' -f2` 20 | 21 | if [ ! -f "$VERSION_FILE" ] ; then 22 | echo "Version file not present. Please create and retry." 23 | exit 1 24 | fi 25 | 26 | if [ "$GIT_BRANCH" != "master" ] ; then 27 | echo "WARNING: Starting a release from a non-master branch." 28 | fi 29 | 30 | let NEW_VERSION=`cat $VERSION_FILE`+1 31 | VERSION_STRING="1.${NEW_VERSION}" 32 | echo $NEW_VERSION > $VERSION_FILE 33 | 34 | 35 | echo "Starting release for $VERSION_STRING" 36 | 37 | RELEASE_BRANCH="amazon-neptune-tools-$VERSION_STRING" 38 | 39 | 40 | echo "Creating new release branch: $RELEASE_BRANCH" 41 | 42 | $GIT_CMD checkout -b $RELEASE_BRANCH 43 | 44 | #Utility script to build the jars to make a release.
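# Stage the release artifacts: build each Maven project at the new release
# version and collect the shaded jars into artifacts/.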
45 | ARTIFACT_DIR=`pwd`/artifacts 46 | rm -rf $ARTIFACT_DIR 47 | mkdir -p $ARTIFACT_DIR 48 | MAVEN_ARTIFACTS="neo4j-to-neptune" 49 | for artifact in $MAVEN_ARTIFACTS; do 50 | pushd $artifact >& /dev/null 51 | mvn versions:set -DnewVersion=${VERSION_STRING} versions:update-child-modules 52 | mvn clean 53 | mvn install 54 | #All of the jars are shaded. Only take the shaded, bundled jars. 55 | for jar in `find . -name "*.jar" -print | grep -vE "SNAPSHOT|original|\-$VERSION_STRING"`; do 56 | cp $jar $ARTIFACT_DIR 57 | done 58 | popd >& /dev/null 59 | done 60 | 61 | #Build the neptune-python-utils artifact 62 | pushd neptune-python-utils >& /dev/null 63 | ./build.sh 64 | cp target/neptune_python_utils.zip $ARTIFACT_DIR 65 | popd >& /dev/null 66 | 67 | cp ./graphml2csv/graphml2csv.py $ARTIFACT_DIR 68 | 69 | #drop-graph needs to be installed as a module 70 | #cp ./drop-graph/drop-graph.py $ARTIFACT_DIR 71 | 72 | ${GIT_CMD} commit -a -m "POM version updates for $RELEASE_BRANCH" 73 | 74 | echo "Creating Release Tag" 75 | 76 | ${GIT_CMD} tag -a $RELEASE_BRANCH -m "amazon-neptune-tools Release ${VERSION_STRING}" 77 | 78 | repo=origin 79 | 80 | echo "Pushing the release branch to $repo." 81 | ${GIT_CMD} push "${repo}" refs/heads/${RELEASE_BRANCH} 82 | 83 | echo "Pushing the release tags to $repo." 84 | ${GIT_CMD} push "${repo}" refs/tags/${RELEASE_BRANCH} 85 | 86 | #Update the VERSION on master 87 | 88 | ${GIT_CMD} checkout master 89 | echo $NEW_VERSION > $VERSION_FILE 90 | ${GIT_CMD} pull $repo master 91 | ${GIT_CMD} commit -am "Incremented release version to `cat $VERSION_FILE`" 92 | ${GIT_CMD} push $repo master 93 | 94 | #Return to the initial branch 95 | ${GIT_CMD} checkout ${GIT_BRANCH} 96 | 97 | echo "To complete the release, upload the contents of artifacts/ to the ${RELEASE_BRANCH} tag on github: https://github.com/awslabs/amazon-neptune-tools/releases." 98 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/python3.8/stream_handler.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. 2 | # All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). 5 | # You may not use this file except in compliance with the License. 6 | # A copy of the License is located at 7 | # 8 | # http://aws.amazon.com/apache2.0/ 9 | # 10 | # or in the "license" file accompanying this file. 11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions 13 | # and limitations under the License. 14 | 15 | import json 16 | import logging 17 | import os 18 | import lambda_function 19 | from commons import * 20 | from handler import AbstractHandler, HandlerResponse 21 | 22 | from neptune_python_utils.gremlin_utils import GremlinUtils 23 | from neptune_python_utils.endpoints import Endpoints 24 | 25 | logger = logging.getLogger('StreamHandler') 26 | logger.setLevel(logging.INFO) 27 | 28 | ''' 29 | This handler processes a batch of Neptune Stream events. 30 | If the event represents the creation of a vertex or edge, the handler queries Neptune for the details of the element. 31 | The handler yields one HandlerResponse per batch of stream events. 
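The HandlerResponse carries the last commit number and op number that were
successfully processed, allowing the caller to checkpoint its position in the stream.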
32 | ''' 33 | class StreamHandler(AbstractHandler): 34 | 35 | def handle_records(self, stream_log): 36 | 37 | params = json.loads(os.environ['AdditionalParams']) 38 | 39 | neptune_endpoint = params['neptune_cluster_endpoint'] 40 | neptune_port = params['neptune_port'] 41 | 42 | GremlinUtils.init_statics(globals()) 43 | 44 | endpoints = Endpoints(neptune_endpoint=neptune_endpoint, neptune_port=neptune_port) 45 | gremlin_utils = GremlinUtils(endpoints) 46 | 47 | conn = gremlin_utils.remote_connection() 48 | g = gremlin_utils.traversal_source(connection=conn) 49 | 50 | records = stream_log[RECORDS_STR] 51 | 52 | last_op_num = None 53 | last_commit_num = None 54 | count = 0 55 | 56 | try: 57 | for record in records: 58 | 59 | # Process record 60 | op = record[OPERATION_STR] 61 | data = record[DATA_STR] 62 | type = data['type'] 63 | id = data['id'] 64 | 65 | if op == ADD_OPERATION: 66 | if type == 'vl': 67 | logger.info(g.V(id).valueMap(True).toList()) 68 | if type == 'e': 69 | logger.info(g.E(id).valueMap(True).toList()) 70 | 71 | # Update local checkpoint info 72 | last_op_num = record[EVENT_ID_STR][OP_NUM_STR] 73 | last_commit_num = record[EVENT_ID_STR][COMMIT_NUM_STR] 74 | count += 1 75 | 76 | except Exception as e: 77 | logger.error('Error occurred - {}'.format(str(e))) 78 | raise e 79 | finally: 80 | try: 81 | yield HandlerResponse(last_op_num, last_commit_num, count) 82 | except Exception as e: 83 | logger.error('Error occurred - {}'.format(str(e))) 84 | raise e 85 | finally: 86 | conn.close() 87 | -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/lambda/export_neptune_to_kinesis.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License.
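# This Lambda function submits an AWS Batch job that downloads the
# neptune-export jar and exports the contents of a Neptune database to a
# Kinesis stream in neptuneStreamsJson format, from which it can be indexed
# into Elasticsearch.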
13 | 14 | import json 15 | import os 16 | import boto3 17 | import logging 18 | from datetime import datetime 19 | 20 | client = boto3.client('batch') 21 | 22 | logger = logging.getLogger() 23 | logger.setLevel(logging.INFO) 24 | 25 | def trigger_neptune_export(): 26 | 27 | neptune_export_jar_uri = os.environ['NEPTUNE_EXPORT_JAR_URI'] 28 | neptune_endpoint = os.environ['NEPTUNE_ENDPOINT'] 29 | neptune_port = os.environ['NEPTUNE_PORT'] 30 | neptune_engine = os.environ['NEPTUNE_ENGINE'] 31 | stream_name = os.environ['STREAM_NAME'] 32 | job_suffix = os.environ['JOB_SUFFIX'] 33 | region = os.environ['AWS_REGION'] 34 | concurrency = os.environ['CONCURRENCY'] 35 | scope = os.environ['EXPORT_SCOPE'] 36 | additional_params = os.environ['ADDITIONAL_PARAMS'] 37 | clone_cluster = os.environ.get('CLONE_CLUSTER') 38 | 39 | if additional_params: 40 | additional_params = additional_params if additional_params.startswith(' ') else ' {}'.format(additional_params) 41 | else: 42 | additional_params = '' 43 | 44 | use_iam_auth = '' if neptune_engine == 'sparql' else ' --use-iam-auth' 45 | export_command = 'export-pg' if neptune_engine == 'gremlin' else 'export-rdf' 46 | concurrency_param = ' --concurrency {}'.format(concurrency) if neptune_engine == 'gremlin' else '' 47 | scope_param = ' --scope {}'.format(scope) if neptune_engine == 'gremlin' else '' 48 | clone_cluster_param = ' --clone-cluster' if clone_cluster and clone_cluster.lower() == 'true' else '' 49 | 50 | command = 'df -h && rm -rf neptune-export.jar && wget {} -nv && export SERVICE_REGION="{}" && java -Xms16g -Xmx16g -jar neptune-export.jar {} -e {} -p {} -d /neptune/results --output stream --stream-name {} --region {} --format neptuneStreamsJson --use-ssl{}{}{}{}{}'.format( 51 | neptune_export_jar_uri, 52 | region, 53 | export_command, 54 | neptune_endpoint, 55 | neptune_port, 56 | stream_name, 57 | region, 58 | use_iam_auth, 59 | concurrency_param, 60 | scope_param, 61 | clone_cluster_param, 62 | additional_params) 63 | 64 | logger.info('Command: {}'.format(command)) 65 | 66 | submit_job_response = client.submit_job( 67 | jobName='export-neptune-to-kinesis-{}-{}'.format(job_suffix, round(datetime.utcnow().timestamp() * 1000)), 68 | jobQueue='export-neptune-to-kinesis-queue-{}'.format(job_suffix), 69 | jobDefinition='export-neptune-to-kinesis-job-{}'.format(job_suffix), 70 | containerOverrides={ 71 | 'command': [ 72 | 'sh', 73 | '-c', 74 | command 75 | ] 76 | } 77 | ) 78 | 79 | return submit_job_response 80 | 81 | def lambda_handler(event, context): 82 | 83 | result = trigger_neptune_export() 84 | 85 | job_name = result['jobName'] 86 | job_id = result['jobId'] 87 | 88 | return { 89 | 'jobName': job_name, 90 | 'jobId': job_id 91 | } 92 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 
13 | 14 | When filing an issue, please check [existing open](https://github.com/awslabs/amazon-neptune-tools/issues), or [recently closed](https://github.com/awslabs/amazon-neptune-tools/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. Our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), so looking at any ['help wanted'](https://github.com/awslabs/amazon-neptune-tools/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/awslabs/amazon-neptune-tools/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 61 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes.
62 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/lib/cdk-test-app-stack.js: -------------------------------------------------------------------------------- 1 | const { Stack } = require("aws-cdk-lib") 2 | const cdk = require("aws-cdk-lib") 3 | const ec2 = require("aws-cdk-lib/aws-ec2") 4 | const lambda = require("aws-cdk-lib/aws-lambda") 5 | const neptune = require("@aws-cdk/aws-neptune-alpha") 6 | 7 | class CdkTestAppStack extends Stack { 8 | /** 9 | * 10 | * @param {Construct} scope 11 | * @param {string} id 12 | * @param {StackProps=} props 13 | */ 14 | constructor(scope, id, props) { 15 | super(scope, id, props) 16 | 17 | // Create a dedicated VPC for the cluster 18 | const vpc = new ec2.Vpc(this, "vpc") 19 | 20 | // Cluster parameter group 21 | const clusterParameterGroup = new neptune.ClusterParameterGroup(this, 22 | "ClusterParams", 23 | { 24 | description: "Cluster parameter group", 25 | parameters: { 26 | neptune_enable_audit_log: "1", 27 | }, 28 | }, 29 | ) 30 | 31 | // Db parameter group 32 | const parameterGroup = new neptune.ParameterGroup(this, "DbParams", { 33 | description: "Db parameter group", 34 | parameters: { 35 | neptune_query_timeout: "10000", 36 | }, 37 | }) 38 | 39 | // Create the security group for the cluster 40 | const clusterSecurityGroup = new ec2.SecurityGroup(this, "ClusterSG", { 41 | vpc: vpc, 42 | description: "Neptune Gremlin Test Security Group", 43 | }) 44 | 45 | // Create the cluster 46 | const cluster = new neptune.DatabaseCluster(this, "cluster", { 47 | vpc: vpc, 48 | instanceType: neptune.InstanceType.T3_MEDIUM, 49 | clusterParameterGroup, 50 | parameterGroup, 51 | backupRetention: cdk.Duration.days(7), 52 | deletionProtection: true, 53 | securityGroups: [clusterSecurityGroup], 54 | }) 55 | 56 | // Output the writer endpoint host:port 57 | new cdk.CfnOutput(this, "WriteEndpointOutput", { 58 | value: cluster.clusterEndpoint.socketAddress, 59 | }) 60 | 61 | // Create a security group for the lambda function 62 | const lambdaSecurityGroup = new ec2.SecurityGroup(this, "LambdaSG", { 63 | vpc: vpc, 64 | description: "Neptune Gremlin Test Lambda Security Group", 65 | }) 66 | 67 | // Add an ingress rule to the cluster's security group from the lambda sg 68 | const port = cluster.clusterEndpoint.port 69 | clusterSecurityGroup.addIngressRule( 70 | lambdaSecurityGroup, ec2.Port.tcp(port)) 71 | 72 | // Environment variables for the lambda function 73 | const envVars = { 74 | "NEPTUNE_ENDPOINT": cluster.clusterEndpoint.hostname, 75 | "NEPTUNE_PORT": cluster.clusterEndpoint.port, 76 | "USE_IAM": "true", 77 | "USE_AWS4": "true", 78 | } 79 | 80 | // Create the integration test Lambda 81 | const testLambda = new lambda.Function(this, "neptune-gremlin-test", { 82 | runtime: lambda.Runtime.NODEJS_14_X, 83 | code: lambda.Code.fromAsset("lambda"), 84 | handler: "integration-test.handler", 85 | vpc: vpc, 86 | timeout: cdk.Duration.seconds(10), 87 | memorySize: 1536, 88 | environment: envVars, 89 | securityGroups: [lambdaSecurityGroup], 90 | }) 91 | 92 | // Give the lambda function access to the cluster 93 | cluster.grantConnect(testLambda) 94 | } 95 | 96 | } 97 | 98 | 99 | module.exports = { CdkTestAppStack } 100 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/PropertyValueParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 
Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import com.fasterxml.jackson.core.JsonProcessingException; 16 | import com.fasterxml.jackson.databind.JsonNode; 17 | import com.fasterxml.jackson.databind.ObjectMapper; 18 | import com.fasterxml.jackson.databind.node.ArrayNode; 19 | 20 | import java.util.HashSet; 21 | import java.util.Set; 22 | 23 | public class PropertyValueParser { 24 | 25 | private static final ObjectMapper MAPPER = new ObjectMapper(); 26 | 27 | private final PropertyValueParserPolicy policy; 28 | private final String semicolonReplacement; 29 | private final boolean inferType; 30 | 31 | public PropertyValueParser(PropertyValueParserPolicy policy, String semicolonReplacement, boolean inferType) { 32 | this.policy = policy; 33 | this.semicolonReplacement = semicolonReplacement; 34 | this.inferType = inferType; 35 | } 36 | 37 | public PropertyValue parse(String s){ 38 | if (isArrayCandidate(s)){ 39 | try { 40 | JsonNode jsonNode = MAPPER.readTree(s); 41 | if (isArray(jsonNode)){ 42 | return policy.handleArray(s, (ArrayNode) jsonNode, this); 43 | } else { 44 | return stringValue(s); 45 | } 46 | } catch (JsonProcessingException e) { 47 | return stringValue(s); 48 | } 49 | } else { 50 | return stringValue(s); 51 | } 52 | } 53 | 54 | PropertyValue parseArrayValue(String s, ArrayNode arrayNode) { 55 | Set<String> values = new HashSet<>(); 56 | for (JsonNode node : arrayNode) { 57 | values.add(format(node.asText().replace(";", semicolonReplacement))); 58 | } 59 | if (values.size() < arrayNode.size()){ 60 | policy.handleDuplicates(s, arrayNode, this); 61 | } 62 | return arrayValue(values); 63 | } 64 | 65 | PropertyValue stringValue(String s){ 66 | DataType dataType = inferType ?
DataType.identifyType(s) : DataType.None; 67 | return new PropertyValue(format(s), false, dataType); 68 | } 69 | 70 | private PropertyValue arrayValue(Set<String> values) { 71 | 72 | DataType dataType = DataType.None; 73 | 74 | if (inferType){ 75 | for (String value : values) { 76 | dataType = DataType.getBroadestType(dataType, DataType.identifyType(value)); 77 | } 78 | } 79 | 80 | return new PropertyValue(String.join(";", values), true, dataType); 81 | } 82 | 83 | private static boolean isArrayCandidate(String s) { 84 | return s.startsWith("[") && s.endsWith("]"); 85 | } 86 | 87 | private static boolean isArray(JsonNode jsonNode) { 88 | return jsonNode.isArray(); 89 | } 90 | 91 | private static String format(String s){ 92 | 93 | if (s.contains("\"")){ 94 | s = s.replace("\"", "\"\""); 95 | } 96 | 97 | if (s.contains("\"") || s.contains(",") || s.contains(System.lineSeparator())){ 98 | s = String.format("\"%s\"", s); 99 | } 100 | 101 | return s; 102 | } 103 | 104 | } 105 | -------------------------------------------------------------------------------- /neo4j-to-neptune/docs/bulk-load-config.md: -------------------------------------------------------------------------------- 1 | # Bulk Load Configuration 2 | 3 | The `convert-csv` utility supports automated bulk loading of converted CSV data directly into Amazon Neptune using the `--bulk-load-config` parameter. 4 | 5 | ## Usage 6 | 7 | Use the `--bulk-load-config` parameter to specify a YAML file containing the bulk load configuration: 8 | 9 | ```bash 10 | java -jar neo4j-to-neptune.jar convert-csv \ 11 | -i /tmp/neo4j-export.csv \ 12 | -d output \ 13 | --bulk-load-config bulk-load.yaml 14 | ``` 15 | 16 | ## YAML Configuration Format 17 | 18 | The configuration file should be in YAML format, using camelCase: 19 | 20 | ```yaml 21 | # Required parameters 22 | bucketName: my-neptune-data-bucket 23 | neptuneEndpoint: my-cluster.cluster-abc123.us-east-1.neptune.amazonaws.com 24 | iamRoleArn: arn:aws:iam::123456789012:role/NeptuneLoadFromS3Role 25 | 26 | # Optional parameters 27 | s3Prefix: neptune 28 | parallelism: OVERSUBSCRIBE 29 | monitor: true 30 | ``` 31 | 32 | ## Configuration Parameters 33 | 34 | ### Required Parameters 35 | 36 | - **`bucketName`**: S3 bucket name for CSV file storage 37 | - **`neptuneEndpoint`**: Neptune cluster endpoint URL 38 | - **`iamRoleArn`**: IAM role ARN with S3 and Neptune permissions 39 | 40 | ### Optional Parameters 41 | 42 | - **`s3Prefix`**: S3 prefix for uploaded files 43 | - **`parallelism`**: Load parallelism level - `LOW`, `MEDIUM`, `HIGH`, `OVERSUBSCRIBE` (default: `OVERSUBSCRIBE`) 44 | - **`monitor`**: Monitor load progress until completion (default: `false`) 45 | 46 | ## Command Line Override 47 | 48 | Individual CLI parameters can override configuration file values: 49 | 50 | ```bash 51 | java -jar neo4j-to-neptune.jar convert-csv \ 52 | -i /tmp/neo4j-export.csv \ 53 | -d output \ 54 | --bulk-load-config bulk-load.yaml \ 55 | --bucket-name override-bucket \ 56 | --parallelism HIGH 57 | ``` 58 | 59 | Available override parameters: 60 | - `--bucket-name` 61 | - `--s3-prefix` 62 | - `--neptune-endpoint` 63 | - `--iam-role-arn` 64 | - `--parallelism` 65 | - `--monitor` 66 | 67 | ## Behavior 68 | 69 | - **Optional**: Bulk loading only occurs when `--bulk-load-config` or `--neptune-endpoint` is provided 70 | - **Validation**: All required parameters are validated before conversion begins 71 | - **Process**: Conversion happens first, then files are uploaded to S3 and bulk load is initiated 72 | - **Monitoring**:
When enabled, the tool waits and reports progress until completion 73 | 74 | ## Example Output 75 | 76 | ``` 77 | Vertices: 171 78 | Edges : 253 79 | Output : output/1751656971039 80 | output/1751656971039 81 | 82 | Completed in x second(s) 83 | S3 Bucket: my-bucket 84 | S3 Prefix: neptune 85 | AWS Region: us-east-2 86 | IAM Role ARN: arn:aws:iam::123456789000:role/NeptunePolicy 87 | Neptune Endpoint: my-neptune-db.cluster-xxxxxxxxxxxx.us-east-2.neptune.amazonaws.com 88 | Bulk Load Parallelism: MEDIUM 89 | Uploading Gremlin load data to S3... 90 | Starting async upload of files from /tmp/output/1751656971039 to s3://my-bucket/neptune/1751656971039 91 | Starting async upload of /tmp/output/1751656971039/vertices.csv to s3://my-bucket/neptune/1751656971039/vertices.csv 92 | Starting async upload of /tmp/output/1751656971039/edges.csv to s3://my-bucket/neptune/1751656971039/edges.csv 93 | Successfully uploaded vertices.csv - ETag: "abc123..." 94 | Successfully uploaded edges.csv - ETag: "def456..." 95 | Successfully uploaded 2 files from /tmp/output/1751656971039 96 | Files uploaded successfully to S3. Files available at: s3://my-bucket/neptune/1751656971039/ 97 | Starting Neptune bulk load... 98 | Testing connectivity to Neptune endpoint... 99 | Successful connected to Neptune. Status: 200 healthy 100 | Neptune bulk load started successfully! Load ID: 12345678-1234-1234-1234-123456789012 101 | Monitoring load progress for job: 12345678-1234-1234-1234-123456789012 102 | Neptune bulk load status: LOAD_IN_PROGRESS 103 | Neptune bulk load status: LOAD_IN_PROGRESS 104 | Neptune bulk load completed with status: LOAD_COMPLETED 105 | ``` 106 | -------------------------------------------------------------------------------- /neptune-python-utils/neptune_python_utils/glue_gremlin_client.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. 2 | # All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). 5 | # You may not use this file except in compliance with the License. 6 | # A copy of the License is located at 7 | # 8 | # http://aws.amazon.com/apache2.0/ 9 | # 10 | # or in the "license" file accompanying this file. 11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions 13 | # and limitations under the License. 14 | 15 | from neptune_python_utils.batch_utils import BatchUtils 16 | 17 | class GlueGremlinClient: 18 | 19 | def __init__(self, endpoints, job_name=None, **kwargs): 20 | self.endpoints = endpoints 21 | self.job_name = job_name 22 | self.kwargs = kwargs 23 | 24 | def __execute_batch(self, f, pool_size=1): 25 | print('endpoints: {}'.format(self.endpoints)) 26 | print('job_name: {}'.format(self.job_name)) 27 | print('pool_size: {}'.format(pool_size)) 28 | print('kwargs: {}'.format(self.kwargs)) 29 | def execute_batch(rows): 30 | batch_utils = None 31 | try: 32 | batch_utils = BatchUtils(self.endpoints, job_name=self.job_name, to_dict=lambda x: x.asDict(), pool_size=pool_size, **self.kwargs) 33 | return f(batch_utils, rows) 34 | finally: 35 | if batch_utils: 36 | batch_utils.close() 37 | return execute_batch 38 | 39 | def add_vertices(self, label, batch_size=1, pool_size=1, **kwargs): 40 | """Adds a vertex with the supplied label for each row in a DataFrame partition. 
41 | If the DataFrame contains an '~id' column, the values in this column will be treated as user-supplied IDs for the new vertices. 42 | If the DataFrame does not have an '~id' column, Neptune will autogenerate a UUID for each vertex. 43 | 44 | Example: 45 | >>> dynamicframe.toDF().foreachPartition(neptune.add_vertices('Product')) 46 | """ 47 | 48 | return self.__execute_batch(lambda b, rows: b.add_vertices(batch_size=batch_size, rows=rows, label=label, **kwargs), pool_size=pool_size) 49 | 50 | 51 | def upsert_vertices(self, label, batch_size=1, pool_size=1, **kwargs): 52 | """Conditionally adds vertices for the rows in a DataFrame partition using the Gremlin coalesce() idiom. 53 | The DataFrame must contain an '~id' column. 54 | 55 | Example: 56 | >>> dynamicframe.toDF().foreachPartition(neptune.upsert_vertices('Product')) 57 | """ 58 | 59 | return self.__execute_batch(lambda b, rows: b.upsert_vertices(batch_size=batch_size, rows=rows, label=label, **kwargs), pool_size=pool_size) 60 | 61 | def add_edges(self, label, batch_size=1, pool_size=1, **kwargs): 62 | """Adds an edge with the supplied label for each row in a DataFrame partition. 63 | If the DataFrame contains an '~id' column, the values in this column will be treated as user-supplied IDs for the new edges. 64 | If the DataFrame does not have an '~id' column, Neptune will autogenerate a UUID for each edge. 65 | 66 | Example: 67 | >>> dynamicframe.toDF().foreachPartition(neptune.add_edges('ORDER_DETAIL')) 68 | """ 69 | 70 | return self.__execute_batch(lambda b, rows: b.add_edges(batch_size=batch_size, rows=rows, label=label, **kwargs), pool_size=pool_size) 71 | 72 | def upsert_edges(self, label, batch_size=1, pool_size=1, **kwargs): 73 | """Conditionally adds edges for the rows in a DataFrame partition using the Gremlin coalesce() idiom. 74 | The DataFrame must contain '~id', '~from', '~to' and '~label' columns. 75 | 76 | Example: 77 | >>> dynamicframe.toDF().foreachPartition(neptune.upsert_edges('ORDER_DETAIL')) 78 | """ 79 | 80 | return self.__execute_batch(lambda b, rows: b.upsert_edges(batch_size=batch_size, rows=rows, label=label, **kwargs), pool_size=pool_size) 81 | 82 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/java8/src/main/java/stream_handler/StreamHandler.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 
11 | */ 12 | 13 | package stream_handler; 14 | 15 | import com.amazonaws.neptune.StreamsRecord; 16 | import com.amazonaws.neptune.StreamsResponse; 17 | import com.amazonaws.neptune.config.CredentialsConfig; 18 | import org.apache.tinkerpop.gremlin.driver.Cluster; 19 | import org.apache.tinkerpop.gremlin.driver.SigV4WebSocketChannelizer; 20 | import org.apache.tinkerpop.gremlin.driver.remote.DriverRemoteConnection; 21 | import org.apache.tinkerpop.gremlin.driver.ser.Serializers; 22 | import org.apache.tinkerpop.gremlin.process.traversal.AnonymousTraversalSource; 23 | import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; 24 | import utils.EnvironmentVariablesUtils; 25 | 26 | import java.io.IOException; 27 | import java.util.Map; 28 | 29 | public class StreamHandler extends AbstractStreamHandler { 30 | 31 | private final GraphTraversalSource g; 32 | private final Cluster cluster; 33 | 34 | public StreamHandler(String neptuneEndpoint, 35 | Integer neptunePort, 36 | CredentialsConfig credentialsConfig, 37 | Map<String, Object> additionalParams) { 38 | super(neptuneEndpoint, neptunePort, credentialsConfig, additionalParams); 39 | 40 | this.cluster = createCluster(); 41 | this.g = AnonymousTraversalSource 42 | .traversal() 43 | .withRemote(DriverRemoteConnection.using(cluster)); 44 | } 45 | 46 | @Override 47 | public void handleRecords(StreamsResponse streamsResponse) throws IOException { 48 | 49 | StreamsResponse.LastEventId lastEventId = new StreamsResponse.LastEventId(); 50 | int recordCount = 0; 51 | 52 | for (StreamsRecord record : streamsResponse.getRecords()) { 53 | 54 | String op = record.getOp(); 55 | 56 | if (op.equals("ADD")) { 57 | String id = record.getData().getId(); 58 | String type = record.getData().getType(); 59 | if (type.equals("vl")) { 60 | System.out.println(g.V(id).valueMap(true).toList()); 61 | } else if (type.equals("e")) { 62 | System.out.println(g.E(id).valueMap(true).toList()); 63 | } 64 | } 65 | 66 | StreamsRecord.EventId eventId = record.getEventId(); 67 | 68 | lastEventId.setCommitNum(eventId.getCommitNum()); 69 | lastEventId.setOpNum(record.getEventId().getOpNum()); 70 | 71 | recordCount++; 72 | } 73 | 74 | streamsResponse.setLastEventId(lastEventId); 75 | streamsResponse.setTotalRecords(recordCount); 76 | 77 | } 78 | 79 | @Override 80 | public void close() throws IOException { 81 | cluster.close(); 82 | } 83 | 84 | private Cluster createCluster() { 85 | Cluster.Builder builder = Cluster.build() 86 | .addContactPoint(String.valueOf(additionalParams.get("neptune_cluster_endpoint"))) 87 | .port((int) additionalParams.get("neptune_port")) 88 | .enableSsl(true) 89 | .minConnectionPoolSize(1) 90 | .maxConnectionPoolSize(1) 91 | .serializer(Serializers.GRAPHBINARY_V1D0) 92 | .reconnectInterval(2000); 93 | 94 | if (Boolean.parseBoolean(EnvironmentVariablesUtils.getOptionalEnv("iam_auth_enabled", "false"))) { 95 | builder = builder.channelizer(SigV4WebSocketChannelizer.class); 96 | } 97 | 98 | return builder.create(); 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /csv-to-neptune-bulk-format/notebooks/Spotify-Data-Query.ipynb: 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%%gremlin\n", 10 | "g.V().groupCount().by(label).unfold()" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17
| "outputs": [], 18 | "source": [ 19 | "%%gremlin\n", 20 | "g.E().groupCount().by(label).unfold()" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "%%gremlin\n", 30 | "\n", 31 | "//count for each artist\n", 32 | "\n", 33 | "g.V().hasLabel('Artist').as('art')\n", 34 | ".in('BY_ARTIST').select('art')\n", 35 | ".groupCount().by('name').order(local).by(values,desc).unfold()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "%%gremlin \n", 45 | "// given an artist, find tracks\n", 46 | "\n", 47 | "g.V()\n", 48 | ".has('name', 'Martin Garrix')\n", 49 | ".hasLabel('Artist').as('art')\n", 50 | ".in('BY_ARTIST').as('trk')\n", 51 | ".valueMap('track_name')\n" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "%%gremlin \n", 61 | "// given an artist, find Albums\n", 62 | "\n", 63 | "g.V()\n", 64 | ".has('name', 'Martin Garrix')\n", 65 | ".hasLabel('Artist').as('art')\n", 66 | ".in('BY_ARTIST').as('trk')\n", 67 | ".out('IN_ALBUM').as('alb')\n", 68 | ".order().by('name')\n", 69 | ".dedup()\n", 70 | ".valueMap(true)\n" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "%%gremlin\n", 80 | "// find playlists for Genre\n", 81 | "g.V().hasLabel('Genre').has('name', 'pop')\n", 82 | ".in('HAS_GENRE')\n", 83 | ".valueMap('name')" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "# Validate Notebook configuration\n", 91 | "Execute `%graph_notebook_config` to find the configuration\n", 92 | "\n", 93 | "The \"auth_mode\" should be \"IAM\", if it is \"DEFAULT\"\n", 94 | "\n", 95 | " Execute `%%graph_notebook_config`\n", 96 | " with output from `%graph_notebook_config` copied over, and\n", 97 | " replacing \"auth_mode\" to \"IAM\"\n", 98 | "\n", 99 | "Execute `%status` to check the connectivity\n", 100 | "\n", 101 | "The \"status\" should be \"healthy\"\n" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "%status" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "%graph_notebook_config" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "%%graph_notebook_config\n", 129 | "{\n", 130 | " \"host\": \"\",\n", 131 | " \"port\": 8182,\n", 132 | " \"auth_mode\": \"IAM\",\n", 133 | " \"iam_credentials_provider_type\": \"ROLE\",\n", 134 | " \"load_from_s3_arn\": \"arn:aws:iam:::role/\",\n", 135 | " \"ssl\": true,\n", 136 | " \"aws_region\": \"\",\n", 137 | " \"sparql\": {\n", 138 | " \"endpoint_prefix\": \"\"\n", 139 | " }\n", 140 | "}" 141 | ] 142 | } 143 | ], 144 | "metadata": { 145 | "kernelspec": { 146 | "display_name": "Python 3", 147 | "language": "python", 148 | "name": "python3" 149 | }, 150 | "language_info": { 151 | "codemirror_mode": { 152 | "name": "ipython", 153 | "version": 3 154 | }, 155 | "file_extension": ".py", 156 | "mimetype": "text/x-python", 157 | "name": "python", 158 | "nbconvert_exporter": "python", 159 | "pygments_lexer": "ipython3", 160 | "version": "3.6.12" 161 | } 162 | }, 163 | "nbformat": 4, 
164 | "nbformat_minor": 4 165 | } -------------------------------------------------------------------------------- /neptune-gremlin-js/README.md: -------------------------------------------------------------------------------- 1 | # neptune-gremlin 2 | 3 | The `neptune-gremlin` package is an SDK for querying an Amazon Neptune graph 4 | database using gremlin. Amazon Neptune is is a fast, reliable, fully managed 5 | graph database service that makes it easy to build and run applications. It 6 | allows you to build connections between identities, build knowledge graphs, 7 | detect fraud patterns, and make predictions. It can also simply be used as a 8 | general purpose database, which is made easier by this package. 9 | 10 | The source for this package includes an AWS CDK application that creates a Neptune 11 | cluster and a Lambda function in the same VPC to facilitate integration testing. 12 | 13 | *NOTICE* _This is an experimental package that is not supported in any way by AWS. 14 | Please do not use it for mission critical production workloads!_ 15 | 16 | ## Installation 17 | 18 | ```sh 19 | npm install neptune-gremlin 20 | ``` 21 | 22 | ## Usage 23 | 24 | ### Connect to Neptune: 25 | 26 | ```Javascript 27 | const gremlin = require("neptune-gremlin") 28 | 29 | // Get configuration values from the environment 30 | const host = process.env.NEPTUNE_ENDPOINT 31 | const port = process.env.NEPTUNE_PORT 32 | const useIam = process.env.USE_IAM === "true" 33 | 34 | // Create a new connection to the Neptune database 35 | const connection = new gremlin.Connection(host, port, {useIam}) 36 | await connection.connect() 37 | ``` 38 | 39 | ### Save a node (vertex): 40 | 41 | ```Javascript 42 | const node1 = { 43 | "unique-id-1", 44 | properties: { 45 | name: "Test Node", 46 | a: "A", 47 | b: "B", 48 | }, 49 | labels: ["label1", "label2"], 50 | } 51 | await connection.saveNode(node1) 52 | ``` 53 | 54 | ### Save an edge 55 | 56 | ```JavaScript 57 | 58 | const edge1 = { 59 | id: uuid.v4(), 60 | label: "points_to", 61 | to: node2.id, 62 | from: node1.id, 63 | properties: { 64 | "a": "b", 65 | }, 66 | } 67 | 68 | await connection.saveEdge(edge1) 69 | ``` 70 | 71 | ### Get all nodes and edges in the graph 72 | 73 | ```JavaScript 74 | const searchResult = await connection.search({}) 75 | ``` 76 | 77 | ### Run a custom traversal: 78 | 79 | ```Javascript 80 | const f = (g) => { 81 | return await g.V() 82 | .has("person", "name", "Eric") 83 | .bothE().bothV().dedup() 84 | .valueMap(true).toList() 85 | } 86 | const result = await connection.query(f) 87 | ``` 88 | 89 | ### Partition the graph 90 | 91 | A Neptune cluster does not have a native paritioning concept. All nodes are in the same database. 92 | Gremlin has a feature called a partition strategy that adds a property to each node and edge 93 | automatically to segment your graph into different sub graphs. 94 | 95 | All you have to do with this library is set the partition on the connection: 96 | 97 | ```JavaScript 98 | connection.setPartition("test_partition") 99 | ``` 100 | 101 | All subsequent calls using that connection will have the `_partition` property added by default. 102 | 103 | ## Development 104 | 105 | ### Building the project 106 | 107 | This package is all Javascript, so the build script just runs eslint, copies `neptune-gremlin.js` 108 | into the cdk app's lambda folder, and then synthesizes the cdk app. 
109 | 110 | ```sh 111 | npm run build 112 | ``` 113 | 114 | ### Sample application 115 | 116 | There is a sample app at [https://github.com/aws-samples/cdk-neptune-knowledge-graph](https://github.com/aws-samples/cdk-neptune-knowledge-graph) where you can see how to incorporate this library into a REST API. 117 | 118 | ### Making changes 119 | 120 | If you want to make a change or an addition to the package (contributions welcome!), please 121 | add a test to `cdk-test-app/lambda/integration-test.js`. Deploy the stack to your AWS account 122 | and invoke the function to make sure everything works as expected. 123 | 124 | ### Deploying the cdk test app 125 | 126 | Make sure you run the top level build script, since it copies the latest `neptune-gremlin.js` 127 | file into the cdk app's lambda folder. 128 | 129 | Also keep in mind that the very first call to a new Neptune cluster tends to fail with a 500, 130 | so if that happens, just try again. 131 | 132 | _Note that this app will result in charges in your AWS account! Be sure to destroy the stack 133 | when you are done!_ 134 | 135 | ```sh 136 | npm run build 137 | cd cdk-test-app 138 | npm install 139 | cd lambda 140 | npm install 141 | cd .. 142 | npx cdk bootstrap 143 | npx cdk synth 144 | npx cdk diff 145 | npx cdk deploy 146 | ``` 147 | 148 | ### Cleaning up the CDK stack to avoid charges to your AWS account 149 | 150 | ```sh 151 | cdk destroy 152 | ``` 153 | -------------------------------------------------------------------------------- /csv-to-neptune-bulk-format/notebooks/Prepare-Data-Spotify.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "run -i csv_converter.py -h" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%%bash\n", 19 | "# clean the source and data folders\n", 20 | "rm -rf .data\n", 21 | "rm -rf .source\n" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "%run -i csv_converter.py -v ./data-config-spotify.json --s3" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "%tb" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "%%bash\n", 49 | "pwd\n", 50 | "ls\n", 51 | "echo '.source'\n", 52 | "ls -l .source\n", 53 | "echo '.data'\n", 54 | "ls -l .data\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "# Generate the token to reset and clean the database\n", 64 | "%db_reset --generate-token" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "# Use the token generated via --generate-token here\n", 74 | "%db_reset --token 3ebbc751-40e8-44d8-99c8-fad1213e9be4" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "# if you just reset the db, try the %status few times to make sure it is healthy\n", 84 | "%status" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": 
[], 92 | "source": [ 93 | "%%bash\n", 94 | "DB_HOST=\n", 95 | "awscurl --service neptune-db \\\n", 96 | " -X POST \\\n", 97 | " -H 'Content-Type: application/json' \\\n", 98 | " https://$DB_HOST:8182/loader -d '{\n", 99 | " \"source\": \"s3:///\",\n", 100 | " \"format\": \"csv\",\n", 101 | " \"iamRoleArn\": \"arn:aws:iam:::role/\",\n", 102 | " \"mode\": \"AUTO\",\n", 103 | " \"region\": \"\",\n", 104 | " \"failOnError\": \"FALSE\",\n", 105 | " \"parallelism\": \"OVERSUBSCRIBE\",\n", 106 | " \"updateSingleCardinalityProperties\": \"TRUE\"\n", 107 | "}'" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "%%bash\n", 117 | "LOAD_ID=ff90fd71-66c5-427b-be1f-f95ded45ae70\n", 118 | "DB_HOST=\n", 119 | "awscurl --service neptune-db \\\n", 120 | "-X GET \\\n", 121 | "https://$DB_HOST:8182/loader/$LOAD_ID?details=true&errors=true" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# s3:///\n", 131 | "%load" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "%load_status ff90fd71-66c5-427b-be1f-f95ded45ae70" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "%%gremlin\n", 150 | "g.V().groupCount().by(label).unfold()" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "%%gremlin\n", 160 | "g.E().groupCount().by(label).unfold()" 161 | ] 162 | } 163 | ], 164 | "metadata": { 165 | "kernelspec": { 166 | "display_name": "Python 3", 167 | "language": "python", 168 | "name": "python3" 169 | }, 170 | "language_info": { 171 | "codemirror_mode": { 172 | "name": "ipython", 173 | "version": 3 174 | }, 175 | "file_extension": ".py", 176 | "mimetype": "text/x-python", 177 | "name": "python", 178 | "nbconvert_exporter": "python", 179 | "pygments_lexer": "ipython3", 180 | "version": "3.6.12" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 4 185 | } -------------------------------------------------------------------------------- /neptune-streams-utils/examples/java8/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.example 8 | neptune-streams-handler 9 | 1.0-SNAPSHOT 10 | 11 | 12 | UTF-8 13 | 1.8 14 | stream_handler 15 | 3.4.3 16 | 2.0.2 17 | 18 | 19 | 20 | 21 | 22 | com.amazonaws 23 | amazon-neptune-streams-replicator-core 24 | 1.0.0 25 | provided 26 | 27 | 28 | org.slf4j 29 | slf4j-api 30 | 31 | 32 | 33 | 34 | 35 | com.amazonaws 36 | amazon-neptune-streams-replicator-lambda 37 | 1.0.0 38 | provided 39 | 40 | 41 | org.slf4j 42 | slf4j-api 43 | 44 | 45 | 46 | 47 | 48 | org.apache.tinkerpop 49 | gremlin-driver 50 | ${gremlin.version} 51 | 52 | 53 | 54 | com.amazonaws 55 | amazon-neptune-sigv4-signer 56 | ${sig4.signer.version} 57 | 58 | 59 | 60 | com.amazonaws 61 | amazon-neptune-gremlin-java-sigv4 62 | ${sig4.signer.version} 63 | 64 | 65 | org.slf4j 66 | slf4j-api 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | org.apache.maven.plugins 78 | maven-shade-plugin 79 | 3.2.4 80 | 81 | false 82 | 83 | 84 | 85 | package 86 | 87 | shade 88 | 89 | 90 | ${uberjar.name} 91 | 92 | 93 | 94 | 95 | *:* 96 | 97 | META-INF/*.SF 98 | META-INF/*.DSA 99 | 
META-INF/*.RSA 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | maven-compiler-plugin 111 | 3.8.1 112 | 113 | 1.8 114 | 1.8 115 | 116 | 117 | 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/DataType.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import org.apache.commons.lang3.StringUtils; 16 | 17 | import java.util.regex.Pattern; 18 | 19 | public enum DataType { 20 | None { 21 | @Override 22 | public String typeDescription() { 23 | return ""; 24 | } 25 | }, 26 | Boolean, 27 | Byte { 28 | @Override 29 | public boolean isNumeric() { 30 | return true; 31 | } 32 | }, 33 | Short { 34 | @Override 35 | public boolean isNumeric() { 36 | return true; 37 | } 38 | }, 39 | Int { 40 | @Override 41 | public boolean isNumeric() { 42 | return true; 43 | } 44 | }, 45 | Long { 46 | @Override 47 | public boolean isNumeric() { 48 | return true; 49 | } 50 | }, 51 | Double { 52 | @Override 53 | public boolean isNumeric() { 54 | return true; 55 | } 56 | }, 57 | Date, 58 | String; 59 | 60 | public boolean isNumeric() { 61 | return false; 62 | } 63 | 64 | public String typeDescription() { 65 | return java.lang.String.format(":%s", name().toLowerCase()); 66 | } 67 | 68 | public static DataType identifyType(String s) { 69 | 70 | if (isBoolean(s)) { 71 | return DataType.Boolean; 72 | } else if (isByte(s)) { 73 | return DataType.Byte; 74 | } else if (isShort(s)) { 75 | return DataType.Short; 76 | } else if (isInt(s)) { 77 | return DataType.Int; 78 | } else if (isLong(s)) { 79 | return DataType.Long; 80 | } else if (isDouble(s)) { 81 | return DataType.Double; 82 | } else { 83 | if (StringUtils.isEmpty(s)) { 84 | return DataType.None; 85 | } 86 | try { 87 | DateTimeUtils.parseISODate(s); 88 | return DataType.Date; 89 | } catch (Exception e) { 90 | return DataType.String; 91 | } 92 | } 93 | } 94 | 95 | private static final String BOOLEAN_PATTERN = "true|false"; 96 | private static final Pattern boolPattern = Pattern.compile(BOOLEAN_PATTERN, Pattern.CASE_INSENSITIVE); 97 | 98 | 99 | private static boolean isBoolean(String s) { 100 | return boolPattern.matcher(s).matches(); 101 | } 102 | 103 | private static boolean isByte(String s) { 104 | try { 105 | java.lang.Byte.parseByte(s); 106 | return true; 107 | } catch (NumberFormatException e) { 108 | return false; 109 | } 110 | } 111 | 112 | private static boolean isShort(String s) { 113 | try { 114 | java.lang.Short.parseShort(s); 115 | return true; 116 | } catch (NumberFormatException e) { 117 | return false; 118 | } 119 | } 120 | 121 | private static boolean isInt(String s) { 122 | try { 123 | java.lang.Integer.parseInt(s); 124 | return true; 125 | } catch (NumberFormatException e) { 126 | return false; 127 | } 128 | } 129 | 130 | 
private static boolean isLong(String s) { 131 | try { 132 | java.lang.Long.parseLong(s); 133 | return true; 134 | } catch (NumberFormatException e) { 135 | return false; 136 | } 137 | } 138 | 139 | private static boolean isDouble(String s) { 140 | try { 141 | java.lang.Double.parseDouble(s); 142 | return true; 143 | } catch (NumberFormatException e) { 144 | return false; 145 | } 146 | } 147 | 148 | public static DataType getBroadestType(DataType oldType, DataType newType) { 149 | 150 | if (oldType == newType) { 151 | return oldType; 152 | } 153 | 154 | if (oldType == DataType.None) { 155 | return newType; 156 | } 157 | 158 | if (newType == DataType.None) { 159 | return oldType; 160 | } 161 | 162 | if (oldType == DataType.String || newType == DataType.String) { 163 | return DataType.String; 164 | } 165 | 166 | if (oldType.isNumeric() && newType.isNumeric()) { 167 | if (newType.ordinal() > oldType.ordinal()) { 168 | return newType; 169 | } else { 170 | return oldType; 171 | } 172 | } 173 | 174 | // Mixed non-numeric types (e.g. Boolean and Date) broaden to String 175 | return DataType.String; 176 | } 177 | 178 | } 179 | -------------------------------------------------------------------------------- /opencypher-compatability-checker/README.md: -------------------------------------------------------------------------------- 1 | # openCypher Compatibility Checker 2 | 3 | A migration helper tool that validates openCypher queries for compatibility with Amazon Neptune, identifying unsupported functions and clauses to help assess migration effort from Neo4j to Neptune. 4 | 5 | ## Overview 6 | 7 | This tool analyzes openCypher queries and reports: 8 | - Compatibility status for each query 9 | - Specific unsupported functions/clauses with their positions 10 | - Suggested replacements where available 11 | - Detailed error descriptions 12 | 13 | Supports validation for both **Neptune Analytics (NA)** and **Neptune Database (NDB)**. 14 | 15 | To select the appropriate version, go to the [Releases](https://github.com/awslabs/amazon-neptune-tools/releases) page. 16 | 17 | For Neptune Analytics, find the release [opencypher-compatability-checker-analytics](https://github.com/awslabs/amazon-neptune-tools/releases/tag/opencypher-compatability-checker-analytics) 18 | 19 | For Neptune Database, find the [release](https://github.com/awslabs/amazon-neptune-tools/releases) tagged with the version of your Neptune Database cluster. Support for openCypher clauses and functions is version dependent, so please ensure you select the correct version. 20 | 21 | ## Prerequisites 22 | 23 | - **Java 17** or higher 24 | - Pre-built JAR file: `NeptuneNeo4jMigrationHelper-<version>.jar` located on the [Releases](https://github.com/awslabs/amazon-neptune-tools/releases) page. 25 | 26 | ## Installation & Build 27 | 28 | The tool is distributed as a fat JAR created during the build process. No additional dependencies are required at runtime other than Java 17. 29 | 30 | ## Usage 31 | 32 | ### Basic Command 33 | 34 | ```bash 35 | java -jar NeptuneNeo4jMigrationHelper-<version>.jar --input <input-file> [--output <output-file>] 36 | ``` 37 | 38 | ### Parameters 39 | 40 | | Parameter | Required | Description | 41 | |-----------|----------|-------------| 42 | | `--input` | Yes | Path to JSON file containing queries to validate. A sample is provided in [input.json](./input.json)| 43 | | `--output` | No | Path for JSON output file. If omitted, prints to stdout.
A sample of the output is provided in [output.json](./output.json) | 44 | 45 | ### Examples 46 | 47 | ```bash 48 | # Validate queries and save results to file 49 | java -jar NeptuneNeo4jMigrationHelper-1.0.jar --input queries.json --output results.json 50 | 51 | # Validate queries and print to console 52 | java -jar NeptuneNeo4jMigrationHelper-1.0.jar --input queries.json 53 | ``` 54 | 55 | ## Configuration 56 | 57 | ### Logging 58 | 59 | Control log verbosity with the `LOG_LEVEL` environment variable: 60 | 61 | ```bash 62 | export LOG_LEVEL=DEBUG # Detailed debugging information 63 | export LOG_LEVEL=INFO # Default level 64 | export LOG_LEVEL=WARN # Warnings only 65 | export LOG_LEVEL=ERROR # Errors only 66 | ``` 67 | 68 | A log file `migration-helper-<timestamp>.log` is created in the execution directory. 69 | 70 | ## Input Format 71 | 72 | Create a JSON file with the following structure: 73 | 74 | ```json 75 | { 76 | "targetSystem": "NA", 77 | "queries": [ 78 | { 79 | "id": 1, 80 | "query": "MATCH (n:Person) RETURN n LIMIT 10" 81 | }, 82 | { 83 | "id": 2, 84 | "query": "RETURN apoc.coll.intersection([1,2,3], [2,3,4])" 85 | } 86 | ] 87 | } 88 | ``` 89 | 90 | ### Fields 91 | 92 | - **targetSystem**: Target Neptune system 93 | - `"NA"` - Neptune Analytics 94 | - `"NDB"` - Neptune Database 95 | - **queries**: Array of query objects 96 | - **id**: Unique identifier for the query 97 | - **query**: openCypher query string to validate 98 | 99 | ## Output Format 100 | 101 | The tool generates a JSON report with validation results: 102 | 103 | ```json 104 | { 105 | "results": [ 106 | { 107 | "id": 1, 108 | "supported": true, 109 | "errorDefinitions": [] 110 | }, 111 | { 112 | "id": 2, 113 | "supported": false, 114 | "errorDefinitions": [ 115 | { 116 | "position": "line 1, column 8 (offset: 7)", 117 | "name": "apoc.coll.intersection", 118 | "replacement": "collintersection", 119 | "description": "apoc.coll.intersection is not supported in this release but try replacing with collintersection" 120 | } 121 | ] 122 | } 123 | ] 124 | } 125 | ``` 126 | 127 | ### Result Fields 128 | 129 | - **id**: Matches the input query ID 130 | - **supported**: Boolean indicating Neptune compatibility 131 | - **errorDefinitions**: Array of compatibility issues 132 | - **position**: Location of the issue in the query 133 | - **name**: Unsupported function/clause name 134 | - **replacement**: Suggested Neptune-compatible alternative (if available) 135 | - **description**: Detailed explanation of the compatibility issue 136 | 137 | ## Troubleshooting 138 | 139 | ### Common Errors 140 | 141 | 1. **Invalid input file**: Ensure JSON is properly formatted 142 | 2. **Java version**: Requires Java 17+ 143 | 3. **File permissions**: Ensure read access to input file and write access to output directory 144 | 145 | ### Getting Help 146 | 147 | - Check log files for detailed error information 148 | - Verify input JSON format matches the specification 149 | - Ensure target system value is either "NA" or "NDB" 150 | 151 | For all issues with the tool please file an issue on this GitHub repository. 152 | 153 | ## License 154 | 155 | This tool is part of the AWS Samples repository and follows the same licensing terms. -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/ConversionConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 Amazon.com, Inc. or its affiliates.
All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import lombok.Data; 16 | import lombok.NoArgsConstructor; 17 | import org.yaml.snakeyaml.LoaderOptions; 18 | import org.yaml.snakeyaml.Yaml; 19 | import org.yaml.snakeyaml.constructor.Constructor; 20 | 21 | import java.io.File; 22 | import java.io.FileInputStream; 23 | import java.io.IOException; 24 | import java.util.*; 25 | 26 | /** 27 | * Configuration class for label mapping and filtering from YAML file. 28 | *

29 |  * Expected YAML format:
30 |  * vertexLabels:
31 |  *   OldVertexLabel: NewVertexLabel
32 |  *   AnotherOldLabel: AnotherNewLabel
33 |  * edgeLabels:
34 |  *   OLD_EDGE_TYPE: NEW_EDGE_TYPE
35 |  *   ANOTHER_OLD_TYPE: ANOTHER_NEW_TYPE
36 |  * vertexIdTransformation:
37 |  *   ~id: "{_labels}_{_id}"
38 |  * edgeIdTransformation:
39 |  *   ~id: "{_type}_{_start}_{_end}"
40 |  * skipVertices:
41 |  *   byId:
42 |  *     - "vertex_id_1"
43 |  *     - "vertex_id_2"
44 |  *   byLabel:
45 |  *     - "LabelToSkip"
46 |  *     - "AnotherLabelToSkip"
47 |  * skipEdges:
48 |  *   byLabel:
49 |  *     - "RELATIONSHIP_TYPE_TO_SKIP"
50 |  *     - "ANOTHER_TYPE_TO_SKIP"
51 |  */
52 | @Data
53 | @NoArgsConstructor
54 | public class ConversionConfig {
55 | 
56 |     // Label mapping configurations
57 |     private Map<String, String> vertexLabels = new HashMap<>();
58 |     private Map<String, String> edgeLabels = new HashMap<>();
59 | 
60 |     // ID transformation configurations
61 |     private Map<String, String> vertexIdTransformation = new HashMap<>();
62 |     private Map<String, String> edgeIdTransformation = new HashMap<>();
63 | 
64 |     // Skip configurations
65 |     private SkipVertices skipVertices = new SkipVertices();
66 |     private SkipEdges skipEdges = new SkipEdges();
67 | 
68 |     /**
69 |      * Nested class for skipVertices configuration
70 |      */
71 |     @Data
72 |     @NoArgsConstructor
73 |     public static class SkipVertices {
74 |         private Set<String> byId = new HashSet<>();
75 |         private Set<String> byLabel = new HashSet<>();
76 |     }
77 | 
78 |     /**
79 |      * Nested class for skipEdges configuration
80 |      */
81 |     @Data
82 |     @NoArgsConstructor
83 |     public static class SkipEdges {
84 |         private Set<String> byLabel = new HashSet<>();
85 |     }
86 | 
87 |     /**
88 |      * Factory method to create a ConversionConfig from a YAML file using automatic object mapping
89 |      */
90 |     public static ConversionConfig fromFile(File yamlFile) throws IOException {
91 |         if (yamlFile == null || !yamlFile.exists()) {
92 |             return new ConversionConfig(); // Return empty config if no file provided
93 |         }
94 | 
95 |         Constructor constructor = new Constructor(ConversionConfig.class, new LoaderOptions());
96 |         Yaml yaml = new Yaml(constructor);
97 | 
98 |         try (FileInputStream inputStream = new FileInputStream(yamlFile)) {
99 |             ConversionConfig config = yaml.load(inputStream);
100 | 
101 |             // Handle null case if YAML file is empty or malformed
102 |             if (config == null) {
103 |                 config = new ConversionConfig();
104 |             }
105 | 
106 |             // Ensure nested objects are initialized when yaml fields are left empty
107 |             if (config.skipVertices == null) {
108 |                 config.skipVertices = new SkipVertices();
109 |             }
110 |             if (config.skipEdges == null) {
111 |                 config.skipEdges = new SkipEdges();
112 |             }
113 |             if (config.vertexLabels == null) {
114 |                 config.vertexLabels = new HashMap<>();
115 |             }
116 |             if (config.edgeLabels == null) {
117 |                 config.edgeLabels = new HashMap<>();
118 |             }
119 |             if (config.vertexIdTransformation == null) {
120 |                 config.vertexIdTransformation = new HashMap<>();
121 |             }
122 |             if (config.edgeIdTransformation == null) {
123 |                 config.edgeIdTransformation = new HashMap<>();
124 |             }
125 | 
126 |             // Ensure nested sets are initialized
127 |             if (config.skipVertices.byId == null) {
128 |                 config.skipVertices.byId = new HashSet<>();
129 |             }
130 |             if (config.skipVertices.byLabel == null) {
131 |                 config.skipVertices.byLabel = new HashSet<>();
132 |             }
133 |             if (config.skipEdges.byLabel == null) {
134 |                 config.skipEdges.byLabel = new HashSet<>();
135 |             }
136 | 
137 |             return config;
138 |         }
139 |     }
140 | 
141 |     /**
142 |      * Checks if any skip rules are configured. 
143 | */ 144 | public boolean hasSkipRules() { 145 | return !skipVertices.byId.isEmpty() || !skipVertices.byLabel.isEmpty() || !skipEdges.byLabel.isEmpty(); 146 | } 147 | 148 | /** 149 | * Checks if any ID transformations are configured. 150 | */ 151 | public boolean hasIdTransformations() { 152 | return !vertexIdTransformation.isEmpty() || !edgeIdTransformation.isEmpty(); 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /csv-to-neptune-bulk-format/csv_converter.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import logging 4 | logger = logging.getLogger(__name__) 5 | 6 | import argparse 7 | import csv 8 | import data_config 9 | 10 | import boto3 11 | 12 | import locale 13 | 14 | __all__ = [] 15 | __version__ = 0.1 16 | __date__ = '2021-01-21' 17 | __updated__ = '2021-01-21' 18 | 19 | class RawCSVConverter: 20 | def __init__(self, conf_file_names, gen_dup_file=False, use_s3=False, local_enc='utf-8'): 21 | self.gen_dup_file = gen_dup_file 22 | self.use_s3 = use_s3 23 | self.local_enc = local_enc 24 | self.conf_defs =[] 25 | try: 26 | data_config.BaseDef.log_stats() 27 | for conf_file_name in conf_file_names: 28 | self.conf_defs.append(data_config.ConfigDef(conf_file_name, self.gen_dup_file, self.local_enc)) 29 | except Exception as ex: 30 | raise Exception(f'Unable to read configuration file {conf_file_name} \nexception: {str(ex)}') 31 | if self.use_s3 : 32 | try : 33 | #self.s3 = boto3.client('s3') 34 | self.s3 = boto3.resource('s3') 35 | except Exception as ex: 36 | raise Exception(f'Unable to connect to s3 \nexception: {str(ex)}') 37 | 38 | def convert_to_csv(self): 39 | for index1, conf_def in enumerate(self.conf_defs) : 40 | data_file_names = conf_def.file_names 41 | # initialize writers 42 | conf_def.init_writers() 43 | 44 | for index2, data_file_name in enumerate(data_file_names): 45 | try: 46 | if self.use_s3 : 47 | data_file_name = conf_def.download_source_file(self.s3, data_file_name) 48 | else : 49 | data_file_name = conf_def.source_folder + '/' + data_file_name 50 | logger.info(f'Processing Data File:{index2}:{data_file_name}') 51 | with open(data_file_name, newline='', encoding=self.local_enc) as csv_file: 52 | reader = csv.DictReader(csv_file, escapechar="\\") 53 | #process the file 54 | try: 55 | conf_def.process_csv_to_csv(reader) 56 | except Exception as ex: 57 | raise Exception(f'Unable to process the CSV file: {data_file_name} \nexception: {str(ex)}') 58 | #close the file 59 | csv_file.close() 60 | except Exception as ex: 61 | logger.error(f'Unable to load the CSV file: {data_file_name} \nexception: {str(ex)}') 62 | 63 | # close files 64 | conf_def.close_writers() 65 | # delete current files and upload new files 66 | if self.use_s3 : 67 | if index1 == 0 : conf_def.delete_data_files(self.s3) 68 | conf_def.upload_data_files(self.s3) 69 | 70 | if self.gen_dup_file: data_config.BaseDef.write_dup_files() 71 | #log stats 72 | data_config.BaseDef.log_stats() 73 | data_config.BaseDef.clean_stats() 74 | 75 | def main(argv=None): 76 | program_name = os.path.basename(sys.argv[0]) 77 | program_version = "v0.1" 78 | program_build_date = "%s" % __updated__ 79 | 80 | program_version_string = '%%prog %s (%s)' % ( 81 | program_version, program_build_date) 82 | program_longdesc = ("A utility python script to convert CSV data file into the Amazon Neptune CSV format " 83 | "for bulk ingestion. 
See " 84 | "https://docs.aws.amazon.com/neptune/latest/userguide/bulk-load-tutorial-format-gremlin.html." 85 | ) 86 | program_license = "Copyright 2018 Amazon.com, Inc. or its affiliates. \ 87 | Licensed under the Apache License 2.0\nhttp://aws.amazon.com/apache2.0/" 88 | system_enc = locale.getpreferredencoding() 89 | 90 | if argv is None: 91 | argv = sys.argv[1:] 92 | try: 93 | # setup argument parser 94 | parser = argparse.ArgumentParser( description=program_license, epilog=program_longdesc) 95 | parser.add_argument("conf_file_names", nargs='+', 96 | help="Space separated, one or more Data Configuration File(s) (json)\nUse separate files if Node/Edges are different.", metavar="DATA_CONF_FILES") 97 | parser.add_argument("-e", "--enc", default='utf-8', dest='local_enc', 98 | help="Optional: encoding for the source files 'utf-8' or 'cp1252'") 99 | parser.add_argument("--s3", dest='use_s3', action='store_true', 100 | help="Use S3 as source and destination of files") 101 | parser.add_argument("--dup", dest='gen_dup_file', action='store_true', 102 | help="Generate file for duplicates") 103 | parser.add_argument("-v", "--verbose", dest='verbose', action='store_true', 104 | help="Emit Verbose logging") 105 | 106 | # process arguments 107 | args = parser.parse_args(argv) 108 | 109 | conf_file_names = args.conf_file_names 110 | log_level = logging.DEBUG if args.verbose else logging.INFO 111 | logging.basicConfig(format='%(asctime)s %(name)s:%(levelname)s:%(message)s', datefmt='%H:%M:%S') 112 | logging.getLogger(__name__).setLevel(log_level) 113 | logging.getLogger('data_config').setLevel(log_level) 114 | #logging.getLogger('botocore').setLevel(logging.ERROR) 115 | #logging.getLogger('s3transfer').setLevel(logging.ERROR) 116 | #logging.getLogger('urllib3').setLevel(logging.ERROR) 117 | 118 | # MAIN BODY # 119 | 120 | logger.info(f'Processing {conf_file_names}') 121 | logger.debug(f'System File Encoding: {system_enc}') 122 | csvConverter = RawCSVConverter(conf_file_names, args.gen_dup_file, args.use_s3, args.local_enc) 123 | csvConverter.convert_to_csv() 124 | return 0 125 | 126 | except Exception as e: 127 | indent = len(program_name) * " " 128 | logger.error(program_name + ": " + f'{e}' + "\n") 129 | logger.error(indent + " for help use --help") 130 | return 2 131 | 132 | if __name__ == '__main__': 133 | sys.exit(main()) -------------------------------------------------------------------------------- /neptune-python-utils/neptune_python_utils/bulkload.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. 2 | # All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). 5 | # You may not use this file except in compliance with the License. 6 | # A copy of the License is located at 7 | # 8 | # http://aws.amazon.com/apache2.0/ 9 | # 10 | # or in the "license" file accompanying this file. 11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions 13 | # and limitations under the License. 
14 | 
15 | import json
16 | import requests
17 | import os
18 | import sys
19 | import time
20 | from neptune_python_utils.endpoints import Endpoints
21 | 
22 | class BulkLoad:
23 | 
24 |     def __init__(self,
25 |                  source,
26 |                  format='csv',
27 |                  role=None,
28 |                  mode='AUTO',
29 |                  region=None,
30 |                  fail_on_error=False,
31 |                  parallelism='OVERSUBSCRIBE',
32 |                  base_uri='http://aws.amazon.com/neptune/default',
33 |                  named_graph_uri='http://aws.amazon.com/neptune/vocab/v01/DefaultNamedGraph',
34 |                  update_single_cardinality_properties=False,
35 |                  queue_request=False,
36 |                  dependencies=None,  # None rather than a shared mutable default list
37 |                  endpoints=None,
38 |                  **kwargs):
39 | 
40 |         self.source = source
41 |         self.format = format
42 | 
43 |         if role is None:
44 |             assert ('NEPTUNE_LOAD_FROM_S3_ROLE_ARN' in os.environ), 'role is missing.'
45 |             self.role = os.environ['NEPTUNE_LOAD_FROM_S3_ROLE_ARN']
46 |         else:
47 |             self.role = role
48 | 
49 |         self.mode = mode
50 | 
51 |         if region is None:
52 |             assert ('AWS_REGION' in os.environ or 'SERVICE_REGION' in os.environ), 'region is missing.'
53 |             self.region = os.environ.get('SERVICE_REGION', os.environ.get('AWS_REGION', None))
54 |         else:
55 |             self.region = region
56 | 
57 |         if endpoints is None:
58 |             self.endpoints = Endpoints(region_name=self.region)
59 |         else:
60 |             self.endpoints = endpoints
61 | 
62 |         self.fail_on_error = 'TRUE' if fail_on_error else 'FALSE'
63 |         self.parallelism = parallelism
64 |         self.base_uri = base_uri
65 |         self.named_graph_uri = named_graph_uri
66 |         self.update_single_cardinality_properties = 'TRUE' if update_single_cardinality_properties else 'FALSE'
67 |         self.queue_request = 'TRUE' if queue_request else 'FALSE'
68 |         self.dependencies = dependencies if dependencies is not None else []
69 |         self.kwargs = kwargs
70 | 
71 |     def __load_from(self, source):
72 |         return {
73 |             'source' : source,
74 |             'format' : self.format,
75 |             'iamRoleArn' : self.role,
76 |             'mode': self.mode,
77 |             'region' : self.region,
78 |             'failOnError' : self.fail_on_error,
79 |             'parallelism' : self.parallelism,
80 |             'parserConfiguration': {
81 |                 'baseUri': self.base_uri,
82 |                 'namedGraphUri': self.named_graph_uri
83 |             },
84 |             'updateSingleCardinalityProperties': self.update_single_cardinality_properties,
85 |             'queueRequest': self.queue_request,
86 |             'dependencies': self.dependencies
87 |         }
88 | 
89 |     def __load(self, loader_endpoint, data):
90 | 
91 |         json_string = json.dumps(data)
92 | 
93 |         request_parameters = loader_endpoint.prepare_request('POST', json_string, headers={'Content-Type':'application/json'})
94 | 
95 |         response = requests.post(request_parameters.uri, data=json_string, headers=request_parameters.headers, **self.kwargs)
96 |         response.encoding = 'utf-8'
97 | 
98 |         if response.status_code != 200:
99 |             raise Exception('{}: {}'.format(response.status_code, response.text))
100 | 
101 |         json_response = response.json()
102 | 
103 |         return json_response['payload']['loadId']
104 | 
105 |     def load_async(self):
106 |         localised_source = self.source.replace('${AWS_REGION}', self.region)
107 |         loader_endpoint = self.endpoints.loader_endpoint()
108 |         json_payload = self.__load_from(localised_source)
109 |         print('''curl -X POST \\
110 |     -H 'Content-Type: application/json' \\
111 |     {} -d \'{}\''''.format(loader_endpoint, json.dumps(json_payload, indent=4)))
112 |         load_id = self.__load(loader_endpoint, json_payload)
113 |         return BulkLoadStatus(self.endpoints.load_status_endpoint(load_id), **self.kwargs)
114 | 
115 |     def load(self, interval=2):
116 |         status = self.load_async()
117 |         print('status_uri: {}'.format(status.load_status_endpoint))
118 |         status.wait(interval)
119 | 
120 
| class BulkLoadStatus: 121 | 122 | def __init__(self, load_status_endpoint, **kwargs): 123 | self.load_status_endpoint = load_status_endpoint 124 | self.kwargs = kwargs 125 | 126 | def status(self, details=False, errors=False, page=1, errors_per_page=10): 127 | params = { 128 | 'errors': 'TRUE' if errors else 'FALSE', 129 | 'details': 'TRUE' if details else 'FALSE', 130 | 'page': page, 131 | 'errorsPerPage': errors_per_page 132 | } 133 | request_parameters = self.load_status_endpoint.prepare_request(querystring=params) 134 | 135 | response = requests.get(request_parameters.uri, params=params, headers=request_parameters.headers, **self.kwargs) 136 | response.encoding = 'utf-8' 137 | 138 | if response.status_code != 200: 139 | raise Exception('{}: {}'.format(response.status_code, response.text)) 140 | 141 | json_response = response.json() 142 | 143 | status = json_response['payload']['overallStatus']['status'] 144 | 145 | return (status, json_response) 146 | 147 | 148 | def uri(self): 149 | return self.load_status_endpoint 150 | 151 | def wait(self, interval=2): 152 | while True: 153 | status, json_response = self.status() 154 | if status == 'LOAD_COMPLETED': 155 | print('load completed') 156 | break 157 | if status == 'LOAD_IN_PROGRESS': 158 | print('loading... {} records inserted'.format(json_response['payload']['overallStatus']['totalRecords'])) 159 | time.sleep(interval) 160 | else: 161 | raise Exception(json_response) -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/lambda/kinesis_to_elasticsearch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 
13 | 
14 | from aws_kinesis_agg.deaggregator import deaggregate_records, iter_deaggregate_records
15 | #from metrics_publisher import MetricsPublisher
16 | import importlib
17 | import logging
18 | import base64
19 | import six
20 | import os
21 | import json
22 | import neptune_to_es
23 | from queue import Queue
24 | import time
25 | 
26 | logger = logging.getLogger()
27 | logger.setLevel(logging.INFO)
28 | 
29 | log_commit_nums = os.environ.get('log_commit_nums', 'false').lower() == 'true'
30 | neptune_engine = os.environ['NEPTUNE_ENGINE']
31 | stream_name = os.environ['STREAM_NAME']
32 | handler_name = 'neptune_to_es.neptune_sparql_es_handler.ElasticSearchSparqlHandler' if neptune_engine == 'sparql' else 'neptune_to_es.neptune_gremlin_es_handler.ElasticSearchGremlinHandler'
33 | 
34 | # Dummy values
35 | os.environ["StreamRecordsBatchSize"] = "100"
36 | os.environ["MaxPollingWaitTime"] = "1"
37 | os.environ["Application"] = ""
38 | os.environ["LeaseTable"] = ""
39 | os.environ["LoggingLevel"] = "INFO"
40 | os.environ["MaxPollingInterval"] = "1"
41 | os.environ["NeptuneStreamEndpoint"] = ""
42 | os.environ["StreamRecordsHandler"] = handler_name
43 | 
44 | from stream_records_processor import StreamRecordsProcessor
45 | 
46 | stream_records_processor = StreamRecordsProcessor()
47 | 
48 | #metrics_publisher_client = MetricsPublisher()
49 | 
50 | def get_handler_instance(handler_name, retry_count=0):
51 | 
52 |     """
53 |     Get Handler instance given a handler name with module
54 |     :param handler_name: the handler class name with module.
55 |     :return: Handler instance
56 | 
57 |     """
58 |     logger.info('Handler: {}'.format(handler_name))
59 | 
60 |     try:
61 |         parts = handler_name.rsplit('.', 1)
62 |         module = importlib.import_module(parts[0])
63 |         cls = getattr(module, parts[1])
64 |         return cls()
65 |     except Exception as e:
66 |         error_msg = str(e)
67 |         if 'resource_already_exists_exception' in error_msg:
68 |             if retry_count > 3:
69 |                 logger.info('Elastic Search Index - amazon_neptune already exists')
70 |                 raise e
71 |             else:
72 |                 return get_handler_instance(handler_name, retry_count + 1)
73 |         else:
74 |             logger.error('Error occurred while creating handler instance for {} - {}.'.format(handler_name, error_msg))
75 |             raise e
76 | 
77 | handler = get_handler_instance(handler_name)
78 | 
79 | def lambda_bulk_handler(event, context):
80 |     """A Python AWS Lambda function to process Kinesis aggregated
81 |     records in a bulk fashion."""
82 | 
83 |     logger.info('Starting bulk loading')
84 | 
85 |     raw_kinesis_records = event['Records']
86 | 
87 |     logger.info('Aggregated Kinesis record count: {}'.format(len(raw_kinesis_records)))
88 | 
89 |     # Deaggregate all records in one call
90 |     user_records = deaggregate_records(raw_kinesis_records)
91 | 
92 |     total_records = len(user_records)
93 | 
94 |     logger.info('Deaggregated record count: {}'.format(total_records))
95 | 
96 |     log_stream = {
97 |         "records": [],
98 |         "lastEventId": {
99 |             "commitNum": -1,
100 |             "opNum": 0
101 |         },
102 |         "totalRecords": total_records
103 |     }
104 | 
105 |     first_commit_num = None
106 |     first_op_num = None
107 |     prev_commit_num = None
108 |     prev_op_num = None
109 |     commit_nums = set()
110 | 
111 |     for user_record in user_records:
112 |         records_json = base64.b64decode(user_record['kinesis']['data'])
113 |         try:
114 |             records = json.loads(records_json)
115 |         except Exception as e:
116 |             logger.error('Error parsing JSON: \'{}\': {}'.format(records_json, str(e)))
117 |             raise e
118 |         for record in records:
119 | 
120 |             commit_num = record['eventId']['commitNum']
121 | 
op_num = record['eventId']['opNum']
122 | 
123 |             if log_commit_nums:
124 |                 commit_nums.add(commit_num)
125 | 
126 |             if not first_commit_num:
127 |                 first_commit_num = commit_num
128 | 
129 |             if not first_op_num:
130 |                 first_op_num = op_num
131 | 
132 |             #logger.info('Stream record: (commitNum: {}, opNum: {})'.format(commit_num, op_num))
133 | 
134 |             #if prev_commit_num and commit_num < prev_commit_num:
135 |             #    logger.warn('Current commitNum [{}] is less than previous commitNum [{}]'.format(commit_num, prev_commit_num))
136 | 
137 |             if prev_commit_num and commit_num == prev_commit_num:
138 |                 if prev_op_num and op_num < prev_op_num:
139 |                     logger.warning('Current opNum [{}] is less than previous opNum [{}] (commitNum [{}])'.format(op_num, prev_op_num, commit_num))
140 | 
141 |             log_stream['records'].append(record)
142 | 
143 |             prev_commit_num = commit_num
144 |             prev_op_num = op_num
145 | 
146 |     log_stream['lastEventId']['commitNum'] = prev_commit_num if prev_commit_num else -1
147 |     log_stream['lastEventId']['opNum'] = prev_op_num if prev_op_num else 0
148 |     log_stream['lastTrxTimestamp'] = str(round(time.time() * 1000))
149 | 
150 |     logger.info('Log stream record count: {}'.format(len(log_stream['records'])))
151 |     logger.info('First record: (commitNum: {}, opNum: {})'.format(first_commit_num, first_op_num))
152 |     logger.info('Last record: (commitNum: {}, opNum: {})'.format(prev_commit_num, prev_op_num))
153 | 
154 |     if log_commit_nums:
155 |         logger.info('Commit nums: {}'.format(commit_nums))
156 | 
157 |     query_queue = Queue(maxsize=0)
158 |     for result in handler.handle_records(log_stream, query_queue):
159 |         records_processed = result.records_processed
160 |         logger.info('{} records processed'.format(records_processed))
161 |         #metrics_publisher_client.publish_metrics(metrics_publisher_client.generate_record_processed_metrics(records_processed))
162 | 
163 |     logger.info('Executing Opensearch queries')
164 | 
165 |     while not query_queue.empty():
166 |         stream_records_processor.write(query_queue)
167 | 
168 |     logger.info('Finished bulk loading')
169 | 
--------------------------------------------------------------------------------
/glue-neptune/glue_neptune/NeptuneGremlinClient.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates.
 2 | # All Rights Reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License").
 5 | # You may not use this file except in compliance with the License.
 6 | # A copy of the License is located at
 7 | #
 8 | #  http://aws.amazon.com/apache2.0/
 9 | #
10 | # or in the "license" file accompanying this file.
11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions
13 | # and limitations under the License. 
14 | 
15 | import sys
16 | 
17 | from pyspark.sql.functions import lit
18 | from pyspark.sql.functions import format_string
19 | from gremlin_python import statics
20 | from gremlin_python.structure.graph import Graph
21 | from gremlin_python.process.graph_traversal import __
22 | from gremlin_python.process.strategies import *
23 | from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
24 | from gremlin_python.process.traversal import *
25 | 
26 | class NeptuneGremlinClient:
27 | 
28 |     def __init__(self, endpoint):
29 |         self.endpoint = endpoint
30 |         statics.load_statics(globals())
31 |         del globals()['range']
32 |         del globals()['map']
33 | 
34 |     def remote_connection(self):
35 |         """Creates a connection to a Neptune database.
36 | 
37 |         Example:
38 |         >>> gremlin_endpoint = NeptuneConnectionInfo(glueContext).neptune_endpoint('neptune')
39 |         >>> neptune = NeptuneGremlinClient(gremlin_endpoint)
40 |         >>> conn = neptune.remote_connection()
41 |         >>> g = neptune.traversal_source(conn)
42 |         >>> count = g.V().count().next()
43 |         >>> conn.close()
44 |         """
45 |         return DriverRemoteConnection(self.endpoint,'g')
46 | 
47 |     def traversal_source(self, connection=None):
48 |         """Creates a traversal source.
49 | 
50 |         Example:
51 |         >>> gremlin_endpoint = NeptuneConnectionInfo(glueContext).neptune_endpoint('neptune')
52 |         >>> neptune = NeptuneGremlinClient(gremlin_endpoint)
53 |         >>> g = neptune.traversal_source()
54 |         >>> count = g.V().count().next()
55 |         """
56 |         if connection is not None:
57 |             return Graph().traversal().withRemote(connection)
58 |         else:
59 |             return Graph().traversal().withRemote(self.remote_connection())
60 | 
61 |     def add_vertices(self, label):
62 |         """Adds a vertex with the supplied label for each row in a DataFrame partition.
63 |         If the DataFrame contains an '~id' column, the values in this column will be treated as user-supplied IDs for the new vertices.
64 |         If the DataFrame does not have an '~id' column, Neptune will autogenerate a UUID for each vertex.
65 | 
66 |         Example:
67 |         >>> dynamicframe.toDF().foreachPartition(neptune.add_vertices('Product'))
68 |         """
69 |         def add_vertices_for_label(rows):
70 |             conn = self.remote_connection()
71 |             g = self.traversal_source(conn)
72 |             for row in rows:
73 |                 entries = row.asDict()
74 |                 traversal = g.addV(label)
75 |                 for key, value in entries.items():  # items() works on both Python 2 and 3
76 |                     key = key.split(':')[0]
77 |                     if key == '~id':
78 |                         traversal.property(id, value)
79 |                     elif key == '~label':
80 |                         pass
81 |                     else:
82 |                         traversal.property(key, value)
83 |                 traversal.next()
84 |             conn.close()
85 |         return add_vertices_for_label
86 | 
87 |     def upsert_vertices(self, label):
88 |         """Conditionally adds vertices for the rows in a DataFrame partition using the Gremlin coalesce() idiom.
89 |         The DataFrame must contain an '~id' column. 
90 | 
91 |         Example:
92 |         >>> dynamicframe.toDF().foreachPartition(neptune.upsert_vertices('Product'))
93 |         """
94 |         def upsert_vertices_for_label(rows):
95 |             conn = self.remote_connection()
96 |             g = self.traversal_source(conn)
97 |             for row in rows:
98 |                 entries = row.asDict()
99 |                 create_traversal = __.addV(label)
100 |                 for key, value in entries.items():
101 |                     key = key.split(':')[0]
102 |                     if key == '~id':
103 |                         create_traversal.property(id, value)
104 |                     elif key == '~label':
105 |                         pass
106 |                     else:
107 |                         create_traversal.property(key, value)
108 |                 g.V(entries['~id']).fold().coalesce(__.unfold(), create_traversal).next()
109 |             conn.close()
110 |         return upsert_vertices_for_label
111 | 
112 |     def add_edges(self, label):
113 |         """Adds an edge with the supplied label for each row in a DataFrame partition.
114 |         The DataFrame must contain '~id', '~from' and '~to' columns; the values in the
115 |         '~id' column are treated as user-supplied IDs for the new edges.
116 | 
117 |         Example:
118 |         >>> dynamicframe.toDF().foreachPartition(neptune.add_edges('ORDER_DETAIL'))
119 |         """
120 |         def add_edges_for_label(rows):
121 |             conn = self.remote_connection()
122 |             g = self.traversal_source(conn)
123 |             for row in rows:
124 |                 entries = row.asDict()
125 |                 traversal = g.V(row['~from']).addE(label).to(V(row['~to'])).property(id, row['~id'])
126 |                 for key, value in entries.items():
127 |                     key = key.split(':')[0]
128 |                     if key not in ['~id', '~from', '~to', '~label']:
129 |                         traversal.property(key, value)
130 |                 traversal.next()
131 |             conn.close()
132 |         return add_edges_for_label
133 | 
134 |     def upsert_edges(self, label):
135 |         """Conditionally adds edges for the rows in a DataFrame partition using the Gremlin coalesce() idiom.
136 |         The DataFrame must contain '~id', '~from', '~to' and '~label' columns.
137 | 
138 |         Example:
139 |         >>> dynamicframe.toDF().foreachPartition(neptune.upsert_edges('ORDER_DETAIL'))
140 |         """
141 |         def add_edges_for_label(rows):
142 |             conn = self.remote_connection()
143 |             g = self.traversal_source(conn)
144 |             for row in rows:
145 |                 entries = row.asDict()
146 |                 create_traversal = __.V(row['~from']).addE(label).to(V(row['~to'])).property(id, row['~id'])
147 |                 for key, value in entries.items():
148 |                     key = key.split(':')[0]
149 |                     if key not in ['~id', '~from', '~to', '~label']:
150 |                         create_traversal.property(key, value)
151 |                 g.E(entries['~id']).fold().coalesce(__.unfold(), create_traversal).next()
152 |             conn.close()
153 |         return add_edges_for_label
--------------------------------------------------------------------------------
/neo4j-to-neptune/src/test/java/com/amazonaws/services/neptune/util/UtilsTest.java:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 3 | Licensed under the Apache License, Version 2.0 (the "License").
 4 | You may not use this file except in compliance with the License.
 5 | A copy of the License is located at
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | or in the "license" file accompanying this file. This file is distributed
 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 9 | express or implied. See the License for the specific language governing
10 | permissions and limitations under the License. 
11 | */ 12 | 13 | package com.amazonaws.services.neptune.util; 14 | 15 | import org.junit.Test; 16 | import static org.junit.Assert.*; 17 | 18 | /** 19 | * Unit tests for Utils class 20 | */ 21 | public class UtilsTest { 22 | 23 | @Test 24 | public void testFormatFileSize_Bytes() { 25 | // Test bytes (0-1023) 26 | assertEquals("0 B", Utils.formatFileSize(0)); 27 | assertEquals("1 B", Utils.formatFileSize(1)); 28 | assertEquals("512 B", Utils.formatFileSize(512)); 29 | assertEquals("1023 B", Utils.formatFileSize(1023)); 30 | } 31 | 32 | @Test 33 | public void testFormatFileSize_Kilobytes() { 34 | // Test kilobytes (1024 - 1048575) 35 | assertEquals("1.0 KB", Utils.formatFileSize(1024)); 36 | assertEquals("1.5 KB", Utils.formatFileSize(1536)); // 1024 + 512 37 | assertEquals("2.0 KB", Utils.formatFileSize(2048)); 38 | assertEquals("10.5 KB", Utils.formatFileSize(10752)); // 10.5 * 1024 39 | assertEquals("1024.0 KB", Utils.formatFileSize(1048575)); // Just under 1MB 40 | } 41 | 42 | @Test 43 | public void testFormatFileSize_Megabytes() { 44 | // Test megabytes (1048576 - 1073741823) 45 | assertEquals("1.0 MB", Utils.formatFileSize(1048576)); // 1024 * 1024 46 | assertEquals("1.5 MB", Utils.formatFileSize(1572864)); // 1.5 * 1024 * 1024 47 | assertEquals("2.0 MB", Utils.formatFileSize(2097152)); // 2 * 1024 * 1024 48 | assertEquals("10.5 MB", Utils.formatFileSize(11010048)); // 10.5 * 1024 * 1024 49 | assertEquals("500.0 MB", Utils.formatFileSize(524288000)); // 500 * 1024 * 1024 50 | assertEquals("1024.0 MB", Utils.formatFileSize(1073741823)); // Just under 1GB 51 | } 52 | 53 | @Test 54 | public void testFormatFileSize_Gigabytes() { 55 | // Test gigabytes (1073741824 and above) 56 | assertEquals("1.0 GB", Utils.formatFileSize(1073741824L)); // 1024 * 1024 * 1024 57 | assertEquals("1.5 GB", Utils.formatFileSize(1610612736L)); // 1.5 * 1024 * 1024 * 1024 58 | assertEquals("2.0 GB", Utils.formatFileSize(2147483648L)); // 2 * 1024 * 1024 * 1024 59 | assertEquals("10.5 GB", Utils.formatFileSize(11274289152L)); // 10.5 * 1024 * 1024 * 1024 60 | assertEquals("100.0 GB", Utils.formatFileSize(107374182400L)); // 100 * 1024 * 1024 * 1024 61 | assertEquals("1000.0 GB", Utils.formatFileSize(1073741824000L)); // 1000 * 1024 * 1024 * 1024 62 | } 63 | 64 | @Test 65 | public void testFormatFileSize_BoundaryValues() { 66 | // Test exact boundary values 67 | assertEquals("1023 B", Utils.formatFileSize(1023)); 68 | assertEquals("1.0 KB", Utils.formatFileSize(1024)); 69 | 70 | assertEquals("1024.0 KB", Utils.formatFileSize(1048575)); // 1024*1024 - 1 71 | assertEquals("1.0 MB", Utils.formatFileSize(1048576)); // 1024*1024 72 | 73 | assertEquals("1024.0 MB", Utils.formatFileSize(1073741823)); // 1024*1024*1024 - 1 74 | assertEquals("1.0 GB", Utils.formatFileSize(1073741824)); // 1024*1024*1024 75 | } 76 | 77 | @Test 78 | public void testFormatFileSize_DecimalPrecision() { 79 | // Test decimal precision (should be 1 decimal place) 80 | assertEquals("1.1 KB", Utils.formatFileSize(1126)); // 1.1 * 1024 81 | assertEquals("1.9 KB", Utils.formatFileSize(1946)); // 1.9 * 1024 82 | assertEquals("1.1 MB", Utils.formatFileSize(1153434)); // 1.1 * 1024 * 1024 83 | assertEquals("1.9 MB", Utils.formatFileSize(1992294)); // 1.9 * 1024 * 1024 84 | assertEquals("1.1 GB", Utils.formatFileSize(1181116006L)); // 1.1 * 1024 * 1024 * 1024 85 | assertEquals("1.9 GB", Utils.formatFileSize(2040109465L)); // 1.9 * 1024 * 1024 * 1024 86 | } 87 | 88 | @Test 89 | public void testFormatFileSize_LargeValues() { 90 | // Test very large values 
91 | assertEquals("1024.0 GB", Utils.formatFileSize(1099511627776L)); // 1TB in GB format 92 | assertEquals("2048.0 GB", Utils.formatFileSize(2199023255552L)); // 2TB in GB format 93 | assertEquals("10240.0 GB", Utils.formatFileSize(10995116277760L)); // 10TB in GB format 94 | } 95 | 96 | @Test 97 | public void testFormatFileSize_EdgeCases() { 98 | // Test edge cases 99 | assertEquals("0 B", Utils.formatFileSize(0)); 100 | assertEquals("1 B", Utils.formatFileSize(1)); 101 | 102 | // Test maximum long value (though unrealistic for file sizes) 103 | long maxLong = Long.MAX_VALUE; 104 | String result = Utils.formatFileSize(maxLong); 105 | assertTrue("Should format very large numbers as GB", result.endsWith(" GB")); 106 | assertTrue("Should be a very large number", result.startsWith("8589934592")); // ~8.6 billion GB 107 | } 108 | 109 | @Test 110 | public void testFormatFileSize_ConsistentFormatting() { 111 | // Verify consistent decimal formatting 112 | String result1KB = Utils.formatFileSize(1024); 113 | String result1MB = Utils.formatFileSize(1048576); 114 | String result1GB = Utils.formatFileSize(1073741824); 115 | 116 | assertTrue("KB should have .0 format", result1KB.contains(".0")); 117 | assertTrue("MB should have .0 format", result1MB.contains(".0")); 118 | assertTrue("GB should have .0 format", result1GB.contains(".0")); 119 | } 120 | 121 | @Test 122 | public void testFormatFileSize_RoundingBehavior() { 123 | // Test rounding behavior for edge cases 124 | assertEquals("1.0 KB", Utils.formatFileSize(1024)); // Exact 125 | assertEquals("1.0 KB", Utils.formatFileSize(1025)); // Should round to 1.0 126 | assertEquals("1.0 KB", Utils.formatFileSize(1075)); // Should round to 1.0 (1075/1024 = 1.05) 127 | assertEquals("1.1 KB", Utils.formatFileSize(1126)); // Should round to 1.1 (1126/1024 = 1.1) 128 | assertEquals("1.1 KB", Utils.formatFileSize(1177)); // Should round to 1.1 (1177/1024 = 1.15) 129 | } 130 | 131 | @Test 132 | public void testUtilsClassCannotBeInstantiated() { 133 | // Test that Utils class has private constructor (utility class pattern) 134 | try { 135 | // This should work since we're in the same package, but constructor should be private 136 | java.lang.reflect.Constructor constructor = Utils.class.getDeclaredConstructor(); 137 | assertTrue("Constructor should be private", 138 | java.lang.reflect.Modifier.isPrivate(constructor.getModifiers())); 139 | } catch (NoSuchMethodException e) { 140 | fail("Utils class should have a private no-args constructor"); 141 | } 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /csv-to-neptune-bulk-format/data-config-spotify.json: -------------------------------------------------------------------------------- 1 | { 2 | "source_folder": ".source", 3 | "data_folder": ".data", 4 | "fileNames": [ 5 | "spotify_songs.csv" 6 | ], 7 | "nodes": [ 8 | { 9 | "csvFileName": "Track.csv", 10 | "select": "'track_name' in row and 'track_artist' in row and row['track_name'] != '' and row['track_artist'] != ''", 11 | "id": "uuid()", 12 | "label": "'Track'", 13 | "uniqueKey": "row['track_artist'] + '-' + row['track_name']", 14 | "properties": [ 15 | { 16 | "property": "track_name", 17 | "key": "track_name" 18 | }, 19 | { 20 | "property": "track_artist", 21 | "key": "track_artist" 22 | }, 23 | { 24 | "property": "track_popularity", 25 | "key": "track_popularity" 26 | }, 27 | { 28 | "property": "danceability", 29 | "key": "danceability" 30 | }, 31 | { 32 | "property": "energy", 33 | "key": "energy" 34 | }, 
35 | { 36 | "property": "key", 37 | "key": "key" 38 | }, 39 | { 40 | "property": "loudness", 41 | "key": "loudness" 42 | }, 43 | 44 | { 45 | "property": "mode", 46 | "key": "mode" 47 | }, 48 | { 49 | "property": "speechiness", 50 | "key": "speechiness" 51 | }, 52 | { 53 | "property": "acousticness", 54 | "key": "acousticness" 55 | }, 56 | { 57 | "property": "instrumentalness", 58 | "key": "instrumentalness" 59 | }, 60 | { 61 | "property": "liveness", 62 | "key": "liveness" 63 | }, 64 | { 65 | "property": "valence", 66 | "key": "valence" 67 | }, 68 | { 69 | "property": "tempo", 70 | "key": "tempo" 71 | }, 72 | { 73 | "property": "duration_ms", 74 | "key": "duration_ms" 75 | } 76 | ] 77 | }, 78 | { 79 | "csvFileName": "Artist.csv", 80 | "select": "'track_artist' in row and row['track_artist'] != ''", 81 | "id": "uuid()", 82 | "label": "'Artist'", 83 | "uniqueKey": "row['track_artist']", 84 | "properties": [ 85 | { 86 | "property": "name", 87 | "value": "row['track_artist']" 88 | } 89 | ] 90 | }, 91 | { 92 | "csvFileName": "Album.csv", 93 | "select": "'track_album_name' in row and row['track_album_name'] != '' and 'track_album_id' in row and row['track_album_id'] != ''", 94 | "id": "uuid()", 95 | "label": "'Album'", 96 | "uniqueKey": "row['track_album_id']", 97 | "properties": [ 98 | { 99 | "property": "name", 100 | "value": "row['track_album_name']" 101 | }, 102 | { 103 | "property": "album_release_date", 104 | "value": "row['track_album_release_date']" 105 | } 106 | ] 107 | }, 108 | { 109 | "csvFileName": "Playlist.csv", 110 | "select": "'playlist_name' in row and row['playlist_name'] != '' and 'playlist_id' in row and row['playlist_id'] != ''", 111 | "id": "uuid()", 112 | "label": "'Playlist'", 113 | "uniqueKey": "row['playlist_id']", 114 | "properties": [ 115 | { 116 | "property": "name", 117 | "value": "row['playlist_name']" 118 | } 119 | ] 120 | }, 121 | { 122 | "csvFileName": "Playlist_Genre.csv", 123 | "select": "'playlist_genre' in row and row['playlist_genre'] != ''", 124 | "id": "uuid()", 125 | "label": "'Genre'", 126 | "uniqueKey": "row['playlist_genre']", 127 | "properties": [ 128 | { 129 | "property": "name", 130 | "value": "row['playlist_genre']" 131 | } 132 | ] 133 | }, 134 | { 135 | "csvFileName": "Playlist_SubGenre.csv", 136 | "select": "'playlist_subgenre' in row and row['playlist_subgenre'] != ''", 137 | "id": "uuid()", 138 | "label": "'SubGenre'", 139 | "uniqueKey": "row['playlist_subgenre']", 140 | "properties": [ 141 | { 142 | "property": "name", 143 | "value": "row['playlist_subgenre']" 144 | } 145 | ] 146 | } 147 | ], 148 | "edges": [ 149 | { 150 | "csvFileName": "Track_Album_Edges.csv", 151 | "select": "'track_name' in row and 'track_artist' in row and row['track_name'] != '' and row['track_artist'] != '' and 'track_album_id' in row and row['track_album_id'] != ''", 152 | "id": "uuid()", 153 | "label": "'IN_ALBUM'", 154 | "from": "row['track_artist'] + '-' + row['track_name']", 155 | "to": "row['track_album_id']", 156 | "fromLabel": "'Track'", 157 | "toLabel": "'Album'", 158 | "properties": [] 159 | }, 160 | { 161 | "csvFileName": "Track_Artist_Edges.csv", 162 | "select": "'track_name' in row and 'track_artist' in row and row['track_name'] != '' and row['track_artist'] != ''", 163 | "id": "uuid()", 164 | "label": "'BY_ARTIST'", 165 | "from": "row['track_artist'] + '-' + row['track_name']", 166 | "to": "row['track_artist']", 167 | "fromLabel": "'Track'", 168 | "toLabel": "'Artist'", 169 | "properties": [] 170 | }, 171 | { 172 | "csvFileName": 
"Track_Playlist_Edges.csv", 173 | "select": "'track_name' in row and 'track_artist' in row and row['track_name'] != '' and row['track_artist'] != '' and 'playlist_id' in row and row['playlist_id'] != ''", 174 | "id": "uuid()", 175 | "label": "'IN_PLAYLIST'", 176 | "from": "row['track_artist'] + '-' + row['track_name']", 177 | "to": "row['playlist_id']", 178 | "fromLabel": "'Track'", 179 | "toLabel": "'Playlist'", 180 | "properties": [] 181 | }, 182 | { 183 | "csvFileName": "Playlist_Genre_Edges.csv", 184 | "select": "'playlist_id' in row and row['playlist_id'] != ''and 'playlist_genre' in row and row['playlist_genre'] != ''", 185 | "id": "uuid()", 186 | "label": "'HAS_GENRE'", 187 | "from": "row['playlist_id']", 188 | "to": "row['playlist_genre']", 189 | "fromLabel": "'Playlist'", 190 | "toLabel": "'Genre'", 191 | "properties": [] 192 | }, 193 | { 194 | "csvFileName": "Genre_SubGenre_Edges.csv", 195 | "select": "'playlist_genre' in row and row['playlist_genre'] != '' and 'playlist_subgenre' in row and row['playlist_subgenre'] != ''", 196 | "id": "uuid()", 197 | "label": "'HAS_SUBGENRE'", 198 | "from": "row['playlist_genre']", 199 | "to": "row['playlist_subgenre']", 200 | "fromLabel": "'Genre'", 201 | "toLabel": "'SubGenre'", 202 | "properties": [] 203 | } 204 | ] 205 | } --------------------------------------------------------------------------------