├── VERSION ├── neptune-gremlin-js ├── .gitignore ├── .npmignore ├── cdk-test-app │ ├── .npmignore │ ├── jest.config.js │ ├── .gitignore │ ├── README.md │ ├── package.json │ ├── cdk.json │ ├── lambda │ │ └── package.json │ ├── bin │ │ └── cdk-test-app.js │ └── lib │ │ └── cdk-test-app-stack.js ├── CHANGELOG.md ├── .eslintrc.js ├── test │ └── aws-neptune-gremlin.test.js ├── package.json └── README.md ├── .gitignore ├── csv-to-neptune-bulk-format ├── .gitignore ├── .images │ └── Solution-CSVConverter.png ├── data-config-spotify-no-node.json ├── notebooks │ ├── Spotify-Data-Query.ipynb │ └── Prepare-Data-Spotify.ipynb ├── csv_converter.py └── data-config-spotify.json ├── neptune-streams-utils ├── examples │ ├── java8 │ │ ├── build.sh │ │ ├── readme.md │ │ ├── install-dependencies.sh │ │ ├── src │ │ │ └── main │ │ │ │ └── java │ │ │ │ ├── utils │ │ │ │ └── EnvironmentVariablesUtils.java │ │ │ │ └── stream_handler │ │ │ │ ├── AbstractStreamHandler.java │ │ │ │ └── StreamHandler.java │ │ └── pom.xml │ ├── python3.8 │ │ ├── build.sh │ │ └── stream_handler.py │ └── streams-to-firehose │ │ ├── build.sh │ │ ├── readme.md │ │ └── stream_handler.py ├── readme.md └── provisioning │ └── readme.md ├── NOTICE ├── csv-gremlin └── test-files │ ├── bad-edges-with-sets.csv │ ├── doubles.csv │ ├── header-with-spaces-edge.csv │ ├── header-with-spaces.csv │ ├── edges.csv │ ├── edges-with-bad-header.csv │ ├── edges-with-repeat-ids.csv │ ├── vertices-with-sets.csv │ ├── bad-edges.csv │ ├── bad-vertices.csv │ ├── vertices-with-bools.csv │ ├── vertices-with-repeat-ids.csv │ ├── dates.csv │ ├── vertices-quotes.csv │ ├── vertices.csv │ └── vertices-with-bad-header.csv ├── export-neptune-to-elasticsearch ├── lambda │ ├── .Python │ ├── requirements.txt │ ├── build.sh │ ├── README.md │ ├── export_neptune_to_kinesis.py │ └── kinesis_to_elasticsearch.py ├── NOTICE └── export-neptune-to-elasticsearch.png ├── neo4j-to-neptune ├── bin │ └── neo4j-to-neptune.sh ├── docs │ ├── example-bulk-load-config.yaml │ ├── example-conversion-config.yaml │ └── bulk-load-config.md └── src │ ├── main │ └── java │ │ └── com │ │ └── amazonaws │ │ └── services │ │ └── neptune │ │ ├── metadata │ │ ├── Header.java │ │ ├── PropertyValueParserPolicy.java │ │ ├── DateTimeUtils.java │ │ ├── Headers.java │ │ ├── PropertyValue.java │ │ ├── Property.java │ │ ├── Token.java │ │ ├── MultiValuedRelationshipPropertyPolicy.java │ │ ├── MultiValuedNodePropertyPolicy.java │ │ ├── PropertyValueParser.java │ │ ├── DataType.java │ │ └── ConversionConfig.java │ │ ├── util │ │ ├── Timer.java │ │ ├── Utils.java │ │ └── CSVUtils.java │ │ ├── Neo4jToNeptuneCli.java │ │ └── io │ │ ├── RawCsvPrinter.java │ │ ├── Directories.java │ │ └── OutputFile.java │ └── test │ └── java │ └── com │ └── amazonaws │ └── services │ └── neptune │ ├── metadata │ ├── MultiValuedRelationshipPropertyPolicyTest.java │ ├── PropertyValueParserTest.java │ └── MultiValuedNodePropertyPolicyTest.java │ └── util │ └── UtilsTest.java ├── neptune-serverless-evaluator ├── requirements.txt ├── LICENSE └── README.md ├── dynamic-custom-endpoints └── lambda │ ├── build.sh │ └── dynamic-custom-endpoints │ └── build.sh ├── .github ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── neo4j-to-neptune-ci.yml ├── CODE_OF_CONDUCT.md ├── neptune-export └── readme.md ├── neptune-python-utils ├── setup.py ├── neptune_python_utils │ ├── __init__.py │ ├── glue_gremlin_csv_transforms.py │ ├── glue_neptune_connection_info.py │ ├── glue_gremlin_client.py │ └── bulkload.py ├── build.sh └── build-lambda-layer.sh ├── 
glue-neptune ├── build.sh ├── glue_neptune │ ├── __init__.py │ ├── NeptuneConnectionInfo.py │ ├── GremlinCsvTransforms.py │ └── NeptuneGremlinClient.py └── readme.md ├── neptune-gremlin-client └── readme.md ├── opencypher-compatability-checker ├── input.json ├── output.json └── README.md ├── graphml2csv └── README.md ├── README.md ├── release.sh └── CONTRIBUTING.md /VERSION: -------------------------------------------------------------------------------- 1 | 12 2 | -------------------------------------------------------------------------------- /neptune-gremlin-js/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | artifacts/ 3 | **/target/ 4 | pom.xml.versionsBackup 5 | -------------------------------------------------------------------------------- /csv-to-neptune-bulk-format/.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | __pycache__ 3 | .data 4 | .vscode 5 | -------------------------------------------------------------------------------- /neptune-gremlin-js/.npmignore: -------------------------------------------------------------------------------- 1 | cdk-test-app 2 | test 3 | .eslintrc.js 4 | .gitignore 5 | 6 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/java8/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | mvn clean compile install -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Amazon Neptune Tools 2 | Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
3 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/.npmignore: -------------------------------------------------------------------------------- 1 | # CDK asset staging directory 2 | .cdk.staging 3 | cdk.out 4 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | testEnvironment: "node", 3 | } 4 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/bad-edges-with-sets.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,~from,~to,my-set:Int[] 2 | ms-001,SetTest,p1,p2,1;2;3;4;5;6 3 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/doubles.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,value:double,set:double[],optional 2 | abc-1,person,23.6,12.34;56.789, 3 | -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/lambda/.Python: -------------------------------------------------------------------------------- 1 | /usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/Python -------------------------------------------------------------------------------- /csv-gremlin/test-files/header-with-spaces-edge.csv: -------------------------------------------------------------------------------- 1 | ~id, ~label, ~from, ~to 2 | e1, likes, a1, a2 3 | e2, " likes ", a1, a2 4 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/header-with-spaces.csv: -------------------------------------------------------------------------------- 1 | ~id, ~label, type 2 | a1, animal, cat 3 | a2, " animal ", cat 4 | a3, animal , cat 5 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/edges.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,~from,~to,confidence:Double 2 | e-1,Knows,p-1,p-2, 3 | e-2,Knows,p-2,p-3,0.5 4 | e-3,Knows,p-2,p-4,5 5 | -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/NOTICE: -------------------------------------------------------------------------------- 1 | export-neptune-to-elasticsearch 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. -------------------------------------------------------------------------------- /neo4j-to-neptune/bin/neo4j-to-neptune.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | jar=$(find . 
-name neo4j-to-neptune.jar -print -quit) 4 | java -jar ${jar} "$@" 5 | -------------------------------------------------------------------------------- /neptune-serverless-evaluator/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.11.1 2 | boto3==1.22.8 3 | python_dateutil==2.8.2 4 | numpy==1.22.0 5 | requests==2.32.2 -------------------------------------------------------------------------------- /csv-gremlin/test-files/edges-with-bad-header.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,~from,target,confidence:Double 2 | e-1,Knows,p-1,p-2, 3 | e-2,Knows,p-2,p-3,0.5 4 | e-3,Knows,p-2,p-4,5 5 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | 3 | # CDK asset staging directory 4 | .cdk.staging 5 | cdk.out 6 | lambda/neptune-gremlin.js 7 | invoke-test.sh -------------------------------------------------------------------------------- /csv-gremlin/test-files/edges-with-repeat-ids.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,~from,~to 2 | e1,Knows,p1,p2 3 | e2,Knows,p2,p3 4 | e3,Knows,p3,p4 5 | e2,Knows,p5,p6 6 | e1,Knows,p6,p7 7 | -------------------------------------------------------------------------------- /csv-to-neptune-bulk-format/.images/Solution-CSVConverter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-neptune-tools/HEAD/csv-to-neptune-bulk-format/.images/Solution-CSVConverter.png -------------------------------------------------------------------------------- /dynamic-custom-endpoints/lambda/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | rm -rf target 4 | mkdir target 5 | 6 | pushd dynamic-custom-endpoints 7 | sh build.sh 8 | popd 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/export-neptune-to-elasticsearch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-neptune-tools/HEAD/export-neptune-to-elasticsearch/export-neptune-to-elasticsearch.png -------------------------------------------------------------------------------- /csv-gremlin/test-files/vertices-with-sets.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,simple,single:String,my-set:Int[],dates:Date[],oneInt:Int,oneFloat:Double,flag:Bool 2 | ms-001,SetTest,Hello,World,1;2;3;4;5;6,2020-11-17;2020-11-18,25,50,true 3 | -------------------------------------------------------------------------------- /neptune-gremlin-js/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # 0.0.5 (2022-02-09) 2 | 3 | Added a license header to `neptune-gremlin.js` and a link to a sample app in the Readme. 4 | Added dev dependencies for `retire.js`. No functional changes. 
5 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | *Issue #, if available:* 2 | 3 | *Description of changes:* 4 | 5 | 6 | By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 7 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/bad-edges.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,~from,~to,since:Date,weight:Float 2 | 1,test,p1,p3,2020-11-25,5, 3 | 1,test,p2,p4,3.2 4 | 1,test,p2,p4 5 | 1,test,p2,p4 6 | 1,test,,p4 7 | 1,test,p2, 8 | 1,test,p2,p4 9 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/bad-vertices.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,value:Int,date:Date,score:Double,str 2 | p1,Person,1,2020-13-25,X 3 | p2,Person,A,, 4 | ,Person,2,, 5 | ,Person,2, 6 | p5,Person,2,X,1.0 7 | p6,Person,1,2020-02-30,5,Hello 8 | -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/lambda/requirements.txt: -------------------------------------------------------------------------------- 1 | protobuf==3.18.3 2 | six==1.16.0 3 | elasticsearch==6.4.0 4 | rdflib==6.2.0 5 | retrying 6 | requests 7 | requests_aws4auth 8 | cachetools 9 | certifi 10 | aws-kinesis-agg==1.1.7 11 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/vertices-with-bools.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,b1:Bool,b2:Bool,b3:Bool,b4:Bool,b5:Bool,b6:Boolean,b7:Bool 2 | pid-001,Person,true,True,TRUE,false,abc,123,tRuE 3 | pid-002,Person,true,True,TRUE,false,abc,456,TRue 4 | pid-003,Person,1,0,1,0,1,0,1 5 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/java8/readme.md: -------------------------------------------------------------------------------- 1 | # Java handler dependencies 2 | 3 | This Java handler depends on two libraries that are not currently available in Maven. Before building the Java handler, run the _install-dependencies.sh_ script to install the libraries in your local Maven repository. 4 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/vertices-with-repeat-ids.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,score:Int[],extra 2 | p1,Player,55,XYZ 3 | p1,Player,58,DEF 4 | p1,Player,43, 5 | p2,Player,43, 6 | p3,Player,67, 7 | p1,Player,88,HIJ 8 | p1,Player,31, 9 | p1,Player,90;12, 10 | p2,Player,55, 11 | p3,Player,18, 12 | p4,Player,81,ABC 13 | p5,Player,6, 14 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /neptune-export/readme.md: -------------------------------------------------------------------------------- 1 | # Deprecation Notice 2 | 3 | Neptune Export has been migrated to a [new standalone repository](https://github.com/aws/neptune-export). 4 | Ongoing development and releases will take place in the new repository, and this module here will no longer be maintained. 5 | Any export tool related issues should be reported in the Issues section under the new repository. 6 | -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/lambda/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | pip install virtualenv 4 | rm -rf target 5 | rm -rf temp 6 | mkdir target 7 | virtualenv temp --python=python3.8 8 | source temp/bin/activate 9 | pip install -r requirements.txt 10 | cd temp/lib/python3.8/site-packages 11 | cp -r ../../../../*.py . 12 | zip -r ../../../../target/export-neptune-to-elasticsearch.zip ./* 13 | deactivate 14 | cd ../../../../ 15 | rm -rf temp -------------------------------------------------------------------------------- /neptune-python-utils/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='neptune_python_utils', 5 | version='1.0', 6 | description='Python 3 library that simplifies using Gremlin-Python to connect to Amazon Neptune', 7 | author='Ian Robinson', 8 | author_email='ianrob@amazon.co.uk', 9 | packages=['neptune_python_utils'], 10 | install_requires=['gremlinpython', 'requests', 'backoff', 'cchardet', 'aiodns', 'idna-ssl'], 11 | ) -------------------------------------------------------------------------------- /csv-gremlin/test-files/dates.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,date:Date 2 | P1,person,0000-11-21T00:00:00.000Z 3 | P1,person,0000-11-21T000:00:00.000Z 4 | P2,person,2022-12-19T25:00:00.000Z 5 | P3,person,2023-01-04T00:00:00.000Z 6 | P4,person,2023-01-05T12:00:30.000 7 | P5,person,1965-01-24 8 | P6,person,1981-08-31T11:60:00.000 9 | P7,person,2001-06-14T11:12:61.000 10 | P8,person,2001-06-14T11:12:xy.000 11 | P9,person,2011-xy-14T11:12:00.000 12 | P10,person,1901-01-01T11:12:00.123 13 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/vertices-quotes.csv: -------------------------------------------------------------------------------- 1 | ~id,text,moreText:String 2 | No quotes,No quotes,No quotes 3 | 'Single quotes','Single quotes','Single quotes' 4 | "Double quotes","Double quotes","Double quotes" 5 | Apostrophe',Apostrophe',Apostrophe' 6 | "Apostrophe2'","Apostrophe2'","Apostrophe2'" 7 | Dollar$,Dollar$,Dollar$ 8 | "Dollar2$","Dollar$","Dollar$" 9 | Escaped,The \"near\" future,The \"near\" future 10 | Escaped2,"The \"near\" future","The \"near\" future" 11 | -------------------------------------------------------------------------------- /glue-neptune/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | pushd . 
4 | pip install virtualenv 5 | rm -rf target 6 | rm -rf temp 7 | mkdir target 8 | virtualenv temp --python=python2.7 9 | source temp/bin/activate 10 | cd temp 11 | pip install gremlinpython 12 | cd lib/python2.7/site-packages 13 | rm -rf certifi 14 | rm -rf certifi-* 15 | cp -r ../../../../glue_neptune . 16 | zip -r glue_neptune.zip * 17 | mv glue_neptune.zip ../../../../target/glue_neptune.zip 18 | deactivate 19 | popd 20 | rm -rf temp -------------------------------------------------------------------------------- /csv-gremlin/test-files/vertices.csv: -------------------------------------------------------------------------------- 1 | ~id,~label,firstName,lastName,dob:Date,grade:Double,position:Int 2 | p-1,Person,Tommy,Frazier,1949-07-03,86.5,1 3 | p-2,Person,Angelyn,Crooks,1978-11-16,77.4,2 4 | p-3,Person,Travis,Tucker,2006-01-14,66.9,3 5 | p-4,Person,Nigel,Smith,2006-01-14,66.9,3 6 | p-5,Person,Ian,York,2006-01-14,,3 7 | p-6,Person,Jon,Wilson,2006-01-14,, 8 | p-7,Person,Jane,,2006-01-14,, 9 | p-8,Person,Frank,Jennings,2001-08-17,, 10 | p-9,Person,Rod,Arthurs,1965-02-01T09:01:30Z,88,1 11 | p-10,Person,Albert,Newyear,1999-12-31T23:59:59-0500,1,1 12 | -------------------------------------------------------------------------------- /dynamic-custom-endpoints/lambda/dynamic-custom-endpoints/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | pip install virtualenv 4 | rm -rf target 5 | rm -rf temp 6 | mkdir target 7 | virtualenv temp --python=python3.9 8 | source temp/bin/activate 9 | pushd temp 10 | cd lib/python3.9/site-packages 11 | rm -rf certifi-* 12 | rm -rf easy_install.py 13 | cp -r ../../../../*.py . 14 | zip -r dynamic_custom_endpoints.zip *.py -x "_virtualenv.py" 15 | mv dynamic_custom_endpoints.zip ../../../../../target/dynamic_custom_endpoints.zip 16 | deactivate 17 | popd 18 | rm -rf temp 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /csv-gremlin/test-files/vertices-with-bad-header.csv: -------------------------------------------------------------------------------- 1 | id,~label,firstName,lastName,dob:Date,grade:Double,position:Int 2 | p-1,Person,Tommy,Frazier,1949-07-03,86.5,1 3 | p-2,Person,Angelyn,Crooks,1978-11-16,77.4,2 4 | p-3,Person,Travis,Tucker,2006-01-14,66.9,3 5 | p-4,Person,Nigel,Smith,2006-01-14,66.9,3 6 | p-5,Person,Ian,York,2006-01-14,,3 7 | p-6,Person,Jon,Wilson,2006-01-14,, 8 | p-7,Person,Jane,,2006-01-14,, 9 | p-8,Person,Frank,Jennings,2001-08-17,, 10 | p-9,Person,Rod,Arthurs,1965-02-01T09:01:30Z,88,1 11 | p-10,Person,Albert,Newyear,1999-12-31T23:59:59-0500,1,1 12 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/README.md: -------------------------------------------------------------------------------- 1 | # Welcome to your CDK JavaScript project! 2 | 3 | This is a blank project for JavaScript development with CDK. 4 | 5 | The `cdk.json` file tells the CDK Toolkit how to execute your app. The build step is not required when using JavaScript. 
6 | 7 | ## Useful commands 8 | 9 | * `npm run test` perform the jest unit tests 10 | * `cdk deploy` deploy this stack to your default AWS account/region 11 | * `cdk diff` compare deployed stack with current state 12 | * `cdk synth` emits the synthesized CloudFormation template 13 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cdk-test-app", 3 | "version": "0.2.0", 4 | "bin": { 5 | "cdk-test-app": "bin/cdk-test-app.js" 6 | }, 7 | "scripts": { 8 | "build": "npm i && npm run synth && cd lambda && npm i && cd ..", 9 | "cdk": "cdk", 10 | "synth": "cdk synth", 11 | "test": "jest" 12 | }, 13 | "devDependencies": { 14 | "aws-cdk": "^2.1005.0", 15 | "jest": "^29.7.0" 16 | }, 17 | "dependencies": { 18 | "@aws-cdk/aws-neptune-alpha": "^2.185.0-alpha.0", 19 | "aws-cdk-lib": "2.189.1", 20 | "constructs": "^10.0.0" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /glue-neptune/glue_neptune/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. 2 | # All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). 5 | # You may not use this file except in compliance with the License. 6 | # A copy of the License is located at 7 | # 8 | # http://aws.amazon.com/apache2.0/ 9 | # 10 | # or in the "license" file accompanying this file. 11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions 13 | # and limitations under the License. 14 | 15 | __author__ = 'Ian Robinson (ianrob@amazon.com)' 16 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "node bin/cdk-test-app.js", 3 | "watch": { 4 | "include": [ 5 | "**" 6 | ], 7 | "exclude": [ 8 | "README.md", 9 | "cdk*.json", 10 | "jest.config.js", 11 | "package*.json", 12 | "yarn.lock", 13 | "node_modules", 14 | "test" 15 | ] 16 | }, 17 | "context": { 18 | "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true, 19 | "@aws-cdk/core:stackRelativeExports": true, 20 | "@aws-cdk/aws-rds:lowercaseDbIdentifier": true, 21 | "@aws-cdk/aws-lambda:recognizeVersionProps": true, 22 | "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /neptune-python-utils/neptune_python_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. 2 | # All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). 5 | # You may not use this file except in compliance with the License. 6 | # A copy of the License is located at 7 | # 8 | # http://aws.amazon.com/apache2.0/ 9 | # 10 | # or in the "license" file accompanying this file. 11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions 13 | # and limitations under the License. 
14 | 15 | __author__ = 'Ian Robinson (ianrob@amazon.com)' -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/lambda/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "lambda", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "integration-test.js", 6 | "scripts": { 7 | "test": "jest" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "dependencies": { 12 | "@aws-crypto/sha256-js": "^2.0.1", 13 | "@aws-sdk/signature-v4": "^3.110.0", 14 | "async": "^3.2.1", 15 | "aws4": "^1.11.0", 16 | "axios": "^1.8.2", 17 | "gremlin": "^3.5.1", 18 | "jsonwebtoken": "^9.0.0", 19 | "jwk-to-pem": "^2.0.5", 20 | "qs": "^6.10.1", 21 | "util": "^0.12.4", 22 | "uuid": "^8.3.2" 23 | }, 24 | "devDependencies": { 25 | "aws-sdk": "^2.1025.0", 26 | "jest": "^29.7.0" 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/python3.8/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | pushd . 4 | pip install virtualenv 5 | rm -rf target 6 | rm -rf temp 7 | mkdir target 8 | virtualenv temp 9 | source temp/bin/activate 10 | cd temp 11 | #pip install requests 12 | cd lib/python3.8/site-packages 13 | aws s3 cp s3://aws-neptune-customer-samples-us-east-1/neptune-sagemaker/bin/neptune-python-utils/neptune_python_utils.zip . 14 | unzip neptune_python_utils.zip 15 | rm -rf certifi-* 16 | rm -rf easy_install.py 17 | rm -rf six.py 18 | cp -r ../../../../*.py . 19 | zip -r stream_handler.zip ./* -x "*pycache*" -x "*.so" -x "*dist-info*" -x "*.virtualenv" -x "pip*" -x "pkg_resources*" -x "setuptools*" -x "wheel*" -x "certifi*" 20 | mv stream_handler.zip ../../../../target/stream_handler.zip 21 | deactivate 22 | popd 23 | rm -rf temp -------------------------------------------------------------------------------- /neptune-python-utils/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | pushd . 4 | sudo pip install virtualenv 5 | rm -rf target 6 | rm -rf temp 7 | mkdir target 8 | virtualenv temp --python=python3.8 9 | source temp/bin/activate 10 | cd temp 11 | pip install gremlinpython==3.5.1 12 | pip install requests 13 | pip install backoff 14 | pip install cchardet 15 | pip install aiodns 16 | pip install idna-ssl 17 | cd lib/python3.8/site-packages 18 | rm -rf certifi-* 19 | rm -rf easy_install.py 20 | rm -rf six.py 21 | cp -r ../../../../neptune_python_utils . 
22 | zip -r neptune_python_utils.zip ./* -x "*pycache*" -x "*.so" -x "*dist-info*" -x "*.virtualenv" -x "pip*" -x "pkg_resources*" -x "setuptools*" -x "wheel*" -x "certifi*" 23 | mv neptune_python_utils.zip ../../../../target/neptune_python_utils.zip 24 | deactivate 25 | popd 26 | rm -rf temp -------------------------------------------------------------------------------- /neo4j-to-neptune/docs/example-bulk-load-config.yaml: -------------------------------------------------------------------------------- 1 | # Example bulk load configuration for Neptune 2 | # This file demonstrates the configuration options for automated bulk loading 3 | # of converted CSV data into Amazon Neptune 4 | 5 | # Required S3 Configuration 6 | bucketName: my-neptune-data-bucket 7 | 8 | # Required Neptune Configuration 9 | neptuneEndpoint: my-cluster.cluster-abc123.us-east-1.neptune.amazonaws.com 10 | 11 | # IAM Configuration 12 | iamRoleArn: arn:aws:iam::123456789012:role/NeptuneLoadFromS3Role 13 | 14 | # Optional S3 Configuration 15 | s3Prefix: neptune 16 | 17 | # Optional Load Performance Configuration 18 | parallelism: OVERSUBSCRIBE # Options: LOW, MEDIUM, HIGH, OVERSUBSCRIBE 19 | 20 | # Optional Monitoring Configuration 21 | monitor: true # Set to false if you want to manually monitor load progress 22 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/java8/install-dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | pushd . 4 | rm -rf neptune-streams-layer.zip 5 | rm -rf lib 6 | aws s3 cp s3://aws-neptune-customer-samples/neptune-stream/lambda/java8/neptune-streams-layer.zip . 7 | unzip neptune-streams-layer.zip 8 | mv java/lib/ . 9 | rm -rf java 10 | rm -rf neptune-streams-layer.zip 11 | mvn install:install-file -Dfile=lib/amazon-neptune-streams-replicator-core-1.0.0.jar \ 12 | -DgroupId=com.amazonaws \ 13 | -DartifactId=amazon-neptune-streams-replicator-core \ 14 | -Dversion=1.0.0 \ 15 | -Dpackaging=jar 16 | mvn install:install-file -Dfile=lib/amazon-neptune-streams-replicator-lambda-1.0.0.jar \ 17 | -DgroupId=com.amazonaws \ 18 | -DartifactId=amazon-neptune-streams-replicator-lambda \ 19 | -Dversion=1.0.0 \ 20 | -Dpackaging=jar 21 | popd 22 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/streams-to-firehose/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | pushd . 4 | pip install virtualenv 5 | rm -rf target 6 | rm -rf temp 7 | mkdir target 8 | virtualenv temp 9 | source temp/bin/activate 10 | cd temp 11 | #pip install requests 12 | cd lib/python3.8/site-packages 13 | aws s3 cp s3://aws-neptune-customer-samples-us-east-1/neptune-sagemaker/bin/neptune-python-utils/neptune_python_utils.zip . 14 | unzip neptune_python_utils.zip 15 | rm -rf certifi-* 16 | rm -rf easy_install.py 17 | rm -rf six.py 18 | cp -r ../../../../*.py . 
19 | zip -r neptune_firehose_handler.zip ./* -x "*pycache*" -x "*.so" -x "*dist-info*" -x "*.virtualenv" -x "pip*" -x "pkg_resources*" -x "setuptools*" -x "wheel*" -x "certifi*" 20 | mv neptune_firehose_handler.zip ../../../../target/neptune_firehose_handler.zip 21 | deactivate 22 | popd 23 | rm -rf temp -------------------------------------------------------------------------------- /neptune-gremlin-client/readme.md: -------------------------------------------------------------------------------- 1 | # Deprecation Notice 2 | 3 | The Gremlin Client for Amazon Neptune has been migrated to a [new standalone repository](https://github.com/aws/neptune-gremlin-client). Ongoing development and releases will take place in the new repository, and this module here will no longer be maintained. 4 | 5 | Any Neptune Gremlin Client related issues should be reported in the Issues section under the new repository. 6 | 7 | Version 1.1.0 of the client is the last release of the client from this repository. The new repository is accompanied by a release of version 2.0.0 of the Neptune Gremlin Client. 8 | 9 | See [Migrating from version 1 of the Neptune Gremlin Client](https://github.com/aws/neptune-gremlin-client#migrating-from-version-1-of-the-neptune-gremlin-client) if you are migrating an application from version 1.x.x of the Neptune Gremlin Client to version 2.x.x. 10 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/Header.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | public interface Header { 16 | 17 | void updateDataType(DataType newDataType); 18 | 19 | void setIsMultiValued(boolean isMultiValued); 20 | 21 | String value(); 22 | } 23 | -------------------------------------------------------------------------------- /neptune-serverless-evaluator/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /neptune-python-utils/build-lambda-layer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | pushd . 4 | pip install virtualenv 5 | rm -rf target 6 | rm -rf temp 7 | mkdir target 8 | virtualenv temp --python=python3.8 9 | source temp/bin/activate 10 | cd temp 11 | pip install gremlinpython==3.5.1 12 | pip install requests 13 | pip install backoff 14 | pip install cchardet 15 | pip install aiodns 16 | pip install idna-ssl 17 | pushd lib/python3.8/site-packages 18 | #rm -rf certifi-* 19 | rm -rf easy_install.py 20 | rm -rf six.py 21 | cp -r ../../../../neptune_python_utils . 22 | popd 23 | mkdir python 24 | mv lib python/lib 25 | zip -r neptune_python_utils_lambda_layer.zip python \ 26 | -x "*pycache*" \ 27 | -x "*.so" \ 28 | -x "*dist-info*" \ 29 | -x "*.virtualenv" \ 30 | -x "*/pip*" \ 31 | -x "*/pkg_resources*" \ 32 | -x "*/setuptools*" \ 33 | -x "*/wheel*" \ 34 | -x "*distutils*" \ 35 | -x "*/_virtualenv.*" \ 36 | #-x "*/certifi*" 37 | deactivate 38 | popd 39 | mv temp/neptune_python_utils_lambda_layer.zip target/neptune_python_utils_lambda_layer.zip 40 | rm -rf temp -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/PropertyValueParserPolicy.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import com.fasterxml.jackson.databind.node.ArrayNode; 16 | 17 | public interface PropertyValueParserPolicy { 18 | PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser); 19 | void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser); 20 | } 21 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/bin/cdk-test-app.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | const cdk = require("aws-cdk-lib") 4 | const { CdkTestAppStack } = require("../lib/cdk-test-app-stack") 5 | 6 | const app = new cdk.App() 7 | new CdkTestAppStack(app, "neptune-gremlin-test", { 8 | /* If you don't specify 'env', this stack will be environment-agnostic. 9 | * Account/Region-dependent features and context lookups will not work, 10 | * but a single synthesized template can be deployed anywhere. */ 11 | 12 | /* Uncomment the next line to specialize this stack for the AWS Account 13 | * and Region that are implied by the current CLI configuration. 
*/ 14 | // env: { account: process.env.CDK_DEFAULT_ACCOUNT, region: process.env.CDK_DEFAULT_REGION }, 15 | 16 | /* Uncomment the next line if you know exactly what Account and Region you 17 | * want to deploy the stack to. */ 18 | // env: { account: '123456789012', region: 'us-east-1' }, 19 | 20 | /* For more information, see https://docs.aws.amazon.com/cdk/latest/guide/environments.html */ 21 | }) 22 | -------------------------------------------------------------------------------- /neptune-gremlin-js/.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "env": { 3 | "browser": true, 4 | "commonjs": true, 5 | "es2021": true, 6 | "jest": true, 7 | "node": true, 8 | }, 9 | "extends": ["eslint:recommended"], 10 | "parserOptions": { 11 | "ecmaVersion": 12, 12 | }, 13 | "rules": { 14 | "indent": [ 15 | "error", 16 | 4, 17 | { "SwitchCase": 1 }, 18 | ], 19 | "linebreak-style": [ 20 | "error", 21 | "unix", 22 | ], 23 | "quotes": [ 24 | "error", 25 | "double", 26 | ], 27 | "semi": [ 28 | "error", 29 | "never", 30 | ], 31 | "no-new": ["off"], 32 | "comma-dangle": ["error", "always-multiline"], 33 | "padded-blocks": ["off"], 34 | }, 35 | ignorePatterns: [ 36 | "node_modules/", 37 | "cdk.out/", 38 | "vendor/", 39 | "build/", 40 | "dist/", 41 | "plugins.js", 42 | ], 43 | } 44 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/util/Timer.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 
11 | */ 12 | 13 | package com.amazonaws.services.neptune.util; 14 | 15 | public class Timer implements AutoCloseable { 16 | 17 | private final long start = System.currentTimeMillis(); 18 | 19 | @Override 20 | public void close() throws Exception { 21 | System.err.println(); 22 | System.err.println(String.format("Completed in %s second(s)", (System.currentTimeMillis() - start) / 1000)); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /opencypher-compatability-checker/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "targetSystem": "NA", 3 | "queries": [ 4 | { 5 | "id": 1, 6 | "query": "MATCH (n:Person) RETURN n LIMIT 10" 7 | }, 8 | { 9 | "id": 2, 10 | "query": "MATCH (team:Team {name: 'Team C'}) CALL { WITH team MATCH (p:Player)-[:PLAYS_FOR]->(team) WITH collect(p) AS players FOREACH (player IN players | SET player.retired = false) RETURN size(players) AS updatedPlayers } RETURN updatedPlayers" 11 | }, 12 | { 13 | "id": 3, 14 | "query": "MATCH (p:Product)-[:BELONGS_TO]->(c:Category) RETURN p.name, c.name" 15 | }, 16 | { 17 | "id": 4, 18 | "query": "RETURN apoc.coll.intersection([1,2,3,4,5], [3,4,5]) AS output" 19 | }, 20 | { 21 | "id": 5, 22 | "query": "RETURN reduce(product = 1, n IN [1, 2, 3] | product / n)" 23 | }, 24 | { 25 | "id": 6, 26 | "query": "CALL apoc.json.validate('{\"foo\": [{\"baz\": 18446744062065078016838}],\"baz\": 18446744062065078016838}', '$')" 27 | }, 28 | { 29 | "id": 7, 30 | "query": "gggg" 31 | } 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /neptune-gremlin-js/test/aws-neptune-gremlin.test.js: -------------------------------------------------------------------------------- 1 | const {getHeaders} = require("../neptune-gremlin.js") 2 | 3 | test("getHeaders", async () => { 4 | 5 | const expected = { 6 | Host: "myneptunecluster.us-east-1.neptune.amazonaws.com:8182", 7 | "X-Amz-Security-Token": "................", 8 | "X-Amz-Date": "20211123T191311Z", 9 | Authorization: "AWS4-HMAC-SHA256 Credential=.../20211123/us-east-1/neptune-db/aws4_request, SignedHeaders=host;x-amz-date;x-amz-security-token, Signature=...", 10 | } 11 | 12 | const headers = await getHeaders( 13 | "myneptunecluster.us-east-1.neptune.amazonaws.com", 14 | 8182, 15 | { 16 | accessKey: "...", 17 | secretKey: "...", 18 | sessionToken: "AAAAAA1111111", 19 | region: "us-east-1", 20 | }, 21 | "/gremlin") 22 | 23 | console.log(headers) 24 | 25 | expect(headers.host).toEqual(expected.Host) 26 | expect(headers["x-amz-security-token"]).toBeTruthy() // ? 27 | expect(headers["x-amz-date"].length).toEqual(16) 28 | expect(headers.authorization.indexOf("AWS4-HMAC-SHA256 Credential=")).toEqual(0) 29 | 30 | }) 31 | 32 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/DateTimeUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. 
See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import org.joda.time.format.DateTimeFormatter; 16 | import org.joda.time.format.ISODateTimeFormat; 17 | 18 | import java.util.Date; 19 | 20 | public class DateTimeUtils { 21 | 22 | private static final DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.dateTimeParser().withZoneUTC(); 23 | 24 | public static Date parseISODate(final String dateStr) { 25 | return dateTimeFormatter.parseDateTime(dateStr).toDate(); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/util/Utils.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.util; 14 | 15 | public class Utils { 16 | private Utils() {} 17 | /** 18 | * Format file size for display 19 | */ 20 | public static String formatFileSize(long bytes) { 21 | if (bytes < 1024) return bytes + " B"; 22 | if (bytes < 1024 * 1024) return String.format("%.1f KB", bytes / 1024.0); 23 | if (bytes < 1024 * 1024 * 1024) return String.format("%.1f MB", bytes / (1024.0 * 1024.0)); 24 | return String.format("%.1f GB", bytes / (1024.0 * 1024.0 * 1024.0)); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /.github/workflows/neo4j-to-neptune-ci.yml: -------------------------------------------------------------------------------- 1 | name: Neo4j-to-Neptune CI 2 | 3 | on: 4 | workflow_dispatch: 5 | pull_request: 6 | paths: 7 | - 'neo4j-to-neptune/**' 8 | branches: 9 | - master 10 | push: 11 | paths: 12 | - 'neo4j-to-neptune/**' 13 | branches: 14 | - master 15 | 16 | jobs: 17 | build-and-test: 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - name: Checkout code 22 | uses: actions/checkout@v4 23 | 24 | - name: Set up JDK 17 25 | uses: actions/setup-java@v4 26 | with: 27 | java-version: '17' 28 | distribution: 'corretto' 29 | 30 | - name: Cache Maven dependencies 31 | uses: actions/cache@v4 32 | with: 33 | path: ~/.m2 34 | key: ${{ runner.os }}-m2-${{ hashFiles('neo4j-to-neptune/pom.xml') }} 35 | restore-keys: ${{ runner.os }}-m2 36 | 37 | - name: Maven install and tests 38 | working-directory: ./neo4j-to-neptune 39 | run: mvn clean install --batch-mode --fail-at-end 40 | 41 | - name: Upload test results 42 | uses: actions/upload-artifact@v4 43 | if: always() 44 | with: 45 | name: test-results 46 | path: neo4j-to-neptune/target/surefire-reports/ 47 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/Headers.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 Amazon.com, Inc. or its affiliates. 
All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import java.util.ArrayList; 16 | import java.util.List; 17 | import java.util.stream.Collectors; 18 | 19 | class Headers { 20 | 21 | private final List<Header>
headers = new ArrayList<>(); 22 | 23 | void add(Header header) { 24 | headers.add(header); 25 | } 26 | 27 | Header get(int index){ 28 | return headers.get(index); 29 | } 30 | 31 | List<String> values(){ 32 | return headers.stream().map(Header::value).collect(Collectors.toList()); 33 | } 34 | 35 | public int count() { 36 | return headers.size(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/PropertyValue.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | class PropertyValue { 16 | 17 | private final String value; 18 | private final boolean isMultiValued; 19 | private final DataType dataType; 20 | 21 | PropertyValue(String value, boolean isMultiValued, DataType dataType) { 22 | this.value = value; 23 | this.isMultiValued = isMultiValued; 24 | this.dataType = dataType; 25 | } 26 | 27 | String value() { 28 | return value; 29 | } 30 | 31 | boolean isMultiValued() { 32 | return isMultiValued; 33 | } 34 | 35 | public DataType dataType() { 36 | return dataType; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/java8/src/main/java/utils/EnvironmentVariablesUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 
11 | */ 12 | 13 | package utils; 14 | 15 | public class EnvironmentVariablesUtils { 16 | public static String getMandatoryEnv(String name) { 17 | 18 | if (isNullOrEmpty(System.getenv(name))) { 19 | 20 | throw new IllegalStateException(String.format("Missing environment variable: %s", name)); 21 | } 22 | return System.getenv(name); 23 | } 24 | 25 | public static String getOptionalEnv(String name, String defaultValue) { 26 | if (isNullOrEmpty(System.getenv(name))) { 27 | return defaultValue; 28 | } 29 | return System.getenv(name); 30 | } 31 | 32 | private static boolean isNullOrEmpty(String value) { 33 | return value == null || value.isEmpty(); 34 | } 35 | } -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/lambda/README.md: -------------------------------------------------------------------------------- 1 | # Export Neptune to Elasticsearch Lambda Functions 2 | 3 | This stack deploys two lambda functions: 4 | - export-neptune-to-kinesis-\<stack-id\> 5 | - kinesis-to-elasticsearch-\<stack-id\> 6 | 7 | Both lambda functions share the same code package but use different handler configurations. The following instructions detail how to build the unified code package for both of these lambda functions. This uses an included `build.sh` script to create a target ZIP file that can be directly pushed to AWS Lambda. 8 | 9 | NOTE: Each lambda function also has dependencies on other Lambda Layers. The Lambda Layers are not part of this code repository and are deployed as part of the base [Neptune Streams Poller stack](https://docs.aws.amazon.com/neptune/latest/userguide/full-text-search-cfn-create.html). 10 | 11 | ## Build 12 | 13 | The entire package can be built using the following: 14 | 15 | `sh build.sh` 16 | 17 | This will create a new `target` directory with the ZIP file package used in both lambda functions. 18 | 19 | To update either lambda function, you can use: 20 | 21 | `aws lambda update-function-code --function-name export-neptune-to-kinesis-<stack-id> --zip-file fileb://./target/export-neptune-to-elasticsearch.zip` 22 | 23 | or 24 | 25 | `aws lambda update-function-code --function-name kinesis-to-elasticsearch-<stack-id> --zip-file fileb://./target/export-neptune-to-elasticsearch.zip` 26 | 27 | -------------------------------------------------------------------------------- /glue-neptune/readme.md: -------------------------------------------------------------------------------- 1 | # glue-neptune 2 | 3 | __Updated Feb 2020: This library is now deprecated in favour of [_neptune-python-utils_](https://github.com/awslabs/amazon-neptune-tools/tree/master/neptune-python-utils)__ 4 | 5 | _glue-neptune_ is a Python library for AWS Glue that helps you write data to Amazon Neptune from Glue jobs. 6 | 7 | With _glue-neptune_ you can: 8 | 9 | - Get Neptune connection information from the Glue Data Catalog 10 | - Create label and node and edge ID columns in DynamicFrames, named in accordance with the Neptune CSV bulk load format for property graphs 11 | - Write from DynamicFrames directly to Neptune 12 | 13 | ## Build 14 | 15 | `sh build.sh` 16 | 17 | This creates a zip file: `target/glue_neptune.zip`. Copy this zip file to an S3 bucket. 18 | 19 | You can then refer to this library from your Glue Development Endpoint or Glue job. See [Using Python Libraries with AWS Glue](https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-libraries.html). 
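To make the column conventions concrete, here is a minimal sketch using the plain PySpark DataFrame API (not the _glue-neptune_ API itself; the table and column names are hypothetical) that adds the `~id` and `~label` columns the Neptune CSV bulk load format expects for vertices. _glue-neptune_ applies the equivalent transformation to Glue DynamicFrames.

```python
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, concat, lit

spark = SparkSession.builder.appName("neptune-bulk-format-sketch").getOrCreate()

# Hypothetical relational rows to be written as 'person' vertices
people = spark.createDataFrame(
    [(1, "alice"), (2, "bob")],
    ["person_id", "name"],
)

# The Neptune bulk load format for property graphs expects '~id' and '~label'
# columns on vertex data
vertices = (
    people
    .withColumn("~id", concat(lit("person-"), col("person_id").cast("string")))
    .withColumn("~label", lit("person"))
    .drop("person_id")
)

vertices.show()  # columns: name, ~id, ~label
```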
20 | 21 | ## Examples 22 | 23 | See [Migrating from MySQL to Amazon Neptune using AWS Glue](https://github.com/aws-samples/amazon-neptune-samples/tree/master/gremlin/glue-neptune). 24 | 25 | ## Cross Account/Region Datasources 26 | If you have a datasource in a different region and/or different account from Glue and your Neptune database, you can follow the instructions in this [blog](https://aws.amazon.com/blogs/big-data/create-cross-account-and-cross-region-aws-glue-connections/) to allow access. 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /neptune-gremlin-js/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "neptune-gremlin", 3 | "version": "0.0.7", 4 | "description": "An SDK for querying an Amazon Neptune graph database using gremlin", 5 | "main": "neptune-gremlin.js", 6 | "homepage": "https://github.com/awslabs/amazon-neptune-tools/neptune-gremlin-js", 7 | "bugs": { 8 | "url": "https://github.com/awslabs/amazon-neptune-tools/issues" 9 | }, 10 | "repository": { 11 | "type": "git", 12 | "url": "https://github.com/awslabs/amazon-neptune-tools" 13 | }, 14 | "scripts": { 15 | "test": "jest", 16 | "build": "npm i && npm run lint && cp neptune-gremlin.js ./cdk-test-app/lambda && cd cdk-test-app && npm run build && cd .. && npm run test", 17 | "lint": "eslint . --fix" 18 | }, 19 | "keywords": [ 20 | "aws", 21 | "amazon", 22 | "neptune", 23 | "gremlin", 24 | "tinkerpop", 25 | "graph" 26 | ], 27 | "author": "Eric Z. Beard", 28 | "license": "Apache-2.0", 29 | "dependencies": { 30 | "@aws-crypto/sha256-js": "^5.2.0", 31 | "@smithy/signature-v4": "^5.0.1", 32 | "async": "^3.2.2", 33 | "gremlin": "^3.7.2" 34 | }, 35 | "devDependencies": { 36 | "@swc/core": "^1.2.137", 37 | "@swc/wasm": "^1.2.137", 38 | "bufferutil": "^4.0.6", 39 | "canvas": "^2.9.0", 40 | "encoding": "^0.1.13", 41 | "eslint": "^8.4.1", 42 | "jest": "^27.4.3", 43 | "node-notifier": "^10.0.1", 44 | "retire": "^4.2.1", 45 | "ts-node": "^10.5.0", 46 | "utf-8-validate": "^5.0.8" 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/java8/src/main/java/stream_handler/AbstractStreamHandler.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 
11 | */ 12 | 13 | package stream_handler; 14 | 15 | import com.amazonaws.neptune.StreamRecordsHandler; 16 | import com.amazonaws.neptune.config.CredentialsConfig; 17 | 18 | import java.util.Map; 19 | 20 | public abstract class AbstractStreamHandler implements StreamRecordsHandler { 21 | 22 | protected final String neptuneEndpoint; 23 | protected final Integer neptunePort; 24 | protected final CredentialsConfig credentialsConfig; 25 | protected final Map additionalParams; 26 | 27 | public AbstractStreamHandler(String neptuneEndpoint, Integer neptunePort, CredentialsConfig credentialsConfig, Map additionalParams) { 28 | this.neptuneEndpoint = neptuneEndpoint; 29 | this.neptunePort = neptunePort; 30 | this.credentialsConfig = credentialsConfig; 31 | this.additionalParams = additionalParams; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/Property.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | public class Property implements Header { 16 | 17 | private final String name; 18 | private boolean isMultiValued = false; 19 | private DataType dataType = DataType.None; 20 | 21 | Property(String name) { 22 | this.name = name; 23 | } 24 | 25 | @Override 26 | public void updateDataType(DataType newDataType) { 27 | this.dataType = DataType.getBroadestType(dataType, newDataType); 28 | } 29 | 30 | @Override 31 | public void setIsMultiValued(boolean isMultiValued) { 32 | this.isMultiValued = isMultiValued; 33 | } 34 | 35 | @Override 36 | public String value() { 37 | return isMultiValued ? 38 | String.format("%s%s[]", name, dataType.typeDescription()) : 39 | String.format("%s%s", name, dataType.typeDescription()); 40 | } 41 | 42 | public String getName() { 43 | return name; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/Neo4jToNeptuneCli.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 
11 | */
12 | 
13 | package com.amazonaws.services.neptune;
14 | 
15 | import com.github.rvesse.airline.annotations.Cli;
16 | import com.github.rvesse.airline.help.Help;
17 | 
18 | @Cli(name = "neo4j-to-neptune.sh",
19 |         description = "Export data from Neo4j to Neptune",
20 |         defaultCommand = Help.class,
21 |         commands = {
22 |                 ConvertCsv.class,
23 |                 Help.class
24 |         })
25 | public class Neo4jToNeptuneCli {
26 |     public static void main(String[] args) {
27 | 
28 |         com.github.rvesse.airline.Cli<Runnable> cli = new com.github.rvesse.airline.Cli<>(Neo4jToNeptuneCli.class);
29 | 
30 |         try {
31 |             Runnable cmd = cli.parse(args);
32 |             cmd.run();
33 |         } catch (Exception e) {
34 | 
35 |             System.err.println(e.getMessage());
36 |             System.err.println();
37 | 
38 |             Runnable cmd = cli.parse("help", args[0]);
39 |             cmd.run();
40 | 
41 |             System.exit(-1);
42 |         }
43 |     }
44 | }
45 | --------------------------------------------------------------------------------
/neptune-streams-utils/examples/streams-to-firehose/readme.md:
--------------------------------------------------------------------------------
1 | # streams-to-firehose
2 | 
3 | This Neptune Streams handler publishes Neptune Streams records to an Amazon Kinesis Data Firehose delivery stream.
4 | 
5 | ## Installing
6 | 
7 | 1. Build the handler using the _build.sh_ file.
8 | 2. Upload the _neptune_firehose_handler.zip_ file to an S3 bucket.
9 | 3. Create a Kinesis Data Firehose delivery stream called 'neptune-firehose'.
10 | 4. Create an IAM policy called 'neptune-firehose-handler-policy' using the snippet below, replacing the `<region>` and `<account-id>` placeholders.
11 | 5. Provision the handler using the _provision_neptune_streams_handler.py_ script in the _provisioning_ folder. Ensure you supply the correct delivery stream name and IAM policy ARN.
12 | 
13 | 
14 | ### Example IAM policy
15 | 
16 | ```
17 | {
18 |     "Version": "2012-10-17",
19 |     "Statement": [
20 |         {
21 |             "Effect": "Allow",
22 |             "Action": "firehose:PutRecordBatch",
23 |             "Resource": "arn:aws:firehose:<region>:<account-id>:deliverystream/neptune-firehose"
24 |         }
25 |     ]
26 | }
27 | ```
28 | 
29 | ### Example provisioning command
30 | 
31 | Here's an example of using the script to install the handler (after building the handler and uploading it to S3):
32 | 
33 | ```
34 | python provision_neptune_streams_handler.py \
35 |     --cluster_id=neptunedbcluster-abcdefghijkl \
36 |     --handler_s3_bucket=my-bucket \
37 |     --handler_s3_key=neptune_firehose_handler.zip \
38 |     --additional_params='{"delivery_stream_name": "neptune-firehose"}' \
39 |     --managed_policy_arns='["arn:aws:iam::123456789:policy/neptune-firehose-handler-policy"]' \
40 |     --region=us-east-1
41 | ```
42 | --------------------------------------------------------------------------------
/neo4j-to-neptune/docs/example-conversion-config.yaml:
--------------------------------------------------------------------------------
1 | # Example label mapping and filtering configuration for Neo4j to Neptune conversion
2 | # This file demonstrates how to map vertex and edge labels and skip certain records during conversion
3 | 
4 | # Vertex id transformation configuration
5 | vertexIdTransformation:
6 |   ~id: "{_labels}_{born}_{name}_{releases}_{tagline}_{title}_{_id}"
7 | 
8 | # Edge id transformation configuration
9 | # You can use {_from} or {~from} to reference the transformed source vertex ID
10 | # You can use {_to} or {~to} to reference the transformed target vertex ID
11 | edgeIdTransformation:
12 |   ~id: "{_start}!{_end}!{_type}!{~from}!{~to}!{~label}!{_id}"
13 | 
14 | # Vertex label mappings
15 | # Format: OldLabel: NewLabel
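# For example, with the mappings below a node exported from Neo4j with the label
# 'Person' is written to the Neptune bulk load CSV with the vertex label 'Individual'.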
16 | vertexLabels: 17 | Person: Individual 18 | Company: Organization 19 | Product: Item 20 | Location: Place 21 | User: Customer 22 | 23 | # Edge label mappings 24 | # Format: OLD_RELATIONSHIP_TYPE: NEW_RELATIONSHIP_TYPE 25 | edgeLabels: 26 | WORKS_FOR: EMPLOYED_BY 27 | LIVES_IN: RESIDES_IN 28 | OWNS: POSSESSES 29 | KNOWS: CONNECTED_TO 30 | PURCHASED: BOUGHT 31 | MANAGES: SUPERVISES 32 | 33 | # Skip vertices configuration 34 | skipVertices: 35 | # Skip vertices by their specific IDs 36 | byId: 37 | - "vertex_123" 38 | - "vertex_456" 39 | - "user_999" 40 | 41 | # Skip vertices by their labels (any vertex with these labels will be skipped) 42 | byLabel: 43 | - "TestData" 44 | - "Deprecated" 45 | - "TempNode" 46 | 47 | # Skip edges configuration 48 | skipEdges: 49 | # Skip edges by their relationship types 50 | byLabel: 51 | - "TEMP_RELATIONSHIP" 52 | - "DEBUG_LINK" 53 | - "OLD_CONNECTION" 54 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/Token.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | public class Token implements Header { 16 | 17 | static final Token NEO4J_ID = new Token("_id"); 18 | static final Token NEO4J_LABELS = new Token("_labels"); 19 | static final Token NEO4J_START = new Token("_start"); 20 | static final Token NEO4J_END = new Token("_end"); 21 | static final Token NEO4J_TYPE = new Token("_type"); 22 | 23 | static final Token GREMLIN_ID = new Token("~id"); 24 | static final Token GREMLIN_LABEL = new Token("~label"); 25 | static final Token GREMLIN_FROM = new Token("~from"); 26 | static final Token GREMLIN_TO = new Token("~to"); 27 | 28 | private final String name; 29 | 30 | private Token(String name) { 31 | this.name = name; 32 | } 33 | 34 | @Override 35 | public void updateDataType(DataType newDataType) { 36 | // Do nothing 37 | } 38 | 39 | @Override 40 | public void setIsMultiValued(boolean isMultiValued) { 41 | // Do nothing 42 | } 43 | 44 | @Override 45 | public String value() { 46 | return name; 47 | } 48 | 49 | public static String valueWithCurlyBraces(Token token) { 50 | return "{" + token.value() + "}"; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /graphml2csv/README.md: -------------------------------------------------------------------------------- 1 | # GraphML 2 Neptune CSV 2 | 3 | This Python script provides a utility to convert GraphML files into the CSV format that is used by Amazon Neptune for [Bulk Loading](https://docs.aws.amazon.com/neptune/latest/userguide/bulk-load-tutorial-format-gremlin.html). This script is compatible with Python2 and Python3. 4 | 5 | ## Usage 6 | 7 | ``` 8 | Usage: graphml2csv.py [options] 9 | 10 | Copyright 2018 Amazon.com, Inc. or its affiliates. 
11 | Licensed under the Apache License 2.0 http://aws.amazon.com/apache2.0/
12 | 
13 | Options:
14 |   --version             show program's version number and exit
15 |   -h, --help            show this help message and exit
16 |   -i FILE, --in=FILE    set input path [default: none]
17 |   -d DELIMITER, --delimiter=DELIMITER
18 |                         Set the output file delimiter [default: ,]
19 |   -e ENCODING, --encoding=ENCODING
20 |                         Set the input file encoding [default: utf-8]
21 | 
22 | A utility python script to convert GraphML files into the Amazon Neptune CSV
23 | format for bulk ingestion. See
24 | https://docs.aws.amazon.com/neptune/latest/userguide/bulk-load-tutorial-format-gremlin.html.
25 | ```
26 | 
27 | ## Example: Using the TinkerPop modern graph
28 | 
29 | Download the tinkerpop-modern.xml GraphML file.
30 | 
31 | ```
32 | $ curl https://raw.githubusercontent.com/apache/tinkerpop/master/data/tinkerpop-modern.xml -o tinkerpop-modern.xml
33 | ```
34 | 
35 | Run the Python script to produce two CSV files: one for nodes and one for edges.
36 | 
37 | ```
38 | $ ./graphml2csv.py -i tinkerpop-modern.xml
39 | infile = tinkerpop-modern.xml
40 | Processing tinkerpop-modern.xml
41 | Wrote 6 nodes and 18 attributes to tinkerpop-modern-nodes.csv.
42 | Wrote 6 edges and 12 attributes to tinkerpop-modern-edges.csv.
43 | ```
44 | 
45 | Upload the CSV files to your S3 bucket and [bulk load](https://docs.aws.amazon.com/neptune/latest/userguide/bulk-load.html) them into Neptune.
46 | --------------------------------------------------------------------------------
/neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/util/CSVUtils.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | Licensed under the Apache License, Version 2.0 (the "License").
4 | You may not use this file except in compliance with the License.
5 | A copy of the License is located at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | or in the "license" file accompanying this file. This file is distributed
8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
9 | express or implied. See the License for the specific language governing
10 | permissions and limitations under the License.
11 | */
12 | 
13 | package com.amazonaws.services.neptune.util;
14 | 
15 | import org.apache.commons.csv.CSVFormat;
16 | import org.apache.commons.csv.CSVParser;
17 | import org.apache.commons.csv.CSVRecord;
18 | 
19 | import java.io.File;
20 | import java.io.IOException;
21 | import java.nio.charset.StandardCharsets;
22 | import java.nio.file.Path;
23 | import java.util.List;
24 | 
25 | public class CSVUtils {
26 |     private CSVUtils() {}
27 | 
28 |     public static CSVParser newParser(File file) throws IOException {
29 |         return newParser(file.toPath());
30 |     }
31 | 
32 |     public static CSVParser newParser(Path filePath) throws IOException {
33 |         return CSVParser.parse(filePath, StandardCharsets.UTF_8, CSVFormat.DEFAULT);
34 |     }
35 | 
36 |     public static CSVRecord firstRecord(String s) {
37 |         try {
38 |             CSVParser parser = CSVParser.parse(s, CSVFormat.DEFAULT);
39 |             List<CSVRecord> records = parser.getRecords();
40 |             if (records.isEmpty()) {
41 |                 throw new IllegalArgumentException("Unable to find first record: " + s);
42 |             }
43 |             return records.get(0);
44 |         } catch (IOException e) {
45 |             throw new IllegalArgumentException("Unable to find first record: " + s);
46 |         }
47 |     }
48 | }
49 | --------------------------------------------------------------------------------
/neo4j-to-neptune/src/test/java/com/amazonaws/services/neptune/metadata/MultiValuedRelationshipPropertyPolicyTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | Licensed under the Apache License, Version 2.0 (the "License").
4 | You may not use this file except in compliance with the License.
5 | A copy of the License is located at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | or in the "license" file accompanying this file. This file is distributed
8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
9 | express or implied. See the License for the specific language governing
10 | permissions and limitations under the License.
11 | */
12 | 
13 | package com.amazonaws.services.neptune.metadata;
14 | 
15 | import org.junit.Test;
16 | 
17 | import static org.junit.Assert.*;
18 | 
19 | public class MultiValuedRelationshipPropertyPolicyTest {
20 | 
21 |     @Test
22 |     public void shouldReturnStringPropertyValueIfPolicyIsLeaveAsString() {
23 | 
24 |         String value = "[\"toy\",\"electronics\",\"gifts\"]";
25 | 
26 |         PropertyValueParser parser = new PropertyValueParser(MultiValuedRelationshipPropertyPolicy.LeaveAsString, "", false);
27 |         PropertyValue propertyValue = parser.parse(value);
28 | 
29 |         assertEquals("\"[\"\"toy\"\",\"\"electronics\"\",\"\"gifts\"\"]\"", propertyValue.value());
30 |         assertFalse(propertyValue.isMultiValued());
31 |     }
32 | 
33 |     @Test
34 |     public void shouldThrowExceptionIfPolicyIsHalt() {
35 |         String value = "[\"toy\",\"electronics\",\"gifts\"]";
36 | 
37 |         PropertyValueParser parser = new PropertyValueParser(MultiValuedRelationshipPropertyPolicy.Halt, "", false);
38 | 
39 |         try {
40 |             parser.parse(value);
41 |             fail();
42 |         } catch (RuntimeException e) {
43 |             assertEquals("Halt: found multivalued relationship property value", e.getMessage());
44 | 
45 |         }
46 |     }
47 | }
48 | --------------------------------------------------------------------------------
/neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/MultiValuedRelationshipPropertyPolicy.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Amazon.com, Inc.
or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import com.fasterxml.jackson.databind.node.ArrayNode; 16 | 17 | public enum MultiValuedRelationshipPropertyPolicy implements PropertyValueParserPolicy { 18 | 19 | LeaveAsString { 20 | @Override 21 | public PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser) { 22 | return parser.stringValue(s); 23 | } 24 | 25 | @Override 26 | public void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser) { 27 | // Do nothing 28 | } 29 | }, 30 | Halt { 31 | @Override 32 | public PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser) { 33 | throw new RuntimeException("Halt: found multivalued relationship property value"); 34 | } 35 | 36 | @Override 37 | public void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser) { 38 | // Do nothing 39 | } 40 | }; 41 | 42 | @Override 43 | public abstract PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser); 44 | 45 | @Override 46 | public abstract void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser); 47 | } 48 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/io/RawCsvPrinter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 
11 | */
12 | 
13 | package com.amazonaws.services.neptune.io;
14 | 
15 | import java.io.FileWriter;
16 | import java.io.Flushable;
17 | import java.io.IOException;
18 | import java.io.PrintWriter;
19 | import java.nio.file.Path;
20 | 
21 | public class RawCsvPrinter implements Flushable, AutoCloseable {
22 | 
23 |     static RawCsvPrinter newPrinter(Path filePath, boolean append) throws IOException {
24 |         return new RawCsvPrinter(filePath, append);
25 |     }
26 | 
27 |     static RawCsvPrinter newPrinter(Path filePath) throws IOException {
28 |         return newPrinter(filePath, false);
29 |     }
30 | 
31 |     private final PrintWriter printer;
32 | 
33 |     private RawCsvPrinter(Path filePath, boolean append) throws IOException {
34 |         this.printer = new PrintWriter(new FileWriter(filePath.toFile(), append));
35 |     }
36 | 
37 |     void printRecord(Iterable<String> values) {
38 |         printer.write(String.join(",", values));
39 |         printer.write(System.lineSeparator());
40 |     }
41 | 
42 |     void printRecord(String value) {
43 |         printer.write(value);
44 |         printer.write(System.lineSeparator());
45 |     }
46 | 
47 |     @Override
48 |     public void flush() throws IOException {
49 |         printer.flush();
50 |     }
51 | 
52 |     @Override
53 |     public void close() throws IOException {
54 |         printer.close();
55 |     }
56 | }
57 | --------------------------------------------------------------------------------
/neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/io/Directories.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | Licensed under the Apache License, Version 2.0 (the "License").
4 | You may not use this file except in compliance with the License.
5 | A copy of the License is located at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | or in the "license" file accompanying this file. This file is distributed
8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
9 | express or implied. See the License for the specific language governing
10 | permissions and limitations under the License.
11 | */
12 | 
13 | package com.amazonaws.services.neptune.io;
14 | 
15 | import java.io.File;
16 | import java.io.IOException;
17 | import java.nio.file.Files;
18 | import java.nio.file.Path;
19 | 
20 | public class Directories {
21 | 
22 |     public static Directories createFor(File root) throws IOException {
23 |         if (root == null) {
24 |             throw new IllegalArgumentException("You must supply a directory");
25 |         }
26 | 
27 |         String directoryName = String.valueOf(System.currentTimeMillis());
28 |         Path rootDirectory = root.toPath();
29 | 
30 |         Path directory = rootDirectory.resolve(directoryName);
31 |         Files.createDirectories(directory);
32 | 
33 |         return new Directories(directory);
34 |     }
35 | 
36 |     private final Path directory;
37 | 
38 |     private Directories(Path directory) {
39 |         this.directory = directory;
40 |     }
41 | 
42 |     public Path outputDirectory() {
43 |         return directory;
44 |     }
45 | 
46 |     Path createFilePath(String name) {
47 |         return createFilePath(name, null);
48 |     }
49 | 
50 |     Path createFilePath(String name, Object index) {
51 | 
52 |         String filename = index == null ?
53 |                 String.format("%s.csv", name) :
54 |                 String.format("%s-%s.csv", name, index);
55 | 
56 |         return directory.resolve(filename);
57 |     }
58 | }
59 | 
60 | --------------------------------------------------------------------------------
/neptune-serverless-evaluator/README.md:
--------------------------------------------------------------------------------
1 | ## Neptune Serverless Cost Evaluator
2 | Neptune offers on-demand provisioned instances and serverless instances to accommodate a variety of scale-up and scale-down needs. Choosing between the two modes is often a cost decision and requires an understanding of your access patterns. This README walks through some of the decision factors for new workloads and shows how you can use CloudWatch data to check whether a workload currently running on a Neptune serverless instance would be cheaper on Neptune provisioned instances.
3 | 
4 | ### Minimum IAM policies required
5 | * AmazonRDSReadOnlyAccess
6 | * AWSPriceListServiceFullAccess
7 | 
8 | 
9 | ### How to run the script
10 | * [Configure the AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html)
11 | * Install Python 3.7 or above
12 | * Install pip3
13 | * Clone this repository to a local directory
14 | * Install the required libraries:
15 | 
16 | 
17 | ```
18 | pip3 install -r requirements.txt
19 | ```
20 | 
21 | 
22 | Parameter names:
23 | | Parameter | Details | Default |
24 | | ------------- |:-------------:| -----:|
25 | | -n, --name | Neptune instance name | |
26 | | -r, --region | Region name for the instance | |
27 | | -p, --period | Number of days of data points collected from CloudWatch | 14 |
28 | 
29 | 
30 | Example of checking whether on-demand provisioned instances would be cheaper than running Neptune serverless:
31 | 
32 | ```
33 | python pricing.py -n database-2-instance-1 -p 1 -r us-east-1
34 | 
35 | Region : us-east-1
36 | Instance name : database-2-instance-1
37 | Instance type : db.serverless
38 | Data collection period : 1 days
39 | Total cost of running serverless for last 1 days : $4.1
40 | Minimum NCU utilization : 1.0 ,Equivalent OnDemand Instance Costs: (db.r6g.large) $6.903
41 | Maximum NCU utilization : 5.0 ,Equivalent OnDemand Instance Costs: (db.r6g.xlarge) $13.805
42 | 90th Percentile NCU Utilization : 1.5 ,Equivalent OnDemand Instance Costs: (db.r6g.large) $6.903
43 | Average NCU utilization : 1 ,Equivalent OnDemand Instance Costs: (db.r6g.large) $6.903
44 | Total data points : 1208
45 | ```
46 | 
47 | --------------------------------------------------------------------------------
/opencypher-compatability-checker/output.json:
--------------------------------------------------------------------------------
1 | {
2 |   "results" : [ {
3 |     "id" : 1,
4 |     "supported" : true,
5 |     "errorDefinitions" : [ ]
6 |   }, {
7 |     "id" : 2,
8 |     "supported" : false,
9 |     "errorDefinitions" : [ {
10 |       "position" : "line 1, column 43 (offset: 42)",
11 |       "name" : "CALL",
12 |       "replacement" : "",
13 |       "description" : "Update clauses like MERGE, CREATE, SET, REMOVE, DELETE, FOREACH not supported in call subquery"
14 |     }, {
15 |       "position" : "line 1, column 118 (offset: 117)",
16 |       "name" : "FOREACH",
17 |       "replacement" : "",
18 |       "description" : "FOREACH is not supported in this release"
19 |     } ]
20 |   }, {
21 |     "id" : 3,
22 |     "supported" : true,
23 |     "errorDefinitions" : [ ]
24 |   }, {
25 |     "id" : 4,
26 |     "supported" : false,
27 |     "errorDefinitions" : [ {
28 |       "position" : "line 1, column 8 (offset: 7)",
29 |       "name" : "apoc.coll.intersection",
30 |       "replacement" : "collintersection",
31 |       "description" : "apoc.coll.intersection is
not supported in this release but try replacing with collintersection" 32 | } ] 33 | }, { 34 | "id" : 5, 35 | "supported" : false, 36 | "errorDefinitions" : [ { 37 | "position" : "line 1, column 8 (offset: 7)", 38 | "name" : "reduce", 39 | "replacement" : "", 40 | "description" : "reduce only supported with add or multiply expressions" 41 | } ] 42 | }, { 43 | "id" : 6, 44 | "supported" : false, 45 | "errorDefinitions" : [ { 46 | "position" : "line 1, column 1 (offset: 0)", 47 | "name" : "apoc.json.validate", 48 | "replacement" : "", 49 | "description" : "apoc.json.validate is not supported in this release" 50 | } ] 51 | }, { 52 | "id" : 7, 53 | "supported" : false, 54 | "errorDefinitions" : [ { 55 | "position" : "", 56 | "name" : "", 57 | "replacement" : "", 58 | "description" : "Invalid Open Cypher Query :org.opencypher.v9_0.util.SyntaxException: Invalid input 'g': expected (line 1, column 1 (offset: 0))" 59 | } ] 60 | } ] 61 | } -------------------------------------------------------------------------------- /glue-neptune/glue_neptune/NeptuneConnectionInfo.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. 2 | # All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). 5 | # You may not use this file except in compliance with the License. 6 | # A copy of the License is located at 7 | # 8 | # http://aws.amazon.com/apache2.0/ 9 | # 10 | # or in the "license" file accompanying this file. 11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions 13 | # and limitations under the License. 14 | 15 | import sys, boto3, os 16 | 17 | from awsglue.utils import getResolvedOptions 18 | from awsglue.context import GlueContext 19 | 20 | class NeptuneConnectionInfo: 21 | 22 | def __init__(self, glue_context): 23 | self.glue_context = glue_context 24 | 25 | def __neptune_connection(self, connection_name): 26 | proxy_url = self.glue_context._jvm.AWSConnectionUtils.getGlueProxyUrl() 27 | glue_endpoint = self.glue_context._jvm.AWSConnectionUtils.getGlueEndpoint() 28 | region = self.glue_context._jvm.AWSConnectionUtils.getRegion() 29 | if not proxy_url[8:].startswith('null'): 30 | os.environ['https_proxy'] = proxy_url 31 | glue = boto3.client('glue', endpoint_url=glue_endpoint, region_name=region) 32 | connection = glue.get_connection(Name=connection_name) 33 | del os.environ['https_proxy'] 34 | return connection['Connection']['ConnectionProperties']['JDBC_CONNECTION_URL'] 35 | 36 | def neptune_endpoint(self, connection_name): 37 | """Gets Neptune endpoint information from the Glue Data Catalog. 38 | 39 | You can store Neptune endpoint information as JDBC connections in the Glue Data Catalog. 40 | JDBC connection strings must begin 'jdbc:'. 
To store a Neptune endpoint, use the following format:
41 | 
42 |         'jdbc:<protocol>://<neptune-endpoint>:<port>/<path>'
43 | 
44 |         For example, if you store:
45 | 
46 |         'jdbc:ws://my-neptune-cluster.us-east-1.neptune.amazonaws.com:8182/gremlin'
47 | 
48 |         – this method will return:
49 | 
50 |         'ws://my-neptune-cluster.us-east-1.neptune.amazonaws.com:8182/gremlin'
51 | 
52 |         Example:
53 |         >>> gremlin_endpoint = NeptuneConnectionInfo(glueContext).neptune_endpoint('neptune')
54 |         """
55 |         return self.__neptune_connection(connection_name)[5:]
--------------------------------------------------------------------------------
/neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/io/OutputFile.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | Licensed under the Apache License, Version 2.0 (the "License").
4 | You may not use this file except in compliance with the License.
5 | A copy of the License is located at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | or in the "license" file accompanying this file. This file is distributed
8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
9 | express or implied. See the License for the specific language governing
10 | permissions and limitations under the License.
11 | */
12 | 
13 | package com.amazonaws.services.neptune.io;
14 | 
15 | import java.io.IOException;
16 | import java.nio.file.Files;
17 | import java.nio.file.Path;
18 | import java.util.stream.Stream;
19 | 
20 | public class OutputFile implements AutoCloseable {
21 | 
22 |     private RawCsvPrinter printer;
23 | 
24 |     private final Directories directories;
25 |     private final String filename;
26 | 
27 |     public OutputFile(Directories directories, String filename) throws IOException {
28 | 
29 |         this.directories = directories;
30 |         this.filename = filename;
31 | 
32 |         this.printer = RawCsvPrinter.newPrinter(directories.createFilePath(filename));
33 |     }
34 | 
35 |     public void printRecord(Iterable<String> values) throws IOException {
36 |         printer.printRecord(values);
37 |     }
38 | 
39 |     public void printHeaders(Iterable<String> headers) throws IOException {
40 | 
41 |         Path originalFilePath = directories.createFilePath(filename);
42 |         Path tempFilePath = directories.createFilePath(filename, "temp");
43 | 
44 |         printer.flush();
45 |         printer.close();
46 | 
47 |         try (Stream<String> stream = Files.lines(originalFilePath);
48 |              RawCsvPrinter tempFilePrinter = RawCsvPrinter.newPrinter(tempFilePath)) {
49 | 
50 |             tempFilePrinter.printRecord(headers);
51 |             stream.forEach(tempFilePrinter::printRecord);
52 | 
53 |             tempFilePrinter.flush();
54 |         }
55 | 
56 |         Files.deleteIfExists(originalFilePath);
57 |         if (!tempFilePath.toFile().renameTo(originalFilePath.toFile())) {
58 |             throw new RuntimeException("Unable to rename temp file: " + tempFilePath);
59 |         }
60 | 
61 |         printer = RawCsvPrinter.newPrinter(originalFilePath, true);
62 |     }
63 | 
64 |     @Override
65 |     public void close() throws Exception {
66 |         printer.flush();
67 |         printer.close();
68 |     }
69 | }
70 | --------------------------------------------------------------------------------
/neptune-python-utils/neptune_python_utils/glue_gremlin_csv_transforms.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Amazon.com, Inc. or its affiliates.
2 | # All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License").
5 | # You may not use this file except in compliance with the License.
6 | # A copy of the License is located at
7 | #
8 | #   http://aws.amazon.com/apache2.0/
9 | #
10 | # or in the "license" file accompanying this file.
11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions
13 | # and limitations under the License.
14 | 
15 | from awsglue.dynamicframe import DynamicFrame
16 | from pyspark.sql.functions import lit
17 | from pyspark.sql.functions import format_string
18 | 
19 | class GlueGremlinCsvTransforms:
20 | 
21 |     @classmethod
22 |     def create_prefixed_columns(cls, datasource, mappings):
23 |         """Creates columns in a DynamicFrame whose values are based on prefixed values from another column in the DynamicFrame.
24 | 
25 |         Example:
26 |         >>> df = GlueGremlinCsvTransforms.create_prefixed_columns(df, [('~id', 'productId', 'p'),('~to', 'supplierId', 's')])
27 |         """
28 |         dataframe = datasource.toDF()
29 |         for (column_name, source_column, prefix) in mappings:
30 |             dataframe = dataframe.withColumn(column_name, format_string(prefix + "-%s", dataframe[source_column]))
31 |         return DynamicFrame.fromDF(dataframe, datasource.glue_ctx, 'create_vertex_id_columns')
32 | 
33 |     @classmethod
34 |     def create_edge_id_column(cls, datasource, from_column, to_column):
35 |         """Creates an '~id' column in a DynamicFrame whose values are based on the specified from and to columns.
36 | 
37 |         Example:
38 |         >>> df = GlueGremlinCsvTransforms.create_edge_id_column(df, 'supplierId', 'productId')
39 |         """
40 |         dataframe = datasource.toDF()
41 |         dataframe = dataframe.withColumn('~id', format_string("%s-%s", dataframe[from_column], dataframe[to_column]))
42 |         return DynamicFrame.fromDF(dataframe, datasource.glue_ctx, 'create_edge_id_column')
43 | 
44 |     @classmethod
45 |     def addLabel(cls, datasource, label):
46 |         """Adds a '~label' column to a DynamicFrame.
47 | 
48 |         Example:
49 |         >>> df = GlueGremlinCsvTransforms.addLabel(df, 'Product')
50 |         """
51 |         dataframe = datasource.toDF()
52 |         dataframe = dataframe.withColumn("~label", lit(label))
53 |         return DynamicFrame.fromDF(dataframe, datasource.glue_ctx, label)
--------------------------------------------------------------------------------
/neptune-streams-utils/readme.md:
--------------------------------------------------------------------------------
1 | # neptune-streams-utils
2 | 
3 | This project includes example [Neptune Streams](https://docs.aws.amazon.com/neptune/latest/userguide/streams.html) handlers and build scripts, and a command-line tool that installs a handler in the Neptune Streams polling framework.
4 | 
5 | ## Example handlers
6 | 
7 | The _examples_ directory contains sample handlers written in Python and Java, and scripts for building them. You can use these as the basis of your own handlers. Run _build.sh_, and then copy the zip file in the _target_ directory to your S3 bucket.
8 | 
9 | ### Java handler dependencies
10 | 
11 | The Java handler depends on two libraries that are not currently available in Maven. Before building the Java handler, run the _install-dependencies.sh_ script to install the libraries in your local Maven repository.
12 | 
13 | ## Provisioning script
14 | 
15 | The _provisioning_ folder contains a command-line tool that installs a handler in the Neptune Streams polling framework.
The handler and polling framework are created using a [CloudFormation template](https://s3.amazonaws.com/aws-neptune-customer-samples/neptune-stream/neptune_stream_poller_nested_full_stack.json) provided by Neptune. This CloudFormation template has over 25 input parameters. The script here simplifies running the CloudFormation template. The script queries the AWS Management APIs to get details of the Neptune cluster, VPC, subnets, security groups, etc., and then populates and invokes the CloudFormation template.
16 | 
17 | Here's an example of using the script to install the example Python handler (after building the handler and putting it in S3):
18 | 
19 | ```
20 | python provision_neptune_streams_handler.py \
21 |     --cluster_id=neptunedbcluster-abcdefghijkl \
22 |     --handler_s3_bucket=my-bucket \
23 |     --handler_s3_key=streams/stream_handler.zip \
24 |     --region=us-east-1
25 | ```
26 | 
27 | Here's an example of using the script to install the example Java handler (after building the handler and putting it in S3):
28 | 
29 | ```
30 | python provision_neptune_streams_handler.py \
31 |     --cluster_id=neptunedbcluster-abcdefghijkl \
32 |     --handler_s3_bucket=my-bucket \
33 |     --handler_s3_key=streams/stream_handler.jar \
34 |     --lambda_runtime=java8 \
35 |     --region=us-east-1
36 | ```
37 | 
38 | If you supply an additional `--dry_run=true` parameter, the tool will simply create all the CloudFormation parameters, but not actually run the template.
39 | 
40 | ## Additional resources
41 | 
42 | More details on creating your own custom handlers can be found in the blog post [Capture graph changes using Neptune Streams](https://aws.amazon.com/blogs/database/capture-graph-changes-using-neptune-streams/).
43 | 
44 | --------------------------------------------------------------------------------
/neo4j-to-neptune/src/test/java/com/amazonaws/services/neptune/metadata/PropertyValueParserTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | Licensed under the Apache License, Version 2.0 (the "License").
4 | You may not use this file except in compliance with the License.
5 | A copy of the License is located at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | or in the "license" file accompanying this file. This file is distributed
8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
9 | express or implied. See the License for the specific language governing
10 | permissions and limitations under the License.
11 | */
12 | 
13 | package com.amazonaws.services.neptune.metadata;
14 | 
15 | import org.junit.Test;
16 | 
17 | import static org.junit.Assert.*;
18 | 
19 | public class PropertyValueParserTest {
20 | 
21 |     @Test
22 |     public void shouldDoubleUpDoubleQuotesAndSurroundWithDoubleQuotesAStringValueContainingDoubleQuotes() {
23 |         String originalValue = "First \"second\" third fourth";
24 |         PropertyValue propertyValue = new PropertyValueParser(MultiValuedNodePropertyPolicy.LeaveAsString, "", false).parse(originalValue);
25 | 
26 |         assertEquals("\"First \"\"second\"\" third fourth\"", propertyValue.value());
27 |         assertFalse(propertyValue.isMultiValued());
28 |     }
29 | 
30 |     @Test
31 |     public void shouldSurroundWithDoubleQuotesAStringValueContainingComma() {
32 |         String originalValue = "one, two, three";
33 |         PropertyValue propertyValue = new PropertyValueParser(MultiValuedNodePropertyPolicy.LeaveAsString, "", false).parse(originalValue);
34 | 
35 |         assertEquals("\"one, two, three\"", propertyValue.value());
36 |         assertFalse(propertyValue.isMultiValued());
37 |     }
38 | 
39 |     @Test
40 |     public void shouldSurroundWithDoubleQuotesAStringValueContainingNewLine() {
41 |         String originalValue = "one" + System.lineSeparator() + "two";
42 |         PropertyValue propertyValue = new PropertyValueParser(MultiValuedNodePropertyPolicy.LeaveAsString, "", false).parse(originalValue);
43 | 
44 |         assertEquals("\"one" + System.lineSeparator() + "two\"", propertyValue.value());
45 |         assertFalse(propertyValue.isMultiValued());
46 |     }
47 | 
48 |     @Test
49 |     public void shouldReplaceSemicolonInMultiValuePropertiesWithReplacementString() {
50 |         String originalValue = "[\"one\",\"two;three\"]";
51 |         PropertyValue propertyValue = new PropertyValueParser(MultiValuedNodePropertyPolicy.PutInSetIgnoringDuplicates, "SEMICOLON", false).parse(originalValue);
52 | 
53 |         assertEquals("one;twoSEMICOLONthree", propertyValue.value());
54 |         assertTrue(propertyValue.isMultiValued());
55 |     }
56 | 
57 | }
--------------------------------------------------------------------------------
/neptune-python-utils/neptune_python_utils/glue_neptune_connection_info.py:
--------------------------------------------------------------------------------
1 | # Copyright 2020 Amazon.com, Inc. or its affiliates.
2 | # All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License").
5 | # You may not use this file except in compliance with the License.
6 | # A copy of the License is located at
7 | #
8 | #   http://aws.amazon.com/apache2.0/
9 | #
10 | # or in the "license" file accompanying this file.
11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions
13 | # and limitations under the License.
14 | 
15 | import sys
16 | import boto3
17 | import requests
18 | from neptune_python_utils.endpoints import Endpoints
19 | 
20 | class GlueNeptuneConnectionInfo:
21 | 
22 |     def __init__(self, region, role_arn):
23 |         self.region = region
24 |         self.role_arn = role_arn
25 | 
26 |     def neptune_endpoints(self, connection_name):
27 |         """Gets Neptune endpoint information from the AWS Glue Data Catalog.
28 | 
29 |         You may need to install a Glue VPC Endpoint in your VPC for this method to work.
30 | 
31 |         You can create a Glue connection of type 'JDBC' or 'NETWORK'.
32 | 
33 |         When you use a Glue connection of type 'JDBC', store the Amazon Neptune endpoint in the 'JDBC_CONNECTION_URL' field, e.g.
'jdbc:wss://my-neptune-cluster.us-east-1.neptune.amazonaws.com:8182/gremlin'.
34 | 
35 |         When you use a Glue connection of type 'NETWORK', store the Amazon Neptune endpoint in the 'Description' field, e.g. 'wss://my-neptune-cluster.us-east-1.neptune.amazonaws.com:8182/gremlin'.
36 | 
37 |         When you invoke the method, it returns the Neptune endpoint, e.g. 'wss://my-neptune-cluster.us-east-1.neptune.amazonaws.com:8182/gremlin'
38 | 
39 |         Example:
40 |         >>> endpoints = GlueNeptuneConnectionInfo(region, role_arn).neptune_endpoints('neptune')
41 |         """
42 |         glue = boto3.client('glue', region_name=self.region)
43 |         connection = glue.get_connection(Name=connection_name)['Connection']
44 | 
45 |         if connection['ConnectionType'] == "JDBC":
46 |             neptune_uri = connection['ConnectionProperties']['JDBC_CONNECTION_URL'][5:]
47 | 
48 |         if connection['ConnectionType'] == "NETWORK":
49 |             neptune_uri = connection['Description']
50 | 
51 |         parse_result = requests.utils.urlparse(neptune_uri)
52 |         netloc_parts = parse_result.netloc.split(':')
53 |         host = netloc_parts[0]
54 |         port = netloc_parts[1]
55 | 
56 |         return Endpoints(neptune_endpoint=host, neptune_port=port, region_name=self.region, role_arn=self.role_arn)
--------------------------------------------------------------------------------
/neptune-streams-utils/provisioning/readme.md:
--------------------------------------------------------------------------------
1 | # provision-neptune-streams-handler
2 | 
3 | Provisions a Neptune Streams handler.
4 | 
5 | ### Prerequisites
6 | 
7 | ```
8 | pip install boto3
9 | pip install fire
10 | pip install tabulate
11 | ```
12 | 
13 | Before provisioning a handler using this script, ensure the following conditions are met:
14 | 
15 | - You have an existing Neptune cluster
16 | - Neptune Streams is [enabled](https://docs.aws.amazon.com/neptune/latest/userguide/streams-using.html#streams-using-enabling)
17 | 
18 | ### Usage
19 | 
20 | ```
21 | NAME
22 |     provision_neptune_streams_handler.py
23 | 
24 | SYNOPSIS
25 |     provision_neptune_streams_handler.py CLUSTER_ID HANDLER_S3_BUCKET HANDLER_S3_KEY
26 | 
27 | POSITIONAL ARGUMENTS
28 |     CLUSTER_ID
29 |     HANDLER_S3_BUCKET
30 |     HANDLER_S3_KEY
31 | 
32 | FLAGS
33 |     --handler_name=HANDLER_NAME
34 |     --additional_params=ADDITIONAL_PARAMS
35 |     --query_engine=QUERY_ENGINE
36 |     --region=REGION
37 |     --lambda_memory_size_mb=LAMBDA_MEMORY_SIZE_MB
38 |     --lambda_runtime=LAMBDA_RUNTIME
39 |     --lambda_logging_level=LAMBDA_LOGGING_LEVEL
40 |     --managed_policy_arns=MANAGED_POLICY_ARNS
41 |     --batch_size=BATCH_SIZE
42 |     --max_polling_wait_time_seconds=MAX_POLLING_WAIT_TIME_SECONDS
43 |     --max_polling_interval_seconds=MAX_POLLING_INTERVAL_SECONDS
44 |     --step_function_fallback_period=STEP_FUNCTION_FALLBACK_PERIOD
45 |     --step_function_fallback_period_unit=STEP_FUNCTION_FALLBACK_PERIOD_UNIT
46 |     --notification_email=NOTIFICATION_EMAIL
47 |     --create_cloudwatch_alarm=CREATE_CLOUDWATCH_ALARM
48 |     --application_name=APPLICATION_NAME
49 |     --dry_run=DRY_RUN
50 | 
51 | NOTES
52 |     You can also use flags syntax for POSITIONAL ARGUMENTS
53 | ```
54 | 
55 | ### Examples
56 | 
57 | Here's an example that provisions a handler with the default handler name (`stream_handler.StreamHandler`), and which has been uploaded to _s3://my-bucket/handlers/example_handler.zip_:
58 | 
59 | ```
60 | python provision_neptune_streams_handler.py --cluster_id=neptunedbcluster-xyz0a0a0abc \
61 |     --handler_s3_bucket=my-bucket \
62 |     --handler_s3_key=handlers/example_handler.zip \
63 |     --region=us-east-1
64 | ```
65 | 
66 | Here's an example of using the script to install
a handler with an additional parameter (`delivery_stream_name`, which will be supplied to the handler via an environment variable when it is invoked), and a managed policy that allows the handler to invoke an Amazon Kinesis Data Firehose API: 67 | 68 | ``` 69 | python provision_neptune_streams_handler.py \ 70 | --cluster_id=neptunedbcluster-abcdefghijkl \ 71 | --handler_s3_bucket=my-bucket \ 72 | --handler_s3_key=neptune_firehose_handler.zip \ 73 | --additional_params='{"delivery_stream_name": "neptune-firehose"}' \ 74 | --managed_policy_arns='["arn:aws:iam::123456789:policy/neptune-firehose-handler-policy"]' \ 75 | --region=us-east-1 76 | ``` -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/MultiValuedNodePropertyPolicy.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import com.fasterxml.jackson.databind.node.ArrayNode; 16 | 17 | public enum MultiValuedNodePropertyPolicy implements PropertyValueParserPolicy { 18 | LeaveAsString { 19 | @Override 20 | public PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser) { 21 | return parser.stringValue(s); 22 | } 23 | 24 | @Override 25 | public void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser) { 26 | // Do nothing 27 | } 28 | }, 29 | Halt { 30 | @Override 31 | public PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser) { 32 | throw new RuntimeException("Halt: found multivalued node property value"); 33 | } 34 | 35 | @Override 36 | public void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser) { 37 | // Do nothing 38 | } 39 | }, 40 | PutInSetIgnoringDuplicates { 41 | @Override 42 | public PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser) { 43 | return parser.parseArrayValue(s, arrayNode); 44 | } 45 | 46 | @Override 47 | public void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser) { 48 | // Do nothing 49 | } 50 | }, 51 | PutInSetButHaltIfDuplicates { 52 | @Override 53 | public PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser) { 54 | return parser.parseArrayValue(s, arrayNode); 55 | } 56 | 57 | @Override 58 | public void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser) { 59 | throw new RuntimeException("Halt: found multivalued node property value with duplicate values"); 60 | } 61 | }; 62 | 63 | @Override 64 | public abstract PropertyValue handleArray(String s, ArrayNode arrayNode, PropertyValueParser parser); 65 | 66 | @Override 67 | public abstract void handleDuplicates(String s, ArrayNode arrayNode, PropertyValueParser parser); 68 | } 69 | 
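// For example: given the multi-valued Neo4j export value ["toy","electronics","gifts"],
// LeaveAsString emits the single CSV string value "[""toy"",""electronics"",""gifts""]",
// while Halt throws "Halt: found multivalued relationship property value"
// (see MultiValuedRelationshipPropertyPolicyTest for both behaviours).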
-------------------------------------------------------------------------------- /csv-to-neptune-bulk-format/data-config-spotify-no-node.json: -------------------------------------------------------------------------------- 1 | { 2 | "source_folder": ".source", 3 | "data_folder": ".data", 4 | "fileNames": [ 5 | "spotify_songs.csv" 6 | ], 7 | "nodes": [], 8 | "edges": [ 9 | { 10 | "csvFileName": "Track_Album_Edges.csv", 11 | "select": "'track_name' in row and 'track_artist' in row and row['track_name'] != '' and row['track_artist'] != '' and 'track_album_id' in row and row['track_album_id'] != ''", 12 | "id": "uuid()", 13 | "label": "'IN_ALBUM'", 14 | "from": "row['track_artist'] + '-' + row['track_name']", 15 | "to": "row['track_album_id']", 16 | "fromLabel": "'Track'", 17 | "toLabel": "'Album'", 18 | "properties": [] 19 | }, 20 | { 21 | "csvFileName": "Track_Artist_Edges.csv", 22 | "select": "'track_name' in row and 'track_artist' in row and row['track_name'] != '' and row['track_artist'] != ''", 23 | "id": "uuid()", 24 | "label": "'BY_ARTIST'", 25 | "from": "row['track_artist'] + '-' + row['track_name']", 26 | "to": "row['track_artist']", 27 | "fromLabel": "'Track'", 28 | "toLabel": "'Artist'", 29 | "properties": [] 30 | }, 31 | { 32 | "csvFileName": "Track_Playlist_Edges.csv", 33 | "select": "'track_name' in row and 'track_artist' in row and row['track_name'] != '' and row['track_artist'] != '' and 'playlist_id' in row and row['playlist_id'] != ''", 34 | "id": "uuid()", 35 | "label": "'IN_PLAYLIST'", 36 | "from": "row['track_artist'] + '-' + row['track_name']", 37 | "to": "row['playlist_id']", 38 | "fromLabel": "'Track'", 39 | "toLabel": "'Playlist'", 40 | "properties": [] 41 | }, 42 | { 43 | "csvFileName": "Playlist_Genre_Edges.csv", 44 | "select": "'playlist_id' in row and row['playlist_id'] != ''and 'playlist_genre' in row and row['playlist_genre'] != ''", 45 | "id": "uuid()", 46 | "label": "'HAS_GENRE'", 47 | "from": "row['playlist_id']", 48 | "to": "row['playlist_genre']", 49 | "fromLabel": "'Playlist'", 50 | "toLabel": "'Genre'", 51 | "properties": [] 52 | }, 53 | { 54 | "csvFileName": "Genre_SubGenre_Edges.csv", 55 | "select": "'playlist_genre' in row and row['playlist_genre'] != '' and 'playlist_subgenre' in row and row['playlist_subgenre'] != ''", 56 | "id": "uuid()", 57 | "label": "'HAS_SUBGENRE'", 58 | "from": "row['playlist_genre']", 59 | "to": "row['playlist_subgenre']", 60 | "fromLabel": "'Genre'", 61 | "toLabel": "'SubGenre'", 62 | "properties": [] 63 | } 64 | ] 65 | } -------------------------------------------------------------------------------- /glue-neptune/glue_neptune/GremlinCsvTransforms.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates. 2 | # All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). 5 | # You may not use this file except in compliance with the License. 6 | # A copy of the License is located at 7 | # 8 | # http://aws.amazon.com/apache2.0/ 9 | # 10 | # or in the "license" file accompanying this file. 11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions 13 | # and limitations under the License. 
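# Note: GremlinCsvTransforms provides classmethod helpers that shape Glue DynamicFrames
# into the Neptune bulk load CSV layout by deriving '~id', '~label', '~from' and '~to'
# style columns; see the docstrings on each method below for usage examples.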
14 | 
15 | import sys, os
16 | 
17 | from awsglue.utils import getResolvedOptions
18 | from pyspark.context import SparkContext
19 | from awsglue.context import GlueContext
20 | from awsglue.job import Job
21 | from awsglue.transforms import ApplyMapping
22 | from awsglue.transforms import RenameField
23 | from awsglue.transforms import SelectFields
24 | from awsglue.dynamicframe import DynamicFrame
25 | from pyspark.sql.functions import lit
26 | from pyspark.sql.functions import format_string
27 | 
28 | class GremlinCsvTransforms:
29 | 
30 |     @classmethod
31 |     def create_prefixed_columns(cls, datasource, mappings):
32 |         """Creates columns in a DynamicFrame whose values are based on prefixed values from another column in the DynamicFrame.
33 | 
34 |         Example:
35 |         >>> df = GremlinCsvTransforms.create_prefixed_columns(df, [('~id', 'productId', 'p'),('~to', 'supplierId', 's')])
36 |         """
37 |         dataframe = datasource.toDF()
38 |         for (column_name, source_column, prefix) in mappings:
39 |             dataframe = dataframe.withColumn(column_name, format_string(prefix + "-%s", dataframe[source_column]))
40 |         return DynamicFrame.fromDF(dataframe, datasource.glue_ctx, 'create_vertex_id_columns')
41 | 
42 |     @classmethod
43 |     def create_edge_id_column(cls, datasource, from_column, to_column):
44 |         """Creates an '~id' column in a DynamicFrame whose values are based on the specified from and to columns.
45 | 
46 |         Example:
47 |         >>> df = GremlinCsvTransforms.create_edge_id_column(df, 'supplierId', 'productId')
48 |         """
49 |         dataframe = datasource.toDF()
50 |         dataframe = dataframe.withColumn('~id', format_string("%s-%s", dataframe[from_column], dataframe[to_column]))
51 |         return DynamicFrame.fromDF(dataframe, datasource.glue_ctx, 'create_edge_id_column')
52 | 
53 |     @classmethod
54 |     def addLabel(cls, datasource, label):
55 |         """Adds a '~label' column to a DynamicFrame whose values comprise the supplied label.
56 | 
57 |         Example:
58 |         >>> df = GremlinCsvTransforms.addLabel(df, 'Product')
59 |         """
60 |         dataframe = datasource.toDF()
61 |         dataframe = dataframe.withColumn("~label", lit(label))
62 |         return DynamicFrame.fromDF(dataframe, datasource.glue_ctx, label)
--------------------------------------------------------------------------------
/neo4j-to-neptune/src/test/java/com/amazonaws/services/neptune/metadata/MultiValuedNodePropertyPolicyTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 | Licensed under the Apache License, Version 2.0 (the "License").
4 | You may not use this file except in compliance with the License.
5 | A copy of the License is located at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | or in the "license" file accompanying this file. This file is distributed
8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
9 | express or implied. See the License for the specific language governing
10 | permissions and limitations under the License.
11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import org.junit.Test; 16 | 17 | import static org.junit.Assert.*; 18 | 19 | public class MultiValuedNodePropertyPolicyTest { 20 | 21 | @Test 22 | public void shouldReturnStringPropertyValueIfPolicyIsLeaveAsString() { 23 | 24 | String value = "[\"toy\",\"electronics\",\"gifts\"]"; 25 | 26 | PropertyValueParser parser = new PropertyValueParser(MultiValuedNodePropertyPolicy.LeaveAsString, "", false); 27 | PropertyValue propertyValue = parser.parse(value); 28 | 29 | assertEquals("\"[\"\"toy\"\",\"\"electronics\"\",\"\"gifts\"\"]\"", propertyValue.value()); 30 | assertFalse(propertyValue.isMultiValued()); 31 | } 32 | 33 | @Test 34 | public void shouldThrowExceptionIfPolicyIsHalt() { 35 | String value = "[\"toy\",\"electronics\",\"gifts\"]"; 36 | 37 | PropertyValueParser parser = new PropertyValueParser(MultiValuedNodePropertyPolicy.Halt, "", false); 38 | 39 | try { 40 | parser.parse(value); 41 | fail(); 42 | } catch (RuntimeException e) { 43 | assertEquals("Halt: found multivalued node property value", e.getMessage()); 44 | } 45 | } 46 | 47 | @Test 48 | public void shouldReturnSetFormattedPropertyValueIfPolicyIsPutInSetIgnoringDuplicates() { 49 | 50 | String value = "[\"toy\",\"electronics\",\"gifts\"]"; 51 | 52 | PropertyValueParser parser = new PropertyValueParser(MultiValuedNodePropertyPolicy.PutInSetIgnoringDuplicates, "", false); 53 | PropertyValue propertyValue = parser.parse(value); 54 | 55 | assertEquals("electronics;toy;gifts", propertyValue.value()); 56 | assertTrue(propertyValue.isMultiValued()); 57 | } 58 | 59 | @Test 60 | public void shouldThrowExceptionIfPolicyIsPutInSetButHaltIfDuplicates() { 61 | String value = "[\"toy\",\"electronics\",\"gifts\",\"gifts\"]"; 62 | 63 | PropertyValueParser parser = new PropertyValueParser(MultiValuedNodePropertyPolicy.PutInSetButHaltIfDuplicates, "", false); 64 | 65 | try { 66 | parser.parse(value); 67 | fail(); 68 | } catch (RuntimeException e) { 69 | assertEquals("Halt: found multivalued node property value with duplicate values", e.getMessage()); 70 | } 71 | } 72 | 73 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Amazon Neptune Tools 2 | 3 | Utilities to enable loading data and building graph applications with Amazon Neptune. 4 | 5 | ### Examples 6 | 7 | You may also be interested in the [Neptune Samples github repository](https://github.com/aws-samples/amazon-neptune-samples), which includes samples and example code. 8 | 9 | ### GraphML 2 CSV 10 | This is a [utility](graphml2csv/README.md) to convert graphml files into the Neptune CSV format. 11 | 12 | ### Export Neptune to Elasticsearch 13 | Backfills Elasticsearch with data from an existing Amazon Neptune database. 14 | 15 | The [Neptune Full-text Search](https://docs.aws.amazon.com/neptune/latest/userguide/full-text-search-cfn-create.html) CloudFormation templates provide a mechanism for indexing all _new_ data that is added to an Amazon Neptune database in Elasticsearch. However, there are situations in which you may want to index _existing_ data in a Neptune database prior to enabling the full-text search integration. 16 | 17 | You can use this [export Neptune to Elasticsearch solution](export-neptune-to-elasticsearch/) to index existing data in an Amazon Neptune database in Elasticsearch. 
18 | 
19 | ### Neo4j to Neptune
20 | A [command-line utility](neo4j-to-neptune/readme.md) for migrating data to Neptune from Neo4j.
21 | 
22 | ### Glue Neptune
23 | 
24 | [glue-neptune](glue-neptune/) is a Python library for AWS Glue that helps you write data to Amazon Neptune from Glue jobs. With glue-neptune you can:
25 | * Get Neptune connection information from the Glue Data Catalog
26 | * Create label and node and edge ID columns in DynamicFrames, named in accordance with the Neptune CSV bulk load format for property graphs
27 | * Write from DynamicFrames directly to Neptune
28 | 
29 | ### Neptune CSV to RDF
30 | 
31 | If you're interested in converting Neptune's CSV format to RDF, see [amazon-neptune-csv-to-rdf-converter](https://github.com/aws/amazon-neptune-csv-to-rdf-converter).
32 | 
33 | ### Neptune CSV to Gremlin
34 | 
35 | [csv-gremlin](csv-gremlin/README.md) is a tool that can turn Amazon Neptune format CSV files into Gremlin steps, allowing them to be loaded into different Apache TinkerPop compliant stores (including Amazon Neptune) using Gremlin queries. The tool also tries to validate that the CSV files do not contain errors, and it can be used to inspect CSV files prior to starting a bulk load.
36 | 
37 | ### CSV to Neptune Bulk Format CSV
38 | 
39 | [csv-to-neptune-bulk-format](csv-to-neptune-bulk-format/README.md) is a utility that identifies nodes and edges in the source CSV data file(s) and generates Amazon Neptune Gremlin load data format files. A configuration file (JSON) defines the source and target files, the node/edge definitions, and the selection logic. The script interprets one or more configuration files and generates Amazon Neptune Gremlin load data format files. The generated files can then be loaded into the Neptune database.
40 | 
41 | ### neptune-gremlin-js
42 | 
43 | A JavaScript SDK for querying Neptune with Gremlin.
44 | 
45 | ## License
46 | 
47 | This library is licensed under the Apache 2.0 License.
48 | --------------------------------------------------------------------------------
/neptune-streams-utils/examples/streams-to-firehose/stream_handler.py:
--------------------------------------------------------------------------------
1 | # Copyright Amazon.com, Inc. or its affiliates.
2 | # All Rights Reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License").
5 | # You may not use this file except in compliance with the License.
6 | # A copy of the License is located at
7 | #
8 | #   http://aws.amazon.com/apache2.0/
9 | #
10 | # or in the "license" file accompanying this file.
11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions
13 | # and limitations under the License.
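# This handler forwards Neptune Streams records to the Kinesis Data Firehose delivery
# stream named in the 'delivery_stream_name' additional parameter, flushing with
# put_record_batch in batches of up to 500 records (the PutRecordBatch per-call limit).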
14 | 15 | import json 16 | import logging 17 | import os 18 | import boto3 19 | import lambda_function 20 | from commons import * 21 | from handler import AbstractHandler, HandlerResponse 22 | 23 | logger = logging.getLogger('StreamHandler') 24 | logger.setLevel(logging.INFO) 25 | 26 | class StreamHandler(AbstractHandler): 27 | 28 | def handle_records(self, stream_log): 29 | 30 | params = json.loads(os.environ['AdditionalParams']) 31 | delivery_stream_name = params['delivery_stream_name'] 32 | 33 | client = boto3.client('firehose') 34 | 35 | records = stream_log[RECORDS_STR] 36 | 37 | last_op_num = None 38 | last_commit_num = None 39 | count = 0 40 | 41 | firehose_records = [] 42 | 43 | try: 44 | for record in records: 45 | 46 | # Process record 47 | firehose_record = { 48 | "Data": '{}\n'.format(json.dumps(record)) 49 | } 50 | firehose_records.append(firehose_record) 51 | 52 | # Flush once we reach the put_record_batch limit of 500 records 53 | if len(firehose_records) == 500: 54 | response = client.put_record_batch( 55 | DeliveryStreamName=delivery_stream_name, 56 | Records=firehose_records 57 | ) 58 | logger.info(response) 59 | logger.info(len(firehose_records)) 60 | firehose_records.clear() 61 | 62 | # Update local checkpoint info 63 | last_op_num = record[EVENT_ID_STR][OP_NUM_STR] 64 | last_commit_num = record[EVENT_ID_STR][COMMIT_NUM_STR] 65 | count += 1 66 | 67 | # Send any remaining buffered records 68 | if len(firehose_records) > 0: 69 | logger.info(len(firehose_records)) 70 | response = client.put_record_batch( 71 | DeliveryStreamName=delivery_stream_name, 72 | Records=firehose_records 73 | ) 74 | logger.info(response) 75 | 76 | except Exception as e: 77 | logger.error('Error occurred - {}'.format(str(e))) 78 | raise e 79 | finally: 80 | try: 81 | yield HandlerResponse(last_op_num, last_commit_num, count) 82 | except Exception as e: 83 | logger.error('Error occurred - {}'.format(str(e))) 84 | raise e 85 | -------------------------------------------------------------------------------- /release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright 2020 Amazon.com, Inc. or its affiliates. 4 | # All Rights Reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"). 7 | # You may not use this file except in compliance with the License. 8 | # A copy of the License is located at 9 | # 10 | # http://aws.amazon.com/apache2.0/ 11 | # 12 | # or in the "license" file accompanying this file. 13 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 14 | # either express or implied. See the License for the specific language governing permissions 15 | # and limitations under the License. 16 | VERSION_FILE="VERSION" 17 | GIT_CMD=`which git` 18 | 19 | GIT_BRANCH=`${GIT_CMD} branch | grep \* | cut -d' ' -f2` 20 | 21 | if [ ! -f "$VERSION_FILE" ] ; then 22 | echo "Version file not present. Please create and retry." 23 | exit 1 24 | fi 25 | 26 | if [ "$GIT_BRANCH" != "master" ] ; then 27 | echo "WARNING: Starting a release from a non-master branch." 28 | fi 29 | 30 | let NEW_VERSION=`cat $VERSION_FILE`+1 31 | VERSION_STRING="1.${NEW_VERSION}" 32 | echo $NEW_VERSION > $VERSION_FILE 33 | 34 | 35 | echo "Starting release for $VERSION_STRING" 36 | 37 | RELEASE_BRANCH="amazon-neptune-tools-$VERSION_STRING" 38 | 39 | 40 | echo "Creating new release branch: $RELEASE_BRANCH" 41 | 42 | $GIT_CMD checkout -b $RELEASE_BRANCH 43 | 44 | #Utility script to build the jars to make a release.
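# Stage the release artifacts: build each Maven project at the new release
# version and collect the shaded jars into artifacts/.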
45 | ARTIFACT_DIR=`pwd`/artifacts 46 | rm -rf $ARTIFACT_DIR 47 | mkdir -p $ARTIFACT_DIR 48 | MAVEN_ARTIFACTS="neo4j-to-neptune" 49 | for artifact in $MAVEN_ARTIFACTS; do 50 | pushd $artifact >& /dev/null 51 | mvn versions:set -DnewVersion=${VERSION_STRING} versions:update-child-modules 52 | mvn clean 53 | mvn install 54 | #All of the jars are shaded. Only take the shaded, bundled jars. 55 | for jar in `find . -name "*.jar" -print | grep -vE "SNAPSHOT|original|\-$VERSION_STRING"`; do 56 | cp $jar $ARTIFACT_DIR 57 | done 58 | popd >& /dev/null 59 | done 60 | 61 | #Build the neptune-python-utils artifact 62 | pushd neptune-python-utils >& /dev/null 63 | ./build.sh 64 | cp target/neptune_python_utils.zip $ARTIFACT_DIR 65 | popd >& /dev/null 66 | 67 | cp ./graphml2csv/graphml2csv.py $ARTIFACT_DIR 68 | 69 | #drop-graph needs to be installed as a module 70 | #cp ./drop-graph/drop-graph.py $ARTIFACT_DIR 71 | 72 | ${GIT_CMD} commit -a -m "POM version updates for $RELEASE_BRANCH" 73 | 74 | echo "Creating Release Tag" 75 | 76 | ${GIT_CMD} tag -a $RELEASE_BRANCH -m "amazon-neptune-tools Release ${VERSION_STRING}" 77 | 78 | repo=origin 79 | 80 | echo "Pushing the release branch to $repo." 81 | ${GIT_CMD} push "${repo}" refs/heads/${RELEASE_BRANCH} 82 | 83 | echo "Pushing the release tags to $repo." 84 | ${GIT_CMD} push "${repo}" refs/tags/${RELEASE_BRANCH} 85 | 86 | #Update the VERSION on master 87 | 88 | ${GIT_CMD} checkout master 89 | echo $NEW_VERSION > $VERSION_FILE 90 | ${GIT_CMD} pull $repo master 91 | ${GIT_CMD} commit -am "Incremented release version to `cat $VERSION_FILE`" 92 | ${GIT_CMD} push $repo master 93 | 94 | #Return to the initial branch 95 | ${GIT_CMD} checkout ${GIT_BRANCH} 96 | 97 | echo "To complete the release, upload the contents of artifacts/ to the ${RELEASE_BRANCH} tag on github: https://github.com/awslabs/amazon-neptune-tools/releases." 98 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/python3.8/stream_handler.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. 2 | # All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). 5 | # You may not use this file except in compliance with the License. 6 | # A copy of the License is located at 7 | # 8 | # http://aws.amazon.com/apache2.0/ 9 | # 10 | # or in the "license" file accompanying this file. 11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions 13 | # and limitations under the License. 14 | 15 | import json 16 | import logging 17 | import os 18 | import lambda_function 19 | from commons import * 20 | from handler import AbstractHandler, HandlerResponse 21 | 22 | from neptune_python_utils.gremlin_utils import GremlinUtils 23 | from neptune_python_utils.endpoints import Endpoints 24 | 25 | logger = logging.getLogger('StreamHandler') 26 | logger.setLevel(logging.INFO) 27 | 28 | ''' 29 | This handler processes a batch of Neptune Stream events. 30 | If the event represents the creation of a vertex or edge, the handler queries Neptune for the details of the element. 31 | The handler yields one HandlerResponse per batch of stream events. 
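The HandlerResponse carries the last commit number and op number that were
successfully processed, allowing the caller to checkpoint its position in the stream.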
32 | ''' 33 | class StreamHandler(AbstractHandler): 34 | 35 | def handle_records(self, stream_log): 36 | 37 | params = json.loads(os.environ['AdditionalParams']) 38 | 39 | neptune_endpoint = params['neptune_cluster_endpoint'] 40 | neptune_port = params['neptune_port'] 41 | 42 | GremlinUtils.init_statics(globals()) 43 | 44 | endpoints = Endpoints(neptune_endpoint=neptune_endpoint, neptune_port=neptune_port) 45 | gremlin_utils = GremlinUtils(endpoints) 46 | 47 | conn = gremlin_utils.remote_connection() 48 | g = gremlin_utils.traversal_source(connection=conn) 49 | 50 | records = stream_log[RECORDS_STR] 51 | 52 | last_op_num = None 53 | last_commit_num = None 54 | count = 0 55 | 56 | try: 57 | for record in records: 58 | 59 | # Process record 60 | op = record[OPERATION_STR] 61 | data = record[DATA_STR] 62 | type = data['type'] 63 | id = data['id'] 64 | 65 | if op == ADD_OPERATION: 66 | if type == 'vl': 67 | logger.info(g.V(id).valueMap(True).toList()) 68 | if type == 'e': 69 | logger.info(g.E(id).valueMap(True).toList()) 70 | 71 | # Update local checkpoint info 72 | last_op_num = record[EVENT_ID_STR][OP_NUM_STR] 73 | last_commit_num = record[EVENT_ID_STR][COMMIT_NUM_STR] 74 | count += 1 75 | 76 | except Exception as e: 77 | logger.error('Error occurred - {}'.format(str(e))) 78 | raise e 79 | finally: 80 | try: 81 | yield HandlerResponse(last_op_num, last_commit_num, count) 82 | except Exception as e: 83 | logger.error('Error occurred - {}'.format(str(e))) 84 | raise e 85 | finally: 86 | conn.close() 87 | -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/lambda/export_neptune_to_kinesis.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License.
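# This Lambda function submits an AWS Batch job that downloads the
# neptune-export jar and exports the contents of a Neptune database to a
# Kinesis stream in neptuneStreamsJson format, from which it can be indexed
# into Elasticsearch.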
13 | 14 | import json 15 | import os 16 | import boto3 17 | import logging 18 | from datetime import datetime 19 | 20 | client = boto3.client('batch') 21 | 22 | logger = logging.getLogger() 23 | logger.setLevel(logging.INFO) 24 | 25 | def trigger_neptune_export(): 26 | 27 | neptune_export_jar_uri = os.environ['NEPTUNE_EXPORT_JAR_URI'] 28 | neptune_endpoint = os.environ['NEPTUNE_ENDPOINT'] 29 | neptune_port = os.environ['NEPTUNE_PORT'] 30 | neptune_engine = os.environ['NEPTUNE_ENGINE'] 31 | stream_name = os.environ['STREAM_NAME'] 32 | job_suffix = os.environ['JOB_SUFFIX'] 33 | region = os.environ['AWS_REGION'] 34 | concurrency = os.environ['CONCURRENCY'] 35 | scope = os.environ['EXPORT_SCOPE'] 36 | additional_params = os.environ['ADDITIONAL_PARAMS'] 37 | clone_cluster = os.environ.get('CLONE_CLUSTER') 38 | 39 | if additional_params: 40 | additional_params = additional_params if additional_params.startswith(' ') else ' {}'.format(additional_params) 41 | else: 42 | additional_params = '' 43 | 44 | use_iam_auth = '' if neptune_engine == 'sparql' else ' --use-iam-auth' 45 | export_command = 'export-pg' if neptune_engine == 'gremlin' else 'export-rdf' 46 | concurrency_param = ' --concurrency {}'.format(concurrency) if neptune_engine == 'gremlin' else '' 47 | scope_param = ' --scope {}'.format(scope) if neptune_engine == 'gremlin' else '' 48 | clone_cluster_param = ' --clone-cluster' if clone_cluster and clone_cluster.lower() == 'true' else '' 49 | 50 | command = 'df -h && rm -rf neptune-export.jar && wget {} -nv && export SERVICE_REGION="{}" && java -Xms16g -Xmx16g -jar neptune-export.jar {} -e {} -p {} -d /neptune/results --output stream --stream-name {} --region {} --format neptuneStreamsJson --use-ssl{}{}{}{}{}'.format( 51 | neptune_export_jar_uri, 52 | region, 53 | export_command, 54 | neptune_endpoint, 55 | neptune_port, 56 | stream_name, 57 | region, 58 | use_iam_auth, 59 | concurrency_param, 60 | scope_param, 61 | clone_cluster_param, 62 | additional_params) 63 | 64 | logger.info('Command: {}'.format(command)) 65 | 66 | submit_job_response = client.submit_job( 67 | jobName='export-neptune-to-kinesis-{}-{}'.format(job_suffix, round(datetime.utcnow().timestamp() * 1000)), 68 | jobQueue='export-neptune-to-kinesis-queue-{}'.format(job_suffix), 69 | jobDefinition='export-neptune-to-kinesis-job-{}'.format(job_suffix), 70 | containerOverrides={ 71 | 'command': [ 72 | 'sh', 73 | '-c', 74 | command 75 | ] 76 | } 77 | ) 78 | 79 | return submit_job_response 80 | 81 | def lambda_handler(event, context): 82 | 83 | result = trigger_neptune_export() 84 | 85 | job_name = result['jobName'] 86 | job_id = result['jobId'] 87 | 88 | return { 89 | 'jobName': job_name, 90 | 'jobId': job_id 91 | } 92 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 
13 | 14 | When filing an issue, please check [existing open](https://github.com/awslabs/amazon-neptune-tools/issues), or [recently closed](https://github.com/awslabs/amazon-neptune-tools/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. Our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), so looking at any ['help wanted'](https://github.com/awslabs/amazon-neptune-tools/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/awslabs/amazon-neptune-tools/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 61 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes.
62 | -------------------------------------------------------------------------------- /neptune-gremlin-js/cdk-test-app/lib/cdk-test-app-stack.js: -------------------------------------------------------------------------------- 1 | const { Stack } = require("aws-cdk-lib") 2 | const cdk = require("aws-cdk-lib") 3 | const ec2 = require("aws-cdk-lib/aws-ec2") 4 | const lambda = require("aws-cdk-lib/aws-lambda") 5 | const neptune = require("@aws-cdk/aws-neptune-alpha") 6 | 7 | class CdkTestAppStack extends Stack { 8 | /** 9 | * 10 | * @param {Construct} scope 11 | * @param {string} id 12 | * @param {StackProps=} props 13 | */ 14 | constructor(scope, id, props) { 15 | super(scope, id, props) 16 | 17 | // Create a dedicated VPC for the cluster 18 | const vpc = new ec2.Vpc(this, "vpc") 19 | 20 | // Cluster parameter group 21 | const clusterParameterGroup = new neptune.ClusterParameterGroup(this, 22 | "ClusterParams", 23 | { 24 | description: "Cluster parameter group", 25 | parameters: { 26 | neptune_enable_audit_log: "1", 27 | }, 28 | }, 29 | ) 30 | 31 | // Db parameter group 32 | const parameterGroup = new neptune.ParameterGroup(this, "DbParams", { 33 | description: "Db parameter group", 34 | parameters: { 35 | neptune_query_timeout: "10000", 36 | }, 37 | }) 38 | 39 | // Create the security group for the cluster 40 | const clusterSecurityGroup = new ec2.SecurityGroup(this, "ClusterSG", { 41 | vpc: vpc, 42 | description: "Neptune Gremlin Test Security Group", 43 | }) 44 | 45 | // Create the cluster 46 | const cluster = new neptune.DatabaseCluster(this, "cluster", { 47 | vpc: vpc, 48 | instanceType: neptune.InstanceType.T3_MEDIUM, 49 | clusterParameterGroup, 50 | parameterGroup, 51 | backupRetention: cdk.Duration.days(7), 52 | deletionProtection: true, 53 | securityGroups: [clusterSecurityGroup], 54 | }) 55 | 56 | // Output the writer endpoint host:port 57 | new cdk.CfnOutput(this, "WriteEndpointOutput", { 58 | value: cluster.clusterEndpoint.socketAddress, 59 | }) 60 | 61 | // Create a security group for the lambda function 62 | const lambdaSecurityGroup = new ec2.SecurityGroup(this, "LambdaSG", { 63 | vpc: vpc, 64 | description: "Neptune Gremlin Test Lambda Security Group", 65 | }) 66 | 67 | // Add an ingress rule to the cluster's security group from the lambda sg 68 | const port = cluster.clusterEndpoint.port 69 | clusterSecurityGroup.addIngressRule( 70 | lambdaSecurityGroup, ec2.Port.tcp(port)) 71 | 72 | // Environment variables for the lambda function 73 | const envVars = { 74 | "NEPTUNE_ENDPOINT": cluster.clusterEndpoint.hostname, 75 | "NEPTUNE_PORT": cluster.clusterEndpoint.port, 76 | "USE_IAM": "true", 77 | "USE_AWS4": "true", 78 | } 79 | 80 | // Create the integration test Lambda 81 | const testLambda = new lambda.Function(this, "neptune-gremlin-test", { 82 | runtime: lambda.Runtime.NODEJS_14_X, 83 | code: lambda.Code.fromAsset("lambda"), 84 | handler: "integration-test.handler", 85 | vpc: vpc, 86 | timeout: cdk.Duration.seconds(10), 87 | memorySize: 1536, 88 | environment: envVars, 89 | securityGroups: [lambdaSecurityGroup], 90 | }) 91 | 92 | // Give the lambda function access to the cluster 93 | cluster.grantConnect(testLambda) 94 | } 95 | 96 | } 97 | 98 | 99 | module.exports = { CdkTestAppStack } 100 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/PropertyValueParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 
Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import com.fasterxml.jackson.core.JsonProcessingException; 16 | import com.fasterxml.jackson.databind.JsonNode; 17 | import com.fasterxml.jackson.databind.ObjectMapper; 18 | import com.fasterxml.jackson.databind.node.ArrayNode; 19 | 20 | import java.util.HashSet; 21 | import java.util.Set; 22 | 23 | public class PropertyValueParser { 24 | 25 | private static final ObjectMapper MAPPER = new ObjectMapper(); 26 | 27 | private final PropertyValueParserPolicy policy; 28 | private final String semicolonReplacement; 29 | private final boolean inferType; 30 | 31 | public PropertyValueParser(PropertyValueParserPolicy policy, String semicolonReplacement, boolean inferType) { 32 | this.policy = policy; 33 | this.semicolonReplacement = semicolonReplacement; 34 | this.inferType = inferType; 35 | } 36 | 37 | public PropertyValue parse(String s){ 38 | if (isArrayCandidate(s)){ 39 | try { 40 | JsonNode jsonNode = MAPPER.readTree(s); 41 | if (isArray(jsonNode)){ 42 | return policy.handleArray(s, (ArrayNode) jsonNode, this); 43 | } else { 44 | return stringValue(s); 45 | } 46 | } catch (JsonProcessingException e) { 47 | return stringValue(s); 48 | } 49 | } else { 50 | return stringValue(s); 51 | } 52 | } 53 | 54 | PropertyValue parseArrayValue(String s, ArrayNode arrayNode) { 55 | Set<String> values = new HashSet<>(); 56 | for (JsonNode node : arrayNode) { 57 | values.add(format(node.asText().replace(";", semicolonReplacement))); 58 | } 59 | if (values.size() < arrayNode.size()){ 60 | policy.handleDuplicates(s, arrayNode, this); 61 | } 62 | return arrayValue(values); 63 | } 64 | 65 | PropertyValue stringValue(String s){ 66 | DataType dataType = inferType ?
DataType.identifyType(s) : DataType.None; 67 | return new PropertyValue(format(s), false, dataType); 68 | } 69 | 70 | private PropertyValue arrayValue(Set<String> values) { 71 | 72 | DataType dataType = DataType.None; 73 | 74 | if (inferType){ 75 | for (String value : values) { 76 | dataType = DataType.getBroadestType(dataType, DataType.identifyType(value)); 77 | } 78 | } 79 | 80 | return new PropertyValue(String.join(";", values), true, dataType); 81 | } 82 | 83 | private static boolean isArrayCandidate(String s) { 84 | return s.startsWith("[") && s.endsWith("]"); 85 | } 86 | 87 | private static boolean isArray(JsonNode jsonNode) { 88 | return jsonNode.isArray(); 89 | } 90 | 91 | private static String format(String s){ 92 | 93 | if (s.contains("\"")){ 94 | s = s.replace("\"", "\"\""); 95 | } 96 | 97 | if (s.contains("\"") || s.contains(",") || s.contains(System.lineSeparator())){ 98 | s = String.format("\"%s\"", s); 99 | } 100 | 101 | return s; 102 | } 103 | 104 | } 105 | -------------------------------------------------------------------------------- /neo4j-to-neptune/docs/bulk-load-config.md: -------------------------------------------------------------------------------- 1 | # Bulk Load Configuration 2 | 3 | The `convert-csv` utility supports automated bulk loading of converted CSV data directly into Amazon Neptune using the `--bulk-load-config` parameter. 4 | 5 | ## Usage 6 | 7 | Use the `--bulk-load-config` parameter to specify a YAML file containing the bulk load configuration: 8 | 9 | ```bash 10 | java -jar neo4j-to-neptune.jar convert-csv \ 11 | -i /tmp/neo4j-export.csv \ 12 | -d output \ 13 | --bulk-load-config bulk-load.yaml 14 | ``` 15 | 16 | ## YAML Configuration Format 17 | 18 | The configuration file should be in YAML format, using camelCase: 19 | 20 | ```yaml 21 | # Required parameters 22 | bucketName: my-neptune-data-bucket 23 | neptuneEndpoint: my-cluster.cluster-abc123.us-east-1.neptune.amazonaws.com 24 | iamRoleArn: arn:aws:iam::123456789012:role/NeptuneLoadFromS3Role 25 | 26 | # Optional parameters 27 | s3Prefix: neptune 28 | parallelism: OVERSUBSCRIBE 29 | monitor: true 30 | ``` 31 | 32 | ## Configuration Parameters 33 | 34 | ### Required Parameters 35 | 36 | - **`bucketName`**: S3 bucket name for CSV file storage 37 | - **`neptuneEndpoint`**: Neptune cluster endpoint URL 38 | - **`iamRoleArn`**: IAM role ARN with S3 and Neptune permissions 39 | 40 | ### Optional Parameters 41 | 42 | - **`s3Prefix`**: S3 prefix for uploaded files 43 | - **`parallelism`**: Load parallelism level - `LOW`, `MEDIUM`, `HIGH`, `OVERSUBSCRIBE` (default: `OVERSUBSCRIBE`) 44 | - **`monitor`**: Monitor load progress until completion (default: `false`) 45 | 46 | ## Command Line Override 47 | 48 | Individual CLI parameters can override configuration file values: 49 | 50 | ```bash 51 | java -jar neo4j-to-neptune.jar convert-csv \ 52 | -i /tmp/neo4j-export.csv \ 53 | -d output \ 54 | --bulk-load-config bulk-load.yaml \ 55 | --bucket-name override-bucket \ 56 | --parallelism HIGH 57 | ``` 58 | 59 | Available override parameters: 60 | - `--bucket-name` 61 | - `--s3-prefix` 62 | - `--neptune-endpoint` 63 | - `--iam-role-arn` 64 | - `--parallelism` 65 | - `--monitor` 66 | 67 | ## Behavior 68 | 69 | - **Optional**: Bulk loading only occurs when `--bulk-load-config` or `--neptune-endpoint` is provided 70 | - **Validation**: All required parameters are validated before conversion begins 71 | - **Process**: Conversion happens first, then files are uploaded to S3 and bulk load is initiated 72 | - **Monitoring**:
When enabled, the tool waits and reports progress until completion 73 | 74 | ## Example Output 75 | 76 | ``` 77 | Vertices: 171 78 | Edges : 253 79 | Output : output/1751656971039 80 | output/1751656971039 81 | 82 | Completed in x second(s) 83 | S3 Bucket: my-bucket 84 | S3 Prefix: neptune 85 | AWS Region: us-east-2 86 | IAM Role ARN: arn:aws:iam::123456789000:role/NeptunePolicy 87 | Neptune Endpoint: my-neptune-db.cluster-xxxxxxxxxxxx.us-east-2.neptune.amazonaws.com 88 | Bulk Load Parallelism: MEDIUM 89 | Uploading Gremlin load data to S3... 90 | Starting async upload of files from /tmp/output/1751656971039 to s3://my-bucket/neptune/1751656971039 91 | Starting async upload of /tmp/output/1751656971039/vertices.csv to s3://my-bucket/neptune/1751656971039/vertices.csv 92 | Starting async upload of /tmp/output/1751656971039/edges.csv to s3://my-bucket/neptune/1751656971039/edges.csv 93 | Successfully uploaded vertices.csv - ETag: "abc123..." 94 | Successfully uploaded edges.csv - ETag: "def456..." 95 | Successfully uploaded 2 files from /tmp/output/1751656971039 96 | Files uploaded successfully to S3. Files available at: s3://my-bucket/neptune/1751656971039/ 97 | Starting Neptune bulk load... 98 | Testing connectivity to Neptune endpoint... 99 | Successful connected to Neptune. Status: 200 healthy 100 | Neptune bulk load started successfully! Load ID: 12345678-1234-1234-1234-123456789012 101 | Monitoring load progress for job: 12345678-1234-1234-1234-123456789012 102 | Neptune bulk load status: LOAD_IN_PROGRESS 103 | Neptune bulk load status: LOAD_IN_PROGRESS 104 | Neptune bulk load completed with status: LOAD_COMPLETED 105 | ``` 106 | -------------------------------------------------------------------------------- /neptune-python-utils/neptune_python_utils/glue_gremlin_client.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. 2 | # All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). 5 | # You may not use this file except in compliance with the License. 6 | # A copy of the License is located at 7 | # 8 | # http://aws.amazon.com/apache2.0/ 9 | # 10 | # or in the "license" file accompanying this file. 11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions 13 | # and limitations under the License. 14 | 15 | from neptune_python_utils.batch_utils import BatchUtils 16 | 17 | class GlueGremlinClient: 18 | 19 | def __init__(self, endpoints, job_name=None, **kwargs): 20 | self.endpoints = endpoints 21 | self.job_name = job_name 22 | self.kwargs = kwargs 23 | 24 | def __execute_batch(self, f, pool_size=1): 25 | print('endpoints: {}'.format(self.endpoints)) 26 | print('job_name: {}'.format(self.job_name)) 27 | print('pool_size: {}'.format(pool_size)) 28 | print('kwargs: {}'.format(self.kwargs)) 29 | def execute_batch(rows): 30 | batch_utils = None 31 | try: 32 | batch_utils = BatchUtils(self.endpoints, job_name=self.job_name, to_dict=lambda x: x.asDict(), pool_size=pool_size, **self.kwargs) 33 | return f(batch_utils, rows) 34 | finally: 35 | if batch_utils: 36 | batch_utils.close() 37 | return execute_batch 38 | 39 | def add_vertices(self, label, batch_size=1, pool_size=1, **kwargs): 40 | """Adds a vertex with the supplied label for each row in a DataFrame partition. 
41 | If the DataFrame contains an '~id' column, the values in this column will be treated as user-supplied IDs for the new vertices. 42 | If the DataFrame does not have an '~id' column, Neptune will autogenerate a UUID for each vertex. 43 | 44 | Example: 45 | >>> dynamicframe.toDF().foreachPartition(neptune.add_vertices('Product')) 46 | """ 47 | 48 | return self.__execute_batch(lambda b, rows: b.add_vertices(batch_size=batch_size, rows=rows, label=label, **kwargs), pool_size=pool_size) 49 | 50 | 51 | def upsert_vertices(self, label, batch_size=1, pool_size=1, **kwargs): 52 | """Conditionally adds vertices for the rows in a DataFrame partition using the Gremlin coalesce() idiom. 53 | The DataFrame must contain an '~id' column. 54 | 55 | Example: 56 | >>> dynamicframe.toDF().foreachPartition(neptune.upsert_vertices('Product')) 57 | """ 58 | 59 | return self.__execute_batch(lambda b, rows: b.upsert_vertices(batch_size=batch_size, rows=rows, label=label, **kwargs), pool_size=pool_size) 60 | 61 | def add_edges(self, label, batch_size=1, pool_size=1, **kwargs): 62 | """Adds an edge with the supplied label for each row in a DataFrame partition. 63 | If the DataFrame contains an '~id' column, the values in this column will be treated as user-supplied IDs for the new edges. 64 | If the DataFrame does not have an '~id' column, Neptune will autogenerate a UUID for each edge. 65 | 66 | Example: 67 | >>> dynamicframe.toDF().foreachPartition(neptune.add_edges('ORDER_DETAIL')) 68 | """ 69 | 70 | return self.__execute_batch(lambda b, rows: b.add_edges(batch_size=batch_size, rows=rows, label=label, **kwargs), pool_size=pool_size) 71 | 72 | def upsert_edges(self, label, batch_size=1, pool_size=1, **kwargs): 73 | """Conditionally adds edges for the rows in a DataFrame partition using the Gremlin coalesce() idiom. 74 | The DataFrame must contain '~id', '~from', '~to' and '~label' columns. 75 | 76 | Example: 77 | >>> dynamicframe.toDF().foreachPartition(neptune.upsert_edges('ORDER_DETAIL')) 78 | """ 79 | 80 | return self.__execute_batch(lambda b, rows: b.upsert_edges(batch_size=batch_size, rows=rows, label=label, **kwargs), pool_size=pool_size) 81 | 82 | -------------------------------------------------------------------------------- /neptune-streams-utils/examples/java8/src/main/java/stream_handler/StreamHandler.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 
11 | */ 12 | 13 | package stream_handler; 14 | 15 | import com.amazonaws.neptune.StreamsRecord; 16 | import com.amazonaws.neptune.StreamsResponse; 17 | import com.amazonaws.neptune.config.CredentialsConfig; 18 | import org.apache.tinkerpop.gremlin.driver.Cluster; 19 | import org.apache.tinkerpop.gremlin.driver.SigV4WebSocketChannelizer; 20 | import org.apache.tinkerpop.gremlin.driver.remote.DriverRemoteConnection; 21 | import org.apache.tinkerpop.gremlin.driver.ser.Serializers; 22 | import org.apache.tinkerpop.gremlin.process.traversal.AnonymousTraversalSource; 23 | import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; 24 | import utils.EnvironmentVariablesUtils; 25 | 26 | import java.io.IOException; 27 | import java.util.Map; 28 | 29 | public class StreamHandler extends AbstractStreamHandler { 30 | 31 | private final GraphTraversalSource g; 32 | private final Cluster cluster; 33 | 34 | public StreamHandler(String neptuneEndpoint, 35 | Integer neptunePort, 36 | CredentialsConfig credentialsConfig, 37 | Map<String, Object> additionalParams) { 38 | super(neptuneEndpoint, neptunePort, credentialsConfig, additionalParams); 39 | 40 | this.cluster = createCluster(); 41 | this.g = AnonymousTraversalSource 42 | .traversal() 43 | .withRemote(DriverRemoteConnection.using(cluster)); 44 | } 45 | 46 | @Override 47 | public void handleRecords(StreamsResponse streamsResponse) throws IOException { 48 | 49 | StreamsResponse.LastEventId lastEventId = new StreamsResponse.LastEventId(); 50 | int recordCount = 0; 51 | 52 | for (StreamsRecord record : streamsResponse.getRecords()) { 53 | 54 | String op = record.getOp(); 55 | 56 | if (op.equals("ADD")) { 57 | String id = record.getData().getId(); 58 | String type = record.getData().getType(); 59 | if (type.equals("vl")) { 60 | System.out.println(g.V(id).valueMap(true).toList()); 61 | } else if (type.equals("e")) { 62 | System.out.println(g.E(id).valueMap(true).toList()); 63 | } 64 | } 65 | 66 | StreamsRecord.EventId eventId = record.getEventId(); 67 | 68 | lastEventId.setCommitNum(eventId.getCommitNum()); 69 | lastEventId.setOpNum(record.getEventId().getOpNum()); 70 | 71 | recordCount++; 72 | } 73 | 74 | streamsResponse.setLastEventId(lastEventId); 75 | streamsResponse.setTotalRecords(recordCount); 76 | 77 | } 78 | 79 | @Override 80 | public void close() throws IOException { 81 | cluster.close(); 82 | } 83 | 84 | private Cluster createCluster() { 85 | Cluster.Builder builder = Cluster.build() 86 | .addContactPoint(String.valueOf(additionalParams.get("neptune_cluster_endpoint"))) 87 | .port((int) additionalParams.get("neptune_port")) 88 | .enableSsl(true) 89 | .minConnectionPoolSize(1) 90 | .maxConnectionPoolSize(1) 91 | .serializer(Serializers.GRAPHBINARY_V1D0) 92 | .reconnectInterval(2000); 93 | 94 | if (Boolean.parseBoolean(EnvironmentVariablesUtils.getOptionalEnv("iam_auth_enabled", "false"))) { 95 | builder = builder.channelizer(SigV4WebSocketChannelizer.class); 96 | } 97 | 98 | return builder.create(); 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /csv-to-neptune-bulk-format/notebooks/Spotify-Data-Query.ipynb: 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%%gremlin\n", 10 | "g.V().groupCount().by(label).unfold()" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17
| "outputs": [], 18 | "source": [ 19 | "%%gremlin\n", 20 | "g.E().groupCount().by(label).unfold()" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "%%gremlin\n", 30 | "\n", 31 | "//count for each artist\n", 32 | "\n", 33 | "g.V().hasLabel('Artist').as('art')\n", 34 | ".in('BY_ARTIST').select('art')\n", 35 | ".groupCount().by('name').order(local).by(values,desc).unfold()" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "%%gremlin \n", 45 | "// given an artist, find tracks\n", 46 | "\n", 47 | "g.V()\n", 48 | ".has('name', 'Martin Garrix')\n", 49 | ".hasLabel('Artist').as('art')\n", 50 | ".in('BY_ARTIST').as('trk')\n", 51 | ".valueMap('track_name')\n" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "%%gremlin \n", 61 | "// given an artist, find Albums\n", 62 | "\n", 63 | "g.V()\n", 64 | ".has('name', 'Martin Garrix')\n", 65 | ".hasLabel('Artist').as('art')\n", 66 | ".in('BY_ARTIST').as('trk')\n", 67 | ".out('IN_ALBUM').as('alb')\n", 68 | ".order().by('name')\n", 69 | ".dedup()\n", 70 | ".valueMap(true)\n" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "%%gremlin\n", 80 | "// find playlists for Genre\n", 81 | "g.V().hasLabel('Genre').has('name', 'pop')\n", 82 | ".in('HAS_GENRE')\n", 83 | ".valueMap('name')" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "# Validate Notebook configuration\n", 91 | "Execute `%graph_notebook_config` to find the configuration\n", 92 | "\n", 93 | "The \"auth_mode\" should be \"IAM\", if it is \"DEFAULT\"\n", 94 | "\n", 95 | " Execute `%%graph_notebook_config`\n", 96 | " with output from `%graph_notebook_config` copied over, and\n", 97 | " replacing \"auth_mode\" to \"IAM\"\n", 98 | "\n", 99 | "Execute `%status` to check the connectivity\n", 100 | "\n", 101 | "The \"status\" should be \"healthy\"\n" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "%status" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "%graph_notebook_config" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "%%graph_notebook_config\n", 129 | "{\n", 130 | " \"host\": \"\",\n", 131 | " \"port\": 8182,\n", 132 | " \"auth_mode\": \"IAM\",\n", 133 | " \"iam_credentials_provider_type\": \"ROLE\",\n", 134 | " \"load_from_s3_arn\": \"arn:aws:iam:::role/\",\n", 135 | " \"ssl\": true,\n", 136 | " \"aws_region\": \"\",\n", 137 | " \"sparql\": {\n", 138 | " \"endpoint_prefix\": \"\"\n", 139 | " }\n", 140 | "}" 141 | ] 142 | } 143 | ], 144 | "metadata": { 145 | "kernelspec": { 146 | "display_name": "Python 3", 147 | "language": "python", 148 | "name": "python3" 149 | }, 150 | "language_info": { 151 | "codemirror_mode": { 152 | "name": "ipython", 153 | "version": 3 154 | }, 155 | "file_extension": ".py", 156 | "mimetype": "text/x-python", 157 | "name": "python", 158 | "nbconvert_exporter": "python", 159 | "pygments_lexer": "ipython3", 160 | "version": "3.6.12" 161 | } 162 | }, 163 | "nbformat": 4, 
164 | "nbformat_minor": 4 165 | } -------------------------------------------------------------------------------- /neptune-gremlin-js/README.md: -------------------------------------------------------------------------------- 1 | # neptune-gremlin 2 | 3 | The `neptune-gremlin` package is an SDK for querying an Amazon Neptune graph 4 | database using gremlin. Amazon Neptune is is a fast, reliable, fully managed 5 | graph database service that makes it easy to build and run applications. It 6 | allows you to build connections between identities, build knowledge graphs, 7 | detect fraud patterns, and make predictions. It can also simply be used as a 8 | general purpose database, which is made easier by this package. 9 | 10 | The source for this package includes an AWS CDK application that creates a Neptune 11 | cluster and a Lambda function in the same VPC to facilitate integration testing. 12 | 13 | *NOTICE* _This is an experimental package that is not supported in any way by AWS. 14 | Please do not use it for mission critical production workloads!_ 15 | 16 | ## Installation 17 | 18 | ```sh 19 | npm install neptune-gremlin 20 | ``` 21 | 22 | ## Usage 23 | 24 | ### Connect to Neptune: 25 | 26 | ```Javascript 27 | const gremlin = require("neptune-gremlin") 28 | 29 | // Get configuration values from the environment 30 | const host = process.env.NEPTUNE_ENDPOINT 31 | const port = process.env.NEPTUNE_PORT 32 | const useIam = process.env.USE_IAM === "true" 33 | 34 | // Create a new connection to the Neptune database 35 | const connection = new gremlin.Connection(host, port, {useIam}) 36 | await connection.connect() 37 | ``` 38 | 39 | ### Save a node (vertex): 40 | 41 | ```Javascript 42 | const node1 = { 43 | "unique-id-1", 44 | properties: { 45 | name: "Test Node", 46 | a: "A", 47 | b: "B", 48 | }, 49 | labels: ["label1", "label2"], 50 | } 51 | await connection.saveNode(node1) 52 | ``` 53 | 54 | ### Save an edge 55 | 56 | ```JavaScript 57 | 58 | const edge1 = { 59 | id: uuid.v4(), 60 | label: "points_to", 61 | to: node2.id, 62 | from: node1.id, 63 | properties: { 64 | "a": "b", 65 | }, 66 | } 67 | 68 | await connection.saveEdge(edge1) 69 | ``` 70 | 71 | ### Get all nodes and edges in the graph 72 | 73 | ```JavaScript 74 | const searchResult = await connection.search({}) 75 | ``` 76 | 77 | ### Run a custom traversal: 78 | 79 | ```Javascript 80 | const f = (g) => { 81 | return await g.V() 82 | .has("person", "name", "Eric") 83 | .bothE().bothV().dedup() 84 | .valueMap(true).toList() 85 | } 86 | const result = await connection.query(f) 87 | ``` 88 | 89 | ### Partition the graph 90 | 91 | A Neptune cluster does not have a native paritioning concept. All nodes are in the same database. 92 | Gremlin has a feature called a partition strategy that adds a property to each node and edge 93 | automatically to segment your graph into different sub graphs. 94 | 95 | All you have to do with this library is set the partition on the connection: 96 | 97 | ```JavaScript 98 | connection.setPartition("test_partition") 99 | ``` 100 | 101 | All subsequent calls using that connection will have the `_partition` property added by default. 102 | 103 | ## Development 104 | 105 | ### Building the project 106 | 107 | This package is all Javascript, so the build script just runs eslint, copies `neptune-gremlin.js` 108 | into the cdk app's lambda folder, and then synthesizes the cdk app. 
109 | 110 | ```sh 111 | npm run build 112 | ``` 113 | 114 | ### Sample application 115 | 116 | There is a sample app at [https://github.com/aws-samples/cdk-neptune-knowledge-graph](https://github.com/aws-samples/cdk-neptune-knowledge-graph) where you can see how to incorporate this library into a REST API. 117 | 118 | ### Making changes 119 | 120 | If you want to make a change or an addition to the package (contributions welcome!), please 121 | add a test to `cdk-test-app/lambda/integration-test.js`. Deploy the stack to your AWS account 122 | and invoke the function to make sure everything works as expected. 123 | 124 | ### Deploying the cdk test app 125 | 126 | Make sure you run the top level build script, since it copies the latest `neptune-gremlin.js` 127 | file into the cdk app's lambda folder. 128 | 129 | Also keep in mind that the very first call to a new Neptune cluster tends to fail with a 500, 130 | so if that happens, just try again. 131 | 132 | _Note that this app will result in charges in your AWS account! Be sure to destroy the stack 133 | when you are done!_ 134 | 135 | ```sh 136 | npm run build 137 | cd cdk-test-app 138 | npm install 139 | cd lambda 140 | npm install 141 | cd .. 142 | npx cdk bootstrap 143 | npx cdk synth 144 | npx cdk diff 145 | npx cdk deploy 146 | ``` 147 | 148 | ### Cleaning up the CDK stack to avoid charges to your AWS account 149 | 150 | ```sh 151 | cdk destroy 152 | ``` 153 | -------------------------------------------------------------------------------- /csv-to-neptune-bulk-format/notebooks/Prepare-Data-Spotify.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "run -i csv_converter.py -h" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "%%bash\n", 19 | "# clean the source and data folders\n", 20 | "rm -rf .data\n", 21 | "rm -rf .source\n" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "%run -i csv_converter.py -v ./data-config-spotify.json --s3" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "%tb" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "%%bash\n", 49 | "pwd\n", 50 | "ls\n", 51 | "echo '.source'\n", 52 | "ls -l .source\n", 53 | "echo '.data'\n", 54 | "ls -l .data\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "# Generate the token to reset and clean the database\n", 64 | "%db_reset --generate-token" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "# Use the token generated via --generate-token here\n", 74 | "%db_reset --token 3ebbc751-40e8-44d8-99c8-fad1213e9be4" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "# if you just reset the db, try the %status few times to make sure it is healthy\n", 84 | "%status" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": 
[], 92 | "source": [ 93 | "%%bash\n", 94 | "DB_HOST=\n", 95 | "awscurl --service neptune-db \\\n", 96 | " -X POST \\\n", 97 | " -H 'Content-Type: application/json' \\\n", 98 | " https://$DB_HOST:8182/loader -d '{\n", 99 | " \"source\": \"s3:///\",\n", 100 | " \"format\": \"csv\",\n", 101 | " \"iamRoleArn\": \"arn:aws:iam:::role/\",\n", 102 | " \"mode\": \"AUTO\",\n", 103 | " \"region\": \"\",\n", 104 | " \"failOnError\": \"FALSE\",\n", 105 | " \"parallelism\": \"OVERSUBSCRIBE\",\n", 106 | " \"updateSingleCardinalityProperties\": \"TRUE\"\n", 107 | "}'" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "%%bash\n", 117 | "LOAD_ID=ff90fd71-66c5-427b-be1f-f95ded45ae70\n", 118 | "DB_HOST=\n", 119 | "awscurl --service neptune-db \\\n", 120 | "-X GET \\\n", 121 | "https://$DB_HOST:8182/loader/$LOAD_ID?details=true&errors=true" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# s3:///\n", 131 | "%load" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "%load_status ff90fd71-66c5-427b-be1f-f95ded45ae70" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "%%gremlin\n", 150 | "g.V().groupCount().by(label).unfold()" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "%%gremlin\n", 160 | "g.E().groupCount().by(label).unfold()" 161 | ] 162 | } 163 | ], 164 | "metadata": { 165 | "kernelspec": { 166 | "display_name": "Python 3", 167 | "language": "python", 168 | "name": "python3" 169 | }, 170 | "language_info": { 171 | "codemirror_mode": { 172 | "name": "ipython", 173 | "version": 3 174 | }, 175 | "file_extension": ".py", 176 | "mimetype": "text/x-python", 177 | "name": "python", 178 | "nbconvert_exporter": "python", 179 | "pygments_lexer": "ipython3", 180 | "version": "3.6.12" 181 | } 182 | }, 183 | "nbformat": 4, 184 | "nbformat_minor": 4 185 | } -------------------------------------------------------------------------------- /neptune-streams-utils/examples/java8/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.example 8 | neptune-streams-handler 9 | 1.0-SNAPSHOT 10 | 11 | 12 | UTF-8 13 | 1.8 14 | stream_handler 15 | 3.4.3 16 | 2.0.2 17 | 18 | 19 | 20 | 21 | 22 | com.amazonaws 23 | amazon-neptune-streams-replicator-core 24 | 1.0.0 25 | provided 26 | 27 | 28 | org.slf4j 29 | slf4j-api 30 | 31 | 32 | 33 | 34 | 35 | com.amazonaws 36 | amazon-neptune-streams-replicator-lambda 37 | 1.0.0 38 | provided 39 | 40 | 41 | org.slf4j 42 | slf4j-api 43 | 44 | 45 | 46 | 47 | 48 | org.apache.tinkerpop 49 | gremlin-driver 50 | ${gremlin.version} 51 | 52 | 53 | 54 | com.amazonaws 55 | amazon-neptune-sigv4-signer 56 | ${sig4.signer.version} 57 | 58 | 59 | 60 | com.amazonaws 61 | amazon-neptune-gremlin-java-sigv4 62 | ${sig4.signer.version} 63 | 64 | 65 | org.slf4j 66 | slf4j-api 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | org.apache.maven.plugins 78 | maven-shade-plugin 79 | 3.2.4 80 | 81 | false 82 | 83 | 84 | 85 | package 86 | 87 | shade 88 | 89 | 90 | ${uberjar.name} 91 | 92 | 93 | 94 | 95 | *:* 96 | 97 | META-INF/*.SF 98 | META-INF/*.DSA 99 | 
META-INF/*.RSA 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | maven-compiler-plugin 111 | 3.8.1 112 | 113 | 1.8 114 | 1.8 115 | 116 | 117 | 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/DataType.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import org.apache.commons.lang3.StringUtils; 16 | 17 | import java.util.regex.Pattern; 18 | 19 | public enum DataType { 20 | None { 21 | @Override 22 | public String typeDescription() { 23 | return ""; 24 | } 25 | }, 26 | Boolean, 27 | Byte { 28 | @Override 29 | public boolean isNumeric() { 30 | return true; 31 | } 32 | }, 33 | Short { 34 | @Override 35 | public boolean isNumeric() { 36 | return true; 37 | } 38 | }, 39 | Int { 40 | @Override 41 | public boolean isNumeric() { 42 | return true; 43 | } 44 | }, 45 | Long { 46 | @Override 47 | public boolean isNumeric() { 48 | return true; 49 | } 50 | }, 51 | Double { 52 | @Override 53 | public boolean isNumeric() { 54 | return true; 55 | } 56 | }, 57 | Date, 58 | String; 59 | 60 | public boolean isNumeric() { 61 | return false; 62 | } 63 | 64 | public String typeDescription() { 65 | return java.lang.String.format(":%s", name().toLowerCase()); 66 | } 67 | 68 | public static DataType identifyType(String s) { 69 | 70 | if (isBoolean(s)) { 71 | return DataType.Boolean; 72 | } else if (isByte(s)) { 73 | return DataType.Byte; 74 | } else if (isShort(s)) { 75 | return DataType.Short; 76 | } else if (isInt(s)) { 77 | return DataType.Int; 78 | } else if (isLong(s)) { 79 | return DataType.Long; 80 | } else if (isDouble(s)) { 81 | return DataType.Double; 82 | } else { 83 | if (StringUtils.isEmpty(s)) { 84 | return DataType.None; 85 | } 86 | try { 87 | DateTimeUtils.parseISODate(s); 88 | return DataType.Date; 89 | } catch (Exception e) { 90 | return DataType.String; 91 | } 92 | } 93 | } 94 | 95 | private static final String BOOLEAN_PATTERN = "true|false"; 96 | private static final Pattern boolPattern = Pattern.compile(BOOLEAN_PATTERN, Pattern.CASE_INSENSITIVE); 97 | 98 | 99 | private static boolean isBoolean(String s) { 100 | return boolPattern.matcher(s).matches(); 101 | } 102 | 103 | private static boolean isByte(String s) { 104 | try { 105 | java.lang.Byte.parseByte(s); 106 | return true; 107 | } catch (NumberFormatException e) { 108 | return false; 109 | } 110 | } 111 | 112 | private static boolean isShort(String s) { 113 | try { 114 | java.lang.Short.parseShort(s); 115 | return true; 116 | } catch (NumberFormatException e) { 117 | return false; 118 | } 119 | } 120 | 121 | private static boolean isInt(String s) { 122 | try { 123 | java.lang.Integer.parseInt(s); 124 | return true; 125 | } catch (NumberFormatException e) { 126 | return false; 127 | } 128 | } 129 | 130 | 
private static boolean isLong(String s) { 131 | try { 132 | java.lang.Long.parseLong(s); 133 | return true; 134 | } catch (NumberFormatException e) { 135 | return false; 136 | } 137 | } 138 | 139 | private static boolean isDouble(String s) { 140 | try { 141 | java.lang.Double.parseDouble(s); 142 | return true; 143 | } catch (NumberFormatException e) { 144 | return false; 145 | } 146 | } 147 | 148 | public static DataType getBroadestType(DataType oldType, DataType newType) { 149 | 150 | if (oldType == newType) { 151 | return oldType; 152 | } 153 | 154 | if (oldType == DataType.None) { 155 | return newType; 156 | } 157 | 158 | if (newType == DataType.None) { 159 | return oldType; 160 | } 161 | 162 | if (oldType == DataType.String || newType == DataType.String) { 163 | return DataType.String; 164 | } 165 | 166 | if (oldType.isNumeric() && newType.isNumeric()) { 167 | if (newType.ordinal() > oldType.ordinal()) { 168 | return newType; 169 | } else { 170 | return oldType; 171 | } 172 | } 173 | 174 | // Mixed non-numeric types (e.g. Boolean and Date) broaden to String 175 | return DataType.String; 176 | } 177 | 178 | } 179 | -------------------------------------------------------------------------------- /opencypher-compatability-checker/README.md: -------------------------------------------------------------------------------- 1 | # openCypher Compatibility Checker 2 | 3 | A migration helper tool that validates openCypher queries for compatibility with Amazon Neptune, identifying unsupported functions and clauses to help assess migration effort from Neo4j to Neptune. 4 | 5 | ## Overview 6 | 7 | This tool analyzes openCypher queries and reports: 8 | - Compatibility status for each query 9 | - Specific unsupported functions/clauses with their positions 10 | - Suggested replacements where available 11 | - Detailed error descriptions 12 | 13 | Supports validation for both **Neptune Analytics (NA)** and **Neptune Database (NDB)**. 14 | 15 | To select the appropriate version, go to the [Releases](https://github.com/awslabs/amazon-neptune-tools/releases) page. 16 | 17 | For Neptune Analytics, find the release [opencypher-compatability-checker-analytics](https://github.com/awslabs/amazon-neptune-tools/releases/tag/opencypher-compatability-checker-analytics) 18 | 19 | For Neptune Database, find the [release](https://github.com/awslabs/amazon-neptune-tools/releases) tagged with the version of your Neptune Database cluster. Support for openCypher clauses and functions is version dependent, so please ensure you select the correct version. 20 | 21 | ## Prerequisites 22 | 23 | - **Java 17** or higher 24 | - Pre-built JAR file: `NeptuneNeo4jMigrationHelper-<version>.jar` located on the [Releases](https://github.com/awslabs/amazon-neptune-tools/releases) page. 25 | 26 | ## Installation & Build 27 | 28 | The tool is distributed as a fat JAR created during the build process. No additional dependencies are required at runtime other than Java 17. 29 | 30 | ## Usage 31 | 32 | ### Basic Command 33 | 34 | ```bash 35 | java -jar NeptuneNeo4jMigrationHelper-<version>.jar --input <input-file> [--output <output-file>] 36 | ``` 37 | 38 | ### Parameters 39 | 40 | | Parameter | Required | Description | 41 | |-----------|----------|-------------| 42 | | `--input` | Yes | Path to JSON file containing queries to validate. A sample is provided in [input.json](./input.json)| 43 | | `--output` | No | Path for JSON output file. If omitted, prints to stdout.
A sample of the output is provided in [output.json](./output.json) | 44 | 45 | ### Examples 46 | 47 | ```bash 48 | # Validate queries and save results to file 49 | java -jar NeptuneNeo4jMigrationHelper-1.0.jar --input queries.json --output results.json 50 | 51 | # Validate queries and print to console 52 | java -jar NeptuneNeo4jMigrationHelper-1.0.jar --input queries.json 53 | ``` 54 | 55 | ## Configuration 56 | 57 | ### Logging 58 | 59 | Control log verbosity with the `LOG_LEVEL` environment variable: 60 | 61 | ```bash 62 | export LOG_LEVEL=DEBUG # Detailed debugging information 63 | export LOG_LEVEL=INFO # Default level 64 | export LOG_LEVEL=WARN # Warnings only 65 | export LOG_LEVEL=ERROR # Errors only 66 | ``` 67 | 68 | A log file `migration-helper-<timestamp>.log` is created in the execution directory. 69 | 70 | ## Input Format 71 | 72 | Create a JSON file with the following structure: 73 | 74 | ```json 75 | { 76 | "targetSystem": "NA", 77 | "queries": [ 78 | { 79 | "id": 1, 80 | "query": "MATCH (n:Person) RETURN n LIMIT 10" 81 | }, 82 | { 83 | "id": 2, 84 | "query": "RETURN apoc.coll.intersection([1,2,3], [2,3,4])" 85 | } 86 | ] 87 | } 88 | ``` 89 | 90 | ### Fields 91 | 92 | - **targetSystem**: Target Neptune system 93 | - `"NA"` - Neptune Analytics 94 | - `"NDB"` - Neptune Database 95 | - **queries**: Array of query objects 96 | - **id**: Unique identifier for the query 97 | - **query**: openCypher query string to validate 98 | 99 | ## Output Format 100 | 101 | The tool generates a JSON report with validation results: 102 | 103 | ```json 104 | { 105 | "results": [ 106 | { 107 | "id": 1, 108 | "supported": true, 109 | "errorDefinitions": [] 110 | }, 111 | { 112 | "id": 2, 113 | "supported": false, 114 | "errorDefinitions": [ 115 | { 116 | "position": "line 1, column 8 (offset: 7)", 117 | "name": "apoc.coll.intersection", 118 | "replacement": "collintersection", 119 | "description": "apoc.coll.intersection is not supported in this release but try replacing with collintersection" 120 | } 121 | ] 122 | } 123 | ] 124 | } 125 | ``` 126 | 127 | ### Result Fields 128 | 129 | - **id**: Matches the input query ID 130 | - **supported**: Boolean indicating Neptune compatibility 131 | - **errorDefinitions**: Array of compatibility issues 132 | - **position**: Location of the issue in the query 133 | - **name**: Unsupported function/clause name 134 | - **replacement**: Suggested Neptune-compatible alternative (if available) 135 | - **description**: Detailed explanation of the compatibility issue 136 | 137 | ## Troubleshooting 138 | 139 | ### Common Errors 140 | 141 | 1. **Invalid input file**: Ensure JSON is properly formatted 142 | 2. **Java version**: Requires Java 17+ 143 | 3. **File permissions**: Ensure read access to input file and write access to output directory 144 | 145 | ### Getting Help 146 | 147 | - Check log files for detailed error information 148 | - Verify input JSON format matches the specification 149 | - Ensure target system value is either "NA" or "NDB" 150 | 151 | For all issues with the tool please file an issue on this GitHub repository. 152 | 153 | ## License 154 | 155 | This tool is part of the AWS Samples repository and follows the same licensing terms. -------------------------------------------------------------------------------- /neo4j-to-neptune/src/main/java/com/amazonaws/services/neptune/metadata/ConversionConfig.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 Amazon.com, Inc. or its affiliates.
All Rights Reserved. 3 | Licensed under the Apache License, Version 2.0 (the "License"). 4 | You may not use this file except in compliance with the License. 5 | A copy of the License is located at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | or in the "license" file accompanying this file. This file is distributed 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 9 | express or implied. See the License for the specific language governing 10 | permissions and limitations under the License. 11 | */ 12 | 13 | package com.amazonaws.services.neptune.metadata; 14 | 15 | import lombok.Data; 16 | import lombok.NoArgsConstructor; 17 | import org.yaml.snakeyaml.LoaderOptions; 18 | import org.yaml.snakeyaml.Yaml; 19 | import org.yaml.snakeyaml.constructor.Constructor; 20 | 21 | import java.io.File; 22 | import java.io.FileInputStream; 23 | import java.io.IOException; 24 | import java.util.*; 25 | 26 | /** 27 | * Configuration class for label mapping and filtering from YAML file. 28 | *

29 |  * Expected YAML format:
30 |  * vertexLabels:
31 |  *   OldVertexLabel: NewVertexLabel
32 |  *   AnotherOldLabel: AnotherNewLabel
33 |  * edgeLabels:
34 |  *   OLD_EDGE_TYPE: NEW_EDGE_TYPE
35 |  *   ANOTHER_OLD_TYPE: ANOTHER_NEW_TYPE
36 |  * vertexIdTransformation:
37 |  *   ~id: "{_labels}_{_id}"
38 |  * edgeIdTransformation:
39 |  *   ~id: "{_type}_{_start}_{_end}"
40 |  * skipVertices:
41 |  *   byId:
42 |  *     - "vertex_id_1"
43 |  *     - "vertex_id_2"
44 |  *   byLabel:
45 |  *     - "LabelToSkip"
46 |  *     - "AnotherLabelToSkip"
47 |  * skipEdges:
48 |  *   byLabel:
49 |  *     - "RELATIONSHIP_TYPE_TO_SKIP"
50 |  *     - "ANOTHER_TYPE_TO_SKIP"
51 |  */
52 | @Data
53 | @NoArgsConstructor
54 | public class ConversionConfig {
55 | 
56 |     // Label mapping configurations
57 |     private Map<String, String> vertexLabels = new HashMap<>();
58 |     private Map<String, String> edgeLabels = new HashMap<>();
59 | 
60 |     // ID transformation configurations
61 |     private Map<String, String> vertexIdTransformation = new HashMap<>();
62 |     private Map<String, String> edgeIdTransformation = new HashMap<>();
63 | 
64 |     // Skip configurations
65 |     private SkipVertices skipVertices = new SkipVertices();
66 |     private SkipEdges skipEdges = new SkipEdges();
67 | 
68 |     /**
69 |      * Nested class for skipVertices configuration
70 |      */
71 |     @Data
72 |     @NoArgsConstructor
73 |     public static class SkipVertices {
74 |         private Set<String> byId = new HashSet<>();
75 |         private Set<String> byLabel = new HashSet<>();
76 |     }
77 | 
78 |     /**
79 |      * Nested class for skipEdges configuration
80 |      */
81 |     @Data
82 |     @NoArgsConstructor
83 |     public static class SkipEdges {
84 |         private Set<String> byLabel = new HashSet<>();
85 |     }
86 | 
87 |     /**
88 |      * Factory method to create a ConversionConfig from a YAML file using automatic object mapping
89 |      */
90 |     public static ConversionConfig fromFile(File yamlFile) throws IOException {
91 |         if (yamlFile == null || !yamlFile.exists()) {
92 |             return new ConversionConfig(); // Return empty config if no file provided
93 |         }
94 | 
95 |         Constructor constructor = new Constructor(ConversionConfig.class, new LoaderOptions());
96 |         Yaml yaml = new Yaml(constructor);
97 | 
98 |         try (FileInputStream inputStream = new FileInputStream(yamlFile)) {
99 |             ConversionConfig config = yaml.load(inputStream);
100 | 
101 |             // Handle null case if YAML file is empty or malformed
102 |             if (config == null) {
103 |                 config = new ConversionConfig();
104 |             }
105 | 
106 |             // Ensure nested objects are initialized when yaml fields are left empty
107 |             if (config.skipVertices == null) {
108 |                 config.skipVertices = new SkipVertices();
109 |             }
110 |             if (config.skipEdges == null) {
111 |                 config.skipEdges = new SkipEdges();
112 |             }
113 |             if (config.vertexLabels == null) {
114 |                 config.vertexLabels = new HashMap<>();
115 |             }
116 |             if (config.edgeLabels == null) {
117 |                 config.edgeLabels = new HashMap<>();
118 |             }
119 |             if (config.vertexIdTransformation == null) {
120 |                 config.vertexIdTransformation = new HashMap<>();
121 |             }
122 |             if (config.edgeIdTransformation == null) {
123 |                 config.edgeIdTransformation = new HashMap<>();
124 |             }
125 | 
126 |             // Ensure nested sets are initialized
127 |             if (config.skipVertices.byId == null) {
128 |                 config.skipVertices.byId = new HashSet<>();
129 |             }
130 |             if (config.skipVertices.byLabel == null) {
131 |                 config.skipVertices.byLabel = new HashSet<>();
132 |             }
133 |             if (config.skipEdges.byLabel == null) {
134 |                 config.skipEdges.byLabel = new HashSet<>();
135 |             }
136 | 
137 |             return config;
138 |         }
139 |     }
140 | 
141 |     /**
142 |      * Checks if any skip rules are configured. 
143 | */ 144 | public boolean hasSkipRules() { 145 | return !skipVertices.byId.isEmpty() || !skipVertices.byLabel.isEmpty() || !skipEdges.byLabel.isEmpty(); 146 | } 147 | 148 | /** 149 | * Checks if any ID transformations are configured. 150 | */ 151 | public boolean hasIdTransformations() { 152 | return !vertexIdTransformation.isEmpty() || !edgeIdTransformation.isEmpty(); 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /csv-to-neptune-bulk-format/csv_converter.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import logging 4 | logger = logging.getLogger(__name__) 5 | 6 | import argparse 7 | import csv 8 | import data_config 9 | 10 | import boto3 11 | 12 | import locale 13 | 14 | __all__ = [] 15 | __version__ = 0.1 16 | __date__ = '2021-01-21' 17 | __updated__ = '2021-01-21' 18 | 19 | class RawCSVConverter: 20 | def __init__(self, conf_file_names, gen_dup_file=False, use_s3=False, local_enc='utf-8'): 21 | self.gen_dup_file = gen_dup_file 22 | self.use_s3 = use_s3 23 | self.local_enc = local_enc 24 | self.conf_defs =[] 25 | try: 26 | data_config.BaseDef.log_stats() 27 | for conf_file_name in conf_file_names: 28 | self.conf_defs.append(data_config.ConfigDef(conf_file_name, self.gen_dup_file, self.local_enc)) 29 | except Exception as ex: 30 | raise Exception(f'Unable to read configuration file {conf_file_name} \nexception: {str(ex)}') 31 | if self.use_s3 : 32 | try : 33 | #self.s3 = boto3.client('s3') 34 | self.s3 = boto3.resource('s3') 35 | except Exception as ex: 36 | raise Exception(f'Unable to connect to s3 \nexception: {str(ex)}') 37 | 38 | def convert_to_csv(self): 39 | for index1, conf_def in enumerate(self.conf_defs) : 40 | data_file_names = conf_def.file_names 41 | # initialize writers 42 | conf_def.init_writers() 43 | 44 | for index2, data_file_name in enumerate(data_file_names): 45 | try: 46 | if self.use_s3 : 47 | data_file_name = conf_def.download_source_file(self.s3, data_file_name) 48 | else : 49 | data_file_name = conf_def.source_folder + '/' + data_file_name 50 | logger.info(f'Processing Data File:{index2}:{data_file_name}') 51 | with open(data_file_name, newline='', encoding=self.local_enc) as csv_file: 52 | reader = csv.DictReader(csv_file, escapechar="\\") 53 | #process the file 54 | try: 55 | conf_def.process_csv_to_csv(reader) 56 | except Exception as ex: 57 | raise Exception(f'Unable to process the CSV file: {data_file_name} \nexception: {str(ex)}') 58 | #close the file 59 | csv_file.close() 60 | except Exception as ex: 61 | logger.error(f'Unable to load the CSV file: {data_file_name} \nexception: {str(ex)}') 62 | 63 | # close files 64 | conf_def.close_writers() 65 | # delete current files and upload new files 66 | if self.use_s3 : 67 | if index1 == 0 : conf_def.delete_data_files(self.s3) 68 | conf_def.upload_data_files(self.s3) 69 | 70 | if self.gen_dup_file: data_config.BaseDef.write_dup_files() 71 | #log stats 72 | data_config.BaseDef.log_stats() 73 | data_config.BaseDef.clean_stats() 74 | 75 | def main(argv=None): 76 | program_name = os.path.basename(sys.argv[0]) 77 | program_version = "v0.1" 78 | program_build_date = "%s" % __updated__ 79 | 80 | program_version_string = '%%prog %s (%s)' % ( 81 | program_version, program_build_date) 82 | program_longdesc = ("A utility python script to convert CSV data file into the Amazon Neptune CSV format " 83 | "for bulk ingestion. 
See " 84 | "https://docs.aws.amazon.com/neptune/latest/userguide/bulk-load-tutorial-format-gremlin.html." 85 | ) 86 | program_license = "Copyright 2018 Amazon.com, Inc. or its affiliates. \ 87 | Licensed under the Apache License 2.0\nhttp://aws.amazon.com/apache2.0/" 88 | system_enc = locale.getpreferredencoding() 89 | 90 | if argv is None: 91 | argv = sys.argv[1:] 92 | try: 93 | # setup argument parser 94 | parser = argparse.ArgumentParser( description=program_license, epilog=program_longdesc) 95 | parser.add_argument("conf_file_names", nargs='+', 96 | help="Space separated, one or more Data Configuration File(s) (json)\nUse separate files if Node/Edges are different.", metavar="DATA_CONF_FILES") 97 | parser.add_argument("-e", "--enc", default='utf-8', dest='local_enc', 98 | help="Optional: encoding for the source files 'utf-8' or 'cp1252'") 99 | parser.add_argument("--s3", dest='use_s3', action='store_true', 100 | help="Use S3 as source and destination of files") 101 | parser.add_argument("--dup", dest='gen_dup_file', action='store_true', 102 | help="Generate file for duplicates") 103 | parser.add_argument("-v", "--verbose", dest='verbose', action='store_true', 104 | help="Emit Verbose logging") 105 | 106 | # process arguments 107 | args = parser.parse_args(argv) 108 | 109 | conf_file_names = args.conf_file_names 110 | log_level = logging.DEBUG if args.verbose else logging.INFO 111 | logging.basicConfig(format='%(asctime)s %(name)s:%(levelname)s:%(message)s', datefmt='%H:%M:%S') 112 | logging.getLogger(__name__).setLevel(log_level) 113 | logging.getLogger('data_config').setLevel(log_level) 114 | #logging.getLogger('botocore').setLevel(logging.ERROR) 115 | #logging.getLogger('s3transfer').setLevel(logging.ERROR) 116 | #logging.getLogger('urllib3').setLevel(logging.ERROR) 117 | 118 | # MAIN BODY # 119 | 120 | logger.info(f'Processing {conf_file_names}') 121 | logger.debug(f'System File Encoding: {system_enc}') 122 | csvConverter = RawCSVConverter(conf_file_names, args.gen_dup_file, args.use_s3, args.local_enc) 123 | csvConverter.convert_to_csv() 124 | return 0 125 | 126 | except Exception as e: 127 | indent = len(program_name) * " " 128 | logger.error(program_name + ": " + f'{e}' + "\n") 129 | logger.error(indent + " for help use --help") 130 | return 2 131 | 132 | if __name__ == '__main__': 133 | sys.exit(main()) -------------------------------------------------------------------------------- /neptune-python-utils/neptune_python_utils/bulkload.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Amazon.com, Inc. or its affiliates. 2 | # All Rights Reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"). 5 | # You may not use this file except in compliance with the License. 6 | # A copy of the License is located at 7 | # 8 | # http://aws.amazon.com/apache2.0/ 9 | # 10 | # or in the "license" file accompanying this file. 11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 12 | # either express or implied. See the License for the specific language governing permissions 13 | # and limitations under the License. 
14 | 
15 | import json
16 | import requests
17 | import os
18 | import sys
19 | import time
20 | from neptune_python_utils.endpoints import Endpoints
21 | 
22 | class BulkLoad:
23 | 
24 |     def __init__(self,
25 |                  source,
26 |                  format='csv',
27 |                  role=None,
28 |                  mode='AUTO',
29 |                  region=None,
30 |                  fail_on_error=False,
31 |                  parallelism='OVERSUBSCRIBE',
32 |                  base_uri='http://aws.amazon.com/neptune/default',
33 |                  named_graph_uri='http://aws.amazon.com/neptune/vocab/v01/DefaultNamedGraph',
34 |                  update_single_cardinality_properties=False,
35 |                  queue_request=False,
36 |                  dependencies=None,  # None rather than a shared mutable default list
37 |                  endpoints=None,
38 |                  **kwargs):
39 | 
40 |         self.source = source
41 |         self.format = format
42 | 
43 |         if role is None:
44 |             assert ('NEPTUNE_LOAD_FROM_S3_ROLE_ARN' in os.environ), 'role is missing.'
45 |             self.role = os.environ['NEPTUNE_LOAD_FROM_S3_ROLE_ARN']
46 |         else:
47 |             self.role = role
48 | 
49 |         self.mode = mode
50 | 
51 |         if region is None:
52 |             assert ('AWS_REGION' in os.environ or 'SERVICE_REGION' in os.environ), 'region is missing.'
53 |             self.region = os.environ.get('SERVICE_REGION', os.environ.get('AWS_REGION', None))
54 |         else:
55 |             self.region = region
56 | 
57 |         if endpoints is None:
58 |             self.endpoints = Endpoints(region_name=self.region)
59 |         else:
60 |             self.endpoints = endpoints
61 | 
62 |         self.fail_on_error = 'TRUE' if fail_on_error else 'FALSE'
63 |         self.parallelism = parallelism
64 |         self.base_uri = base_uri
65 |         self.named_graph_uri = named_graph_uri
66 |         self.update_single_cardinality_properties = 'TRUE' if update_single_cardinality_properties else 'FALSE'
67 |         self.queue_request = 'TRUE' if queue_request else 'FALSE'
68 |         self.dependencies = dependencies if dependencies is not None else []
69 |         self.kwargs = kwargs
70 | 
71 |     def __load_from(self, source):
72 |         return {
73 |             'source' : source,
74 |             'format' : self.format,
75 |             'iamRoleArn' : self.role,
76 |             'mode': self.mode,
77 |             'region' : self.region,
78 |             'failOnError' : self.fail_on_error,
79 |             'parallelism' : self.parallelism,
80 |             'parserConfiguration': {
81 |                 'baseUri': self.base_uri,
82 |                 'namedGraphUri': self.named_graph_uri
83 |             },
84 |             'updateSingleCardinalityProperties': self.update_single_cardinality_properties,
85 |             'queueRequest': self.queue_request,
86 |             'dependencies': self.dependencies
87 |         }
88 | 
89 |     def __load(self, loader_endpoint, data):
90 | 
91 |         json_string = json.dumps(data)
92 | 
93 |         request_parameters = loader_endpoint.prepare_request('POST', json_string, headers={'Content-Type':'application/json'})
94 | 
95 |         response = requests.post(request_parameters.uri, data=json_string, headers=request_parameters.headers, **self.kwargs)
96 |         response.encoding = 'utf-8'
97 | 
98 |         if response.status_code != 200:
99 |             raise Exception('{}: {}'.format(response.status_code, response.text))
100 | 
101 |         json_response = response.json()
102 | 
103 |         return json_response['payload']['loadId']
104 | 
105 |     def load_async(self):
106 |         localised_source = self.source.replace('${AWS_REGION}', self.region)
107 |         loader_endpoint = self.endpoints.loader_endpoint()
108 |         json_payload = self.__load_from(localised_source)
109 |         print('''curl -X POST \\
110 |     -H 'Content-Type: application/json' \\
111 |     {} -d \'{}\''''.format(loader_endpoint, json.dumps(json_payload, indent=4)))
112 |         load_id = self.__load(loader_endpoint, json_payload)
113 |         return BulkLoadStatus(self.endpoints.load_status_endpoint(load_id), **self.kwargs)
114 | 
115 |     def load(self, interval=2):
116 |         status = self.load_async()
117 |         print('status_uri: {}'.format(status.load_status_endpoint))
118 |         status.wait(interval)
119 | 
120 
| class BulkLoadStatus: 121 | 122 | def __init__(self, load_status_endpoint, **kwargs): 123 | self.load_status_endpoint = load_status_endpoint 124 | self.kwargs = kwargs 125 | 126 | def status(self, details=False, errors=False, page=1, errors_per_page=10): 127 | params = { 128 | 'errors': 'TRUE' if errors else 'FALSE', 129 | 'details': 'TRUE' if details else 'FALSE', 130 | 'page': page, 131 | 'errorsPerPage': errors_per_page 132 | } 133 | request_parameters = self.load_status_endpoint.prepare_request(querystring=params) 134 | 135 | response = requests.get(request_parameters.uri, params=params, headers=request_parameters.headers, **self.kwargs) 136 | response.encoding = 'utf-8' 137 | 138 | if response.status_code != 200: 139 | raise Exception('{}: {}'.format(response.status_code, response.text)) 140 | 141 | json_response = response.json() 142 | 143 | status = json_response['payload']['overallStatus']['status'] 144 | 145 | return (status, json_response) 146 | 147 | 148 | def uri(self): 149 | return self.load_status_endpoint 150 | 151 | def wait(self, interval=2): 152 | while True: 153 | status, json_response = self.status() 154 | if status == 'LOAD_COMPLETED': 155 | print('load completed') 156 | break 157 | if status == 'LOAD_IN_PROGRESS': 158 | print('loading... {} records inserted'.format(json_response['payload']['overallStatus']['totalRecords'])) 159 | time.sleep(interval) 160 | else: 161 | raise Exception(json_response) -------------------------------------------------------------------------------- /export-neptune-to-elasticsearch/lambda/kinesis_to_elasticsearch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"). 4 | # You may not use this file except in compliance with the License. 5 | # A copy of the License is located at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # or in the "license" file accompanying this file. This file is distributed 10 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 11 | # express or implied. See the License for the specific language governing 12 | # permissions and limitations under the License. 
13 | 
14 | from aws_kinesis_agg.deaggregator import deaggregate_records, iter_deaggregate_records
15 | #from metrics_publisher import MetricsPublisher
16 | import importlib
17 | import logging
18 | import base64
19 | import six
20 | import os
21 | import json
22 | import neptune_to_es
23 | from queue import Queue
24 | import time
25 | 
26 | logger = logging.getLogger()
27 | logger.setLevel(logging.INFO)
28 | 
29 | log_commit_nums = os.environ.get('log_commit_nums', 'false').lower() == 'true'
30 | neptune_engine = os.environ['NEPTUNE_ENGINE']
31 | stream_name = os.environ['STREAM_NAME']
32 | handler_name = 'neptune_to_es.neptune_sparql_es_handler.ElasticSearchSparqlHandler' if neptune_engine == 'sparql' else 'neptune_to_es.neptune_gremlin_es_handler.ElasticSearchGremlinHandler'
33 | 
34 | # Dummy values
35 | os.environ["StreamRecordsBatchSize"] = "100"
36 | os.environ["MaxPollingWaitTime"] = "1"
37 | os.environ["Application"] = ""
38 | os.environ["LeaseTable"] = ""
39 | os.environ["LoggingLevel"] = "INFO"
40 | os.environ["MaxPollingInterval"] = "1"
41 | os.environ["NeptuneStreamEndpoint"] = ""
42 | os.environ["StreamRecordsHandler"] = handler_name
43 | 
44 | from stream_records_processor import StreamRecordsProcessor
45 | 
46 | stream_records_processor = StreamRecordsProcessor()
47 | 
48 | #metrics_publisher_client = MetricsPublisher()
49 | 
50 | def get_handler_instance(handler_name, retry_count=0):
51 | 
52 |     """
53 |     Get Handler instance given a handler name with module
54 |     :param handler_name: the handler class name with module.
55 |     :return: Handler instance
56 | 
57 |     """
58 |     logger.info('Handler: {}'.format(handler_name))
59 | 
60 |     try:
61 |         parts = handler_name.rsplit('.', 1)
62 |         module = importlib.import_module(parts[0])
63 |         cls = getattr(module, parts[1])
64 |         return cls()
65 |     except Exception as e:
66 |         error_msg = str(e)
67 |         if 'resource_already_exists_exception' in error_msg:
68 |             if retry_count > 3:
69 |                 logger.info('Elastic Search Index - amazon_neptune already exists')
70 |                 raise e
71 |             else:
72 |                 return get_handler_instance(handler_name, retry_count + 1)
73 |         else:
74 |             logger.error('Error occurred while creating handler instance for {} - {}.'.format(handler_name, error_msg))
75 |             raise e
76 | 
77 | handler = get_handler_instance(handler_name)
78 | 
79 | def lambda_bulk_handler(event, context):
80 |     """A Python AWS Lambda function to process Kinesis aggregated
81 |     records in a bulk fashion."""
82 | 
83 |     logger.info('Starting bulk loading')
84 | 
85 |     raw_kinesis_records = event['Records']
86 | 
87 |     logger.info('Aggregated Kinesis record count: {}'.format(len(raw_kinesis_records)))
88 | 
89 |     # Deaggregate all records in one call
90 |     user_records = deaggregate_records(raw_kinesis_records)
91 | 
92 |     total_records = len(user_records)
93 | 
94 |     logger.info('Deaggregated record count: {}'.format(total_records))
95 | 
96 |     log_stream = {
97 |         "records": [],
98 |         "lastEventId": {
99 |             "commitNum": -1,
100 |             "opNum": 0
101 |         },
102 |         "totalRecords": total_records
103 |     }
104 | 
105 |     first_commit_num = None
106 |     first_op_num = None
107 |     prev_commit_num = None
108 |     prev_op_num = None
109 |     commit_nums = set()
110 | 
111 |     for user_record in user_records:
112 |         records_json = base64.b64decode(user_record['kinesis']['data'])
113 |         try:
114 |             records = json.loads(records_json)
115 |         except Exception as e:
116 |             logger.error('Error parsing JSON: \'{}\': {}'.format(records_json, str(e)))
117 |             raise e
118 |         for record in records:
119 | 
120 |             commit_num = record['eventId']['commitNum']
121 | 
op_num = record['eventId']['opNum']
122 | 
123 |             if log_commit_nums:
124 |                 commit_nums.add(commit_num)
125 | 
126 |             if not first_commit_num:
127 |                 first_commit_num = commit_num
128 | 
129 |             if not first_op_num:
130 |                 first_op_num = op_num
131 | 
132 |             #logger.info('Stream record: (commitNum: {}, opNum: {})'.format(commit_num, op_num))
133 | 
134 |             #if prev_commit_num and commit_num < prev_commit_num:
135 |             #    logger.warn('Current commitNum [{}] is less than previous commitNum [{}]'.format(commit_num, prev_commit_num))
136 | 
137 |             if prev_commit_num and commit_num == prev_commit_num:
138 |                 if prev_op_num and op_num < prev_op_num:
139 |                     logger.warning('Current opNum [{}] is less than previous opNum [{}] (commitNum [{}])'.format(op_num, prev_op_num, commit_num))
140 | 
141 |             log_stream['records'].append(record)
142 | 
143 |             prev_commit_num = commit_num
144 |             prev_op_num = op_num
145 | 
146 |     log_stream['lastEventId']['commitNum'] = prev_commit_num if prev_commit_num else -1
147 |     log_stream['lastEventId']['opNum'] = prev_op_num if prev_op_num else 0
148 |     log_stream['lastTrxTimestamp'] = str(round(time.time() * 1000))
149 | 
150 |     logger.info('Log stream record count: {}'.format(len(log_stream['records'])))
151 |     logger.info('First record: (commitNum: {}, opNum: {})'.format(first_commit_num, first_op_num))
152 |     logger.info('Last record: (commitNum: {}, opNum: {})'.format(prev_commit_num, prev_op_num))
153 | 
154 |     if log_commit_nums:
155 |         logger.info('Commit nums: {}'.format(commit_nums))
156 | 
157 |     query_queue = Queue(maxsize=0)
158 |     for result in handler.handle_records(log_stream, query_queue):
159 |         records_processed = result.records_processed
160 |         logger.info('{} records processed'.format(records_processed))
161 |         #metrics_publisher_client.publish_metrics(metrics_publisher_client.generate_record_processed_metrics(records_processed))
162 | 
163 |     logger.info('Executing Opensearch queries')
164 | 
165 |     while not query_queue.empty():
166 |         stream_records_processor.write(query_queue)
167 | 
168 |     logger.info('Finished bulk loading')
169 | 
--------------------------------------------------------------------------------
/glue-neptune/glue_neptune/NeptuneGremlinClient.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2018 Amazon.com, Inc. or its affiliates.
 2 | # All Rights Reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License").
 5 | # You may not use this file except in compliance with the License.
 6 | # A copy of the License is located at
 7 | #
 8 | #  http://aws.amazon.com/apache2.0/
 9 | #
10 | # or in the "license" file accompanying this file.
11 | # This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
12 | # either express or implied. See the License for the specific language governing permissions
13 | # and limitations under the License. 
14 | 
15 | import sys
16 | 
17 | from pyspark.sql.functions import lit
18 | from pyspark.sql.functions import format_string
19 | from gremlin_python import statics
20 | from gremlin_python.structure.graph import Graph
21 | from gremlin_python.process.graph_traversal import __
22 | from gremlin_python.process.strategies import *
23 | from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
24 | from gremlin_python.process.traversal import *
25 | 
26 | class NeptuneGremlinClient:
27 | 
28 |     def __init__(self, endpoint):
29 |         self.endpoint = endpoint
30 |         statics.load_statics(globals())
31 |         del globals()['range']
32 |         del globals()['map']
33 | 
34 |     def remote_connection(self):
35 |         """Creates a connection to a Neptune database.
36 | 
37 |         Example:
38 |         >>> gremlin_endpoint = NeptuneConnectionInfo(glueContext).neptune_endpoint('neptune')
39 |         >>> neptune = NeptuneGremlinClient(gremlin_endpoint)
40 |         >>> conn = neptune.remote_connection()
41 |         >>> g = neptune.traversal_source(conn)
42 |         >>> count = g.V().count().next()
43 |         >>> conn.close()
44 |         """
45 |         return DriverRemoteConnection(self.endpoint,'g')
46 | 
47 |     def traversal_source(self, connection=None):
48 |         """Creates a traversal source.
49 | 
50 |         Example:
51 |         >>> gremlin_endpoint = NeptuneConnectionInfo(glueContext).neptune_endpoint('neptune')
52 |         >>> neptune = NeptuneGremlinClient(gremlin_endpoint)
53 |         >>> g = neptune.traversal_source()
54 |         >>> count = g.V().count().next()
55 |         """
56 |         if connection is not None:
57 |             return Graph().traversal().withRemote(connection)
58 |         else:
59 |             return Graph().traversal().withRemote(self.remote_connection())
60 | 
61 |     def add_vertices(self, label):
62 |         """Adds a vertex with the supplied label for each row in a DataFrame partition.
63 |         If the DataFrame contains an '~id' column, the values in this column will be treated as user-supplied IDs for the new vertices.
64 |         If the DataFrame does not have an '~id' column, Neptune will autogenerate a UUID for each vertex.
65 | 
66 |         Example:
67 |         >>> dynamicframe.toDF().foreachPartition(neptune.add_vertices('Product'))
68 |         """
69 |         def add_vertices_for_label(rows):
70 |             conn = self.remote_connection()
71 |             g = self.traversal_source(conn)
72 |             for row in rows:
73 |                 entries = row.asDict()
74 |                 traversal = g.addV(label)
75 |                 for key, value in entries.items():  # items() works on both Python 2 and 3
76 |                     key = key.split(':')[0]
77 |                     if key == '~id':
78 |                         traversal.property(id, value)
79 |                     elif key == '~label':
80 |                         pass
81 |                     else:
82 |                         traversal.property(key, value)
83 |                 traversal.next()
84 |             conn.close()
85 |         return add_vertices_for_label
86 | 
87 |     def upsert_vertices(self, label):
88 |         """Conditionally adds vertices for the rows in a DataFrame partition using the Gremlin coalesce() idiom.
89 |         The DataFrame must contain an '~id' column. 
90 | 
91 |         Example:
92 |         >>> dynamicframe.toDF().foreachPartition(neptune.upsert_vertices('Product'))
93 |         """
94 |         def upsert_vertices_for_label(rows):
95 |             conn = self.remote_connection()
96 |             g = self.traversal_source(conn)
97 |             for row in rows:
98 |                 entries = row.asDict()
99 |                 create_traversal = __.addV(label)
100 |                 for key, value in entries.items():
101 |                     key = key.split(':')[0]
102 |                     if key == '~id':
103 |                         create_traversal.property(id, value)
104 |                     elif key == '~label':
105 |                         pass
106 |                     else:
107 |                         create_traversal.property(key, value)
108 |                 g.V(entries['~id']).fold().coalesce(__.unfold(), create_traversal).next()
109 |             conn.close()
110 |         return upsert_vertices_for_label
111 | 
112 |     def add_edges(self, label):
113 |         """Adds an edge with the supplied label for each row in a DataFrame partition.
114 |         The DataFrame must contain '~id', '~from' and '~to' columns; the values in the
115 |         '~id' column are treated as user-supplied IDs for the new edges.
116 | 
117 |         Example:
118 |         >>> dynamicframe.toDF().foreachPartition(neptune.add_edges('ORDER_DETAIL'))
119 |         """
120 |         def add_edges_for_label(rows):
121 |             conn = self.remote_connection()
122 |             g = self.traversal_source(conn)
123 |             for row in rows:
124 |                 entries = row.asDict()
125 |                 traversal = g.V(row['~from']).addE(label).to(V(row['~to'])).property(id, row['~id'])
126 |                 for key, value in entries.items():
127 |                     key = key.split(':')[0]
128 |                     if key not in ['~id', '~from', '~to', '~label']:
129 |                         traversal.property(key, value)
130 |                 traversal.next()
131 |             conn.close()
132 |         return add_edges_for_label
133 | 
134 |     def upsert_edges(self, label):
135 |         """Conditionally adds edges for the rows in a DataFrame partition using the Gremlin coalesce() idiom.
136 |         The DataFrame must contain '~id', '~from', '~to' and '~label' columns.
137 | 
138 |         Example:
139 |         >>> dynamicframe.toDF().foreachPartition(neptune.upsert_edges('ORDER_DETAIL'))
140 |         """
141 |         def add_edges_for_label(rows):
142 |             conn = self.remote_connection()
143 |             g = self.traversal_source(conn)
144 |             for row in rows:
145 |                 entries = row.asDict()
146 |                 create_traversal = __.V(row['~from']).addE(label).to(V(row['~to'])).property(id, row['~id'])
147 |                 for key, value in entries.items():
148 |                     key = key.split(':')[0]
149 |                     if key not in ['~id', '~from', '~to', '~label']:
150 |                         create_traversal.property(key, value)
151 |                 g.E(entries['~id']).fold().coalesce(__.unfold(), create_traversal).next()
152 |             conn.close()
153 |         return add_edges_for_label
--------------------------------------------------------------------------------
/neo4j-to-neptune/src/test/java/com/amazonaws/services/neptune/util/UtilsTest.java:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 3 | Licensed under the Apache License, Version 2.0 (the "License").
 4 | You may not use this file except in compliance with the License.
 5 | A copy of the License is located at
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | or in the "license" file accompanying this file. This file is distributed
 8 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 9 | express or implied. See the License for the specific language governing
10 | permissions and limitations under the License. 
11 | */ 12 | 13 | package com.amazonaws.services.neptune.util; 14 | 15 | import org.junit.Test; 16 | import static org.junit.Assert.*; 17 | 18 | /** 19 | * Unit tests for Utils class 20 | */ 21 | public class UtilsTest { 22 | 23 | @Test 24 | public void testFormatFileSize_Bytes() { 25 | // Test bytes (0-1023) 26 | assertEquals("0 B", Utils.formatFileSize(0)); 27 | assertEquals("1 B", Utils.formatFileSize(1)); 28 | assertEquals("512 B", Utils.formatFileSize(512)); 29 | assertEquals("1023 B", Utils.formatFileSize(1023)); 30 | } 31 | 32 | @Test 33 | public void testFormatFileSize_Kilobytes() { 34 | // Test kilobytes (1024 - 1048575) 35 | assertEquals("1.0 KB", Utils.formatFileSize(1024)); 36 | assertEquals("1.5 KB", Utils.formatFileSize(1536)); // 1024 + 512 37 | assertEquals("2.0 KB", Utils.formatFileSize(2048)); 38 | assertEquals("10.5 KB", Utils.formatFileSize(10752)); // 10.5 * 1024 39 | assertEquals("1024.0 KB", Utils.formatFileSize(1048575)); // Just under 1MB 40 | } 41 | 42 | @Test 43 | public void testFormatFileSize_Megabytes() { 44 | // Test megabytes (1048576 - 1073741823) 45 | assertEquals("1.0 MB", Utils.formatFileSize(1048576)); // 1024 * 1024 46 | assertEquals("1.5 MB", Utils.formatFileSize(1572864)); // 1.5 * 1024 * 1024 47 | assertEquals("2.0 MB", Utils.formatFileSize(2097152)); // 2 * 1024 * 1024 48 | assertEquals("10.5 MB", Utils.formatFileSize(11010048)); // 10.5 * 1024 * 1024 49 | assertEquals("500.0 MB", Utils.formatFileSize(524288000)); // 500 * 1024 * 1024 50 | assertEquals("1024.0 MB", Utils.formatFileSize(1073741823)); // Just under 1GB 51 | } 52 | 53 | @Test 54 | public void testFormatFileSize_Gigabytes() { 55 | // Test gigabytes (1073741824 and above) 56 | assertEquals("1.0 GB", Utils.formatFileSize(1073741824L)); // 1024 * 1024 * 1024 57 | assertEquals("1.5 GB", Utils.formatFileSize(1610612736L)); // 1.5 * 1024 * 1024 * 1024 58 | assertEquals("2.0 GB", Utils.formatFileSize(2147483648L)); // 2 * 1024 * 1024 * 1024 59 | assertEquals("10.5 GB", Utils.formatFileSize(11274289152L)); // 10.5 * 1024 * 1024 * 1024 60 | assertEquals("100.0 GB", Utils.formatFileSize(107374182400L)); // 100 * 1024 * 1024 * 1024 61 | assertEquals("1000.0 GB", Utils.formatFileSize(1073741824000L)); // 1000 * 1024 * 1024 * 1024 62 | } 63 | 64 | @Test 65 | public void testFormatFileSize_BoundaryValues() { 66 | // Test exact boundary values 67 | assertEquals("1023 B", Utils.formatFileSize(1023)); 68 | assertEquals("1.0 KB", Utils.formatFileSize(1024)); 69 | 70 | assertEquals("1024.0 KB", Utils.formatFileSize(1048575)); // 1024*1024 - 1 71 | assertEquals("1.0 MB", Utils.formatFileSize(1048576)); // 1024*1024 72 | 73 | assertEquals("1024.0 MB", Utils.formatFileSize(1073741823)); // 1024*1024*1024 - 1 74 | assertEquals("1.0 GB", Utils.formatFileSize(1073741824)); // 1024*1024*1024 75 | } 76 | 77 | @Test 78 | public void testFormatFileSize_DecimalPrecision() { 79 | // Test decimal precision (should be 1 decimal place) 80 | assertEquals("1.1 KB", Utils.formatFileSize(1126)); // 1.1 * 1024 81 | assertEquals("1.9 KB", Utils.formatFileSize(1946)); // 1.9 * 1024 82 | assertEquals("1.1 MB", Utils.formatFileSize(1153434)); // 1.1 * 1024 * 1024 83 | assertEquals("1.9 MB", Utils.formatFileSize(1992294)); // 1.9 * 1024 * 1024 84 | assertEquals("1.1 GB", Utils.formatFileSize(1181116006L)); // 1.1 * 1024 * 1024 * 1024 85 | assertEquals("1.9 GB", Utils.formatFileSize(2040109465L)); // 1.9 * 1024 * 1024 * 1024 86 | } 87 | 88 | @Test 89 | public void testFormatFileSize_LargeValues() { 90 | // Test very large values 
91 | assertEquals("1024.0 GB", Utils.formatFileSize(1099511627776L)); // 1TB in GB format 92 | assertEquals("2048.0 GB", Utils.formatFileSize(2199023255552L)); // 2TB in GB format 93 | assertEquals("10240.0 GB", Utils.formatFileSize(10995116277760L)); // 10TB in GB format 94 | } 95 | 96 | @Test 97 | public void testFormatFileSize_EdgeCases() { 98 | // Test edge cases 99 | assertEquals("0 B", Utils.formatFileSize(0)); 100 | assertEquals("1 B", Utils.formatFileSize(1)); 101 | 102 | // Test maximum long value (though unrealistic for file sizes) 103 | long maxLong = Long.MAX_VALUE; 104 | String result = Utils.formatFileSize(maxLong); 105 | assertTrue("Should format very large numbers as GB", result.endsWith(" GB")); 106 | assertTrue("Should be a very large number", result.startsWith("8589934592")); // ~8.6 billion GB 107 | } 108 | 109 | @Test 110 | public void testFormatFileSize_ConsistentFormatting() { 111 | // Verify consistent decimal formatting 112 | String result1KB = Utils.formatFileSize(1024); 113 | String result1MB = Utils.formatFileSize(1048576); 114 | String result1GB = Utils.formatFileSize(1073741824); 115 | 116 | assertTrue("KB should have .0 format", result1KB.contains(".0")); 117 | assertTrue("MB should have .0 format", result1MB.contains(".0")); 118 | assertTrue("GB should have .0 format", result1GB.contains(".0")); 119 | } 120 | 121 | @Test 122 | public void testFormatFileSize_RoundingBehavior() { 123 | // Test rounding behavior for edge cases 124 | assertEquals("1.0 KB", Utils.formatFileSize(1024)); // Exact 125 | assertEquals("1.0 KB", Utils.formatFileSize(1025)); // Should round to 1.0 126 | assertEquals("1.0 KB", Utils.formatFileSize(1075)); // Should round to 1.0 (1075/1024 = 1.05) 127 | assertEquals("1.1 KB", Utils.formatFileSize(1126)); // Should round to 1.1 (1126/1024 = 1.1) 128 | assertEquals("1.1 KB", Utils.formatFileSize(1177)); // Should round to 1.1 (1177/1024 = 1.15) 129 | } 130 | 131 | @Test 132 | public void testUtilsClassCannotBeInstantiated() { 133 | // Test that Utils class has private constructor (utility class pattern) 134 | try { 135 | // This should work since we're in the same package, but constructor should be private 136 | java.lang.reflect.Constructor constructor = Utils.class.getDeclaredConstructor(); 137 | assertTrue("Constructor should be private", 138 | java.lang.reflect.Modifier.isPrivate(constructor.getModifiers())); 139 | } catch (NoSuchMethodException e) { 140 | fail("Utils class should have a private no-args constructor"); 141 | } 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /csv-to-neptune-bulk-format/data-config-spotify.json: -------------------------------------------------------------------------------- 1 | { 2 | "source_folder": ".source", 3 | "data_folder": ".data", 4 | "fileNames": [ 5 | "spotify_songs.csv" 6 | ], 7 | "nodes": [ 8 | { 9 | "csvFileName": "Track.csv", 10 | "select": "'track_name' in row and 'track_artist' in row and row['track_name'] != '' and row['track_artist'] != ''", 11 | "id": "uuid()", 12 | "label": "'Track'", 13 | "uniqueKey": "row['track_artist'] + '-' + row['track_name']", 14 | "properties": [ 15 | { 16 | "property": "track_name", 17 | "key": "track_name" 18 | }, 19 | { 20 | "property": "track_artist", 21 | "key": "track_artist" 22 | }, 23 | { 24 | "property": "track_popularity", 25 | "key": "track_popularity" 26 | }, 27 | { 28 | "property": "danceability", 29 | "key": "danceability" 30 | }, 31 | { 32 | "property": "energy", 33 | "key": "energy" 34 | }, 
35 | { 36 | "property": "key", 37 | "key": "key" 38 | }, 39 | { 40 | "property": "loudness", 41 | "key": "loudness" 42 | }, 43 | 44 | { 45 | "property": "mode", 46 | "key": "mode" 47 | }, 48 | { 49 | "property": "speechiness", 50 | "key": "speechiness" 51 | }, 52 | { 53 | "property": "acousticness", 54 | "key": "acousticness" 55 | }, 56 | { 57 | "property": "instrumentalness", 58 | "key": "instrumentalness" 59 | }, 60 | { 61 | "property": "liveness", 62 | "key": "liveness" 63 | }, 64 | { 65 | "property": "valence", 66 | "key": "valence" 67 | }, 68 | { 69 | "property": "tempo", 70 | "key": "tempo" 71 | }, 72 | { 73 | "property": "duration_ms", 74 | "key": "duration_ms" 75 | } 76 | ] 77 | }, 78 | { 79 | "csvFileName": "Artist.csv", 80 | "select": "'track_artist' in row and row['track_artist'] != ''", 81 | "id": "uuid()", 82 | "label": "'Artist'", 83 | "uniqueKey": "row['track_artist']", 84 | "properties": [ 85 | { 86 | "property": "name", 87 | "value": "row['track_artist']" 88 | } 89 | ] 90 | }, 91 | { 92 | "csvFileName": "Album.csv", 93 | "select": "'track_album_name' in row and row['track_album_name'] != '' and 'track_album_id' in row and row['track_album_id'] != ''", 94 | "id": "uuid()", 95 | "label": "'Album'", 96 | "uniqueKey": "row['track_album_id']", 97 | "properties": [ 98 | { 99 | "property": "name", 100 | "value": "row['track_album_name']" 101 | }, 102 | { 103 | "property": "album_release_date", 104 | "value": "row['track_album_release_date']" 105 | } 106 | ] 107 | }, 108 | { 109 | "csvFileName": "Playlist.csv", 110 | "select": "'playlist_name' in row and row['playlist_name'] != '' and 'playlist_id' in row and row['playlist_id'] != ''", 111 | "id": "uuid()", 112 | "label": "'Playlist'", 113 | "uniqueKey": "row['playlist_id']", 114 | "properties": [ 115 | { 116 | "property": "name", 117 | "value": "row['playlist_name']" 118 | } 119 | ] 120 | }, 121 | { 122 | "csvFileName": "Playlist_Genre.csv", 123 | "select": "'playlist_genre' in row and row['playlist_genre'] != ''", 124 | "id": "uuid()", 125 | "label": "'Genre'", 126 | "uniqueKey": "row['playlist_genre']", 127 | "properties": [ 128 | { 129 | "property": "name", 130 | "value": "row['playlist_genre']" 131 | } 132 | ] 133 | }, 134 | { 135 | "csvFileName": "Playlist_SubGenre.csv", 136 | "select": "'playlist_subgenre' in row and row['playlist_subgenre'] != ''", 137 | "id": "uuid()", 138 | "label": "'SubGenre'", 139 | "uniqueKey": "row['playlist_subgenre']", 140 | "properties": [ 141 | { 142 | "property": "name", 143 | "value": "row['playlist_subgenre']" 144 | } 145 | ] 146 | } 147 | ], 148 | "edges": [ 149 | { 150 | "csvFileName": "Track_Album_Edges.csv", 151 | "select": "'track_name' in row and 'track_artist' in row and row['track_name'] != '' and row['track_artist'] != '' and 'track_album_id' in row and row['track_album_id'] != ''", 152 | "id": "uuid()", 153 | "label": "'IN_ALBUM'", 154 | "from": "row['track_artist'] + '-' + row['track_name']", 155 | "to": "row['track_album_id']", 156 | "fromLabel": "'Track'", 157 | "toLabel": "'Album'", 158 | "properties": [] 159 | }, 160 | { 161 | "csvFileName": "Track_Artist_Edges.csv", 162 | "select": "'track_name' in row and 'track_artist' in row and row['track_name'] != '' and row['track_artist'] != ''", 163 | "id": "uuid()", 164 | "label": "'BY_ARTIST'", 165 | "from": "row['track_artist'] + '-' + row['track_name']", 166 | "to": "row['track_artist']", 167 | "fromLabel": "'Track'", 168 | "toLabel": "'Artist'", 169 | "properties": [] 170 | }, 171 | { 172 | "csvFileName": 
"Track_Playlist_Edges.csv", 173 | "select": "'track_name' in row and 'track_artist' in row and row['track_name'] != '' and row['track_artist'] != '' and 'playlist_id' in row and row['playlist_id'] != ''", 174 | "id": "uuid()", 175 | "label": "'IN_PLAYLIST'", 176 | "from": "row['track_artist'] + '-' + row['track_name']", 177 | "to": "row['playlist_id']", 178 | "fromLabel": "'Track'", 179 | "toLabel": "'Playlist'", 180 | "properties": [] 181 | }, 182 | { 183 | "csvFileName": "Playlist_Genre_Edges.csv", 184 | "select": "'playlist_id' in row and row['playlist_id'] != ''and 'playlist_genre' in row and row['playlist_genre'] != ''", 185 | "id": "uuid()", 186 | "label": "'HAS_GENRE'", 187 | "from": "row['playlist_id']", 188 | "to": "row['playlist_genre']", 189 | "fromLabel": "'Playlist'", 190 | "toLabel": "'Genre'", 191 | "properties": [] 192 | }, 193 | { 194 | "csvFileName": "Genre_SubGenre_Edges.csv", 195 | "select": "'playlist_genre' in row and row['playlist_genre'] != '' and 'playlist_subgenre' in row and row['playlist_subgenre'] != ''", 196 | "id": "uuid()", 197 | "label": "'HAS_SUBGENRE'", 198 | "from": "row['playlist_genre']", 199 | "to": "row['playlist_subgenre']", 200 | "fromLabel": "'Genre'", 201 | "toLabel": "'SubGenre'", 202 | "properties": [] 203 | } 204 | ] 205 | } --------------------------------------------------------------------------------