├── .circleci └── config.yml ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── config.yml └── workflows │ ├── analyze.yml │ ├── docs.yaml │ └── release.yml ├── .gitignore ├── .readthedocs.yaml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── _nx_arangodb ├── VERSION ├── __init__.py ├── _version.py └── core.py ├── doc ├── Makefile ├── _static │ ├── dispatch.png │ └── nxadb.png ├── algorithms │ └── index.rst ├── classes │ ├── digraph.rst │ ├── graph.rst │ ├── index.rst │ ├── multidigraph.rst │ └── multigraph.rst ├── conf.py ├── dict │ ├── adj.rst │ ├── graph.rst │ ├── index.rst │ └── node.rst ├── index.rst ├── make.bat ├── nx_arangodb.ipynb ├── quickstart.rst ├── requirements.txt └── views │ ├── coreviews.rst │ ├── index.rst │ └── reportviews.rst ├── nx_arangodb ├── __init__.py ├── algorithms │ ├── README.md │ ├── __init__.py │ └── shortest_paths │ │ ├── __init__.py │ │ └── generic.py ├── classes │ ├── __init__.py │ ├── coreviews.py │ ├── dict │ │ ├── README.md │ │ ├── __init__.py │ │ ├── adj.py │ │ ├── graph.py │ │ └── node.py │ ├── digraph.py │ ├── enum.py │ ├── function.py │ ├── graph.py │ ├── multidigraph.py │ ├── multigraph.py │ └── reportviews.py ├── convert.py ├── exceptions.py ├── interface.py ├── logger.py ├── typing.py └── utils │ ├── __init__.py │ ├── decorators.py │ └── misc.py ├── pyproject.toml ├── run_nx_tests.sh ├── starter.sh └── tests ├── __init__.py ├── conftest.py ├── static ├── cluster.conf ├── keyfile ├── service.zip └── setup.sh ├── test.py ├── test_digraph.py ├── test_graph.py ├── test_multidigraph.py └── test_multigraph.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | executors: 4 | python-executor: 5 | docker: 6 | - image: cimg/python:3.10 7 | environment: 8 | PACKAGE_DIR: nx_arangodb 9 | TESTS_DIR: tests 10 | 11 | machine-executor: 12 | machine: 13 | image: ubuntu-2404:current 14 | 15 | gpu-executor: 16 | machine: 17 | image: linux-cuda-12:default 18 | resource_class: gpu.nvidia.small.multi 19 | 20 | jobs: 21 | lint: 22 | executor: python-executor 23 | steps: 24 | - checkout 25 | 26 | - run: 27 | name: Setup pip 28 | command: python -m pip install --upgrade pip setuptools wheel 29 | 30 | - run: 31 | name: Install packages 32 | command: pip install .[dev] 33 | 34 | - run: 35 | name: Run black 36 | command: black --check --verbose --diff --color $PACKAGE_DIR $TESTS_DIR 37 | 38 | - run: 39 | name: Run flake8 40 | command: flake8 $PACKAGE_DIR $TESTS_DIR 41 | 42 | - run: 43 | name: Run isort 44 | command: isort --check --profile=black $PACKAGE_DIR $TESTS_DIR 45 | 46 | - run: 47 | name: Run mypy 48 | command: mypy $PACKAGE_DIR $TESTS_DIR 49 | 50 | test: 51 | parameters: 52 | python_version: 53 | type: string 54 | executor: machine-executor 55 | steps: 56 | - checkout 57 | 58 | - run: 59 | name: Set up ArangoDB 60 | command: | 61 | chmod +x starter.sh 62 | ./starter.sh 63 | 64 | - run: 65 | name: Setup Python 66 | command: | 67 | pyenv --version 68 | pyenv install -f << parameters.python_version >> 69 | pyenv global << parameters.python_version >> 70 | 71 | - run: 72 | name: Setup pip 73 | command: python -m pip install --upgrade pip setuptools wheel 74 | 75 | - run: 76 | name: Install packages 77 | command: pip install .[dev] 78 | 79 | - run: 80 | name: Run local tests 81 | command: pytest tests/*.py 82 | 83 | - run: 84 | name: Run NetworkX tests 85 | command: ./run_nx_tests.sh 86 | 87 | test-gpu: 88 | parameters: 89 | python_version: 90 | type: string 91 | executor: gpu-executor 92 | 
steps: 93 | - checkout 94 | 95 | - run: 96 | name: Set up ArangoDB 97 | command: | 98 | chmod +x starter.sh 99 | ./starter.sh 100 | 101 | - run: 102 | name: Setup Python 103 | command: | 104 | pyenv --version 105 | pyenv install -f << parameters.python_version >> 106 | pyenv global << parameters.python_version >> 107 | 108 | - run: 109 | name: Create virtual environment 110 | command: python -m venv venv 111 | 112 | - run: 113 | name: Activate virtual environment 114 | command: . venv/bin/activate 115 | 116 | - run: 117 | name: Setup pip 118 | command: venv/bin/python -m pip install --upgrade pip setuptools wheel 119 | 120 | - run: 121 | name: Install packages 122 | command: venv/bin/pip install .[dev] 123 | 124 | - run: 125 | name: Install cuda related dependencies 126 | command: | 127 | venv/bin/pip install pylibcugraph-cu12 --extra-index-url https://pypi.nvidia.com 128 | venv/bin/pip install nx-cugraph-cu12 --extra-index-url https://pypi.nvidia.com 129 | 130 | - run: 131 | name: Run local gpu tests 132 | command: venv/bin/pytest tests/test.py -k "test_gpu" --run-gpu-tests 133 | 134 | workflows: 135 | version: 2 136 | build: 137 | jobs: 138 | - lint 139 | - test: 140 | matrix: 141 | parameters: 142 | python_version: ["3.10", "3.11", "3.12"] 143 | - test-gpu: 144 | requires: 145 | - lint 146 | - test 147 | matrix: 148 | parameters: 149 | python_version: ["3.10", "3.11"] # "3.12" # TODO: Revisit 3.12 150 | filters: 151 | branches: 152 | only: 153 | - main -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: "Please describe the problem you have encountered" 4 | --- 5 | 6 | 7 | 8 | 9 | 10 | ### Current Behavior 11 | 12 | 13 | 14 | ### Expected Behavior 15 | 16 | 17 | 18 | ### Steps to Reproduce 19 | 20 | 21 | 22 | ### Environment 23 | 24 | 25 | 26 | OS: 27 | Python version: 28 | NetworkX version: 29 | NetworkX-ArangoDB version: 30 | NetworkX-cuGraph version (if applicable): 31 | ArangoDB version: 32 | 33 | 34 | ### Additional context 35 | 36 | 37 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Questions about NetworkX-ArangoDB 4 | url: https://github.com/arangodb/nx-arangodb/discussions/new?category=q-a 5 | about: Ask questions about usage of NetworkX-ArangoDB 6 | - name: Discussions and Ideas 7 | url: https://github.com/arangodb/nx-arangodb/discussions 8 | about: Talk about new algorithms, feature requests, show your latest application of networks. 9 | -------------------------------------------------------------------------------- /.github/workflows/analyze.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # 12 | name: analyze 13 | on: 14 | workflow_dispatch: 15 | push: 16 | branches: [ main ] 17 | pull_request: 18 | branches: [ main ] 19 | schedule: 20 | - cron: "00 9 1,15 * *" 21 | jobs: 22 | analyze: 23 | runs-on: ubuntu-latest 24 | permissions: 25 | actions: read 26 | contents: read 27 | security-events: write 28 | 29 | strategy: 30 | fail-fast: false 31 | matrix: 32 | language: ["python"] 33 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 34 | # Learn more: 35 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 36 | 37 | steps: 38 | - name: Checkout repository 39 | uses: actions/checkout@v4 40 | 41 | # Initializes the CodeQL tools for scanning. 42 | - name: Initialize CodeQL 43 | uses: github/codeql-action/init@v3 44 | with: 45 | languages: ${{ matrix.language }} 46 | # If you wish to specify custom queries, you can do so here or in a config file. 47 | # By default, queries listed here will override any specified in a config file. 48 | # Prefix the list here with "+" to use these queries and those in the config file. 49 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 50 | 51 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 52 | # If this step fails, then you should remove it and run the build manually (see below) 53 | - name: Autobuild 54 | uses: github/codeql-action/autobuild@v3 55 | 56 | # ℹ️ Command-line programs to run using the OS shell. 57 | # 📚 https://git.io/JvXDl 58 | 59 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 60 | # and modify them (or add more) to build your code if your project 61 | # uses a compiled language 62 | 63 | #- run: | 64 | # make bootstrap 65 | # make release 66 | 67 | - name: Perform CodeQL Analysis 68 | uses: github/codeql-action/analyze@v3 69 | -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | name: docs 2 | 3 | on: 4 | pull_request: 5 | workflow_dispatch: 6 | 7 | jobs: 8 | docs: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout repository 12 | uses: actions/checkout@v4 13 | 14 | - name: Fetch all tags and branches 15 | run: git fetch --prune --unshallow 16 | 17 | - name: Set up Python 18 | uses: actions/setup-python@v4 19 | with: 20 | python-version: '3.10' 21 | 22 | - name: Install dependencies 23 | run: pip install .[dev] && pip install -r doc/requirements.txt 24 | 25 | - name: Generate Sphinx HTML 26 | run: cd doc && make html -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | on: 3 | workflow_dispatch: 4 | release: 5 | types: [published] 6 | jobs: 7 | release: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v4 11 | 12 | - name: Fetch complete history for all tags and branches 13 | run: git fetch --prune --unshallow 14 | 15 | - name: Setup Python 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: "3.10" 19 | 20 | - name: Install release packages 21 | run: pip install build twine 22 | 23 | - name: Build distribution 24 | run: python -m build 25 | 26 | - name: Publish to Test PyPi 27 | env: 28 | TWINE_USERNAME: __token__ 29 | TWINE_PASSWORD: ${{ 
secrets.TWINE_PASSWORD_TEST }} 30 | run: twine upload --repository testpypi dist/* 31 | 32 | - name: Publish to PyPi 33 | env: 34 | TWINE_USERNAME: __token__ 35 | TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }} 36 | run: twine upload --repository pypi dist/* 37 | 38 | changelog: 39 | needs: release 40 | runs-on: ubuntu-latest 41 | steps: 42 | - uses: actions/checkout@v4 43 | with: 44 | fetch-depth: 0 45 | 46 | - name: Create new branch 47 | run: git checkout -b actions/changelog 48 | 49 | - name: Set branch upstream 50 | run: git push -u origin actions/changelog 51 | env: 52 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 53 | 54 | - name: Setup Python 55 | uses: actions/setup-python@v4 56 | with: 57 | python-version: "3.10" 58 | 59 | - name: Install release packages 60 | run: pip install wheel gitchangelog pystache 61 | 62 | - name: Set variables 63 | run: echo "VERSION=$(curl ${GITHUB_API_URL}/repos/${GITHUB_REPOSITORY}/releases/latest | python -c "import sys; import json; print(json.load(sys.stdin)['tag_name'])")" >> $GITHUB_ENV 64 | 65 | - name: Generate newest changelog 66 | run: gitchangelog ${{env.VERSION}} > CHANGELOG.md 67 | 68 | - name: Make commit for auto-generated changelog 69 | uses: EndBug/add-and-commit@v9 70 | env: 71 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 72 | with: 73 | add: "CHANGELOG.md" 74 | new_branch: actions/changelog 75 | message: "!gitchangelog" 76 | 77 | - name: Create pull request for the auto generated changelog 78 | run: | 79 | echo "PR_URL=$(gh pr create \ 80 | --title "changelog: release ${{env.VERSION}}" \ 81 | --body "beep boop, i am a robot" \ 82 | --label documentation)" >> $GITHUB_ENV 83 | env: 84 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 85 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # Generated while building documentation. 29 | doc/auto_examples 30 | doc/modules 31 | doc/generated 32 | doc/algorithms/generated 33 | doc/classes/generated 34 | doc/readwrite/generated 35 | doc/path.to.file 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | .hypothesis/ 57 | .pytest_cache/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | db.sqlite3 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | doc/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # SageMath parsed files 91 | *.sage.py 92 | 93 | # Environments 94 | .env 95 | .venv 96 | env/ 97 | venv/ 98 | ENV/ 99 | env.bak/ 100 | venv.bak/ 101 | 102 | # Spyder project settings 103 | .spyderproject 104 | .spyproject 105 | 106 | # Rope project settings 107 | .ropeproject 108 | 109 | # mkdocs documentation 110 | /site 111 | 112 | # mypy 113 | .mypy_cache/ 114 | 115 | # MacOS 116 | .DS_Store 117 | 118 | # PyCharm 119 | .idea/ 120 | 121 | # ArangoDB Starter 122 | localdata/ 123 | 124 | # Node Modules 125 | node_modules/ 126 | 127 | # direnv 128 | .envrc 129 | .direnv/ 130 | 131 | # test results 132 | *_results.txt 133 | 134 | *.egg-info 135 | 136 | # VSCode 137 | .vscode/ -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the OS, Python version and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.12" 13 | 14 | # Build documentation in the "doc/" directory with Sphinx 15 | sphinx: 16 | configuration: doc/conf.py 17 | fail_on_warning: false 18 | 19 | # Optionally build your docs in additional formats such as PDF and ePub 20 | # formats: 21 | # - pdf 22 | # - epub 23 | 24 | # Optional but recommended, declare the Python requirements required 25 | # to build your documentation 26 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 27 | python: 28 | install: 29 | - requirements: doc/requirements.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. 10 | 11 | "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. 12 | 13 | "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 14 | 15 | "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 16 | 17 | "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 18 | 19 | "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 20 | 21 | "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 22 | 23 | "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 24 | 25 | "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 26 | 27 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 28 | 29 | 2. Grant of Copyright License. 30 | 31 | Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 32 | 33 | 3. Grant of Patent License. 
34 | 35 | Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 36 | 37 | 4. Redistribution. 38 | 39 | You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: 40 | 41 | You must give any other recipients of the Work or Derivative Works a copy of this License; and 42 | You must cause any modified files to carry prominent notices stating that You changed the files; and 43 | You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and 44 | If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. 45 | You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 46 | 47 | 5. Submission of Contributions. 48 | 49 | Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 50 | 51 | 6. Trademarks. 
52 | 53 | This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 54 | 55 | 7. Disclaimer of Warranty. 56 | 57 | Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 58 | 59 | 8. Limitation of Liability. 60 | 61 | In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 62 | 63 | 9. Accepting Warranty or Additional Liability. 64 | 65 | While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 66 | 67 | END OF TERMS AND CONDITIONS 68 | 69 | APPENDIX: How to apply the Apache License to your work 70 | 71 | To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 72 | 73 | Copyright [yyyy] [name of copyright owner] 74 | 75 | Licensed under the Apache License, Version 2.0 (the "License"); 76 | you may not use this file except in compliance with the License. 77 | You may obtain a copy of the License at 78 | 79 | http://www.apache.org/licenses/LICENSE-2.0 80 | 81 | Unless required by applicable law or agreed to in writing, software 82 | distributed under the License is distributed on an "AS IS" BASIS, 83 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 84 | See the License for the specific language governing permissions and 85 | limitations under the License. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nx-arangodb 2 | 3 | 4 | NetworkX 5 | 6 | 7 | ArangoDB 8 | 9 | 10 | RAPIDS 11 | 12 | 13 | NVIDIA 14 | 15 | 16 |
17 |
18 | 
19 | Open In Colab
20 | [![CircleCI](https://dl.circleci.com/status-badge/img/gh/arangodb/nx-arangodb/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/arangodb/nx-arangodb/tree/main)
21 | [![CodeQL](https://github.com/arangodb/nx-arangodb/actions/workflows/analyze.yml/badge.svg)](https://github.com/arangodb/nx-arangodb/actions/workflows/analyze.yml)
22 | [![Docs](https://readthedocs.org/projects/nx-arangodb/badge/?version=latest)](https://nx-arangodb.readthedocs.io/en/latest/?badge=latest)
23 | 
24 | [![PyPI version badge](https://img.shields.io/pypi/v/nx-arangodb?color=3775A9&style=for-the-badge&logo=pypi&logoColor=FFD43B)](https://pypi.org/project/nx-arangodb/)
25 | [![Python versions badge](https://img.shields.io/pypi/pyversions/nx-arangodb?color=3776AB&style=for-the-badge&logo=python&logoColor=FFD43B)](https://pypi.org/project/nx-arangodb/)
26 | 
27 | [![License](https://img.shields.io/github/license/arangodb/nx-arangodb?color=9E2165&style=for-the-badge)](https://github.com/arangodb/nx-arangodb/blob/main/LICENSE)
28 | [![Code style: black](https://img.shields.io/static/v1?style=for-the-badge&label=code%20style&message=black&color=black)](https://github.com/psf/black)
29 | [![Downloads](https://img.shields.io/pepy/dt/nx-arangodb?style=for-the-badge&color=282661
30 | )](https://pepy.tech/project/nx-arangodb)
31 | 
32 | 
33 | 
34 | https://github.com/user-attachments/assets/e5f56574-d3ef-452c-ab21-b47b3d5d5900
35 | 
36 | 
37 | ## What is this?
38 | 
39 | This is a [backend to NetworkX](https://networkx.org/documentation/stable/reference/backends.html) that offers [ArangoDB](https://github.com/arangodb/arangodb) as a [Persistence Layer to NetworkX Graphs](https://arangodb.com/introducing-the-arangodb-networkx-persistence-layer/):
40 | 1. Persist NetworkX Graphs to ArangoDB.
41 | 2. Reload NetworkX Graphs from ArangoDB.
42 | 3. Perform CRUD on ArangoDB Graphs via NetworkX.
43 | 4. Run algorithms (CPU & GPU) on ArangoDB Graphs via NetworkX.
44 | 
45 | Benefits of having ArangoDB as a backend to NetworkX include:
46 | 1. No need to re-create the graph every time you start a new session.
47 | 2. Access to GPU-accelerated graph analytics ([nx-cugraph](https://rapids.ai/nx-cugraph/)).
48 | 3. Access to a database query language ([Arango Query Language](https://arangodb.com/sql-aql-comparison/)).
49 | 4. Access to a visual interface for graph exploration ([ArangoDB Web UI](https://docs.arangodb.com/stable/components/web-interface/graphs/)).
50 | 5. Access to cross-collaboration on the same graph ([ArangoDB Cloud](https://docs.arangodb.com/stable/get-started/set-up-a-cloud-instance/)).
51 | 6. Access to efficient distribution of graph data ([ArangoDB SmartGraphs](https://docs.arangodb.com/stable/graphs/smartgraphs/)).
52 | 
53 | 

54 | 55 |

56 | 57 | 58 | ## Does this replace NetworkX? 59 | 60 | Not really. This is a plugin to NetworkX, which means that you can use NetworkX as you normally would, but with the added benefit of persisting your graphs to a database. 61 | 62 | ```python 63 | import os 64 | import networkx as nx 65 | import nx_arangodb as nxadb 66 | 67 | os.environ["DATABASE_HOST"] = "http://localhost:8529" 68 | os.environ["DATABASE_USERNAME"] = "root" 69 | os.environ["DATABASE_PASSWORD"] = "openSesame" 70 | os.environ["DATABASE_NAME"] = "_system" 71 | 72 | G = nxadb.Graph(name="MyGraph") 73 | 74 | G.add_node(1, foo='bar') 75 | G.add_node(2, bar='foo') 76 | G.add_edge(1, 2, weight=2) 77 | 78 | res = nx.pagerank(G) 79 | 80 | for k, v in res.items(): 81 | G.nodes[k]['pagerank'] = v 82 | ``` 83 | 84 | ## Does this mean I need to learn ArangoDB? 85 | 86 | No. You can use `nx-arangodb` without knowing anything about ArangoDB. The UX of `nx-arangodb` is designed to be as close as possible to the UX of NetworkX. See the ReadTheDocs for a list of features that are currently unsupported/in-development. 87 | 88 | ```python 89 | import os 90 | import networkx as nx 91 | import nx_arangodb as nxadb 92 | 93 | # os.environ ... 94 | 95 | # Re-connect to the graph 96 | G = nxadb.Graph(name="MyGraph") 97 | 98 | assert G.number_of_nodes() == 2 99 | assert G.number_of_edges() == 1 100 | ``` 101 | 102 | 103 | ## How do I install it? 104 | 105 | ```bash 106 | pip install nx-arangodb 107 | ``` 108 | 109 | ### What if I want to use nx-cuGraph with it? 110 | 111 | ```bash 112 | pip install nx-cugraph-cu12 --extra-index-url https://pypi.nvidia.com 113 | pip install nx-arangodb 114 | ``` 115 | 116 | ## How can I set up ArangoDB? 117 | 118 | **1) Local Instance via Docker** 119 | 120 | Appears on `localhost:8529` with the user `root` & password `openSesame`. 121 | 122 | More info: [arangodb.com/download-major](https://arangodb.com/download-major/). 123 | 124 | ```bash 125 | docker run -e ARANGO_ROOT_PASSWORD=openSesame -p 8529:8529 arangodb/arangodb 126 | ``` 127 | 128 | **2) ArangoDB Cloud Trial** 129 | 130 | [ArangoGraph](https://dashboard.arangodb.cloud/home) is ArangoDB’s Cloud offering to use ArangoDB as a managed service. 131 | 132 | A 14-day trial is available upon sign up. 133 | 134 | **3) Temporary Cloud Instance via Python** 135 | 136 | A temporary cloud database can be provisioned using the [adb-cloud-connector](https://github.com/arangodb/adb-cloud-connector?tab=readme-ov-file#arangodb-cloud-connector) python package. 137 | 138 | ```python 139 | # !pip install adb-cloud-connector 140 | 141 | import os 142 | from adb_cloud_connector import get_temp_credentials 143 | 144 | credentials = get_temp_credentials() 145 | 146 | os.environ["DATABASE_HOST"] = credentials["url"] 147 | os.environ["DATABASE_USERNAME"] = credentials["username"] 148 | os.environ["DATABASE_PASSWORD"] = credentials["password"] 149 | os.environ["DATABASE_NAME"] = credentials["dbName"] 150 | 151 | # ... 152 | ``` 153 | 154 | ## How does algorithm dispatching work? 155 | 156 | `nx-arangodb` will automatically dispatch algorithm calls to either CPU or GPU based on if [nx-cugraph](https://rapids.ai/nx-cugraph/) is installed. We rely on a rust-based library called [phenolrs](https://github.com/arangoml/phenolrs) to retrieve ArangoDB Graphs as fast as possible. 
157 | 158 | You can also force-run algorithms on CPU even if `nx-cugraph` is installed: 159 | 160 | ```python 161 | import os 162 | import networkx as nx 163 | import nx_arangodb as nxadb 164 | 165 | # os.environ ... 166 | 167 | G = nxadb.Graph(name="MyGraph") 168 | 169 | # Option 1: Use Global Config 170 | nx.config.backends.arangodb.use_gpu = False 171 | nx.pagerank(G) 172 | nx.betweenness_centrality(G) 173 | # ... 174 | nx.config.backends.arangodb.use_gpu = True 175 | 176 | # Option 2: Use Local Config 177 | nx.pagerank(G, use_gpu=False) 178 | nx.betweenness_centrality(G, use_gpu=False) 179 | ``` 180 | 181 |
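
**Tip**: if you plan to run several CPU algorithms back-to-back, the project docs (`doc/algorithms/index.rst`) recommend converting the graph to a plain NetworkX graph once and reusing it, because the full graph is otherwise loaded into memory before *each* algorithm call. A minimal sketch of that pattern, using `nxadb.convert.nxadb_to_nx` as described in the docs:

```python
import networkx as nx
import nx_arangodb as nxadb

# os.environ ...

G_adb = nxadb.Graph(name="MyGraph")

# Convert once, then run as many CPU algorithms as needed on the in-memory copy
G_nx = nxadb.convert.nxadb_to_nx(G_adb)

nx.pagerank(G_nx)
nx.betweenness_centrality(G_nx)
```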

182 | 183 |

184 | 185 | 186 | ## Can I create an ArangoDB Graph from an existing NetworkX Graph? 187 | 188 | Yes, this is actually the recommended way to start using `nx-arangodb`: 189 | 190 | ```python 191 | import os 192 | import networkx as nx 193 | import nx_arangodb as nxadb 194 | 195 | # os.environ ... 196 | 197 | G_nx = nx.karate_club_graph() 198 | 199 | G_nxadb = nxadb.Graph( 200 | incoming_graph_data=G_nx, 201 | name="MyKarateGraph" 202 | ) 203 | 204 | assert G_nxadb.number_of_nodes() == G_nx.number_of_nodes() 205 | assert G_nxadb.number_of_edges() == G_nx.number_of_edges() 206 | ``` 207 | -------------------------------------------------------------------------------- /_nx_arangodb/VERSION: -------------------------------------------------------------------------------- 1 | 1.3.0 -------------------------------------------------------------------------------- /_nx_arangodb/__init__.py: -------------------------------------------------------------------------------- 1 | """Tell NetworkX about the arangodb backend. This file can update itself: 2 | 3 | $ make plugin-info 4 | 5 | or 6 | 7 | $ make all # Recommended - runs 'plugin-info' followed by 'lint' 8 | 9 | or 10 | 11 | $ python _nx_arangodb/__init__.py 12 | """ 13 | 14 | import networkx as nx 15 | 16 | from _nx_arangodb._version import __version__ 17 | 18 | # This is normally handled by packaging.version.Version, but instead of adding 19 | # an additional runtime dependency on "packaging", assume __version__ will 20 | # always be in .. format. 21 | (_version_major, _version_minor) = __version__.split(".")[:2] 22 | 23 | # Entries between BEGIN and END are automatically generated 24 | _info = { 25 | "backend_name": "arangodb", 26 | "project": "nx-arangodb", 27 | "package": "nx_arangodb", 28 | "url": "https://github.com/arangodb/nx-arangodb", 29 | "short_summary": "ArangoDB storage backend to NetworkX.", 30 | "description": "Persist, maintain, and reload NetworkX graphs with ArangoDB.", 31 | "functions": { 32 | # BEGIN: functions 33 | "shortest_path", 34 | # END: functions 35 | }, 36 | "additional_docs": { 37 | # BEGIN: additional_docs 38 | "shortest_path": "limited version of nx.shortest_path", 39 | # END: additional_docs 40 | }, 41 | "additional_parameters": { 42 | # BEGIN: additional_parameters 43 | "shortest_path": { 44 | "dtype : dtype or None, optional": "The data type (np.float32, np.float64, or None) to use for the edge weights in the algorithm. If None, then dtype is determined by the edge values.", 45 | }, 46 | # END: additional_parameters 47 | }, 48 | } 49 | 50 | 51 | def get_info(): 52 | """Target of ``networkx.plugin_info`` entry point. 53 | 54 | This tells NetworkX about the arangodb backend without importing nx_arangodb. 55 | """ 56 | # Convert to e.g. 
`{"functions": {"myfunc": {"additional_docs": ...}}}` 57 | d = _info.copy() 58 | info_keys = {"additional_docs", "additional_parameters"} 59 | d["functions"] = { 60 | func: { 61 | info_key: vals[func] 62 | for info_key in info_keys 63 | if func in (vals := d[info_key]) 64 | } 65 | for func in d["functions"] 66 | } 67 | # Add keys for Networkx <3.3 68 | for func_info in d["functions"].values(): 69 | if "additional_docs" in func_info: 70 | func_info["extra_docstring"] = func_info["additional_docs"] 71 | if "additional_parameters" in func_info: 72 | func_info["extra_parameters"] = func_info["additional_parameters"] 73 | 74 | for key in info_keys: 75 | del d[key] 76 | 77 | d["default_config"] = {"use_gpu": True} 78 | 79 | return d 80 | 81 | 82 | if __name__ == "__main__": 83 | from pathlib import Path 84 | 85 | from _nx_arangodb.core import main 86 | 87 | filepath = Path(__file__) 88 | text = main(filepath) 89 | with filepath.open("w") as f: 90 | f.write(text) 91 | -------------------------------------------------------------------------------- /_nx_arangodb/_version.py: -------------------------------------------------------------------------------- 1 | # Copied from nx-cugraph 2 | 3 | import importlib.resources 4 | 5 | __version__ = ( 6 | importlib.resources.files("_nx_arangodb").joinpath("VERSION").read_text().strip() 7 | ) 8 | __git_commit__ = "" 9 | -------------------------------------------------------------------------------- /_nx_arangodb/core.py: -------------------------------------------------------------------------------- 1 | # Copied from nx-cugraph 2 | 3 | """Utilities to help keep _nx_arangodb up to date.""" 4 | 5 | 6 | def get_functions(): 7 | from nx_arangodb.interface import BackendInterface 8 | from nx_arangodb.utils import networkx_algorithm 9 | 10 | return { 11 | key: val 12 | for key, val in vars(BackendInterface).items() 13 | if isinstance(val, networkx_algorithm) 14 | } 15 | 16 | 17 | def get_additional_docs(functions=None): 18 | if functions is None: 19 | functions = get_functions() 20 | return {key: val.extra_doc for key, val in functions.items() if val.extra_doc} 21 | 22 | 23 | def get_additional_parameters(functions=None): 24 | if functions is None: 25 | functions = get_functions() 26 | return {key: val.extra_params for key, val in functions.items() if val.extra_params} 27 | 28 | 29 | def update_text(text, lines_to_add, target, indent=" " * 8): 30 | begin = f"# BEGIN: {target}\n" 31 | end = f"# END: {target}\n" 32 | start = text.index(begin) 33 | stop = text.index(end) 34 | to_add = "\n".join([f"{indent}{line}" for line in lines_to_add]) 35 | return f"{text[:start]}{begin}{to_add}\n{indent}{text[stop:]}" 36 | 37 | 38 | def dq_repr(s): 39 | """Return repr(s) quoted with the double quote preference used by black.""" 40 | rs = repr(s) 41 | if rs.startswith("'") and '"' not in rs: 42 | rs = rs.strip("'") 43 | return f'"{rs}"' 44 | return rs 45 | 46 | 47 | def dict_to_lines(d, *, indent=""): 48 | for key in sorted(d): 49 | val = d[key] 50 | if "\n" not in val: 51 | yield f"{indent}{dq_repr(key)}: {dq_repr(val)}," 52 | else: 53 | yield f"{indent}{dq_repr(key)}: (" 54 | *lines, last_line = val.split("\n") 55 | for line in lines: 56 | line += "\n" 57 | yield f" {indent}{dq_repr(line)}" 58 | yield f" {indent}{dq_repr(last_line)}" 59 | yield f"{indent})," 60 | 61 | 62 | def main(filepath): 63 | from pathlib import Path 64 | 65 | filepath = Path(filepath) 66 | with filepath.open() as f: 67 | orig_text = f.read() 68 | text = orig_text 69 | 70 | # Update functions 71 | 
functions = get_functions() 72 | to_add = [f'"{name}",' for name in sorted(functions)] 73 | text = update_text(text, to_add, "functions") 74 | 75 | # Update additional_docs 76 | additional_docs = get_additional_docs(functions) 77 | to_add = list(dict_to_lines(additional_docs)) 78 | text = update_text(text, to_add, "additional_docs") 79 | 80 | # Update additional_parameters 81 | additional_parameters = get_additional_parameters(functions) 82 | to_add = [] 83 | for name in sorted(additional_parameters): 84 | params = additional_parameters[name] 85 | to_add.append(f"{dq_repr(name)}: {{") 86 | to_add.extend(dict_to_lines(params, indent=" " * 4)) 87 | to_add.append("},") 88 | text = update_text(text, to_add, "additional_parameters") 89 | return text 90 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /doc/_static/dispatch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arangodb/nx-arangodb/a195c2ca1363899183c325e264850909c2f05c78/doc/_static/dispatch.png -------------------------------------------------------------------------------- /doc/_static/nxadb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arangodb/nx-arangodb/a195c2ca1363899183c325e264850909c2f05c78/doc/_static/nxadb.png -------------------------------------------------------------------------------- /doc/algorithms/index.rst: -------------------------------------------------------------------------------- 1 | .. _algorithms: 2 | 3 | ********** 4 | Algorithms 5 | ********** 6 | 7 | As NetworkX-ArangoDB is primarily a **Storage Backend** to NetworkX, its primary focus is on persisting and reloading graphs from ArangoDB. 8 | 9 | However, running algorithms on the graph is also still possible. 10 | 11 | There are 3 ways to run algorithms on the graph: 12 | 13 | 1. **NetworkX**: The traditional way of running algorithms on Graphs. 14 | 2. **NetworkX-cuGraph**: The GPU-accelerated way of running algorithms on Graphs. 15 | 3. **ArangoDB**: The database way of running algorithms on Graphs. 16 | 17 | Currently, Options 1 & 2 are supported, whereas Option 3 is a work-in-progress. 18 | 19 | Running algorithms with Option 2 requires ``nx-cugraph`` to be installed on a system with a compatible GPU: 20 | 21 | .. code-block:: 22 | 23 | pip install nx-cugraph-cu12 --extra-index-url https://pypi.nvidia.com 24 | 25 | When running algorithms with Option 2, the graph is converted to a ``nx-cugraph`` graph, and the algorithm is run on the GPU. 
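
A quick way to confirm whether the GPU path is even available is to check that ``nx-cugraph`` can be found in the current environment. The snippet below is a minimal, illustrative check using only the standard library (it is not part of the ``nx-arangodb`` API):

.. code-block:: python

    import importlib.util

    # If the module is missing, algorithm calls simply fall back to the CPU (NetworkX).
    if importlib.util.find_spec("nx_cugraph") is None:
        print("nx-cugraph not installed: algorithms will run on the CPU")
    else:
        print("nx-cugraph installed: supported algorithms can be dispatched to the GPU")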
26 | 
27 | Running an algorithm on the GPU is only possible if ``nx-cugraph`` has implemented the algorithm you want to run.
28 | 
29 | - For a list of algorithms that are supported by ``nx-cugraph``, refer to the `nx-cugraph README `_.
30 | - For a list of algorithms that are supported by ``networkx``, refer to the `NetworkX Documentation `_.
31 | 
32 | ``nx-arangodb`` will automatically dispatch algorithm calls to either CPU or GPU based on whether ``nx-cugraph`` is installed. We rely on a rust-based library called `phenolrs `_ to retrieve ArangoDB Graphs as fast as possible.
33 | 
34 | You can also force-run algorithms on CPU even if ``nx-cugraph`` is installed:
35 | 
36 | .. code-block:: python
37 | 
38 |     import os
39 |     import networkx as nx
40 |     import nx_arangodb as nxadb
41 | 
42 |     # os.environ ...
43 | 
44 |     G = nxadb.Graph(name="MyGraph")
45 | 
46 |     # Option 1: Use Global Config
47 |     nx.config.backends.arangodb.use_gpu = False
48 |     nx.pagerank(G)
49 |     nx.betweenness_centrality(G)
50 |     # ...
51 |     nx.config.backends.arangodb.use_gpu = True
52 | 
53 |     # Option 2: Use Local Config
54 |     nx.pagerank(G, use_gpu=False)
55 |     nx.betweenness_centrality(G, use_gpu=False)
56 | 
57 | 
58 | .. image:: ../_static/dispatch.png
59 |    :align: center
60 |    :alt: nx-arangodb dispatching
61 |    :height: 200px
62 | 
63 | 
64 | **Tip**: If you're running multiple CPU algorithms, it's recommended to rely on invoking ``nxadb.convert.nxadb_to_nx`` to convert the graph to a NetworkX Graph before running the algorithms.
65 | This is because we currently load the entire graph into memory before running *each* algorithm, which can be slow for large graphs.
66 | 
67 | .. code-block:: python
68 | 
69 |     import networkx as nx
70 |     import nx_arangodb as nxadb
71 | 
72 |     G_adb = nxadb.Graph(name="MyGraph")
73 | 
74 |     G_nx = nxadb.convert.nxadb_to_nx(G_adb)
75 | 
76 |     nx.pagerank(G_nx)
77 |     nx.betweenness_centrality(G_nx)
78 |     # ...
79 | 
80 | 
81 | **Option 3**
82 | 
83 | This is an experimental module seeking to provide server-side algorithms for ``nx-arangodb`` Graphs.
84 | The goal is to provide a set of algorithms that can be delegated to the server for processing,
85 | rather than having to pull all the data to the client and process it there.
86 | 
87 | Currently, the module is in a very early stage and only provides a single algorithm: ``shortest_path``.
88 | This is simply to demonstrate the potential of the module and to provide a starting point for further development.
89 | 
90 | .. code-block:: python
91 | 
92 |     import os
93 |     import networkx as nx
94 |     import nx_arangodb as nxadb
95 | 
96 |     # os.environ ...
97 | 
98 |     G = nxadb.Graph(name="MyGraph")
99 | 
100 |     nx.pagerank(G)  # Runs on the client
101 |     nx.shortest_path(G, source="A", target="B")  # Runs on the DB server
102 |     nx.shortest_path.orig_func(G, source="A", target="B")  # Runs on the client
103 | 
--------------------------------------------------------------------------------
/doc/classes/digraph.rst:
--------------------------------------------------------------------------------
1 | .. _digraph:
2 | 
3 | =======
4 | DiGraph
5 | =======
6 | 
7 | Overview
8 | ========
9 | .. currentmodule:: nx_arangodb
10 | .. autoclass:: DiGraph
11 |    :members: query, chat
12 | 
13 | 
14 | Methods
15 | =======
16 | 
17 | Adding and removing nodes and edges
18 | -----------------------------------
19 | 
20 | ..
autosummary:: 21 | :toctree: generated/ 22 | 23 | DiGraph.__init__ 24 | DiGraph.add_node 25 | DiGraph.add_nodes_from 26 | DiGraph.remove_node 27 | DiGraph.remove_nodes_from 28 | DiGraph.add_edge 29 | DiGraph.add_edges_from 30 | DiGraph.add_weighted_edges_from 31 | DiGraph.remove_edge 32 | DiGraph.remove_edges_from 33 | DiGraph.update 34 | DiGraph.clear 35 | DiGraph.clear_edges 36 | 37 | 38 | 39 | Reporting nodes edges and neighbors 40 | ----------------------------------- 41 | .. autosummary:: 42 | :toctree: generated/ 43 | 44 | DiGraph.nodes 45 | DiGraph.__iter__ 46 | DiGraph.has_node 47 | DiGraph.__contains__ 48 | DiGraph.edges 49 | DiGraph.out_edges 50 | DiGraph.in_edges 51 | DiGraph.has_edge 52 | DiGraph.get_edge_data 53 | DiGraph.neighbors 54 | DiGraph.adj 55 | DiGraph.__getitem__ 56 | DiGraph.successors 57 | DiGraph.succ 58 | DiGraph.predecessors 59 | DiGraph.pred 60 | DiGraph.adjacency 61 | DiGraph.nbunch_iter 62 | 63 | 64 | Counting nodes edges and neighbors 65 | ---------------------------------- 66 | .. autosummary:: 67 | :toctree: generated/ 68 | 69 | DiGraph.order 70 | DiGraph.number_of_nodes 71 | DiGraph.__len__ 72 | DiGraph.degree 73 | DiGraph.in_degree 74 | DiGraph.out_degree 75 | DiGraph.size 76 | DiGraph.number_of_edges 77 | 78 | 79 | Making copies and subgraphs 80 | --------------------------- 81 | .. autosummary:: 82 | :toctree: generated/ 83 | 84 | DiGraph.copy 85 | DiGraph.to_undirected 86 | DiGraph.to_directed 87 | DiGraph.subgraph 88 | DiGraph.edge_subgraph 89 | DiGraph.reverse 90 | -------------------------------------------------------------------------------- /doc/classes/graph.rst: -------------------------------------------------------------------------------- 1 | .. _graph: 2 | 3 | ===== 4 | Graph 5 | ===== 6 | 7 | Overview 8 | ======== 9 | .. currentmodule:: nx_arangodb 10 | .. autoclass:: Graph 11 | :members: query, chat 12 | 13 | 14 | Methods 15 | ======= 16 | 17 | Adding and removing nodes and edges 18 | ----------------------------------- 19 | 20 | .. autosummary:: 21 | :toctree: generated/ 22 | 23 | Graph.__init__ 24 | Graph.add_node 25 | Graph.add_nodes_from 26 | Graph.remove_node 27 | Graph.remove_nodes_from 28 | Graph.add_edge 29 | Graph.add_edges_from 30 | Graph.add_weighted_edges_from 31 | Graph.remove_edge 32 | Graph.remove_edges_from 33 | Graph.update 34 | Graph.clear 35 | Graph.clear_edges 36 | 37 | 38 | 39 | Reporting nodes edges and neighbors 40 | ----------------------------------- 41 | .. autosummary:: 42 | :toctree: generated/ 43 | 44 | Graph.nodes 45 | Graph.__iter__ 46 | Graph.has_node 47 | Graph.__contains__ 48 | Graph.edges 49 | Graph.has_edge 50 | Graph.get_edge_data 51 | Graph.neighbors 52 | Graph.adj 53 | Graph.__getitem__ 54 | Graph.adjacency 55 | Graph.nbunch_iter 56 | 57 | 58 | 59 | Counting nodes edges and neighbors 60 | ---------------------------------- 61 | .. autosummary:: 62 | :toctree: generated/ 63 | 64 | Graph.order 65 | Graph.number_of_nodes 66 | Graph.__len__ 67 | Graph.degree 68 | Graph.size 69 | Graph.number_of_edges 70 | 71 | 72 | Making copies and subgraphs 73 | --------------------------- 74 | .. autosummary:: 75 | :toctree: generated/ 76 | 77 | Graph.copy 78 | Graph.to_undirected 79 | Graph.to_directed 80 | Graph.subgraph 81 | Graph.edge_subgraph 82 | -------------------------------------------------------------------------------- /doc/classes/index.rst: -------------------------------------------------------------------------------- 1 | .. 
_classes: 2 | 3 | ****** 4 | Graphs 5 | ****** 6 | 7 | NetworkX provides data structures and methods for storing graphs. 8 | 9 | All NetworkX graph classes allow (hashable) Python objects as nodes 10 | and any Python object can be assigned as an edge attribute. 11 | 12 | The choice of graph class depends on the structure of the 13 | graph you want to represent. 14 | 15 | **Which graph class should I use?** 16 | 17 | +----------------+------------+--------------------+------------------------+ 18 | | Networkx Class | Type | Self-loops allowed | Parallel edges allowed | 19 | +================+============+====================+========================+ 20 | | Graph | undirected | Yes | No | 21 | +----------------+------------+--------------------+------------------------+ 22 | | DiGraph | directed | Yes | No | 23 | +----------------+------------+--------------------+------------------------+ 24 | | MultiGraph | undirected | Yes | Yes | 25 | +----------------+------------+--------------------+------------------------+ 26 | | MultiDiGraph | directed | Yes | Yes | 27 | +----------------+------------+--------------------+------------------------+ 28 | 29 | .. toctree:: 30 | :maxdepth: 1 31 | 32 | graph 33 | digraph 34 | multigraph 35 | multidigraph 36 | -------------------------------------------------------------------------------- /doc/classes/multidigraph.rst: -------------------------------------------------------------------------------- 1 | .. _multidigraph: 2 | 3 | 4 | ============ 5 | MultiDiGraph 6 | ============ 7 | 8 | Overview 9 | ======== 10 | .. currentmodule:: nx_arangodb 11 | .. autoclass:: MultiDiGraph 12 | :members: query, chat 13 | 14 | 15 | Methods 16 | ======= 17 | 18 | Adding and Removing Nodes and Edges 19 | ----------------------------------- 20 | 21 | .. autosummary:: 22 | :toctree: generated/ 23 | 24 | MultiDiGraph.__init__ 25 | MultiDiGraph.add_node 26 | MultiDiGraph.add_nodes_from 27 | MultiDiGraph.remove_node 28 | MultiDiGraph.remove_nodes_from 29 | MultiDiGraph.add_edge 30 | MultiDiGraph.add_edges_from 31 | MultiDiGraph.add_weighted_edges_from 32 | MultiDiGraph.new_edge_key 33 | MultiDiGraph.remove_edge 34 | MultiDiGraph.remove_edges_from 35 | MultiDiGraph.update 36 | MultiDiGraph.clear 37 | MultiDiGraph.clear_edges 38 | 39 | 40 | 41 | Reporting nodes edges and neighbors 42 | ----------------------------------- 43 | .. autosummary:: 44 | :toctree: generated/ 45 | 46 | MultiDiGraph.nodes 47 | MultiDiGraph.__iter__ 48 | MultiDiGraph.has_node 49 | MultiDiGraph.__contains__ 50 | MultiDiGraph.edges 51 | MultiDiGraph.out_edges 52 | MultiDiGraph.in_edges 53 | MultiDiGraph.has_edge 54 | MultiDiGraph.get_edge_data 55 | MultiDiGraph.neighbors 56 | MultiDiGraph.adj 57 | MultiDiGraph.__getitem__ 58 | MultiDiGraph.successors 59 | MultiDiGraph.succ 60 | MultiDiGraph.predecessors 61 | MultiDiGraph.pred 62 | MultiDiGraph.adjacency 63 | MultiDiGraph.nbunch_iter 64 | 65 | 66 | Counting nodes edges and neighbors 67 | ---------------------------------- 68 | .. autosummary:: 69 | :toctree: generated/ 70 | 71 | MultiDiGraph.order 72 | MultiDiGraph.number_of_nodes 73 | MultiDiGraph.__len__ 74 | MultiDiGraph.degree 75 | MultiDiGraph.in_degree 76 | MultiDiGraph.out_degree 77 | MultiDiGraph.size 78 | MultiDiGraph.number_of_edges 79 | 80 | Making copies and subgraphs 81 | --------------------------- 82 | .. 
autosummary:: 83 | :toctree: generated/ 84 | 85 | MultiDiGraph.copy 86 | MultiDiGraph.to_undirected 87 | MultiDiGraph.to_directed 88 | MultiDiGraph.subgraph 89 | MultiDiGraph.edge_subgraph 90 | MultiDiGraph.reverse 91 | -------------------------------------------------------------------------------- /doc/classes/multigraph.rst: -------------------------------------------------------------------------------- 1 | .. _multigraph: 2 | 3 | ========== 4 | MultiGraph 5 | ========== 6 | 7 | Overview 8 | ======== 9 | .. currentmodule:: nx_arangodb 10 | .. autoclass:: MultiGraph 11 | :members: query, chat 12 | 13 | Methods 14 | ======= 15 | 16 | Adding and removing nodes and edges 17 | ----------------------------------- 18 | 19 | .. autosummary:: 20 | :toctree: generated/ 21 | 22 | MultiGraph.__init__ 23 | MultiGraph.add_node 24 | MultiGraph.add_nodes_from 25 | MultiGraph.remove_node 26 | MultiGraph.remove_nodes_from 27 | MultiGraph.add_edge 28 | MultiGraph.add_edges_from 29 | MultiGraph.add_weighted_edges_from 30 | MultiGraph.new_edge_key 31 | MultiGraph.remove_edge 32 | MultiGraph.remove_edges_from 33 | MultiGraph.update 34 | MultiGraph.clear 35 | MultiGraph.clear_edges 36 | 37 | 38 | 39 | Reporting nodes edges and neighbors 40 | ----------------------------------- 41 | .. autosummary:: 42 | :toctree: generated/ 43 | 44 | MultiGraph.nodes 45 | MultiGraph.__iter__ 46 | MultiGraph.has_node 47 | MultiGraph.__contains__ 48 | MultiGraph.edges 49 | MultiGraph.has_edge 50 | MultiGraph.get_edge_data 51 | MultiGraph.neighbors 52 | MultiGraph.adj 53 | MultiGraph.__getitem__ 54 | MultiGraph.adjacency 55 | MultiGraph.nbunch_iter 56 | 57 | 58 | 59 | Counting nodes edges and neighbors 60 | ---------------------------------- 61 | .. autosummary:: 62 | :toctree: generated/ 63 | 64 | MultiGraph.order 65 | MultiGraph.number_of_nodes 66 | MultiGraph.__len__ 67 | MultiGraph.degree 68 | MultiGraph.size 69 | MultiGraph.number_of_edges 70 | 71 | 72 | Making copies and subgraphs 73 | --------------------------- 74 | .. autosummary:: 75 | :toctree: generated/ 76 | 77 | MultiGraph.copy 78 | MultiGraph.to_undirected 79 | MultiGraph.to_directed 80 | MultiGraph.subgraph 81 | MultiGraph.edge_subgraph 82 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | import os 10 | import sys 11 | 12 | sys.path.insert(0, os.path.abspath("..")) 13 | 14 | project = 'nx-arangodb' 15 | copyright = '2024, ArangoDB' 16 | author = 'ArangoDB' 17 | 18 | # -- General configuration --------------------------------------------------- 19 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 20 | 21 | extensions = [ 22 | "sphinx_rtd_theme", 23 | "sphinx.ext.autodoc", 24 | "sphinx.ext.viewcode", 25 | "sphinx.ext.autosummary", 26 | "sphinx.ext.inheritance_diagram", 27 | ] 28 | templates_path = ['_templates'] 29 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 30 | 31 | 32 | # -- Options for HTML output ------------------------------------------------- 33 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 34 | 35 | html_theme = 'sphinx_rtd_theme' 36 | html_static_path = ['_static'] 37 | autodoc_member_order = "bysource" 38 | autodoc_inherit_docstrings = True 39 | autosummary_generate = True 40 | -------------------------------------------------------------------------------- /doc/dict/adj.rst: -------------------------------------------------------------------------------- 1 | .. _adj: 2 | 3 | ========= 4 | Adjacency 5 | ========= 6 | 7 | 8 | .. currentmodule:: nx_arangodb.classes.dict.adj 9 | .. autoclass:: AdjListOuterDict 10 | 11 | .. currentmodule:: nx_arangodb.classes.dict.adj 12 | .. autoclass:: AdjListInnerDict 13 | 14 | .. currentmodule:: nx_arangodb.classes.dict.adj 15 | .. autoclass:: EdgeKeyDict 16 | 17 | .. currentmodule:: nx_arangodb.classes.dict.adj 18 | .. autoclass:: EdgeAttrDict -------------------------------------------------------------------------------- /doc/dict/graph.rst: -------------------------------------------------------------------------------- 1 | .. _graph: 2 | 3 | ===== 4 | Graph 5 | ===== 6 | 7 | 8 | .. currentmodule:: nx_arangodb.classes.dict.graph 9 | .. autoclass:: GraphDict 10 | 11 | .. currentmodule:: nx_arangodb.classes.dict.graph 12 | .. autoclass:: GraphAttrDict -------------------------------------------------------------------------------- /doc/dict/index.rst: -------------------------------------------------------------------------------- 1 | .. _dict: 2 | 3 | ************ 4 | Dictionaries 5 | ************ 6 | 7 | The ``dict`` module provides a set of ``UserDict``-based classes that extend the traditional dictionary functionality to maintain a remote connection to an ArangoDB Database. 8 | 9 | NetworkX Graphs rely on dictionary-based structures to store their data, which are defined by their factory functions: 10 | 11 | 1. ``node_dict_factory`` 12 | 2. ``node_attr_dict_factory`` 13 | 3. ``adjlist_outer_dict_factory`` 14 | 4. ``adjlist_inner_dict_factory`` 15 | 5. ``edge_key_dict_factory`` (Only for MultiGraphs) 16 | 6. ``edge_attr_dict_factory`` 17 | 7. ``graph_attr_dict_factory`` 18 | 19 | These factories are used to create the dictionaries that store the data of the nodes, edges, and the graph itself. 20 | 21 | This module contains the following classes: 22 | 23 | 1. ``NodeDict`` 24 | 2. ``NodeAttrDict`` 25 | 3. ``AdjListOuterDict`` 26 | 4. ``AdjListInnerDict`` 27 | 5. ``EdgeKeyDict`` 28 | 6. 
``EdgeAttrDict`` 29 | 7. ``GraphDict`` 30 | 8. ``GraphAttrDict`` 31 | 32 | Each class extends the functionality of the corresponding dictionary factory by adding methods to interact with the data in ArangoDB. Think of it as a CRUD interface for ArangoDB. This is done by overriding the primary dunder methods of the ``UserDict`` class. 33 | 34 | By using this strategy in addition to subclassing the ``nx.Graph`` class, we're able to preserve the original functionality of the NetworkX Graphs while adding ArangoDB support. 35 | 36 | .. toctree:: 37 | :maxdepth: 1 38 | 39 | adj 40 | node 41 | graph 42 | -------------------------------------------------------------------------------- /doc/dict/node.rst: -------------------------------------------------------------------------------- 1 | .. _node: 2 | 3 | ==== 4 | Node 5 | ==== 6 | 7 | 8 | .. currentmodule:: nx_arangodb.classes.dict.node 9 | .. autoclass:: NodeDict 10 | 11 | .. currentmodule:: nx_arangodb.classes.dict.node 12 | .. autoclass:: NodeAttrDict 13 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | nx-arangodb 2 | ============ 3 | 4 | .. raw:: html 5 | 6 | 20 | 21 | .. raw:: html 22 | 23 |
24 | 25 | .. image:: https://colab.research.google.com/assets/colab-badge.svg 26 | :target: https://colab.research.google.com/github/arangodb/nx-arangodb/blob/main/doc/nx_arangodb.ipynb 27 | :alt: Open In Colab 28 | 29 | .. image:: https://dl.circleci.com/status-badge/img/gh/arangodb/nx-arangodb/tree/main.svg?style=svg 30 | :target: https://dl.circleci.com/status-badge/redirect/gh/arangodb/nx-arangodb/tree/main 31 | :alt: CircleCI 32 | 33 | .. image:: https://github.com/arangodb/nx-arangodb/actions/workflows/analyze.yml/badge.svg 34 | :target: https://github.com/arangodb/nx-arangodb/actions/workflows/analyze.yml 35 | :alt: CodeQL 36 | 37 | .. image:: https://github.com/arangodb/nx-arangodb/actions/workflows/docs.yaml/badge.svg 38 | :target: https://github.com/arangodb/nx-arangodb/actions/workflows/docs.yaml 39 | :alt: Docs 40 | 41 | .. raw:: html 42 | 43 |
44 | 45 | .. image:: https://img.shields.io/pypi/v/nx-arangodb?color=3775A9&style=for-the-badge&logo=pypi&logoColor=FFD43B 46 | :target: https://pypi.org/project/nx-arangodb/ 47 | :alt: PyPI version badge 48 | 49 | .. image:: https://img.shields.io/pypi/pyversions/nx-arangodb?color=3776AB&style=for-the-badge&logo=python&logoColor=FFD43B 50 | :target: https://pypi.org/project/nx-arangodb/ 51 | :alt: Python versions badge 52 | 53 | .. raw:: html 54 | 55 |
56 | 57 | .. image:: https://img.shields.io/github/license/arangodb/nx-arangodb?color=9E2165&style=for-the-badge 58 | :target: https://github.com/arangodb/nx-arangodb/blob/main/LICENSE 59 | :alt: License 60 | 61 | .. image:: https://img.shields.io/static/v1?style=for-the-badge&label=code%20style&message=black&color=black 62 | :target: https://github.com/psf/black 63 | :alt: Code style: black 64 | 65 | .. image:: https://img.shields.io/pepy/dt/nx-arangodb?style=for-the-badge&color=282661 66 | :target: https://pepy.tech/project/nx-arangodb 67 | :alt: Downloads 68 | 69 | This is a `backend to NetworkX `_ that offers `ArangoDB `_ as a `Persistence Layer to NetworkX Graphs `_: 70 | 71 | 1. Persist NetworkX Graphs to ArangoDB. 72 | 2. Reload NetworkX Graphs from ArangoDB. 73 | 3. Perform CRUD on ArangoDB Graphs via NetworkX. 74 | 4. Run algorithms (CPU & GPU) on ArangoDB Graphs via NetworkX. 75 | 76 | Benefits of having ArangoDB as a backend to NetworkX include: 77 | 78 | 1. No need to re-create the graph every time you start a new session. 79 | 2. Access to GPU-accelerated graph analytics (`nx-cugraph `_). 80 | 3. Access to a database query language (`Arango Query Language `_). 81 | 4. Access to a visual interface for graph exploration (`ArangoDB Web UI `_). 82 | 5. Access to cross-collaboration on the same graph (`ArangoDB Cloud `_). 83 | 6. Access to efficient distribution of graph data (`ArangoDB SmartGraphs `_). 84 | 85 | .. image:: ./_static/nxadb.png 86 | :align: center 87 | :alt: nx-arangodb Diagram 88 | :height: 200px 89 | 90 | Requirements 91 | ------------ 92 | - Python 3.10+ 93 | - NetworkX 3.0+ 94 | - ArangoDB 3.10+ 95 | 96 | Installation 97 | ------------ 98 | 99 | Latest Release 100 | 101 | .. code-block:: 102 | 103 | pip install nx-arangodb 104 | 105 | Current State 106 | 107 | .. code-block:: 108 | 109 | pip install git+https://github.com/arangodb/nx-arangodb 110 | 111 | Contents 112 | -------- 113 | 114 | The UX of NetworkX-ArangoDB is similar to that of NetworkX, but with the 115 | added functionality of persisting graphs to ArangoDB. For an understanding 116 | of how to use NetworkX, refer to the `NetworkX Documentation `_. 117 | 118 | Expect documentation to grow over time: 119 | 120 | .. toctree:: 121 | :maxdepth: 2 122 | 123 | quickstart 124 | classes/index 125 | dict/index 126 | algorithms/index 127 | views/index -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /doc/nx_arangodb.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "wqxz9xg912bF" 7 | }, 8 | "source": [ 9 | "## nx-arangodb\n", 10 | "\n", 11 | "\"Open\n", 12 | "\n", 13 | "\n", 14 | "
\n", 15 | " \"NetworkX\"\n", 16 | " \"ArangoDB\"\n", 17 | " \"RAPIDS\"\n", 18 | " \"NVIDIA\"\n", 19 | "
\n", 20 | "\n", 21 | "This is a [backend to NetworkX](https://networkx.org/documentation/stable/reference/backends.html) that offers [ArangoDB](https://github.com/arangodb/arangodb) as a [Persistence Layer to NetworkX Graphs](https://arangodb.com/introducing-the-arangodb-networkx-persistence-layer/).\n", 22 | "\n", 23 | "Additional Documentation:\n", 24 | "- [NetworkX](https://networkx.org/documentation/stable/)\n", 25 | "- [ArangoDB](https://docs.arangodb.com/stable/)\n", 26 | "- [nx-cugraph](https://docs.rapids.ai/api/cugraph/nightly/nx_cugraph/nx_cugraph/)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "id": "L4nmLCFf3HoC" 33 | }, 34 | "source": [ 35 | "## Package Installation" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "id": "pV0dx8Ny1q64" 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "%%capture\n", 47 | "!pip install nx-arangodb" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": { 53 | "id": "hoyxgmOz3CwF" 54 | }, 55 | "source": [ 56 | "##### Optional: `nx-cugraph`\n", 57 | "\n", 58 | "❗Note that using GPU-accelerated algorithms requires **changing the runtime**❗\n", 59 | "\n", 60 | "`Runtime` --> `Change runtime type` --> `Hardware Accelerator`" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "id": "yoaY4NIo2d1b" 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "!nvidia-smi\n", 72 | "!nvcc --version" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": { 79 | "id": "UNOstqJt27xH" 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "%%capture\n", 84 | "!pip install nx-cugraph-cu12 --extra-index-url https://pypi.nvidia.com # Requires CUDA-capable GPU" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": { 90 | "id": "vrB4FpB63yEF" 91 | }, 92 | "source": [ 93 | "## Setting up ArangoDB" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "id": "IDCtRAyy30Ek" 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "%%capture\n", 105 | "!pip install adb-cloud-connector\n", 106 | "\n", 107 | "# Source: https://github.com/arangodb/adb-cloud-connector" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "id": "368O33d238EL" 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "import os\n", 119 | "import json\n", 120 | "\n", 121 | "from adb_cloud_connector import get_temp_credentials\n", 122 | "\n", 123 | "con = get_temp_credentials()\n", 124 | "\n", 125 | "os.environ[\"DATABASE_HOST\"] = con[\"url\"]\n", 126 | "os.environ[\"DATABASE_USERNAME\"] = con[\"username\"]\n", 127 | "os.environ[\"DATABASE_PASSWORD\"] = con[\"password\"]\n", 128 | "os.environ[\"DATABASE_NAME\"] = con[\"dbName\"]\n", 129 | "\n", 130 | "# Feel free to check out your temporary database!\n", 131 | "print(json.dumps(con, indent=4))" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": { 137 | "id": "5fsZEyyh3F87" 138 | }, 139 | "source": [ 140 | "## Starter (CPU Example)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": { 146 | "id": "p3q0pZJO62Ba" 147 | }, 148 | "source": [ 149 | "Hello World for `nx-arangodb`\n", 150 | "\n", 151 | "Steps breakdown:\n", 152 | "\n", 153 | "1. Using the Environment Variables established above, instantiate an `nxadb.Graph` that is able to connect to our ArangoDB database. 
In order to create an ArangoDB Graph, the `name` parameter is **required**.\n", 154 | "\n", 155 | "2. Add two nodes, which will be stored in the `node` ArangoDB Vertex Collection, with IDs `node/1` and `node/2`.\n", 156 | "\n", 157 | "3. Add an edge, which will be stored in the `node_to_node` ArangoDB Edge Collection, with an arbitrary ID.\n", 158 | "\n", 159 | "4. Re-instantiate the `nxadb` Graph. Given that the data is persisted in ArangoDB, we can pick up right where we left off.\n", 160 | "\n", 161 | "5. Run an algorithm on the graph, which will pull the data from ArangoDB.\n", 162 | "\n", 163 | "6. Iterate over the pagerank values to store the results back on each node.\n", 164 | "\n", 165 | "7. Clear the *local* cache of the graph.\n", 166 | "\n", 167 | "8. Fetch the node & edge data in ArangoDB\n", 168 | "\n", 169 | "9. Experiment with different ways of fetching a node by key." 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": { 176 | "id": "dfx5vaqx2bWl" 177 | }, 178 | "outputs": [], 179 | "source": [ 180 | "# 1. Create the Graph\n", 181 | "\n", 182 | "import networkx as nx\n", 183 | "import nx_arangodb as nxadb\n", 184 | "\n", 185 | "G = nxadb.Graph(name=\"MyGraph\", default_node_type=\"node\")" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": { 192 | "id": "SnGqBwYO49Kq" 193 | }, 194 | "outputs": [], 195 | "source": [ 196 | "# 2. Add two nodes\n", 197 | "\n", 198 | "G.add_node(1, foo='bar')\n", 199 | "G.add_node(2, bar='foo')" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": { 206 | "id": "eUJP7rNZ5Gqd" 207 | }, 208 | "outputs": [], 209 | "source": [ 210 | "# 3. Add an edge\n", 211 | "\n", 212 | "G.add_edge(1, 2, weight=2)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": { 219 | "id": "O3ThlpALI5G9" 220 | }, 221 | "outputs": [], 222 | "source": [ 223 | "# 4. Re-instantiate the Graph\n", 224 | "\n", 225 | "G = nxadb.Graph(name=\"MyGraph\")\n", 226 | "\n", 227 | "print(G)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": { 234 | "id": "5F_vE8Bo5HSW" 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "# 5. Run an algorithm (CPU)\n", 239 | "res = nx.pagerank(G, use_gpu=False)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": { 246 | "id": "GScGo5Xn5Y1s" 247 | }, 248 | "outputs": [], 249 | "source": [ 250 | "# 6. Persist the results\n", 251 | "\n", 252 | "for k, v in res.items():\n", 253 | " G.nodes[k]['pagerank'] = v" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": { 260 | "id": "gAou4JwtI8Z3" 261 | }, 262 | "outputs": [], 263 | "source": [ 264 | "# 7. Clear the local cache\n", 265 | "\n", 266 | "G.clear()" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": { 273 | "id": "3hTO5lVg5h_S" 274 | }, 275 | "outputs": [], 276 | "source": [ 277 | "# 8. Observe the persisted results\n", 278 | "\n", 279 | "print(G[1])\n", 280 | "print(G[2])\n", 281 | "print(G[1][2])" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": { 288 | "id": "bL_fwOpt5zzA" 289 | }, 290 | "outputs": [], 291 | "source": [ 292 | "# 9. 
Experiment with different node keys\n", 293 | "\n", 294 | "print(G.nodes[1] == G.nodes[\"1\"] == G.nodes[\"node/1\"])\n", 295 | "print(G[1][2] == G[\"1\"][2] == G[\"node/1\"][2])" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": { 301 | "id": "VE6lCtDb6KGw" 302 | }, 303 | "source": [ 304 | "## Starter (GPU Example)" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": { 311 | "id": "jgg9PIpp6xhM" 312 | }, 313 | "outputs": [], 314 | "source": [ 315 | "# 1. Create the NetworkX Grid Graph\n", 316 | "\n", 317 | "G_nx = nx.grid_2d_graph(500, 500)\n", 318 | "\n", 319 | "print(G_nx)" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": { 326 | "id": "z7to3GoyHcrC" 327 | }, 328 | "outputs": [], 329 | "source": [ 330 | "# 2. Create the ArangoDB Grid Graph\n", 331 | "\n", 332 | "G = nxadb.Graph(incoming_graph_data=G_nx, name=\"Grid\")" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": { 339 | "id": "K4vqYD-zJBwW" 340 | }, 341 | "outputs": [], 342 | "source": [ 343 | "# 3. Re-instantiate the Graph\n", 344 | "\n", 345 | "G = nxadb.Graph(name=\"Grid\")" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": { 352 | "id": "hnYZc8daIhHR" 353 | }, 354 | "outputs": [], 355 | "source": [ 356 | "# 4. Run an algorithm (GPU)\n", 357 | "# See *Package Installation* to install nx-cugraph ^\n", 358 | "res = nx.pagerank(G)" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": { 365 | "id": "qIMJ2Z9_IyTz" 366 | }, 367 | "outputs": [], 368 | "source": [ 369 | "# 5. Run another algorithm (GPU, cached)\n", 370 | "\n", 371 | "res_2 = nx.community.louvain_communities(G)" 372 | ] 373 | } 374 | ], 375 | "metadata": { 376 | "accelerator": "GPU", 377 | "colab": { 378 | "collapsed_sections": [ 379 | "L4nmLCFf3HoC", 380 | "hoyxgmOz3CwF", 381 | "vrB4FpB63yEF", 382 | "5fsZEyyh3F87", 383 | "VE6lCtDb6KGw" 384 | ], 385 | "gpuType": "T4", 386 | "provenance": [] 387 | }, 388 | "kernelspec": { 389 | "display_name": "Python 3", 390 | "name": "python3" 391 | }, 392 | "language_info": { 393 | "name": "python" 394 | } 395 | }, 396 | "nbformat": 4, 397 | "nbformat_minor": 0 398 | } 399 | -------------------------------------------------------------------------------- /doc/quickstart.rst: -------------------------------------------------------------------------------- 1 | Quickstart 2 | ========== 3 | 4 | 1. Set up ArangoDB 5 | 2. Set environment variables 6 | 3. Instantiate a NetworkX-ArangoDB Graph 7 | 8 | 1. Set up ArangoDB 9 | ------------------ 10 | 11 | **Option A: Local Instance via Docker** 12 | 13 | Appears on ``localhost:8529`` with the user ``root`` & password ``openSesame``. 14 | 15 | More info: `arangodb.com/download-major `_. 16 | 17 | .. code-block:: bash 18 | 19 | docker run -e ARANGO_ROOT_PASSWORD=openSesame -p 8529:8529 arangodb/arangodb 20 | 21 | **Option B: ArangoDB Cloud Trial** 22 | 23 | `ArangoGraph `_ is ArangoDB's Cloud offering to use ArangoDB as a managed service. 24 | 25 | A 14-day trial is available upon sign up. 26 | 27 | **Option C: Temporary Cloud Instance via Python** 28 | 29 | A temporary cloud database can be provisioned using the `adb-cloud-connector `_ Python package. 30 | 31 | .. code-block:: bash 32 | 33 | pip install adb-cloud-connector 34 | 35 | .. 
code-block:: python 36 | 37 | from adb_cloud_connector import get_temp_credentials 38 | 39 | credentials = get_temp_credentials() 40 | 41 | print(credentials) 42 | 43 | 2. Set environment variables 44 | ---------------------------- 45 | 46 | Connecting to ArangoDB requires the following environment variables: 47 | 48 | 1. ``DATABASE_HOST``: The host URL of the ArangoDB instance. 49 | 2. ``DATABASE_USERNAME``: The username to connect to the ArangoDB instance. 50 | 3. ``DATABASE_PASSWORD``: The password to connect to the ArangoDB instance. 51 | 4. ``DATABASE_NAME``: The name of the database to connect to. 52 | 53 | For example, using Option A from above: 54 | 55 | .. code-block:: bash 56 | 57 | export DATABASE_HOST=http://localhost:8529 58 | export DATABASE_USERNAME=root 59 | export DATABASE_PASSWORD=openSesame 60 | export DATABASE_NAME=_system 61 | 62 | Or using Option C from above: 63 | 64 | .. code-block:: python 65 | 66 | import os 67 | from adb_cloud_connector import get_temp_credentials 68 | 69 | credentials = get_temp_credentials() 70 | 71 | os.environ["DATABASE_HOST"] = credentials["url"] 72 | os.environ["DATABASE_USERNAME"] = credentials["username"] 73 | os.environ["DATABASE_PASSWORD"] = credentials["password"] 74 | os.environ["DATABASE_NAME"] = credentials["dbName"] 75 | 76 | 3. Instantiate a NetworkX-ArangoDB Graph 77 | ---------------------------------------- 78 | 79 | Instantiating a NetworkX-ArangoDB Graph is similar to instantiating a NetworkX Graph. 80 | 81 | Providing the ``name`` parameter will create a new graph in ArangoDB if it does not already exist. 82 | 83 | Providing the ``incoming_graph_data`` in combination with the ``name`` parameter will create a new graph in ArangoDB 84 | with the provided data. If the graph already exists, an error will be raised. 85 | 86 | .. code-block:: python 87 | 88 | import networkx as nx 89 | import nx_arangodb as nxadb 90 | 91 | G = nxadb.Graph(name="MyGraph") # New ArangoDB Graph 92 | G2 = nxadb.Graph(incoming_graph_data=nx.karate_club_graph()) # Regular NetworkX Graph 93 | G3 = nxadb.Graph(incoming_graph_data=nx.karate_club_graph(), name="KarateGraph") # New ArangoDB Graph 94 | 95 | From here, you can use the conventional NetworkX API to interact with the graph. 96 | 97 | Assuming you already have a graph in ArangoDB named `MyGraph`, you can reload it as follows: 98 | 99 | .. code-block:: python 100 | 101 | import nx_arangodb as nxadb 102 | 103 | G = nxadb.Graph(name="MyGraph") 104 | 105 | print(G.number_of_nodes(), G.number_of_edges()) 106 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinx_rtd_theme 3 | networkx 4 | nx-arangodb-------------------------------------------------------------------------------- /doc/views/coreviews.rst: -------------------------------------------------------------------------------- 1 | .. _coreviews: 2 | 3 | ========= 4 | Coreviews 5 | ========= 6 | 7 | 8 | .. currentmodule:: nx_arangodb.classes.coreviews 9 | .. autoclass:: ArangoAdjacencyView 10 | :members: 11 | 12 | .. currentmodule:: nx_arangodb.classes.coreviews 13 | .. autoclass:: ArangoAtlasView 14 | :members: 15 | -------------------------------------------------------------------------------- /doc/views/index.rst: -------------------------------------------------------------------------------- 1 | .. 
_views: 2 | 3 | ************** 4 | ArangoDB Views 5 | ************** 6 | 7 | Having a database as a backend to NetworkX allows us to delegate 8 | certain operations to the database. 9 | 10 | This can be applied to the concept of NetworkX Views. 11 | 12 | Below are a set of experimental overrides of the NetworkX Views that represent the 13 | nodes and edges of the graph. Overriding these classes allows us to 14 | implement custom logic for data filtering and updating in the database. 15 | 16 | These classes are a work-in-progress. The main goal is to try 17 | to delegate data processing to ArangoDB, whenever possible. 18 | 19 | To use these experimental views, you must set **use_arango_views=True** 20 | when creating a new graph object: 21 | 22 | .. code-block:: python 23 | 24 | import nx_arangodb as nxadb 25 | 26 | G = nxadb.Graph(name="MyGraph", use_arango_views=True) 27 | 28 | 29 | .. toctree:: 30 | :maxdepth: 1 31 | 32 | coreviews 33 | reportviews -------------------------------------------------------------------------------- /doc/views/reportviews.rst: -------------------------------------------------------------------------------- 1 | .. _reportviews: 2 | 3 | =========== 4 | Reportviews 5 | =========== 6 | 7 | 8 | .. currentmodule:: nx_arangodb.classes.reportviews 9 | .. autoclass:: ArangoNodeView 10 | :members: 11 | 12 | .. currentmodule:: nx_arangodb.classes.reportviews 13 | .. autoclass:: ArangoNodeDataView 14 | :members: 15 | 16 | .. currentmodule:: nx_arangodb.classes.reportviews 17 | .. autoclass:: ArangoEdgeView 18 | :members: 19 | 20 | .. currentmodule:: nx_arangodb.classes.reportviews 21 | .. autoclass:: ArangoEdgeDataView 22 | :members: -------------------------------------------------------------------------------- /nx_arangodb/__init__.py: -------------------------------------------------------------------------------- 1 | from networkx.exception import * 2 | 3 | from . import utils 4 | 5 | from . import classes 6 | from .classes import * 7 | 8 | from . import convert 9 | from .convert import * 10 | 11 | from . import algorithms 12 | from .algorithms import * 13 | 14 | from .logger import logger 15 | 16 | from _nx_arangodb._version import __git_commit__, __version__ 17 | -------------------------------------------------------------------------------- /nx_arangodb/algorithms/README.md: -------------------------------------------------------------------------------- 1 | # algorithms 2 | 3 | This is an experimental module seeking to provide server-side algorithms for `nx-arangodb` Graphs. The goal is to provide a set of algorithms that can be delegated to the server for processing, rather than having to pull all the data to the client and process it there. 4 | 5 | Currently, the module is in a very early stage and only provides a single algorithm: `shortest_path`. This is simply to demonstrate the potential of the module and to provide a starting point for further development. 6 | 7 | ```python 8 | import os 9 | import networkx as nx 10 | import nx_arangodb as nxadb 11 | 12 | # os.environ ... 13 | 14 | G = nxadb.Graph(name="MyGraph") 15 | 16 | nx.pagerank(G) # Runs on the client 17 | nx.shortest_path(G, source="A", target="B") # Runs on the DB server 18 | nx.shortest_path.orig_func(G, source="A", target="B") # Runs on the client 19 | ``` 20 | 21 | As ArangoDB continues to grow its Graph Analytics capabilities, this module will be updated to take advantage of those features. Stay tuned! 
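22 | 23 | For illustration, the sketch below shows the kind of AQL traversal that the server-side `shortest_path` delegates to (see `shortest_paths/generic.py`). It is a minimal sketch rather than the full implementation: it assumes the graph `MyGraph` already exists in the database, and `node/A` / `node/B` are placeholder vertex IDs; substitute your own. 24 | 25 | ```python 26 | # Rough server-side equivalent of nx.shortest_path(G, source, target). 27 | # Assumes G = nxadb.Graph(name="MyGraph") is backed by an existing ArangoDB graph. 28 | query = """ 29 | FOR vertex IN ANY SHORTEST_PATH @source TO @target GRAPH @graph 30 |     OPTIONS {'weightAttribute': @weight} 31 |     RETURN vertex._id 32 | """ 33 | 34 | bind_vars = { 35 |     "source": "node/A",  # placeholder vertex ID 36 |     "target": "node/B",  # placeholder vertex ID 37 |     "graph": "MyGraph", 38 |     "weight": "weight",  # edge attribute used as the weight 39 | } 40 | 41 | path = list(G.query(query, bind_vars=bind_vars))  # e.g. ['node/A', ..., 'node/B'] 42 | ```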
-------------------------------------------------------------------------------- /nx_arangodb/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | from . import shortest_paths 2 | from .shortest_paths import * 3 | -------------------------------------------------------------------------------- /nx_arangodb/algorithms/shortest_paths/__init__.py: -------------------------------------------------------------------------------- 1 | from .generic import * 2 | -------------------------------------------------------------------------------- /nx_arangodb/algorithms/shortest_paths/generic.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | # NOTE: NetworkX algorithms are not typed 3 | 4 | import networkx as nx 5 | 6 | import nx_arangodb as nxadb 7 | from nx_arangodb.utils import _dtype_param, networkx_algorithm 8 | 9 | __all__ = ["shortest_path"] 10 | 11 | 12 | @networkx_algorithm( 13 | extra_params=_dtype_param, version_added="24.04", _plc={"bfs", "sssp"} 14 | ) 15 | def shortest_path( 16 | G: nxadb.Graph, 17 | source=None, 18 | target=None, 19 | weight=None, 20 | method="dijkstra", 21 | *, 22 | dtype=None, 23 | ): 24 | """A server-side implementation of the nx.shortest_path algorithm. 25 | 26 | This algorithm will invoke the original NetworkX algorithm if one 27 | of the following conditions is met: 28 | - The graph is not stored in the database. 29 | - The method is not 'dijkstra'. 30 | - The target or source is not specified. 31 | 32 | Parameters 33 | ---------- 34 | G : NetworkX graph 35 | 36 | source : node, optional 37 | Starting node for path. If not specified, compute shortest 38 | paths for each possible starting node. 39 | 40 | target : node, optional 41 | Ending node for path. If not specified, compute shortest 42 | paths to all possible nodes. 43 | 44 | weight : None, string or function, optional (default = None) 45 | If None, every edge has weight/distance/cost 1. 46 | If a string, use this edge attribute as the edge weight. 47 | Any edge attribute not present defaults to 1. 48 | If this is a function, the weight of an edge is the value 49 | returned by the function. The function must accept exactly 50 | three positional arguments: the two endpoints of an edge and 51 | the dictionary of edge attributes for that edge. 52 | The function must return a number. 53 | 54 | method : string, optional (default = 'dijkstra') 55 | The algorithm to use to compute the path. 56 | Supported options: 'dijkstra', 'bellman-ford'. 57 | Other inputs produce a ValueError. 58 | If `weight` is None, unweighted graph methods are used, and this 59 | suggestion is ignored. 60 | 61 | Returns 62 | ------- 63 | path : list 64 | List of nodes in a shortest path. 65 | 66 | Raises 67 | ------ 68 | NodeNotFound 69 | If `source` is not in `G`. 70 | 71 | ValueError 72 | If `method` is not among the supported options. 
73 | """ 74 | 75 | graph_does_not_exist = not G.graph_exists_in_db 76 | target_or_source_not_specified = target is None or source is None 77 | method_not_dijkstra = method != "dijkstra" 78 | 79 | if any([graph_does_not_exist, target_or_source_not_specified, method_not_dijkstra]): 80 | return nx.shortest_path.orig_func( 81 | G, source=source, target=target, weight=weight, method=method 82 | ) 83 | 84 | if isinstance(source, int): 85 | source = G.nodes[source]["_id"] 86 | 87 | if isinstance(target, int): 88 | target = G.nodes[target]["_id"] 89 | 90 | query = """ 91 | FOR vertex IN ANY SHORTEST_PATH @source TO @target GRAPH @graph 92 | OPTIONS {'weightAttribute': @weight} 93 | RETURN vertex._id 94 | """ 95 | 96 | bind_vars = { 97 | "source": source, 98 | "target": target, 99 | "graph": G.name, 100 | "weight": weight, 101 | } 102 | 103 | result = list(G.query(query, bind_vars=bind_vars)) 104 | 105 | if not result: 106 | raise nx.NodeNotFound(f"Either source {source} or target {target} is not in G") 107 | 108 | return result 109 | -------------------------------------------------------------------------------- /nx_arangodb/classes/__init__.py: -------------------------------------------------------------------------------- 1 | from .graph import Graph 2 | from .digraph import DiGraph 3 | from .multigraph import MultiGraph 4 | from .multidigraph import MultiDiGraph 5 | -------------------------------------------------------------------------------- /nx_arangodb/classes/coreviews.py: -------------------------------------------------------------------------------- 1 | """Experimental overrides of the NetworkX Views that represent the 2 | core data structures such as nested Mappings (e.g. dict-of-dicts). 3 | 4 | Overriding these classes allows us to implement custom logic for 5 | data filtering and updating in the database, instead of in Python. 6 | 7 | These classes are a work-in-progress. The main goal is to try 8 | to delegate data processing to ArangoDB, whenever possible. 9 | 10 | To use these experimental views, you must set **use_arango_views=True** 11 | when creating a new graph object: 12 | >>> G = nxadb.Graph(name="MyGraph", use_arango_views=True) 13 | """ 14 | 15 | import networkx as nx 16 | 17 | 18 | class ArangoAdjacencyView(nx.classes.coreviews.AdjacencyView): 19 | """The ArangoAdjacencyView class is an experimental subclass of 20 | the AdjacencyView class. 21 | 22 | Contrary to the original AdjacencyView class, the ArangoAdjacencyView 23 | is writable to allow for bulk updates to the graph in the DB. 24 | """ 25 | 26 | def update(self, data): 27 | """Update a set of edges within the graph. 28 | 29 | The benefit of this method is that it allows for bulk API updates, 30 | as opposed to `G.add_edges_from`, which currently makes 31 | one API request per edge. 32 | 33 | Example 34 | ------- 35 | >>> G = nxadb.Graph(name="MyGraph") 36 | >>> G.adj.update( 37 | { 38 | 'node/1': { 39 | 'node/2': {"node_to_node/1", "foo": "bar"}, 40 | 'node/3': {"node_to_node/2", "foo": "baz"}, 41 | ... 42 | }, 43 | ... 44 | }) 45 | """ 46 | return self._atlas.update(data) 47 | 48 | def __getitem__(self, name): 49 | return ArangoAtlasView(self._atlas[name]) 50 | 51 | 52 | class ArangoAtlasView(nx.classes.coreviews.AtlasView): 53 | """The ArangoAtlasView class is an experimental subclass of the 54 | AtlasView class. 55 | 56 | Contrary to the original AtlasView class, the ArangoAtlasView is 57 | writable to allow for bulk updates to the graph in the DB. 
58 | """ 59 | 60 | def update(self, data): 61 | """Update a set of edges within the graph for a specific node. 62 | 63 | Example 64 | ------- 65 | >>> G = nxadb.Graph(name="MyGraph") 66 | >>> G.adj['node/1'].update( 67 | { 68 | 'node/2': {"node_to_node/1", "foo": "bar"}, 69 | 'node/3': {"node_to_node/2", "foo": "baz"}, 70 | ... 71 | }) 72 | """ 73 | return self._atlas.update(data) 74 | -------------------------------------------------------------------------------- /nx_arangodb/classes/dict/README.md: -------------------------------------------------------------------------------- 1 | # dict 2 | 3 | The `dict` module provides a set of `UserDict`-based classes that extend the traditional dictionary functionality to maintain a remote connection to an ArangoDB Database. 4 | 5 | NetworkX Graphs rely on dictionary-based structures to store their data, which are defined by their factory functions: 6 | 7 | 1. `node_dict_factory` 8 | 2. `node_attr_dict_factory` 9 | 3. `adjlist_outer_dict_factory` 10 | 4. `adjlist_inner_dict_factory` 11 | 5. `edge_key_dict_factory` (Only for MultiGraphs) 12 | 6. `edge_attr_dict_factory` 13 | 7. `graph_attr_dict_factory` 14 | 15 | These factories are used to create the dictionaries that store the data of the nodes, edges, and the graph itself. 16 | 17 | This module contains the following classes: 18 | 19 | 1. `NodeDict` 20 | 2. `NodeAttrDict` 21 | 3. `AdjListOuterDict` 22 | 4. `AdjListInnerDict` 23 | 5. `EdgeKeyDict` 24 | 6. `EdgeAttrDict` 25 | 7. `GraphDict` 26 | 8. `GraphAttrDict` 27 | 28 | Each class extends the functionality of the corresponding dictionary factory by adding methods to interact with the data in ArangoDB. Think of it as a CRUD interface for ArangoDB. This is done by overriding the primary dunder methods of the `UserDict` class. 29 | 30 | By using this strategy in addition to subclassing the `nx.Graph` class, we're able to preserve the original functionality of the NetworkX Graphs while adding ArangoDB support. 
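31 | 32 | As a rough illustration of the write-through behavior these classes enable (a minimal sketch assuming a reachable ArangoDB instance, the environment variables described in the quickstart, and a placeholder graph name `MyGraph`): 33 | 34 | ```python 35 | import nx_arangodb as nxadb 36 | 37 | G = nxadb.Graph(name="MyGraph", foo="bar") 38 | 39 | # GraphDict / GraphAttrDict: graph attributes are stored as a document in ArangoDB 40 | G.graph["foo"] = "baz"  # __setitem__ -> document update in the database 41 | del G.graph["foo"]      # __delitem__ -> removes the key in the database 42 | 43 | # NodeDict / NodeAttrDict: node attributes are stored as vertex documents 44 | G.add_node("node/1", color="red") 45 | print(G.nodes["node/1"]["color"])    # __getitem__ -> fetched from ArangoDB if not cached 46 | G.nodes["node/1"]["color"] = "blue"  # write-through update of the vertex document 47 | ```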
-------------------------------------------------------------------------------- /nx_arangodb/classes/dict/__init__.py: -------------------------------------------------------------------------------- 1 | from .adj import ( # noqa 2 | adjlist_inner_dict_factory, 3 | adjlist_outer_dict_factory, 4 | edge_attr_dict_factory, 5 | edge_key_dict_factory, 6 | ) 7 | from .graph import graph_attr_dict_factory, graph_dict_factory # noqa 8 | from .node import node_attr_dict_factory, node_dict_factory # noqa 9 | -------------------------------------------------------------------------------- /nx_arangodb/classes/dict/graph.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import os 4 | from collections import UserDict 5 | from typing import Any, Callable 6 | 7 | from arango.database import StandardDatabase 8 | from arango.graph import Graph 9 | 10 | from ..function import ( 11 | aql_doc_get_key, 12 | aql_doc_has_key, 13 | create_collection, 14 | doc_get_or_insert, 15 | doc_update, 16 | get_update_dict, 17 | json_serializable, 18 | key_is_not_reserved, 19 | key_is_string, 20 | ) 21 | 22 | ############# 23 | # Factories # 24 | ############# 25 | 26 | 27 | def graph_dict_factory(db: StandardDatabase, graph: Graph) -> Callable[..., GraphDict]: 28 | """Factory function for creating a GraphDict.""" 29 | return lambda: GraphDict(db, graph) 30 | 31 | 32 | def graph_attr_dict_factory( 33 | db: StandardDatabase, graph: Graph, graph_id: str 34 | ) -> Callable[..., GraphAttrDict]: 35 | """Factory function for creating a GraphAttrDict.""" 36 | return lambda: GraphAttrDict(db, graph, graph_id) 37 | 38 | 39 | ######### 40 | # Graph # 41 | ######### 42 | 43 | GRAPH_FIELD = "networkx" 44 | 45 | 46 | def build_graph_attr_dict_data( 47 | parent: GraphAttrDict, data: dict[str, Any] 48 | ) -> dict[str, Any | GraphAttrDict]: 49 | """Recursively build an GraphAttrDict from a dict. 50 | 51 | It's possible that **value** is a nested dict, so we need to 52 | recursively build a GraphAttrDict for each nested dict. 53 | 54 | Parameters 55 | ---------- 56 | parent : GraphAttrDict 57 | The parent GraphAttrDict. 58 | data : dict[str, Any] 59 | The data to build the GraphAttrDict from. 60 | 61 | Returns 62 | ------- 63 | dict[str, Any | GraphAttrDict] 64 | The data for the new GraphAttrDict. 65 | """ 66 | graph_attr_dict_data = {} 67 | for key, value in data.items(): 68 | graph_attr_dict_value = process_graph_attr_dict_value(parent, key, value) 69 | graph_attr_dict_data[key] = graph_attr_dict_value 70 | 71 | return graph_attr_dict_data 72 | 73 | 74 | def process_graph_attr_dict_value(parent: GraphAttrDict, key: str, value: Any) -> Any: 75 | """Process the value of a particular key in an GraphAttrDict. 76 | 77 | If the value is a dict, then we need to recursively build an GraphAttrDict. 78 | Otherwise, we return the value as is. 79 | 80 | Parameters 81 | ---------- 82 | parent : GraphAttrDict 83 | The parent GraphAttrDict. 84 | key : str 85 | The key of the value. 86 | value : Any 87 | The value to process. 88 | 89 | Returns 90 | ------- 91 | Any 92 | The processed value. 
93 | """ 94 | if not isinstance(value, dict): 95 | return value 96 | 97 | graph_attr_dict = parent.graph_attr_dict_factory() 98 | graph_attr_dict.parent_keys = parent.parent_keys + [key] 99 | graph_attr_dict.data = build_graph_attr_dict_data(graph_attr_dict, value) 100 | 101 | return graph_attr_dict 102 | 103 | 104 | class GraphDict(UserDict[str, Any]): 105 | """A dictionary-like object for storing graph attributes. 106 | 107 | Given that ArangoDB does not have a concept of graph attributes, this class 108 | stores the attributes in a collection with the graph name as the document key. 109 | 110 | The default collection is called `_graphs`. However, if the 111 | `DATABASE_GRAPH_COLLECTION` environment variable is specified, 112 | then that collection will be used. This variable is useful when the 113 | database user does not have permission to access the `_graphs` 114 | system collection. 115 | 116 | Parameters 117 | ---------- 118 | db : arango.database.StandardDatabase 119 | The ArangoDB database. 120 | 121 | graph : arango.graph.Graph 122 | The ArangoDB graph. 123 | 124 | Example 125 | ------- 126 | >>> G = nxadb.Graph(name='MyGraph', foo='bar') 127 | >>> G.graph['foo'] 128 | 'bar' 129 | >>> G.graph['foo'] = 'baz' 130 | >>> del G.graph['foo'] 131 | """ 132 | 133 | def __init__( 134 | self, 135 | db: StandardDatabase, 136 | graph: Graph, 137 | *args: Any, 138 | **kwargs: Any, 139 | ): 140 | super().__init__(*args, **kwargs) 141 | self.data: dict[str, Any] = {} 142 | 143 | self.db = db 144 | self.adb_graph = graph 145 | self.graph_name = graph.name 146 | self.collection_name = os.environ.get("DATABASE_GRAPH_COLLECTION", "_graphs") 147 | 148 | self.graph_id = f"{self.collection_name}/{self.graph_name}" 149 | self.parent_keys = [GRAPH_FIELD] 150 | 151 | self.collection = create_collection(db, self.collection_name) 152 | self.graph_attr_dict_factory = graph_attr_dict_factory( 153 | self.db, self.adb_graph, self.graph_id 154 | ) 155 | 156 | result = doc_get_or_insert(self.db, self.collection_name, self.graph_id) 157 | for k, v in result.get(GRAPH_FIELD, {}).items(): 158 | self.data[k] = self.__process_graph_dict_value(k, v) 159 | 160 | def __process_graph_dict_value(self, key: str, value: Any) -> Any: 161 | if not isinstance(value, dict): 162 | return value 163 | 164 | graph_attr_dict = self.graph_attr_dict_factory() 165 | graph_attr_dict.parent_keys += [key] 166 | graph_attr_dict.data = build_graph_attr_dict_data(graph_attr_dict, value) 167 | 168 | return graph_attr_dict 169 | 170 | @key_is_string 171 | def __contains__(self, key: str) -> bool: 172 | """'foo' in G.graph""" 173 | if key in self.data: 174 | return True 175 | 176 | return aql_doc_has_key(self.db, self.graph_id, key, self.parent_keys) 177 | 178 | @key_is_string 179 | def __getitem__(self, key: str) -> Any: 180 | """G.graph['foo']""" 181 | 182 | if value := self.data.get(key): 183 | return value 184 | 185 | result = aql_doc_get_key(self.db, self.graph_id, key, self.parent_keys) 186 | 187 | if result is None: 188 | raise KeyError(key) 189 | 190 | graph_dict_value = self.__process_graph_dict_value(key, result) 191 | self.data[key] = graph_dict_value 192 | 193 | return graph_dict_value 194 | 195 | @key_is_string 196 | @key_is_not_reserved 197 | def __setitem__(self, key: str, value: Any) -> None: 198 | """G.graph['foo'] = 'bar'""" 199 | if value is None: 200 | self.__delitem__(key) 201 | return 202 | 203 | graph_dict_value = self.__process_graph_dict_value(key, value) 204 | self.data[key] = graph_dict_value 205 | 206 | update_dict = 
get_update_dict(self.parent_keys, {key: value}) 207 | doc_update(self.db, self.graph_id, update_dict) 208 | 209 | @key_is_string 210 | @key_is_not_reserved 211 | def __delitem__(self, key: str) -> None: 212 | """del G.graph['foo']""" 213 | self.data.pop(key, None) 214 | update_dict = get_update_dict(self.parent_keys, {key: None}) 215 | doc_update(self.db, self.graph_id, update_dict) 216 | 217 | # @values_are_json_serializable # TODO? 218 | def update(self, attrs: Any) -> None: # type: ignore 219 | """G.graph.update({'foo': 'bar'})""" 220 | 221 | if not attrs: 222 | return 223 | 224 | graph_attr_dict = self.graph_attr_dict_factory() 225 | graph_attr_dict_data = build_graph_attr_dict_data(graph_attr_dict, attrs) 226 | graph_attr_dict.data = graph_attr_dict_data 227 | 228 | self.data.update(graph_attr_dict_data) 229 | update_dict = get_update_dict(self.parent_keys, attrs) 230 | doc_update(self.db, self.graph_id, update_dict) 231 | 232 | def clear(self) -> None: 233 | """G.graph.clear()""" 234 | self.data.clear() 235 | 236 | 237 | @json_serializable 238 | class GraphAttrDict(UserDict[str, Any]): 239 | """The inner-level of the dict of dict structure 240 | representing the attributes of a graph stored in the database. 241 | 242 | Only used if the value associated with a GraphDict key is a dict. 243 | 244 | Parameters 245 | ---------- 246 | db : arango.database.StandardDatabase 247 | The ArangoDB database. 248 | 249 | graph : arango.graph.Graph 250 | The ArangoDB graph. 251 | 252 | graph_id : str 253 | The ArangoDB document ID of the graph. 254 | 255 | Example 256 | ------- 257 | >>> G = nxadb.Graph(name='MyGraph', foo={'bar': 'baz'}) 258 | >>> G.graph['foo']['bar'] 259 | 'baz' 260 | >>> G.graph['foo']['bar'] = 'qux' 261 | """ 262 | 263 | def __init__( 264 | self, 265 | db: StandardDatabase, 266 | graph: Graph, 267 | graph_id: str, 268 | *args: Any, 269 | **kwargs: Any, 270 | ): 271 | super().__init__(*args, **kwargs) 272 | self.data: dict[str, Any] = {} 273 | 274 | self.db = db 275 | self.graph = graph 276 | self.graph_id: str = graph_id 277 | 278 | self.parent_keys: list[str] = [GRAPH_FIELD] 279 | self.graph_attr_dict_factory = graph_attr_dict_factory( 280 | self.db, self.graph, self.graph_id 281 | ) 282 | 283 | def clear(self) -> None: 284 | raise NotImplementedError("Cannot clear GraphAttrDict") 285 | 286 | @key_is_string 287 | def __contains__(self, key: str) -> bool: 288 | """'bar' in G.graph['foo']""" 289 | if key in self.data: 290 | return True 291 | 292 | return aql_doc_has_key(self.db, self.graph.name, key, self.parent_keys) 293 | 294 | @key_is_string 295 | def __getitem__(self, key: str) -> Any: 296 | """G.graph['foo']['bar']""" 297 | 298 | if value := self.data.get(key): 299 | return value 300 | 301 | result = aql_doc_get_key(self.db, self.graph_id, key, self.parent_keys) 302 | 303 | if result is None: 304 | raise KeyError(key) 305 | 306 | graph_attr_dict_value = process_graph_attr_dict_value(self, key, result) 307 | self.data[key] = graph_attr_dict_value 308 | 309 | return graph_attr_dict_value 310 | 311 | @key_is_string 312 | def __setitem__(self, key, value): 313 | """ 314 | G.graph['foo'] = 'bar' 315 | G.graph['object'] = {'foo': 'bar'} 316 | G._node['object']['foo'] = 'baz' 317 | """ 318 | if value is None: 319 | self.__delitem__(key) 320 | return 321 | 322 | graph_attr_dict_value = process_graph_attr_dict_value(self, key, value) 323 | update_dict = get_update_dict(self.parent_keys, {key: value}) 324 | self.data[key] = graph_attr_dict_value 325 | doc_update(self.db, 
self.graph_id, update_dict) 326 | 327 | @key_is_string 328 | def __delitem__(self, key): 329 | """del G.graph['foo']['bar']""" 330 | self.data.pop(key, None) 331 | update_dict = get_update_dict(self.parent_keys, {key: None}) 332 | doc_update(self.db, self.graph_id, update_dict) 333 | 334 | def update(self, attrs: Any) -> None: # type: ignore 335 | """G.graph['foo'].update({'bar': 'baz'})""" 336 | if not attrs: 337 | return 338 | 339 | self.data.update(build_graph_attr_dict_data(self, attrs)) 340 | updated_dict = get_update_dict(self.parent_keys, attrs) 341 | doc_update(self.db, self.graph_id, updated_dict) 342 | -------------------------------------------------------------------------------- /nx_arangodb/classes/dict/node.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections import UserDict 4 | from collections.abc import Iterator 5 | from typing import Any, Callable 6 | 7 | from arango.database import StandardDatabase 8 | from arango.graph import Graph 9 | 10 | from nx_arangodb.logger import logger 11 | 12 | from ..function import ( 13 | ArangoDBBatchError, 14 | aql, 15 | aql_doc_get_key, 16 | aql_doc_has_key, 17 | aql_fetch_data, 18 | check_update_list_for_errors, 19 | doc_delete, 20 | doc_insert, 21 | doc_update, 22 | edges_delete, 23 | get_arangodb_graph, 24 | get_node_id, 25 | get_node_type_and_id, 26 | get_update_dict, 27 | json_serializable, 28 | key_is_not_reserved, 29 | key_is_string, 30 | keys_are_not_reserved, 31 | keys_are_strings, 32 | separate_nodes_by_collections, 33 | upsert_collection_documents, 34 | vertex_get, 35 | ) 36 | 37 | ############# 38 | # Factories # 39 | ############# 40 | 41 | 42 | def node_dict_factory( 43 | db: StandardDatabase, 44 | graph: Graph, 45 | default_node_type: str, 46 | read_parallelism: int, 47 | read_batch_size: int, 48 | ) -> Callable[..., NodeDict]: 49 | """Factory function for creating a NodeDict.""" 50 | return lambda: NodeDict( 51 | db, 52 | graph, 53 | default_node_type, 54 | read_parallelism, 55 | read_batch_size, 56 | ) 57 | 58 | 59 | def node_attr_dict_factory( 60 | db: StandardDatabase, graph: Graph 61 | ) -> Callable[..., NodeAttrDict]: 62 | """Factory function for creating a NodeAttrDict.""" 63 | return lambda: NodeAttrDict(db, graph) 64 | 65 | 66 | ######## 67 | # Node # 68 | ######## 69 | 70 | 71 | def build_node_attr_dict_data( 72 | parent: NodeAttrDict, data: dict[str, Any] 73 | ) -> dict[str, Any | NodeAttrDict]: 74 | """Recursively build a NodeAttrDict from a dict. 75 | 76 | It's possible that **value** is a nested dict, so we need to 77 | recursively build a NodeAttrDict for each nested dict. 78 | 79 | Parameters 80 | ---------- 81 | parent : NodeAttrDict 82 | The parent NodeAttrDict. 83 | data : dict[str, Any] 84 | The data to build the NodeAttrDict from. 85 | 86 | Returns 87 | ------- 88 | dict[str, Any | NodeAttrDict] 89 | The data for the new NodeAttrDict. 90 | """ 91 | node_attr_dict_data = {} 92 | for key, value in data.items(): 93 | node_attr_dict_value = process_node_attr_dict_value(parent, key, value) 94 | node_attr_dict_data[key] = node_attr_dict_value 95 | 96 | return node_attr_dict_data 97 | 98 | 99 | def process_node_attr_dict_value(parent: NodeAttrDict, key: str, value: Any) -> Any: 100 | """Process the value of a particular key in a NodeAttrDict. 101 | 102 | If the value is a dict, then we need to recursively build an NodeAttrDict. 103 | Otherwise, we return the value as is. 
104 | 105 | Parameters 106 | ---------- 107 | parent : NodeAttrDict 108 | The parent NodeAttrDict. 109 | key : str 110 | The key of the value. 111 | value : Any 112 | The value to process. 113 | 114 | Returns 115 | ------- 116 | Any 117 | The processed value. 118 | """ 119 | if not isinstance(value, dict): 120 | return value 121 | 122 | node_attr_dict = parent.node_attr_dict_factory() 123 | node_attr_dict.node_id = parent.node_id 124 | node_attr_dict.parent_keys = parent.parent_keys + [key] 125 | node_attr_dict.data = build_node_attr_dict_data(node_attr_dict, value) 126 | 127 | return node_attr_dict 128 | 129 | 130 | @json_serializable 131 | class NodeAttrDict(UserDict[str, Any]): 132 | """The inner-level of the dict of dict structure 133 | representing the nodes (vertices) of a graph. 134 | 135 | Parameters 136 | ---------- 137 | db : arango.database.StandardDatabase 138 | The ArangoDB database. 139 | 140 | graph : arango.graph.Graph 141 | The ArangoDB graph object. 142 | 143 | Example 144 | ------- 145 | >>> G = nxadb.Graph("MyGraph") 146 | >>> G.add_node('node/1', foo='bar') 147 | >>> G.nodes['node/1']['foo'] 148 | 'bar' 149 | """ 150 | 151 | def __init__(self, db: StandardDatabase, graph: Graph, *args: Any, **kwargs: Any): 152 | super().__init__(*args, **kwargs) 153 | self.data: dict[str, Any] = {} 154 | 155 | self.db = db 156 | self.graph = graph 157 | self.node_id: str | None = None 158 | 159 | # NodeAttrDict may be a child of another NodeAttrDict 160 | # e.g G._node['node/1']['object']['foo'] = 'bar' 161 | # In this case, **parent_keys** would be ['object'] 162 | self.parent_keys: list[str] = [] 163 | self.node_attr_dict_factory = node_attr_dict_factory(self.db, self.graph) 164 | 165 | def clear(self) -> None: 166 | raise NotImplementedError("Cannot clear NodeAttrDict") 167 | 168 | def copy(self) -> Any: 169 | return self.data.copy() 170 | 171 | @key_is_string 172 | def __contains__(self, key: str) -> bool: 173 | """'foo' in G._node['node/1']""" 174 | if key in self.data: 175 | return True 176 | 177 | assert self.node_id 178 | result: bool = aql_doc_has_key(self.db, self.node_id, key, self.parent_keys) 179 | return result 180 | 181 | @key_is_string 182 | def __getitem__(self, key: str) -> Any: 183 | """G._node['node/1']['foo']""" 184 | if key in self.data: 185 | return self.data[key] 186 | 187 | assert self.node_id 188 | result = aql_doc_get_key(self.db, self.node_id, key, self.parent_keys) 189 | 190 | if result is None: 191 | raise KeyError(key) 192 | 193 | node_attr_dict_value = process_node_attr_dict_value(self, key, result) 194 | self.data[key] = node_attr_dict_value 195 | 196 | return node_attr_dict_value 197 | 198 | @key_is_string 199 | @key_is_not_reserved 200 | # @value_is_json_serializable # TODO? 
201 | def __setitem__(self, key: str, value: Any) -> None: 202 | """ 203 | G._node['node/1']['foo'] = 'bar' 204 | G._node['node/1']['object'] = {'foo': 'bar'} 205 | G._node['node/1']['object']['foo'] = 'baz' 206 | """ 207 | if value is None: 208 | self.__delitem__(key) 209 | return 210 | 211 | assert self.node_id 212 | node_attr_dict_value = process_node_attr_dict_value(self, key, value) 213 | update_dict = get_update_dict(self.parent_keys, {key: value}) 214 | self.data[key] = node_attr_dict_value 215 | doc_update(self.db, self.node_id, update_dict) 216 | 217 | @key_is_string 218 | @key_is_not_reserved 219 | def __delitem__(self, key: str) -> None: 220 | """del G._node['node/1']['foo']""" 221 | assert self.node_id 222 | self.data.pop(key, None) 223 | update_dict = get_update_dict(self.parent_keys, {key: None}) 224 | doc_update(self.db, self.node_id, update_dict) 225 | 226 | @keys_are_strings 227 | @keys_are_not_reserved 228 | # @values_are_json_serializable # TODO? 229 | def update(self, attrs: Any) -> None: 230 | """G._node['node/1'].update({'foo': 'bar'})""" 231 | if not attrs: 232 | return 233 | 234 | node_attr_dict_data = build_node_attr_dict_data(self, attrs) 235 | self.data.update(node_attr_dict_data) 236 | 237 | if not self.node_id: 238 | logger.debug("Node ID not set, skipping NodeAttrDict(?).update()") 239 | return 240 | 241 | update_dict = get_update_dict(self.parent_keys, attrs) 242 | doc_update(self.db, self.node_id, update_dict) 243 | 244 | 245 | class NodeDict(UserDict[str, NodeAttrDict]): 246 | """The outer-level of the dict of dict structure representing the 247 | nodes (vertices) of a graph. 248 | 249 | The outer dict is keyed by ArangoDB Vertex IDs and the inner dict 250 | is keyed by Vertex attributes. 251 | 252 | Parameters 253 | ---------- 254 | db : arango.database.StandardDatabase 255 | The ArangoDB database. 256 | 257 | graph : arango.graph.Graph 258 | The ArangoDB graph object. 259 | 260 | default_node_type : str 261 | The default node type for the graph. 262 | 263 | read_parallelism : int 264 | The number of parallel threads to use for reading data in _fetch_all. 265 | 266 | read_batch_size : int 267 | The number of documents to read in each batch in _fetch_all. 
268 | 269 | Example 270 | ------- 271 | >>> G = nxadb.Graph("MyGraph") 272 | >>> G.add_node('node/1', foo='bar') 273 | >>> G.nodes 274 | """ 275 | 276 | def __init__( 277 | self, 278 | db: StandardDatabase, 279 | graph: Graph, 280 | default_node_type: str, 281 | read_parallelism: int, 282 | read_batch_size: int, 283 | *args: Any, 284 | **kwargs: Any, 285 | ): 286 | super().__init__(*args, **kwargs) 287 | self.data: dict[str, NodeAttrDict] = {} 288 | 289 | self.db = db 290 | self.graph = graph 291 | self.default_node_type = default_node_type 292 | self.read_parallelism = read_parallelism 293 | self.read_batch_size = read_batch_size 294 | 295 | self.node_attr_dict_factory = node_attr_dict_factory(self.db, self.graph) 296 | 297 | self.FETCHED_ALL_DATA = False 298 | self.FETCHED_ALL_IDS = False 299 | 300 | def _create_node_attr_dict( 301 | self, node_id: str, node_data: dict[str, Any] 302 | ) -> NodeAttrDict: 303 | node_attr_dict = self.node_attr_dict_factory() 304 | node_attr_dict.node_id = node_id 305 | node_attr_dict.data = build_node_attr_dict_data(node_attr_dict, node_data) 306 | 307 | return node_attr_dict 308 | 309 | def __repr__(self) -> str: 310 | if self.FETCHED_ALL_IDS: 311 | return self.data.keys().__repr__() 312 | 313 | return f"NodeDict('{self.graph.name}')" 314 | 315 | def __str__(self) -> str: 316 | return self.__repr__() 317 | 318 | @key_is_string 319 | def __contains__(self, key: str) -> bool: 320 | """'node/1' in G._node""" 321 | node_id = get_node_id(key, self.default_node_type) 322 | 323 | if node_id in self.data: 324 | return True 325 | 326 | if self.FETCHED_ALL_IDS: 327 | return False 328 | 329 | if self.graph.has_vertex(node_id): 330 | empty_node_attr_dict = self.node_attr_dict_factory() 331 | empty_node_attr_dict.node_id = node_id 332 | self.data[node_id] = empty_node_attr_dict 333 | return True 334 | 335 | return False 336 | 337 | @key_is_string 338 | def __getitem__(self, key: str) -> NodeAttrDict: 339 | """G._node['node/1']""" 340 | node_id = get_node_id(key, self.default_node_type) 341 | 342 | if vertex_cache := self.data.get(node_id): 343 | return vertex_cache 344 | 345 | if node_id not in self.data and self.FETCHED_ALL_IDS: 346 | raise KeyError(key) 347 | 348 | if node := vertex_get(self.graph, node_id): 349 | node_attr_dict = self._create_node_attr_dict(node["_id"], node) 350 | self.data[node_id] = node_attr_dict 351 | 352 | return node_attr_dict 353 | 354 | raise KeyError(key) 355 | 356 | @key_is_string 357 | def __setitem__(self, key: str, value: NodeAttrDict) -> None: 358 | """G._node['node/1'] = {'foo': 'bar'}""" 359 | assert isinstance(value, NodeAttrDict) 360 | 361 | node_type, node_id = get_node_type_and_id(key, self.default_node_type) 362 | 363 | result = doc_insert(self.db, node_type, node_id, value.data) 364 | 365 | node_attr_dict = self._create_node_attr_dict( 366 | result["_id"], {**value.data, **result} 367 | ) 368 | 369 | self.data[node_id] = node_attr_dict 370 | 371 | @key_is_string 372 | def __delitem__(self, key: str) -> None: 373 | """del g._node['node/1']""" 374 | node_id = get_node_id(key, self.default_node_type) 375 | 376 | if not self.graph.has_vertex(node_id): 377 | raise KeyError(key) 378 | 379 | edges_delete(self.db, self.graph, node_id) 380 | 381 | doc_delete(self.db, node_id) 382 | 383 | self.data.pop(node_id, None) 384 | 385 | def __len__(self) -> int: 386 | """len(g._node)""" 387 | return sum( 388 | [ 389 | self.graph.vertex_collection(c).count() 390 | for c in self.graph.vertex_collections() 391 | ] 392 | ) 393 | 394 | def 
__iter__(self) -> Iterator[str]: 395 | """for k in g._node""" 396 | if not (self.FETCHED_ALL_IDS or self.FETCHED_ALL_DATA): 397 | self._fetch_all() 398 | 399 | yield from self.data.keys() 400 | 401 | def keys(self) -> Any: 402 | """g._node.keys()""" 403 | if self.FETCHED_ALL_IDS: 404 | yield from self.data.keys() 405 | else: 406 | self.FETCHED_ALL_IDS = True 407 | for collection in self.graph.vertex_collections(): 408 | for node_id in self.graph.vertex_collection(collection).ids(): 409 | empty_node_attr_dict = self.node_attr_dict_factory() 410 | empty_node_attr_dict.node_id = node_id 411 | self.data[node_id] = empty_node_attr_dict 412 | yield node_id 413 | 414 | def clear(self) -> None: 415 | """g._node.clear()""" 416 | self.data.clear() 417 | self.FETCHED_ALL_DATA = False 418 | self.FETCHED_ALL_IDS = False 419 | 420 | def copy(self) -> Any: 421 | """g._node.copy()""" 422 | if not self.FETCHED_ALL_DATA: 423 | self._fetch_all() 424 | 425 | return {key: value.copy() for key, value in self.data.items()} 426 | 427 | @keys_are_strings 428 | def __update_local_nodes(self, nodes: Any) -> None: 429 | for node_id, node_data in nodes.items(): 430 | node_attr_dict = self._create_node_attr_dict(node_id, node_data) 431 | self.data[node_id] = node_attr_dict 432 | 433 | @keys_are_strings 434 | def update(self, nodes: Any) -> None: 435 | """g._node.update({'node/1': {'foo': 'bar'}, 'node/2': {'baz': 'qux'}})""" 436 | separated_by_collection = separate_nodes_by_collections( 437 | nodes, self.default_node_type 438 | ) 439 | 440 | result = upsert_collection_documents(self.db, separated_by_collection) 441 | 442 | all_good = check_update_list_for_errors(result) 443 | if all_good: 444 | # Means no single operation failed, in this case we update the local cache 445 | self.__update_local_nodes(nodes) 446 | else: 447 | # In this case some or all documents failed. Right now we will not 448 | # update the local cache, but raise an error instead. 449 | # Reason: We cannot set silent to True, because we need as it does 450 | # not report errors then. We need to update the driver to also pass 451 | # the errors back to the user, then we can adjust the behavior here. 452 | # This will also save network traffic and local computation time. 453 | errors = [] 454 | for collections_results in result: 455 | for collection_result in collections_results: 456 | errors.append(collection_result) 457 | m = "Failed to insert at least one node. Will not update local cache." 
458 | logger.warning(m) 459 | raise ArangoDBBatchError(errors) 460 | 461 | def values(self) -> Any: 462 | """g._node.values()""" 463 | if not self.FETCHED_ALL_DATA: 464 | self._fetch_all() 465 | 466 | yield from self.data.values() 467 | 468 | def items(self, data: str | None = None, default: Any | None = None) -> Any: 469 | """g._node.items() or G._node.items(data='foo')""" 470 | if data is None: 471 | if not self.FETCHED_ALL_DATA: 472 | self._fetch_all() 473 | 474 | yield from self.data.items() 475 | else: 476 | v_cols = list(self.graph.vertex_collections()) 477 | yield from aql_fetch_data(self.db, v_cols, data, default) 478 | 479 | def _fetch_all(self): 480 | self.clear() 481 | 482 | ( 483 | node_dict, 484 | *_, 485 | ) = get_arangodb_graph( 486 | self.graph, 487 | load_node_dict=True, 488 | load_adj_dict=False, 489 | load_coo=False, 490 | edge_collections_attributes=set(), # not used 491 | load_all_vertex_attributes=True, 492 | load_all_edge_attributes=False, # not used 493 | is_directed=False, # not used 494 | is_multigraph=False, # not used 495 | symmetrize_edges_if_directed=False, # not used 496 | read_parallelism=self.read_parallelism, 497 | read_batch_size=self.read_batch_size, 498 | ) 499 | 500 | for node_id, node_data in node_dict.items(): 501 | del node_data["_rev"] # TODO: Optimize away via phenolrs 502 | node_attr_dict = self._create_node_attr_dict(node_data["_id"], node_data) 503 | self.data[node_id] = node_attr_dict 504 | 505 | self.FETCHED_ALL_DATA = True 506 | self.FETCHED_ALL_IDS = True 507 | -------------------------------------------------------------------------------- /nx_arangodb/classes/digraph.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, ClassVar 2 | 3 | import networkx as nx 4 | from arango.database import StandardDatabase 5 | 6 | import nx_arangodb as nxadb 7 | from nx_arangodb.classes.graph import Graph 8 | from nx_arangodb.logger import logger 9 | 10 | from .dict.adj import AdjListOuterDict 11 | from .enum import TraversalDirection 12 | from .function import get_node_id 13 | 14 | networkx_api = nxadb.utils.decorators.networkx_class(nx.DiGraph) # type: ignore 15 | 16 | __all__ = ["DiGraph"] 17 | 18 | 19 | class DiGraph(Graph, nx.DiGraph): 20 | """ 21 | Base class for directed graphs. 22 | 23 | Subclasses ``nxadb.Graph`` and ``nx.DiGraph``. 24 | 25 | In order to connect to an ArangoDB instance, the following environment 26 | variables must be set: 27 | 28 | 1. ``DATABASE_HOST`` 29 | 2. ``DATABASE_USERNAME`` 30 | 3. ``DATABASE_PASSWORD`` 31 | 4. ``DATABASE_NAME`` 32 | 33 | Furthermore, the ``name`` parameter is required to create a new graph 34 | or to connect to an existing graph in the database. 35 | 36 | Example 37 | ------- 38 | >>> import os 39 | >>> import networkx as nx 40 | >>> import nx_arangodb as nxadb 41 | >>> 42 | >>> os.environ["DATABASE_HOST"] = "http://localhost:8529" 43 | >>> os.environ["DATABASE_USERNAME"] = "root" 44 | >>> os.environ["DATABASE_PASSWORD"] = "openSesame" 45 | >>> os.environ["DATABASE_NAME"] = "_system" 46 | >>> 47 | >>> G = nxadb.DiGraph(name="MyGraph") 48 | >>> ... 49 | 50 | 51 | Parameters 52 | ---------- 53 | incoming_graph_data : input graph (optional, default: None) 54 | Data to initialize graph. If None (default) an empty 55 | graph is created. Must be used in conjunction with **name** if 56 | the user wants to persist the graph in ArangoDB. 
NOTE: It is 57 | recommended for incoming_graph_data to be a NetworkX graph due 58 | to faster loading times. 59 | 60 | name : str (optional, default: None) 61 | Name of the graph in the database. If the graph already exists, 62 | the user can pass the name of the graph to connect to it. If 63 | the graph does not exist, a General Graph will be created by 64 | passing the **name**. NOTE: Must be used in conjunction with 65 | **incoming_graph_data** if the user wants to persist the graph 66 | in ArangoDB. 67 | 68 | default_node_type : str (optional, default: None) 69 | Default node type for the graph. In ArangoDB terms, this is the 70 | default vertex collection. If the graph already exists, the user can 71 | omit this parameter and the default node type will be set to the 72 | first vertex collection in the graph. If the graph does not exist, 73 | the user can pass the default node type to create the default vertex 74 | collection. 75 | 76 | edge_type_key : str (optional, default: "_edge_type") 77 | Key used to store the edge type when inserting edges into the graph. 78 | Useful for working with Heterogeneous Graphs. 79 | 80 | edge_type_func : Callable[[str, str], str] (optional, default: None) 81 | Function to determine the edge type between two nodes. If the graph 82 | already exists, the user can omit this parameter and the edge type 83 | function will be set based on the existing edge definitions. If the 84 | graph does not exist, the user can pass a function that determines 85 | the edge type between two nodes. 86 | 87 | edge_collections_attributes : set[str] (optional, default: None) 88 | Set of edge attributes to fetch when executing a NetworkX algorithm. 89 | Useful if the user has edge weights or other edge attributes that 90 | they want to use in a NetworkX algorithm. 91 | 92 | db : arango.database.StandardDatabase (optional, default: None) 93 | ArangoDB database object. If the user has an existing python-arango 94 | connection to the database, they can pass the database object to the graph. 95 | If not provided, a database object will be created using the environment 96 | variables DATABASE_HOST, DATABASE_USERNAME, DATABASE_PASSWORD, and 97 | DATABASE_NAME. 98 | 99 | read_parallelism : int (optional, default: 10) 100 | Number of parallel threads to use when reading data from ArangoDB. 101 | Used for fetching node and edge data from the database. 102 | 103 | read_batch_size : int (optional, default: 100000) 104 | Number of documents to fetch in a single batch when reading data from ArangoDB. 105 | Used for fetching node and edge data from the database. 106 | 107 | write_batch_size : int (optional, default: 50000) 108 | Number of documents to insert in a single batch when writing data to ArangoDB. 109 | Used for inserting node and edge data into the database if and only if 110 | **incoming_graph_data** is a NetworkX graph. 111 | 112 | write_async : bool (optional, default: False) 113 | Whether to insert data into ArangoDB asynchronously. Used for inserting 114 | node and edge data into the database if and only if **incoming_graph_data** 115 | is a NetworkX graph. 116 | 117 | symmetrize_edges : bool (optional, default: False) 118 | Whether to symmetrize the edges in the graph when fetched from the database. 119 | Only applies to directed graphs, thereby converting them to undirected graphs. 120 | 121 | use_arango_views : bool (optional, default: False) 122 | Whether to use experimental work-in-progress ArangoDB Views for the 123 | nodes, adjacency list, and edges. 
These views are designed to improve 124 | data processing performance by delegating CRUD operations to the database 125 | whenever possible. NOTE: This feature is experimental and may not work 126 | as expected. 127 | 128 | overwrite_graph : bool (optional, default: False) 129 | Whether to overwrite the graph in the database if it already exists. If 130 | set to True, the graph collections will be dropped and recreated. Note that 131 | this operation is irreversible and will result in the loss of all data in 132 | the graph. NOTE: If set to True, Collection Indexes will also be lost. 133 | 134 | args: positional arguments for nx.Graph 135 | Additional arguments passed to nx.Graph. 136 | 137 | kwargs: keyword arguments for nx.Graph 138 | Additional arguments passed to nx.Graph. 139 | """ 140 | 141 | __networkx_backend__: ClassVar[str] = "arangodb" # nx >=3.2 142 | __networkx_plugin__: ClassVar[str] = "arangodb" # nx <3.2 143 | 144 | @classmethod 145 | def to_networkx_class(cls) -> type[nx.DiGraph]: 146 | return nx.DiGraph # type: ignore[no-any-return] 147 | 148 | def __init__( 149 | self, 150 | incoming_graph_data: Any = None, 151 | name: str | None = None, 152 | default_node_type: str | None = None, 153 | edge_type_key: str = "_edge_type", 154 | edge_type_func: Callable[[str, str], str] | None = None, 155 | edge_collections_attributes: set[str] | None = None, 156 | db: StandardDatabase | None = None, 157 | read_parallelism: int = 10, 158 | read_batch_size: int = 100000, 159 | write_batch_size: int = 50000, 160 | write_async: bool = False, 161 | symmetrize_edges: bool = False, 162 | use_arango_views: bool = False, 163 | overwrite_graph: bool = False, 164 | *args: Any, 165 | **kwargs: Any, 166 | ): 167 | super().__init__( 168 | incoming_graph_data, 169 | name, 170 | default_node_type, 171 | edge_type_key, 172 | edge_type_func, 173 | edge_collections_attributes, 174 | db, 175 | read_parallelism, 176 | read_batch_size, 177 | write_batch_size, 178 | write_async, 179 | symmetrize_edges, 180 | use_arango_views, 181 | overwrite_graph, 182 | *args, 183 | **kwargs, 184 | ) 185 | 186 | if self.graph_exists_in_db: 187 | self.clear_edges = self.clear_edges_override 188 | self.add_node = self.add_node_override 189 | self.add_nodes_from = self.add_nodes_from_override 190 | self.remove_node = self.remove_node_override 191 | self.reverse = self.reverse_override 192 | 193 | assert isinstance(self._succ, AdjListOuterDict) 194 | assert isinstance(self._pred, AdjListOuterDict) 195 | self._succ.mirror = self._pred 196 | self._pred.mirror = self._succ 197 | self._succ.traversal_direction = TraversalDirection.OUTBOUND 198 | self._pred.traversal_direction = TraversalDirection.INBOUND 199 | 200 | if ( 201 | not self.is_multigraph() 202 | and incoming_graph_data is not None 203 | and not self._loaded_incoming_graph_data 204 | ): 205 | nx.convert.to_networkx_graph(incoming_graph_data, create_using=self) 206 | self._loaded_incoming_graph_data = True 207 | 208 | ####################### 209 | # nx.DiGraph Overides # 210 | ####################### 211 | 212 | # TODO? 213 | # If we want to continue with "Experimental Views" we need to implement the 214 | # InEdgeView and OutEdgeView classes. 215 | # @cached_property 216 | # def in_edges(self): 217 | # pass 218 | 219 | # TODO? 
220 | # @cached_property 221 | # def out_edges(self): 222 | # pass 223 | 224 | def reverse_override(self, copy: bool = True) -> Any: 225 | if copy is False: 226 | raise NotImplementedError("In-place reverse is not supported yet.") 227 | 228 | return super().reverse(copy=True) 229 | 230 | def clear_edges_override(self): 231 | logger.info("Note that clearing edges ony erases the edges in the local cache") 232 | for predecessor_dict in self._pred.data.values(): 233 | predecessor_dict.clear() 234 | 235 | super().clear_edges() 236 | 237 | def add_node_override(self, node_for_adding, **attr): 238 | if node_for_adding is None: 239 | raise ValueError("None cannot be a node") 240 | 241 | if node_for_adding not in self._succ: 242 | 243 | self._succ[node_for_adding] = self.adjlist_inner_dict_factory() 244 | self._pred[node_for_adding] = self.adjlist_inner_dict_factory() 245 | 246 | ###################### 247 | # NOTE: monkey patch # 248 | ###################### 249 | 250 | # Old: 251 | # attr_dict = self._node[node_for_adding] = self.node_attr_dict_factory() 252 | # attr_dict.update(attr) 253 | 254 | # New: 255 | node_attr_dict = self.node_attr_dict_factory() 256 | node_attr_dict.data = attr 257 | self._node[node_for_adding] = node_attr_dict 258 | 259 | # Reason: 260 | # We can optimize the process of adding a node by creating avoiding 261 | # the creation of a new dictionary and updating it with the attributes. 262 | # Instead, we can create a new node_attr_dict object and set the attributes 263 | # directly. This only makes 1 network call to the database instead of 2. 264 | 265 | ########################### 266 | 267 | else: 268 | self._node[node_for_adding].update(attr) 269 | 270 | nx._clear_cache(self) 271 | 272 | def add_nodes_from_override(self, nodes_for_adding, **attr): 273 | for n in nodes_for_adding: 274 | try: 275 | newnode = n not in self._node 276 | newdict = attr 277 | except TypeError: 278 | n, ndict = n 279 | newnode = n not in self._node 280 | newdict = attr.copy() 281 | newdict.update(ndict) 282 | if newnode: 283 | if n is None: 284 | raise ValueError("None cannot be a node") 285 | self._succ[n] = self.adjlist_inner_dict_factory() 286 | self._pred[n] = self.adjlist_inner_dict_factory() 287 | 288 | ###################### 289 | # NOTE: monkey patch # 290 | ###################### 291 | 292 | # Old: 293 | # self._node[n] = self.node_attr_dict_factory() 294 | # 295 | # self._node[n].update(newdict) 296 | 297 | # New: 298 | node_attr_dict = self.node_attr_dict_factory() 299 | node_attr_dict.data = newdict 300 | self._node[n] = node_attr_dict 301 | 302 | else: 303 | self._node[n].update(newdict) 304 | 305 | # Reason: 306 | # We can optimize the process of adding a node by creating avoiding 307 | # the creation of a new dictionary and updating it with the attributes. 308 | # Instead, we create a new node_attr_dict object and set the attributes 309 | # directly. This only makes 1 network call to the database instead of 2. 
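
        # Illustration (a hedged usage sketch, not part of the class API;
        # the graph name, node IDs, and the shared attribute below are
        # hypothetical):
        #
        # >>> G = nxadb.DiGraph(name="MyGraph")
        # >>> G.add_nodes_from(
        # ...     [("person/1", {"name": "Alice"}), ("person/2", {"name": "Bob"})],
        # ...     checked=True,
        # ... )
        #
        # With the override above, each new node results in a single document
        # insert that already carries its merged attributes, instead of an
        # insert followed by a separate attribute update.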
310 | 311 | ########################### 312 | 313 | nx._clear_cache(self) 314 | 315 | def remove_node_override(self, n): 316 | if isinstance(n, (str, int)): 317 | n = get_node_id(str(n), self.default_node_type) 318 | 319 | try: 320 | 321 | ###################### 322 | # NOTE: monkey patch # 323 | ###################### 324 | 325 | # Old: 326 | # nbrs = self._succ[n] 327 | 328 | # New: 329 | nbrs_succ = list(self._succ[n]) 330 | nbrs_pred = list(self._pred[n]) 331 | 332 | # Reason: 333 | # We need to fetch the outbound/inbound edges _prior_ to deleting the node, 334 | # as node deletion will already take care of deleting edges 335 | 336 | ########################### 337 | 338 | del self._node[n] 339 | except KeyError as err: # NetworkXError if n not in self 340 | raise nx.NetworkXError(f"The node {n} is not in the digraph.") from err 341 | for u in nbrs_succ: 342 | del self._pred[u][n] # remove all edges n-u in digraph 343 | del self._succ[n] # remove node from succ 344 | for u in nbrs_pred: 345 | ###################### 346 | # NOTE: Monkey patch # 347 | ###################### 348 | 349 | # Old: Nothing 350 | 351 | # New: 352 | if u == n: 353 | continue # skip self loops 354 | 355 | # Reason: We need to skip self loops, as they are 356 | # already taken care of in the previous step. This 357 | # avoids getting a KeyError on the next line. 358 | 359 | ########################### 360 | 361 | del self._succ[u][n] # remove all edges n-u in digraph 362 | del self._pred[n] # remove node from pred 363 | nx._clear_cache(self) 364 | -------------------------------------------------------------------------------- /nx_arangodb/classes/enum.py: -------------------------------------------------------------------------------- 1 | from enum import Enum, auto 2 | 3 | 4 | class TraversalDirection(Enum): 5 | OUTBOUND = auto() 6 | INBOUND = auto() 7 | ANY = auto() 8 | 9 | 10 | class GraphType(Enum): 11 | Graph = auto() 12 | DiGraph = auto() 13 | MultiGraph = auto() 14 | MultiDiGraph = auto() 15 | 16 | 17 | DIRECTED_GRAPH_TYPES = {GraphType.DiGraph.name, GraphType.MultiDiGraph.name} 18 | MULTIGRAPH_TYPES = {GraphType.MultiGraph.name, GraphType.MultiDiGraph.name} 19 | -------------------------------------------------------------------------------- /nx_arangodb/classes/multidigraph.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from typing import Any, Callable, ClassVar 3 | 4 | import networkx as nx 5 | from arango.database import StandardDatabase 6 | 7 | import nx_arangodb as nxadb 8 | from nx_arangodb.classes.digraph import DiGraph 9 | from nx_arangodb.classes.multigraph import MultiGraph 10 | 11 | networkx_api = nxadb.utils.decorators.networkx_class(nx.MultiDiGraph) # type: ignore 12 | 13 | __all__ = ["MultiDiGraph"] 14 | 15 | 16 | class MultiDiGraph(MultiGraph, DiGraph, nx.MultiDiGraph): 17 | """ 18 | A directed graph class that can store multiedges. 19 | 20 | Subclasses ``nxadb.MultiGraph``, ``nxadb.Digraph``, and ``nx.MultiGraph``. 21 | 22 | In order to connect to an ArangoDB instance, the following environment 23 | variables must be set: 24 | 25 | 1. ``DATABASE_HOST`` 26 | 2. ``DATABASE_USERNAME`` 27 | 3. ``DATABASE_PASSWORD`` 28 | 4. ``DATABASE_NAME`` 29 | 30 | Furthermore, the ``name`` parameter is required to create a new graph 31 | or to connect to an existing graph in the database. 
32 | 33 | Example 34 | ------- 35 | >>> import os 36 | >>> import networkx as nx 37 | >>> import nx_arangodb as nxadb 38 | >>> 39 | >>> os.environ["DATABASE_HOST"] = "http://localhost:8529" 40 | >>> os.environ["DATABASE_USERNAME"] = "root" 41 | >>> os.environ["DATABASE_PASSWORD"] = "openSesame" 42 | >>> os.environ["DATABASE_NAME"] = "_system" 43 | >>> 44 | >>> G = nxadb.DiGraph(name="MyGraph") 45 | >>> ... 46 | 47 | 48 | Parameters 49 | ---------- 50 | incoming_graph_data : input graph (optional, default: None) 51 | Data to initialize graph. If None (default) an empty 52 | graph is created. Must be used in conjunction with **name** if 53 | the user wants to persist the graph in ArangoDB. NOTE: It is 54 | recommended for incoming_graph_data to be a NetworkX graph due 55 | to faster loading times. 56 | 57 | multigraph_input : bool or None (default None) 58 | Note: Only used when `incoming_graph_data` is a dict. 59 | If True, `incoming_graph_data` is assumed to be a 60 | dict-of-dict-of-dict-of-dict structure keyed by 61 | node to neighbor to edge keys to edge data for multi-edges. 62 | A NetworkXError is raised if this is not the case. 63 | If False, :func:`to_networkx_graph` is used to try to determine 64 | the dict's graph data structure as either a dict-of-dict-of-dict 65 | keyed by node to neighbor to edge data, or a dict-of-iterable 66 | keyed by node to neighbors. 67 | If None, the treatment for True is tried, but if it fails, 68 | the treatment for False is tried. 69 | 70 | name : str (optional, default: None) 71 | Name of the graph in the database. If the graph already exists, 72 | the user can pass the name of the graph to connect to it. If 73 | the graph does not exist, a General Graph will be created by 74 | passing the **name**. NOTE: Must be used in conjunction with 75 | **incoming_graph_data** if the user wants to persist the graph 76 | in ArangoDB. 77 | 78 | default_node_type : str (optional, default: None) 79 | Default node type for the graph. In ArangoDB terms, this is the 80 | default vertex collection. If the graph already exists, the user can 81 | omit this parameter and the default node type will be set to the 82 | first vertex collection in the graph. If the graph does not exist, 83 | the user can pass the default node type to create the default vertex 84 | collection. 85 | 86 | edge_type_key : str (optional, default: "_edge_type") 87 | Key used to store the edge type when inserting edges into the graph. 88 | Useful for working with Heterogeneous Graphs. 89 | 90 | edge_type_func : Callable[[str, str], str] (optional, default: None) 91 | Function to determine the edge type between two nodes. If the graph 92 | already exists, the user can omit this parameter and the edge type 93 | function will be set based on the existing edge definitions. If the 94 | graph does not exist, the user can pass a function that determines 95 | the edge type between two nodes. 96 | 97 | edge_collections_attributes : set[str] (optional, default: None) 98 | Set of edge attributes to fetch when executing a NetworkX algorithm. 99 | Useful if the user has edge weights or other edge attributes that 100 | they want to use in a NetworkX algorithm. 101 | 102 | db : arango.database.StandardDatabase (optional, default: None) 103 | ArangoDB database object. If the user has an existing python-arango 104 | connection to the database, they can pass the database object to the graph. 
105 | If not provided, a database object will be created using the environment 106 | variables DATABASE_HOST, DATABASE_USERNAME, DATABASE_PASSWORD, and 107 | DATABASE_NAME. 108 | 109 | read_parallelism : int (optional, default: 10) 110 | Number of parallel threads to use when reading data from ArangoDB. 111 | Used for fetching node and edge data from the database. 112 | 113 | read_batch_size : int (optional, default: 100000) 114 | Number of documents to fetch in a single batch when reading data from ArangoDB. 115 | Used for fetching node and edge data from the database. 116 | 117 | write_batch_size : int (optional, default: 50000) 118 | Number of documents to insert in a single batch when writing data to ArangoDB. 119 | Used for inserting node and edge data into the database if and only if 120 | **incoming_graph_data** is a NetworkX graph. 121 | 122 | write_async : bool (optional, default: False) 123 | Whether to insert data into ArangoDB asynchronously. Used for inserting 124 | node and edge data into the database if and only if **incoming_graph_data** 125 | is a NetworkX graph. 126 | 127 | symmetrize_edges : bool (optional, default: False) 128 | Whether to symmetrize the edges in the graph when fetched from the database. 129 | Only applies to directed graphs, thereby converting them to undirected graphs. 130 | 131 | use_arango_views : bool (optional, default: False) 132 | Whether to use experimental work-in-progress ArangoDB Views for the 133 | nodes, adjacency list, and edges. These views are designed to improve 134 | data processing performance by delegating CRUD operations to the database 135 | whenever possible. NOTE: This feature is experimental and may not work 136 | as expected. 137 | 138 | overwrite_graph : bool (optional, default: False) 139 | Whether to overwrite the graph in the database if it already exists. If 140 | set to True, the graph collections will be dropped and recreated. Note that 141 | this operation is irreversible and will result in the loss of all data in 142 | the graph. NOTE: If set to True, Collection Indexes will also be lost. 143 | 144 | args: positional arguments for nx.Graph 145 | Additional arguments passed to nx.Graph. 146 | 147 | kwargs: keyword arguments for nx.Graph 148 | Additional arguments passed to nx.Graph. 
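
    A minimal sketch of persisting an existing NetworkX graph (assumes the
    connection environment variables from the Example above are set; the
    graph name and edge attribute are illustrative only):

    >>> G_nx = nx.MultiDiGraph()
    >>> G_nx.add_edge("person/1", "person/2", weight=1.0)
    >>> G = nxadb.MultiDiGraph(incoming_graph_data=G_nx, name="MyGraph")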
149 | """ 150 | 151 | __networkx_backend__: ClassVar[str] = "arangodb" # nx >=3.2 152 | __networkx_plugin__: ClassVar[str] = "arangodb" # nx <3.2 153 | 154 | @classmethod 155 | def to_networkx_class(cls) -> type[nx.MultiDiGraph]: 156 | return nx.MultiDiGraph # type: ignore[no-any-return] 157 | 158 | def __init__( 159 | self, 160 | incoming_graph_data: Any = None, 161 | multigraph_input: bool | None = None, 162 | name: str | None = None, 163 | default_node_type: str | None = None, 164 | edge_type_key: str = "_edge_type", 165 | edge_type_func: Callable[[str, str], str] | None = None, 166 | edge_collections_attributes: set[str] | None = None, 167 | db: StandardDatabase | None = None, 168 | read_parallelism: int = 10, 169 | read_batch_size: int = 100000, 170 | write_batch_size: int = 50000, 171 | write_async: bool = False, 172 | symmetrize_edges: bool = False, 173 | use_arango_views: bool = False, 174 | overwrite_graph: bool = False, 175 | *args: Any, 176 | **kwargs: Any, 177 | ): 178 | super().__init__( 179 | incoming_graph_data, 180 | multigraph_input, 181 | name, 182 | default_node_type, 183 | edge_type_key, 184 | edge_type_func, 185 | edge_collections_attributes, 186 | db, 187 | read_parallelism, 188 | read_batch_size, 189 | write_batch_size, 190 | write_async, 191 | symmetrize_edges, 192 | use_arango_views, 193 | overwrite_graph, 194 | *args, 195 | **kwargs, 196 | ) 197 | 198 | if self.graph_exists_in_db: 199 | self.reverse = self.reverse_override 200 | self.to_undirected = self.to_undirected_override 201 | 202 | ####################### 203 | # Init helper methods # 204 | ####################### 205 | 206 | ########################## 207 | # nx.MultiGraph Overides # 208 | ########################## 209 | 210 | def reverse_override(self, copy: bool = True) -> Any: 211 | if copy is False: 212 | raise NotImplementedError("In-place reverse is not supported yet.") 213 | 214 | return super().reverse(copy=True) 215 | 216 | def to_undirected_override(self, reciprocal=False, as_view=False): 217 | if reciprocal is False: 218 | return super().to_undirected(reciprocal=False, as_view=as_view) 219 | 220 | graph_class = self.to_undirected_class() 221 | if as_view is True: 222 | return nx.graphviews.generic_graph_view(self, graph_class) 223 | 224 | # deepcopy when not a view 225 | G = graph_class() 226 | G.graph.update(deepcopy(self.graph)) 227 | G.add_nodes_from((n, deepcopy(d)) for n, d in self._node.items()) 228 | 229 | ###################### 230 | # NOTE: Monkey patch # 231 | ###################### 232 | 233 | # Old 234 | # G.add_edges_from( 235 | # (u, v, key, deepcopy(data)) 236 | # for u, nbrs in self._adj.items() 237 | # for v, keydict in nbrs.items() 238 | # for key, data in keydict.items() 239 | # if v in self._pred[u] and key in self._pred[u][v] 240 | # ) 241 | 242 | # New: 243 | G.add_edges_from( 244 | (u, v, key, deepcopy(data)) 245 | for u, nbrs in self._adj.items() 246 | for v, keydict in nbrs.items() 247 | for key, data in keydict.items() 248 | if v in self._pred[u] # and key in self._pred[u][v] 249 | ) 250 | 251 | # Reason: MultiGraphs in `nxadb` don't use integer-based keys for edges. 252 | # They use ArangoDB Edge IDs. Therefore, the statement `key in self._pred[u][v]` 253 | # will always be False in the context of MultiDiGraphs. For more details on why 254 | # this adjustment is needed, see the `test_to_undirected_reciprocal` 255 | # in `test_multidigraph.py`. 
256 | 257 | ########################### 258 | 259 | return G 260 | -------------------------------------------------------------------------------- /nx_arangodb/classes/multigraph.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, ClassVar 2 | 3 | import networkx as nx 4 | from arango.database import StandardDatabase 5 | 6 | import nx_arangodb as nxadb 7 | from nx_arangodb.classes.graph import Graph 8 | from nx_arangodb.logger import logger 9 | 10 | from .dict import edge_key_dict_factory 11 | 12 | networkx_api = nxadb.utils.decorators.networkx_class(nx.MultiGraph) # type: ignore 13 | 14 | __all__ = ["MultiGraph"] 15 | 16 | 17 | class MultiGraph(Graph, nx.MultiGraph): 18 | """ 19 | An undirected graph class that can store multiedges. 20 | 21 | Subclasses ``nxadb.Graph`` and ``nx.MultiGraph``. 22 | 23 | In order to connect to an ArangoDB instance, the following environment 24 | variables must be set: 25 | 26 | 1. ``DATABASE_HOST`` 27 | 2. ``DATABASE_USERNAME`` 28 | 3. ``DATABASE_PASSWORD`` 29 | 4. ``DATABASE_NAME`` 30 | 31 | Furthermore, the ``name`` parameter is required to create a new graph 32 | or to connect to an existing graph in the database. 33 | 34 | Example 35 | ------- 36 | >>> import os 37 | >>> import networkx as nx 38 | >>> import nx_arangodb as nxadb 39 | >>> 40 | >>> os.environ["DATABASE_HOST"] = "http://localhost:8529" 41 | >>> os.environ["DATABASE_USERNAME"] = "root" 42 | >>> os.environ["DATABASE_PASSWORD"] = "openSesame" 43 | >>> os.environ["DATABASE_NAME"] = "_system" 44 | >>> 45 | >>> G = nxadb.DiGraph(name="MyGraph") 46 | >>> ... 47 | 48 | 49 | Parameters 50 | ---------- 51 | incoming_graph_data : input graph (optional, default: None) 52 | Data to initialize graph. If None (default) an empty 53 | graph is created. Must be used in conjunction with **name** if 54 | the user wants to persist the graph in ArangoDB. NOTE: It is 55 | recommended for incoming_graph_data to be a NetworkX graph due 56 | to faster loading times. 57 | 58 | multigraph_input : bool or None (default None) 59 | Note: Only used when `incoming_graph_data` is a dict. 60 | If True, `incoming_graph_data` is assumed to be a 61 | dict-of-dict-of-dict-of-dict structure keyed by 62 | node to neighbor to edge keys to edge data for multi-edges. 63 | A NetworkXError is raised if this is not the case. 64 | If False, :func:`to_networkx_graph` is used to try to determine 65 | the dict's graph data structure as either a dict-of-dict-of-dict 66 | keyed by node to neighbor to edge data, or a dict-of-iterable 67 | keyed by node to neighbors. 68 | If None, the treatment for True is tried, but if it fails, 69 | the treatment for False is tried. 70 | 71 | name : str (optional, default: None) 72 | Name of the graph in the database. If the graph already exists, 73 | the user can pass the name of the graph to connect to it. If 74 | the graph does not exist, a General Graph will be created by 75 | passing the **name**. NOTE: Must be used in conjunction with 76 | **incoming_graph_data** if the user wants to persist the graph 77 | in ArangoDB. 78 | 79 | default_node_type : str (optional, default: None) 80 | Default node type for the graph. In ArangoDB terms, this is the 81 | default vertex collection. If the graph already exists, the user can 82 | omit this parameter and the default node type will be set to the 83 | first vertex collection in the graph. 
If the graph does not exist, 84 | the user can pass the default node type to create the default vertex 85 | collection. 86 | 87 | edge_type_key : str (optional, default: "_edge_type") 88 | Key used to store the edge type when inserting edges into the graph. 89 | Useful for working with Heterogeneous Graphs. 90 | 91 | edge_type_func : Callable[[str, str], str] (optional, default: None) 92 | Function to determine the edge type between two nodes. If the graph 93 | already exists, the user can omit this parameter and the edge type 94 | function will be set based on the existing edge definitions. If the 95 | graph does not exist, the user can pass a function that determines 96 | the edge type between two nodes. 97 | 98 | edge_collections_attributes : set[str] (optional, default: None) 99 | Set of edge attributes to fetch when executing a NetworkX algorithm. 100 | Useful if the user has edge weights or other edge attributes that 101 | they want to use in a NetworkX algorithm. 102 | 103 | db : arango.database.StandardDatabase (optional, default: None) 104 | ArangoDB database object. If the user has an existing python-arango 105 | connection to the database, they can pass the database object to the graph. 106 | If not provided, a database object will be created using the environment 107 | variables DATABASE_HOST, DATABASE_USERNAME, DATABASE_PASSWORD, and 108 | DATABASE_NAME. 109 | 110 | read_parallelism : int (optional, default: 10) 111 | Number of parallel threads to use when reading data from ArangoDB. 112 | Used for fetching node and edge data from the database. 113 | 114 | read_batch_size : int (optional, default: 100000) 115 | Number of documents to fetch in a single batch when reading data from ArangoDB. 116 | Used for fetching node and edge data from the database. 117 | 118 | write_batch_size : int (optional, default: 50000) 119 | Number of documents to insert in a single batch when writing data to ArangoDB. 120 | Used for inserting node and edge data into the database if and only if 121 | **incoming_graph_data** is a NetworkX graph. 122 | 123 | write_async : bool (optional, default: False) 124 | Whether to insert data into ArangoDB asynchronously. Used for inserting 125 | node and edge data into the database if and only if **incoming_graph_data** 126 | is a NetworkX graph. 127 | 128 | symmetrize_edges : bool (optional, default: False) 129 | Whether to symmetrize the edges in the graph when fetched from the database. 130 | Only applies to directed graphs, thereby converting them to undirected graphs. 131 | 132 | use_arango_views : bool (optional, default: False) 133 | Whether to use experimental work-in-progress ArangoDB Views for the 134 | nodes, adjacency list, and edges. These views are designed to improve 135 | data processing performance by delegating CRUD operations to the database 136 | whenever possible. NOTE: This feature is experimental and may not work 137 | as expected. 138 | 139 | overwrite_graph : bool (optional, default: False) 140 | Whether to overwrite the graph in the database if it already exists. If 141 | set to True, the graph collections will be dropped and recreated. Note that 142 | this operation is irreversible and will result in the loss of all data in 143 | the graph. NOTE: If set to True, Collection Indexes will also be lost. 144 | 145 | args: positional arguments for nx.Graph 146 | Additional arguments passed to nx.Graph. 147 | 148 | kwargs: keyword arguments for nx.Graph 149 | Additional arguments passed to nx.Graph. 
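
    A minimal sketch of initializing from a multigraph-style dictionary
    (assumes the connection environment variables from the Example above
    are set; node IDs and attributes are illustrative only):

    >>> data = {"person/1": {"person/2": {0: {"weight": 1.0}, 1: {"weight": 2.5}}}}
    >>> G = nxadb.MultiGraph(data, multigraph_input=True, name="MyGraph")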
150 | """ 151 | 152 | __networkx_backend__: ClassVar[str] = "arangodb" # nx >=3.2 153 | __networkx_plugin__: ClassVar[str] = "arangodb" # nx <3.2 154 | 155 | @classmethod 156 | def to_networkx_class(cls) -> type[nx.MultiGraph]: 157 | return nx.MultiGraph # type: ignore[no-any-return] 158 | 159 | def __init__( 160 | self, 161 | incoming_graph_data: Any = None, 162 | multigraph_input: bool | None = None, 163 | name: str | None = None, 164 | default_node_type: str | None = None, 165 | edge_type_key: str = "_edge_type", 166 | edge_type_func: Callable[[str, str], str] | None = None, 167 | edge_collections_attributes: set[str] | None = None, 168 | db: StandardDatabase | None = None, 169 | read_parallelism: int = 10, 170 | read_batch_size: int = 100000, 171 | write_batch_size: int = 50000, 172 | write_async: bool = False, 173 | symmetrize_edges: bool = False, 174 | use_arango_views: bool = False, 175 | overwrite_graph: bool = False, 176 | *args: Any, 177 | **kwargs: Any, 178 | ): 179 | super().__init__( 180 | incoming_graph_data, 181 | name, 182 | default_node_type, 183 | edge_type_key, 184 | edge_type_func, 185 | edge_collections_attributes, 186 | db, 187 | read_parallelism, 188 | read_batch_size, 189 | write_batch_size, 190 | write_async, 191 | symmetrize_edges, 192 | use_arango_views, 193 | overwrite_graph, 194 | *args, 195 | **kwargs, 196 | ) 197 | 198 | if self.graph_exists_in_db: 199 | self.add_edge = self.add_edge_override 200 | self.has_edge = self.has_edge_override 201 | self.copy = self.copy_override 202 | 203 | if incoming_graph_data is not None and not self._loaded_incoming_graph_data: 204 | # Taken from networkx.MultiGraph.__init__ 205 | if isinstance(incoming_graph_data, dict) and multigraph_input is not False: 206 | try: 207 | nx.convert.from_dict_of_dicts( 208 | incoming_graph_data, create_using=self, multigraph_input=True 209 | ) 210 | except Exception as err: 211 | if multigraph_input is True: 212 | m = f"converting multigraph_input raised:\n{type(err)}: {err}" 213 | raise nx.NetworkXError(m) 214 | 215 | # Reset the graph 216 | for v_col in self.adb_graph.vertex_collections(): 217 | self.db.collection(v_col).truncate() 218 | 219 | for e_def in self.adb_graph.edge_definitions(): 220 | self.db.collection(e_def["edge_collection"]).truncate() 221 | 222 | nx.convert.to_networkx_graph(incoming_graph_data, create_using=self) 223 | else: 224 | nx.convert.to_networkx_graph(incoming_graph_data, create_using=self) 225 | 226 | self._loaded_incoming_graph_data = True 227 | 228 | ####################### 229 | # Init helper methods # 230 | ####################### 231 | 232 | def _set_factory_methods(self, read_parallelism: int, read_batch_size: int) -> None: 233 | super()._set_factory_methods(read_parallelism, read_batch_size) 234 | self.edge_key_dict_factory = edge_key_dict_factory( 235 | self.db, 236 | self.adb_graph, 237 | self.edge_type_key, 238 | self.edge_type_func, 239 | self.is_directed(), 240 | ) 241 | 242 | ########################## 243 | # nx.MultiGraph Overides # 244 | ########################## 245 | 246 | def add_edge_override(self, u_for_edge, v_for_edge, key=None, **attr): 247 | if key is not None: 248 | m = "ArangoDB MultiGraph does not support custom edge keys yet." 
249 | logger.warning(m) 250 | 251 | _ = super().add_edge(u_for_edge, v_for_edge, key="-1", **attr) 252 | 253 | ###################### 254 | # NOTE: monkey patch # 255 | ###################### 256 | 257 | # Old: 258 | # return key 259 | 260 | # New: 261 | keys = list(self._adj[u_for_edge][v_for_edge].data.keys()) 262 | last_key = keys[-1] 263 | return last_key 264 | 265 | # Reason: 266 | # nxadb.MultiGraph does not yet support the ability to work 267 | # with custom edge keys. As a Database, we must rely on the official 268 | # ArangoDB Edge _id to uniquely identify edges. The EdgeKeyDict.__setitem__ 269 | # method will be responsible for setting the edge key to the _id of the edge 270 | # document. This will allow us to use the edge key as a unique identifier 271 | 272 | ########################### 273 | 274 | def has_edge_override(self, u, v, key=None): 275 | try: 276 | if key is None: 277 | return v in self._adj[u] 278 | else: 279 | ###################### 280 | # NOTE: monkey patch # 281 | ###################### 282 | 283 | # Old: Nothing 284 | 285 | # New: 286 | if isinstance(key, int): 287 | return len(self._adj[u][v]) > key 288 | 289 | # Reason: 290 | # Integer keys in nxadb.MultiGraph are simply used 291 | # as syntactic sugar to access the edge data of a specific 292 | # edge that is **cached** in the adjacency dictionary. 293 | # So we simply just check if the integer key is within the 294 | # range of the number of edges between u and v. 295 | 296 | return key in self._adj[u][v] 297 | except KeyError: 298 | return False 299 | 300 | def copy_override(self, *args, **kwargs): 301 | logger.warning("Note that copying a graph loses the connection to the database") 302 | G = super().copy(*args, **kwargs) 303 | G.edge_key_dict_factory = nx.MultiGraph.edge_key_dict_factory 304 | return G 305 | -------------------------------------------------------------------------------- /nx_arangodb/classes/reportviews.py: -------------------------------------------------------------------------------- 1 | """Experimental overrides of the NetworkX Views that represent the 2 | nodes and edges of the graph. 3 | 4 | Overriding these classes allows us to implement custom logic for 5 | data filtering and updating in the database, instead of in Python. 6 | 7 | These classes are a work-in-progress. The main goal is to try 8 | to delegate data processing to ArangoDB, whenever possible. 9 | 10 | To use these experimental views, you must set **use_arango_views=True** 11 | when creating a new graph object: 12 | >>> G = nxadb.Graph(name="MyGraph", use_arango_views=True) 13 | """ 14 | 15 | from __future__ import annotations 16 | 17 | import networkx as nx 18 | 19 | import nx_arangodb as nxadb 20 | 21 | 22 | class ArangoNodeView(nx.classes.reportviews.NodeView): 23 | """The ArangoNodeView class is an experimental subclass of the 24 | NodeView class. 25 | 26 | Contrary to the original NodeView class, the ArangoNodeView is 27 | writable to allow for bulk updates to the graph in the DB. 28 | """ 29 | 30 | # DataView method 31 | def __call__(self, data=False, default=None): 32 | if data is False: 33 | return self 34 | return ArangoNodeDataView(self._nodes, data, default) 35 | 36 | def data(self, data=True, default=None): 37 | """Return a read-only view of node data. 38 | 39 | Parameters 40 | ---------- 41 | data : bool or node data key, default=True 42 | If ``data=True`` (the default), return a `NodeDataView` object that 43 | maps each node to *all* of its attributes. 
`data` may also be an 44 | arbitrary key, in which case the `NodeDataView` maps each node to 45 | the value for the keyed attribute. In this case, if a node does 46 | not have the `data` attribute, the `default` value is used. 47 | default : object, default=None 48 | The value used when a node does not have a specific attribute. 49 | 50 | Returns 51 | ------- 52 | NodeDataView 53 | The layout of the returned NodeDataView depends on the value of the 54 | `data` parameter. 55 | 56 | Notes 57 | ----- 58 | If ``data=False``, returns a `NodeView` object without data. 59 | 60 | See Also 61 | -------- 62 | NodeDataView 63 | """ 64 | if data is False: 65 | return self 66 | return ArangoNodeDataView(self._nodes, data, default) 67 | 68 | def update(self, data): 69 | """Update a set of nodes within the graph. 70 | 71 | The benefit of this method is that it allows for bulk API updates, 72 | as opposed to `G.add_nodes_from`, which currently makes 73 | one API request per node. 74 | 75 | Example 76 | ------- 77 | >>> G = nxadb.Graph(name="MyGraph") 78 | >>> G.nodes.update( 79 | { 80 | 'node/1': {"node/1", "foo": "bar"}, 81 | 'node/2': {"node/2", "foo": "baz"}, 82 | ... 83 | }) 84 | """ 85 | return self._nodes.update(data) 86 | 87 | 88 | class ArangoNodeDataView(nx.classes.reportviews.NodeDataView): 89 | """The ArangoNodeDataView class is an experimental subclass of the 90 | NodeDataView class. 91 | 92 | The main use for this class is to iterate through node-data pairs. 93 | The data can be the entire data-dictionary for each node, or it 94 | can be a specific attribute (with default) for each node. 95 | 96 | In the event that the data is a specific attribute, the data is 97 | filtered server-side, instead of in Python. This is done by using 98 | the ArangoDB Query Language (AQL) to filter the data. 99 | """ 100 | 101 | def __iter__(self): 102 | data = self._data 103 | if data is False: 104 | return iter(self._nodes) 105 | if data is True: 106 | return iter(self._nodes.items()) 107 | 108 | ###################### 109 | # NOTE: Monkey Patch # 110 | ###################### 111 | 112 | # Old: 113 | # return ( 114 | # (n, dd[data] if data in dd else self._default) 115 | # for n, dd in self._nodes.items() 116 | # ) 117 | 118 | # New: 119 | return iter(self._nodes.items(data=data, default=self._default)) 120 | 121 | # Reason: We can utilize AQL to filter the data we 122 | # want to return, instead of filtering it in Python 123 | 124 | ########################### 125 | 126 | 127 | class ArangoEdgeDataView(nx.classes.reportviews.EdgeDataView): 128 | """The ArangoEdgeDataView class is an experimental subclass of the 129 | EdgeDataView class. 130 | 131 | This view is primarily used to iterate over the edges reporting 132 | edges as node-tuples with edge data optionally reported. 133 | 134 | In the event that the data is a specific attribute, the data is 135 | filtered server-side, instead of in Python. This is done by using 136 | the ArangoDB Query Language (AQL) to filter the data. 137 | """ 138 | 139 | def __iter__(self): 140 | ###################### 141 | # NOTE: Monkey Patch # 142 | ###################### 143 | 144 | if self._nbunch is None and self._data not in [None, True, False]: 145 | # Reason: We can utilize AQL to filter the data we 146 | # want to return, instead of filtering it in Python 147 | # This is hacky for now, but it's meant to show that 148 | # the data can be filtered server-side. 
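            # For instance (an illustrative call; "weight" is an arbitrary
            # attribute name):
            #
            #     G.edges(data="weight", default=0)
            #
            # lands in this branch with _nbunch=None and _data="weight".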
149 | # We solve this by relying on self._adjdict, which 150 | # is the AdjListOuterDict object that has a custom 151 | # items() method that can filter data with AQL. 152 | 153 | yield from self._adjdict.items(data=self._data, default=self._default) 154 | else: 155 | yield from super().__iter__() 156 | 157 | 158 | class ArangoEdgeView(nx.classes.reportviews.EdgeView): 159 | """The ArangoEdgeView class is an experimental subclass of the 160 | EdgeView class. 161 | 162 | The __len__ method is overridden to count the number of edges 163 | in the graph by querying the database, instead of iterating 164 | through the edges in Python. 165 | """ 166 | 167 | dataview = ArangoEdgeDataView 168 | 169 | def __len__(self): 170 | 171 | ###################### 172 | # NOTE: Monkey Patch # 173 | ###################### 174 | 175 | # Old: 176 | # num_nbrs = (len(nbrs) + (n in nbrs) for n, nbrs in self._nodes_nbrs()) 177 | # return sum(num_nbrs) // 2 178 | 179 | # New: 180 | G: nxadb.Graph = self._graph 181 | return sum( 182 | [ 183 | G.db.collection(ed["edge_collection"]).count() 184 | for ed in G.adb_graph.edge_definitions() 185 | ] 186 | ) 187 | 188 | # Reason: We can utilize AQL to count the number of edges 189 | # instead of making individual requests to the database 190 | # i.e avoid having to do `n in nbrs` for each node 191 | 192 | ###################### 193 | -------------------------------------------------------------------------------- /nx_arangodb/convert.py: -------------------------------------------------------------------------------- 1 | """Functions to convert between NetworkX, NetworkX-ArangoDB, 2 | and NetworkX-cuGraph. 3 | 4 | Examples 5 | -------- 6 | >>> import networkx as nx 7 | >>> import nx_arangodb as nxadb 8 | >>> import nx_cugraph as nxcg 9 | >>> 10 | >>> G = nx.Graph() 11 | >>> G.add_edge(1, 2, weight=3.0) 12 | >>> G.add_edge(2, 3, weight=7.5) 13 | >>> 14 | >>> G_ADB = nxadb.convert._to_nxadb_graph(G) 15 | >>> G_CG = nxadb.convert._to_nxcg_graph(G_ADB) 16 | >>> G_NX = nxadb.convert._to_nx_graph(G_ADB) 17 | """ 18 | 19 | from __future__ import annotations 20 | 21 | import time 22 | from typing import Any 23 | 24 | import networkx as nx 25 | 26 | import nx_arangodb as nxadb 27 | from nx_arangodb.classes.dict.adj import AdjListOuterDict 28 | from nx_arangodb.classes.dict.node import NodeDict 29 | from nx_arangodb.logger import logger 30 | 31 | try: 32 | import cupy as cp 33 | import nx_cugraph as nxcg 34 | 35 | GPU_AVAILABLE = True 36 | logger.info("NetworkX-cuGraph is available.") 37 | except Exception as e: 38 | GPU_AVAILABLE = False 39 | logger.info(f"NetworkX-cuGraph is unavailable: {e}.") 40 | 41 | __all__ = [ 42 | "_to_nx_graph", 43 | "_to_nxadb_graph", 44 | "_to_nxcg_graph", 45 | ] 46 | 47 | 48 | def _to_nx_graph(G: Any, *args: Any, **kwargs: Any) -> nx.Graph: 49 | """Convert a graph to a NetworkX graph. 50 | 51 | Parameters 52 | ---------- 53 | G : Any 54 | The graph to convert. 55 | 56 | Currently supported types: 57 | - nx.Graph 58 | - nxadb.Graph 59 | 60 | Returns 61 | ------- 62 | nx.Graph 63 | The converted graph. 64 | """ 65 | logger.debug(f"_to_nx_graph for {G.__class__.__name__}") 66 | 67 | if isinstance(G, nxadb.Graph): 68 | return nxadb_to_nx(G) 69 | 70 | if isinstance(G, nx.Graph): 71 | return G 72 | 73 | raise TypeError(f"Expected nxadb.Graph or nx.Graph; got {type(G)}") 74 | 75 | 76 | def _to_nxadb_graph( 77 | G: Any, *args: Any, as_directed: bool = False, **kwargs: Any 78 | ) -> nxadb.Graph: 79 | """Convert a graph to a NetworkX-ArangoDB graph. 
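
    A short illustrative sketch, mirroring the module-level example above
    (the edge weight is arbitrary):

    >>> import networkx as nx
    >>> import nx_arangodb as nxadb
    >>> G_nx = nx.Graph()
    >>> G_nx.add_edge(1, 2, weight=3.0)
    >>> G_adb = nxadb.convert._to_nxadb_graph(G_nx)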
80 | 81 | Parameters 82 | ---------- 83 | G : Any 84 | The graph to convert. 85 | 86 | Currently supported types: 87 | - nx.Graph 88 | - nxadb.Graph 89 | 90 | as_directed : bool, optional 91 | Whether to convert the graph to a directed graph. 92 | Default is False. 93 | 94 | Returns 95 | ------- 96 | nxadb.Graph 97 | The converted graph. 98 | """ 99 | logger.debug(f"_to_nxadb_graph for {G.__class__.__name__}") 100 | 101 | if isinstance(G, nxadb.Graph): 102 | return G 103 | 104 | if isinstance(G, nx.Graph): 105 | return nx_to_nxadb(G, as_directed=as_directed) 106 | 107 | raise TypeError(f"Expected nxadb.Graph or nx.Graph; got {type(G)}") 108 | 109 | 110 | if GPU_AVAILABLE: 111 | 112 | def _to_nxcg_graph(G: Any, as_directed: bool = False) -> nxcg.Graph: 113 | """Convert a graph to a NetworkX-cuGraph graph. 114 | 115 | NOTE: Only supported if NetworkX-cuGraph is installed. 116 | 117 | Parameters 118 | ---------- 119 | G : Any 120 | The graph to convert. 121 | 122 | Currently supported types: 123 | - nxadb.Graph 124 | - nxcg.Graph 125 | 126 | as_directed : bool, optional 127 | Whether to convert the graph to a directed graph. 128 | Default is False. 129 | 130 | Returns 131 | ------- 132 | nxcg.Graph 133 | The converted graph. 134 | """ 135 | logger.debug(f"_to_nxcg_graph for {G.__class__.__name__}") 136 | 137 | if isinstance(G, nxcg.Graph): 138 | return G 139 | 140 | if isinstance(G, nxadb.Graph): 141 | logger.debug("converting nx_arangodb graph to nx_cugraph graph") 142 | 143 | if not G.graph_exists_in_db: 144 | return nxcg.convert.from_networkx(G) 145 | 146 | return nxadb_to_nxcg(G, as_directed=as_directed) 147 | 148 | raise TypeError(f"Expected nx_arangodb.Graph or nxcg.Graph; got {type(G)}") 149 | 150 | else: 151 | 152 | def _to_nxcg_graph(G: Any, as_directed: bool = False) -> nxcg.Graph: 153 | m = "nx-cugraph is not installed; cannot convert to nx-cugraph" 154 | raise NotImplementedError(m) 155 | 156 | 157 | def nx_to_nxadb( 158 | graph: nx.Graph, 159 | *args: Any, 160 | as_directed: bool = False, 161 | **kwargs: Any, 162 | ) -> nxadb.Graph: 163 | """Convert a NetworkX graph to a NetworkX-ArangoDB graph. 164 | 165 | Parameters 166 | ---------- 167 | graph : nx.Graph 168 | The NetworkX graph to convert. 169 | 170 | as_directed : bool, optional 171 | Whether to convert the graph to a directed graph. 172 | Default is False. 173 | 174 | Returns 175 | ------- 176 | nxadb.Graph 177 | The converted graph. 178 | """ 179 | logger.debug(f"from_networkx for {graph.__class__.__name__}") 180 | 181 | klass: type[nxadb.Graph] 182 | if graph.is_multigraph(): 183 | if graph.is_directed() or as_directed: 184 | klass = nxadb.MultiDiGraph 185 | else: 186 | klass = nxadb.MultiGraph 187 | 188 | else: 189 | if graph.is_directed() or as_directed: 190 | klass = nxadb.DiGraph 191 | else: 192 | klass = nxadb.Graph 193 | 194 | return klass(incoming_graph_data=graph) 195 | 196 | 197 | def nxadb_to_nx(G: nxadb.Graph) -> nx.Graph: 198 | """Convert a NetworkX-ArangoDB graph to a NetworkX graph. 199 | 200 | This function will pull the graph from the database if it does 201 | not exist in the cache. A new NetworkX graph will be created 202 | using the node and adjacency dictionaries that are fetched. 203 | 204 | NOTE: The current downside of this approach is that we are not 205 | able to take advantage of the custom Dictionary classes that we 206 | have implemented in nx_arangodb.classes.dict. This is because 207 | the node and adjacency dictionaries are fetched as regular 208 | Python dictionaries. 
Furthermore, we don't cache the dictionaries 209 | themselves, so we have to fetch them every time we convert the 210 | graph, which is currently being invoked on *every* algorithm 211 | call. See the note below for a potential solution. As a temporary 212 | workaround, users can do the following: 213 | 214 | ``` 215 | import networkx as nx 216 | import nx_arangodb as nxadb 217 | 218 | G_ADB = nxadb.Graph(name="MyGraph") # Connect to the graph 219 | G_NX = nxadb.convert._to_nx_graph(G_ADB) # Pull the graph 220 | 221 | nx.pagerank(G_NX) 222 | nx.betweenness_centrality(G_NX) 223 | ... 224 | ``` 225 | 226 | Parameters 227 | ---------- 228 | G : nxadb.Graph 229 | The NetworkX-ArangoDB graph to convert. 230 | 231 | Returns 232 | ------- 233 | nx.Graph 234 | The converted graph. 235 | """ 236 | if not G.graph_exists_in_db: 237 | # Since nxadb.Graph is a subclass of nx.Graph, we can return it as is. 238 | # This only applies if the graph does not exist in the database. 239 | return G 240 | 241 | assert isinstance(G._node, NodeDict) 242 | assert isinstance(G._adj, AdjListOuterDict) 243 | if G._node.FETCHED_ALL_DATA and G._adj.FETCHED_ALL_DATA: 244 | return G 245 | 246 | start_time = time.time() 247 | 248 | node_dict, adj_dict, *_ = nxadb.classes.function.get_arangodb_graph( 249 | adb_graph=G.adb_graph, 250 | load_node_dict=True, 251 | load_adj_dict=True, 252 | load_coo=False, 253 | edge_collections_attributes=G.edge_attributes, 254 | load_all_vertex_attributes=False, 255 | load_all_edge_attributes=len(G.edge_attributes) == 0, 256 | is_directed=G.is_directed(), 257 | is_multigraph=G.is_multigraph(), 258 | symmetrize_edges_if_directed=G.symmetrize_edges if G.is_directed() else False, 259 | read_parallelism=G.read_parallelism, 260 | read_batch_size=G.read_batch_size, 261 | ) 262 | 263 | logger.info(f"Graph '{G.adb_graph.name}' load took {time.time() - start_time}s") 264 | 265 | # NOTE: At this point, we _could_ choose to implement something similar to 266 | # NodeDict._fetch_all() and AdjListOuterDict._fetch_all() to iterate through 267 | # **node_dict** and **adj_dict**, and establish the "custom" Dictionary classes 268 | # that we've implemented in nx_arangodb.classes.dict. 269 | # However, this would involve adding additional for-loops and would likely be 270 | # slower than the current implementation. 271 | # Perhaps we should consider adding a feature flag to allow users to choose 272 | # between the two methods? e.g `build_remote_dicts=True/False` 273 | # If True, then we would return the (updated) nxadb.Graph that was passed in. 274 | # If False, then we would return the nx.Graph that is built below: 275 | 276 | G_NX: nx.Graph = G.to_networkx_class()() 277 | G_NX._node = node_dict 278 | 279 | if isinstance(G_NX, nx.DiGraph): 280 | G_NX._succ = G_NX._adj = adj_dict["succ"] 281 | G_NX._pred = adj_dict["pred"] 282 | 283 | else: 284 | G_NX._adj = adj_dict 285 | 286 | return G_NX 287 | 288 | 289 | if GPU_AVAILABLE: 290 | 291 | def nxadb_to_nxcg(G: nxadb.Graph, as_directed: bool = False) -> nxcg.Graph: 292 | """Convert a NetworkX-ArangoDB graph to a NetworkX-cuGraph graph. 293 | 294 | This function will pull the graph from the database if it does 295 | not exist in the cache. A new NetworkX-cuGraph graph will be 296 | created using the COO format that is fetched. The created graph 297 | will be cached in the nxadb.Graph object for future use. 298 | 299 | Parameters 300 | ---------- 301 | G : nxadb.Graph 302 | The NetworkX-ArangoDB graph to convert. 
303 | 304 | as_directed : bool, optional 305 | Whether to convert the graph to a directed graph. 306 | Default is False. 307 | 308 | Returns 309 | ------- 310 | nxcg.Graph 311 | The converted graph. 312 | """ 313 | if G.use_nxcg_cache and G.nxcg_graph is not None: 314 | m = "**use_nxcg_cache** is enabled. using cached NXCG Graph. no pull required." # noqa 315 | logger.debug(m) 316 | 317 | return G.nxcg_graph 318 | 319 | start_time = time.time() 320 | 321 | ( 322 | _, 323 | _, 324 | src_indices, 325 | dst_indices, 326 | edge_indices, 327 | vertex_ids_to_index, 328 | edge_values, 329 | ) = nxadb.classes.function.get_arangodb_graph( 330 | adb_graph=G.adb_graph, 331 | load_node_dict=False, 332 | load_adj_dict=False, 333 | load_coo=True, 334 | edge_collections_attributes=G.edge_attributes, 335 | load_all_vertex_attributes=False, # not used 336 | load_all_edge_attributes=len(G.edge_attributes) == 0, 337 | is_directed=G.is_directed(), 338 | is_multigraph=G.is_multigraph(), 339 | symmetrize_edges_if_directed=( 340 | G.symmetrize_edges if G.is_directed() else False 341 | ), 342 | read_parallelism=G.read_parallelism, 343 | read_batch_size=G.read_batch_size, 344 | ) 345 | 346 | logger.info(f"Graph '{G.adb_graph.name}' load took {time.time() - start_time}s") 347 | 348 | start_time = time.time() 349 | 350 | N = len(vertex_ids_to_index) 351 | src_indices_cp = cp.array(src_indices) 352 | dst_indices_cp = cp.array(dst_indices) 353 | edge_indices_cp = cp.array(edge_indices) 354 | 355 | if G.is_multigraph(): 356 | if G.is_directed() or as_directed: 357 | klass = nxcg.MultiDiGraph 358 | else: 359 | klass = nxcg.MultiGraph 360 | 361 | G.nxcg_graph = klass.from_coo( 362 | N=N, 363 | src_indices=src_indices_cp, 364 | dst_indices=dst_indices_cp, 365 | edge_indices=edge_indices_cp, 366 | edge_values=edge_values, 367 | # edge_masks, 368 | # node_values, 369 | # node_masks, 370 | key_to_id=vertex_ids_to_index, 371 | # edge_keys=edge_keys, 372 | ) 373 | 374 | else: 375 | if G.is_directed() or as_directed: 376 | klass = nxcg.DiGraph 377 | else: 378 | klass = nxcg.Graph 379 | 380 | G.nxcg_graph = klass.from_coo( 381 | N=N, 382 | src_indices=src_indices_cp, 383 | dst_indices=dst_indices_cp, 384 | edge_values=edge_values, 385 | # edge_masks, 386 | # node_values, 387 | # node_masks, 388 | key_to_id=vertex_ids_to_index, 389 | ) 390 | 391 | logger.info(f"NXCG Graph construction took {time.time() - start_time}s") 392 | 393 | return G.nxcg_graph 394 | -------------------------------------------------------------------------------- /nx_arangodb/exceptions.py: -------------------------------------------------------------------------------- 1 | class NetworkXArangoDBException(Exception): 2 | pass 3 | 4 | 5 | class GraphDoesNotExist(NetworkXArangoDBException): 6 | pass 7 | 8 | 9 | class DatabaseNotSet(NetworkXArangoDBException): 10 | pass 11 | 12 | 13 | class GraphNameNotSet(NetworkXArangoDBException): 14 | pass 15 | 16 | 17 | class GraphNotEmpty(NetworkXArangoDBException): 18 | pass 19 | 20 | 21 | class InvalidTraversalDirection(NetworkXArangoDBException): 22 | pass 23 | 24 | 25 | class EdgeAlreadyExists(NetworkXArangoDBException): 26 | pass 27 | 28 | 29 | class AQLMultipleResultsFound(NetworkXArangoDBException): 30 | pass 31 | 32 | 33 | class ArangoDBAlgorithmError(NetworkXArangoDBException): 34 | pass 35 | 36 | 37 | class MultipleEdgesFound(NetworkXArangoDBException): 38 | pass 39 | 40 | 41 | class EdgeTypeAmbiguity(NetworkXArangoDBException): 42 | pass 43 | 44 | 45 | class InvalidDefaultNodeType(NetworkXArangoDBException): 
46 | pass 47 | -------------------------------------------------------------------------------- /nx_arangodb/interface.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import os 4 | import sys 5 | from functools import partial 6 | from typing import Any, Callable, Protocol, Set 7 | 8 | import networkx as nx 9 | from networkx.utils.backends import _load_backend, _registered_algorithms 10 | 11 | import nx_arangodb as nxadb 12 | from nx_arangodb.logger import logger 13 | 14 | # Avoid infinite recursion when testing 15 | _IS_TESTING = os.environ.get("NETWORKX_TEST_BACKEND") in {"arangodb"} 16 | 17 | 18 | class NetworkXFunction(Protocol): 19 | graphs: dict[str, Any] 20 | name: str 21 | list_graphs: Set[str] 22 | orig_func: Callable[..., Any] 23 | _returns_graph: bool 24 | 25 | 26 | class BackendInterface: 27 | @staticmethod 28 | def convert_from_nx(graph: nx.Graph, *args: Any, **kwargs: Any) -> nxadb.Graph: 29 | return nxadb._to_nxadb_graph(graph, *args, **kwargs) 30 | 31 | @staticmethod 32 | def convert_to_nx(obj: Any, *args: Any, **kwargs: Any) -> nx.Graph: 33 | if not isinstance(obj, nxadb.Graph): 34 | return obj 35 | 36 | return nxadb._to_nx_graph(obj, *args, **kwargs) 37 | 38 | def __getattr__(self, attr: str, *, from_backend_name: str = "arangodb") -> Any: 39 | """ 40 | Dispatching mechanism for all networkx algorithms. This avoids having to 41 | write a separate function for each algorithm. 42 | """ 43 | if ( 44 | attr not in _registered_algorithms 45 | or _IS_TESTING 46 | and attr in {"empty_graph"} 47 | ): 48 | raise AttributeError(attr) 49 | 50 | if from_backend_name != "arangodb": 51 | raise ValueError(f"Unsupported source backend: '{from_backend_name}'") 52 | 53 | return partial(_auto_func, attr) 54 | 55 | 56 | def _auto_func(func_name: str, /, *args: Any, **kwargs: Any) -> Any: 57 | """ 58 | Function to automatically dispatch to the correct backend for a given algorithm. 59 | 60 | :param func_name: The name of the algorithm to run. 
61 | :type func_name: str 62 | """ 63 | dfunc = _registered_algorithms[func_name] 64 | 65 | backend_priority: list[str] = [] 66 | 67 | use_gpu = bool(kwargs.pop("use_gpu", nx.config.backends.arangodb.use_gpu)) 68 | if nxadb.convert.GPU_AVAILABLE and use_gpu: 69 | backend_priority.append("cugraph") 70 | 71 | for backend in backend_priority: 72 | if not _should_backend_run(backend, dfunc, *args, **kwargs): 73 | continue 74 | 75 | if not _can_backend_run(backend, dfunc, *args, **kwargs): 76 | continue 77 | 78 | try: 79 | return _run_with_backend( 80 | backend, 81 | dfunc, 82 | args, 83 | kwargs, 84 | ) 85 | 86 | except NotImplementedError: 87 | logger.debug(f"'{func_name}' not implemented for backend '{backend}'") 88 | pass 89 | 90 | default_backend = "networkx" 91 | logger.debug(f"'{func_name}' running on default backend '{default_backend}'") 92 | return _run_with_backend(default_backend, dfunc, args, kwargs) 93 | 94 | 95 | def _should_backend_run(backend: str, dfunc: Any, *args: Any, **kwargs: Any) -> bool: 96 | """Wrapper around NetworkX's should_backend_run function, because 97 | the signature is different for NetworkX <=3.3 and 3.4: 98 | 99 | - https://github.com/networkx/networkx/blob/networkx-3.3/networkx/utils/backends.py#L821 # noqa: E501 100 | - https://github.com/networkx/networkx/blob/networkx-3.4.1/networkx/utils/backends.py#L1514 # noqa: E501 101 | """ 102 | try: 103 | return bool(dfunc.__wrapped__._should_backend_run(backend, *args, **kwargs)) 104 | except TypeError: 105 | return bool(dfunc.__wrapped__._should_backend_run(backend, args, kwargs)) 106 | 107 | 108 | def _can_backend_run(backend: str, dfunc: Any, *args: Any, **kwargs: Any) -> bool: 109 | """Wrapper around NetworkX's _can_backend_run function, because 110 | the signature is different for NetworkX <=3.3 and 3.4: 111 | 112 | - https://github.com/networkx/networkx/blob/networkx-3.3/networkx/utils/backends.py#L810 # noqa: E501 113 | - https://github.com/networkx/networkx/blob/networkx-3.4.1/networkx/utils/backends.py#L1489 # noqa: E501 114 | """ 115 | try: 116 | return bool(dfunc.__wrapped__._can_backend_run(backend, *args, **kwargs)) 117 | except TypeError: 118 | return bool(dfunc.__wrapped__._can_backend_run(backend, args, kwargs)) 119 | 120 | 121 | def _run_with_backend( 122 | backend_name: str, 123 | dfunc: NetworkXFunction, 124 | args: Any, 125 | kwargs: Any, 126 | ) -> Any: 127 | """ 128 | :param backend: The name of the backend to run the algorithm on. 129 | :type backend: str 130 | :param dfunc: The function to run. 
131 | :type dfunc: NetworkXFunction 132 | """ 133 | func_name = dfunc.name 134 | backend_func = ( 135 | dfunc.orig_func 136 | if backend_name == "networkx" 137 | else getattr(_load_backend(backend_name), func_name) 138 | ) 139 | 140 | graphs_resolved = { 141 | gname: val 142 | for gname, pos in dfunc.graphs.items() 143 | if (val := args[pos] if pos < len(args) else kwargs.get(gname)) is not None 144 | } 145 | 146 | if dfunc.list_graphs: 147 | graphs_converted = { 148 | gname: ( 149 | [_convert_to_backend(g, backend_name) for g in val] 150 | if gname in dfunc.list_graphs 151 | else _convert_to_backend(val, backend_name) 152 | ) 153 | for gname, val in graphs_resolved.items() 154 | } 155 | else: 156 | graphs_converted = { 157 | gname: _convert_to_backend(graph, backend_name) 158 | for gname, graph in graphs_resolved.items() 159 | } 160 | 161 | converted_args = list(args) 162 | converted_kwargs = dict(kwargs) 163 | 164 | for gname, val in graphs_converted.items(): 165 | if gname in kwargs: 166 | converted_kwargs[gname] = val 167 | else: 168 | converted_args[dfunc.graphs[gname]] = val 169 | 170 | result = backend_func(*converted_args, **converted_kwargs) 171 | 172 | # TODO: Convert to nxadb.Graph? 173 | # What would this look like? Create a new graph in ArangoDB? 174 | # Or just establish a remote connection? 175 | # For now, if dfunc._returns_graph is True, it will return a 176 | # regular nx.Graph object. 177 | # if dfunc._returns_graph: 178 | # raise NotImplementedError("Returning Graphs not implemented yet") 179 | 180 | return result 181 | 182 | 183 | def _convert_to_backend(G_from: Any, backend_name: str) -> Any: 184 | if backend_name == "networkx": 185 | return nxadb._to_nx_graph(G_from) 186 | 187 | if backend_name == "cugraph": 188 | return nxadb._to_nxcg_graph(G_from) 189 | 190 | raise ValueError(f"Unsupported backend: '{backend_name}'") 191 | 192 | 193 | backend_interface = BackendInterface() 194 | -------------------------------------------------------------------------------- /nx_arangodb/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logger = logging.getLogger(__package__) 4 | 5 | if logger.hasHandlers(): 6 | logger.handlers.clear() 7 | 8 | handler = logging.StreamHandler() 9 | 10 | formatter = logging.Formatter( 11 | "[%(asctime)s] [%(levelname)s]: %(message)s", 12 | "%H:%M:%S %z", 13 | ) 14 | 15 | handler.setFormatter(formatter) 16 | 17 | logger.addHandler(handler) 18 | 19 | logger.setLevel(logging.INFO) 20 | -------------------------------------------------------------------------------- /nx_arangodb/typing.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import Hashable 4 | from typing import TypeVar 5 | 6 | import numpy as np 7 | import numpy.typing as npt 8 | 9 | AttrKey = TypeVar("AttrKey", bound=Hashable) 10 | EdgeKey = TypeVar("EdgeKey", bound=Hashable) 11 | NodeKey = TypeVar("NodeKey", bound=Hashable) 12 | EdgeTuple = tuple[NodeKey, NodeKey] 13 | EdgeValue = TypeVar("EdgeValue") 14 | NodeValue = TypeVar("NodeValue") 15 | IndexValue = TypeVar("IndexValue") 16 | Dtype = TypeVar("Dtype") 17 | -------------------------------------------------------------------------------- /nx_arangodb/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .decorators import * 2 | from .misc import * 3 | 
-------------------------------------------------------------------------------- /nx_arangodb/utils/decorators.py: -------------------------------------------------------------------------------- 1 | # type: ignore 2 | # Copied from nx-cugraph 3 | 4 | from __future__ import annotations 5 | 6 | from functools import partial, update_wrapper 7 | from textwrap import dedent 8 | 9 | import networkx as nx 10 | from networkx.utils.decorators import nodes_or_number, not_implemented_for 11 | 12 | from nx_arangodb.interface import BackendInterface 13 | 14 | try: 15 | from networkx.utils.backends import _registered_algorithms 16 | except ModuleNotFoundError: 17 | from networkx.classes.backends import _registered_algorithms 18 | 19 | 20 | __all__ = ["not_implemented_for", "nodes_or_number", "networkx_algorithm"] 21 | 22 | 23 | def networkx_class(api): 24 | def inner(func): 25 | func.__doc__ = getattr(api, func.__name__).__doc__ 26 | return func 27 | 28 | return inner 29 | 30 | 31 | class networkx_algorithm: 32 | name: str 33 | extra_doc: str | None 34 | extra_params: dict[str, str] | None 35 | version_added: str 36 | is_incomplete: bool 37 | is_different: bool 38 | _plc_names: set[str] | None 39 | 40 | def __new__( 41 | cls, 42 | func=None, 43 | *, 44 | name: str | None = None, 45 | # Extra parameter info that is added to NetworkX docstring 46 | extra_params: dict[str, str] | str | None = None, 47 | # Applies `nodes_or_number` decorator compatibly across versions (3.3 changed) 48 | nodes_or_number: list[int] | int | None = None, 49 | # Metadata (for introspection only) 50 | version_added: str, # Required 51 | is_incomplete: bool = False, # See self.extra_doc for details if True 52 | is_different: bool = False, # See self.extra_doc for details if True 53 | _plc: str | set[str] | None = None, # Hidden from user, may be removed someday 54 | ): 55 | if func is None: 56 | return partial( 57 | networkx_algorithm, 58 | name=name, 59 | extra_params=extra_params, 60 | nodes_or_number=nodes_or_number, 61 | version_added=version_added, 62 | is_incomplete=is_incomplete, 63 | is_different=is_different, 64 | _plc=_plc, 65 | ) 66 | instance = object.__new__(cls) 67 | if nodes_or_number is not None and nx.__version__[:3] > "3.2": 68 | func = nx.utils.decorators.nodes_or_number(nodes_or_number)(func) 69 | # update_wrapper sets __wrapped__, which will be used for the signature 70 | update_wrapper(instance, func) 71 | instance.__defaults__ = func.__defaults__ 72 | instance.__kwdefaults__ = func.__kwdefaults__ 73 | instance.name = func.__name__ if name is None else name 74 | if extra_params is None: 75 | pass 76 | elif isinstance(extra_params, str): 77 | extra_params = {extra_params: ""} 78 | elif not isinstance(extra_params, dict): 79 | raise TypeError( 80 | f"extra_params must be dict, str, or None; got {type(extra_params)}" 81 | ) 82 | instance.extra_params = extra_params 83 | if _plc is None or isinstance(_plc, set): 84 | instance._plc_names = _plc 85 | elif isinstance(_plc, str): 86 | instance._plc_names = {_plc} 87 | else: 88 | raise TypeError( 89 | f"_plc argument must be str, set, or None; got {type(_plc)}" 90 | ) 91 | instance.version_added = version_added 92 | instance.is_incomplete = is_incomplete 93 | instance.is_different = is_different 94 | # The docstring on our function is added to the NetworkX docstring. 
95 | instance.extra_doc = ( 96 | dedent(func.__doc__.lstrip("\n").rstrip()) if func.__doc__ else None 97 | ) 98 | # Copy __doc__ from NetworkX 99 | if instance.name in _registered_algorithms: 100 | instance.__doc__ = _registered_algorithms[instance.name].__doc__ 101 | instance.can_run = _default_can_run 102 | setattr(BackendInterface, instance.name, instance) 103 | # Set methods so they are in __dict__ 104 | instance._can_run = instance._can_run 105 | if nodes_or_number is not None and nx.__version__[:3] <= "3.2": 106 | instance = nx.utils.decorators.nodes_or_number(nodes_or_number)(instance) 107 | return instance 108 | 109 | def _can_run(self, func): 110 | """Set the `can_run` attribute to the decorated function.""" 111 | if not func.__name__.startswith("_"): 112 | raise ValueError( 113 | "The name of the function used by `_can_run` must begin with '_'; " 114 | f"got: {func.__name__!r}" 115 | ) 116 | self.can_run = func 117 | 118 | def __call__(self, /, *args, **kwargs): 119 | return self.__wrapped__(*args, **kwargs) 120 | 121 | def __reduce__(self): 122 | return _restore_networkx_dispatched, (self.name,) 123 | 124 | 125 | def _default_can_run(*args, **kwargs): 126 | return True 127 | 128 | 129 | def _restore_networkx_dispatched(name): 130 | return getattr(BackendInterface, name) 131 | -------------------------------------------------------------------------------- /nx_arangodb/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copied from nx-cugraph 2 | from __future__ import annotations 3 | 4 | import itertools 5 | import operator as op 6 | import sys 7 | from random import Random 8 | from typing import TYPE_CHECKING, SupportsIndex 9 | 10 | # import cupy as cp 11 | import numpy as np 12 | 13 | if TYPE_CHECKING: 14 | # import nx_cugraph as nxcg 15 | 16 | from ..typing import Dtype, EdgeKey # noqa 17 | 18 | __all__ = [ 19 | "index_dtype", 20 | "_dtype_param", 21 | ] 22 | 23 | # This may switch to np.uint32 at some point 24 | index_dtype = np.int32 25 | 26 | # To add to `extra_params=` of `networkx_algorithm` 27 | _dtype_param = { 28 | "dtype : dtype or None, optional": ( 29 | "The data type (np.float32, np.float64, or None) to use for the edge weights " 30 | "in the algorithm. If None, then dtype is determined by the edge values." 
31 | ), 32 | } 33 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | 3 | requires = [ 4 | "setuptools>=61.0.0", 5 | "wheel", 6 | ] 7 | build-backend = "setuptools.build_meta" 8 | 9 | [project] 10 | name = "nx-arangodb" 11 | dynamic = ["version"] 12 | description = "ArangoDB backend for NetworkX" 13 | readme = { file = "README.md", content-type = "text/markdown" } 14 | authors = [ 15 | { name = "ArangoDB" }, 16 | ] 17 | license = { text = "Apache 2.0" } 18 | requires-python = ">=3.10" 19 | classifiers = [ 20 | "License :: OSI Approved :: Apache Software License", 21 | "Programming Language :: Python", 22 | "Programming Language :: Python :: 3", 23 | "Programming Language :: Python :: 3.10", 24 | "Programming Language :: Python :: 3.11", 25 | "Programming Language :: Python :: 3.12", 26 | "Programming Language :: Python :: 3 :: Only", 27 | "Intended Audience :: Developers", 28 | "Topic :: Software Development :: Libraries :: Python Modules", 29 | ] 30 | dependencies = [ 31 | "networkx>=3.0,<=3.5", 32 | "phenolrs~=0.5", 33 | "python-arango~=8.1", 34 | "adbnx-adapter~=5.0.5" 35 | ] 36 | 37 | [project.optional-dependencies] 38 | dev = [ 39 | "packaging>=21", 40 | "pandas", 41 | "pytest", 42 | "pytest-benchmark", 43 | "pytest-cov", 44 | "pytest-mpl", 45 | "pytest-xdist", 46 | "scipy", 47 | "black", 48 | "flake8", 49 | "Flake8-pyproject", 50 | "isort", 51 | "mypy", 52 | "pandas", 53 | "sphinx", 54 | "sphinx_rtd_theme", 55 | ] 56 | llm = [ 57 | "langchain-arangodb", 58 | "langchain_openai" 59 | ] 60 | 61 | [project.urls] 62 | Homepage = "https://github.com/arangodb/nx-arangodb" 63 | 64 | # "plugin" used in nx version < 3.2 65 | [project.entry-points."networkx.plugins"] 66 | arangodb = "nx_arangodb.interface:backend_interface" 67 | 68 | # "backend" used in nx version >= 3.2 69 | [project.entry-points."networkx.backends"] 70 | arangodb = "nx_arangodb.interface:backend_interface" 71 | 72 | [project.entry-points."networkx.plugin_info"] 73 | arangodb = "_nx_arangodb:get_info" 74 | 75 | [project.entry-points."networkx.backend_info"] 76 | arangodb = "_nx_arangodb:get_info" 77 | 78 | [tool.setuptools] 79 | license-files = ["LICENSE"] 80 | 81 | [tool.setuptools.dynamic] 82 | version = {file = "_nx_arangodb/VERSION"} 83 | 84 | [tool.setuptools.packages.find] 85 | include = [ 86 | "nx_arangodb*", 87 | "nx_arangodb.*", 88 | "_nx_arangodb*", 89 | "_nx_arangodb.*", 90 | ] 91 | 92 | [tool.black] 93 | line-length = 88 94 | target-version = ["py39", "py310", "py311"] 95 | 96 | [tool.isort] 97 | sections = ["FUTURE", "STDLIB", "THIRDPARTY", "FIRSTPARTY", "LOCALFOLDER"] 98 | profile = "black" 99 | skip_gitignore = true 100 | float_to_top = true 101 | default_section = "THIRDPARTY" 102 | known_first_party = "nx_arangodb" 103 | line_length = 88 104 | extend_skip_glob = [ 105 | "nx_arangodb/__init__.py", 106 | "nx_arangodb/classes/__init__.py", 107 | ] 108 | 109 | [tool.flake8] 110 | max-line-length = 88 111 | extend-ignore = ["E203", "W503", "E251", "F401", "F403", "F824"] 112 | exclude = [".git", ".idea", ".*_cache", "dist", "venv"] 113 | 114 | [tool.mypy] 115 | strict = true 116 | ignore_missing_imports = true 117 | disallow_untyped_defs = false 118 | disallow_untyped_calls = false 119 | implicit_reexport = true 120 | scripts_are_modules = true 121 | follow_imports = "skip" 122 | disallow_subclassing_any = false 123 | disallow_untyped_decorators = false 124 | 
exclude = ["venv", "build", "vendor/integration_api", "vendor/protodeps"] 125 | 126 | 127 | [tool.pytest.ini_options] 128 | minversion = "6.0" 129 | testpaths = "nx_arangodb/tests" 130 | xfail_strict = true 131 | markers = [ 132 | "slow: Skipped unless --runslow passed", 133 | ] 134 | log_cli_level = "info" 135 | filterwarnings = [ 136 | # See: https://docs.python.org/3/library/warnings.html#describing-warning-filters 137 | # and: https://docs.pytest.org/en/7.2.x/how-to/capture-warnings.html#controlling-warnings 138 | # "error", 139 | ] 140 | python_files = [ 141 | "bench_*.py", 142 | "test_*.py", 143 | ] 144 | python_functions = [ 145 | "bench_*", 146 | "test_*", 147 | ] 148 | addopts = [ 149 | "-s", 150 | "-vv", 151 | "--color=yes", 152 | "--code-highlight=yes", 153 | "--strict-config", # Force error if config is mispelled 154 | "--strict-markers", # Force error if marker is mispelled (must be defined in config) 155 | # "-ra", # Print summary of all fails/errors 156 | # "--benchmark-warmup=off", 157 | # "--benchmark-max-time=0", 158 | # "--benchmark-min-rounds=1", 159 | # "--benchmark-columns=min,median,max", 160 | ] 161 | 162 | [tool.coverage.run] 163 | branch = true 164 | source = ["nx_arangodb"] 165 | omit = [] 166 | 167 | [tool.coverage.report] 168 | ignore_errors = false 169 | precision = 1 170 | fail_under = 0 171 | skip_covered = false # Nice to see fully covered files when running `run_nx_tests.sh` 172 | skip_empty = true 173 | exclude_lines = [ 174 | "pragma: no cover", 175 | "raise AssertionError", 176 | "raise NotImplementedError", 177 | ] 178 | -------------------------------------------------------------------------------- /run_nx_tests.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | NETWORKX_GRAPH_CONVERT=arangodb \ 4 | NETWORKX_TEST_BACKEND=arangodb \ 5 | NETWORKX_FALLBACK_TO_NX=True \ 6 | pytest \ 7 | --pyargs networkx.classes \ 8 | --cov-config=$(dirname $0)/pyproject.toml \ 9 | --cov=nx_arangodb \ 10 | --cov-report= \ 11 | "$@" 12 | coverage report \ 13 | --include="*/nx_arangodb/classes/*" \ 14 | --omit=__init__.py \ 15 | --show-missing \ 16 | --rcfile=$(dirname $0)/pyproject.toml 17 | -------------------------------------------------------------------------------- /starter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Starts a local ArangoDB cluster (enterprise). 
4 | 5 | extra_ports="-p 8539:8539 -p 8549:8549" 6 | image_name="enterprise" 7 | conf_file="cluster.conf" 8 | 9 | docker run -d \ 10 | --name arango \ 11 | -p 8528:8528 \ 12 | -p 8529:8529 \ 13 | $extra_ports \ 14 | -v "$(pwd)/tests/static/":/tests/static \ 15 | -v /tmp:/tmp \ 16 | "arangodb/$image_name:3.12.2" \ 17 | /bin/sh -c "arangodb --configuration=/tests/static/$conf_file" 18 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arangodb/nx-arangodb/a195c2ca1363899183c325e264850909c2f05c78/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from typing import Any, Dict 4 | 5 | import networkx as nx 6 | import pytest 7 | from adbnx_adapter import ADBNX_Adapter 8 | from arango import ArangoClient 9 | from arango.database import StandardDatabase 10 | 11 | import nx_arangodb as nxadb 12 | from nx_arangodb.logger import logger 13 | 14 | logger.setLevel(logging.INFO) 15 | 16 | con: Dict[str, Any] 17 | client: ArangoClient 18 | db: StandardDatabase 19 | run_gpu_tests: bool 20 | 21 | 22 | def pytest_addoption(parser: Any) -> None: 23 | parser.addoption("--url", action="store", default="http://localhost:8529") 24 | parser.addoption("--dbName", action="store", default="_system") 25 | parser.addoption("--username", action="store", default="root") 26 | parser.addoption("--password", action="store", default="test") 27 | parser.addoption( 28 | "--run-gpu-tests", action="store_true", default=False, help="Run GPU tests" 29 | ) 30 | 31 | 32 | def pytest_configure(config: Any) -> None: 33 | global con 34 | con = { 35 | "url": config.getoption("url"), 36 | "username": config.getoption("username"), 37 | "password": config.getoption("password"), 38 | "dbName": config.getoption("dbName"), 39 | } 40 | 41 | print("----------------------------------------") 42 | print("URL: " + con["url"]) 43 | print("Username: " + con["username"]) 44 | print("Password: " + con["password"]) 45 | print("Database: " + con["dbName"]) 46 | 47 | global client 48 | client = ArangoClient(hosts=con["url"]) 49 | 50 | global db 51 | db = client.db(con["dbName"], con["username"], con["password"], verify=True) 52 | 53 | print("Version: " + db.version()) 54 | print("----------------------------------------") 55 | 56 | os.environ["DATABASE_HOST"] = con["url"] 57 | os.environ["DATABASE_USERNAME"] = con["username"] 58 | os.environ["DATABASE_PASSWORD"] = con["password"] 59 | os.environ["DATABASE_NAME"] = con["dbName"] 60 | 61 | global run_gpu_tests 62 | run_gpu_tests = config.getoption("--run-gpu-tests") 63 | 64 | 65 | @pytest.fixture(scope="function") 66 | def load_karate_graph() -> None: 67 | global db 68 | db.delete_graph("KarateGraph", drop_collections=True, ignore_missing=True) 69 | adapter = ADBNX_Adapter(db) 70 | adapter.networkx_to_arangodb( 71 | "KarateGraph", 72 | nx.karate_club_graph(), 73 | edge_definitions=[ 74 | { 75 | "edge_collection": "knows", 76 | "from_vertex_collections": ["person"], 77 | "to_vertex_collections": ["person"], 78 | } 79 | ], 80 | ) 81 | 82 | 83 | @pytest.fixture(scope="function") 84 | def load_two_relation_graph() -> None: 85 | global db 86 | graph_name = "IntegrationTestTwoRelationGraph" 87 | v1 = graph_name + "_v1" 88 | v2 = graph_name + "_v2" 89 | e1 = 
graph_name + "_e1" 90 | e2 = graph_name + "_e2" 91 | 92 | if db.has_graph(graph_name): 93 | db.delete_graph(graph_name, drop_collections=True) 94 | 95 | g = db.create_graph(graph_name) 96 | g.create_edge_definition( 97 | e1, from_vertex_collections=[v1], to_vertex_collections=[v2] 98 | ) 99 | g.create_edge_definition( 100 | e2, from_vertex_collections=[v2], to_vertex_collections=[v1] 101 | ) 102 | 103 | 104 | def get_db(db_name: str) -> StandardDatabase: 105 | global con 106 | global client 107 | return client.db(db_name, con["username"], con["password"], verify=True) 108 | 109 | 110 | def create_line_graph(load_attributes: set[str]) -> nxadb.Graph: 111 | G = nx.Graph() 112 | G.add_edge(1, 2, my_custom_weight=1) 113 | G.add_edge(2, 3, my_custom_weight=1) 114 | G.add_edge(3, 4, my_custom_weight=1000) 115 | G.add_edge(4, 5, my_custom_weight=1000) 116 | 117 | return nxadb.Graph( 118 | incoming_graph_data=G, 119 | name="LineGraph", 120 | edge_collections_attributes=load_attributes, 121 | ) 122 | 123 | 124 | def create_grid_graph(graph_cls: type[nxadb.Graph]) -> nxadb.Graph: 125 | global db 126 | if db.has_graph("GridGraph"): 127 | return graph_cls(name="GridGraph") 128 | 129 | grid_graph = nx.grid_graph(dim=(500, 500)) 130 | return graph_cls( 131 | incoming_graph_data=grid_graph, name="GridGraph", write_async=False 132 | ) 133 | -------------------------------------------------------------------------------- /tests/static/cluster.conf: -------------------------------------------------------------------------------- 1 | [starter] 2 | mode = cluster 3 | local = true 4 | address = 0.0.0.0 5 | port = 8528 6 | 7 | [auth] 8 | jwt-secret = /tests/static/keyfile 9 | 10 | [args] 11 | all.database.password = test 12 | all.database.extended-names = true 13 | all.log.api-enabled = true 14 | all.javascript.allow-admin-execute = true 15 | all.server.options-api = admin 16 | -------------------------------------------------------------------------------- /tests/static/keyfile: -------------------------------------------------------------------------------- 1 | secret 2 | -------------------------------------------------------------------------------- /tests/static/service.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arangodb/nx-arangodb/a195c2ca1363899183c325e264850909c2f05c78/tests/static/service.zip -------------------------------------------------------------------------------- /tests/static/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | mkdir -p /tests/static 4 | wget -O /tests/static/service.zip "http://localhost:8000/$PROJECT/tests/static/service.zip" 5 | wget -O /tests/static/keyfile "http://localhost:8000/$PROJECT/tests/static/keyfile" 6 | wget -O /tests/static/arangodb.conf "http://localhost:8000/$PROJECT/tests/static/$ARANGODB_CONF" 7 | arangodb --configuration=/tests/static/arangodb.conf 8 | --------------------------------------------------------------------------------