├── .circleci └── config.yml ├── .conda_build.sh ├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── release.yaml │ └── test.yaml ├── .gitignore ├── .nojekyll ├── LICENSE ├── MANIFEST.in ├── README.md ├── ci_scripts └── circleci │ ├── install.sh │ ├── push_doc.sh │ └── pypi_deploy.sh ├── doc ├── .special.rst ├── Makefile ├── _figures │ ├── datasets.svg │ ├── example_graph.pdf │ ├── example_graph.svg │ ├── example_graph_attributed.pdf │ ├── example_graph_attributed.svg │ ├── example_graph_directed.pdf │ ├── example_graph_directed.svg │ ├── example_graph_edge_attributed.pdf │ ├── example_graph_edge_attributed.svg │ ├── example_graph_edge_labeled.pdf │ ├── example_graph_edge_labeled.svg │ ├── example_graph_labeled.pdf │ ├── example_graph_labeled.svg │ ├── example_graph_weighted.pdf │ ├── example_graph_weighted.svg │ ├── grakel_schema.svg │ ├── graph_schema.svg │ ├── kernel_schema.svg │ ├── logo.svg │ ├── number_of_graphs.svg │ ├── number_of_nodes.svg │ ├── optimal_assignment_example.png │ ├── optimal_assignment_histograms.png │ └── wl_optimal_assignment.png ├── _static │ ├── css │ │ └── supplementary.css │ ├── kataoka1.png │ ├── marion1.png │ ├── marion2.png │ ├── marion3.png │ ├── marion4.png │ ├── odd_sth_1.png │ ├── odd_sth_2.png │ ├── odd_sth_3.png │ └── odd_sth_4.png ├── _templates │ ├── class.rst │ ├── function.rst │ ├── function_bib.rst │ └── kernel.rst ├── api.rst ├── benchmarks.rst ├── benchmarks │ ├── benchmarks.bib │ ├── comparison.rst │ └── evaluation.rst ├── biblio.bib ├── classes.rst ├── conf.py ├── datasets.rst ├── documentation.rst ├── documentation │ ├── code_for_examples │ │ ├── vertex_kernel.py │ │ └── vertex_kernel_advanced.py │ ├── contributing.rst │ ├── core_concepts.rst │ ├── creating_kernels.rst │ ├── installation.rst │ └── introduction.rst ├── graph.rst ├── graph_kernel.rst ├── index.rst ├── kernels.rst ├── kernels │ ├── core_framework.rst │ ├── edge_histogram.rst │ ├── graph_hopper.rst │ 
├── graph_kernels.bib │ ├── graphlet_sampling.rst │ ├── hadamard_code.rst │ ├── kernel.rst │ ├── lovasz_theta.rst │ ├── multiscale_laplacian.rst │ ├── neighborhood_hash.rst │ ├── neighborhood_subgraph_pairwise_distance.rst │ ├── odd_sth.rst │ ├── propagation.rst │ ├── pyramid_match.rst │ ├── random_walk.rst │ ├── shortest_path.rst │ ├── subgraph_matching.rst │ ├── svm_theta.rst │ ├── vertex_histogram.rst │ ├── weisfeiler_lehman.rst │ └── weisfeiler_lehman_optimal_assignment.rst ├── make.bat ├── sphinxext │ ├── MANIFEST.in │ ├── github_link.py │ ├── sphinx_issues.py │ └── xref.py └── tutorials.rst ├── examples ├── README.txt ├── document_retrieval_example.py ├── erdos_renyi.py ├── node_attributed_dataset.py ├── nx_to_grakel.py ├── optimizing_hyperparameters.py ├── plot_pipeline_example.py ├── shortest_path.py └── weisfeiler_lehman_subtree.py ├── git ├── grakel ├── __init__.py ├── datasets │ ├── __init__.py │ ├── base.py │ └── testing.py ├── graph.py ├── graph_kernels.py ├── kernels │ ├── __init__.py │ ├── _c_functions │ │ ├── __init__.pyx │ │ ├── functions.pyx │ │ ├── header.pxd │ │ ├── include │ │ │ └── functions.hpp │ │ └── src │ │ │ ├── ArashPartov.cpp │ │ │ └── sm_core.cpp │ ├── _isomorphism │ │ ├── __init__.py │ │ ├── bliss-0.50 │ │ │ ├── bignum.hh │ │ │ ├── bliss.cc │ │ │ ├── bliss_C.cc │ │ │ ├── bliss_C.h │ │ │ ├── defs.hh │ │ │ ├── graph.cc │ │ │ ├── graph.hh │ │ │ ├── heap.cc │ │ │ ├── heap.hh │ │ │ ├── kqueue.hh │ │ │ ├── kstack.hh │ │ │ ├── orbit.cc │ │ │ ├── orbit.hh │ │ │ ├── partition.cc │ │ │ ├── partition.hh │ │ │ ├── uintseqhash.cc │ │ │ ├── uintseqhash.hh │ │ │ ├── utils.cc │ │ │ └── utils.hh │ │ ├── bliss.pyx │ │ ├── intpyblissmodule_2.cc │ │ └── intpyblissmodule_3.cc │ ├── core_framework.py │ ├── edge_histogram.py │ ├── graph_hopper.py │ ├── graphlet_sampling.py │ ├── hadamard_code.py │ ├── kernel.py │ ├── lovasz_theta.py │ ├── multiscale_laplacian.py │ ├── neighborhood_hash.py │ ├── neighborhood_subgraph_pairwise_distance.py │ ├── odd_sth.py │ 
├── propagation.py │ ├── pyramid_match.py │ ├── random_walk.py │ ├── shortest_path.py │ ├── subgraph_matching.py │ ├── svm_theta.py │ ├── vertex_histogram.py │ ├── weisfeiler_lehman.py │ └── weisfeiler_lehman_optimal_assignment.py ├── tests │ ├── __main__.py │ ├── data │ │ ├── Cuneiform │ │ │ ├── Cuneiform_A.txt │ │ │ ├── Cuneiform_edge_attributes.txt │ │ │ ├── Cuneiform_edge_labels.txt │ │ │ ├── Cuneiform_graph_indicator.txt │ │ │ ├── Cuneiform_graph_labels.txt │ │ │ ├── Cuneiform_node_attributes.txt │ │ │ ├── Cuneiform_node_labels.txt │ │ │ └── README.txt │ │ └── MUTAG │ │ │ ├── MUTAG_A.txt │ │ │ ├── MUTAG_edge_labels.txt │ │ │ ├── MUTAG_graph_indicator.txt │ │ │ ├── MUTAG_graph_labels.txt │ │ │ ├── MUTAG_node_labels.txt │ │ │ └── README.txt │ ├── test_Kernel.py │ ├── test_common.py │ ├── test_graph.py │ ├── test_graph_kernels.py │ ├── test_kernels.py │ ├── test_utils.py │ └── test_windows_sdp_issue.py ├── tools.py └── utils.py ├── meta.yaml ├── misc ├── implement_list └── install_pynauty.py ├── pyproject.toml ├── requirements.txt ├── setup.py └── tutorials ├── digit_classification └── digit_classification.ipynb └── text_categorization ├── data ├── TREC_10_coarse.label └── train_5500_coarse.label └── text_categorization.ipynb /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | jobs: 4 | python3: 5 | docker: 6 | - image: circleci/python:3.6 7 | steps: 8 | - checkout 9 | - run: python -m venv ./venv 10 | - run: bash ./ci_scripts/circleci/install.sh 11 | - store_artifacts: 12 | path: doc/_build/ 13 | destination: doc 14 | - store_artifacts: 15 | path: ~/log.txt 16 | destination: log.txt 17 | - persist_to_workspace: 18 | root: . 19 | paths: . 20 | 21 | deploy: 22 | docker: 23 | - image: circleci/python:3.6 24 | environment: 25 | # The github organization or username of the repository which hosts the 26 | # project and documentation. 
27 | - USERNAME: ysig 28 | 29 | # The repository where the documentation will be hosted 30 | - DOC_REPO: GraKeL 31 | 32 | # The base URL for the Github page where the documentation will be hosted 33 | - DOC_URL: 0.1a10 34 | 35 | # The email is to be used for commits in the Github Page 36 | - EMAIL: y.siglidis@gmail.com 37 | 38 | # Deploy docs on pypi 39 | - DEPLOY_PYPI: false 40 | 41 | - PUSH_DOCS: false 42 | 43 | steps: 44 | - checkout 45 | - attach_workspace: 46 | at: ~/project 47 | - run: bash ./ci_scripts/circleci/pypi_deploy.sh 48 | - run: bash ./ci_scripts/circleci/push_doc.sh 49 | 50 | workflows: 51 | version: 2 52 | build-doc-and-deploy: 53 | jobs: 54 | - python3 55 | - deploy: 56 | requires: 57 | - python3 58 | # filters: 59 | # branches: 60 | # only: develop 61 | 62 | -------------------------------------------------------------------------------- /.conda_build.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | set -x 3 | rm -rf conda_build 4 | mkdir -p conda_build 5 | conda build purge 6 | conda-build . --output-folder conda_build/ --python 2.7 7 | conda-build . --output-folder conda_build/ --python 3.5 8 | conda-build . --output-folder conda_build/ --python 3.6 9 | conda-build . --output-folder conda_build/ --python 3.7 10 | conda-build . --output-folder conda_build/ --python 3.8 11 | conda-build . --output-folder conda_build/ --python 3.9 12 | conda-build . --output-folder conda_build/ --python 3.10 13 | conda-build . --output-folder conda_build/ --python 3.11 14 | conda-build . 
--output-folder conda_build/ --python 3.12 15 | conda convert -f --platform all conda_build/linux-64/*.tar.bz2 -o conda_build 16 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # Default flake8 3.5 ignored flags 3 | max-line-length = 130 4 | ignore=H306,E121,E123,E126,E741,E226,E24,E704,W503,W504 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | X = ... 16 | y = ... 17 | import grakel as gk 18 | y = gk.fit(..) 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Stack Trace** 24 | If applicable, provide the stack trace related to your error. 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 
18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | # Workflow to build and test wheels 2 | # ================================= 3 | # This github action gets triggered whenever there is a push to the master branch or a release is created. 4 | # It generates both wheels and distributions files, making sure their contents are correct via unit-testing. 5 | # 6 | # Please note that creating a git tag and pushing it (git tag <>; git push --tags) is not 7 | # sufficient to append the wheels and distribution files to your release. 8 | # You need to generate a new release using github, not git. 9 | # 10 | # Guides 11 | # ------ 12 | # cibuildwheel docs: 13 | # * https://cibuildwheel.readthedocs.io/en/stable/options/ 14 | # github actions: 15 | # * https://docs.github.com/en/actions 16 | 17 | name: Wheel builder 18 | 19 | on: 20 | # Manual trigger on github 21 | workflow_dispatch: 22 | inputs: 23 | deploy: 24 | description: "Deploy to PyPI" 25 | required: false 26 | type: boolean 27 | 28 | # Bushj 29 | push: 30 | branches: 31 | - master 32 | # Release branches 33 | - "[0-9]+.[0-9]+.X" 34 | 35 | env: 36 | package-name: GraKeL 37 | 38 | test-cmd: pytest 39 | extra-requires: "wheel" 40 | sdist-version: "3.7" 41 | 42 | jobs: 43 | 44 | build_wheels: 45 | name: Wheels ${{ matrix.os }}-${{ matrix.py }} 46 | runs-on: ${{ matrix.os }} 47 | 48 | # Parellilize as much as possible across github action workers 49 | strategy: 50 | # 1 runner per combination of (os, py) 51 | matrix: 52 | os: [ubuntu-latest, windows-latest, macos-latest] 53 | py: ["cp37-*", "cp38-*", "cp39-*", "cp310-*", "cp311-*"] #, "cp312-*"] not yet supported 54 | # All workers independent, don't cancel all if one fails 55 | fail-fast: false 56 | 57 | steps: 58 | - 
uses: actions/checkout@v3 59 | 60 | # Let's us build aarch64 on linux 61 | - name: Set up QEMU 62 | if: runner.os == 'Linux' 63 | uses: docker/setup-qemu-action@v2 64 | with: 65 | platforms: all 66 | 67 | # https://cibuildwheel.readthedocs.io/en/1.x/options 68 | - name: Build wheels 69 | uses: pypa/cibuildwheel@v2.9.0 70 | env: 71 | CIBW_BUILD_FRONTEND: "build" 72 | CIBW_BUILD: ${{ matrix.py }} 73 | CIBW_ARCHS_MACOS: x86_64 universal2 74 | CIBW_ARCHS_LINUX: x86_64 aarch64 75 | # No win32 ("x86") for Windows as scipy declared it has stopped releasing wheels 76 | # from 1.8.0 onwards, officially from 1.9.3 77 | CIBW_ARCHS_WINDOWS: AMD64 78 | # Install test requirements and run the test-cmd 79 | CIBW_TEST_EXTRAS: ${{ env.extra-requires }} 80 | # {project} is a special string recognized by CIBW and replaced with the project dir 81 | CIBW_TEST_COMMAND: ${{ env.test-cmd }} {project} 82 | # * Scipy has no wheels released for musllinux and will not build because OpenBLAS is not found 83 | CIBW_SKIP: "*-musllinux*" 84 | # https://cibuildwheel.readthedocs.io/en/stable/options/#test-skip 85 | # * Will avoid testing on emulated architectures (aarch64) 86 | # * Skip trying to test arm64 builds on Intel Macs 87 | CIBW_TEST_SKIP: "*-*linux_aarch64 *-macosx_universal2:arm64 " 88 | 89 | - uses: actions/upload-artifact@v3 90 | with: 91 | path: ./wheelhouse/*.whl 92 | 93 | build_sdist: 94 | name: sdist 95 | runs-on: ubuntu-latest 96 | 97 | steps: 98 | - name: Checkout ${{ env.package-name }} 99 | uses: actions/checkout@v3 100 | 101 | - name: Setup Python 102 | uses: actions/setup-python@v4 103 | with: 104 | python-version: ${{ env.sdist-version }} 105 | 106 | - name: Build source distribution 107 | run: | 108 | python -m pip install --upgrade pip setuptools wheel build 109 | python -m build -s 110 | echo "sdist_name=$(ls -t dist/${{ env.package-name }}-*.tar.gz | head -n 1)" >> $GITHUB_ENV 111 | 112 | - name: Twine check ${{ env.package-name }} 113 | run: | 114 | python -m pip 
install twine 115 | twine_output=`twine check ${{ env.sdist_name }}` 116 | twine check ${{env.sdist_name}} --strict 117 | 118 | - name: Install dist 119 | run: | 120 | python -m pip install ${{ env.sdist_name }}[${{ env.extra-requires }}] 121 | 122 | - name: Tests 123 | run: | 124 | ${{ env.test-cmd }} 125 | 126 | - name: Store artifacts 127 | uses: actions/upload-artifact@v2 128 | with: 129 | path: dist/*.tar.gz 130 | 131 | release_assets: 132 | # Only when manually specified 133 | if: ${{ inputs.deploy }} 134 | 135 | name: Upload Release 136 | runs-on: ubuntu-latest 137 | needs: [build_wheels, build_sdist] 138 | steps: 139 | - name: Download artifacts 140 | uses: actions/download-artifact@v2 141 | with: 142 | path: dist 143 | 144 | - name: Display structure of downloaded files 145 | run: | 146 | ls -R 147 | mv dist/artifact/* dist/ 148 | rm -rf dist/artifact 149 | ls -R 150 | 151 | - name: Publish a Python distribution to PyPI 152 | uses: pypa/gh-action-pypi-publish@release/v1 153 | with: 154 | password: ${{ secrets.PYPI_API_TOKEN }} 155 | packages_dir: dist/ 156 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | # Allow to manually trigger through github API 5 | workflow_dispatch: 6 | 7 | # Triggers with push to master 8 | push: 9 | branches: 10 | - master 11 | 12 | # Triggers with push to a pr aimed at master 13 | pull_request: 14 | branches: 15 | - master 16 | 17 | schedule: 18 | # https://crontab.guru/#42_2_3_*_* 19 | # "At 02:42 on day-of-month 3" 20 | # Put it at this odd time to reduce competing with load spikes on github action servers 21 | - cron: '42 2 3 * *' 22 | 23 | env: 24 | 25 | import-package-name: grakel 26 | extra-requires: "[dev]" # "" for no extra_requires 27 | extra-requires-soft: "[test]" # "" for no extra_requires 28 | test-dir: grakel/tests 29 | 30 | # 
https://github.com/eddiebergman/GraKeL/blob/63a2723fc9488257a7c880fa9b5e5cc95ada9f42/ci_scripts/travis/install.sh#L8-L11 31 | coverage-reqs: "networkx pandas" 32 | codecov-py: "3.7" 33 | codecov-args: >- 34 | --cov=grakel 35 | --cov-report=xml 36 | 37 | jobs: 38 | 39 | # General unit tests 40 | source-test: 41 | name: ${{ matrix.py }}-${{ matrix.os }} 42 | 43 | runs-on: ${{ matrix.os }} 44 | defaults: 45 | run: 46 | shell: bash # Default to using bash on all 47 | 48 | strategy: 49 | fail-fast: false 50 | matrix: 51 | py: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] 52 | os: ["ubuntu-latest", "macos-latest", "windows-latest"] 53 | 54 | steps: 55 | - name: Checkout 56 | uses: actions/checkout@v3 57 | 58 | - name: Setup Python 59 | uses: actions/setup-python@v4 60 | with: 61 | python-version: ${{ matrix.py }} 62 | 63 | - name: Install ${{ env.import-package-name }} 64 | run: | 65 | python -m pip install --upgrade pip setuptools wheel 66 | # escape for cvxopt 67 | if ( [[ "${{ matrix.os }}" == "windows-latest" ]] && [[ "${{ matrix.py }}" == "3.7" ]] ) || [[ "${{ matrix.py }}" == "3.12" ]];then 68 | python -m pip install -e ".${{ env.extra-requires-soft }}" 69 | else 70 | python -m pip install -e ".${{ env.extra-requires }}" 71 | fi 72 | 73 | - name: Tests 74 | run: | 75 | python -m pytest # ${{ env.test-dir }} 76 | 77 | # Testing with codecov coverage uploaded 78 | codecov-test: 79 | name: codecov-test 80 | runs-on: ubuntu-latest 81 | 82 | steps: 83 | - name: Checkout 84 | uses: actions/checkout@v3 85 | 86 | - name: Setup Python 87 | uses: actions/setup-python@v4 88 | with: 89 | python-version: ${{ env.codecov-py }} 90 | 91 | - name: Install ${{ env.import-package-name }} 92 | run: | 93 | python -m pip install --upgrade pip setuptools 94 | python -m pip install -e ".${{ env.extra-requires }}" 95 | 96 | - name: Tests 97 | run: | 98 | python -m pytest ${{ env.codecov-args }} ${{ env.test-dir }} 99 | 100 | - name: Upload coverage 101 | uses: codecov/codecov-action@v3 102 
| # Only upload coverage when it's **not** a scheduled test run 103 | if: ${{ ! github.event.schedule }} 104 | with: 105 | fail_ci_if_error: true 106 | verbose: true 107 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # scikit-learn specific 10 | doc/_build/ 11 | doc/auto_examples/ 12 | doc/generated/ 13 | doc/modules/generated 14 | 15 | # Distribution / packaging 16 | 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *,cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | 62 | # Sphinx documentation 63 | docs/_build/ 64 | 65 | # PyBuilder 66 | target/ 67 | 68 | # Pycharm 69 | .idea 70 | 71 | # PyPi 72 | .pypirc 73 | 74 | # Cython Generated files 75 | grakel/kernels/_c_functions.cpython-35m-x86_64-linux-gnu.so 76 | grakel/kernels/_c_functions/functions.cpp 77 | grakel/kernels/_isomorphism/bliss.cpp 78 | -------------------------------------------------------------------------------- /.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/.nojekyll -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | New BSD License 2 | 3 | Copyright (c) 2018- The grakel developers. 4 | All rights reserved. 5 | 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | a. Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | b. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | c. Neither the name of the Scikit-learn Developers nor the names of 16 | its contributors may be used to endorse or promote products 17 | derived from this software without specific prior written 18 | permission. 
19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 31 | DAMAGE. 32 | 33 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include requirements.txt 3 | include LICENSE 4 | 5 | include grakel/kernels/_c_functions/*.pxd 6 | include grakel/kernels/_c_functions/*.pyx 7 | recursive-include grakel/kernels/_c_functions/include * 8 | recursive-include grakel/kernels/_c_functions/src * 9 | 10 | recursive-include grakel/kernels/_isomorphism/bliss-0.50 * 11 | recursive-include grakel/kernels/_isomorphism * 12 | 13 | recursive-include grakel/tests/data/Cuneiform * 14 | recursive-include grakel/tests/data/MUTAG * 15 | 16 | include doc/*.rst 17 | include doc/conf.py 18 | include doc/Makefile 19 | include doc/make.bat 20 | recursive-include doc/documentation * 21 | recursive-include doc/benchmarks * 22 | recursive-include doc/_static * 23 | recursive-include doc/_templates * 24 | recursive-include doc/kernels * 25 | recursive-include doc/sphinxext * 26 | recursive-include examples * 27 | -------------------------------------------------------------------------------- 
/ci_scripts/circleci/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is meant to be called in the "deploy" step defined in 3 | # circle.yml. See https://circleci.com/docs/ for more details. 4 | # The behavior of the script is controlled by environment variable defined 5 | # in the circle.yml in the top level folder of the project. 6 | 7 | # System dependencies 8 | sudo -E apt-get -yq remove texlive-binaries --purge > /dev/null 9 | sudo apt-get install software-properties-common 10 | sudo add-apt-repository universe /dev/null 11 | sudo add-apt-repository main > /dev/null 12 | sudo apt-get update > /dev/null 13 | sudo apt-get install libatlas-dev libatlas3gf-base > /dev/null 14 | sudo apt-get install build-essential python-dev python-setuptools > /dev/null 15 | 16 | # Setup a python venv and install basics 17 | source ./venv/bin/activate 18 | pip install --upgrade pip 19 | 20 | pip install --upgrade pandas networkx matplotlib setuptools nose coverage "Sphinx<5" pillow sphinx-gallery sphinx_rtd_theme "sphinxcontrib-bibtex==1.0" nb2plots numpydoc tqdm > /dev/null 21 | pip install -r requirements.txt > /dev/null 22 | pip install "cvxopt==1.2.0" > /dev/null 23 | 24 | 25 | # More dependencies 26 | sudo -E apt-get -yq update > /dev/null 27 | sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra > /dev/null 28 | 29 | # Install project 30 | python setup.py clean 31 | pip install -e . 32 | 33 | set -o pipefail && cd doc && make clean html doctest 2>&1 | tee ~/log.txt && cd .. 
34 | cat ~/log.txt && if tail -n 1 ~/log.txt | grep -q "Error " ~/log.txt; then false; else true; fi 35 | -------------------------------------------------------------------------------- /ci_scripts/circleci/push_doc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is meant to be called in the "deploy" step defined in 3 | # circle.yml. See https://circleci.com/docs/ for more details. 4 | # The behavior of the script is controlled by environment variable defined 5 | # in the circle.yml in the top level folder of the project. 6 | 7 | if [[ $PUSH_DOCS == "true" ]]; then 8 | MSG="Pushing the docs for revision for branch: $CIRCLE_BRANCH, commit $CIRCLE_SHA1" 9 | 10 | cd $HOME 11 | # Copy the build docs to a temporary folder 12 | 13 | # rename the project folder to the doc repo folder 14 | if [ -d project ]; 15 | then mv project $DOC_REPO; 16 | fi 17 | 18 | rm -rf tmp 19 | mkdir tmp 20 | cp -R $HOME/$DOC_REPO/doc/_build/html/* ./tmp/ 21 | 22 | # Clone the docs repo if it isnt already there 23 | if [ ! -d $DOC_REPO ]; 24 | then git clone "git@github.com:$USERNAME/"$DOC_REPO".git"; 25 | fi 26 | 27 | cd $DOC_REPO 28 | git branch gh-pages 29 | git checkout -f gh-pages 30 | git reset --hard origin/gh-pages 31 | git clean -dfx 32 | 33 | # Copy the new build docs 34 | git rm -rf $DOC_URL 35 | mkdir $DOC_URL 36 | rm -f .nojekyll 37 | touch .nojekyll 38 | cp -R $HOME/tmp/* ./$DOC_URL/ 39 | 40 | git config --global user.email $EMAIL 41 | git config --global user.name $USERNAME 42 | git add -f ./$DOC_URL/ index.html .nojekyll 43 | git commit -m "$MSG [ci skip]" 44 | git push -f origin gh-pages 45 | if [ $? 
-ne 0 ]; then 46 | echo "Pushing docs failed" 47 | echo 48 | exit 1 49 | fi 50 | 51 | echo $MSG 52 | fi -------------------------------------------------------------------------------- /ci_scripts/circleci/pypi_deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is meant to be called in the "deploy" step defined in 3 | # circle.yml. See https://circleci.com/docs/ for more details. 4 | # The behavior of the script is controlled by environment variable defined 5 | # in the circle.yml in the top level folder of the project. 6 | 7 | # Deploy on PyPi: Only works with python2 8 | if [[ $DEPLOY_PYPI == "true" ]]; then 9 | # Build & Upload sphinx-docs 10 | # Initialise .pypirc 11 | echo "[distutils]" > ~/.pypirc 12 | echo "index-servers = pypi" >> ~/.pypirc 13 | echo >> ~/.pypirc 14 | echo "[pypi]" >> ~/.pypirc 15 | echo "username=$USERNAME" >> ~/.pypirc 16 | echo "password=$PYPI_PASSWORD" >> ~/.pypirc 17 | 18 | # Upload sphinx docs 19 | cd ~/project 20 | sudo apt-get install tree 21 | source ~/project/venv/bin/activate 22 | pip install sphinx-pypi-upload 23 | ls ./doc/_build/html 24 | mkdir upload_dir && cp -r ./doc/_build/html ./upload_dir 25 | tree -d ~/project/ || true 26 | ls ./upload_dir/html 27 | sudo apt-get install realpath 28 | ls $(realpath ./upload_dir/html) 29 | python setup.py upload_sphinx --upload-dir=$(realpath ./upload_dir/html) --show-response || true 30 | fi 31 | -------------------------------------------------------------------------------- /doc/.special.rst: -------------------------------------------------------------------------------- 1 | .. Color profiles for Sphinx. 2 | .. Has to be used with hacks.css (bitbucket.org/lbesson/web-sphinx/src/master/.static/hacks.css) 3 | .. role:: black 4 | .. role:: gray 5 | .. role:: grey 6 | .. role:: silver 7 | .. role:: white 8 | .. role:: maroon 9 | .. role:: red 10 | .. role:: magenta 11 | .. role:: fuchsia 12 | .. role:: pink 13 | .. 
role:: orange 14 | .. role:: yellow 15 | .. role:: lime 16 | .. role:: green 17 | .. role:: olive 18 | .. role:: teal 19 | .. role:: cyan 20 | .. role:: aqua 21 | .. role:: blue 22 | .. role:: navy 23 | .. role:: purple 24 | 25 | .. role:: under 26 | .. role:: over 27 | .. role:: blink 28 | .. role:: line 29 | .. role:: strike 30 | 31 | .. role:: it 32 | .. role:: ob 33 | 34 | .. role:: small 35 | .. role:: large 36 | 37 | .. role:: center 38 | .. role:: left 39 | .. role:: right 40 | 41 | 42 | .. (c) Lilian Besson, 2011-2016, https://bitbucket.org/lbesson/web-sphinx/ 43 | -------------------------------------------------------------------------------- /doc/_figures/example_graph.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/example_graph.pdf -------------------------------------------------------------------------------- /doc/_figures/example_graph.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /doc/_figures/example_graph_attributed.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/example_graph_attributed.pdf -------------------------------------------------------------------------------- /doc/_figures/example_graph_directed.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/example_graph_directed.pdf 
-------------------------------------------------------------------------------- /doc/_figures/example_graph_edge_attributed.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/example_graph_edge_attributed.pdf -------------------------------------------------------------------------------- /doc/_figures/example_graph_edge_labeled.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/example_graph_edge_labeled.pdf -------------------------------------------------------------------------------- /doc/_figures/example_graph_labeled.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/example_graph_labeled.pdf -------------------------------------------------------------------------------- /doc/_figures/example_graph_weighted.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/example_graph_weighted.pdf -------------------------------------------------------------------------------- /doc/_figures/optimal_assignment_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/optimal_assignment_example.png -------------------------------------------------------------------------------- /doc/_figures/optimal_assignment_histograms.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/optimal_assignment_histograms.png -------------------------------------------------------------------------------- /doc/_figures/wl_optimal_assignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/wl_optimal_assignment.png -------------------------------------------------------------------------------- /doc/_static/kataoka1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/kataoka1.png -------------------------------------------------------------------------------- /doc/_static/marion1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/marion1.png -------------------------------------------------------------------------------- /doc/_static/marion2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/marion2.png -------------------------------------------------------------------------------- /doc/_static/marion3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/marion3.png -------------------------------------------------------------------------------- /doc/_static/marion4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/marion4.png 
-------------------------------------------------------------------------------- /doc/_static/odd_sth_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/odd_sth_1.png -------------------------------------------------------------------------------- /doc/_static/odd_sth_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/odd_sth_2.png -------------------------------------------------------------------------------- /doc/_static/odd_sth_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/odd_sth_3.png -------------------------------------------------------------------------------- /doc/_static/odd_sth_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/odd_sth_4.png -------------------------------------------------------------------------------- /doc/_templates/class.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | {% endblock %} 11 | 12 | .. include:: {{module}}.{{objname}}.examples 13 | 14 | .. raw:: html 15 | 16 |
17 | -------------------------------------------------------------------------------- /doc/_templates/function.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | .. include:: {{module}}.{{objname}}.examples 9 | 10 | .. raw:: html 11 | 12 |
13 | 14 | -------------------------------------------------------------------------------- /doc/_templates/function_bib.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}==================== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | Bibliography 9 | ------------ 10 | .. bibliography:: ../kernels/graph_kernels.bib 11 | :filter: docname in docnames 12 | 13 | .. include:: {{module}}.{{objname}}.examples 14 | 15 | .. raw:: html 16 | 17 |
18 | 19 | -------------------------------------------------------------------------------- /doc/_templates/kernel.rst: -------------------------------------------------------------------------------- 1 | :mod:`{{module}}`.{{objname}} 2 | {{ underline }}============== 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | {% endblock %} 11 | 12 | 13 | Bibliography 14 | ------------ 15 | .. bibliography:: ../kernels/graph_kernels.bib 16 | :filter: docname in docnames 17 | 18 | .. include:: {{module}}.{{objname}}.examples 19 | 20 | .. raw:: html 21 | 22 |
23 | -------------------------------------------------------------------------------- /doc/api.rst: -------------------------------------------------------------------------------- 1 | ====== 2 | GraKeL 3 | ====== 4 | .. module:: grakel 5 | 6 | The :code:`grakel` module is structured as follows 7 | 8 | .. toctree:: 9 | :maxdepth: 1 10 | 11 | kernels 12 | graph_kernel 13 | graph 14 | datasets 15 | -------------------------------------------------------------------------------- /doc/benchmarks.rst: -------------------------------------------------------------------------------- 1 | .. _benchmarks: 2 | 3 | ========== 4 | Benchmarks 5 | ========== 6 | 7 | In this section, we measure the running times of the implementations of several graph kernels from *GraKeL* and we compare them to each other and to implementations from other packages. 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | benchmarks/comparison 13 | benchmarks/evaluation -------------------------------------------------------------------------------- /doc/benchmarks/benchmarks.bib: -------------------------------------------------------------------------------- 1 | @article{sugiyama2017graphkernels, 2 | title={graphkernels: R and Python packages for graph comparison}, 3 | author={Sugiyama, Mahito and Ghisu, M Elisabetta and Llinares-L{\'o}pez, Felipe and Borgwardt, Karsten}, 4 | journal={Bioinformatics}, 5 | volume={34}, 6 | number={3}, 7 | pages={530--532}, 8 | year={2017} 9 | } 10 | 11 | -------------------------------------------------------------------------------- /doc/classes.rst: -------------------------------------------------------------------------------- 1 | .. _api_ref: 2 | 3 | ============= 4 | API Reference 5 | ============= 6 | 7 | This is the class and function reference of *GraKeL*. In order for the user to understand how to use the package, we suggest he reads :ref:`documentation` section. 
8 | 9 | :mod:`grakel.graph`: Graph class with its utility functions 10 | =========================================================== 11 | 12 | Base Class 13 | ---------- 14 | .. currentmodule:: grakel 15 | 16 | .. autosummary:: 17 | :toctree: generated/ 18 | :template: class.rst 19 | 20 | Graph 21 | 22 | 23 | Utility Functions 24 | ----------------- 25 | .. currentmodule:: grakel 26 | 27 | .. autosummary:: 28 | :toctree: generated/ 29 | :template: function.rst 30 | 31 | graph.is_adjacency 32 | graph.is_edge_dictionary 33 | graph.laplacian 34 | graph.floyd_warshall 35 | 36 | **User guide:** See the :ref:`graph` section for further details. 37 | 38 | :mod:`grakel.graph_kernels`: A kernel decorator 39 | =============================================== 40 | .. currentmodule:: grakel 41 | 42 | Graph Kernel (decorator) 43 | ------------------------ 44 | .. autosummary:: 45 | :toctree: generated/ 46 | :template: class.rst 47 | 48 | grakel.GraphKernel 49 | 50 | **User guide:** See the :ref:`graph_kernel` section for further details. 51 | 52 | :mod:`grakel.kernels`: A collection of graph kernels 53 | ==================================================== 54 | 55 | Kernels 56 | ------- 57 | 58 | .. currentmodule:: grakel 59 | 60 | .. autosummary:: 61 | :toctree: generated/ 62 | :template: kernel.rst 63 | 64 | Kernel 65 | RandomWalk 66 | RandomWalkLabeled 67 | PyramidMatch 68 | NeighborhoodHash 69 | ShortestPath 70 | ShortestPathAttr 71 | GraphletSampling 72 | SubgraphMatching 73 | WeisfeilerLehman 74 | HadamardCode 75 | NeighborhoodSubgraphPairwiseDistance 76 | LovaszTheta 77 | SvmTheta 78 | Propagation 79 | PropagationAttr 80 | OddSth 81 | MultiscaleLaplacian 82 | MultiscaleLaplacianFast 83 | HadamardCode 84 | VertexHistogram 85 | EdgeHistogram 86 | GraphHopper 87 | CoreFramework 88 | WeisfeilerLehmanOptimalAssignment 89 | 90 | **User guide:** See the :ref:`kernels` section for further details. 
91 | 92 | :mod:`grakel.datasets`: Datasets 93 | ================================= 94 | 95 | Fetch 96 | ----- 97 | 98 | .. currentmodule:: grakel.datasets 99 | 100 | .. autosummary:: 101 | :toctree: generated/ 102 | :template: function_bib.rst 103 | 104 | fetch_dataset 105 | 106 | .. autosummary:: 107 | :toctree: generated/ 108 | :template: function.rst 109 | 110 | get_dataset_info 111 | 112 | 113 | **User guide:** See the :ref:`datasets` section for further details. 114 | 115 | 116 | :mod:`grakel`: Utils 117 | ================================= 118 | 119 | .. currentmodule:: grakel 120 | 121 | Use a kernel matrix as a transformer 122 | ------------------------------------ 123 | 124 | .. autosummary:: 125 | :toctree: generated/ 126 | :template: class.rst 127 | 128 | KMTransformer 129 | 130 | Cross Validation 131 | ---------------- 132 | 133 | .. autosummary:: 134 | :toctree: generated/ 135 | :template: function.rst 136 | 137 | cross_validate_Kfold_SVM 138 | 139 | Load from other file formats 140 | ---------------------------- 141 | 142 | .. autosummary:: 143 | :toctree: generated/ 144 | :template: function.rst 145 | 146 | graph_from_networkx 147 | graph_from_pandas 148 | graph_from_csv 149 | 150 | **User guide:** Usefull functions for applying to existing datasets, of other formats. 151 | 152 | .. _gd: https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets 153 | -------------------------------------------------------------------------------- /doc/datasets.rst: -------------------------------------------------------------------------------- 1 | .. _datasets: 2 | 3 | ========================= 4 | Dataset loading utilities 5 | ========================= 6 | .. module:: grakel.datasets 7 | 8 | A module for loading and fetching datasets related with graph kernels. 9 | 10 | .. autosummary:: 11 | :toctree: generated/ 12 | 13 | grakel.datasets.fetch_dataset 14 | grakel.datasets.get_dataset_info 15 | 16 | .. 
_gd: https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets -------------------------------------------------------------------------------- /doc/documentation.rst: -------------------------------------------------------------------------------- 1 | .. _documentation: 2 | 3 | ============= 4 | Documentation 5 | ============= 6 | 7 | In this section, we cover the core concepts in *GraKeL* and show how to use it. 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | documentation/installation 13 | documentation/introduction 14 | documentation/core_concepts 15 | documentation/creating_kernels 16 | documentation/contributing -------------------------------------------------------------------------------- /doc/documentation/code_for_examples/vertex_kernel.py: -------------------------------------------------------------------------------- 1 | from warnings import warn 2 | from collections import Counter 3 | from grakel import Kernel, Graph 4 | 5 | # For python2/3 compatibility 6 | from six.moves.collections_abc import Iterable 7 | 8 | 9 | class VertexHistogram(Kernel): 10 | """Vertex Histogram kernel as found in :cite:`Sugiyama2015NIPS` 11 | 12 | Parameters 13 | ---------- 14 | None. 15 | 16 | Attributes 17 | ---------- 18 | None. 19 | 20 | """ 21 | 22 | # Define the graph format that this kernel needs (if needed) 23 | # _graph_format = "auto" (default: "auto") 24 | 25 | def __init__(self, 26 | n_jobs=n_jobs, 27 | verbose=False, 28 | normalize=False, 29 | # kernel_param_1=kernel_param_1_default, 30 | # ... 
31 | # kernel_param_n=kernel_param_n_default, 32 | ): 33 | """Initialise an `odd_sth` kernel.""" 34 | 35 | # Add new parameters 36 | self._valid_parameters |= new_parameters 37 | 38 | super(VertexHistogram, self).__init__(n_jobs=n_jobs, verbose=verbose, normalize=normalize) 39 | 40 | # Get parameters and check the new ones 41 | # @for i=1 to num_new_parameters 42 | # self.kernel_param_i = kernel_param_i 43 | 44 | # self.initialized_.update({ 45 | # param_needing_initialization_1 : False 46 | # ... 47 | # param_needing_initialization_m : False 48 | # }) 49 | 50 | def initialize_(self): 51 | """Initialize all transformer arguments, needing initialization.""" 52 | # If you want to implement a parallelization by your self here is your chance 53 | # If there is a pairwise operation on the Kernel object there is parallelization is implemented 54 | # Just run the initialise from father to initialise a joblib Parallel (if n_jobs is not None). 55 | super(VertexHistogram, self).initialize_() 56 | 57 | # for i=1 .. m 58 | # if not self.initialized_["param_needing_initialization_i"]: 59 | # # Apply checks (raise ValueError or TypeError accordingly) 60 | # # calculate derived fields stored on self._derived_field_ia .. z 61 | # self.initialized_["param_needing_initialization_i"] = True 62 | pass 63 | 64 | def parse_input(self, X): 65 | """Parse and check the given input for vertex kernel. 66 | 67 | Parameters 68 | ---------- 69 | X : iterable 70 | For the input to pass the test, we must have: 71 | Each element must be an iterable with at most three features and at 72 | least one. The first that is obligatory is a valid graph structure 73 | (adjacency matrix or edge_dictionary) while the second is 74 | node_labels and the third edge_labels (that fitting the given graph 75 | format). 76 | 77 | Returns 78 | ------- 79 | out : list 80 | List of frequency-histogram for each Graph. 
81 | 82 | """ 83 | if not isinstance(X, Iterable): 84 | raise TypeError('input must be an iterable\n') 85 | else: 86 | out = list() 87 | for (i, x) in enumerate(iter(X)): 88 | is_iter = isinstance(x, Iterable) 89 | if is_iter: 90 | x = list(x) 91 | if is_iter and len(x) in [0, 2, 3]: 92 | if len(x) == 0: 93 | warn('Ignoring empty element on index: '+str(i)) 94 | continue 95 | else: 96 | # Our element is an iterable of at least 2 elements 97 | labels = x[1] 98 | elif type(x) is Graph: 99 | # get labels in any existing format 100 | labels = x.get_labels(purpose="any") 101 | else: 102 | raise TypeError('each element of X must be either a ' + 103 | 'graph object or a list with at least ' + 104 | 'a graph like object and node labels ' + 105 | 'dict \n') 106 | 107 | # Append frequencies for the current Graph 108 | out.append(Counter(labels.values())) 109 | 110 | if len(out) == 0: 111 | raise ValueError('parsed input is empty') 112 | return out 113 | 114 | def pairwise_operation(self, x, y): 115 | """Calculate sum of frequency products. 116 | 117 | Parameters 118 | ---------- 119 | x, y : Counter 120 | Label-Frequency Counters as occur from `parse_input`. 121 | 122 | Returns 123 | ------- 124 | kernel : number 125 | The kernel value. 126 | 127 | """ 128 | return sum(x[k]*y[k] for k in x.keys()) 129 | -------------------------------------------------------------------------------- /doc/documentation/contributing.rst: -------------------------------------------------------------------------------- 1 | .. _contributing: 2 | 3 | ============ 4 | Contributing 5 | ============ 6 | 7 | All contributions are welcome! If you are not sure about how you can contribute, please contact the authors of the library. 8 | 9 | Areas you can contribute 10 | ------------------------ 11 | Curious about how you can contribute to *GraKeL*? Here are a few ideas! 12 | 13 | * **Implementing a kernel**: The number of graph kernels that have been proposed in the past years is very large. 
*GraKeL* contains implementations of several of these kernels, but still, there are many kernels that are not contained in the library. You can help making *GraKeL* more complete by implementing new graph kernels. 14 | 15 | * **Optimizing kernel computation**: We have done our best to write maintainable and efficient Python code. However, this does not mean that the code cannot be further optimized. For even higher efficiency, some graph kernels can be re-implemented using wrapped C++ packages. Furthermore, most kernels solve combinatorial problems for which more efficient algorithms (than the employed ones) may exist. 16 | 17 | * **Improving the** :class:`grakel.Graph` **class**: As discussed in the :ref:`core_concepts` section, the :class:`grakel.Graph` class supports both adjacency matrix and edgelist representations of graphs. There are also methods that allow *GraKeL* to read graphs in various formats (e.g., NetworkX graphs). Furthermore, there are methods that implement graph algorithms (e.g., shortest path distances). These operations have to be efficient, both in terms of time and space complexity. Therefore, the :class:`grakel.Graph` class needs to be optimized. The project may benefit a lot from a *Cython* implementation of the class. 18 | 19 | * **Redesigning the** :class:`grakel.Kernel` **class**: The :class:`grakel.Kernel` class was designed to satisfy some constraints (e.g., compatibility with *scikit-learn*) and to be as simple as possible. This class can be extended to support families of kernels or frameworks that are not currently developed such as *deep graph kernels*. 20 | 21 | * **Unit-Testing**: As far as the kernel module is concerned, we have not managed to come up with any methodology for testing if the kernels are correctly implemented. We could use some "reference" code to check if our kernels produce identical results on some datasets, however, in most cases, this is not practical. 
Our tests check if the kernel matrices produced by the kernels are positive semidefinite, however, this can be true even if a kernel is not correctly implemented. We would like to design new tests that can verify the validity of implemeted kernels. 22 | 23 | * **Parallel/Concurrent execution**: The :class:`grakel.GraphKernel` class supports a parallel computation scheme (i.e., using the :code:`n_jobs` attribute), but this has not been implemented for all the kernels or the current implementation is not optimal. Implementations that allow parallel computation of the kernels are of high importance since they can lead to significant speed-ups of kernel computations. 24 | 25 | * **Examples and tutorials**: Have you created an example or tutorial that makes use of the *GraKeL* library? Please let us know. We would be more than happy to include it in our list of examples or tutorials. 26 | 27 | .. _master: https://github.com/ysig/GraKeL 28 | .. _develop: https://github.com/ysig/GraKeL/tree/develop 29 | 30 | 31 | Who to Blame for the GraKeL Project 32 | ----------------------------------- 33 | The *GraKeL* project started in 2018 as part of a one year project funded by `Labex DigiCosme`_. The main contributor to *GraKeL*'s development is Ioannis Siglidis. Ioannis is also responsible for its maintenance. Giannis Nikolentzos is also an active contributor. The project was carried out under the supervision of Professor `Michalis Vazirgiannis`_ at the LIX laboratory of École Polytechnique. The following people have also contributed to the project: Christos Giatsidis, Stratis Limnios and Konstantinos Skianis. 34 | 35 | License 36 | ------- 37 | GraKeL is distributed under the **BSD 3-clause** license. The library makes use of the C++ source code of BLISS_ (a tool for computing automorphism groups and canonical labelings of graphs) which is **LGPL** licensed. 
Furthermore, the cvxopt_ package
What you generally need is a C++ compiler and some python header files. 35 | 36 | Unix Environment 37 | ^^^^^^^^^^^^^^^^^ 38 | 39 | In the case of Unix environments, you need to have installed: 40 | 41 | - A C++ compiler like `g++` 42 | - The package that contains the `Python.h` file such as `python-dev` 43 | 44 | Windows Environment 45 | ^^^^^^^^^^^^^^^^^^^ 46 | 47 | In the case of a Windows environment, you need to install parts of the Windows Virtual Studio SDK (for C++) (for more details, please have a look here_). 48 | 49 | .. note:: 50 | 51 | If you have trouble building `GraKeL`, please raise an issue_ so that we can enrich our installation instructions, as well as addressing the problem. 52 | 53 | Why so Many Packages? 54 | --------------------- 55 | 56 | Graph kernels deal with the problem of graph comparison, a very challenging problem which has been studied for decades. Due to the complex nature of the problem, different types of approaches have been developed so far. Some approaches employ combinatorial algorithms, others formulate the graph comparison algorithm as a continuous optimization problem, while there are also other approaches that apply heuristics. The field of graph kernels is also characterized by such a large diversity of methods. For instance, the *graphlet kernel* solves the graph isomorphism problem to determine the identity of each graphet, while the *Lovász*-:math:`\vartheta` kernel solves a semidefinite programming problem to compute the Lovász number of each graph and the associated orthonormal representations. To solve such problems, *GraKeL* relies on well-established external libraries that provide optimized software that has been developed to address these problems. For example, *GraKeL* uses [bliss]_ to test graph isomorphism and the cvxopt_ library to optimize semidefinite programs. 57 | 58 | .. _cvxopt: https://cvxopt.org/ 59 | 60 | .. 
[bliss] To test graph isomorphism, *GraKeL* extended `PyBliss`_, a Python wrapper for bliss. This allowed *GraKeL* to remain compatible with Python 2/3 and its installation on Windows. Among all the candidate packages, PyBliss was chosen thanks to the information shared by `Tamás Nepusz`_ (developer of the `iGraph`_ library), who pointed out that this package was the most efficient (both in terms of time and memory) for deciding isomorphism between small graphs in experiments conducted using the iGraph library. Other candidate packages include `pynauty`_ (a Python extension of `nauty`_) and `networkx`_ (contains an implementation of the `VF2`_ algorithm). 61 | 62 | .. _PyBliss: http://www.tcs.hut.fi/Software/bliss/ 63 | .. _Tamás Nepusz: http://hal.elte.hu/~nepusz/ 64 | .. _iGraph: http://igraph.org/ 65 | .. _pynauty: https://web.cs.dal.ca/~peter/software/pynauty/html/ 66 | .. _nauty: http://users.cecs.anu.edu.au/~bdm/nauty/ 67 | .. _networkx: https://networkx.github.io/ 68 | .. _VF2: https://networkx.github.io/documentation/networkx-1.10/reference/algorithms.isomorphism.vf2.html 69 | .. _PyPI: https://pypi.org/project/grakel-dev/ 70 | .. _anaconda: https://anaconda.org/ysig/grakel-dev 71 | .. _issue: https://github.com/ysig/GraKeL/issues 72 | .. _here: https://docs.microsoft.com/en-us/visualstudio/python/working-with-c-cpp-python-in-visual-studio?view=vs-2019#prerequisites 73 | -------------------------------------------------------------------------------- /doc/graph.rst: -------------------------------------------------------------------------------- 1 | .. _graph: 2 | 3 | Graph (class) 4 | ============= 5 | Documentation for the graph class. 6 | .. currentmodule:: grakel 7 | 8 | .. autosummary:: 9 | :toctree: generated/ 10 | 11 | grakel.Graph 12 | 13 | .. currentmodule:: grakel.graph 14 | 15 | .. 
autosummary:: 16 | :toctree: generated/ 17 | 18 | grakel.graph.is_adjacency 19 | grakel.graph.is_edge_dictionary 20 | grakel.graph.laplacian 21 | grakel.graph.floyd_warshall -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. grakel documentation master file, created by 2 | sphinx-quickstart on Mon Jan 18 14:44:12 2016. 3 | 4 | ======== 5 | Overview 6 | ======== 7 | 8 | *GraKeL* is a Python package which provides implementations of several graph kernels, a family of powerful methods which allow kernel-based learning approaches such as SVMs to work directly on graphs. 9 | 10 | Getting Started 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | documentation 16 | 17 | ========== 18 | Benchmarks 19 | ========== 20 | 21 | To demonstrate the efficiency of the algorithms implemented in *GraKeL*, we present a comparison of the running times of the implementations of some graph kernels from *GraKeL* and from other packages. We also compare the running times of the different kernels to each other. 22 | 23 | .. toctree:: 24 | :maxdepth: 2 25 | 26 | benchmarks 27 | 28 | ================= 29 | Package Reference 30 | ================= 31 | 32 | A collection of all classes and functions important for the use and understanding of the *GraKeL* package. 33 | 34 | GraKeL provides 35 | 36 | .. toctree:: 37 | :maxdepth: 1 38 | 39 | api 40 | classes 41 | auto_examples/index 42 | tutorials 43 | 44 | 45 | ========== 46 | What's New 47 | ========== 48 | 49 | - Version **0.1a8** 50 | 51 | + Added a new kernel: [Weisfeiler-Lehman-Optimal-Assignment](https://ysig.github.io/GraKeL/0.1a8/kernels/weisfeiler_lehman_optimal_assignment.html). 52 | + Removed MultiScaleLaplacian (as being really slow and useless) and renamed MultiScaleLaplacianFast to MultiScaleLaplacian. 53 | + Fixed minor issues (joblib deprecation, skbunch etc) from `0.1a7`. 
54 | 55 | - Version **0.1a7** 56 | 57 | + Detailed installation instructions for c++ extensions in windows. 58 | + Changed `base_kernel` alias in frameworks with `base_graph_kernel` to disambiguate with vectorial kernels. 59 | + Speed-up for floyd_warshall calculation in graph.py. 60 | + Large update throughout all the documentation. 61 | 62 | - Version **0.1a6** 63 | 64 | + More scikit-learn compatibility: 65 | 66 | 1. Initialise kernels by name and alias on GraphKernel (as GraphKernel(kernel="shortest_path"). 67 | 2. Fit and instantion by default parameters. 68 | 3. Random number generator standardized `check_random_state`. `random_seed` are now `random_state` arguments. 69 | 4. Doctests. 70 | 71 | + Miscelanous: 72 | 73 | 1. Detailed unsupported kernel output. 74 | 2. More detailed licensing information considering **cvxopt** and **BLISS** 75 | 3. Small bugfix inside the (Count Sensitive) Neighborhood Hash Kernel. 76 | 4. Added sparse-compatibility for VertexHistogram and for EdgeHistogram. 77 | 78 | - Version **0.1a5** 79 | 80 | + Various bugfixes in kernel implementations. 81 | + Added a bunch of :code:`utils` functions for external operations: transforming existing *graph formats* (csv, pandas, networkx) to the grakel native, *k-fold cross validation* with an SVM and *kernel matrix transformer* for manipulating precomputed kernel matrices in an :code:`Transformer` fashion. 82 | + **Conda** compatibility: visit ``_. 83 | 84 | ================== 85 | Indices and tables 86 | ================== 87 | 88 | * :ref:`genindex` 89 | * :ref:`modindex` 90 | * :ref:`search` 91 | -------------------------------------------------------------------------------- /doc/kernels.rst: -------------------------------------------------------------------------------- 1 | .. _kernels: 2 | 3 | Kernels (between graphs) 4 | ======================== 5 | .. module:: grakel.kernels 6 | 7 | The documentation of the `kernels` submodule. 8 | 9 | .. 
toctree:: 10 | :maxdepth: 1 11 | :glob: 12 | 13 | kernels/* 14 | -------------------------------------------------------------------------------- /doc/kernels/core_framework.rst: -------------------------------------------------------------------------------- 1 | .. _core_framework: 2 | 3 | Core Kernel Framework 4 | ===================== 5 | 6 | The core framework is a tool for increasing the expressive power of graph kernels :cite:`nikolentzos2018degeneracy`. 7 | The framework is not restricted to graph kernels, but can be applied to any graph comparison algorithm. 8 | It capitalizes on the :math:`k`-core decomposition which is capable of uncovering topological and hierarchical properties of graphs. 9 | Specifically, the :math:`k`-core decomposition is a powerful tool for network analysis and it is commonly used as a measure of importance and well connectedness for vertices in a broad spectrum of applications. 10 | The notion of :math:`k`-core was first introduced by Seidman to study the cohesion of social networks :cite:`seidman1983network`. 11 | In recent years, the :math:`k`-core decomposition has been established as a standard tool in many application domains such as in network visualization :cite:`alvarez2006large`, in protein function prediction :cite:`wuchty2005peeling` and in graph clustering :cite:`giatsidis2014corecluster`. 12 | 13 | Core Decomposition 14 | ------------------ 15 | 16 | Let :math:`G = (V,E)` be an undirected and unweighted graph. 17 | Let :math:`n` and :math:`m` denote the number of vertices and number of edges, respectively. 18 | Given a subset of vertices :math:`S \subseteq V`, let :math:`E(S)` be the set of edges that have both end-points in :math:`S`. 19 | Then, :math:`G'=(S,E(S))` is the subgraph induced by :math:`S`. 20 | We use :math:`G' \subseteq G` to denote that :math:`G'` is a subgraph of :math:`G`. 
21 | The degree of a vertex :math:`v \in S`, :math:`d_{G'}(v)`, is equal to the number of vertices that are adjacent to :math:`v` in :math:`G'`. 22 | Let :math:`G` be a graph and :math:`G'` a subgraph of :math:`G` induced by a set of vertices :math:`S`. 23 | Then, :math:`G'` is defined to be a :math:`k`-core of :math:`G`, denoted by :math:`C_k`, if it is a maximal subgraph of :math:`G` in which all vertices have degree at least :math:`k`. 24 | Hence, if :math:`G'` is a :math:`k`-core of :math:`G`, then :math:`\forall v \in S`, :math:`d_{G'}(v) \geq k`. 25 | Each :math:`k`-core is a unique subgraph of :math:`G`, and it is not necessarily connected. 26 | The core number :math:`c(v)` of a vertex :math:`v` is equal to the highest-order core that :math:`v` belongs to. 27 | In other words, :math:`v` has core number :math:`c(v) = k`, if it belongs to a :math:`k`-core but not to a :math:`(k+1)`-core. 28 | The degeneracy :math:`\delta^*(G)` of a graph :math:`G` is defined as the maximum :math:`k` for which graph :math:`G` contains a non-empty :math:`k`-core subgraph, :math:`\delta^*(G) = \max_{v \in V}c(v)`. 29 | Furthermore, assuming that :math:`\mathcal{C} = \{ C_0, C_1, \ldots, C_{\delta^*(G)} \}` is the set of all :math:`k`-cores, then :math:`\mathcal{C}` forms a nested chain 30 | 31 | .. math:: 32 | 33 | C_{\delta^*(G)} \subseteq \ldots \subseteq C_1 \subseteq C_0 = G 34 | 35 | Therefore, the :math:`k`-core decomposition is a very useful tool for discovering the hierarchical structure of graphs. 36 | The :math:`k`-core decomposition of a graph can be computed in :math:`\mathcal{O}(n+m)` time \cite{matula1983smallest,batagelj2011fast}. 37 | The underlying idea is that we can obtain the :math:`i`-core of a graph if we recursively remove all vertices with degree less than :math:`i` and their incident edges from the graph until no other vertex can be removed. 
38 | 39 | 40 | Core Kernels 41 | ------------ 42 | 43 | The :math:`k`-core decomposition builds a hierarchy of nested subgraphs, each having stronger connectedness properties compared to the previous ones. 44 | The core framework measures the similarity between the corresponding according to the hierarchy subgraphs and aggregates the results. 45 | Let :math:`G=(V,E)` and :math:`G'=(V',E')` be two graphs. 46 | Let also :math:`k` be any kernel for graphs. 47 | Then, the core variant of the base kernel :math:`k` is defined as 48 | 49 | .. math:: 50 | 51 | k_c(G, G') = k(C_0,C'_0) + k(C_1,C'_1) + \ldots + k(C_{\delta^*_{min}},C'_{\delta^*_{min}}) 52 | 53 | where :math:`\delta^*_{min}` is the minimum of the degeneracies of the two graphs, and :math:`C_0,C_1,\ldots,C_{\delta^*_{min}}` and :math:`C'_0,C'_1,\ldots,C'_{\delta^*_{min}}` are the :math:`0`-core, :math:`1`-core,:math:`\ldots`, :math:`\delta^*_{min}`-core subgraphs of :math:`G` and :math:`G'`, respectively. 54 | By decomposing graphs into subgraphs of increasing importance, the algorithm is capable of more accurately capturing their underlying structure. 55 | 56 | The computational complexity of the core framework depends on the complexity of the base kernel and the degeneracy of the graphs under comparison. 57 | Given a pair of graphs :math:`G, G'` and an algorithm :math:`A` for comparing the two graphs, let :math:`\mathcal{O}_A` be the time complexity of algorithm :math:`A`. 58 | Let also :math:`\delta^*_{min} = \min \big( \delta^*(G),\delta^*(G') \big)` be the minimum of the degeneracies of the two graphs. 59 | Then, the complexity of computing the core variant of algorithm :math:`A` is :math:`\mathcal{O}_{c}=\delta^*_{min}\mathcal{O}_A`. 60 | It is well-known that the degeneracy of a graph is upper bounded by the maximum of the degrees of its vertices and by the largest eigenvalue of its adjacency matrix :math:`\lambda_1`. 
61 | Since in most real-world graphs it holds that :math:`\lambda_1 \ll n`, it also holds that :math:`\delta^*_{min} \ll n`, and hence, the time complexity added by the core framework is not very high. 62 | 63 | The implementation of the core framework can be found below 64 | 65 | .. currentmodule:: grakel 66 | 67 | .. autosummary:: 68 | 69 | CoreFramework 70 | 71 | Bibliography 72 | ------------ 73 | .. bibliography:: graph_kernels.bib 74 | :filter: docname in docnames 75 | -------------------------------------------------------------------------------- /doc/kernels/edge_histogram.rst: -------------------------------------------------------------------------------- 1 | .. _edge_histogram: 2 | 3 | Edge Histogram Kernel 4 | ===================== 5 | 6 | The edge histogram kernel is a basic linear kernel on edge label histograms. 7 | The kernel assumes edge-labeled graphs. 8 | Let :math:`\mathcal{G}` be a collection of graphs, and assume that each of their edges comes from an abstract edge space :math:`\mathcal{E}`. 9 | Given a set of edge labels :math:`\mathcal{L}`, :math:`\ell : \mathcal{E} \rightarrow \mathcal{L}` is a function that assigns labels to the edges of the graphs. 10 | Assume that there are :math:`d` labels in total, that is :math:`d = |\mathcal{L}|`. 11 | Then, the edge label histogram of a graph :math:`G=(V,E)` is a vector :math:`\mathbf{f} = (f_1, f_2, \ldots, f_d)`, such that :math:`f_i = |\{ (v,u) \in E : \ell(v,u) = i \}|` for each :math:`i \in \mathcal{L}`. 12 | Let :math:`\mathbf{f}, \mathbf{f}'` be the edge label histograms of two graphs :math:`G, G'`, respectively. 13 | The edge histogram kernel is then defined as the linear kernel between :math:`\mathbf{f}` and :math:`\mathbf{f}'`, that is 14 | 15 | .. math:: 16 | 17 | k(G, G') = \langle \mathbf{f}, \mathbf{f}' \rangle 18 | 19 | The complexity of the edge histogram kernel is linear in the number of edges of the graphs. 20 | 21 | An implementation of that kernel can be found below 22 | 23 | ..
currentmodule:: grakel 24 | 25 | .. autosummary:: 26 | 27 | EdgeHistogram 28 | -------------------------------------------------------------------------------- /doc/kernels/graph_hopper.rst: -------------------------------------------------------------------------------- 1 | .. _graph_hopper: 2 | 3 | Graph Hopper Kernel 4 | =================== 5 | 6 | Given two graphs, the GraphHopper kernel compares shortest paths between pairs of vertices from the two graphs :cite:`feragen2013scalable`. 7 | The kernel takes into account both path lengths and the vertices encountered while "hopping" along shortest paths. 8 | The kernel is equivalent to a weighted sum of node kernels. 9 | 10 | 11 | Let :math:`G=(V,E)` be a graph. 12 | The graph contains either discrete node labels or continuous node attributes. 13 | Let :math:`\ell : \mathcal{V} \rightarrow \mathcal{L}` be a labeling function that assigns either discrete labels or continuous attributes to vertices. 14 | The kernel compares node labels/attributes using a kernel :math:`k_n` (e.g. delta kernel in the case of node labels, and linear or Gaussian kernel in the case of node attributes). 15 | Given two vertices :math:`v,u \in V`, a path :math:`\pi` from :math:`v` to :math:`u` in :math:`G` is defined as a sequence of vertices 16 | 17 | .. math:: 18 | 19 | \pi = [v_1, v_2, v_3, \ldots, v_l] 20 | 21 | where :math:`v_1 = v`, :math:`v_l = u` and :math:`(v_i, v_{i+1}) \in E` for all :math:`i=1,\ldots,l-1`. 22 | Let :math:`\pi(i) = v_i` denote the :math:`i^{th}` vertex encountered when "hopping" along the path. 23 | Denote by :math:`l(\pi)` the weighted length of :math:`\pi` and by :math:`|\pi|` its discrete length, defined as the number of vertices in :math:`\pi`. 24 | The shortest path :math:`\pi_{ij}` from :math:`v_i` to :math:`v_j` is defined in terms of weighted length. 25 | The diameter :math:`\delta(G)` of :math:`G` is the maximal number of nodes in a shortest path in :math:`G`, with respect to weighted path length.
26 | 27 | The GraphHopper kernel is defined as a sum of path kernels :math:`k_p` over the families :math:`P, P'` of shortest 28 | paths in :math:`G,G'` 29 | 30 | .. math:: 31 | 32 | k(G,G') = \sum_{\pi \in P} \sum_{\pi' \in P'} k_p(\pi, \pi') 33 | 34 | The path kernel :math:`k_p(\pi, \pi')` is a sum of node kernels :math:`k_n` on vertices encountered while simultaneously hopping along paths :math:`\pi` and :math:`\pi'` of equal discrete length, that is 35 | 36 | .. math:: 37 | 38 | k_p(\pi, \pi') = \begin{cases} 39 | \sum_{j=1}^{|\pi|} k_n(\pi(j), \pi'(j)), & \text{if $|\pi| = |\pi'|$},\\ 40 | 0, & \text{otherwise.} 41 | \end{cases} 42 | 43 | The :math:`k(G,G')` kernel can be decomposed into a weighted sum of node kernels 44 | 45 | .. math:: 46 | 47 | k(G,G') = \sum_{v \in V} \sum_{v' \in V'} w(v,v') k_n(v, v') 48 | 49 | where :math:`w(v,v')` counts the number of times :math:`v` and :math:`v'` appear at the same hop, or coordinate, :math:`i` of shortest paths :math:`\pi,\pi'` of equal discrete length :math:`|\pi| = |\pi'|`. 50 | We can decompose the weight :math:`w(v,v')` as 51 | 52 | .. math:: 53 | 54 | w(v,v') = \sum_{j=1}^\delta \sum_{i=1}^\delta | \{ (\pi,\pi') : \pi(i)=v, \pi'(i)=v', |\pi|=|\pi'|=j \} | = \sum_{j=1}^\delta \sum_{i=1}^\delta [\mathbf{M_v}]_{ij} [\mathbf{M_{v'}}]_{ij} 55 | 56 | where :math:`\mathbf{M_v}` is a :math:`\delta \times \delta` matrix whose entry :math:`[\mathbf{M_v}]_{ij}` counts how many times :math:`v` appears at the :math:`i^{th}` coordinate of a shortest path in :math:`G` of discrete length :math:`j`, and :math:`\delta = \max(\delta(G), \delta(G'))`. 57 | The components of these matrices can be computed efficiently using recursive message-passing algorithms.
58 | The total complexity of computing :math:`k(G,G')` is :math:`\mathcal{O}(n^2(m + \log n + d + \delta^2))` where :math:`n` is the number of vertices, :math:`m` is the number of edges and :math:`d` is the dimensionality of the node attributes (:math:`d=1` in the case of discrete node labels). 59 | 60 | The implementation of the GraphHopper kernel can be found below 61 | 62 | .. currentmodule:: grakel 63 | 64 | .. autosummary:: 65 | 66 | GraphHopper 67 | 68 | Bibliography 69 | ------------ 70 | .. bibliography:: graph_kernels.bib 71 | :filter: docname in docnames 72 | -------------------------------------------------------------------------------- /doc/kernels/graphlet_sampling.rst: -------------------------------------------------------------------------------- 1 | .. _graphlet_sampling: 2 | 3 | Graphlet Sampling Kernel 4 | ======================== 5 | 6 | The graphlet sampling kernel decomposes graphs into graphlets (i.e. small subgraphs with :math:`k` nodes 7 | where :math:`k \in \{ 3,4,5, \ldots \}`) :cite:`prvzulj2007biological` and counts matching graphlets 8 | in the input graphs. Let :math:`\mathcal{G} = \{ graphlet_1,graphlet_2, \ldots, graphlet_r\}` be the set 9 | of size-:math:`k` graphlets. 10 | Let also :math:`f_G \in \mathbb{N}^r` be a vector such that its :math:`i`-th entry is equal to the 11 | frequency of occurrence of :math:`graphlet_i` in :math:`G`, :math:`f_{G,i} = \#(graphlet_i \sqsubseteq G)`. 12 | Then, the graphlet kernel is defined as follows. 13 | 14 | Graphlet of size :math:`k` Kernel 15 | --------------------------------- 16 | Let :math:`G_i`, :math:`G_j` be two graphs of size :math:`n \geq k`, and :math:`f_{G_i}, f_{G_j}` vectors 17 | that count the occurrence of each graphlet of size :math:`k` in the two graphs. 18 | Then the graphlet kernel is defined as 19 | 20 | ..
math:: 21 | :nowrap: 22 | 23 | \begin{equation} 24 | k(G_i,G_j) = f_{G_i}^\top \ f_{G_j} 25 | \end{equation} 26 | 27 | As is evident from the above definition, the graphlet kernel is computed by explicit feature maps. 28 | First, the representation of each graph in the feature space is computed. 29 | And then, the kernel value is computed as the dot product of the two feature vectors. 30 | The main problem of the graphlet kernel is that an exhaustive enumeration of graphlets is very expensive. 31 | Since there are :math:`\binom{n}{k}` size-:math:`k` subgraphs in a graph, computing the feature vector 32 | for a graph of size :math:`n` requires :math:`\mathcal{O}(n^k)` time. 33 | To account for that, Shervashidze et al. resorted to sampling :cite:`shervashidze2009efficient`. 34 | Following Weissman et al. :cite:`weissman2003inequalities`, they showed that by sampling a fixed number 35 | of graphlets the empirical distribution of graphlets will be sufficiently close to their actual distribution 36 | in the graph. 37 | 38 | Below follows the implemented graphlet sampling kernel. By using the parameter *sampling* the user 39 | can explore various possibilities from sampling all graphlets, to sampling probabilistically based 40 | either on the number of samples or the satisfaction of a certain probabilistic bound on the error. 41 | 42 | .. currentmodule:: grakel 43 | 44 | .. autosummary:: 45 | 46 | GraphletSampling 47 | 48 | Bibliography 49 | ------------ 50 | .. bibliography:: graph_kernels.bib 51 | :filter: docname in docnames 52 | -------------------------------------------------------------------------------- /doc/kernels/hadamard_code.rst: -------------------------------------------------------------------------------- 1 | ..
_hadamard_code: 2 | 3 | Hadamard Code Kernel 4 | ==================== 5 | 6 | A similar framework to the neighborhood-hashing kernel and the Weisfeiler-Lehman kernel was introduced by Tetsuya Kataoka and Akihito Inokuchi in :cite:`icpram16`, known as Hadamard-code kernel. 7 | Given a collection of **labeled** graphs :math:`\mathbf{G}=[G]^{N}_{i=1}` collect the set :math:`\Sigma` of all distinct labels inside :math:`\mathbf{G}`. A :math:`2^{k}`-th Hadamard code matrix :math:`H_{2^{k}}` is defined as follows: 8 | 9 | .. math:: 10 | 11 | H_{2^{k+1}}= \begin{cases} 12 | \begin{pmatrix} 13 | 1 & 1\\ 14 | 1 & -1 15 | \end{pmatrix},\text{ if }k = 0 16 | \\\\ 17 | \begin{pmatrix} 18 | H_{2^{k}} & H_{2^{k}}\\ 19 | H_{2^{k}} & -H_{2^{k}} 20 | \end{pmatrix},\text{if } k > 0 21 | \end{cases} 22 | 23 | Now by defining a Hadamard matrix :math:`\mathbb{H} = H_{2^{\lceil \log_{2}|\Sigma|\rceil}}`, then we initially label each node inside a graph: 24 | 25 | .. math:: 26 | 27 | l^{(0)}(v) = \mathtt{row}_{i}\mathbb{H},\text{ }\textbf{iff}\text{ }label(v) = \Sigma_{i} 28 | 29 | Based on this initial labeling the following relabeling rule: 30 | 31 | .. math:: 32 | 33 | l^{(k+1)}(v) = l^{(k)}(v) + \sum_{u \in N(v)}l^{(k)}(u) 34 | 35 | was used. :math:`N(v)` is used to denote the neighborhood of a node :math:`v`. 36 | Following the above scheme, relabeling is applied iteratively for a fixed number of iterations, while each kernel matrix (calculated from a given *base-kernel*) between relabeled graphs is aggregated to a total one, through summation. 37 | 38 | .. figure:: ../_static/kataoka1.png 39 | 40 | An example of the relabeling procedure of the Hadamard code kernel for a single graph. 41 | 42 | 43 | 44 | The implementation of the hadamard code kernel framework can be found below. Note that one can use :code:`base_kernel` to attach as a base kernel any kernel for **labeled** graphs. 45 | 46 | .. currentmodule:: grakel 47 | 48 | ..
autosummary:: 49 | 50 | HadamardCode 51 | 52 | Bibliography 53 | ------------ 54 | .. bibliography:: graph_kernels.bib 55 | :filter: docname in docnames 56 | -------------------------------------------------------------------------------- /doc/kernels/kernel.rst: -------------------------------------------------------------------------------- 1 | .. _kernel: 2 | 3 | Kernel (general class) 4 | ====================== 5 | 6 | In the literature, a graph kernel appears as a function: :math:`k \; : \; \mathcal{G} \times \mathcal{G} \rightarrow \mathbb{R}` for which there exists a map: 7 | 8 | .. math:: 9 | 10 | \phi \; :\; \mathcal{G} \rightarrow \mathbb{H}, \text{for a Hilbert space } \mathbb{H} 11 | 12 | where each kernel value can be computed as :math:`k(G_{i}, G_{j}) = \langle \phi(G_{i}), \phi(G_{j}) \rangle` where :math:`\langle \cdot , \cdot \rangle` denotes the inner product inside this space. The emerging matrix :math:`\mathbf{K}_{ij} = k(G_{i}, G_{j})` is known as the kernel matrix. For a kernel matrix to be valid it is required to be *positive semidefinite* (i.e. :math:`\lambda_{min} \ge 0`). 13 | 14 | .. note:: 15 | 16 | The kernels implemented inside this package have all a **Polynomial** time complexity. 17 | 18 | In many cases, if instead of computing the kernel for each pair of graphs, we calculate it for the whole collection of graphs :math:`[G]_{i=1}^{N}`, we have a significant computational advantage. Given two collections of graphs: :math:`G^{n}, G^{m}`, the full kernel matrix :math:`\mathcal{K}` looks like: 19 | 20 | .. 
math:: 21 | 22 | \mathcal{K} = 23 | \left[ 24 | \begin{array}{c||c} 25 | \mathcal{K}^{n\times n} & \mathcal{K}^{n\times m} \\ 26 | \hline 27 | \hline 28 | \mathcal{K}^{m\times n} & \mathcal{K}^{m\times m} 29 | \end{array} 30 | \right] 31 | 32 | Any :code:`Kernel` object inherited class should match the following behavior: 33 | 34 | - :math:`\mathcal{K}^{n\times n}=\texttt{.fit_transform}(\mathcal{G}^{n})` 35 | - :math:`\mathcal{K}^{m\times n}=\texttt{.fit}(\mathcal{G}^{\text{n}}).\texttt{transform}(\mathcal{G}^{\text{m}})` 36 | - :math:`\mathcal{K}=\texttt{.fit_transform}([\mathcal{G}^{n}\; \mathcal{G}^{m}])` 37 | 38 | In graph classification, a problem tackled mainly by graph kernels, we are usually interested in calculating the matrices :math:`\mathcal{K}^{n\times n}` and :math:`\mathcal{K}^{m\times n}`. 39 | 40 | .. currentmodule:: grakel 41 | 42 | Parametrization 43 | --------------- 44 | 45 | Any :code:`Kernel` inherited object comes with three parameters: 46 | 47 | - :code:`verbose` is a :code:`bool` parameter in order for the kernel to print messages that are related to the progress of the execution. 48 | - :code:`normalize` parameter is a :code:`bool` parameter which ensures that the kernel output will be normalized, that is :math:`[\mathcal{\hat{K}}]_{ij} = \frac{[\mathcal{K}]_{ij}}{\sqrt[]{[\mathcal{K}]_{ii}*[\mathcal{K}]_{jj}}}`. 49 | - :code:`n_jobs` is an :code:`int` parameter that defines the amount of parallel jobs on which parts of the kernel calculation will be executed (if a parallelization has been implemented). 50 | 51 | .. note:: 52 | 53 | In order for normalization to happen even in a framework scheme, its :code:`Kernel` should have an implemented :code:`diagonal` method. 54 | 55 | The :code:`Kernel` class as discussed above can be found below: 56 | 57 | .. 
autosummary:: 58 | 59 | Kernel 60 | -------------------------------------------------------------------------------- /doc/kernels/lovasz_theta.rst: -------------------------------------------------------------------------------- 1 | .. _lovasz_theta: 2 | 3 | Lovasz Theta Kernel 4 | =================== 5 | 6 | The Lovász number :math:`\vartheta(G)` of a graph :math:`G=(V,E)` is a real number that is an upper bound on the Shannon capacity of the graph. 7 | It was introduced by László Lovász in :math:`1979` :cite:`lovasz1979shannon`. 8 | The Lovász number is intimately connected with the notion of orthonormal representations of graphs. 9 | An orthonormal representation of a graph :math:`G` consists of a set of unit vectors :math:`U_G = \{ \mathbf{u}_i \in \mathbb{R}^d : || \mathbf{u}_i || = 1 \}_{i \in V}` where each vertex :math:`i` is assigned a unit vector :math:`\mathbf{u}_i` such that :math:`(i,j) \not \in E \implies \mathbf{u}_i^\top \mathbf{u}_j = 0`. 10 | Specifically, the Lovász number of a graph :math:`G` is defined as 11 | 12 | .. math:: 13 | 14 | \vartheta(G) = \min_{\mathbf{c}, U_G} \max_{i \in V} \frac{1}{(\mathbf{c}^\top \mathbf{u}_i)^2} 15 | 16 | where :math:`\mathbf{c} \in \mathbb{R}^d` is a unit vector and :math:`U_G` is an orthonormal representation of :math:`G`. 17 | Geometrically, :math:`\vartheta(G)` is defined by the smallest cone enclosing a valid orthonormal representation :math:`U_G`. 18 | The Lovász number :math:`\vartheta(G)` of a graph :math:`G` can be computed to arbitrary precision in polynomial time by solving a semidefinite program. 19 | 20 | The Lovász :math:`\vartheta` kernel utilizes the orthonormal representations associated with the Lovász number to compare graphs :cite:`johansson2014global`. 21 | The kernel is applicable only to unlabeled graphs. 22 | Given a collection of graphs, it first generates orthonormal representations for the vertices of each graph by computing the Lovász :math:`\vartheta` number. 
23 | Hence, :math:`U_G` is a set that contains the orthonormal representations of :math:`G`. 24 | Let :math:`S \subseteq V` be a subset of the vertex set of :math:`G`. 25 | Then, the Lovász value of the set of vertices :math:`S` is defined as 26 | 27 | .. math:: 28 | 29 | \vartheta_S(G) = \min_{\mathbf{c}} \max_{i \in S} \frac{1}{(\mathbf{c}^\top \mathbf{u}_i)^2} 30 | 31 | where :math:`\mathbf{c} \in \mathbb{R}^d` is a unit vector and :math:`\mathbf{u}_i` is the representation of vertex :math:`i` obtained by computing the Lovász number :math:`\vartheta(G)` of :math:`G`. 32 | The Lovász value of a set of vertices :math:`S` represents the angle of the smallest cone enclosing the set of orthonormal representations of these vertices (i.e. subset of :math:`U_G` defined as :math:`\{ \mathbf{u}_i : \mathbf{u}_i \in U_G, i \in S \}`). 33 | 34 | The Lovász :math:`\vartheta` kernel between two graphs :math:`G, G'` is then defined as follows: 35 | 36 | .. math:: 37 | 38 | k_{Lo}(G, G') = \sum_{S \subseteq V} \sum_{S' \subseteq V'} \delta(|S|, |S'|) \frac{1}{Z_{|S|}} k(\vartheta_S(G), \vartheta_{S'}(G')) 39 | 40 | where :math:`Z_{|S|} = \binom{|V|}{|S|} \binom{|V'|}{|S|}`, :math:`\delta(|S|, |S'|)` is a delta kernel (equal to :math:`1` if :math:`|S|=|S'|`, and :math:`0` otherwise), and :math:`k` is a positive semi-definite kernel between Lovász values (e.g. linear kernel, Gaussian kernel). 41 | 42 | The Lovász :math:`\vartheta` kernel consists of two main steps: (:math:`1`) computing the Lovász number :math:`\vartheta` of each graph and obtaining the associated orthonormal representations, and (:math:`2`) computing the Lovász value for all subgraphs (i.e. subsets of vertices :math:`S \subseteq V`) of each graph. 43 | Exact computation of the Lovász :math:`\vartheta` kernel is in most real settings infeasible since it requires computing the minimum enclosing cones of :math:`2^n` sets of vertices. 44 | 45 | When dealing with large graphs, it is thus necessary to resort to sampling.
46 | Given a graph :math:`G`, instead of evaluating the Lovász value on all :math:`2^n` sets of vertices, the algorithm evaluates it on a smaller number of subgraphs :math:`\mathfrak{S} \subseteq 2^V`. 47 | Then, the Lovász :math:`\vartheta` kernel is defined as follows 48 | 49 | .. math:: 50 | 51 | \hat{k}_{Lo}(G, G') = \sum_{S \subseteq \mathfrak{S}} \sum_{S' \subseteq \mathfrak{S}'} \delta(|S|, |S'|) \frac{1}{\hat{Z}_{|S|}} k(\vartheta_S(G), \vartheta_{S'}(G')) 52 | 53 | where :math:`\hat{Z}_{|S|} = |\mathfrak{S}_{|S|}| |\mathfrak{S}'_{|S|}|` and :math:`\mathfrak{S}_{|S|}` denotes the subset of :math:`\mathfrak{S}` consisting of all sets of cardinality :math:`|S|`, that is :math:`\mathfrak{S}_{|S|} = \{ B \in \mathfrak{S} : |B| = |S| \}`. 54 | 55 | The time complexity of computing :math:`\hat{k}_{Lo}(G, G')` is :math:`\mathcal{O}(n^2 m \epsilon^{-1} + s^2 T(k) + sn)` where :math:`T(k)` is the complexity of computing the base kernel :math:`k`, :math:`n = |V|`, :math:`m = |E|` and :math:`s = \max(|\mathfrak{S}|, |\mathfrak{S}'|)`. 56 | The first term represents the cost of solving the semi-definite program that computes the Lovász number :math:`\vartheta`. 57 | The second term corresponds to the worst-case complexity of computing the sum of the Lovász values. 58 | And finally, the third term is the cost of computing the Lovász values of the sampled subsets of vertices. 59 | 60 | The implementation of the Lovász :math:`\vartheta` kernel can be found below 61 | 62 | .. currentmodule:: grakel 63 | 64 | .. autosummary:: 65 | 66 | LovaszTheta 67 | 68 | Bibliography 69 | ------------ 70 | .. bibliography:: graph_kernels.bib 71 | :filter: docname in docnames 72 | -------------------------------------------------------------------------------- /doc/kernels/neighborhood_subgraph_pairwise_distance.rst: -------------------------------------------------------------------------------- 1 | ..
_nspdk: 2 | 3 | Neighborhood Subgraph Pairwise Distance Kernel 4 | ============================================== 5 | The neighborhood subgraph pairwise distance kernel extracts pairs of rooted subgraphs from each graph whose roots are located at a certain distance from each other, and which contain vertices up to a certain distance from the root :cite:`costa2010fast`. 6 | It then compares graphs based on these pairs of rooted subgraphs. 7 | To avoid isomorphism checking, graph invariants are employed to encode each rooted subgraph. 8 | 9 | Let :math:`G=(V,E)` be a graph. 10 | The distance between two vertices :math:`u,v \in V`, denoted :math:`D(u,v)`, is the length of the shortest path between them. 11 | The neighborhood of radius :math:`r` of a vertex :math:`v` is the set of vertices at a distance less than or equal to :math:`r` from :math:`v`, that is :math:`\{ u \in V : D(u,v) \leq r\}`. 12 | Given a subset of vertices :math:`S \subseteq V`, let :math:`E(S)` be the set of edges that have both end-points in :math:`S`. 13 | Then, the subgraph with vertex set :math:`S` and edge set :math:`E(S)` is known as the subgraph induced by :math:`S`. 14 | The neighborhood subgraph of radius :math:`r` of vertex :math:`v` is the subgraph induced by the neighborhood of radius :math:`r` of :math:`v` and is denoted by :math:`N_r^v`. 15 | Let also :math:`R_{r,d}(A_v,B_u,G)` be a relation between two rooted graphs :math:`A_v`, :math:`B_u` and a graph :math:`G=(V,E)` that is true if and only if both :math:`A_v` and :math:`B_u` are in :math:`\{N_r^v : v \in V \}`, where we require :math:`A_v, B_u` to be isomorphic to some :math:`N_r^v` to verify the set inclusion, and that :math:`D(u,v) = d`. 16 | We denote with :math:`R^{-1}(G)` the inverse relation that yields all the pairs of rooted graphs :math:`A_v`, :math:`B_u` satisfying the above constraints. 
17 | Hence, :math:`R^{-1}(G)` selects all pairs of neighborhood graphs of radius :math:`r` whose roots are at distance :math:`d` in a given graph :math:`G`. 18 | The neighborhood subgraph pairwise distance kernel utilizes the following kernel 19 | 20 | .. math:: 21 | 22 | k_{r,d}(G, G') = \sum_{A_v, B_v \in R_{r,d}^{-1}(G)} \quad \sum_{A'_{v'}, B'_{v'} \in R_{r,d}^{-1}(G')} \delta(A_v, A'_{v'}) \delta(B_v, B'_{v'}) 23 | 24 | where :math:`\delta` is :math:`1` if its input subgraphs are isomorphic, and :math:`0` otherwise. 25 | The above kernel counts the number of identical pairs of neighboring graphs of radius :math:`r` at distance :math:`d` between two graphs. 26 | Then, the neighborhood subgraph pairwise distance kernel is defined as 27 | 28 | .. math:: 29 | 30 | k(G, G') = \sum_{r=0}^{r^*} \sum_{d=0}^{d^*} \hat{k}_{r,d}(G, G') 31 | 32 | where :math:`\hat{k}_{r,d}` is a normalized version of :math:`k_{r,d}`, that is 33 | 34 | .. math:: 35 | 36 | \hat{k}_{r,d}(G,G') = \frac{k_{r,d}(G,G')}{\sqrt{k_{r,d}(G,G) k_{r,d}(G',G')}} 37 | 38 | The above version ensures that relations of all orders are equally weighted regardless of the size of the induced part sets. 39 | 40 | The neighborhood subgraph pairwise distance kernel includes an exact matching kernel over two graphs (i.e. the :math:`\delta` kernel) which is equivalent to solving the graph isomorphism problem. 41 | Solving the graph isomorphism problem is not feasible. 42 | Therefore, the kernel produces an approximate solution to it instead. 43 | Given a subgraph :math:`G_S` induced by the set of vertices :math:`S`, the kernel computes a graph invariant encoding for the subgraph via a label function :math:`\mathcal{L}^g : \mathcal{G} \rightarrow \Sigma^*`, where :math:`\mathcal{G}` is the set of rooted graphs and :math:`\Sigma^*` is the set of strings over a finite alphabet :math:`\Sigma`.
44 | The function :math:`\mathcal{L}^g` makes use of two other label functions: (:math:`1`) a function :math:`\mathcal{L}^n` for vertices, and (:math:`2`) a function :math:`\mathcal{L}^e` for edges. 45 | The :math:`\mathcal{L}^n` function assigns to vertex :math:`v` the concatenation of the lexicographically sorted list of distance-distance from root-label triplets :math:`\langle D(v,u), D(v,h), \mathcal{L}(u) \rangle` for all :math:`u \in S`, where :math:`h` is the root of the subgraph and :math:`\mathcal{L}` is a function that maps vertices/edges to their label symbol. 46 | Hence, the above function relabels each vertex with a string that encodes the initial label of the vertex, the vertex distance from all other labeled vertices, and the distance from the root vertex. 47 | The :math:`\mathcal{L}^e(u,v)` function assigns to edge :math:`(u,v)` the label :math:`\langle \mathcal{L}^n(u)`, :math:`\mathcal{L}^n(v)`, :math:`\mathcal{L}((u,v)) \rangle`. 48 | The :math:`\mathcal{L}^e(u,v)` function thus annotates each edge based on the new labels of its endpoints, and its initial label, if any. 49 | Finally, the function :math:`\mathcal{L}^g(G_S)` assigns to the rooted graph induced by :math:`S` the concatenation of the lexicographically sorted list of :math:`\mathcal{L}^e(u,v)` for all :math:`(u,v) \in E(S)`. 50 | The kernel then employs a hashing function from strings to natural numbers :math:`H : \Sigma^* \rightarrow \mathbb{N}` to obtain a unique identifier for each subgraph. 51 | Hence, instead of testing pairs of subgraphs for isomorphism, the kernel just checks if the subgraphs share the same identifier. 52 | 53 | The computational complexity of the neighborhood subgraph pairwise distance kernel is :math:`\mathcal{O}(|V| |S| |E(S)| \log |E(S)|)` and is dominated by the repeated computation of the graph invariant for each vertex of the graph. 
54 | Since this is a constant time procedure, for small values of :math:`d^*` and :math:`r^*`, the complexity of the kernel is in practice linear in the size of the graph. 55 | 56 | The implementation of the neighborhood subgraph pairwise distance kernel can be found below 57 | 58 | .. currentmodule:: grakel 59 | 60 | .. autosummary:: 61 | 62 | NeighborhoodSubgraphPairwiseDistance 63 | 64 | Bibliography 65 | ------------ 66 | .. bibliography:: graph_kernels.bib 67 | :filter: docname in docnames 68 | -------------------------------------------------------------------------------- /doc/kernels/odd_sth.rst: -------------------------------------------------------------------------------- 1 | .. _odd_sth: 2 | 3 | ODD-STh Kernel 4 | ============== 5 | The ODD-STh kernel is a kernel between labeled graphs. Its approach derives from the idea of utilizing tree-based kernels, i.e. kernels that take as input graphs that are trees. Such kernels are in general more computationally efficient as trees are constrained to interesting properties. 6 | The idea behind the ODD-STh kernel proposed in :cite:`Martino2012ATK`, has to do with the decomposition of two graphs into ordered DAGs and adding the kernel values between all pairs of DAGs of the original graphs as: 7 | 8 | .. math:: 9 | 10 | K_{K_{DAG}}(G_{1}, G_{2}) = \sum_{\substack{D_{1} \in DD(G_{1}) \\ D_{2} \in DD(G_{2})}} K_{DAG}(D1, D2) 11 | 12 | where :math:`DD(G_{i})` corresponds to a graph decomposition of this graph and :math:`K_{DAG}` is a kernel between DAGs. As a DAG decomposition of each graph they considered the set of all directed BFS explorations starting from each node inside the graph, as follows in the picture: 13 | 14 | ..
figure:: ../_static/odd_sth_1.png 15 | :scale: 100 % 16 | 17 | A simple DAG decomposition of a single graph 18 | 19 | 20 | 21 | Now in order to move from DAGs to trees each :math:`K_{DAG}` kernel was calculated as the sum of tree kernel between derived trees between each of the two DAGs: 22 | 23 | .. math:: 24 | 25 | K_{DAG} = \sum_{\substack{v_{1} \in V(D_{1}) \\ v_{2} \in V(D_{2})}} C(root(T(v_{1})), root(T(v_{2}))) 26 | 27 | where :math:`T()` corresponds to the tree-visits on DAGs (which preserve an essence of *ordering*, as found in :cite:`Martino2012ATK`, section 5.2). An example of such tree visits follows: 28 | 29 | .. figure:: ../_static/odd_sth_2.png 30 | :scale: 100 % 31 | 32 | Ordered tree visits on a DAG decomposed from a graph 33 | 34 | :math:`C()` is a kernel between trees, where in our case it will be the **S**\ ub-\ **T**\ ree Kernel (as found in :cite:`STKernel`). 35 | 36 | .. note:: 37 | Tree isomorphism can `be decided in linear time on the sum of the number of nodes and the number of edges `_ 38 | 39 | For increasing the efficiency of this algorithm for the new set of DAG decompositions, known as ODD (*Ordered Dag Decomposition*), an aggregation of all the decompositions in a single DAG was proposed, notated as :math:`BigDAG`. This method, introduced in (:cite:`Martino2006`, MinimalDAG: Figure 2, p. 3), aggregates nodes having the same labels with frequencies if they correspond to the same path on each DAG, while conserving the existence of nodes that cannot be aggregated. 40 | 41 | .. figure:: ../_static/odd_sth_3.png 42 | :scale: 100 % 43 | 44 | Construction of a :math:`BigDAG` from two DAGs 45 | 46 | Doing so allows us to replace the kernel computation: 47 | 48 | .. math:: 49 | 50 | K_{K_{DAG}}(G_{1}, G_{2}) = \sum_{\substack{D_{1} \in DD(G_{1}) \\ D_{2} \in DD(G_{2})}} K_{DAG}(D1, D2) 51 | 52 | with: 53 | 54 | ..
math:: 55 | 56 | K_{BigDAG}(G_{1}, G_{2}) = \sum_{\substack{u_{1} \in V(BigDAG(G_{1}))\\ u_{2} \in V(BigDAG(G_{2}))}} f_{u_{1}}f_{u_{2}}C(u_{1}, u_{2}) 57 | 58 | where :math:`f_{u}` is the frequency counter of the node :math:`u` and :math:`C(u, v)` is the number of matching proper subtrees from :math:`u` and :math:`v`. An even more abstract idea they followed was to create a :math:`Big^{2}DAG` where all the :math:`BigDAGs` created from each graph, would be aggregated to a single one, in the same way as in trees, but instead of incrementing frequencies on common nodes a frequency vector of appended frequencies for each DAG was constructed. 59 | 60 | .. figure:: ../_static/odd_sth_4.png 61 | :scale: 100 % 62 | 63 | Construction of a :math:`Big^{2}DAG` from two :math:`BigDAGs` 64 | 65 | In the final :math:`Big^{2}DAG` graph, the computation of the kernel matrix is all about calculating the following formula: 66 | 67 | .. math:: 68 | 69 | K_{Big^{2}DAG}(G_{i}, G_{j}) = \sum_{u_{1}, u_{2} \in V(Big^{2}DAG)} F_{u_{1}}[i] * F_{u_{2}}[j] C(u_{1}, u_{2}) 70 | 71 | which is equivalent to: 72 | 73 | .. math:: 74 | 75 | K_{Big^{2}DAG}(G_{i}, G_{j}) = \sum_{u \in V(Big^{2}DAG)} F_{u}[i] * F_{u}[j] C(u, u) 76 | 77 | because the subtree kernel will have a match only between identical subtrees, that is: 78 | 79 | .. math:: 80 | 81 | C(u_{1}, u_{2}) \not= 0 \leftrightarrow T(u_{1}) = T(u_{2}) 82 | 83 | Finally, in order to construct the :math:`Big^{2}DAG` each vertex would be represented by a tuple containing a unique hash (whose uniqueness has to do with the ordering), a frequency vector and a depth, which were utilized for calculating the kernel value. In order to restrict the size of the produced graphs a parameter :math:`h` was introduced which restricts the maximum depth of the BFS exploration when doing the graph decomposition. 84 | 85 | The Ordered Dag Decomposition - Sub-Tree :math:`h` (ODD-STh) kernel can be found implemented below: 86 | 87 | ..
currentmodule:: grakel 88 | 89 | .. autosummary:: 90 | 91 | OddSth 92 | 93 | .. note:: 94 | 95 | Because the :math:`Big^{2}DAG` graph should be preserved through consequent transformations, the cost of copying it may make :code:`fit_transform` calculation between all the graphs of *train* and *test* faster than fitting on *train* graphs and transforming on *test* graphs. 96 | 97 | Bibliography 98 | ------------ 99 | .. bibliography:: graph_kernels.bib 100 | :filter: docname in docnames -------------------------------------------------------------------------------- /doc/kernels/propagation.rst: -------------------------------------------------------------------------------- 1 | .. _propagation: 2 | 3 | Propagation Kernel 4 | ====================== 5 | Propagation kernels were introduced as a general framework in :cite:`neumann2015propagation`. They are based on the idea of propagating label information between nodes of the graph, based on the graph structure. 6 | A graph is considered to have **attributes** on nodes, where in the case of labels they correspond to One-Hot-Vectors of the full dictionary of labels. 7 | The totality of nodes for each graph, can be seen as a probability distribution :math:`P` of size :math:`n \times d` where :math:`n` corresponds to the number of nodes and :math:`d` to the size of attributes. 8 | Then, the idea of diffusion is applied in order to construct the algorithmic framework of propagation kernels. 9 | Given a transition matrix :math:`T` that is row normalized, an iterative propagation scheme will be built on the basis of the following simple substitution rule: 10 | 11 | .. math:: 12 | 13 | P_{t+1} \leftarrow T P_{t} 14 | 15 | Other than a user given transition matrix, :math:`T = D^{-1}A` was considered as default for each graph, where :math:`D = diag(\sum_{j} A_{ij})` and :math:`A` corresponds to the adjacency matrix of this graph. 16 | The general algorithm for propagation kernels is as follows: 17 | 18 | ..
figure:: ../_static/marion1.png 19 | 20 | The general algorithmic scheme for propagation kernels. 21 | 22 | 23 | The kernel computation :math:`\langle \Phi, \Phi \rangle_{ij}`, at iteration :math:`t` between two graphs :math:`i, j` is equivalent with the: 24 | 25 | .. math:: 26 | 27 | K(G^{(i)}_{t}, G^{(j)}_{t}) = \sum_{u \in G^{(i)}_{t}} \sum_{v \in G^{(j)}_{t}} k(u, v) 28 | 29 | where the node kernel :math:`k(u, v)` is resolved through binning. 30 | In order to bin nodes a method should be found that was both efficient and expressive. 31 | A simple hashing function was considered a bad idea as it would separate values that where much more common than others. A sense of *locality* was needed when binning in order to group similar diffusion patterns in the same bin, similar to what is shown in the following: 32 | 33 | .. figure:: ../_static/marion2.png 34 | 35 | A binning scheme between a two step label propagation. 36 | 37 | For that the technique of locally sensitive hashing [**LSH**] was utilized, which was applied to all the input graphs as shown in the following: 38 | 39 | .. figure:: ../_static/marion3.png 40 | 41 | The locally sensitive function implemented inside the kernel. 42 | 43 | Finally the following algorithm was implemented, in our case where we consider all graphs to be *fully-labeled*: 44 | 45 | .. figure:: ../_static/marion4.png 46 | 47 | The propagation kernel algorithm, which was implemented inside the package. 48 | 49 | In case we have an attributed graph the :math:`P_{0} \leftarrow \delta_{l(V)}` is replaced by :math:`P_{0} \leftarrow attr(V)` considering all the node attributes have the same dimension. 50 | 51 | Both for attributed and for labeled graphs, implementations of the propagation can be found below: 52 | 53 | .. currentmodule:: grakel 54 | 55 | .. autosummary:: 56 | 57 | Propagation 58 | PropagationAttr 59 | 60 | Bibliography 61 | ------------ 62 | .. 
bibliography:: graph_kernels.bib 63 | :filter: docname in docnames -------------------------------------------------------------------------------- /doc/kernels/pyramid_match.rst: -------------------------------------------------------------------------------- 1 | .. _pyramid_match: 2 | 3 | Pyramid Match Kernel 4 | ==================== 5 | 6 | The pyramid match kernel is a very popular algorithm in Computer Vision, and has proven useful for many applications including object recognition and image retrieval :cite:`grauman2007pyramid`, :cite:`lazebnik2006beyond`. 7 | The pyramid match graph kernel extends its applicability to graph-structured data :cite:`nikolentzos2017matching`. 8 | The kernel can handle both unlabeled graphs and graphs that contain discrete node labels. 9 | 10 | The pyramid match graph kernel first embeds the vertices of each graph into a low-dimensional vector space using the eigenvectors of the :math:`d` largest in magnitude eigenvalues of the adjacency matrix of the graph. 11 | Since the signs of these eigenvectors are arbitrary, it replaces all their components by their absolute values. 12 | Each vertex is thus a point in the :math:`d`-dimensional unit hypercube. 13 | To find an approximate correspondence between the sets of vertices of two graphs, the kernel maps these points to multi-resolution histograms, and compares the emerging histograms with a weighted histogram intersection function. 14 | 15 | Initially, the kernel partitions the feature space into regions of increasingly larger size and takes a weighted sum of the matches that occur at each level. 16 | Two points match with each other if they fall into the same region. 17 | Matches made within larger regions are weighted less than those found in smaller regions. 18 | The kernel repeatedly fits a grid with cells of increasing size to the :math:`d`-dimensional unit hypercube.
19 | Each cell is related only to a specific dimension and its size along that dimension is doubled at each iteration, while its size along the other dimensions stays constant and equal to :math:`1`. 20 | Given a sequence of levels from :math:`0` to :math:`L`, then at level :math:`l`, the :math:`d`-dimensional unit hypercube has :math:`2^l` cells along each dimension and :math:`D = 2^{l}d` cells in total. 21 | Given a pair of graphs :math:`G,G'`, let :math:`H_G^l` and :math:`H_{G'}^l` denote the histograms of :math:`G` and :math:`G'` at level :math:`l` and :math:`H_G^l(i)`, :math:`H_{G'}^l(i)`, the number of vertices of :math:`G`, :math:`G'` that lie in the :math:`i^{th}` cell. 22 | The number of points in two sets which match at level :math:`l` is then computed using the histogram intersection function 23 | 24 | .. math:: 25 | 26 | I(H_G^l,H_{G'}^l) = \sum_{i=1}^D \min\big(H_G^l(i),H_{G'}^l(i)\big) 27 | 28 | The matches that occur at level :math:`l` also occur at levels :math:`0, \ldots, l-1`. 29 | We are interested in the number of new matches found at each level which is given by :math:`I(H_{G_1}^l,H_{G_2}^l) - I(H_{G_1}^{l+1},H_{G_2}^{l+1})` for :math:`l=0,\ldots,L-1`. 30 | The number of new matches found at each level in the pyramid is weighted according to the size of that level's cells. 31 | Matches found within smaller cells are weighted more than those made in larger cells. 32 | Specifically, the weight for level :math:`l` is set equal to :math:`\frac{1}{2^{L-l}}`. 33 | Hence, the weights are inversely proportional to the length of the side of the cells that varies in size as the levels increase. 34 | The pyramid match kernel is then defined as follows 35 | 36 | .. math:: 37 | 38 | k(G,G') = I(H_G^L,H_{G'}^L) + \sum_{l=0}^{L-1} \frac{1}{2^{L-l}}\big(I(H_G^l,H_{G'}^l) - I(H_G^{l+1},H_{G'}^{l+1})\big) 39 | 40 | The complexity of the pyramid match kernel is :math:`\mathcal{O}(dnL)` where :math:`n` is the number of nodes of the graphs under comparison.
41 | 42 | In the case of labeled graphs, the kernel restricts matchings to occur only between vertices that share same labels. 43 | It represents each graph as a set of sets of vectors, and matches pairs of sets of two graphs corresponding to the same label using the pyramid match kernel. 44 | The emerging kernel for labeled graphs corresponds to the sum of the separate kernels 45 | 46 | .. math:: 47 | 48 | k(G, G') = \sum_{i=1}^c k^i(G,G') 49 | 50 | where :math:`c` is the number of distinct labels and :math:`k^i(G_1,G_2)` is the pyramid match kernel between the sets of vertices of the two graphs which are assigned the label :math:`i`. 51 | 52 | The above kernel is implemented below 53 | 54 | .. currentmodule:: grakel 55 | 56 | .. autosummary:: 57 | 58 | PyramidMatch 59 | 60 | Bibliography 61 | ------------ 62 | .. bibliography:: graph_kernels.bib 63 | :filter: docname in docnames 64 | -------------------------------------------------------------------------------- /doc/kernels/random_walk.rst: -------------------------------------------------------------------------------- 1 | .. _random_walk: 2 | 3 | .. raw:: latex 4 | 5 | \newtheorem{definition}{Definition} 6 | 7 | Random Walk Kernel 8 | ================== 9 | The most well-studied family of graph kernels is probably the *random walk kernels* which quantify the similarity between a pair of graphs based on the number of common walks in the two graphs 10 | :cite:`kashima2003marginalized`, :cite:`gartner2003graph`, :cite:`mahe2004extensions`, :cite:`borgwardt2005protein`, :cite:`vishwanathan2010graph`, :cite:`sugiyama2015halting`. 11 | 12 | Kernels belonging to this family have concentrated mainly on counting matching walks in the two input graphs. There are several variations of random walk kernels. The :math:`k`-step random walk kernel compares random walks up to length :math:`k` in the two graphs. 
The most widely-used kernel from this family is the geometric random walk kernel :cite:`gartner2003graph` which compares walks up to infinity assigning a weight :math:`\lambda^k` (:math:`\lambda < 1`) to walks of length :math:`k` in order to ensure convergence of the corresponding geometric series. We next give the formal definition of the geometric random walk kernel. 13 | Given two node-labeled graphs :math:`G_i=(V_i,E_i)` and :math:`G_j=(V_j,E_j)`, their direct product 14 | :math:`G_\times=(V_\times,E_\times)` is a graph with vertex set: 15 | 16 | .. math:: 17 | :nowrap: 18 | 19 | \begin{equation} 20 | V_{\times} = \{(v_i,v_j) : v_i \in V_i \wedge v_j \in V_j \wedge \ell(v_i) = \ell(v_j) \} 21 | \end{equation} 22 | 23 | and edge set: 24 | 25 | .. math:: 26 | :nowrap: 27 | 28 | \begin{equation} 29 | E_{\times} = \{\{(v_i,v_j),(u_i,u_j)\} : \{v_i,u_i\} \in E_i \wedge \{v_j,u_j\} \in E_j\} 30 | \end{equation} 31 | 32 | Performing a random walk on :math:`G_{\times}` is equivalent to performing a simultaneous random walk 33 | on :math:`G_i` and :math:`G_j`. 34 | The geometric random walk kernel counts common walks (of potentially infinite length) in two graphs 35 | and is defined as follows. 36 | 37 | Definition: Geometric Random Walk Kernel 38 | ---------------------------------------- 39 | 40 | Let :math:`G_i` and :math:`G_j` be two graphs, let :math:`A_\times` denote the adjacency matrix of their 41 | product graph :math:`G_\times`, and let :math:`V_\times` denote the vertex set of the product 42 | graph :math:`G_\times`. 43 | 44 | Then, the geometric random walk kernel is defined as 45 | 46 | .. math:: 47 | :nowrap: 48 | 49 | \begin{equation} 50 | K_{\times}^{\infty}(G_i,G_j) = \sum_{p,q=1}^{|V_{\times}|} \Big[ \sum_{l=0}^{\infty} \lambda^l A_{\times}^l \Big]_{pq} = e^T(I - \lambda A_{\times})^{-1} e 51 | \end{equation} 52 | 53 | where :math:`I` is the identity matrix, :math:`e` is the all-ones vector, and :math:`\lambda` 54 | is a positive, real-valued weight. 
The geometric random walk kernel converges only if 55 | :math:`\lambda < \frac{1}{\lambda_\times}` where :math:`\lambda_\times` is the largest eigenvalue of 56 | :math:`A_{\times}`. 57 | 58 | Direct computation of the geometric random walk kernel requires :math:`\mathcal{O}(n^6)` time. 59 | The computational complexity of the method severely limits its applicability to real-world applications. 60 | To account for this, Vishwanathan et al. proposed in :cite:`vishwanathan2010graph` four efficient 61 | methods to compute random walk graph kernels which generally reduce the computational complexity from 62 | :math:`\mathcal{O}(n^6)` to :math:`\mathcal{O}(n^3)`. 63 | Mahé et al. proposed in :cite:`mahe2004extensions` some other extensions of random walk kernels. 64 | Specifically, they proposed a label enrichment approach which increases specificity and in most 65 | cases also reduces computational complexity. 66 | They also employed a second order Markov random walk to deal with the problem of "tottering". 67 | Sugiyama and Borgwardt focused in :cite:`sugiyama2015halting` on a different problem of random walk 68 | kernels, a phenomenon referred to as "halting". 69 | 70 | Next follow two implementations of this kernel (one for unlabeled graphs and one for graphs with discrete node labels) 71 | 72 | .. currentmodule:: grakel 73 | 74 | .. autosummary:: 75 | RandomWalk 76 | RandomWalkLabeled 77 | 78 | 79 | Bibliography 80 | ------------ 81 | .. bibliography:: graph_kernels.bib 82 | :filter: docname in docnames 83 | -------------------------------------------------------------------------------- /doc/kernels/shortest_path.rst: -------------------------------------------------------------------------------- 1 | .. _shortest_path: 2 | 3 | Shortest Path Kernel 4 | ==================== 5 | The shortest-path kernel decomposes graphs into shortest paths and compares pairs of shortest paths 6 | according to their lengths and the labels of their endpoints. 
7 | The first step of the shortest-path kernel is to transform the input graphs into shortest-paths graphs. 8 | Given an input graph :math:`G=(V,E)`, we create a new graph :math:`S=(V,E_s)` (i.e. its shortest-path graph). 9 | The shortest-path graph :math:`S` contains the same set of vertices as the graph from which it originates. 10 | The edge set of the former is a superset of that of the latter, since in the shortest-path graph :math:`S`, 11 | there exists an edge between all vertices which are connected by a walk in the original graph :math:`G`. 12 | To complete the transformation, we assign labels to all the edges of the shortest-path graph :math:`S`. 13 | The label of each edge is set equal to the shortest distance between its endpoints in the original graph :math:`G`. 14 | 15 | Given the above procedure for transforming a graph into a shortest-path graph, the shortest-path kernel is defined as follows. 16 | 17 | Definition: Shortest-Path Kernel 18 | -------------------------------- 19 | Let :math:`G_i`, :math:`G_j` be two graphs, and :math:`S_i`, :math:`S_j` their corresponding shortest-path graphs. 20 | 21 | The shortest-path kernel is then defined on :math:`S_i=(V_i,E_i)` and :math:`S_j=(V_j,E_j)` as 22 | 23 | .. math:: 24 | :nowrap: 25 | 26 | \begin{equation} 27 | k(S_i,S_j) = \sum_{e_i \in E_i} \sum_{e_j \in E_j} k_{walk}^{(1)}(e_i, e_j) 28 | \end{equation} 29 | 30 | where :math:`k_{walk}^{(1)}(e_i, e_j)` is a positive semidefinite kernel on edge walks of length :math:`1`. 31 | 32 | In labeled graphs, the :math:`k_{walk}^{(1)}(e_i, e_j)` kernel is designed to compare both the lengths 33 | of the shortest paths corresponding to edges :math:`e_i` and :math:`e_j`, and the labels of their endpoint vertices. 34 | 35 | Let :math:`e_i = \{v_i, u_i\}` and :math:`e_j = \{v_j, u_j\}`. 36 | Then, :math:`k_{walk}^{(1)}(e_i, e_j)` is usually defined as: 37 | 38 | .. 
math:: 39 | :nowrap: 40 | 41 | \begin{equation} 42 | \begin{split} 43 | k_{walk}^{(1)}(e_i, e_j) &= k_v(\ell(v_i),\ell(v_j)) \ k_e(\ell(e_i),\ell(e_j)) \ k_v(\ell(u_i),\ell(u_j)) \\ 44 | &+ k_v(\ell(v_i),\ell(u_j)) \ k_e(\ell(e_i),\ell(e_j)) \ k_v(\ell(u_i),\ell(v_j)) 45 | \end{split} 46 | \end{equation} 47 | 48 | where :math:`k_v` is a kernel comparing vertex labels, and :math:`k_e` a kernel comparing shortest path lengths. 49 | Vertex labels are usually compared via a dirac kernel, while shortest path lengths may also be compared via 50 | a dirac kernel or, more rarely, via a brownian bridge kernel :cite:`borgwardt2005shortest`. 51 | 52 | In terms of runtime complexity, the shortest-path kernel is very expensive since its computation takes :math:`\mathcal{O}(n^4)` time. 53 | 54 | 55 | Two versions of this kernel can be found implemented below. The first takes as input graphs with discrete node labels and applies a speed-up technique for faster kernel calculation. 56 | 57 | .. currentmodule:: grakel 58 | 59 | .. autosummary:: 60 | 61 | ShortestPath 62 | ShortestPathAttr 63 | 64 | Bibliography 65 | ------------ 66 | .. bibliography:: graph_kernels.bib 67 | :filter: docname in docnames 68 | -------------------------------------------------------------------------------- /doc/kernels/subgraph_matching.rst: -------------------------------------------------------------------------------- 1 | .. _subgraph_matching: 2 | 3 | Subgraph Matching Kernel 4 | ======================== 5 | 6 | The subgraph matching kernel counts the number of matchings between subgraphs of bounded size in two graphs :cite:`kriege2012subgraph`. 7 | The kernel is very general since it can be applied to graphs that contain node labels, edge labels, node attributes or edge attributes. 8 | 9 | Let :math:`\mathcal{G}` be a set of graphs. 10 | We assume that the graphs that are contained in the set are labeled or attributed. 
11 | Specifically, let :math:`\ell : \mathcal{V} \cup \mathcal{E} \rightarrow \mathcal{L}` be a labeling function that assigns either discrete labels or continuous attributes to vertices and edges. 12 | A graph isomorphism between two labeled/attributed graphs :math:`G=(V,E)` and :math:`G'=(V',E')` is a bijection :math:`\phi : V \rightarrow V'` that preserves adjacencies, \ie :math:`\forall v,u \in V : (v,u) \in E \Leftrightarrow (\phi(v), \phi(u)) \in E'`, and labels, \ie if :math:`\psi \in V \times V \rightarrow V' \times V'` is the mapping of vertex pairs implicated by the bijection :math:`\phi` such that :math:`\psi((v,u)) = (\phi(v), \phi(u))`, then, the conditions :math:`\forall v \in V : \ell(v) \equiv \ell(\phi(v))` and :math:`\forall e \in E : \ell(e) \equiv \ell(\psi(e))` must hold, where :math:`\equiv` denotes that two labels are considered equivalent. 13 | 14 | Given two graphs :math:`G=(V,E)` and :math:`G'=(V',E')`, let :math:`\mathcal{B}(G,G')` denote the set of all bijections between sets :math:`S \subseteq V` and :math:`S' \subseteq V'`, and let :math:`\lambda : \mathcal{B}(G,G') \rightarrow \mathbb{R}^+` be a weight function. 15 | The subgraph matching kernel is defined as 16 | 17 | .. math:: 18 | 19 | k(G, G') = \sum_{\phi \in \mathcal{B}(G,G')} \lambda(\phi) \prod_{v \in S} \kappa_V(v, \phi(v)) \prod_{e \in S \times S} \kappa_E(e, \psi(e)) 20 | 21 | where :math:`S = dom(\phi)` and :math:`\kappa_V, \kappa_E` are kernel functions defined on vertices and edges, respectively. 22 | 23 | The instance of the subgraph matching kernel that is obtained if we set the :math:`\kappa_V, \kappa_E` functions as follows 24 | 25 | .. 
math:: 26 | 27 | \begin{split} 28 | \kappa_V(v,v') &= \begin{cases} 29 | 1, & \text{if } \ell(v) \equiv \ell(v'),\\ 30 | 0, & \text{otherwise and} 31 | \end{cases}\\ 32 | \kappa_E(e,e') &= \begin{cases} 33 | 1, & \text{if } e \in E \wedge e' \in E' \wedge \ell(e) \equiv \ell(e') \text{ or } e \not \in E \wedge e' \not \in E',\\ 34 | 0, & \text{otherwise.} 35 | \end{cases} 36 | \end{split} 37 | 38 | is known as the common subgraph isomorphism kernel. 39 | This kernel counts the number of isomorphic subgraphs contained in two graphs. 40 | 41 | To count the number of isomorphisms between subgraphs, the kernel capitalizes on a classical result of Levi :cite:`levi1973note` which makes a connection between common subgraphs of two graphs and cliques in their product graph. 42 | More specifically, each maximum clique in the product graph is associated with a maximum common subgraph of the factor graphs. 43 | This allows someone to compute the common subgraph isomorphism kernel by enumerating the cliques of the product graph. 44 | 45 | The general subgraph matching kernel extends the theory of Levi and builds a weighted product graph to allow a more flexible scoring of bijections. 46 | Given two graphs :math:`G=(V,E)`, :math:`G'=(V',E')`, and vertex and edge kernels :math:`\kappa_V` and :math:`\kappa_E`, the weighted product graph :math:`G_P=(V_P, E_P)` of :math:`G` and :math:`G'` is defined as 47 | 48 | .. math:: 49 | 50 | \begin{split} 51 | V_P &= \{ (v,v') \in V \times V' : \kappa_V(v,v') > 0 \} \\ 52 | E_P &= \{ ((v,v'),(u,u')) \in V_P \times V_P : v \neq u \wedge v' \neq u' \wedge \kappa_E((v,v'),(u,u')) > 0 \} \\ 53 | c(u) &= \kappa_V(v,v') \quad \forall u=(v,v') \in V_P \\ 54 | c(e) &= \kappa_E((v,u),(v',u')) \quad \forall e \in E_P, \\ 55 | \text{where } &e=((v,v'),(u,u')) 56 | \end{split} 57 | 58 | After creating the weighted product graph, the kernel enumerates its cliques. 
59 | The kernel starts from an empty clique and extends it stepwise by all vertices preserving the clique property. 60 | Let :math:`w` be the weight of a clique :math:`C`. 61 | Whenever the clique :math:`C` is extended by a new vertex :math:`v`, the weight of the clique is updated as follows: first it is multiplied by the weight of the vertex :math:`w' = w \cdot c(v)`, and then, it is multiplied by all the edges connecting :math:`v` to a vertex in :math:`C`, that is :math:`w' = \sum_{u \in C} w \cdot c((v,u))`. 62 | The algorithm effectively avoids duplicates by removing a vertex from the candidate set after all cliques containing it have been exhaustively explored. 63 | 64 | The runtime of the subgraph matching kernel depends on the number of cliques in the product graph. 65 | The worst-case runtime complexity of the kernel when considering subgraphs of size up to :math:`k` is :math:`\mathcal{O}(kn^{k+1})`, where :math:`n=|V|+|V'|` is the sum of the number of vertices of the two graphs. 66 | 67 | The implementation of the subgraph matching kernel can be found below 68 | 69 | .. currentmodule:: grakel 70 | 71 | .. autosummary:: 72 | 73 | SubgraphMatching 74 | 75 | Bibliography 76 | ------------ 77 | .. bibliography:: graph_kernels.bib 78 | :filter: docname in docnames 79 | -------------------------------------------------------------------------------- /doc/kernels/svm_theta.rst: -------------------------------------------------------------------------------- 1 | .. _svm_theta: 2 | 3 | SVM Theta Kernel 4 | ================ 5 | 6 | The SVM-:math:`\vartheta` kernel is very related to the Lovász :math:`\vartheta` kernel :cite:`johansson2014global`. 7 | The Lovász :math:`\vartheta` kernel suffers from high computational complexity, and the SVM-:math:`\vartheta` kernel was developed as a more efficient alternative. 8 | Similar to the Lovász :math:`\vartheta` kernel, this kernel also assumes unlabeled graphs. 
9 | 10 | Given a graph :math:`G=(V,E)` such that :math:`|V| = n`, the Lovász number of :math:`G` can be defined as 11 | 12 | .. math:: 13 | 14 | \vartheta(G) = \min_{\mathbf{K} \in L} \omega(\mathbf{K}) 15 | 16 | where :math:`\omega(\mathbf{K})` is the one-class SVM given by 17 | 18 | .. math:: \omega(\mathbf{K}) = \max_{\alpha_i > 0} 2\sum_{i=1}^{n} \alpha_i - \sum_{i=1}^{n} \sum_{j=1}^{n} \alpha_i \alpha_j \mathbf{K}_{ij} 19 | :label: oneclass_svm 20 | 21 | 22 | and :math:`L` is a set of positive semidefinite matrices defined as 23 | 24 | .. math:: 25 | 26 | L = \{ \mathbf{K} \in S_{n}^+ : \mathbf{K}_{ii} = 1, \mathbf{K}_{ij}=0 \: \forall (i,j) \not \in E \} 27 | 28 | where :math:`S_{n}^+` is the set of all :math:`n \times n` positive semidefinite matrices. 29 | 30 | The SVM-:math:`\vartheta` kernel first computes the matrix :math:`\mathbf{K}_{LS}` which is equal to 31 | 32 | .. math:: 33 | 34 | \mathbf{K}_{LS} = \frac{\mathbf{A}}{\rho} + \mathbf{I} 35 | 36 | where :math:`\mathbf{A}` is the adjacency matrix of :math:`G`, :math:`\mathbf{I}` is the :math:`n \times n` identity matrix, and :math:`\rho \geq -\lambda_n` with :math:`\lambda_n` the minimum eigenvalue of :math:`\mathbf{A}`. 37 | The matrix :math:`\mathbf{K}_{LS}` is positive semidefinite by construction and it has been shown in :cite:`jethava2013lovasz` that 38 | 39 | .. math:: 40 | 41 | \omega(\mathbf{K}_{LS}) = \sum_{i=1}^n \alpha_i 42 | 43 | where :math:`\alpha_i` are the maximizers of Equation :eq:`oneclass_svm`. 44 | Furthermore, it was shown that on certain families of graphs (e.g. Erdős–Rényi random graphs), :math:`\omega(\mathbf{K}_{LS})` is with high probability a constant factor approximation to :math:`\vartheta(G)`. 45 | 46 | Then, the SVM-:math:`\vartheta` kernel is defined as follows 47 | 48 | .. 
math:: 49 | 50 | k_{SVM}(G, G') = \sum_{S \subseteq V} \sum_{S' \subseteq V'} \delta(|S|, |S'|) \frac{1}{Z_{|S|}} k \Big(\sum_{i \in S} \alpha_i, \sum_{j \in S'} \alpha_j \Big) 51 | 52 | where :math:`Z_{|S|} = \binom{|V|}{|S|} \binom{|V'|}{|S|}`, :math:`\delta(|S|, |S'|)` is a delta kernel (equal to :math:`1` if :math:`|S|=|S'|`, and :math:`0` otherwise), and :math:`k` is a positive semi-definite kernel between real values (\eg linear kernel, gaussian kernel). 53 | 54 | The SVM-:math:`\vartheta` kernel consists of three main steps: (:math:`1`) constructing matrix :math:`\mathbf{K}_{LS}` of :math:`G` which takes :math:`\mathcal{O}(n^3)` time (:math:`2`) solving the one-class SVM problem in :math:`\mathcal{O}(n^2)` time to obtain the :math:`\alpha_i` values, and (:math:`3`) computing the sum of the :math:`\alpha_i` values for all subgraphs (\ie subsets of vertices :math:`S \subseteq V`) of each graph. 55 | Computing the above quantity for all :math:`2^n` sets of vertices is not feasible in real-world scenarios. 56 | 57 | To address the above issue, the SVM-:math:`\vartheta` kernel employs sampling schemes. 58 | Given a graph :math:`G`, the kernel samples a specific number of subgraphs :math:`\mathfrak{S} \in 2^V`. 59 | Then, the SVM-:math:`\vartheta` kernel is defined as follows 60 | 61 | .. math:: 62 | 63 | \hat{k}_{SVM}(G, G') = \sum_{S \subseteq \mathfrak{S}} \sum_{S' \subseteq \mathfrak{S}'} \delta(|S|, |S'|) \frac{1}{\hat{Z}_{|S|}} \Big(\sum_{i \in S} \alpha_i, \sum_{j \in S'} \alpha_j \Big) 64 | 65 | where :math:`\hat{Z}_{|S|} = |\mathfrak{S}_{|S|}| |\mathfrak{S}'_{|S|}|` and :math:`\mathfrak{S}_{|S|}` denotes the subset of :math:`\mathfrak{S}` consisting of all sets of cardinality :math:`|S|`, that is :math:`\mathfrak{S}_{|S|} = \{ B \in \mathfrak{S} : |B| = |S| \}`. 
66 | 67 | The time complexity of computing :math:`\hat{k}_{SVM}(G, G')` is :math:`\mathcal{O}(n^3 + s^2 T(k) + sn)` where :math:`T(k)` is the complexity of computing the base kernel :math:`k` and :math:`s = \max(|\mathfrak{S}|, |\mathfrak{S}'|)`. 68 | The first term represents the cost of computing :math:`\mathbf{K}_{LS}` (dominated by the eigenvalue decomposition). 69 | The second term corresponds to the worst-case complexity of comparing the sums of the :math:`\alpha_i` values. 70 | And finally, the third term is the cost of computing the sum of the :math:`\alpha_i` values for the sampled subsets of vertices. 71 | 72 | The implementation of the SVM-:math:`\vartheta` kernel can be found below 73 | 74 | .. currentmodule:: grakel 75 | 76 | .. autosummary:: 77 | 78 | SvmTheta 79 | 80 | Bibliography 81 | ------------ 82 | .. bibliography:: graph_kernels.bib 83 | :filter: docname in docnames 84 | -------------------------------------------------------------------------------- /doc/kernels/vertex_histogram.rst: -------------------------------------------------------------------------------- 1 | .. _vertex_histogram: 2 | 3 | Vertex Histogram Kernel 4 | ======================= 5 | 6 | The vertex histogram kernel is a basic linear kernel on vertex label histograms. 7 | The kernel assumes node-labeled graphs. 8 | Let :math:`\mathcal{G}` be a collection of graphs, and assume that each of their vertices comes from an abstract vertex space :math:`\mathcal{V}`. 9 | Given a set of node labels :math:`\mathcal{L}`, :math:`\ell : \mathcal{V} \rightarrow \mathcal{L}` is a function that assigns labels to the vertices of the graphs. 10 | Assume that there are :math:`d` labels in total, that is :math:`d = |\mathcal{L}|`. 11 | Then, the vertex label histogram of a graph :math:`G=(V,E)` is a vector :math:`\mathbf{f} = (f_1, f_2, \ldots, f_d)`, such that :math:`f_i = |\{ v \in V : \ell(v) = i \}|` for each :math:`i \in \mathcal{L}`. 
12 | Let :math:`\mathbf{f}, \mathbf{f}'` be the vertex label histograms of two graphs :math:`G, G'`, respectively. 13 | The vertex histogram kernel is then defined as the linear kernel between :math:`\mathbf{f}` and :math:`\mathbf{f}'`, that is 14 | 15 | .. math:: 16 | 17 | k(G, G') = \langle \mathbf{f}, \mathbf{f}' \rangle 18 | 19 | The complexity of the vertex histogram kernel is linear in the number of vertices of the graphs. 20 | 21 | An implementation of that kernel can be found below 22 | 23 | .. currentmodule:: grakel 24 | 25 | .. autosummary:: 26 | 27 | VertexHistogram 28 | -------------------------------------------------------------------------------- /doc/kernels/weisfeiler_lehman.rst: -------------------------------------------------------------------------------- 1 | .. _weisfeiler_lehman: 2 | 3 | Weisfeiler Lehman Framework 4 | =========================== 5 | 6 | This Weisfeiler Lehman framework operates on top of existing graph kernels and is inspired by the 7 | Weisfeiler-Lehman test of graph isomorphism :cite:`weisfeiler1968reduction`. 8 | The key idea of the Weisfeiler-Lehman algorithm is to replace the label of each vertex with a multiset 9 | label consisting of the original label of the vertex and the sorted set of labels of its neighbors. 10 | The resultant multiset is then compressed into a new, short label. 11 | This relabeling procedure is then repeated for :math:`h` iterations. 12 | Note that this procedure is performed simultaneously on all input graphs. 13 | Therefore, two vertices from different graphs will get identical new labels 14 | if and only if they have identical multiset labels. 15 | 16 | More formally, given a graph :math:`G=(V,E)` endowed with a labeling function :math:`\ell=\ell_0`, 17 | the Weisfeiler-Lehman graph of :math:`G` at height :math:`i` is a graph :math:`G_i=(V,E)` endowed 18 | with a labeling function :math:`\ell_i` which has emerged after :math:`i` iterations of the 19 | relabeling procedure described above. 
20 | 21 | The Weisfeiler-Lehman sequence up to height :math:`h` of :math:`G` consists of the Weisfeiler-Lehman 22 | graphs of :math:`G` at heights from :math:`0` to :math:`h`, :math:`\{ G_0,G_1,\ldots,G_h\}`. 23 | 24 | 25 | Definition: Weisfeiler-Lehman Framework 26 | --------------------------------------- 27 | 28 | Let :math:`k` be any kernel for graphs, that we will call the base kernel. 29 | Then the Weisfeiler-Lehman kernel with :math:`h` iterations with the base 30 | kernel :math:`k` between two graphs :math:`G` and :math:`G'` is defined as 31 | 32 | .. math:: 33 | :nowrap: 34 | 35 | \begin{equation} 36 | k_{WL}(G,G') = k(G_0,G_0') + k(G_1,G_1') + \ldots + k(G_h,G_h') 37 | \end{equation} 38 | 39 | where :math:`h` is the number of Weisfeiler-Lehman iterations, and 40 | :math:`\{ G_0,G_1,\ldots,G_h\}` and :math:`\{ G_0',G_1',\ldots,G_h'\}` 41 | are the Weisfeiler-Lehman sequences of :math:`G` and :math:`G'` respectively. 42 | 43 | From the above definition, it is clear that any graph kernel that takes into 44 | account discrete node labels can take advantage of the Weisfeiler-Lehman framework 45 | and compare graphs based on the whole Weisfeiler-Lehman sequence. 46 | 47 | The general implementation of this framework can be found here: 48 | 49 | .. currentmodule:: grakel 50 | 51 | .. autosummary:: 52 | 53 | WeisfeilerLehman 54 | 55 | It should support all :code:`Kernel` objects inside its parameter :code:`base_kernel` (formulated in the correct way). 56 | 57 | 58 | Weisfeiler-Lehman Subtree Kernel 59 | -------------------------------- 60 | The Weisfeiler-Lehman subtree kernel is a very popular algorithm, and is considered the state-of-the-art in graph classification. 61 | Let :math:`G`, :math:`G'` be two graphs. Define :math:`\Sigma_i \subseteq \Sigma` as the set of letters that occur as node labels 62 | at least once in :math:`G` or :math:`G'` at the end of the :math:`i^{th}` iteration of the Weisfeiler-Lehman algorithm. 
63 | Let :math:`\Sigma_0` be the set of original node labels of :math:`G` and :math:`G'`. 64 | Assume all :math:`\Sigma_i` are pairwise disjoint. 65 | Without loss of generality, assume that every :math:`\Sigma_i = \{ \sigma_{i1},\ldots,\sigma_{i|\Sigma_i|} \}` is ordered. 66 | Define a map :math:`c_i : \{ G,G' \} \times \Sigma_i \rightarrow \mathbb{N}` such that :math:`c_i(G, \sigma_{ij})` 67 | is the number of occurrences of the letter :math:`\sigma_{ij}` in the graph :math:`G`. 68 | 69 | The Weisfeiler-Lehman subtree kernel on two graphs :math:`G` and :math:`G'` with :math:`h` iterations is defined as 70 | 71 | .. math:: 72 | :nowrap: 73 | 74 | \begin{equation} 75 | k(G,G') = \langle \phi(G),\phi(G') \rangle 76 | \end{equation} 77 | 78 | where 79 | 80 | .. math:: 81 | :nowrap: 82 | 83 | \begin{equation} 84 | \phi(G) = (c_0(G,\sigma_{01}),\ldots,c_0(G,\sigma_{0|\Sigma_0|}),\ldots,c_h(G,\sigma_{h1}),\ldots,c_h(G,\sigma_{h|\Sigma_h|})) 85 | \end{equation} 86 | 87 | and 88 | 89 | .. math:: 90 | :nowrap: 91 | 92 | \begin{equation} 93 | \phi(G') = (c_0(G',\sigma_{01}),\ldots,c_0(G',\sigma_{0|\Sigma_0|}),\ldots,c_h(G',\sigma_{h1}),\ldots,c_h(G',\sigma_{h|\Sigma_h|})) 94 | \end{equation} 95 | 96 | It can be shown that the above definition is equivalent to comparing the number of shared subtrees between the two input graphs :cite:`shervashidze2011weisfeiler`. 97 | It is interesting to note that the Weisfeiler-Lehman subtree kernel exhibits an attractive computational complexity since it can be computed in :math:`\mathcal{O}(hm)` time. 98 | 99 | .. note:: 100 | 101 | To create an instance of the above kernel use the :ref:`vertex_histogram` as the :code:`base_kernel`. 102 | 103 | Bibliography 104 | ------------ 105 | .. 
from operator import attrgetter
import inspect
import subprocess
import os
import sys
from functools import partial

REVISION_CMD = 'git rev-parse --short HEAD'


def _get_git_revision():
    """Return the short hash of the current git HEAD, or None on failure."""
    try:
        sha = subprocess.check_output(REVISION_CMD.split()).strip()
    except (subprocess.CalledProcessError, OSError):
        print('Failed to execute git to get revision')
        return None
    return sha.decode('utf-8')


def _linkcode_resolve(domain, info, package, url_fmt, revision):
    """Determine a link to online source for a class/method/function.

    This is called by sphinx.ext.linkcode.  Returns None whenever a link
    cannot be determined (no revision, non-Python domain, missing info,
    or no resolvable source file).

    Example (a long-untouched module that everyone has)::

        _linkcode_resolve('py', {'module': 'tty', 'fullname': 'setraw'},
                          package='tty',
                          url_fmt='http://hg.python.org/cpython/file/'
                                  '{revision}/Lib/{package}/{path}#L{lineno}',
                          revision='xxxx')
        # -> 'http://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18'
    """
    # Guard clauses: bail out early on anything we cannot link.
    if revision is None or domain not in ('py', 'pyx'):
        return None
    if not info.get('module') or not info.get('fullname'):
        return None

    class_name = info['fullname'].split('.')[0]
    if type(class_name) != str:
        # Python 2 only: __import__ wants a byte string
        class_name = class_name.encode('utf-8')
    module = __import__(info['module'], fromlist=[class_name])
    obj = attrgetter(info['fullname'])(module)

    # Try the object itself first, then fall back to its defining module.
    try:
        source_file = inspect.getsourcefile(obj)
    except Exception:
        source_file = None
    if not source_file:
        try:
            source_file = inspect.getsourcefile(sys.modules[obj.__module__])
        except Exception:
            source_file = None
    if not source_file:
        return None

    # Path relative to the package root, so {path} slots into the URL.
    source_file = os.path.relpath(
        source_file, start=os.path.dirname(__import__(package).__file__))
    try:
        lineno = inspect.getsourcelines(obj)[1]
    except Exception:
        lineno = ''
    return url_fmt.format(revision=revision, package=package,
                          path=source_file, lineno=lineno)


def make_linkcode_resolve(package, url_fmt):
    """Return a linkcode_resolve function for the given URL format.

    ``package`` is the name of the root module of the package.
    ``url_fmt`` is along the lines of
    ('https://github.com/USER/PROJECT/blob/{revision}/{package}/'
    '{path}#L{lineno}').  The revision is the current git commit hash.
    """
    return partial(_linkcode_resolve, revision=_get_git_revision(),
                   package=package, url_fmt=url_fmt)
# -*- coding: utf-8 -*-
"""A Sphinx extension for linking to your project's issue tracker.

Copyright 2014 Steven Loria

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""

from docutils import nodes, utils
from sphinx.util.nodes import split_explicit_title

__version__ = '0.2.0'
__author__ = 'Steven Loria'
__license__ = 'MIT'


def user_role(name, rawtext, text, lineno,
              inliner, options=None, content=None):
    """Sphinx role for linking to a user profile. Defaults to linking to
    GitHub profiles, but the profile URIS can be configured via the
    ``issues_user_uri`` config value.

    Example: ::

        :user:`sloria`
    """
    options = options or {}
    content = content or []
    has_explicit_title, title, target = split_explicit_title(text)

    target = utils.unescape(target).strip()
    title = utils.unescape(title).strip()
    config = inliner.document.settings.env.app.config
    if config.issues_user_uri:
        ref = config.issues_user_uri.format(user=target)
    else:
        ref = 'https://github.com/{0}'.format(target)
    if has_explicit_title:
        text = title
    else:
        text = '@{0}'.format(target)

    link = nodes.reference(text=text, refuri=ref, **options)
    return [link], []


def _make_issue_node(issue_no, config, options=None):
    """Build a reference node for ``issue_no``, or None for '-' / '0'.

    Raises ValueError if neither ``issues_uri`` nor ``issues_github_path``
    is configured (previously this path crashed with an UnboundLocalError
    because ``ref`` was never assigned).
    """
    options = options or {}
    if issue_no not in ('-', '0'):
        if config.issues_uri:
            ref = config.issues_uri.format(issue=issue_no)
        elif config.issues_github_path:
            ref = 'https://github.com/{0}/issues/{1}'.format(
                config.issues_github_path, issue_no
            )
        else:
            # Fixed: fail with an explicit, actionable error instead of an
            # UnboundLocalError on `ref` below.
            raise ValueError(
                'Neither issues_uri nor issues_github_path is set in conf.py'
            )
        issue_text = '#{0}'.format(issue_no)
        link = nodes.reference(text=issue_text, refuri=ref, **options)
    else:
        link = None
    return link


def issue_role(name, rawtext, text, lineno,
               inliner, options=None, content=None):
    """Sphinx role for linking to an issue. Must have
    `issues_uri` or `issues_github_path` configured in ``conf.py``.

    Examples: ::

        :issue:`123`
        :issue:`42,45`
    """
    options = options or {}
    content = content or []
    issue_nos = [each.strip() for each in utils.unescape(text).split(',')]
    config = inliner.document.settings.env.app.config
    ret = []
    for i, issue_no in enumerate(issue_nos):
        node = _make_issue_node(issue_no, config, options=options)
        ret.append(node)
        if i != len(issue_nos) - 1:
            sep = nodes.raw(text=', ', format='html')
            ret.append(sep)
    return ret, []


def setup(app):
    """Register the :issue: and :user: roles and their config values."""
    # Format template for issues URI
    # e.g. 'https://github.com/sloria/marshmallow/issues/{issue}
    app.add_config_value('issues_uri', default=None, rebuild='html')
    # Shortcut for GitHub, e.g. 'sloria/marshmallow'
    app.add_config_value('issues_github_path', default=None, rebuild='html')
    # Format template for user profile URI
    # e.g. 'https://github.com/{user}'
    app.add_config_value('issues_user_uri', default=None, rebuild='html')
    app.add_role('issue', issue_role)
    app.add_role('user', user_role)
# -*- coding: utf-8 -*-
# Taken from: https://github.com/michaeljones/sphinx-xref/blob/master/xref.py

from docutils import nodes

from sphinx.util import caption_ref_re


def xref(typ, rawtext, text, lineno, inliner, options=None, content=None):
    """Sphinx role resolving :xref:`target` / :xref:`title <target>` against
    the ``xref_links`` config mapping (target -> (default title, uri)).

    Fixed: mutable default arguments ({} / []) replaced with None, matching
    sphinx_issues.py in this tree; removed the unused `titleistarget` local.
    """
    options = options or {}
    content = content or []

    title = target = text
    # look if explicit title and target are given with `foo <target>` syntax
    brace = text.find('<')
    if brace != -1:
        m = caption_ref_re.match(text)
        if m:
            target = m.group(2)
            title = m.group(1)
        else:
            # fallback: everything after '<' is the target
            target = text[brace+1:]
            title = text[:brace]

    # NOTE(review): an unknown target raises KeyError here; upstream behaves
    # the same, so this is kept as-is.
    link = xref.links[target]

    if brace != -1:
        pnode = nodes.reference(target, title, refuri=link[1])
    else:
        pnode = nodes.reference(target, link[0], refuri=link[1])

    return [pnode], []


def get_refs(app):
    # Copy the configured mapping onto the role function itself so `xref`
    # can reach it without an app handle.
    xref.links = app.config.xref_links


def setup(app):
    app.add_config_value('xref_links', {}, True)
    app.add_role('xref', xref)
    app.connect("builder-inited", get_refs)
7 | -------------------------------------------------------------------------------- /examples/document_retrieval_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================================================================== 3 | Retrieval of most similar document using the Weisfeiler-Lehman subtree kernel. 4 | ============================================================================== 5 | Script makes use of :class:`grakel.WeisfeilerLehman`, :class:`grakel.VertexHistogram` 6 | """ 7 | from __future__ import print_function 8 | print(__doc__) 9 | 10 | import numpy as np 11 | import time 12 | 13 | from nltk import word_tokenize 14 | from nltk.corpus import sentence_polarity 15 | 16 | from grakel.kernels import WeisfeilerLehman, VertexHistogram 17 | from grakel import Graph 18 | 19 | sents = sentence_polarity.sents() 20 | sents = [sent for sent in sents if len(sent) > 1] 21 | n_sents = 3000 22 | sents = sents[:n_sents] 23 | print("Loaded %d sentences\n" % n_sents) 24 | 25 | print("Creating word co-occurrence networks\n") 26 | word_networks = list() 27 | for sent in sents: 28 | 29 | node_labels = dict() 30 | tokens_to_ids = dict() 31 | for token in sent: 32 | if token not in tokens_to_ids: 33 | tokens_to_ids[token] = len(tokens_to_ids) 34 | node_labels[tokens_to_ids[token]] = token 35 | 36 | edges = list() 37 | for i in range(len(sent)-1): 38 | edges.append((tokens_to_ids[sent[i]], tokens_to_ids[sent[i+1]])) 39 | 40 | word_networks.append(Graph(edges, node_labels=node_labels)) 41 | 42 | query_sent_id = 54 43 | query_sent = [word_networks[query_sent_id]] 44 | 45 | # Initialize Weisfeiler-Lehman subtree kernel 46 | gk = WeisfeilerLehman(niter=2, normalize=True, base_graph_kernel=VertexHistogram) 47 | 48 | print("Computing similarities\n") 49 | t0 = time.time() 50 | gk.fit(query_sent) 51 | K = gk.transform(word_networks) 52 | print("done in %0.3fs\n" % (time.time() - t0)) 53 | 54 | print("Query 
sentence") 55 | print("--------------") 56 | print(" ".join(sents[query_sent_id])) 57 | print() 58 | print("Most similar sentence") 59 | print("---------------------") 60 | print(" ".join(sents[np.argsort(K[:,0])[-2]])) 61 | -------------------------------------------------------------------------------- /examples/erdos_renyi.py: -------------------------------------------------------------------------------- 1 | """ 2 | =========================================================================== 3 | Graph classification on a randomly generated dataset of Erdos-Renyi graphs. 4 | =========================================================================== 5 | 6 | Script makes use of :class:`grakel.Graph` and :class:`grakel.ShortestPath` 7 | """ 8 | from __future__ import print_function 9 | print(__doc__) 10 | 11 | import numpy as np 12 | 13 | from random import random 14 | 15 | from sklearn.model_selection import train_test_split 16 | from sklearn.svm import SVC 17 | from sklearn.metrics import accuracy_score 18 | 19 | from grakel import Graph 20 | from grakel.kernels import ShortestPath 21 | 22 | # Generates 3 sets of Erdos-Renyi graphs. Each edge is included in the graph with probability p 23 | # independent from every other edge. 
The probability p is set equal to 0.25, 0.5 and 0.75 for 24 | # the graphs of the 1st, 2nd and 3rd set, respectivery 25 | Gs = list() 26 | y = list() 27 | probs = [0.25, 0.5, 0.75] 28 | for i in range(len(probs)): 29 | for j in range(5, 35): 30 | edges = list() 31 | for n1 in range(j): 32 | for n2 in range(n1+1, j): 33 | if random() <= probs[i]: 34 | edges.append((n1, n2)) 35 | edges.append((n2, n1)) 36 | 37 | Gs.append(Graph(edges)) 38 | y.append(i) 39 | 40 | # Splits the dataset into a training and a test set 41 | G_train, G_test, y_train, y_test = train_test_split(Gs, y, test_size=0.1, random_state=42) 42 | 43 | # Uses the shortest path kernel to generate the kernel matrices 44 | gk = ShortestPath(normalize=True, with_labels=False) 45 | K_train = gk.fit_transform(G_train) 46 | K_test = gk.transform(G_test) 47 | 48 | # Uses the SVM classifier to perform classification 49 | clf = SVC(kernel="precomputed") 50 | clf.fit(K_train, y_train) 51 | y_pred = clf.predict(K_test) 52 | 53 | # Computes and prints the classification accuracy 54 | acc = accuracy_score(y_test, y_pred) 55 | print("Accuracy:", str(round(acc*100, 2)) + "%") 56 | -------------------------------------------------------------------------------- /examples/node_attributed_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | ======================================================================= 3 | Graph classification on a dataset that contains node-attributed graphs. 
4 | ======================================================================= 5 | 6 | Script makes use of :class:`grakel.PropagationAttr` 7 | """ 8 | from __future__ import print_function 9 | print(__doc__) 10 | 11 | import numpy as np 12 | 13 | from sklearn.model_selection import train_test_split 14 | from sklearn.svm import SVC 15 | from sklearn.metrics import accuracy_score 16 | 17 | from grakel.datasets import fetch_dataset 18 | from grakel.kernels import PropagationAttr 19 | 20 | # Loads the ENZYMES dataset 21 | ENZYMES_attr = fetch_dataset("ENZYMES", prefer_attr_nodes=True, verbose=False) 22 | G, y = ENZYMES_attr.data, ENZYMES_attr.target 23 | 24 | # Splits the dataset into a training and a test set 25 | G_train, G_test, y_train, y_test = train_test_split(G, y, test_size=0.1, random_state=42) 26 | 27 | # Uses the graphhopper kernel to generate the kernel matrices 28 | gk = PropagationAttr(normalize=True) 29 | K_train = gk.fit_transform(G_train) 30 | K_test = gk.transform(G_test) 31 | 32 | # Uses the SVM classifier to perform classification 33 | clf = SVC(kernel="precomputed") 34 | clf.fit(K_train, y_train) 35 | y_pred = clf.predict(K_test) 36 | 37 | # Computes and prints the classification accuracy 38 | acc = accuracy_score(y_test, y_pred) 39 | print("Accuracy:", str(round(acc*100, 2)) + "%") 40 | -------------------------------------------------------------------------------- /examples/nx_to_grakel.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================================= 3 | Example of transforming NetworkX graphs to GraKeL graphs. 
4 | ========================================================= 5 | """ 6 | from __future__ import print_function 7 | print(__doc__) 8 | 9 | import numpy as np 10 | import networkx as nx 11 | 12 | from grakel.utils import graph_from_networkx 13 | 14 | # Creates a list of two simple graphs 15 | G1 = nx.Graph() 16 | G1.add_nodes_from([0,1,2]) 17 | G1.add_edges_from([(0,1), (1,2)]) 18 | 19 | G2 = nx.Graph() 20 | G2.add_nodes_from([0,1,2]) 21 | G2.add_edges_from([(0,1), (0,2), (1,2)]) 22 | 23 | G_nx = [G1, G2] 24 | 25 | # Transforms list of NetworkX graphs into a list of GraKeL graphs 26 | G = graph_from_networkx(G_nx) 27 | print("1 - Simple graphs transformed\n") 28 | 29 | 30 | # Creates a list of two node-labeled graphs 31 | G1 = nx.Graph() 32 | G1.add_nodes_from([0,1,2]) 33 | G1.add_edges_from([(0,1), (1,2)]) 34 | nx.set_node_attributes(G1, {0:'a', 1:'b', 2:'a'}, 'label') 35 | 36 | G2 = nx.Graph() 37 | G2.add_nodes_from([0,1,2]) 38 | G2.add_edges_from([(0,1), (0,2), (1,2)]) 39 | nx.set_node_attributes(G2, {0:'a', 1:'b', 2:'c'}, 'label') 40 | 41 | G_nx = [G1, G2] 42 | 43 | # Transforms list of NetworkX graphs into a list of GraKeL graphs 44 | G = graph_from_networkx(G_nx, node_labels_tag='label') 45 | print("2 - Node-labeled graphs transformed\n") 46 | 47 | 48 | # Creates a list of two node-attributed graphs 49 | G1 = nx.Graph() 50 | G1.add_nodes_from([0,1,2]) 51 | G1.add_edges_from([(0,1), (1,2)]) 52 | nx.set_node_attributes(G1, {0:np.array([1.1, 0.8]), 53 | 1:np.array([0.2, -0.3]), 2:np.array([0.9, 1.0])}, 'attributes') 54 | 55 | G2 = nx.Graph() 56 | G2.add_nodes_from([0,1,2]) 57 | G2.add_edges_from([(0,1), (0,2), (1,2)]) 58 | nx.set_node_attributes(G2, {0:np.array([1.8, 0.5]), 59 | 1:np.array([-0.1, 0.2]), 2:np.array([2.3, 1.2])}, 'attributes') 60 | 61 | G_nx = [G1, G2] 62 | 63 | # Transforms list of NetworkX graphs into a list of GraKeL graphs 64 | G = graph_from_networkx(G_nx, node_labels_tag='attributes') 65 | print("3 - Node-attributed graphs transformed") 
-------------------------------------------------------------------------------- /examples/optimizing_hyperparameters.py: -------------------------------------------------------------------------------- 1 | """ 2 | =================================================================================== 3 | Performing cross-validation n times, optimizing SVM's and kernel's hyperparameters. 4 | =================================================================================== 5 | 6 | Script makes use of :class:`grakel.WeisfeilerLehman`, :class:`grakel.VertexHistogram` 7 | """ 8 | from __future__ import print_function 9 | print(__doc__) 10 | 11 | import numpy as np 12 | 13 | from grakel.datasets import fetch_dataset 14 | from grakel.utils import cross_validate_Kfold_SVM 15 | from grakel.kernels import WeisfeilerLehman, VertexHistogram 16 | 17 | # Loads the MUTAG dataset 18 | MUTAG = fetch_dataset("MUTAG", verbose=False) 19 | G, y = MUTAG.data, MUTAG.target 20 | 21 | # Generates a list of kernel matrices using the Weisfeiler-Lehman subtree kernel 22 | # Each kernel matrix is generated by setting the number of iterations of the 23 | # kernel to a different value (from 2 to 7) 24 | Ks = list() 25 | for i in range(1, 7): 26 | gk = WeisfeilerLehman(n_iter=i, base_graph_kernel=VertexHistogram, normalize=True) 27 | K = gk.fit_transform(G) 28 | Ks.append(K) 29 | 30 | 31 | # Performs 10-fold cross-validation over different kernels and the parameter C of 32 | # SVM and repeats the experiment 10 times with different folds 33 | accs = cross_validate_Kfold_SVM([Ks], y, n_iter=10) 34 | print("Average accuracy:", str(round(np.mean(accs[0])*100, 2)) + "%") 35 | print("Standard deviation:", str(round(np.std(accs[0])*100, 2)) + "%") -------------------------------------------------------------------------------- /examples/plot_pipeline_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | 
==================================================== 3 | Example of building a graph classification pipeline. 4 | ==================================================== 5 | 6 | Script makes use of :class:`grakel.ShortestPath` 7 | """ 8 | from __future__ import print_function 9 | print(__doc__) 10 | 11 | import numpy as np 12 | 13 | from sklearn.svm import SVC 14 | from sklearn.model_selection import GridSearchCV 15 | from sklearn.model_selection import cross_val_predict 16 | from sklearn.pipeline import make_pipeline 17 | from sklearn.metrics import accuracy_score 18 | 19 | from grakel.datasets import fetch_dataset 20 | from grakel.kernels import ShortestPath 21 | 22 | # Loads the Mutag dataset from: 23 | MUTAG = fetch_dataset("MUTAG", verbose=False) 24 | G, y = MUTAG.data, MUTAG.target 25 | 26 | # Values of C parameter of SVM 27 | C_grid = (10. ** np.arange(-4,6,1) / len(G)).tolist() 28 | 29 | # Creates pipeline 30 | estimator = make_pipeline( 31 | ShortestPath(normalize=True), 32 | GridSearchCV(SVC(kernel='precomputed'), dict(C=C_grid), 33 | scoring='accuracy', cv=10)) 34 | 35 | # Performs cross-validation and computes accuracy 36 | n_folds = 10 37 | acc = accuracy_score(y, cross_val_predict(estimator, G, y, cv=n_folds)) 38 | print("Accuracy:", str(round(acc*100, 2)) + "%") -------------------------------------------------------------------------------- /examples/shortest_path.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================================================= 3 | Graph classification on MUTAG using the shortest path kernel. 
4 | ============================================================= 5 | 6 | Script makes use of :class:`grakel.ShortestPath` 7 | """ 8 | from __future__ import print_function 9 | print(__doc__) 10 | 11 | import numpy as np 12 | 13 | from sklearn.model_selection import train_test_split 14 | from sklearn.svm import SVC 15 | from sklearn.metrics import accuracy_score 16 | 17 | from grakel.datasets import fetch_dataset 18 | from grakel.kernels import ShortestPath 19 | 20 | # Loads the MUTAG dataset 21 | MUTAG = fetch_dataset("MUTAG", verbose=False) 22 | G, y = MUTAG.data, MUTAG.target 23 | 24 | # Splits the dataset into a training and a test set 25 | G_train, G_test, y_train, y_test = train_test_split(G, y, test_size=0.1, random_state=42) 26 | 27 | # Uses the shortest path kernel to generate the kernel matrices 28 | gk = ShortestPath(normalize=True) 29 | K_train = gk.fit_transform(G_train) 30 | K_test = gk.transform(G_test) 31 | 32 | # Uses the SVM classifier to perform classification 33 | clf = SVC(kernel="precomputed") 34 | clf.fit(K_train, y_train) 35 | y_pred = clf.predict(K_test) 36 | 37 | # Computes and prints the classification accuracy 38 | acc = accuracy_score(y_test, y_pred) 39 | print("Accuracy:", str(round(acc*100, 2)) + "%") 40 | -------------------------------------------------------------------------------- /examples/weisfeiler_lehman_subtree.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========================================================================= 3 | Graph classification on MUTAG using the Weisfeiler-Lehman subtree kernel. 
4 | ========================================================================= 5 | 6 | Script makes use of :class:`grakel.WeisfeilerLehman`, :class:`grakel.VertexHistogram` 7 | """ 8 | from __future__ import print_function 9 | print(__doc__) 10 | 11 | import numpy as np 12 | 13 | from sklearn.model_selection import train_test_split 14 | from sklearn.svm import SVC 15 | from sklearn.metrics import accuracy_score 16 | 17 | from grakel.datasets import fetch_dataset 18 | from grakel.kernels import WeisfeilerLehman, VertexHistogram 19 | 20 | # Loads the MUTAG dataset 21 | MUTAG = fetch_dataset("MUTAG", verbose=False) 22 | G, y = MUTAG.data, MUTAG.target 23 | 24 | # Splits the dataset into a training and a test set 25 | G_train, G_test, y_train, y_test = train_test_split(G, y, test_size=0.1, random_state=42) 26 | 27 | # Uses the Weisfeiler-Lehman subtree kernel to generate the kernel matrices 28 | gk = WeisfeilerLehman(n_iter=4, base_graph_kernel=VertexHistogram, normalize=True) 29 | K_train = gk.fit_transform(G_train) 30 | K_test = gk.transform(G_test) 31 | 32 | # Uses the SVM classifier to perform classification 33 | clf = SVC(kernel="precomputed") 34 | clf.fit(K_train, y_train) 35 | y_pred = clf.predict(K_test) 36 | 37 | # Computes and prints the classification accuracy 38 | acc = accuracy_score(y_test, y_pred) 39 | print("Accuracy:", str(round(acc*100, 2)) + "%") 40 | -------------------------------------------------------------------------------- /git: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/git -------------------------------------------------------------------------------- /grakel/__init__.py: -------------------------------------------------------------------------------- 1 | """Init file for the whole grakel project.""" 2 | from grakel import datasets 3 | 4 | from grakel.graph import Graph 5 | 6 | from grakel.graph_kernels import 
"""Init file for the whole grakel project."""
# Re-exports the public API: Graph, the GraphKernel wrapper, every kernel
# class and the utility helpers, so users can `from grakel import ...`.
from grakel import datasets

from grakel.graph import Graph

from grakel.graph_kernels import GraphKernel


from grakel.kernels import Kernel

from grakel.kernels import GraphletSampling
from grakel.kernels import RandomWalk
from grakel.kernels import RandomWalkLabeled
from grakel.kernels import ShortestPath
from grakel.kernels import ShortestPathAttr
from grakel.kernels import WeisfeilerLehman
from grakel.kernels import NeighborhoodHash
from grakel.kernels import PyramidMatch
from grakel.kernels import SubgraphMatching
from grakel.kernels import NeighborhoodSubgraphPairwiseDistance
from grakel.kernels import LovaszTheta
from grakel.kernels import SvmTheta
from grakel.kernels import OddSth
from grakel.kernels import Propagation
from grakel.kernels import PropagationAttr
from grakel.kernels import HadamardCode
from grakel.kernels import MultiscaleLaplacian
from grakel.kernels import VertexHistogram
from grakel.kernels import EdgeHistogram
from grakel.kernels import GraphHopper
from grakel.kernels import CoreFramework
from grakel.kernels import WeisfeilerLehmanOptimalAssignment

from grakel.utils import KMTransformer
from grakel.utils import cross_validate_Kfold_SVM
from grakel.utils import graph_from_networkx
from grakel.utils import graph_from_pandas
from grakel.utils import graph_from_csv
from grakel.utils import graph_from_torch_geometric

# Names exported by `from grakel import *`; every entry is imported above.
__all__ = [
    "datasets",
    "GraphKernel",
    "Graph",
    "Kernel",
    "GraphletSampling",
    "RandomWalk",
    "RandomWalkLabeled",
    "ShortestPath",
    "ShortestPathAttr",
    "WeisfeilerLehman",
    "NeighborhoodHash",
    "PyramidMatch",
    "SubgraphMatching",
    "NeighborhoodSubgraphPairwiseDistance",
    "LovaszTheta",
    "SvmTheta",
    "OddSth",
    "Propagation",
    "PropagationAttr",
    "HadamardCode",
    "MultiscaleLaplacian",
    "VertexHistogram",
    "EdgeHistogram",
    "GraphHopper",
    "CoreFramework",
    "WeisfeilerLehmanOptimalAssignment",
    "graph_from_networkx",
    "graph_from_pandas",
    "graph_from_csv",
    "graph_from_torch_geometric",
    "KMTransformer",
    "cross_validate_Kfold_SVM"
]

# Generic release markers:
#   X.Y
#   X.Y.Z       # For bugfix releases
#
# Admissible pre-release markers:
#   X.YaN       # Alpha release
#   X.YbN       # Beta release
#   X.YrcN      # Release Candidate
#   X.Y         # Final release
#
# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
#
__version__ = '0.1.8'

# --- grakel/datasets/__init__.py ---
"""Import datasets related with graph kernels, from a large collection."""
from grakel.datasets.base import fetch_dataset
from grakel.datasets.base import get_dataset_info
from grakel.datasets.testing import generate_dataset

__all__ = [
    "get_dataset_info",
    "fetch_dataset",
    "generate_dataset"
]
"""__init__ file for kernel sub-module of grakel."""
# Author: Ioannis Siglidis
# License: BSD 3 clause
from grakel.kernels.kernel import Kernel

from grakel.kernels.graphlet_sampling import GraphletSampling
from grakel.kernels.random_walk import RandomWalk
from grakel.kernels.random_walk import RandomWalkLabeled
from grakel.kernels.shortest_path import ShortestPath
from grakel.kernels.shortest_path import ShortestPathAttr
from grakel.kernels.weisfeiler_lehman import WeisfeilerLehman
from grakel.kernels.neighborhood_hash import NeighborhoodHash
from grakel.kernels.pyramid_match import PyramidMatch
from grakel.kernels.subgraph_matching import SubgraphMatching
from grakel.kernels.neighborhood_subgraph_pairwise_distance import \
    NeighborhoodSubgraphPairwiseDistance
from grakel.kernels.lovasz_theta import LovaszTheta
from grakel.kernels.svm_theta import SvmTheta
from grakel.kernels.odd_sth import OddSth
from grakel.kernels.propagation import Propagation
from grakel.kernels.propagation import PropagationAttr
from grakel.kernels.hadamard_code import HadamardCode
from grakel.kernels.multiscale_laplacian import MultiscaleLaplacian
from grakel.kernels.vertex_histogram import VertexHistogram
from grakel.kernels.edge_histogram import EdgeHistogram
from grakel.kernels.graph_hopper import GraphHopper
from grakel.kernels.core_framework import CoreFramework
from grakel.kernels.weisfeiler_lehman_optimal_assignment import WeisfeilerLehmanOptimalAssignment

# Fixed: "default_executor" was listed in __all__ but is never imported or
# defined in this module, so `from grakel.kernels import *` raised
# AttributeError.  Every remaining entry matches an import above.
__all__ = [
    "Kernel",
    "GraphletSampling",
    "RandomWalk",
    "RandomWalkLabeled",
    "ShortestPath",
    "ShortestPathAttr",
    "WeisfeilerLehman",
    "NeighborhoodHash",
    "PyramidMatch",
    "SubgraphMatching",
    "NeighborhoodSubgraphPairwiseDistance",
    "LovaszTheta",
    "SvmTheta",
    "OddSth",
    "Propagation",
    "PropagationAttr",
    "HadamardCode",
    "MultiscaleLaplacian",
    "VertexHistogram",
    "EdgeHistogram",
    "GraphHopper",
    "CoreFramework",
    "WeisfeilerLehmanOptimalAssignment"
]
/* ------------------------------------------------------------------
 * grakel/kernels/_c_functions/header.pxd  (Cython declarations;
 * rendered as a comment here so this block stays compilable as C++):
 *
 *   cimport cython
 *   cdef extern from "include/functions.hpp":
 *       unsigned int ArashPartov(const char* str, unsigned int length)
 *       void sm_core_init(double value, int* d, int nv, int kappa,
 *                         double *cost_vertices, double **cost_edges,
 *                         double *total_value)
 * ------------------------------------------------------------------ */

/* ------------------------------------------------------------------
 * grakel/kernels/_c_functions/include/functions.hpp
 * ------------------------------------------------------------------ */
#ifndef FUNCTIONS_H_
#define FUNCTIONS_H_

/* Arash Partow's AP string hash over `length` bytes of `str`. */
unsigned int ArashPartov(const char* str, unsigned int length);

/* Entry point of the subgraph-matching clique enumeration.
 * d            : permutation of the nv product-graph vertex indices
 *                (reordered in place during the search)
 * nv           : number of product-graph vertices
 * kappa        : maximum clique size considered
 * cost_vertices: per-vertex weights, indexed by vertex id
 * cost_edges   : pairwise edge weights, cost_edges[i][j]
 * total_value  : total_value[s] accumulates the weighted count of
 *                cliques with s+1 vertices, for s in [0, kappa). */
void sm_core_init(double value, int* d, int nv, int kappa,
                  double *cost_vertices, double **cost_edges,
                  double *total_value);

#endif  /* FUNCTIONS_H_ */

/* ------------------------------------------------------------------
 * grakel/kernels/_c_functions/src/ArashPartov.cpp
 * ------------------------------------------------------------------ */
/* Arash Partow hash function
 * Author: Ioannis Siglidis
 * License: BSD 3 clause
 * Code taken from: http://www.partow.net/programming/hashfunctions/#APHashFunction
 */
/* #include "../include/functions.hpp" — declarations inlined above in
 * this dump; the real file keeps the include. */

/* AP hash: folds each byte into a 32-bit state, alternating two mixing
 * schemes on even/odd byte positions.  An empty input returns the seed
 * 0xAAAAAAAA unchanged. */
unsigned int ArashPartov(const char* str, unsigned int length)
{
    unsigned int hash = 0xAAAAAAAA;
    unsigned int i = 0;

    for (i = 0; i < length; ++str, ++i)
    {
        hash ^= ((i & 1) == 0) ? (  (hash << 7) ^ (*str) * (hash >> 3)) :
                                 (~((hash << 11) + ((*str) ^ (hash >> 5))));
    }

    return hash;
}

/* ------------------------------------------------------------------
 * grakel/kernels/_c_functions/src/sm_core.cpp
 * ------------------------------------------------------------------ */
/* Subgraph Matching Kernel (supplementary functions)
 * Author: Ioannis Siglidis
 * License: BSD 3 clause
 * NOTE(review): the original header said "Code taken from:
 * http://www.partow.net/programming/hashfunctions/#APHashFunction" — a
 * copy/paste leftover from ArashPartov.cpp.  This file implements the
 * weighted-clique enumeration of the subgraph matching kernel
 * (cf. Kriege & Mutzel, ICML 2012).
 */
/* NOTE(review): the '#include <...>' targets were stripped by the text
 * extraction; restored as the headers the code demonstrably needs
 * (std::list, abs) — confirm against upstream. */
#include <list>
#include <cmath>
#include <cstdlib>

using namespace std;

/* Shared state for the recursion, set once by sm_core_init(). */
double *cv;            /* per-vertex costs of the product graph        */
double **ce;           /* pairwise edge costs, ce[i][j]                */
double *totalValue;    /* totalValue[s]: accumulated value of cliques
                        * with s+1 vertices                            */
unsigned int k;        /* maximum clique size (kappa)                  */

/* Recursively extend the current clique `c` (whose accumulated weight
 * is `value`) with candidates from `p`, keeping the vertex order `d`
 * partitioned between lBound and uBound. */
void sm_core(double value, list<int> c, list<int> p, int* d, int lBound, int uBound) {

    while (!p.empty()) {
        int i = p.front();
        p.pop_front();

        /* Weight of the clique c ∪ {i}: vertex cost times the (absolute)
         * costs of all edges connecting i to the current clique. */
        double nValue = value * cv[i];
        double* iEdgeValue = ce[i];
        for (list<int>::const_iterator it = c.begin(); it != c.end(); it++) {
            nValue *= abs(iEdgeValue[*it]);
        }

        totalValue[c.size()] += nValue;

        if (c.size() + 1 < k) {
            c.push_back(i);

            /* Prepare the candidate set for the recursive call: only
             * vertices still connected to i can extend the clique. */
            list<int> newP;
            for (list<int>::const_iterator it = p.begin(); it != p.end(); it++) {
                int v = *it;
                if (iEdgeValue[v] != 0)
                    newP.push_back(v);
            }

            int newUBound = uBound;
            int newLBound = lBound;
            if (lBound <= uBound) {
                int tmp;
                /* Shrink the [lBound, uBound] window to vertices adjacent
                 * to i, moving positively-weighted ones into newP and
                 * compacting them to the front of the window. */
                while (iEdgeValue[d[newUBound]] == 0 && --newUBound > lBound);
                while (iEdgeValue[d[newLBound]] == 0 && ++newLBound < newUBound);
                int nm = newLBound - 1;
                while (++nm <= newUBound) {
                    if (iEdgeValue[d[nm]] < 0) continue;
                    if (iEdgeValue[d[nm]] > 0)
                        newP.push_back(d[nm]);
                    /* swap d[newLBound] and d[nm] */
                    tmp = d[newLBound];
                    d[newLBound] = d[nm];
                    d[nm] = tmp;
                    newLBound++;
                }
            }

            sm_core(nValue, c, newP, d, newLBound, newUBound);
            c.pop_back();
        }
    }
}


/* Initialize the shared state and launch one clique search per start
 * vertex (in the order given by `d`); see functions.hpp for the
 * parameter contract. */
void sm_core_init(double value, int* d, int nv, int kappa,
                  double *cost_vertices, double **cost_edges,
                  double *total_value) {

    cv = cost_vertices;
    ce = cost_edges;
    totalValue = total_value;
    k = (unsigned int) kappa;

    list<int> c;
    int lBound = 0;
    int uBound = nv - 1;

    for (int it = lBound; it <= uBound; it++) {
        int i = d[it];
        double nValue = value * cv[i];

        /* Every single vertex is a clique of size 1. */
        totalValue[0] += nValue;

        if (k > 1) {
            c.push_back(i);

            /* Prepare the candidate set for the recursive call,
             * restricted to vertices after position `it` that are
             * adjacent to i (avoids counting any clique twice). */
            list<int> p;
            int tmp;
            int newUBound = uBound;
            double *iEdgeValue = ce[i];
            while (iEdgeValue[d[newUBound]] == 0 && --newUBound > it);
            int newLBound = it;
            while (++newLBound <= newUBound && iEdgeValue[d[newLBound]] == 0);
            int nm = newLBound - 1;
            while (++nm <= newUBound) {
                if (iEdgeValue[d[nm]] < 0) continue;
                if (iEdgeValue[d[nm]] > 0)
                    p.push_back(d[nm]);
                /* swap d[newLBound] and d[nm] */
                tmp = d[newLBound];
                d[newLBound] = d[nm];
                d[nm] = tmp;
                newLBound++;
            }

            sm_core(nValue, c, p, d, newLBound, newUBound);
            c.pop_back();
        }
    }
}
6 | # License: BSD 3 clause" 7 | from grakel.kernels._isomorphism.bliss import Graph 8 | 9 | __all__ = [ 10 | "Graph", 11 | ] 12 | -------------------------------------------------------------------------------- /grakel/kernels/_isomorphism/bliss-0.50/bignum.hh: -------------------------------------------------------------------------------- 1 | #ifndef BLISS_BIGNUM_HH 2 | #define BLISS_BIGNUM_HH 3 | 4 | /* 5 | * Copyright (c) Tommi Junttila 6 | * Released under the GNU General Public License version 2. 7 | */ 8 | 9 | #if defined(BLISS_USE_GMP) 10 | #include 11 | #endif 12 | 13 | #include 14 | #include 15 | #include "defs.hh" 16 | 17 | namespace bliss { 18 | 19 | /** 20 | * \brief A very simple class for big integers (or approximation of them). 21 | * 22 | * If the compile time flag BLISS_USE_GMP is set, 23 | * then the GNU Multiple Precision Arithmetic library (GMP) is used to 24 | * obtain arbitrary precision, otherwise "long double" is used to 25 | * approximate big integers. 26 | */ 27 | 28 | 29 | #if defined(BLISS_USE_GMP) 30 | 31 | 32 | class BigNum 33 | { 34 | mpz_t v; 35 | public: 36 | /** 37 | * Create a new big number and set it to zero. 38 | */ 39 | BigNum() {mpz_init(v); } 40 | 41 | /** 42 | * Destroy the number. 43 | */ 44 | ~BigNum() {mpz_clear(v); } 45 | 46 | /** 47 | * Set the number to 'n'. 48 | */ 49 | void assign(const int n) {mpz_set_si(v, n); } 50 | 51 | /** 52 | * Multiply the number with 'n'. 53 | */ 54 | void multiply(const int n) {mpz_mul_si(v, v, n); } 55 | 56 | /** 57 | * Print the number in the file stream 'fp'. 58 | */ 59 | int print(FILE *fp) {return mpz_out_str(fp, 10, v); } 60 | }; 61 | 62 | #else 63 | 64 | class BigNum 65 | { 66 | long double v; 67 | public: 68 | /** 69 | * Create a new big number and set it to zero. 70 | */ 71 | BigNum(): v(0.0) {} 72 | 73 | /** 74 | * Set the number to 'n'. 75 | */ 76 | void assign(const int n) {v = (long double)n; } 77 | 78 | /** 79 | * Multiply the number with 'n'. 
80 | */ 81 | void multiply(const int n) {v *= (long double)n; } 82 | 83 | /** 84 | * Print the number in the file stream 'fp'. 85 | */ 86 | int print(FILE *fp) {return fprintf(fp, "%Lg", v); } 87 | }; 88 | 89 | #endif 90 | 91 | } //namespace bliss 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /grakel/kernels/_isomorphism/bliss-0.50/bliss_C.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "graph.hh" 5 | extern "C" { 6 | #include "bliss_C.h" 7 | } 8 | 9 | struct bliss_graph_struct { 10 | bliss::Graph *g; 11 | }; 12 | 13 | extern "C" 14 | BlissGraph *bliss_new(const unsigned int n) 15 | { 16 | BlissGraph *graph = new bliss_graph_struct; 17 | assert(graph); 18 | graph->g = new bliss::Graph(n); 19 | assert(graph->g); 20 | return graph; 21 | } 22 | 23 | extern "C" 24 | BlissGraph *bliss_read_dimacs(FILE *fp) 25 | { 26 | bliss::Graph *g = bliss::Graph::read_dimacs(fp); 27 | if(!g) 28 | return 0; 29 | BlissGraph *graph = new bliss_graph_struct; 30 | assert(graph); 31 | graph->g = g; 32 | return graph; 33 | } 34 | 35 | extern "C" 36 | void bliss_write_dimacs(BlissGraph *graph, FILE *fp) 37 | { 38 | assert(graph); 39 | assert(graph->g); 40 | graph->g->write_dimacs(fp); 41 | } 42 | 43 | extern "C" 44 | void bliss_release(BlissGraph *graph) 45 | { 46 | assert(graph); 47 | assert(graph->g); 48 | delete graph->g; graph->g = 0; 49 | delete graph; 50 | } 51 | 52 | extern "C" 53 | void bliss_write_dot(BlissGraph *graph, FILE *fp) 54 | { 55 | assert(graph); 56 | assert(graph->g); 57 | graph->g->write_dot(fp); 58 | } 59 | 60 | extern "C" 61 | unsigned int bliss_get_nof_vertices(BlissGraph *graph) 62 | { 63 | assert(graph); 64 | assert(graph->g); 65 | return graph->g->get_nof_vertices(); 66 | } 67 | 68 | extern "C" 69 | unsigned int bliss_add_vertex(BlissGraph *graph, unsigned int l) 70 | { 71 | assert(graph); 72 | assert(graph->g); 73 | 
return graph->g->add_vertex(l); 74 | } 75 | 76 | extern "C" 77 | void bliss_add_edge(BlissGraph *graph, unsigned int v1, unsigned int v2) 78 | { 79 | assert(graph); 80 | assert(graph->g); 81 | graph->g->add_edge(v1, v2); 82 | } 83 | 84 | extern "C" 85 | int bliss_cmp(BlissGraph *graph1, BlissGraph *graph2) 86 | { 87 | assert(graph1); 88 | assert(graph1->g); 89 | assert(graph2); 90 | assert(graph2->g); 91 | return graph1->g->cmp(graph2->g); 92 | } 93 | 94 | extern "C" 95 | unsigned int bliss_hash(BlissGraph *graph) 96 | { 97 | assert(graph); 98 | assert(graph->g); 99 | return graph->g->get_hash(); 100 | } 101 | 102 | extern "C" 103 | BlissGraph *bliss_permute(BlissGraph *graph, const unsigned int *perm) 104 | { 105 | assert(graph); 106 | assert(graph->g); 107 | assert(graph->g->get_nof_vertices() == 0 || perm); 108 | BlissGraph *permuted_graph = new bliss_graph_struct; 109 | assert(permuted_graph); 110 | permuted_graph->g = graph->g->permute(perm); 111 | return permuted_graph; 112 | } 113 | 114 | extern "C" 115 | void 116 | bliss_find_automorphisms(BlissGraph *graph, 117 | void (*hook)(void *user_param, 118 | unsigned int n, 119 | const unsigned int *aut), 120 | void *hook_user_param, 121 | BlissStats *stats) 122 | { 123 | bliss::Stats s; 124 | assert(graph); 125 | assert(graph->g); 126 | graph->g->find_automorphisms(s, hook, hook_user_param); 127 | 128 | if(stats) 129 | { 130 | stats->group_size_approx = s.group_size_approx; 131 | stats->nof_nodes = s.nof_nodes; 132 | stats->nof_leaf_nodes = s.nof_leaf_nodes; 133 | stats->nof_bad_nodes = s.nof_bad_nodes; 134 | stats->nof_canupdates = s.nof_canupdates; 135 | stats->nof_generators = s.nof_generators; 136 | stats->max_level = s.max_level; 137 | } 138 | } 139 | 140 | 141 | extern "C" 142 | const unsigned int * 143 | bliss_find_canonical_labeling(BlissGraph *graph, 144 | void (*hook)(void *user_param, 145 | unsigned int n, 146 | const unsigned int *aut), 147 | void *hook_user_param, 148 | BlissStats *stats) 149 | { 150 
| bliss::Stats s; 151 | const unsigned int *canonical_labeling = 0; 152 | assert(graph); 153 | assert(graph->g); 154 | 155 | canonical_labeling = graph->g->canonical_form(s, hook, hook_user_param); 156 | 157 | if(stats) 158 | { 159 | stats->group_size_approx = s.group_size_approx; 160 | stats->nof_nodes = s.nof_nodes; 161 | stats->nof_leaf_nodes = s.nof_leaf_nodes; 162 | stats->nof_bad_nodes = s.nof_bad_nodes; 163 | stats->nof_canupdates = s.nof_canupdates; 164 | stats->nof_generators = s.nof_generators; 165 | stats->max_level = s.max_level; 166 | } 167 | 168 | return canonical_labeling; 169 | } 170 | -------------------------------------------------------------------------------- /grakel/kernels/_isomorphism/bliss-0.50/defs.hh: -------------------------------------------------------------------------------- 1 | #ifndef BLISS_DEFS_HH 2 | #define BLISS_DEFS_HH 3 | 4 | #include 5 | 6 | namespace bliss { 7 | 8 | /** 9 | * The version number of bliss. 10 | */ 11 | static const char * const version = "0.50"; 12 | 13 | 14 | #if defined(BLISS_DEBUG) 15 | #define BLISS_CONSISTENCY_CHECKS 16 | #define BLISS_EXPENSIVE_CONSISTENCY_CHECKS 17 | #endif 18 | 19 | #if defined(BLISS_CONSISTENCY_CHECKS) 20 | #define BLISS_ASSERT(a) assert(a) 21 | //inline void BLISS_ASSERT(const int c) {assert(c); } 22 | #else 23 | #define BLISS_ASSERT(a) ; 24 | //inline void BLISS_ASSERT(const int c) {} 25 | #endif 26 | 27 | 28 | #if defined(BLISS_CONSISTENCY_CHECKS) 29 | /* Force a check that the found automorphisms are valid */ 30 | #define BLISS_VERIFY_AUTOMORPHISMS 31 | #endif 32 | 33 | 34 | #if defined(BLISS_CONSISTENCY_CHECKS) 35 | /* Force a check that the generated partitions are equitable */ 36 | #define BLISS_VERIFY_EQUITABLEDNESS 37 | #endif 38 | 39 | 40 | } // namespace bliss 41 | 42 | 43 | 44 | /*! \mainpage Bliss 45 | * 46 | * \section intro_sec Introduction 47 | * 48 | * This is the source code documentation of bliss, 49 | * produced by running doxygen in 50 | * the source directory. 
51 | * The algorithms and data structures used in bliss are documented in 52 | * the papers found at the 53 | * bliss web site. 54 | * 55 | * 56 | * \section compile_sec Compiling 57 | * 58 | * Compiling bliss in Linux should be easy, just execute 59 | * \code 60 | * make 61 | * \endcode 62 | * in the bliss source directory. 63 | * This will produce the executable program \c bliss as well as 64 | * the library file \c libbliss.a that can be linked in other programs. 65 | * If you have the GNU Multiple Precision 66 | * Arithmetic Library (GMP) installed in your machine, you can also use 67 | * \code 68 | * make gmp 69 | * \endcode 70 | * to enable exact computation of automorphism group sizes. 71 | * 72 | * When linking the bliss library \c libbliss.a in other programs, 73 | * remember to include the standard c++ library 74 | * (and the GMP library if you compiled bliss to include it). 75 | * For instance, 76 | * \code gcc -o test test.c -lstdc++ -lgmp -lbliss\endcode 77 | * 78 | * \section cppapi_sec The C++ language API 79 | * 80 | * The C++ language API is the main API to bliss; 81 | * all other APIs are just more or less complete variants of it. 82 | * The C++ API consists basically of the public methods in 83 | * the classes bliss::AbstractGraph, bliss::Graph, and bliss::Digraph. 84 | * For an example of its use, 85 | * see the \ref executable "source of the bliss executable". 86 | * 87 | * 88 | * \section capi_sec The C language API 89 | * 90 | * The C language API is given in the file bliss_C.h. 91 | * It is currently more restricted than the C++ API so 92 | * consider using the C++ API whenever possible. 
93 | */ 94 | 95 | 96 | #endif 97 | -------------------------------------------------------------------------------- /grakel/kernels/_isomorphism/bliss-0.50/heap.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "defs.hh" 5 | #include "heap.hh" 6 | 7 | namespace bliss { 8 | 9 | Heap::~Heap() 10 | { 11 | if(array) 12 | { 13 | free(array); 14 | array = 0; 15 | n = 0; 16 | N = 0; 17 | } 18 | } 19 | 20 | void Heap::upheap(unsigned int index) 21 | { 22 | BLISS_ASSERT(n >= 1); 23 | BLISS_ASSERT(index >= 1 && index <= n); 24 | const unsigned int v = array[index]; 25 | array[0] = 0; 26 | while(array[index/2] > v) 27 | { 28 | array[index] = array[index/2]; 29 | index = index/2; 30 | } 31 | array[index] = v; 32 | } 33 | 34 | void Heap::downheap(unsigned int index) 35 | { 36 | const unsigned int v = array[index]; 37 | while(index <= n/2) 38 | { 39 | unsigned int new_index = index + index; 40 | if((new_index < n) && (array[new_index] > array[new_index+1])){ 41 | new_index++;} 42 | if(v <= array[new_index]){ 43 | break;} 44 | array[index] = array[new_index]; 45 | index = new_index; 46 | } 47 | array[index] = v; 48 | } 49 | 50 | void Heap::init(const unsigned int size) 51 | { 52 | BLISS_ASSERT(size > 0); 53 | if(size > N) 54 | { 55 | if(array) 56 | free(array); 57 | array = (unsigned int*)malloc((size + 1) * sizeof(unsigned int)); 58 | N = size; 59 | } 60 | n = 0; 61 | } 62 | 63 | void Heap::insert(const unsigned int v) 64 | { 65 | BLISS_ASSERT(n < N); 66 | array[++n] = v; 67 | upheap(n); 68 | } 69 | 70 | unsigned int Heap::remove() 71 | { 72 | BLISS_ASSERT(n >= 1 && n <= N); 73 | const unsigned int v = array[1]; 74 | array[1] = array[n--]; 75 | downheap(1); 76 | return v; 77 | } 78 | 79 | } // namespace bliss 80 | -------------------------------------------------------------------------------- /grakel/kernels/_isomorphism/bliss-0.50/heap.hh: 
-------------------------------------------------------------------------------- 1 | #ifndef BLISS_HEAP_HH 2 | #define BLISS_HEAP_HH 3 | 4 | namespace bliss { 5 | 6 | /** 7 | * \brief A capacity bounded heap data structure. 8 | */ 9 | 10 | class Heap 11 | { 12 | unsigned int N; 13 | unsigned int n; 14 | unsigned int *array; 15 | void upheap(unsigned int k); 16 | void downheap(unsigned int k); 17 | public: 18 | /** 19 | * Create a new heap. 20 | * init() must be called after this. 21 | */ 22 | Heap() {array = 0; n = 0; N = 0; } 23 | ~Heap(); 24 | 25 | /** 26 | * Initialize the heap to have the capacity to hold \e size elements. 27 | */ 28 | void init(const unsigned int size); 29 | 30 | /** 31 | * Is the heap empty? 32 | * Time complexity is O(1). 33 | */ 34 | bool is_empty() const {return(n==0); } 35 | 36 | /** 37 | * Remove all the elements in the heap. 38 | * Time complexity is O(1). 39 | */ 40 | void clear() {n = 0;} 41 | 42 | /** 43 | * Insert the element \a e in the heap. 44 | * Time complexity is O(log(N)), where N is the number of elements 45 | * currently in the heap. 46 | */ 47 | void insert(const unsigned int e); 48 | 49 | /** 50 | * Remove and return the smallest element in the heap. 51 | * Time complexity is O(log(N)), where N is the number of elements 52 | * currently in the heap. 53 | */ 54 | unsigned int remove(); 55 | }; 56 | 57 | } // namespace bliss 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /grakel/kernels/_isomorphism/bliss-0.50/kqueue.hh: -------------------------------------------------------------------------------- 1 | #ifndef BLISS_KQUEUE_HH 2 | #define BLISS_KQUEUE_HH 3 | 4 | /* 5 | * Copyright (c) Tommi Junttila 6 | * Released under the GNU General Public License version 2. 7 | */ 8 | 9 | #include "defs.hh" 10 | 11 | namespace bliss { 12 | 13 | /** 14 | * \brief A very simple implementation of queues with fixed capacity. 
15 | */ 16 | 17 | template 18 | class KQueue 19 | { 20 | public: 21 | /** 22 | * Create a new queue with capacity zero. 23 | * The function init() should be called next. 24 | */ 25 | KQueue(); 26 | 27 | ~KQueue(); 28 | 29 | /** 30 | * Initialize the queue to have the capacity to hold at most \a N elements. 31 | */ 32 | void init(const unsigned int N); 33 | 34 | /** Is the queue empty? */ 35 | bool is_empty() const; 36 | 37 | /** Return the number of elements in the queue. */ 38 | unsigned int size() const; 39 | 40 | /** Remove all the elements in the queue. */ 41 | void clear(); 42 | 43 | /** Return (but don't remove) the first element in the queue. */ 44 | Type front() const; 45 | 46 | /** Remove and return the first element of the queue. */ 47 | Type pop_front(); 48 | 49 | /** Push the element \a e in the front of the queue. */ 50 | void push_front(Type e); 51 | 52 | /** Remove and return the last element of the queue. */ 53 | Type pop_back(); 54 | 55 | /** Push the element \a e in the back of the queue. 
*/ 56 | void push_back(Type e); 57 | private: 58 | Type *entries, *end; 59 | Type *head, *tail; 60 | }; 61 | 62 | template 63 | KQueue::KQueue() 64 | { 65 | entries = 0; 66 | end = 0; 67 | head = 0; 68 | tail = 0; 69 | } 70 | 71 | template 72 | KQueue::~KQueue() 73 | { 74 | if(entries) 75 | free(entries); 76 | } 77 | 78 | template 79 | void KQueue::init(const unsigned int k) 80 | { 81 | assert(k > 0); 82 | if(entries) 83 | free(entries); 84 | entries = (Type*)malloc((k + 1) * sizeof(Type)); 85 | end = entries + k + 1; 86 | head = entries; 87 | tail = head; 88 | } 89 | 90 | template 91 | void KQueue::clear() 92 | { 93 | head = entries; 94 | tail = head; 95 | } 96 | 97 | template 98 | bool KQueue::is_empty() const 99 | { 100 | return(head == tail); 101 | } 102 | 103 | template 104 | unsigned int KQueue::size() const 105 | { 106 | if(tail >= head) 107 | return(tail - head); 108 | return((end - head) + (tail - entries)); 109 | } 110 | 111 | template 112 | Type KQueue::front() const 113 | { 114 | BLISS_ASSERT(head != tail); 115 | return *head; 116 | } 117 | 118 | template 119 | Type KQueue::pop_front() 120 | { 121 | BLISS_ASSERT(head != tail); 122 | Type *old_head = head; 123 | head++; 124 | if(head == end) 125 | head = entries; 126 | return *old_head; 127 | } 128 | 129 | template 130 | void KQueue::push_front(Type e) 131 | { 132 | if(head == entries) 133 | head = end - 1; 134 | else 135 | head--; 136 | BLISS_ASSERT(head != tail); 137 | *head = e; 138 | } 139 | 140 | template 141 | void KQueue::push_back(Type e) 142 | { 143 | *tail = e; 144 | tail++; 145 | if(tail == end) 146 | tail = entries; 147 | BLISS_ASSERT(head != tail); 148 | } 149 | 150 | } // namespace bliss 151 | 152 | #endif 153 | -------------------------------------------------------------------------------- /grakel/kernels/_isomorphism/bliss-0.50/kstack.hh: -------------------------------------------------------------------------------- 1 | #ifndef BLISS_KSTACK_H 2 | #define BLISS_KSTACK_H 3 | 4 | /* 5 | * 
Copyright (c) Tommi Junttila 6 | * Released under the GNU General Public License version 2. 7 | */ 8 | 9 | #include 10 | #include "defs.hh" 11 | 12 | namespace bliss { 13 | 14 | /** 15 | * \brief A very simple implementation of a stack with fixed capacity. 16 | */ 17 | template 18 | class KStack { 19 | public: 20 | /** 21 | * Create a new stack with zero capacity. 22 | * The function init() should be called next. 23 | */ 24 | KStack(); 25 | 26 | /** 27 | * Create a new stack with the capacity to hold at most \a N elements. 28 | */ 29 | KStack(int N); 30 | 31 | ~KStack(); 32 | 33 | /** 34 | * Initialize the stack to have the capacity to hold at most \a N elements. 35 | */ 36 | void init(int N); 37 | 38 | /** 39 | * Is the stack empty? 40 | */ 41 | bool is_empty() const {return(cursor == entries); } 42 | 43 | /** 44 | * Return (but don't remove) the top element of the stack. 45 | */ 46 | Type top() const {BLISS_ASSERT(cursor > entries); return *cursor; } 47 | 48 | /** 49 | * Pop (remove) the top element of the stack. 50 | */ 51 | Type pop() 52 | { 53 | BLISS_ASSERT(cursor > entries); 54 | return *cursor--; 55 | } 56 | 57 | /** 58 | * Push the element \a e in the stack. 59 | */ 60 | void push(Type e) 61 | { 62 | BLISS_ASSERT(cursor < entries + kapacity); 63 | *(++cursor) = e; 64 | } 65 | 66 | /** Remove all the elements in the stack. */ 67 | void clean() {cursor = entries; } 68 | 69 | /** 70 | * Get the number of elements in the stack. 71 | */ 72 | unsigned int size() const {return(cursor - entries); } 73 | 74 | /** 75 | * Return the i:th element in the stack, where \a i is in the range 76 | * 0,...,this.size()-1; the 0:th element is the bottom element 77 | * in the stack. 78 | */ 79 | Type element_at(unsigned int i) 80 | { 81 | assert(i < size()); 82 | return entries[i+1]; 83 | } 84 | 85 | /** Return the capacity (NOT the number of elements) of the stack. 
*/ 86 | int capacity() {return kapacity; } 87 | private: 88 | int kapacity; 89 | Type *entries; 90 | Type *cursor; 91 | }; 92 | 93 | template 94 | KStack::KStack() 95 | { 96 | kapacity = 0; 97 | entries = 0; 98 | cursor = 0; 99 | } 100 | 101 | template 102 | KStack::KStack(int k) 103 | { 104 | assert(k > 0); 105 | kapacity = k; 106 | entries = (Type*)malloc((k+1) * sizeof(Type)); 107 | cursor = entries; 108 | } 109 | 110 | template 111 | void KStack::init(int k) 112 | { 113 | assert(k > 0); 114 | if(entries) 115 | free(entries); 116 | kapacity = k; 117 | entries = (Type*)malloc((k+1) * sizeof(Type)); 118 | cursor = entries; 119 | } 120 | 121 | template 122 | KStack::~KStack() 123 | { 124 | free(entries); 125 | } 126 | 127 | } // namespace bliss 128 | 129 | #endif 130 | -------------------------------------------------------------------------------- /grakel/kernels/_isomorphism/bliss-0.50/orbit.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "defs.hh" 4 | #include "orbit.hh" 5 | 6 | /* 7 | * Copyright (c) Tommi Junttila 8 | * Released under the GNU General Public License version 2. 
9 | */ 10 | 11 | namespace bliss { 12 | 13 | Orbit::Orbit() 14 | { 15 | orbits = 0; 16 | in_orbit = 0; 17 | nof_elements = 0; 18 | } 19 | 20 | 21 | Orbit::~Orbit() 22 | { 23 | if(orbits) 24 | { 25 | free(orbits); 26 | orbits = 0; 27 | } 28 | if(in_orbit) 29 | { 30 | free(in_orbit); 31 | in_orbit = 0; 32 | } 33 | nof_elements = 0; 34 | } 35 | 36 | 37 | void Orbit::init(const unsigned int n) 38 | { 39 | assert(n > 0); 40 | if(orbits) free(orbits); 41 | orbits = (OrbitEntry*)malloc(n * sizeof(OrbitEntry)); 42 | if(in_orbit) free(in_orbit); 43 | in_orbit = (OrbitEntry**)malloc(n * sizeof(OrbitEntry*)); 44 | nof_elements = n; 45 | 46 | reset(); 47 | } 48 | 49 | 50 | void Orbit::reset() 51 | { 52 | assert(orbits); 53 | assert(in_orbit); 54 | 55 | for(unsigned int i = 0; i < nof_elements; i++) 56 | { 57 | orbits[i].element = i; 58 | orbits[i].next = 0; 59 | orbits[i].size = 1; 60 | in_orbit[i] = &orbits[i]; 61 | } 62 | _nof_orbits = nof_elements; 63 | } 64 | 65 | 66 | void Orbit::merge_orbits(OrbitEntry *orbit1, OrbitEntry *orbit2) 67 | { 68 | BLISS_ASSERT((orbit1 == orbit2) == (orbit1->element == orbit2->element)); 69 | BLISS_ASSERT(orbit1->element < nof_elements); 70 | BLISS_ASSERT(orbit2->element < nof_elements); 71 | 72 | if(orbit1 != orbit2) 73 | { 74 | _nof_orbits--; 75 | /* Only update the elements in the smaller orbit */ 76 | if(orbit1->size > orbit2->size) 77 | { 78 | OrbitEntry * const temp = orbit2; 79 | orbit2 = orbit1; 80 | orbit1 = temp; 81 | } 82 | /* Link the elements of orbit1 to the almost beginning of orbit2 */ 83 | OrbitEntry *e = orbit1; 84 | while(e->next) 85 | { 86 | in_orbit[e->element] = orbit2; 87 | e = e->next; 88 | } 89 | in_orbit[e->element] = orbit2; 90 | e->next = orbit2->next; 91 | orbit2->next = orbit1; 92 | /* Keep the minimal orbit representative in the beginning */ 93 | if(orbit1->element < orbit2->element) 94 | { 95 | const unsigned int temp = orbit1->element; 96 | orbit1->element = orbit2->element; 97 | orbit2->element = temp; 98 | } 
99 | orbit2->size += orbit1->size; 100 | } 101 | } 102 | 103 | 104 | void Orbit::merge_orbits(unsigned int e1, unsigned int e2) 105 | { 106 | BLISS_ASSERT(e1 < nof_elements); 107 | BLISS_ASSERT(e2 < nof_elements); 108 | 109 | merge_orbits(in_orbit[e1], in_orbit[e2]); 110 | } 111 | 112 | 113 | bool Orbit::is_minimal_representative(unsigned int element) const 114 | { 115 | return(get_minimal_representative(element) == element); 116 | } 117 | 118 | 119 | unsigned int Orbit::get_minimal_representative(unsigned int element) const 120 | { 121 | BLISS_ASSERT(element < nof_elements); 122 | 123 | OrbitEntry * const orbit = in_orbit[element]; 124 | 125 | BLISS_ASSERT(orbit->element <= element); 126 | return(orbit->element); 127 | } 128 | 129 | 130 | unsigned int Orbit::orbit_size(unsigned int element) const 131 | { 132 | BLISS_ASSERT(element < nof_elements); 133 | 134 | return(in_orbit[element]->size); 135 | } 136 | 137 | 138 | } // namespace bliss 139 | -------------------------------------------------------------------------------- /grakel/kernels/_isomorphism/bliss-0.50/orbit.hh: -------------------------------------------------------------------------------- 1 | #ifndef BLISS_ORBIT_HH 2 | #define BLISS_ORBIT_HH 3 | 4 | /* 5 | * Copyright (c) Tommi Junttila 6 | * Released under the GNU General Public License version 2. 7 | */ 8 | 9 | namespace bliss { 10 | 11 | /** 12 | * \brief A class for representing orbit information. 13 | * 14 | * Given a set {0,...,N-1} of N elements, represent equivalence 15 | * classes (that is, unordered partitions) of the elements. 16 | * Supports only equivalence class merging, not splitting. 17 | * Merging two classes requires time O(k), where k is the number of 18 | * the elements in the smaller of the merged classes. 19 | * Getting the smallest representative in a class (and thus testing 20 | * whether two elements belong to the same class) is a constant time operation. 
21 | */ 22 | class Orbit 23 | { 24 | class OrbitEntry 25 | { 26 | public: 27 | unsigned int element; 28 | OrbitEntry *next; 29 | unsigned int size; 30 | }; 31 | 32 | OrbitEntry *orbits; 33 | OrbitEntry **in_orbit; 34 | unsigned int nof_elements; 35 | unsigned int _nof_orbits; 36 | void merge_orbits(OrbitEntry *o1, OrbitEntry *o2); 37 | 38 | public: 39 | /** 40 | * Create a new orbit information object. 41 | * The init() function must be called next to actually initialize 42 | * the object. 43 | */ 44 | Orbit(); 45 | ~Orbit(); 46 | 47 | /** 48 | * Initialize the orbit information to consider sets of \a N elements. 49 | * It is required that \a N > 0. 50 | * The orbit information is reset so that each element forms 51 | * an orbit of its own. 52 | * Time complexity is O(N). 53 | * \sa reset() 54 | */ 55 | void init(const unsigned int N); 56 | 57 | /** 58 | * Reset the orbits so that each element forms an orbit of its own. 59 | * Time complexity is O(N). 60 | */ 61 | void reset(); 62 | 63 | /** 64 | * Merge the orbits of the elements \a e1 and \a e2. 65 | * Time complexity is O(k), where k is the number of elements in 66 | * the smaller of the merged orbits. 67 | */ 68 | void merge_orbits(unsigned int e1, unsigned int e2); 69 | 70 | /** 71 | * Is the element \a e the smallest element in its orbit? 72 | * Time complexity is O(1). 73 | */ 74 | bool is_minimal_representative(unsigned int e) const; 75 | 76 | /** 77 | * Get the smallest element in the orbit of the element \a e. 78 | * Time complexity is O(1). 79 | */ 80 | unsigned int get_minimal_representative(unsigned int e) const; 81 | 82 | /** 83 | * Get the number of elements in the orbit of the element \a e. 84 | * Time complexity is O(1). 85 | */ 86 | unsigned int orbit_size(unsigned int e) const; 87 | 88 | /** 89 | * Get the number of orbits. 90 | * Time complexity is O(1). 
91 | */ 92 | unsigned int nof_orbits() const {return _nof_orbits; } 93 | }; 94 | 95 | } // namespace bliss 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /grakel/kernels/_isomorphism/bliss-0.50/uintseqhash.cc: -------------------------------------------------------------------------------- 1 | #include "uintseqhash.hh" 2 | 3 | namespace bliss { 4 | 5 | /* 6 | * Random bits generated by 7 | * http://www.fourmilab.ch/hotbits/ 8 | */ 9 | static unsigned int rtab[256] = { 10 | 0xAEAA35B8, 0x65632E16, 0x155EDBA9, 0x01349B39, 11 | 0x8EB8BD97, 0x8E4C5367, 0x8EA78B35, 0x2B1B4072, 12 | 0xC1163893, 0x269A8642, 0xC79D7F6D, 0x6A32DEA0, 13 | 0xD4D2DA56, 0xD96D4F47, 0x47B5F48A, 0x2587C6BF, 14 | 0x642B71D8, 0x5DBBAF58, 0x5C178169, 0xA16D9279, 15 | 0x75CDA063, 0x291BC48B, 0x01AC2F47, 0x5416DF7C, 16 | 0x45307514, 0xB3E1317B, 0xE1C7A8DE, 0x3ACDAC96, 17 | 0x11B96831, 0x32DE22DD, 0x6A1DA93B, 0x58B62381, 18 | 0x283810E2, 0xBC30E6A6, 0x8EE51705, 0xB06E8DFB, 19 | 0x729AB12A, 0xA9634922, 0x1A6E8525, 0x49DD4E19, 20 | 0xE5DB3D44, 0x8C5B3A02, 0xEBDE2864, 0xA9146D9F, 21 | 0x736D2CB4, 0xF5229F42, 0x712BA846, 0x20631593, 22 | 0x89C02603, 0xD5A5BF6A, 0x823F4E18, 0x5BE5DEFF, 23 | 0x1C4EBBFA, 0x5FAB8490, 0x6E559B0C, 0x1FE528D6, 24 | 0xB3198066, 0x4A965EB5, 0xFE8BB3D5, 0x4D2F6234, 25 | 0x5F125AA4, 0xBCC640FA, 0x4F8BC191, 0xA447E537, 26 | 0xAC474D3C, 0x703BFA2C, 0x617DC0E7, 0xF26299D7, 27 | 0xC90FD835, 0x33B71C7B, 0x6D83E138, 0xCBB1BB14, 28 | 0x029CF5FF, 0x7CBD093D, 0x4C9825EF, 0x845C4D6D, 29 | 0x124349A5, 0x53942D21, 0x800E60DA, 0x2BA6EB7F, 30 | 0xCEBF30D3, 0xEB18D449, 0xE281F724, 0x58B1CB09, 31 | 0xD469A13D, 0x9C7495C3, 0xE53A7810, 0xA866C08E, 32 | 0x832A038B, 0xDDDCA484, 0xD5FE0DDE, 0x0756002B, 33 | 0x2FF51342, 0x60FEC9C8, 0x061A53E3, 0x47B1884E, 34 | 0xDC17E461, 0xA17A6A37, 0x3158E7E2, 0xA40D873B, 35 | 0x45AE2140, 0xC8F36149, 0x63A4EE2D, 0xD7107447, 36 | 0x6F90994F, 0x5006770F, 0xC1F3CA9A, 0x91B317B2, 37 | 0xF61B4406, 0xA8C9EE8F, 
0xC6939B75, 0xB28BBC3B, 38 | 0x36BF4AEF, 0x3B12118D, 0x4D536ECF, 0x9CF4B46B, 39 | 0xE8AB1E03, 0x8225A360, 0x7AE4A130, 0xC4EE8B50, 40 | 0x50651797, 0x5BB4C59F, 0xD120EE47, 0x24F3A386, 41 | 0xBE579B45, 0x3A378EFC, 0xC5AB007B, 0x3668942B, 42 | 0x2DBDCC3A, 0x6F37F64C, 0xC24F862A, 0xB6F97FCF, 43 | 0x9E4FA23D, 0x551AE769, 0x46A8A5A6, 0xDC1BCFDD, 44 | 0x8F684CF9, 0x501D811B, 0x84279F80, 0x2614E0AC, 45 | 0x86445276, 0xAEA0CE71, 0x0812250F, 0xB586D18A, 46 | 0xC68D721B, 0x44514E1D, 0x37CDB99A, 0x24731F89, 47 | 0xFA72E589, 0x81E6EBA2, 0x15452965, 0x55523D9D, 48 | 0x2DC47E14, 0x2E7FA107, 0xA7790F23, 0x40EBFDBB, 49 | 0x77E7906B, 0x6C1DB960, 0x1A8B9898, 0x65FA0D90, 50 | 0xED28B4D8, 0x34C3ED75, 0x768FD2EC, 0xFAB60BCB, 51 | 0x962C75F4, 0x304F0498, 0x0A41A36B, 0xF7DE2A4A, 52 | 0xF4770FE2, 0x73C93BBB, 0xD21C82C5, 0x6C387447, 53 | 0x8CDB4CB9, 0x2CC243E8, 0x41859E3D, 0xB667B9CB, 54 | 0x89681E8A, 0x61A0526C, 0x883EDDDC, 0x539DE9A4, 55 | 0xC29E1DEC, 0x97C71EC5, 0x4A560A66, 0xBD7ECACF, 56 | 0x576AE998, 0x31CE5616, 0x97172A6C, 0x83D047C4, 57 | 0x274EA9A8, 0xEB31A9DA, 0x327209B5, 0x14D1F2CB, 58 | 0x00FE1D96, 0x817DBE08, 0xD3E55AED, 0xF2D30AFC, 59 | 0xFB072660, 0x866687D6, 0x92552EB9, 0xEA8219CD, 60 | 0xF7927269, 0xF1948483, 0x694C1DF5, 0xB7D8B7BF, 61 | 0xFFBC5D2F, 0x2E88B849, 0x883FD32B, 0xA0331192, 62 | 0x8CB244DF, 0x41FAF895, 0x16902220, 0x97FB512A, 63 | 0x2BEA3CC4, 0xAF9CAE61, 0x41ACD0D5, 0xFD2F28FF, 64 | 0xE780ADFA, 0xB3A3A76E, 0x7112AD87, 0x7C3D6058, 65 | 0x69E64FFF, 0xE5F8617C, 0x8580727C, 0x41F54F04, 66 | 0xD72BE498, 0x653D1795, 0x1275A327, 0x14B499D4, 67 | 0x4E34D553, 0x4687AA39, 0x68B64292, 0x5C18ABC3, 68 | 0x41EABFCC, 0x92A85616, 0x82684CF8, 0x5B9F8A4E, 69 | 0x35382FFE, 0xFB936318, 0x52C08E15, 0x80918B2E, 70 | 0x199EDEE0, 0xA9470163, 0xEC44ACDD, 0x612D6735, 71 | 0x8F88EA7D, 0x759F5EA4, 0xE5CC7240, 0x68CFEB8B, 72 | 0x04725601, 0x0C22C23E, 0x5BC97174, 0x89965841, 73 | 0x5D939479, 0x690F338A, 0x3C2D4380, 0xDAE97F2B 74 | }; 75 | 76 | 77 | void UintSeqHash::update(unsigned int i) 78 | 
{ 79 | i++; 80 | while(i > 0) 81 | { 82 | h ^= rtab[i & 0xff]; 83 | #if 1 84 | const unsigned int b = (h & 0x80000000) >> 31; 85 | i = i >> 8; 86 | h = (h << 1) | b; 87 | #else 88 | const unsigned int b = h & 0x80000000; 89 | h = h << 1; 90 | if(b != 0) 91 | h++; 92 | i = i >> 8; 93 | #endif 94 | } 95 | } 96 | 97 | 98 | } // namespace bliss 99 | -------------------------------------------------------------------------------- /grakel/kernels/_isomorphism/bliss-0.50/uintseqhash.hh: -------------------------------------------------------------------------------- 1 | #ifndef BLISS_UINTSEQHASH_HH 2 | #define BLISS_UINTSEQHASH_HH 3 | 4 | #include 5 | 6 | namespace bliss { 7 | 8 | /** 9 | * \brief A hash for sequences of unsigned ints. 10 | */ 11 | class UintSeqHash 12 | { 13 | protected: 14 | unsigned int h; 15 | public: 16 | UintSeqHash() {h = 0; } 17 | UintSeqHash(const UintSeqHash &other) {h = other.h; } 18 | UintSeqHash& operator=(const UintSeqHash &other) {h = other.h; return *this; } 19 | 20 | /** Reset the hash value. */ 21 | void reset() {h = 0; } 22 | 23 | /** Add the unsigned int \a n to the sequence. */ 24 | void update(unsigned int n); 25 | 26 | /** Get the hash value of the sequence seen so far. */ 27 | unsigned int get_value() const {return h; } 28 | 29 | /** Compare the hash values of this and \a other. 30 | * Return -1/0/1 if the value of this is smaller/equal/greater than 31 | * that of \a other. 
*/ 32 | int cmp(const UintSeqHash &other) const { 33 | return (h < other.h)?-1:((h == other.h)?0:1); 34 | } 35 | /** An abbreviation for cmp(other) < 0 */ 36 | bool is_lt(const UintSeqHash &other) const {return(cmp(other) < 0); } 37 | /** An abbreviation for cmp(other) <= 0 */ 38 | bool is_le(const UintSeqHash &other) const {return(cmp(other) <= 0); } 39 | /** An abbreviation for cmp(other) == 0 */ 40 | bool is_equal(const UintSeqHash &other) const {return(cmp(other) == 0); } 41 | }; 42 | 43 | 44 | } // namespace bliss 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /grakel/kernels/_isomorphism/bliss-0.50/utils.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "utils.hh" 3 | 4 | namespace bliss { 5 | 6 | void print_permutation(FILE *fp, 7 | const unsigned int N, 8 | const unsigned int *perm, 9 | const unsigned int offset) 10 | { 11 | assert(N > 0); 12 | assert(perm); 13 | for(unsigned int i = 0; i < N; i++) { 14 | unsigned int j = perm[i]; 15 | if(j == i) 16 | continue; 17 | bool is_first = true; 18 | while(j != i) { 19 | if(j < i) { 20 | is_first = false; 21 | break; 22 | } 23 | j = perm[j]; 24 | } 25 | if(!is_first) 26 | continue; 27 | fprintf(fp, "(%u,", i+offset); 28 | j = perm[i]; 29 | while(j != i) { 30 | fprintf(fp, "%u", j+offset); 31 | j = perm[j]; 32 | if(j != i) 33 | fprintf(fp, ","); 34 | } 35 | fprintf(fp, ")"); 36 | } 37 | } 38 | 39 | } // namespace bliss 40 | -------------------------------------------------------------------------------- /grakel/kernels/_isomorphism/bliss-0.50/utils.hh: -------------------------------------------------------------------------------- 1 | #ifndef BLISS_UTILS_HH 2 | #define BLISS_UTILS_HH 3 | 4 | /** 5 | * \file 6 | * \brief Some small utilities. 7 | * 8 | */ 9 | 10 | /* 11 | * Copyright (c) Tommi Junttila 12 | * Released under the GNU General Public License version 2. 
13 | */ 14 | 15 | #include 16 | 17 | namespace bliss { 18 | 19 | /** 20 | * Print the permutation \a perm of {0,...,N-1} in the cycle format 21 | * in the file stream \a fp. 22 | * The amount \a offset is added to each element before printing, 23 | * e.g. the permutation (2 4) is printed as (3 5) when \a offset is 1. 24 | */ 25 | void print_permutation(FILE *fp, 26 | const unsigned int N, 27 | const unsigned int *perm, 28 | const unsigned int offset = 0); 29 | 30 | } // namespace bliss 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /grakel/tests/__main__.py: -------------------------------------------------------------------------------- 1 | """The main function for the tests sub-module.""" 2 | # Author: Ioannis Siglidis 3 | # License: BSD 3 clause 4 | 5 | if __name__ == '__main__': 6 | import os 7 | import sys 8 | import warnings 9 | 10 | from subprocess import check_call 11 | 12 | warnings.filterwarnings('ignore', category=UserWarning) 13 | 14 | python_executable_address = str(sys.executable) 15 | test_dir = str(os.path.dirname(os.path.realpath(__file__))) 16 | project_dir = str(os.path.realpath(os.path.join(__file__, "../../../"))) 17 | 18 | print('Installing the latest "GraKeL"..') 19 | print('--------------------------------') 20 | 21 | cwd = os.getcwd() 22 | os.chdir(project_dir) 23 | try: 24 | check_call([python_executable_address, project_dir + "/setup.py", 25 | "install"]) 26 | finally: 27 | os.chdir(cwd) 28 | 29 | print('................................................................\n') 30 | 31 | print('Testing Graph..') 32 | print('---------------') 33 | check_call([python_executable_address, test_dir + "/test_graph.py", 34 | "--ignore_warnings", "--verbose"]) 35 | print('................................................................\n') 36 | 37 | print('Testing Kernels..') 38 | print('-----------------') 39 | check_call([python_executable_address, test_dir + "/test_kernels.py", 40 | 
"--verbose", "--time", "--ignore_warnings", "--all"]) 41 | print('................................................................\n') 42 | 43 | print('Testing Graph Kernels..') 44 | print('-----------------------') 45 | check_call([python_executable_address, 46 | test_dir + "/test_graph_kernels.py", 47 | "--verbose", "--time", "--ignore_warnings", "--all"]) 48 | print('................................................................') 49 | -------------------------------------------------------------------------------- /grakel/tests/data/Cuneiform/Cuneiform_graph_labels.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | 10 12 | 11 13 | 12 14 | 13 15 | 14 16 | 15 17 | 16 18 | 17 19 | 18 20 | 19 21 | 20 22 | 21 23 | 22 24 | 23 25 | 24 26 | 25 27 | 26 28 | 0 29 | 1 30 | 2 31 | 3 32 | 4 33 | 5 34 | 6 35 | 7 36 | 8 37 | 9 38 | 10 39 | 11 40 | 12 41 | 13 42 | 14 43 | 15 44 | 16 45 | 17 46 | 18 47 | 19 48 | 20 49 | 21 50 | 22 51 | 23 52 | 24 53 | 25 54 | 26 55 | 27 56 | 28 57 | 29 58 | 0 59 | 1 60 | 2 61 | 3 62 | 4 63 | 5 64 | 6 65 | 7 66 | 8 67 | 9 68 | 10 69 | 11 70 | 12 71 | 13 72 | 14 73 | 15 74 | 16 75 | 17 76 | 18 77 | 19 78 | 20 79 | 21 80 | 22 81 | 23 82 | 24 83 | 25 84 | 26 85 | 27 86 | 28 87 | 29 88 | 0 89 | 1 90 | 2 91 | 3 92 | 4 93 | 5 94 | 6 95 | 7 96 | 8 97 | 9 98 | 10 99 | 11 100 | 12 101 | 13 102 | 14 103 | 15 104 | 16 105 | 17 106 | 18 107 | 19 108 | 20 109 | 21 110 | 22 111 | 23 112 | 24 113 | 25 114 | 26 115 | 27 116 | 28 117 | 29 118 | 0 119 | 1 120 | 2 121 | 3 122 | 4 123 | 5 124 | 6 125 | 7 126 | 8 127 | 9 128 | 10 129 | 11 130 | 12 131 | 13 132 | 14 133 | 15 134 | 16 135 | 17 136 | 18 137 | 19 138 | 20 139 | 21 140 | 22 141 | 23 142 | 24 143 | 25 144 | 26 145 | 27 146 | 28 147 | 29 148 | 0 149 | 1 150 | 2 151 | 3 152 | 4 153 | 5 154 | 6 155 | 7 156 | 8 157 | 9 158 | 10 159 | 11 160 | 12 161 | 13 162 | 14 163 | 15 164 | 16 165 | 17 166 | 18 167 | 19 
168 | 20 169 | 21 170 | 22 171 | 23 172 | 24 173 | 25 174 | 26 175 | 27 176 | 28 177 | 29 178 | 0 179 | 1 180 | 2 181 | 3 182 | 4 183 | 5 184 | 6 185 | 7 186 | 8 187 | 9 188 | 10 189 | 11 190 | 12 191 | 13 192 | 14 193 | 15 194 | 16 195 | 17 196 | 18 197 | 19 198 | 20 199 | 21 200 | 22 201 | 23 202 | 24 203 | 25 204 | 26 205 | 27 206 | 28 207 | 29 208 | 0 209 | 1 210 | 2 211 | 3 212 | 4 213 | 5 214 | 6 215 | 7 216 | 8 217 | 9 218 | 10 219 | 11 220 | 12 221 | 13 222 | 14 223 | 15 224 | 16 225 | 17 226 | 18 227 | 19 228 | 20 229 | 21 230 | 22 231 | 23 232 | 24 233 | 25 234 | 26 235 | 27 236 | 28 237 | 29 238 | 0 239 | 1 240 | 2 241 | 3 242 | 4 243 | 5 244 | 6 245 | 7 246 | 8 247 | 9 248 | 10 249 | 11 250 | 12 251 | 13 252 | 14 253 | 15 254 | 16 255 | 17 256 | 18 257 | 19 258 | 20 259 | 21 260 | 22 261 | 23 262 | 24 263 | 25 264 | 26 265 | 27 266 | 28 267 | 29 268 | -------------------------------------------------------------------------------- /grakel/tests/data/Cuneiform/README.txt: -------------------------------------------------------------------------------- 1 | README for dataset Cuneiform 2 | 3 | 4 | === Usage === 5 | 6 | This folder contains the following comma separated text files 7 | (replace DS by the name of the dataset): 8 | 9 | n = total number of nodes 10 | m = total number of edges 11 | N = number of graphs 12 | 13 | (1) DS_A.txt (m lines) 14 | sparse (block diagonal) adjacency matrix for all graphs, 15 | each line corresponds to (row, col) resp. 
(node_id, node_id) 16 | 17 | (2) DS_graph_indicator.txt (n lines) 18 | column vector of graph identifiers for all nodes of all graphs, 19 | the value in the i-th line is the graph_id of the node with node_id i 20 | 21 | (3) DS_graph_labels.txt (N lines) 22 | class labels for all graphs in the dataset, 23 | the value in the i-th line is the class label of the graph with graph_id i 24 | 25 | (4) DS_node_labels.txt (n lines) 26 | column vector of node labels, 27 | the value in the i-th line corresponds to the node with node_id i 28 | 29 | There are OPTIONAL files if the respective information is available: 30 | 31 | (5) DS_edge_labels.txt (m lines; same size as DS_A_sparse.txt) 32 | labels for the edges in DS_A_sparse.txt 33 | 34 | (6) DS_edge_attributes.txt (m lines; same size as DS_A.txt) 35 | attributes for the edges in DS_A.txt 36 | 37 | (7) DS_node_attributes.txt (n lines) 38 | matrix of node attributes, 39 | the comma seperated values in the i-th line is the attribute vector of the node with node_id i 40 | 41 | (8) DS_graph_attributes.txt (N lines) 42 | regression values for all graphs in the dataset, 43 | the value in the i-th line is the attribute of the graph with graph_id i 44 | 45 | 46 | === Description === 47 | 48 | The Cuneiform dataset contains graphs representing 29 different Hittite cuneiform signs. 49 | The data was obtained from nine cuneiform tablets written by scholars of Hittitology in 50 | the course of a study about individualistic characteristics of cuneiform hand writing. 51 | After automated extraction of individual wedges, the affiliation of the wedges to the 52 | cuneiform signs were determined manually. The graph model is explained in detail in the 53 | referenced publication. 54 | 55 | 56 | === References === 57 | 58 | Nils M. Kriege, Matthias Fey, Denis Fisseler, Petra Mutzel, Frank Weichert 59 | Recognizing Cuneiform Signs Using Graph Based Methods. 2018. 
arXiv:1802.05908 60 | https://arxiv.org/abs/1802.05908 61 | 62 | 63 | === Description of Labels === 64 | 65 | Node labels were converted to integer values using this map: 66 | 67 | Component 0: 68 | 0 depthPoint 69 | 1 tailVertex 70 | 2 leftVertex 71 | 3 rightVertex 72 | 73 | Component 1: 74 | 0 vertical 75 | 1 Winkelhaken 76 | 2 horizontal 77 | 78 | 79 | 80 | Edge labels were converted to integer values using this map: 81 | 82 | Component 0: 83 | 0 wedge 84 | 1 arrangement 85 | 86 | 87 | 88 | Class labels were converted to integer values using this map: 89 | 90 | 0 tu 91 | 1 ta 92 | 2 ti 93 | 3 nu 94 | 4 na 95 | 5 ni 96 | 6 bu 97 | 7 ba 98 | 8 bi 99 | 9 zu 100 | 10 za 101 | 11 zi 102 | 12 su 103 | 13 sa 104 | 14 si 105 | 15 hu 106 | 16 ha 107 | 17 hi 108 | 18 du 109 | 19 da 110 | 20 di 111 | 21 ru 112 | 22 ra 113 | 23 ri 114 | 24 ku 115 | 25 ka 116 | 26 ki 117 | 27 lu 118 | 28 la 119 | 29 li 120 | -------------------------------------------------------------------------------- /grakel/tests/data/MUTAG/MUTAG_graph_labels.txt: -------------------------------------------------------------------------------- 1 | 1 2 | -1 3 | -1 4 | 1 5 | -1 6 | 1 7 | -1 8 | 1 9 | -1 10 | 1 11 | 1 12 | 1 13 | 1 14 | -1 15 | 1 16 | 1 17 | -1 18 | 1 19 | -1 20 | 1 21 | 1 22 | 1 23 | 1 24 | 1 25 | 1 26 | 1 27 | 1 28 | 1 29 | 1 30 | 1 31 | 1 32 | 1 33 | 1 34 | -1 35 | 1 36 | -1 37 | 1 38 | -1 39 | -1 40 | -1 41 | 1 42 | -1 43 | 1 44 | 1 45 | 1 46 | 1 47 | 1 48 | 1 49 | 1 50 | 1 51 | 1 52 | 1 53 | 1 54 | 1 55 | -1 56 | 1 57 | 1 58 | 1 59 | 1 60 | 1 61 | 1 62 | -1 63 | 1 64 | 1 65 | -1 66 | -1 67 | 1 68 | 1 69 | 1 70 | -1 71 | 1 72 | 1 73 | -1 74 | 1 75 | 1 76 | -1 77 | -1 78 | -1 79 | 1 80 | 1 81 | 1 82 | 1 83 | 1 84 | -1 85 | 1 86 | 1 87 | 1 88 | -1 89 | -1 90 | 1 91 | 1 92 | 1 93 | 1 94 | 1 95 | 1 96 | 1 97 | 1 98 | -1 99 | 1 100 | -1 101 | 1 102 | 1 103 | 1 104 | 1 105 | 1 106 | 1 107 | 1 108 | 1 109 | 1 110 | -1 111 | -1 112 | 1 113 | -1 114 | -1 115 | 1 116 | -1 117 | 1 118 | 1 119 | 
-1 120 | -1 121 | 1 122 | 1 123 | -1 124 | -1 125 | 1 126 | 1 127 | 1 128 | 1 129 | -1 130 | -1 131 | -1 132 | -1 133 | -1 134 | 1 135 | -1 136 | 1 137 | 1 138 | -1 139 | -1 140 | 1 141 | -1 142 | -1 143 | -1 144 | -1 145 | 1 146 | 1 147 | -1 148 | 1 149 | 1 150 | -1 151 | 1 152 | 1 153 | 1 154 | -1 155 | -1 156 | -1 157 | 1 158 | 1 159 | 1 160 | -1 161 | 1 162 | 1 163 | 1 164 | 1 165 | 1 166 | 1 167 | 1 168 | -1 169 | 1 170 | 1 171 | 1 172 | 1 173 | 1 174 | 1 175 | -1 176 | 1 177 | 1 178 | 1 179 | -1 180 | 1 181 | -1 182 | -1 183 | 1 184 | 1 185 | -1 186 | -1 187 | 1 188 | -1 189 | -------------------------------------------------------------------------------- /grakel/tests/data/MUTAG/README.txt: -------------------------------------------------------------------------------- 1 | README for dataset MUTAG 2 | 3 | 4 | === Usage === 5 | 6 | This folder contains the following comma separated text files 7 | (replace DS by the name of the dataset): 8 | 9 | n = total number of nodes 10 | m = total number of edges 11 | N = number of graphs 12 | 13 | (1) DS_A.txt (m lines) 14 | sparse (block diagonal) adjacency matrix for all graphs, 15 | each line corresponds to (row, col) resp. 
(node_id, node_id) 16 | 17 | (2) DS_graph_indicator.txt (n lines) 18 | column vector of graph identifiers for all nodes of all graphs, 19 | the value in the i-th line is the graph_id of the node with node_id i 20 | 21 | (3) DS_graph_labels.txt (N lines) 22 | class labels for all graphs in the dataset, 23 | the value in the i-th line is the class label of the graph with graph_id i 24 | 25 | (4) DS_node_labels.txt (n lines) 26 | column vector of node labels, 27 | the value in the i-th line corresponds to the node with node_id i 28 | 29 | There are OPTIONAL files if the respective information is available: 30 | 31 | (5) DS_edge_labels.txt (m lines; same size as DS_A_sparse.txt) 32 | labels for the edges in DD_A_sparse.txt 33 | 34 | (6) DS_edge_attributes.txt (m lines; same size as DS_A.txt) 35 | attributes for the edges in DS_A.txt 36 | 37 | (7) DS_node_attributes.txt (n lines) 38 | matrix of node attributes, 39 | the comma seperated values in the i-th line is the attribute vector of the node with node_id i 40 | 41 | (8) DS_graph_attributes.txt (N lines) 42 | regression values for all graphs in the dataset, 43 | the value in the i-th line is the attribute of the graph with graph_id i 44 | 45 | 46 | === Description of the dataset === 47 | 48 | The MUTAG dataset consists of 188 chemical compounds divided into two 49 | classes according to their mutagenic effect on a bacterium. 50 | 51 | The chemical data was obtained form http://cdb.ics.uci.edu and converted 52 | to graphs, where vertices represent atoms and edges represent chemical 53 | bonds. Explicit hydrogen atoms have been removed and vertices are labeled 54 | by atom type and edges by bond type (single, double, triple or aromatic). 55 | Chemical data was processed using the Chemistry Development Kit (v1.4). 
56 | 57 | Node labels: 58 | 59 | 0 C 60 | 1 N 61 | 2 O 62 | 3 F 63 | 4 I 64 | 5 Cl 65 | 6 Br 66 | 67 | Edge labels: 68 | 69 | 0 aromatic 70 | 1 single 71 | 2 double 72 | 3 triple 73 | 74 | 75 | === Previous Use of the Dataset === 76 | 77 | Kriege, N., Mutzel, P.: Subgraph matching kernels for attributed graphs. In: Proceedings 78 | of the 29th International Conference on Machine Learning (ICML-2012) (2012). 79 | 80 | 81 | === References === 82 | 83 | Debnath, A.K., Lopez de Compadre, R.L., Debnath, G., Shusterman, A.J., and Hansch, C. 84 | Structure-activity relationship of mutagenic aromatic and heteroaromatic nitro compounds. 85 | Correlation with molecular orbital energies and hydrophobicity. J. Med. Chem. 34(2):786-797 (1991). 86 | -------------------------------------------------------------------------------- /grakel/tests/test_graph.py: -------------------------------------------------------------------------------- 1 | """Tests for the Graph class.""" 2 | # Author: Ioannis Siglidis 3 | # License: BSD 3 clause 4 | import numpy as np 5 | import numpy.testing as npt 6 | 7 | from grakel.graph import Graph 8 | 9 | global verbose 10 | 11 | # Add extra arguments for allowing unit testing 12 | if __name__ == '__main__': 13 | import argparse 14 | parser = argparse.ArgumentParser( 15 | description='A test file for all `Graph` type objects') 16 | parser.add_argument( 17 | '--verbose', 18 | help='verbose outputs on stdout', 19 | action="store_true") 20 | parser.add_argument( 21 | '--ignore_warnings', 22 | help='ignore warnings produced by kernel executions', 23 | action="store_true") 24 | 25 | args = parser.parse_args() 26 | verbose = bool(args.verbose) 27 | 28 | if bool(args.ignore_warnings): 29 | import warnings 30 | warnings.filterwarnings('ignore', category=UserWarning) 31 | else: 32 | import warnings 33 | warnings.filterwarnings('ignore', category=UserWarning) 34 | verbose = False 35 | 36 | 37 | def test_graph_adjacency(): 38 | """Testing Graph object consistency 
for an adjacency-type initialization object.""" 39 | # Input 40 | X = np.array([[1, 1, 0, 3], [1, 0, 0, 2], [2, 3, 0, 1], [1, 0, 0, 0]]) 41 | labels = {0: 'banana', 1: 'cherry', 2: 'banana', 3: 'cherry'} 42 | 43 | # try all formats 44 | g = dict() 45 | g["auto"] = Graph(X, labels, {}, "auto") 46 | g["dict"] = Graph(X, labels, {}, "dictionary") 47 | g["adjc"] = Graph(X, labels, {}, "adjacency") 48 | g["all"] = Graph(X, labels, {}, "all") 49 | 50 | # Desired output label group 51 | desired_output_label_group = {'cherry': [1, 3], 'banana': [0, 2]} 52 | 53 | for k in g.keys(): 54 | gklg = g[k].get_label_group() 55 | if verbose: 56 | print(k) 57 | print(gklg, '\n') 58 | else: 59 | npt.assert_equal(desired_output_label_group, gklg) 60 | 61 | # Desired Shortest path matrix 62 | spm_do = [[0., 1., float("Inf"), 3.], 63 | [1., 0., float("Inf"), 2.], 64 | [2., 3., 0., 1.], 65 | [1., 2., float("Inf"), 0.]] 66 | 67 | for k in g.keys(): 68 | spm, spl = g[k].build_shortest_path_matrix(algorithm_type="auto") 69 | if verbose: 70 | print(k) 71 | print(spm, '\n', spl, '\n') 72 | else: 73 | npt.assert_array_equal(spm, spm_do) 74 | npt.assert_equal(spl, labels) 75 | 76 | 77 | def test_graph_edge_dictionary(): 78 | """Testing Graph object consistency for an edge-dictionary-type initialization object.""" 79 | # Input 80 | X = {'a': {'a': 1, 'b': 1, 'd': 3}, 81 | 'b': {'a': 1, 'd': 2}, 82 | 'c': {'a': 2, 'b': 3, 'd': 1}, 83 | 'd': {'a': 1}} 84 | 85 | labels = {'a': 'banana', 'b': 'cherry', 'c': 'banana', 'd': 'cherry'} 86 | 87 | # Test for all Graph formats 88 | g = dict() 89 | g["auto"] = Graph(X, labels, {}, "auto") 90 | g["dict"] = Graph(X, labels, {}, "dictionary") 91 | g["adjc"] = Graph(X, labels, {}, "adjacency") 92 | g["all"] = Graph(X, labels, {}, "all") 93 | 94 | # Desired output label group 95 | desired_output_label_group = {'cherry': set(['d', 'b']), 96 | 'banana': set(['a', 'c'])} 97 | desired_output_label_group_idx = {'banana': set([0, 2]), 98 | 'cherry': set([1, 3])} 99 | 
100 | def proper_dict(x): 101 | return {key: set(x[key]) for key in x.keys()} 102 | 103 | for k in g.keys(): 104 | gklg = g[k].get_label_group() 105 | if verbose: 106 | print(k) 107 | print(gklg, '\n') 108 | else: 109 | if (k == "adjc"): 110 | npt.assert_equal( 111 | desired_output_label_group_idx, 112 | proper_dict(gklg)) 113 | else: 114 | npt.assert_equal( 115 | desired_output_label_group, 116 | proper_dict(gklg)) 117 | 118 | # Desired Shortest path matrix 119 | spm_do = [[0., 1., float("Inf"), 3.], 120 | [1., 0., float("Inf"), 2.], 121 | [2., 3., 0., 1.], 122 | [1., 2., float("Inf"), 0.]] 123 | 124 | desired_labels = {0: 'banana', 1: 'cherry', 2: 'banana', 3: 'cherry'} 125 | 126 | for k in g.keys(): 127 | spm, spl = g[k].build_shortest_path_matrix(algorithm_type="auto") 128 | if verbose: 129 | print(k) 130 | print(spm, '\n', spl, '\n') 131 | else: 132 | npt.assert_array_equal(spm, spm_do) 133 | npt.assert_equal(spl, desired_labels) 134 | 135 | 136 | if __name__ == '__main__': 137 | test_graph_adjacency() 138 | test_graph_edge_dictionary() 139 | -------------------------------------------------------------------------------- /grakel/tests/test_windows_sdp_issue.py: -------------------------------------------------------------------------------- 1 | # This test case was found to be inconsistent when running on github action runners 2 | # However the `test_lovasz` tests seem to pass locally. The test body comes from 3 | # `LovaszTheta.parse_input` and is a minimal example which causes those tests to fail. 4 | # 5 | # To enable the test, change `DISABLED` to False 6 | # 7 | # This test is here in case debugging is required in the future. 
8 | import pytest 9 | 10 | DISABLED = True 11 | cvxopt = True 12 | try: 13 | import cvxopt 14 | except ImportError: 15 | cvxopt = False 16 | 17 | @pytest.mark.skipif(DISABLED or not cvxopt, reason="Skipping debugging test") 18 | @pytest.mark.parametrize( 19 | "nv, ne, e_list, x_list", 20 | [ 21 | ( 22 | 9, 23 | 15, 24 | [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 25 | 12, 12, 13, 13, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15], 26 | [2, 2, 3, 3, 6, 6, 7, 7, 12, 12, 14, 14, 15, 15, 21, 21, 22, 22, 23, 23, 25, 27 | 25, 41, 41, 44, 44, 53, 53, 71, 71, 0, 10, 20, 30, 40, 50, 60, 70, 80], 28 | ) 29 | ], 30 | ) 31 | def test_windows_sdp(nv, ne, e_list, x_list) -> None: 32 | # initialise g sparse (to values -1, based on two list that 33 | # define index and one that defines shape 34 | from cvxopt.base import matrix, spmatrix 35 | from cvxopt.solvers import sdp 36 | 37 | print(nv, ne, e_list, x_list) 38 | g_sparse = spmatrix(-1, x_list, e_list, (nv * nv, ne + 1)) 39 | 40 | # Initialise optimization parameters 41 | h = matrix(-1.0, (nv, nv)) 42 | c = matrix([0.0] * ne + [1.0]) 43 | 44 | # Solve the convex optimization problem 45 | # Should raise here on windows 46 | sol = sdp(c, Gs=[g_sparse], hs=[h]) 47 | assert sol is not None 48 | -------------------------------------------------------------------------------- /meta.yaml: -------------------------------------------------------------------------------- 1 | {% set name = "grakel-dev" %} 2 | {% set version = "0.1a5" %} 3 | 4 | package: 5 | name: '{{ name|lower }}' 6 | version: '{{ version }}' 7 | 8 | source: 9 | path: ../GraKeL 10 | 11 | build: 12 | number: 0 13 | script: python setup.py install --single-version-externally-managed --record=record.txt 14 | 15 | requirements: 16 | host: 17 | - python 18 | - setuptools 19 | - pytest 20 | - numpy >=1.14.0 21 | - scikit-learn >=0.19.0 22 | - scipy >=1.0.1 23 | - cython >=0.27.3 24 | - future >=0.16.0 25 | - six >=1.11.0 26 | - cvxopt >=1.2.0 27 | 
run: 28 | - python 29 | - pytest 30 | - numpy >=1.14.0 31 | - scikit-learn >=0.19.0 32 | - scipy >=1.0.1 33 | - cython >=0.27.3 34 | - future >=0.16.0 35 | - six >=1.11.0 36 | - cvxopt >=1.2.0 37 | build: 38 | - {{ compiler('cxx') }} 39 | 40 | test: 41 | imports: 42 | - grakel 43 | - grakel.datasets 44 | - grakel.kernels 45 | - grakel.kernels._c_functions 46 | - grakel.kernels._isomorphism 47 | - grakel.tests 48 | 49 | about: 50 | home: https://github.com/ysig/GraKeL 51 | license: BSD-3-Clause 52 | license_family: BSD 53 | license_file: '' 54 | summary: A scikit-learn compatible library for graph kernels. 55 | description: "For more info, please visit the documenantion or the github-page." 56 | doc_url: 'https://ysig.github.io/GraKeL/dev/' 57 | 58 | extra: 59 | recipe-maintainers: 'Ioannis Siglidis' 60 | -------------------------------------------------------------------------------- /misc/implement_list: -------------------------------------------------------------------------------- 1 | Current: 2 | 3 | 4 | Archive: 5 | - Calculates the full graphlet kernel 6 | as proposed by Shervashidze and Vishwanathan (2009) 7 | for graphlets of size 3,4,5 8 | 9 | TOSOLVE: 10 | - overflow error for some executions of the multiscale laplacian 11 | 12 | TODO: 13 | - correct the build frame work 14 | - unit test by asserting the existing values 15 | - add new kernels to graph_kernels.py 16 | - support input format for dictionaries as list of tuples, 17 | list of truples, dictionary of tuples 18 | - support sparse matrix format. 19 | (maybe change adjacency mat to scipy csr_mat?) 
20 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0.0", "cython", "numpy>=1.14.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "GraKeL" 7 | version='0.1.10' 8 | dependencies = [ 9 | "numpy >= 1.14.0", 10 | "cython >= 0.27.3", 11 | "scikit-learn >= 0.19", 12 | "six >= 1.11.0", 13 | "future >= 0.16.0", 14 | "joblib" 15 | ] 16 | requires-python=">=3.5" 17 | authors = [ 18 | { name = "Ioannis-Siglidis", email = "y.siglidis@gmail.com" } 19 | ] 20 | readme = "README.md" 21 | description='A scikit-learn compatible library for graph kernels' 22 | classifiers=[ 23 | 'Intended Audience :: Science/Research', 24 | 'Intended Audience :: Developers', 25 | 'License :: OSI Approved', 26 | 'Programming Language :: C', 27 | 'Programming Language :: Python', 28 | 'Topic :: Software Development', 29 | 'Topic :: Scientific/Engineering', 30 | 'Operating System :: POSIX', 31 | 'Operating System :: Unix', 32 | 'Operating System :: MacOS', 33 | 'Programming Language :: Python :: 3', 34 | ] 35 | license= { file = "LICENSE" } 36 | 37 | [project.optional-dependencies] 38 | lovasz = ["cvxopt>=1.2.0"] 39 | dev = ["cvxopt>=1.2.0", "pytest", "pytest-coverage", "torch_geometric"] 40 | test = ["pytest", "pytest-coverage", "torch_geometric"] 41 | wheel = ["pytest", "pytest-coverage"] 42 | 43 | [project.urls] 44 | documentation = 'https://ysig.github.io/GraKeL/' 45 | feedback = 'http://www.lix.polytechnique.fr/dascim/contact/' 46 | source = 'https://github.com/ysig/GraKeL/' 47 | tracker = 'https://github.com/ysig/GraKeL/issues' 48 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.14.0 2 | cython>=0.27.3 3 | scikit-learn>=0.19 4 | scipy>=1.12.0 5 | 
six>=1.11.0 6 | future>=0.16.0 7 | joblib 8 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """The general `setup.py` file.""" 2 | # Author: Ioannis Siglidis 3 | # License: BSD 3 clause 4 | import sys 5 | from platform import system 6 | 7 | from setuptools import Extension, find_packages, setup 8 | from numpy import get_include 9 | from Cython.Build import build_ext 10 | 11 | # Compile extensions 12 | 13 | # Set optimization arguments for compilation 14 | OS = system() 15 | if OS == "Windows": 16 | extra_compile_args = ["/O2", "/w"] 17 | elif OS in ["Linux", "Darwin"]: 18 | extra_compile_args = ["-O3", "-w"] 19 | 20 | # Remove the "-Wstrict-prototypes" compiler option, which isn't valid for C++. 21 | import distutils.sysconfig 22 | 23 | cfg_vars = distutils.sysconfig.get_config_vars() 24 | for key, value in cfg_vars.items(): 25 | if type(value) == str: 26 | cfg_vars[key] = value.replace("-Wstrict-prototypes", "") 27 | 28 | # Add the _c_functions extension on kernels 29 | ext_address = "./grakel/kernels/_c_functions/" 30 | ext = Extension( 31 | name="grakel.kernels._c_functions", 32 | sources=[ 33 | ext_address + "functions.pyx", 34 | ext_address + "src/ArashPartov.cpp", 35 | ext_address + "src/sm_core.cpp", 36 | ], 37 | include_dirs=[ext_address + "include", get_include()], 38 | depends=[ext_address + "include/functions.hpp"], 39 | language="c++", 40 | extra_compile_args=extra_compile_args, 41 | ) 42 | 43 | # Add the bliss library extension for calculating isomorphism 44 | isodir = "./grakel/kernels/_isomorphism/" 45 | blissdir = isodir + "bliss-0.50/" 46 | 47 | # The essential bliss source files 48 | blisssrcs = ["graph.cc", "heap.cc", "orbit.cc", "partition.cc", "uintseqhash.cc"] 49 | blisssrcs = [blissdir + src for src in blisssrcs] 50 | pn = str(sys.version_info[0]) 51 | 52 | # Compile intpybliss 53 | intpybliss = Extension( 54 | 
name="grakel.kernels._isomorphism.intpybliss", 55 | define_macros=[("MAJOR_VERSION", "0"), ("MINOR_VERSION", "50beta")], 56 | include_dirs=[blissdir], 57 | language="c++", 58 | extra_compile_args=extra_compile_args, 59 | sources=[isodir + "intpyblissmodule_" + pn + ".cc"] + blisssrcs, 60 | ) 61 | 62 | # Make bliss extension 63 | bliss = Extension( 64 | name="grakel.kernels._isomorphism.bliss", 65 | include_dirs=[isodir], 66 | language="c++", 67 | extra_compile_args=extra_compile_args, 68 | sources=[isodir + "bliss.pyx"], 69 | ) 70 | 71 | setup( 72 | packages=find_packages(), 73 | package_data={"grakel.tests": ["data/Cuneiform/*.txt", "data/MUTAG/*.txt"]}, 74 | ext_modules=[intpybliss, bliss, ext], 75 | cmdclass={"build_ext": build_ext}, 76 | ) 77 | --------------------------------------------------------------------------------